summaryrefslogtreecommitdiffstats
path: root/third_party/jpeg-xl
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 17:32:43 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 17:32:43 +0000
commit6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch)
treea68f146d7fa01f0134297619fbe7e33db084e0aa /third_party/jpeg-xl
parentInitial commit. (diff)
downloadthunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.tar.xz
thunderbird-6bf0a5cb5034a7e684dcc3500e841785237ce2dd.zip
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/jpeg-xl')
-rw-r--r--third_party/jpeg-xl/.bazelignore1
-rw-r--r--third_party/jpeg-xl/.clang-format4
-rw-r--r--third_party/jpeg-xl/.clang-tidy70
-rw-r--r--third_party/jpeg-xl/.github/ISSUE_TEMPLATE/bug_report.md37
-rw-r--r--third_party/jpeg-xl/.github/ISSUE_TEMPLATE/feature_request.md20
-rw-r--r--third_party/jpeg-xl/.github/workflows/build_test.yml433
-rw-r--r--third_party/jpeg-xl/.github/workflows/build_test_cross.yml183
-rw-r--r--third_party/jpeg-xl/.github/workflows/conformance.yml185
-rw-r--r--third_party/jpeg-xl/.github/workflows/debug_ci.yml59
-rw-r--r--third_party/jpeg-xl/.github/workflows/fuzz.yml56
-rw-r--r--third_party/jpeg-xl/.github/workflows/pull_request.yml42
-rw-r--r--third_party/jpeg-xl/.github/workflows/release.yaml378
-rw-r--r--third_party/jpeg-xl/.readthedocs.yaml17
-rw-r--r--third_party/jpeg-xl/AUTHORS67
-rw-r--r--third_party/jpeg-xl/BUILD.bazel22
-rw-r--r--third_party/jpeg-xl/BUILDING.md98
-rw-r--r--third_party/jpeg-xl/BUILDING_Haiku.md20
-rw-r--r--third_party/jpeg-xl/BUILDING_OSX.md41
-rw-r--r--third_party/jpeg-xl/CHANGELOG.md294
-rw-r--r--third_party/jpeg-xl/CMakeLists.txt494
-rw-r--r--third_party/jpeg-xl/CODE_OF_CONDUCT.md93
-rw-r--r--third_party/jpeg-xl/CONTRIBUTING.md132
-rw-r--r--third_party/jpeg-xl/CONTRIBUTORS23
-rw-r--r--third_party/jpeg-xl/LICENSE27
-rw-r--r--third_party/jpeg-xl/PATENTS22
-rw-r--r--third_party/jpeg-xl/README.md133
-rw-r--r--third_party/jpeg-xl/SECURITY.md73
-rw-r--r--third_party/jpeg-xl/WORKSPACE742
-rwxr-xr-xthird_party/jpeg-xl/bash_test.sh320
-rwxr-xr-xthird_party/jpeg-xl/ci.sh1552
-rw-r--r--third_party/jpeg-xl/cmake/FindAtomics.cmake53
-rw-r--r--third_party/jpeg-xl/cmake/FindBrotli.cmake75
-rw-r--r--third_party/jpeg-xl/cmake/FindHWY.cmake66
-rw-r--r--third_party/jpeg-xl/cmake/FindLCMS2.cmake59
-rw-r--r--third_party/jpeg-xl/debian/changelog95
-rw-r--r--third_party/jpeg-xl/debian/compat1
-rw-r--r--third_party/jpeg-xl/debian/control88
-rw-r--r--third_party/jpeg-xl/debian/copyright194
-rw-r--r--third_party/jpeg-xl/debian/jxl.install3
-rw-r--r--third_party/jpeg-xl/debian/libjxl-dev.install4
-rw-r--r--third_party/jpeg-xl/debian/libjxl-gdk-pixbuf.install3
-rw-r--r--third_party/jpeg-xl/debian/libjxl-gimp-plugin.install1
-rw-r--r--third_party/jpeg-xl/debian/libjxl.install1
-rwxr-xr-xthird_party/jpeg-xl/debian/rules21
-rw-r--r--third_party/jpeg-xl/debian/source/format1
-rwxr-xr-xthird_party/jpeg-xl/deps.sh84
-rw-r--r--third_party/jpeg-xl/docker/Dockerfile.jpegxl-builder21
-rw-r--r--third_party/jpeg-xl/docker/Dockerfile.jpegxl-builder-run-aarch6436
-rw-r--r--third_party/jpeg-xl/docker/README.md7
-rwxr-xr-xthird_party/jpeg-xl/docker/build.sh83
-rw-r--r--third_party/jpeg-xl/docker/scripts/99_norecommends1
-rw-r--r--third_party/jpeg-xl/docker/scripts/binutils_align_fix.patch28
-rwxr-xr-xthird_party/jpeg-xl/docker/scripts/emsdk_install.sh37
-rwxr-xr-xthird_party/jpeg-xl/docker/scripts/jpegxl_builder.sh516
-rwxr-xr-xthird_party/jpeg-xl/docker/scripts/msan_install.sh131
-rwxr-xr-xthird_party/jpeg-xl/docker/scripts/qemu_install.sh83
-rw-r--r--third_party/jpeg-xl/examples/CMakeLists.txt56
-rw-r--r--third_party/jpeg-xl/examples/decode_exif_metadata.cc172
-rw-r--r--third_party/jpeg-xl/examples/decode_oneshot.cc251
-rw-r--r--third_party/jpeg-xl/examples/decode_progressive.cc245
-rw-r--r--third_party/jpeg-xl/examples/encode_oneshot.cc276
-rw-r--r--third_party/jpeg-xl/examples/examples.cmake11
-rw-r--r--third_party/jpeg-xl/experimental/fast_lossless/.gitignore1
-rw-r--r--third_party/jpeg-xl/experimental/fast_lossless/README.md10
-rwxr-xr-xthird_party/jpeg-xl/experimental/fast_lossless/build-android.sh27
-rwxr-xr-xthird_party/jpeg-xl/experimental/fast_lossless/build.sh27
-rwxr-xr-xthird_party/jpeg-xl/experimental/fast_lossless/cross_compile_aarch64.sh26
-rw-r--r--third_party/jpeg-xl/experimental/fast_lossless/fast_lossless_main.cc113
-rw-r--r--third_party/jpeg-xl/experimental/fast_lossless/pam-input.h289
-rw-r--r--third_party/jpeg-xl/lib/BUILD256
-rw-r--r--third_party/jpeg-xl/lib/CMakeLists.txt168
-rw-r--r--third_party/jpeg-xl/lib/compatibility.cmake30
-rw-r--r--third_party/jpeg-xl/lib/extras/LICENSE.apngdis27
-rw-r--r--third_party/jpeg-xl/lib/extras/README.md5
-rw-r--r--third_party/jpeg-xl/lib/extras/codec.cc191
-rw-r--r--third_party/jpeg-xl/lib/extras/codec.h70
-rw-r--r--third_party/jpeg-xl/lib/extras/codec_test.cc645
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/apng.cc962
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/apng.h34
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/color_description.cc218
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/color_description.h23
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/color_description_test.cc38
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/color_hints.cc66
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/color_hints.h72
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/decode.cc132
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/decode.h55
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/exr.cc184
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/exr.h32
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/gif.cc400
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/gif.h33
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/jpegli.cc271
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/jpegli.h41
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/jpg.cc322
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/jpg.h44
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/jxl.cc561
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/jxl.h69
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/pgx.cc202
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/pgx.h35
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/pgx_test.cc79
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/pnm.cc474
-rw-r--r--third_party/jpeg-xl/lib/extras/dec/pnm.h41
-rw-r--r--third_party/jpeg-xl/lib/extras/enc/apng.cc371
-rw-r--r--third_party/jpeg-xl/lib/extras/enc/apng.h23
-rw-r--r--third_party/jpeg-xl/lib/extras/enc/encode.cc170
-rw-r--r--third_party/jpeg-xl/lib/extras/enc/encode.h83
-rw-r--r--third_party/jpeg-xl/lib/extras/enc/exr.cc200
-rw-r--r--third_party/jpeg-xl/lib/extras/enc/exr.h23
-rw-r--r--third_party/jpeg-xl/lib/extras/enc/jpegli.cc503
-rw-r--r--third_party/jpeg-xl/lib/extras/enc/jpegli.h47
-rw-r--r--third_party/jpeg-xl/lib/extras/enc/jpg.cc427
-rw-r--r--third_party/jpeg-xl/lib/extras/enc/jpg.h23
-rw-r--r--third_party/jpeg-xl/lib/extras/enc/jxl.cc276
-rw-r--r--third_party/jpeg-xl/lib/extras/enc/jxl.h78
-rw-r--r--third_party/jpeg-xl/lib/extras/enc/npy.cc322
-rw-r--r--third_party/jpeg-xl/lib/extras/enc/npy.h23
-rw-r--r--third_party/jpeg-xl/lib/extras/enc/pgx.cc123
-rw-r--r--third_party/jpeg-xl/lib/extras/enc/pgx.h24
-rw-r--r--third_party/jpeg-xl/lib/extras/enc/pnm.cc303
-rw-r--r--third_party/jpeg-xl/lib/extras/enc/pnm.h28
-rw-r--r--third_party/jpeg-xl/lib/extras/exif.cc55
-rw-r--r--third_party/jpeg-xl/lib/extras/exif.h20
-rw-r--r--third_party/jpeg-xl/lib/extras/hlg.cc56
-rw-r--r--third_party/jpeg-xl/lib/extras/hlg.h21
-rw-r--r--third_party/jpeg-xl/lib/extras/jpegli_test.cc405
-rw-r--r--third_party/jpeg-xl/lib/extras/packed_image.h170
-rw-r--r--third_party/jpeg-xl/lib/extras/packed_image_convert.cc300
-rw-r--r--third_party/jpeg-xl/lib/extras/packed_image_convert.h36
-rw-r--r--third_party/jpeg-xl/lib/extras/size_constraints.h43
-rw-r--r--third_party/jpeg-xl/lib/extras/time.cc60
-rw-r--r--third_party/jpeg-xl/lib/extras/time.h19
-rw-r--r--third_party/jpeg-xl/lib/extras/tone_mapping.cc132
-rw-r--r--third_party/jpeg-xl/lib/extras/tone_mapping.h30
-rw-r--r--third_party/jpeg-xl/lib/extras/tone_mapping_gbench.cc40
-rw-r--r--third_party/jpeg-xl/lib/gbench_main.cc8
-rw-r--r--third_party/jpeg-xl/lib/include/jxl/butteraugli.h160
-rw-r--r--third_party/jpeg-xl/lib/include/jxl/butteraugli_cxx.h60
-rw-r--r--third_party/jpeg-xl/lib/include/jxl/cms_interface.h232
-rw-r--r--third_party/jpeg-xl/lib/include/jxl/codestream_header.h430
-rw-r--r--third_party/jpeg-xl/lib/include/jxl/color_encoding.h162
-rw-r--r--third_party/jpeg-xl/lib/include/jxl/decode.h1446
-rw-r--r--third_party/jpeg-xl/lib/include/jxl/decode_cxx.h57
-rw-r--r--third_party/jpeg-xl/lib/include/jxl/encode.h1213
-rw-r--r--third_party/jpeg-xl/lib/include/jxl/encode_cxx.h57
-rw-r--r--third_party/jpeg-xl/lib/include/jxl/memory_manager.h72
-rw-r--r--third_party/jpeg-xl/lib/include/jxl/parallel_runner.h156
-rw-r--r--third_party/jpeg-xl/lib/include/jxl/resizable_parallel_runner.h78
-rw-r--r--third_party/jpeg-xl/lib/include/jxl/resizable_parallel_runner_cxx.h64
-rw-r--r--third_party/jpeg-xl/lib/include/jxl/thread_parallel_runner.h72
-rw-r--r--third_party/jpeg-xl/lib/include/jxl/thread_parallel_runner_cxx.h64
-rw-r--r--third_party/jpeg-xl/lib/include/jxl/types.h186
-rw-r--r--third_party/jpeg-xl/lib/jpegli.cmake106
-rw-r--r--third_party/jpeg-xl/lib/jpegli/README.md28
-rw-r--r--third_party/jpeg-xl/lib/jpegli/adaptive_quantization.cc563
-rw-r--r--third_party/jpeg-xl/lib/jpegli/adaptive_quantization.h21
-rw-r--r--third_party/jpeg-xl/lib/jpegli/bit_writer.cc60
-rw-r--r--third_party/jpeg-xl/lib/jpegli/bit_writer.h107
-rw-r--r--third_party/jpeg-xl/lib/jpegli/bitstream.cc1136
-rw-r--r--third_party/jpeg-xl/lib/jpegli/bitstream.h37
-rw-r--r--third_party/jpeg-xl/lib/jpegli/color_quantize.cc533
-rw-r--r--third_party/jpeg-xl/lib/jpegli/color_quantize.h30
-rw-r--r--third_party/jpeg-xl/lib/jpegli/color_transform.cc281
-rw-r--r--third_party/jpeg-xl/lib/jpegli/color_transform.h24
-rw-r--r--third_party/jpeg-xl/lib/jpegli/common.cc59
-rw-r--r--third_party/jpeg-xl/lib/jpegli/common.h67
-rw-r--r--third_party/jpeg-xl/lib/jpegli/common_internal.h150
-rw-r--r--third_party/jpeg-xl/lib/jpegli/dct-inl.h266
-rw-r--r--third_party/jpeg-xl/lib/jpegli/dct.cc75
-rw-r--r--third_party/jpeg-xl/lib/jpegli/dct.h20
-rw-r--r--third_party/jpeg-xl/lib/jpegli/decode.cc981
-rw-r--r--third_party/jpeg-xl/lib/jpegli/decode.h111
-rw-r--r--third_party/jpeg-xl/lib/jpegli/decode_api_test.cc1305
-rw-r--r--third_party/jpeg-xl/lib/jpegli/decode_internal.h150
-rw-r--r--third_party/jpeg-xl/lib/jpegli/decode_marker.cc588
-rw-r--r--third_party/jpeg-xl/lib/jpegli/decode_marker.h34
-rw-r--r--third_party/jpeg-xl/lib/jpegli/decode_scan.cc566
-rw-r--r--third_party/jpeg-xl/lib/jpegli/decode_scan.h33
-rw-r--r--third_party/jpeg-xl/lib/jpegli/destination_manager.cc148
-rw-r--r--third_party/jpeg-xl/lib/jpegli/downsample.cc356
-rw-r--r--third_party/jpeg-xl/lib/jpegli/downsample.h24
-rw-r--r--third_party/jpeg-xl/lib/jpegli/encode.cc1153
-rw-r--r--third_party/jpeg-xl/lib/jpegli/encode.h159
-rw-r--r--third_party/jpeg-xl/lib/jpegli/encode_api_test.cc856
-rw-r--r--third_party/jpeg-xl/lib/jpegli/encode_internal.h101
-rw-r--r--third_party/jpeg-xl/lib/jpegli/entropy_coding.cc605
-rw-r--r--third_party/jpeg-xl/lib/jpegli/entropy_coding.h45
-rw-r--r--third_party/jpeg-xl/lib/jpegli/error.cc102
-rw-r--r--third_party/jpeg-xl/lib/jpegli/error.h39
-rw-r--r--third_party/jpeg-xl/lib/jpegli/error_handling_test.cc1290
-rw-r--r--third_party/jpeg-xl/lib/jpegli/huffman.cc321
-rw-r--r--third_party/jpeg-xl/lib/jpegli/huffman.h50
-rw-r--r--third_party/jpeg-xl/lib/jpegli/idct.cc692
-rw-r--r--third_party/jpeg-xl/lib/jpegli/idct.h24
-rw-r--r--third_party/jpeg-xl/lib/jpegli/input.cc414
-rw-r--r--third_party/jpeg-xl/lib/jpegli/input.h20
-rw-r--r--third_party/jpeg-xl/lib/jpegli/input_suspension_test.cc612
-rw-r--r--third_party/jpeg-xl/lib/jpegli/jpeg.version.6211
-rw-r--r--third_party/jpeg-xl/lib/jpegli/jpeg.version.89
-rw-r--r--third_party/jpeg-xl/lib/jpegli/libjpeg_wrapper.cc260
-rw-r--r--third_party/jpeg-xl/lib/jpegli/memory_manager.cc181
-rw-r--r--third_party/jpeg-xl/lib/jpegli/memory_manager.h40
-rw-r--r--third_party/jpeg-xl/lib/jpegli/output_suspension_test.cc219
-rw-r--r--third_party/jpeg-xl/lib/jpegli/quant.cc748
-rw-r--r--third_party/jpeg-xl/lib/jpegli/quant.h23
-rw-r--r--third_party/jpeg-xl/lib/jpegli/render.cc802
-rw-r--r--third_party/jpeg-xl/lib/jpegli/render.h28
-rw-r--r--third_party/jpeg-xl/lib/jpegli/simd.cc38
-rw-r--r--third_party/jpeg-xl/lib/jpegli/simd.h18
-rw-r--r--third_party/jpeg-xl/lib/jpegli/source_manager.cc90
-rw-r--r--third_party/jpeg-xl/lib/jpegli/source_manager_test.cc141
-rw-r--r--third_party/jpeg-xl/lib/jpegli/streaming_test.cc233
-rw-r--r--third_party/jpeg-xl/lib/jpegli/test_utils.cc1240
-rw-r--r--third_party/jpeg-xl/lib/jpegli/test_utils.h318
-rw-r--r--third_party/jpeg-xl/lib/jpegli/testing.h35
-rw-r--r--third_party/jpeg-xl/lib/jpegli/transcode_api_test.cc133
-rw-r--r--third_party/jpeg-xl/lib/jpegli/transpose-inl.h111
-rw-r--r--third_party/jpeg-xl/lib/jpegli/upsample.cc137
-rw-r--r--third_party/jpeg-xl/lib/jpegli/upsample.h26
-rw-r--r--third_party/jpeg-xl/lib/jxl.cmake329
-rw-r--r--third_party/jpeg-xl/lib/jxl/ac_context.h149
-rw-r--r--third_party/jpeg-xl/lib/jxl/ac_strategy.cc108
-rw-r--r--third_party/jpeg-xl/lib/jxl/ac_strategy.h261
-rw-r--r--third_party/jpeg-xl/lib/jxl/ac_strategy_test.cc237
-rw-r--r--third_party/jpeg-xl/lib/jxl/alpha.cc115
-rw-r--r--third_party/jpeg-xl/lib/jxl/alpha.h66
-rw-r--r--third_party/jpeg-xl/lib/jxl/alpha_test.cc134
-rw-r--r--third_party/jpeg-xl/lib/jxl/ans_common.cc148
-rw-r--r--third_party/jpeg-xl/lib/jxl/ans_common.h143
-rw-r--r--third_party/jpeg-xl/lib/jxl/ans_common_test.cc43
-rw-r--r--third_party/jpeg-xl/lib/jxl/ans_params.h36
-rw-r--r--third_party/jpeg-xl/lib/jxl/ans_test.cc278
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/arch_macros.h33
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/bits.h147
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/byte_order.h274
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/cache_aligned.cc157
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/cache_aligned.h74
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/compiler_specific.h157
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/data_parallel.cc23
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/data_parallel.h120
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/file_io.h153
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/float.h98
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/iaca.h65
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/os_macros.h50
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/override.h29
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/padded_bytes.cc63
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/padded_bytes.h197
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/printf_macros.h34
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/profiler.cc540
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/profiler.h170
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/random.cc21
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/random.h95
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/sanitizer_definitions.h44
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/scope_guard.h48
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/span.h60
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/status.h326
-rw-r--r--third_party/jpeg-xl/lib/jxl/base/tsc_timer.h172
-rw-r--r--third_party/jpeg-xl/lib/jxl/bit_reader_test.cc262
-rw-r--r--third_party/jpeg-xl/lib/jxl/bits_test.cc87
-rw-r--r--third_party/jpeg-xl/lib/jxl/blending.cc152
-rw-r--r--third_party/jpeg-xl/lib/jxl/blending.h24
-rw-r--r--third_party/jpeg-xl/lib/jxl/blending_test.cc37
-rw-r--r--third_party/jpeg-xl/lib/jxl/box_content_decoder.cc101
-rw-r--r--third_party/jpeg-xl/lib/jxl/box_content_decoder.h49
-rw-r--r--third_party/jpeg-xl/lib/jxl/butteraugli/butteraugli.cc1988
-rw-r--r--third_party/jpeg-xl/lib/jxl/butteraugli/butteraugli.h209
-rw-r--r--third_party/jpeg-xl/lib/jxl/butteraugli_test.cc103
-rw-r--r--third_party/jpeg-xl/lib/jxl/butteraugli_wrapper.cc203
-rw-r--r--third_party/jpeg-xl/lib/jxl/byte_order_test.cc53
-rw-r--r--third_party/jpeg-xl/lib/jxl/chroma_from_luma.cc21
-rw-r--r--third_party/jpeg-xl/lib/jxl/chroma_from_luma.h147
-rw-r--r--third_party/jpeg-xl/lib/jxl/codec_in_out.h116
-rw-r--r--third_party/jpeg-xl/lib/jxl/coeff_order.cc153
-rw-r--r--third_party/jpeg-xl/lib/jxl/coeff_order.h64
-rw-r--r--third_party/jpeg-xl/lib/jxl/coeff_order_fwd.h47
-rw-r--r--third_party/jpeg-xl/lib/jxl/coeff_order_test.cc97
-rw-r--r--third_party/jpeg-xl/lib/jxl/color_encoding_internal.cc753
-rw-r--r--third_party/jpeg-xl/lib/jxl/color_encoding_internal.h463
-rw-r--r--third_party/jpeg-xl/lib/jxl/color_encoding_internal_test.cc157
-rw-r--r--third_party/jpeg-xl/lib/jxl/color_management.cc682
-rw-r--r--third_party/jpeg-xl/lib/jxl/color_management.h40
-rw-r--r--third_party/jpeg-xl/lib/jxl/color_management_test.cc405
-rw-r--r--third_party/jpeg-xl/lib/jxl/common.h245
-rw-r--r--third_party/jpeg-xl/lib/jxl/compressed_dc.cc318
-rw-r--r--third_party/jpeg-xl/lib/jxl/compressed_dc.h34
-rw-r--r--third_party/jpeg-xl/lib/jxl/convolve-inl.h297
-rw-r--r--third_party/jpeg-xl/lib/jxl/convolve.h105
-rw-r--r--third_party/jpeg-xl/lib/jxl/convolve_separable5.cc261
-rw-r--r--third_party/jpeg-xl/lib/jxl/convolve_separable7.cc285
-rw-r--r--third_party/jpeg-xl/lib/jxl/convolve_slow.cc212
-rw-r--r--third_party/jpeg-xl/lib/jxl/convolve_symmetric3.cc194
-rw-r--r--third_party/jpeg-xl/lib/jxl/convolve_symmetric5.cc185
-rw-r--r--third_party/jpeg-xl/lib/jxl/convolve_test.cc252
-rw-r--r--third_party/jpeg-xl/lib/jxl/data_parallel_test.cc87
-rw-r--r--third_party/jpeg-xl/lib/jxl/dct-inl.h334
-rw-r--r--third_party/jpeg-xl/lib/jxl/dct_block-inl.h108
-rw-r--r--third_party/jpeg-xl/lib/jxl/dct_for_test.h99
-rw-r--r--third_party/jpeg-xl/lib/jxl/dct_scales.cc31
-rw-r--r--third_party/jpeg-xl/lib/jxl/dct_scales.h379
-rw-r--r--third_party/jpeg-xl/lib/jxl/dct_test.cc389
-rw-r--r--third_party/jpeg-xl/lib/jxl/dct_util.h86
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_ans.cc374
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_ans.h462
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_bit_reader.h354
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_cache.cc229
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_cache.h261
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_context_map.cc86
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_context_map.h30
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_external_image.cc493
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_external_image.h46
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_external_image_gbench.cc56
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_frame.cc878
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_frame.h329
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_group.cc801
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_group.h49
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_group_border.cc184
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_group_border.h47
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_huffman.cc255
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_huffman.h32
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_modular.cc774
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_modular.h140
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_noise.cc131
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_noise.h32
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_patch_dictionary.cc347
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_patch_dictionary.h151
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_tone_mapping-inl.h234
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_transforms-inl.h853
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_transforms_testonly.cc41
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_transforms_testonly.h32
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_xyb-inl.h346
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_xyb.cc329
-rw-r--r--third_party/jpeg-xl/lib/jxl/dec_xyb.h89
-rw-r--r--third_party/jpeg-xl/lib/jxl/decode.cc2809
-rw-r--r--third_party/jpeg-xl/lib/jxl/decode_test.cc5507
-rw-r--r--third_party/jpeg-xl/lib/jxl/decode_to_jpeg.cc169
-rw-r--r--third_party/jpeg-xl/lib/jxl/decode_to_jpeg.h217
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_ac_strategy.cc1168
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_ac_strategy.h74
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_adaptive_quantization.cc1145
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_adaptive_quantization.h66
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_ans.cc1688
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_ans.h143
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_ans_params.h76
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_ar_control_field.cc325
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_ar_control_field.h49
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_aux_out.cc205
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_aux_out.h163
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_bit_writer.cc201
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_bit_writer.h129
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_butteraugli_comparator.cc99
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_butteraugli_comparator.h59
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_butteraugli_pnorm.cc211
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_butteraugli_pnorm.h25
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_cache.cc218
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_cache.h93
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_chroma_from_luma.cc409
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_chroma_from_luma.h68
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_cluster.cc295
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_cluster.h63
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_coeff_order.cc291
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_coeff_order.h54
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_color_management.cc1293
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_color_management.h90
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_comparator.cc130
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_comparator.h52
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_context_map.cc141
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_context_map.h35
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_detect_dots.cc626
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_detect_dots.h67
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_dot_dictionary.cc71
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_dot_dictionary.h34
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_entropy_coder.cc274
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_entropy_coder.h46
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_external_image.cc183
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_external_image.h45
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_external_image_gbench.cc46
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_external_image_test.cc79
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_fast_lossless.cc3860
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_fast_lossless.h72
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_fields.cc239
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_fields.h37
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_file.cc141
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_file.h31
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_frame.cc1745
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_frame.h78
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_gaborish.cc61
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_gaborish.h26
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_gaborish_test.cc77
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_gamma_correct.h36
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_group.cc426
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_group.h32
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_heuristics.cc948
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_heuristics.h81
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_huffman.cc214
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_huffman.h22
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_huffman_tree.cc328
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_huffman_tree.h52
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_icc_codec.cc406
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_icc_codec.h33
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_image_bundle.cc154
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_image_bundle.h25
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_jxl_skcms.h54
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_linalg.cc52
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_linalg.h24
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_linalg_test.cc118
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_modular.cc1762
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_modular.h92
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_noise.cc374
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_noise.h34
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_optimize.cc163
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_optimize.h218
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_optimize_test.cc109
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_params.h225
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_patch_dictionary.cc813
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_patch_dictionary.h109
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_photon_noise.cc89
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_photon_noise.h22
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_photon_noise_test.cc51
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_progressive_split.cc82
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_progressive_split.h131
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_quant_weights.cc214
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_quant_weights.h37
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_splines.cc98
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_splines.h38
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_toc.cc45
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_toc.h31
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_transforms-inl.h827
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_transforms.cc41
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_transforms.h32
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_xyb.cc520
-rw-r--r--third_party/jpeg-xl/lib/jxl/enc_xyb.h56
-rw-r--r--third_party/jpeg-xl/lib/jxl/encode.cc2128
-rw-r--r--third_party/jpeg-xl/lib/jxl/encode_internal.h275
-rw-r--r--third_party/jpeg-xl/lib/jxl/encode_test.cc1405
-rw-r--r--third_party/jpeg-xl/lib/jxl/entropy_coder.cc70
-rw-r--r--third_party/jpeg-xl/lib/jxl/entropy_coder.h45
-rw-r--r--third_party/jpeg-xl/lib/jxl/entropy_coder_test.cc68
-rw-r--r--third_party/jpeg-xl/lib/jxl/epf.cc146
-rw-r--r--third_party/jpeg-xl/lib/jxl/epf.h33
-rw-r--r--third_party/jpeg-xl/lib/jxl/exif.h87
-rw-r--r--third_party/jpeg-xl/lib/jxl/fake_parallel_runner_testonly.h79
-rw-r--r--third_party/jpeg-xl/lib/jxl/fast_dct-inl.h238
-rw-r--r--third_party/jpeg-xl/lib/jxl/fast_dct.cc37
-rw-r--r--third_party/jpeg-xl/lib/jxl/fast_dct.h9
-rw-r--r--third_party/jpeg-xl/lib/jxl/fast_dct128-inl.h2137
-rw-r--r--third_party/jpeg-xl/lib/jxl/fast_dct16-inl.h180
-rw-r--r--third_party/jpeg-xl/lib/jxl/fast_dct256-inl.h4811
-rw-r--r--third_party/jpeg-xl/lib/jxl/fast_dct32-inl.h419
-rw-r--r--third_party/jpeg-xl/lib/jxl/fast_dct64-inl.h985
-rw-r--r--third_party/jpeg-xl/lib/jxl/fast_dct8-inl.h80
-rw-r--r--third_party/jpeg-xl/lib/jxl/fast_dct_test.cc378
-rw-r--r--third_party/jpeg-xl/lib/jxl/fast_math-inl.h236
-rw-r--r--third_party/jpeg-xl/lib/jxl/fast_math_test.cc288
-rw-r--r--third_party/jpeg-xl/lib/jxl/field_encodings.h134
-rw-r--r--third_party/jpeg-xl/lib/jxl/fields.cc642
-rw-r--r--third_party/jpeg-xl/lib/jxl/fields.h377
-rw-r--r--third_party/jpeg-xl/lib/jxl/fields_test.cc429
-rw-r--r--third_party/jpeg-xl/lib/jxl/frame_header.cc494
-rw-r--r--third_party/jpeg-xl/lib/jxl/frame_header.h503
-rw-r--r--third_party/jpeg-xl/lib/jxl/gamma_correct_test.cc37
-rw-r--r--third_party/jpeg-xl/lib/jxl/gauss_blur.cc623
-rw-r--r--third_party/jpeg-xl/lib/jxl/gauss_blur.h94
-rw-r--r--third_party/jpeg-xl/lib/jxl/gauss_blur_gbench.cc126
-rw-r--r--third_party/jpeg-xl/lib/jxl/gauss_blur_test.cc453
-rw-r--r--third_party/jpeg-xl/lib/jxl/gradient_test.cc207
-rw-r--r--third_party/jpeg-xl/lib/jxl/headers.cc194
-rw-r--r--third_party/jpeg-xl/lib/jxl/headers.h97
-rw-r--r--third_party/jpeg-xl/lib/jxl/huffman_table.cc161
-rw-r--r--third_party/jpeg-xl/lib/jxl/huffman_table.h28
-rw-r--r--third_party/jpeg-xl/lib/jxl/iaca_test.cc21
-rw-r--r--third_party/jpeg-xl/lib/jxl/icc_codec.cc389
-rw-r--r--third_party/jpeg-xl/lib/jxl/icc_codec.h57
-rw-r--r--third_party/jpeg-xl/lib/jxl/icc_codec_common.cc190
-rw-r--r--third_party/jpeg-xl/lib/jxl/icc_codec_common.h106
-rw-r--r--third_party/jpeg-xl/lib/jxl/icc_codec_test.cc207
-rw-r--r--third_party/jpeg-xl/lib/jxl/image.cc251
-rw-r--r--third_party/jpeg-xl/lib/jxl/image.h497
-rw-r--r--third_party/jpeg-xl/lib/jxl/image_bundle.cc125
-rw-r--r--third_party/jpeg-xl/lib/jxl/image_bundle.h254
-rw-r--r--third_party/jpeg-xl/lib/jxl/image_bundle_test.cc37
-rw-r--r--third_party/jpeg-xl/lib/jxl/image_metadata.cc472
-rw-r--r--third_party/jpeg-xl/lib/jxl/image_metadata.h425
-rw-r--r--third_party/jpeg-xl/lib/jxl/image_ops.h805
-rw-r--r--third_party/jpeg-xl/lib/jxl/image_ops_test.cc164
-rw-r--r--third_party/jpeg-xl/lib/jxl/image_test_utils.h257
-rw-r--r--third_party/jpeg-xl/lib/jxl/inverse_mtf-inl.h90
-rw-r--r--third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_data.cc145
-rw-r--r--third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_data.h19
-rw-r--r--third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_data_writer.cc1050
-rw-r--r--third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_data_writer.h35
-rw-r--r--third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_output_chunk.h72
-rw-r--r--third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_serialization_state.h96
-rw-r--r--third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_data.cc384
-rw-r--r--third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_data.h31
-rw-r--r--third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_data_reader.cc1053
-rw-r--r--third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_data_reader.h36
-rw-r--r--third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc103
-rw-r--r--third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_huffman_decode.h41
-rw-r--r--third_party/jpeg-xl/lib/jxl/jpeg/jpeg_data.cc451
-rw-r--r--third_party/jpeg-xl/lib/jxl/jpeg/jpeg_data.h216
-rw-r--r--third_party/jpeg-xl/lib/jxl/jxl.syms5
-rw-r--r--third_party/jpeg-xl/lib/jxl/jxl.version17
-rw-r--r--third_party/jpeg-xl/lib/jxl/jxl_inspection.h22
-rw-r--r--third_party/jpeg-xl/lib/jxl/jxl_osx.syms1
-rw-r--r--third_party/jpeg-xl/lib/jxl/jxl_test.cc1537
-rw-r--r--third_party/jpeg-xl/lib/jxl/lehmer_code.h102
-rw-r--r--third_party/jpeg-xl/lib/jxl/lehmer_code_test.cc98
-rw-r--r--third_party/jpeg-xl/lib/jxl/libjxl.pc.in13
-rw-r--r--third_party/jpeg-xl/lib/jxl/loop_filter.cc98
-rw-r--r--third_party/jpeg-xl/lib/jxl/loop_filter.h76
-rw-r--r--third_party/jpeg-xl/lib/jxl/luminance.cc26
-rw-r--r--third_party/jpeg-xl/lib/jxl/luminance.h22
-rw-r--r--third_party/jpeg-xl/lib/jxl/matrix_ops.h84
-rw-r--r--third_party/jpeg-xl/lib/jxl/memory_manager_internal.cc18
-rw-r--r--third_party/jpeg-xl/lib/jxl/memory_manager_internal.h101
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/encoding/context_predict.h626
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/encoding/dec_ma.cc107
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/encoding/dec_ma.h66
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/encoding/enc_debug_tree.cc124
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/encoding/enc_debug_tree.h27
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/encoding/enc_encoding.cc562
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/encoding/enc_encoding.h47
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/encoding/enc_ma.cc1023
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/encoding/enc_ma.h157
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/encoding/encoding.cc622
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/encoding/encoding.h135
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/encoding/ma_common.h28
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/modular_image.cc77
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/modular_image.h118
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/options.h117
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.cc606
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.h22
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.cc73
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.h17
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.cc141
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.h20
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.cc46
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.h22
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/transform/palette.cc176
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/transform/palette.h129
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/transform/rct.cc153
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/transform/rct.h20
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.cc478
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.h90
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/transform/transform.cc98
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular/transform/transform.h148
-rw-r--r--third_party/jpeg-xl/lib/jxl/modular_test.cc541
-rw-r--r--third_party/jpeg-xl/lib/jxl/noise.h60
-rw-r--r--third_party/jpeg-xl/lib/jxl/opsin_image_test.cc123
-rw-r--r--third_party/jpeg-xl/lib/jxl/opsin_inverse_test.cc57
-rw-r--r--third_party/jpeg-xl/lib/jxl/opsin_params.cc44
-rw-r--r--third_party/jpeg-xl/lib/jxl/opsin_params.h86
-rw-r--r--third_party/jpeg-xl/lib/jxl/padded_bytes_test.cc126
-rw-r--r--third_party/jpeg-xl/lib/jxl/passes_state.cc70
-rw-r--r--third_party/jpeg-xl/lib/jxl/passes_state.h133
-rw-r--r--third_party/jpeg-xl/lib/jxl/passes_test.cc402
-rw-r--r--third_party/jpeg-xl/lib/jxl/patch_dictionary_internal.h31
-rw-r--r--third_party/jpeg-xl/lib/jxl/patch_dictionary_test.cc58
-rw-r--r--third_party/jpeg-xl/lib/jxl/preview_test.cc68
-rw-r--r--third_party/jpeg-xl/lib/jxl/quant_weights.cc1239
-rw-r--r--third_party/jpeg-xl/lib/jxl/quant_weights.h448
-rw-r--r--third_party/jpeg-xl/lib/jxl/quant_weights_test.cc240
-rw-r--r--third_party/jpeg-xl/lib/jxl/quantizer-inl.h74
-rw-r--r--third_party/jpeg-xl/lib/jxl/quantizer.cc156
-rw-r--r--third_party/jpeg-xl/lib/jxl/quantizer.h182
-rw-r--r--third_party/jpeg-xl/lib/jxl/quantizer_test.cc81
-rw-r--r--third_party/jpeg-xl/lib/jxl/rational_polynomial-inl.h98
-rw-r--r--third_party/jpeg-xl/lib/jxl/rational_polynomial_test.cc238
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/low_memory_render_pipeline.cc865
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/low_memory_render_pipeline.h111
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/render_pipeline.cc132
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/render_pipeline.h139
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/render_pipeline_stage.h171
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/render_pipeline_test.cc562
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/simple_render_pipeline.cc266
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/simple_render_pipeline.h37
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_blending.cc247
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_blending.h24
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_chroma_upsampling.cc129
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_chroma_upsampling.h27
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_epf.cc524
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_epf.h31
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_from_linear.cc191
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_from_linear.h20
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_gaborish.cc122
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_gaborish.h25
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_noise.cc311
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_noise.h32
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_patches.cc48
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_patches.h22
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_splines.cc63
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_splines.h21
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_spot.cc52
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_spot.h21
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_to_linear.cc202
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_to_linear.h21
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_tone_mapping.cc151
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_tone_mapping.h37
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_upsampling.cc187
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_upsampling.h26
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_write.cc601
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_write.h31
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_xyb.cc176
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_xyb.h26
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_ycbcr.cc85
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/stage_ycbcr.h25
-rw-r--r--third_party/jpeg-xl/lib/jxl/render_pipeline/test_render_pipeline_stages.h101
-rw-r--r--third_party/jpeg-xl/lib/jxl/roundtrip_test.cc839
-rw-r--r--third_party/jpeg-xl/lib/jxl/sanitizers.h242
-rw-r--r--third_party/jpeg-xl/lib/jxl/simd_util-inl.h349
-rw-r--r--third_party/jpeg-xl/lib/jxl/simd_util_test.cc84
-rw-r--r--third_party/jpeg-xl/lib/jxl/speed_tier_test.cc108
-rw-r--r--third_party/jpeg-xl/lib/jxl/splines.cc694
-rw-r--r--third_party/jpeg-xl/lib/jxl/splines.h148
-rw-r--r--third_party/jpeg-xl/lib/jxl/splines_gbench.cc52
-rw-r--r--third_party/jpeg-xl/lib/jxl/splines_test.cc348
-rw-r--r--third_party/jpeg-xl/lib/jxl/test_image.cc453
-rw-r--r--third_party/jpeg-xl/lib/jxl/test_image.h94
-rw-r--r--third_party/jpeg-xl/lib/jxl/test_utils.cc673
-rw-r--r--third_party/jpeg-xl/lib/jxl/test_utils.h175
-rw-r--r--third_party/jpeg-xl/lib/jxl/testing.h73
-rw-r--r--third_party/jpeg-xl/lib/jxl/tf_gbench.cc143
-rw-r--r--third_party/jpeg-xl/lib/jxl/toc.cc105
-rw-r--r--third_party/jpeg-xl/lib/jxl/toc.h55
-rw-r--r--third_party/jpeg-xl/lib/jxl/toc_test.cc92
-rw-r--r--third_party/jpeg-xl/lib/jxl/transfer_functions-inl.h413
-rw-r--r--third_party/jpeg-xl/lib/jxl/transpose-inl.h203
-rw-r--r--third_party/jpeg-xl/lib/jxl/version.h.in39
-rw-r--r--third_party/jpeg-xl/lib/jxl/xorshift128plus-inl.h103
-rw-r--r--third_party/jpeg-xl/lib/jxl/xorshift128plus_test.cc378
-rw-r--r--third_party/jpeg-xl/lib/jxl_benchmark.cmake36
-rw-r--r--third_party/jpeg-xl/lib/jxl_extras.cmake169
-rw-r--r--third_party/jpeg-xl/lib/jxl_lists.bzl637
-rw-r--r--third_party/jpeg-xl/lib/jxl_lists.cmake631
-rw-r--r--third_party/jpeg-xl/lib/jxl_tests.cmake84
-rw-r--r--third_party/jpeg-xl/lib/jxl_threads.cmake120
-rw-r--r--third_party/jpeg-xl/lib/jxl_vars.bzl46
-rw-r--r--third_party/jpeg-xl/lib/threads/libjxl_threads.pc.in13
-rw-r--r--third_party/jpeg-xl/lib/threads/resizable_parallel_runner.cc195
-rw-r--r--third_party/jpeg-xl/lib/threads/thread_parallel_runner.cc101
-rw-r--r--third_party/jpeg-xl/lib/threads/thread_parallel_runner_internal.cc215
-rw-r--r--third_party/jpeg-xl/lib/threads/thread_parallel_runner_internal.h166
-rw-r--r--third_party/jpeg-xl/lib/threads/thread_parallel_runner_test.cc123
-rw-r--r--third_party/jpeg-xl/plugins/CMakeLists.txt21
-rw-r--r--third_party/jpeg-xl/plugins/gdk-pixbuf/CMakeLists.txt83
-rw-r--r--third_party/jpeg-xl/plugins/gdk-pixbuf/README.md50
-rw-r--r--third_party/jpeg-xl/plugins/gdk-pixbuf/jxl.thumbnailer4
-rw-r--r--third_party/jpeg-xl/plugins/gdk-pixbuf/loaders_test.cache16
-rw-r--r--third_party/jpeg-xl/plugins/gdk-pixbuf/pixbufloader-jxl.c816
-rw-r--r--third_party/jpeg-xl/plugins/gdk-pixbuf/pixbufloader_test.cc41
-rw-r--r--third_party/jpeg-xl/plugins/gimp/CMakeLists.txt28
-rw-r--r--third_party/jpeg-xl/plugins/gimp/common.cc27
-rw-r--r--third_party/jpeg-xl/plugins/gimp/common.h45
-rw-r--r--third_party/jpeg-xl/plugins/gimp/file-jxl-load.cc487
-rw-r--r--third_party/jpeg-xl/plugins/gimp/file-jxl-load.h17
-rw-r--r--third_party/jpeg-xl/plugins/gimp/file-jxl-save.cc895
-rw-r--r--third_party/jpeg-xl/plugins/gimp/file-jxl-save.h18
-rw-r--r--third_party/jpeg-xl/plugins/gimp/file-jxl.cc157
-rw-r--r--third_party/jpeg-xl/plugins/mime/CMakeLists.txt6
-rw-r--r--third_party/jpeg-xl/plugins/mime/README.md37
-rw-r--r--third_party/jpeg-xl/plugins/mime/image-jxl.xml13
-rw-r--r--third_party/jpeg-xl/third_party/CMakeLists.txt175
-rw-r--r--third_party/jpeg-xl/third_party/HEVCSoftware/README.md2
-rw-r--r--third_party/jpeg-xl/third_party/HEVCSoftware/cfg/LICENSE31
-rw-r--r--third_party/jpeg-xl/third_party/HEVCSoftware/cfg/encoder_intra_main_scc_10.cfg136
-rw-r--r--third_party/jpeg-xl/third_party/dirent.cc142
-rw-r--r--third_party/jpeg-xl/third_party/dirent.h49
-rw-r--r--third_party/jpeg-xl/third_party/lcms2.cmake77
-rw-r--r--third_party/jpeg-xl/third_party/sjpeg.cmake27
-rw-r--r--third_party/jpeg-xl/third_party/skcms.cmake51
-rw-r--r--third_party/jpeg-xl/third_party/testing.cmake85
669 files changed, 158999 insertions, 0 deletions
diff --git a/third_party/jpeg-xl/.bazelignore b/third_party/jpeg-xl/.bazelignore
new file mode 100644
index 0000000000..912eacc1b5
--- /dev/null
+++ b/third_party/jpeg-xl/.bazelignore
@@ -0,0 +1 @@
+third_party
diff --git a/third_party/jpeg-xl/.clang-format b/third_party/jpeg-xl/.clang-format
new file mode 100644
index 0000000000..a61b61c569
--- /dev/null
+++ b/third_party/jpeg-xl/.clang-format
@@ -0,0 +1,4 @@
+BasedOnStyle: Google
+IncludeCategories:
+ - Regex: '^<hwy/'
+ Priority: 2
diff --git a/third_party/jpeg-xl/.clang-tidy b/third_party/jpeg-xl/.clang-tidy
new file mode 100644
index 0000000000..abccf4ed47
--- /dev/null
+++ b/third_party/jpeg-xl/.clang-tidy
@@ -0,0 +1,70 @@
+# Disabled checks:
+# - google-readability-todo: We don't use the google TODO format.
+#
+# - modernize-deprecated-headers: We don't use std:: versions of the standard
+# types and functions like size_t or printf, so we should include <stdio.h>
+# instead <cstdio>.
+# - modernize-return-braced-init-list: this often doesn't improve readability.
+# - modernize-use-auto: is too aggressive towards using auto.
+# - modernize-use-default-member-init: with a mix of constructors and default
+# member initialization this can be confusing if enforced.
+# - modernize-use-trailing-return-type: does not improve readability when used
+# systematically.
+# - modernize-use-using: typedefs are ok.
+#
+# - readability-else-after-return: It doesn't always improve readability.
+# - readability-static-accessed-through-instance
+# It is often more useful and readable to access a constant of a passed
+# variable (like d.N) instead of using the type of the variable that could be
+# long and complex.
+# - readability-uppercase-literal-suffix: we write 1.0f, not 1.0F.
+
+Checks: >-
+ bugprone-*,
+ clang-*,
+ -clang-diagnostic-unused-command-line-argument,
+ google-*,
+ modernize-*,
+ performance-*,
+ readability-*,
+ -google-readability-todo,
+ -modernize-deprecated-headers,
+ -modernize-return-braced-init-list,
+ -modernize-use-auto,
+ -modernize-use-default-member-init,
+ -modernize-use-trailing-return-type,
+ -modernize-use-using,
+ -readability-else-after-return,
+ -readability-function-cognitive-complexity,
+ -readability-static-accessed-through-instance,
+ -readability-uppercase-literal-suffix,
+
+
+WarningsAsErrors: >-
+ bugprone-argument-comment,
+ bugprone-macro-parentheses,
+ bugprone-suspicious-string-compare,
+ bugprone-use-after-move,
+ clang-*,
+ clang-analyzer-*,
+ -clang-diagnostic-unused-command-line-argument,
+ google-build-using-namespace,
+ google-explicit-constructor,
+ google-readability-braces-around-statements,
+ google-readability-namespace-comments,
+ modernize-use-override,
+ readability-inconsistent-declaration-parameter-name
+
+# We are only interested in the headers from this projects, excluding
+# third_party/ and build/.
+HeaderFilterRegex: '^.*/(lib|tools)/.*\.h$'
+
+CheckOptions:
+ - key: readability-braces-around-statements.ShortStatementLines
+ value: '2'
+ - key: google-readability-braces-around-statements.ShortStatementLines
+ value: '2'
+ - key: readability-implicit-bool-conversion.AllowPointerConditions
+ value: '1'
+ - key: readability-implicit-bool-conversion.AllowIntegerConditions
+ value: '1'
diff --git a/third_party/jpeg-xl/.github/ISSUE_TEMPLATE/bug_report.md b/third_party/jpeg-xl/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000000..a7a1429861
--- /dev/null
+++ b/third_party/jpeg-xl/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,37 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Screenshots**
+If applicable, add screenshots or example input/output images to help explain your problem.
+
+**Environment**
+ - OS: [e.g. Windows]
+ - Compiler version: [e.g. clang 11.0.1]
+ - CPU type: [e.g. x86_64]
+ - cjxl/djxl version string: [e.g. cjxl [v0.3.7 | SIMD supported: SSE4,Scalar]]
+
+**Additional context**
+Add any other context about the problem here.
+
+<!--
+Currently github does not allow uploading files that end in `.jxl`, but when you
+rename them for example as `image.jxl.jpg`, it will be possible to upload them
+and also view them in browsers that are configured to support it.
+
+See https://github.com/orgs/github-community/discussions/18139
+-->
diff --git a/third_party/jpeg-xl/.github/ISSUE_TEMPLATE/feature_request.md b/third_party/jpeg-xl/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 0000000000..bbcbbe7d61
--- /dev/null
+++ b/third_party/jpeg-xl/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,20 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+**Is your feature request related to a problem? Please describe.**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+
+**Describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.
diff --git a/third_party/jpeg-xl/.github/workflows/build_test.yml b/third_party/jpeg-xl/.github/workflows/build_test.yml
new file mode 100644
index 0000000000..ddc81b6fb0
--- /dev/null
+++ b/third_party/jpeg-xl/.github/workflows/build_test.yml
@@ -0,0 +1,433 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Workflow for building and running tests.
+
+name: Build/Test
+on:
+ push:
+ branches:
+ - main
+ - v*.*.x
+ pull_request:
+ types: [opened, reopened, labeled, synchronize]
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
+ cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
+jobs:
+ ubuntu_build:
+ name: Ubuntu Build ${{ matrix.name }}
+ runs-on: ${{ matrix.os || 'ubuntu-latest' }}
+ strategy:
+ matrix:
+ # We have one job per "name" in the matrix. Attributes are set on the
+ # specific job names.
+ name: [release, debug, asan, msan, scalar]
+ include:
+ - name: release
+ test_in_pr: true
+ # Track static stack size on build and check it doesn't exceed 3 kB.
+ env_stack_size: 1
+ max_stack: 3000
+ # Conformance tooling test requires numpy.
+ apt_pkgs: graphviz python3-numpy
+ - name: lowprecision
+ mode: release
+ test_in_pr: true
+ cmake_args: -DCMAKE_CXX_FLAGS=-DJXL_HIGH_PRECISION=0
+ - name: debug
+ # Runs on AVX3 CPUs require more stack than others. Make sure to
+ # test on AVX3-enabled CPUs when changing this value.
+ env_test_stack_size: 4000
+ # Build scalar-only hwy instructions.
+ - name: scalar
+ mode: release
+ cxxflags: -DHWY_COMPILE_ONLY_SCALAR
+ # Disabling optional features to speed up msan build a little bit.
+ - name: msan
+ skip_install: true
+ cmake_args: >-
+ -DJPEGXL_ENABLE_DEVTOOLS=OFF -DJPEGXL_ENABLE_PLUGINS=OFF
+ -DJPEGXL_ENABLE_VIEWERS=OFF
+ - name: asan
+ skip_install: true
+ - name: coverage
+ apt_pkgs: gcovr
+ # Coverage builds require a bit more RAM.
+ env_test_stack_size: 2048
+ # Build with support for decoding to JPEG bytes disabled. Produces a
+ # smaller build if only decoding to pixels is needed.
+ - name: release-nojpeg
+ mode: release
+ cxxflags: -DJXL_DEBUG_ON_ABORT=0
+ cmake_args: >-
+ -DJPEGXL_ENABLE_TRANSCODE_JPEG=OFF
+ -DJPEGXL_ENABLE_PLUGINS=OFF
+ -DJPEGXL_ENABLE_VIEWERS=OFF
+ # Build optimized for binary size, all features not needed for
+ # reconstructing pixels is disabled.
+ - name: release:minimal
+ mode: release
+ cxxflags: -DJXL_DEBUG_ON_ABORT=0
+ cmake_args: >-
+ -DJPEGXL_ENABLE_TRANSCODE_JPEG=OFF
+ -DJPEGXL_ENABLE_BOXES=OFF
+ -DJPEGXL_ENABLE_PLUGINS=OFF
+ -DJPEGXL_ENABLE_VIEWERS=OFF
+ # Builds with gcc in release mode
+ - name: release:gcc8
+ mode: release
+ apt_pkgs: gcc-8 g++-8
+ cmake_args: >-
+ -DCMAKE_C_COMPILER=gcc-8 -DCMAKE_CXX_COMPILER=g++-8
+ # Builds with clang-5 in release mode
+ - name: release:clang-5
+ os: ubuntu-18.04
+ mode: release
+ # TODO(eustas): investigate, why static brotli library is not found.
+ skip_install: true
+ apt_pkgs: clang-5.0
+ cmake_args: >-
+ -DCMAKE_C_COMPILER=clang-5.0 -DCMAKE_CXX_COMPILER=clang++-5.0
+ -DJPEGXL_ENABLE_PLUGINS=OFF
+
+ env:
+ CCACHE_DIR: ${{ github.workspace }}/.ccache
+ # Whether we track the stack size.
+ STACK_SIZE: ${{ matrix.env_stack_size }}
+ TEST_STACK_LIMIT: ${{ matrix.env_test_stack_size }}
+ WILL_RUN_TESTS: ${{ github.event_name == 'push' || (github.event_name == 'pull_request' && (matrix.test_in_pr || contains(github.event.pull_request.labels.*.name, 'CI:full'))) }}
+
+ steps:
+ - name: Install build deps
+ run: |
+ sudo apt update
+ sudo apt install -y \
+ ccache \
+ clang-7 \
+ cmake \
+ doxygen \
+ libbenchmark-dev \
+ libbenchmark-tools \
+ libbrotli-dev \
+ libgdk-pixbuf2.0-dev \
+ libgif-dev \
+ libgtest-dev \
+ libgtk2.0-dev \
+ libjpeg-dev \
+ libopenexr-dev \
+ libpng-dev \
+ libwebp-dev \
+ ninja-build \
+ pkg-config \
+ xvfb \
+ ${{ matrix.apt_pkgs }} \
+ #
+ echo "CC=clang-7" >> $GITHUB_ENV
+ echo "CXX=clang++-7" >> $GITHUB_ENV
+ - name: Checkout the source
+ uses: actions/checkout@v2
+ with:
+ submodules: true
+ fetch-depth: 2
+
+ - name: Setup the LLVM source path
+ if: matrix.name == 'msan'
+ run: |
+ LLVM_ROOT=${GITHUB_WORKSPACE}/llvm_root
+ mkdir -p ${LLVM_ROOT}
+ echo "LLVM_ROOT=${LLVM_ROOT}" >> $GITHUB_ENV
+ - name: Cache LLVM sources
+ if: matrix.name == 'msan'
+ uses: actions/cache@v2
+ with:
+ path: ${{ env.LLVM_ROOT }}
+ key: llvm
+ - name: Checkout the LLVM source
+ if: matrix.name == 'msan'
+ uses: actions/checkout@v2
+ with:
+ submodules: false
+ repository: llvm/llvm-project
+ ref: llvmorg-7.0.1
+ path: llvm_root
+
+ - name: Sphinx dependencies
+ # Dependencies for sphinx HTML documentation
+ if: matrix.name == 'release'
+ run: |
+ pip3 install -r doc/sphinx/requirements.txt
+ - name: Git environment
+ id: git-env
+ run: |
+ echo "::set-output name=parent::$(git rev-parse ${{ github.sha }}^)"
+ shell: bash
+ - name: ccache
+ uses: actions/cache@v2
+ with:
+ path: ${{ env.CCACHE_DIR }}
+ # When the cache hits the key it is not updated, so if this is a rebuild
+ # of the same Pull Request it will reuse the cache if still around. For
+ # either Pull Requests or new pushes to main, this will use the parent
+ # hash as the starting point from the restore-keys entry.
+ key: build-${{ runner.os }}-${{ github.sha }}-${{ matrix.name }}
+ restore-keys: |
+ build-${{ runner.os }}-${{ steps.git-env.outputs.parent }}-${{ matrix.name }}
+ - name: Build
+ if: matrix.name != 'coverage' || env.WILL_RUN_TESTS == 'true'
+ run: |
+ mkdir -p ${CCACHE_DIR}
+ echo "max_size = 200M" > ${CCACHE_DIR}/ccache.conf
+ mode="${{ matrix.mode }}"
+ build_tests=$([ "$WILL_RUN_TESTS" == "true" ] && echo "ON" || echo "OFF")
+ [[ -n "${mode}" ]] || mode="${{ matrix.name }}"
+ ./ci.sh ${mode} -DJPEGXL_FORCE_SYSTEM_BROTLI=ON \
+ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+ -DBUILD_TESTING=${build_tests} \
+ ${{ matrix.cmake_args }}
+ env:
+ SKIP_TEST: 1
+ CMAKE_CXX_FLAGS: ${{ matrix.cxxflags }}
+ - name: Build stats
+ run: |
+ awk '!/^#/ {total[$4]+=($2-$1);cntr[$4]+=1} END {for (key in total) print total[key]/cntr[key] " " key}' build/.ninja_log | sort -n | tail -n 25
+ - name: ccache stats
+ run: ccache --show-stats
+ - name: Build stats ${{ matrix.name }}
+ if: matrix.mode == 'release' || matrix.name == 'release'
+ run: |
+ tools/build_stats.py --save build/stats.json \
+ --max-stack ${{ matrix.max_stack || '0' }} \
+ cjxl djxl libjxl.so libjxl_dec.so
+ # Check that we can build the example project against the installed libs.
+ - name: Install and build examples
+ if: |
+ (matrix.mode == 'release' || matrix.name == 'release') &&
+ !matrix.skip_install
+ run: |
+ set -x
+ sudo cmake --build build -- install
+ cmake -Bbuild-example -Hexamples -G Ninja
+ cmake --build build-example
+ if ldd build-example/decode_oneshot_static | grep libjxl; then
+ echo "decode_oneshot_static is not using the static lib" >&2
+ exit 1
+ fi
+ # Test that the built binaries run.
+ echo -e -n "PF\n1 1\n-1.0\n\0\0\x80\x3f\0\0\x80\x3f\0\0\x80\x3f" > test.pfm
+ build-example/encode_oneshot test.pfm test.jxl
+ build-example/encode_oneshot_static test.pfm test-static.jxl
+ build-example/decode_oneshot test.jxl dec.pfm dec.icc
+ build-example/decode_oneshot_static test.jxl dec-static.pfm dec-static.icc
+ # Run the tests on push and when requested in pull_request.
+ - name: Test ${{ matrix.mode }}
+ if: env.WILL_RUN_TESTS == 'true'
+ run: |
+ ./ci.sh test ${{ matrix.ctest_args }}
+ # Print the running time summary for the slowest tests.
+ - name: Test runtime stats
+ run: |
+ sort build/Testing/Temporary/CTestCostData.txt -k 3 -n | tail -n 20 || true
+ - name: Build HTML documentation (sphinx/readthetdocs)
+ if: matrix.name == 'release'
+ run: |
+ cmake --build build -- rtd-html
+ - name: Coverage report
+ if: github.event_name == 'push' && matrix.name == 'coverage'
+ run: |
+ ./ci.sh coverage_report
+ - name: Coverage upload to Codecov
+ if: github.event_name == 'push' && matrix.name == 'coverage'
+ uses: codecov/codecov-action@v2
+ with:
+ flags: unittests
+ files: build/coverage.xml
+ - name: Fast benchmark ${{ matrix.mode }}
+ if: |
+ matrix.name != 'coverage' && (github.event_name == 'push' ||
+ (github.event_name == 'pull_request' && (
+ matrix.test_in_pr ||
+ contains(github.event.pull_request.labels.*.name, 'CI:full'))))
+ run: |
+ STORE_IMAGES=0 ./ci.sh fast_benchmark
+ # Run gbench once, just to make sure it runs, not for actual benchmarking.
+ # This doesn't work on msan because we use gbench library from the system
+ # which is not instrumented by msan.
+ - name: gbench check
+ if: |
+ matrix.name == 'release' || (
+ github.event_name == 'push' && matrix.name != 'msan')
+ run: |
+ ./ci.sh gbench --benchmark_min_time=0
+
+ windows_msys:
+ name: Windows MSYS2 / ${{ matrix.msystem }}
+ runs-on: windows-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - msystem: mingw64
+ - msystem: clang64
+ - msystem: mingw32
+ # TODO(eustas): investigate HWY Mul failures
+ disable_tests:
+ - HwyMulTestGroup/HwyMulTest\.TestAllMulHigh/EMU128
+ - HwyMulTestGroup/HwyMulTest\.TestAllMulFixedPoint15/EMU128
+ - DecodeTest/DecodeTestParam\.PixelTest/280x12RGBAtoRGBAf32BECallbackEarlyBufferO2
+ - DecodeTest/DecodeTestParam\.PixelTest/280x12RGBAtoRGBAf32BECallbackEarlyBufferO3
+ - DecodeTest/DecodeTestParam\.PixelTest/280x12RGBAtoRGBAf32BECallbackEarlyBufferO7
+ - DecodeTest/DecodeTestParam\.PixelTest/280x12RGBAtoRGBAf32BECallbackEarlyBufferO8
+ - msystem: clang32
+ # TODO(eustas): investigate HWY Sort and JXL ANS failures
+ disable_tests:
+ - SortTestGroup/SortTest\.TestAllSort/.*
+ - ANSTest\.RandomUnbalancedStreamRoundtrip3
+ - ANSTest\.RandomUnbalancedStreamRoundtripBig
+
+ defaults:
+ run:
+ shell: msys2 {0}
+ steps:
+ - name: Checkout the source
+ uses: actions/checkout@v2
+ with:
+ submodules: true
+ fetch-depth: 1
+ - uses: msys2/setup-msys2@v2
+ with:
+ msystem: ${{ matrix.msystem }}
+ update: true
+ path-type: inherit
+ install: >-
+ base-devel
+ git
+ pacboy: >-
+ brotli:p
+ cmake:p
+ giflib:p
+ gtest:p
+ libavif:p
+ libjpeg-turbo:p
+ libpng:p
+ libwebp:p
+ ninja:p
+ toolchain:p
+
+ - name: CMake configure
+ run: |
+ cmake \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DJPEGXL_ENABLE_JNI=OFF \
+ -DJPEGXL_ENABLE_MANPAGES=OFF \
+ -DJPEGXL_FORCE_SYSTEM_BROTLI=ON \
+ -DJPEGXL_FORCE_SYSTEM_GTEST=ON \
+ -B build \
+ -G Ninja
+ - name: CMake build
+ run: cmake --build build
+ - name: Test
+ if: |
+ github.event_name == 'push' ||
+ (github.event_name == 'pull_request' &&
+ contains(github.event.pull_request.labels.*.name, 'CI:full'))
+ run: ctest --test-dir build --parallel 2 --output-on-failure -E "${{ join(matrix.disable_tests, '|') }}"
+
+ wasm32_build:
+ name: WASM wasm32/${{ matrix.variant }}
+ runs-on: ubuntu-latest
+ env:
+ CCACHE_DIR: ${{ github.workspace }}/.ccache
+ BUILD_TARGET: wasm32
+ EM_VERSION: 3.1.1
+ NODE_VERSION: 18
+
+ strategy:
+ matrix:
+ include:
+ - variant: scalar
+ - variant: simd
+
+ steps:
+ - uses: actions/checkout@v2
+ with:
+ submodules: true
+ fetch-depth: 1
+ - name: Install build deps
+ shell: bash
+ run: |
+ set -x
+ sudo apt update
+ pkgs=(
+ # Build dependencies
+ ccache
+ cmake
+ doxygen
+ ninja-build
+ pkg-config
+ )
+ DEBIAN_FRONTEND=noninteractive sudo apt install -y "${pkgs[@]}"
+
+ - name: Git environment
+ id: git-env
+ run: |
+ echo "::set-output name=parent::$(git rev-parse ${{ github.sha }}^)"
+ shell: bash
+ - name: ccache
+ uses: actions/cache@v2
+ with:
+ path: ${{ env.CCACHE_DIR }}
+ key: build-wasm-${{ runner.os }}-${{ github.sha }}-${{ matrix.variant }}
+ restore-keys: |
+ build-wasm-${{ runner.os }}-${{ steps.git-env.outputs.parent }}-${{ matrix.variant }}
+
+ - name: Install node
+ uses: actions/setup-node@v3
+ with:
+ node-version: ${{env.NODE_VERSION}}
+
+ - name: Get non-EMSDK node path
+ run: which node >> $HOME/.base_node_path
+
+ - name: Install emsdk
+ uses: mymindstorm/setup-emsdk@v11
+ # TODO(deymo): We could cache this action but it doesn't work when running
+ # in a matrix.
+ with:
+ version: ${{env.EM_VERSION}}
+ no-cache: true
+
+ - name: Set EMSDK node version
+ run: |
+ echo "NODE_JS='$(cat $HOME/.base_node_path)'" >> $EMSDK/.emscripten
+ emsdk construct_env
+
+ # TODO(deymo): Build and install other dependencies like libpng, libjpeg,
+ # etc.
+ - name: Build
+ run: |
+ mkdir -p ${CCACHE_DIR}
+ echo "max_size = 200M" > ${CCACHE_DIR}/ccache.conf
+ if [[ "${{ matrix.variant }}" == "simd" ]]; then
+ export ENABLE_WASM_SIMD=1
+ fi
+ ./ci.sh release \
+ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_C_COMPILER_LAUNCHER=ccache
+ env:
+ SKIP_TEST: 1
+ - name: ccache stats
+ run: ccache --show-stats
+
+ - name: Test
+ if: |
+ github.event_name == 'push' ||
+ (github.event_name == 'pull_request' &&
+ contains(github.event.pull_request.labels.*.name, 'CI:full'))
+ run: |
+ ./ci.sh test
diff --git a/third_party/jpeg-xl/.github/workflows/build_test_cross.yml b/third_party/jpeg-xl/.github/workflows/build_test_cross.yml
new file mode 100644
index 0000000000..5b537205c4
--- /dev/null
+++ b/third_party/jpeg-xl/.github/workflows/build_test_cross.yml
@@ -0,0 +1,183 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Workflow for building and running tests.
+
+name: Build/Test Cross
+on:
+ push:
+ branches:
+ - main
+ - v*.*.x
+ pull_request:
+ types: [opened, reopened, labeled, synchronize]
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
+ cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
+jobs:
+ cross_compile_ubuntu:
+ name: Cross-compiling ${{ matrix.build_target }} ${{ matrix.variant }}
+ runs-on: [ubuntu-22.04]
+ container:
+ image: debian:bullseye
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - arch: arm64
+ build_target: aarch64-linux-gnu
+ cmake_args:
+ - -DCMAKE_CROSSCOMPILING_EMULATOR=/usr/bin/qemu-aarch64-static
+
+ - arch: arm64
+ variant: SVE
+ build_target: aarch64-linux-gnu
+ cmake_args:
+ - -DCMAKE_CROSSCOMPILING_EMULATOR=/usr/bin/qemu-aarch64-static
+ - -DJPEGXL_ENABLE_OPENEXR=off
+ - -DJPEGXL_ENABLE_SIZELESS_VECTORS=on
+ cmake_flags: -march=armv8-a+sve
+ c_compiler: aarch64-linux-gnu-gcc
+ cxx_compiler: aarch64-linux-gnu-g++
+ disable_tests: true
+
+ - arch: arm64
+ variant: lowprecision
+ build_target: aarch64-linux-gnu
+ cmake_args:
+ - -DCMAKE_CROSSCOMPILING_EMULATOR=/usr/bin/qemu-aarch64-static
+ - -DCMAKE_CXX_FLAGS=-DJXL_HIGH_PRECISION=0
+
+ - arch: armhf
+ build_target: arm-linux-gnueabihf
+ cmake_args: [-DCMAKE_CROSSCOMPILING_EMULATOR=/usr/bin/qemu-arm-static]
+
+ - arch: i386
+ test_in_pr: true
+ build_target: i686-linux-gnu
+
+ env:
+ BUILD_DIR: build
+ WILL_RUN_TESTS: ${{ (github.event_name == 'push' || (github.event_name == 'pull_request' && (matrix.test_in_pr || contains(github.event.pull_request.labels.*.name, 'CI:full')))) && !matrix.disable_tests }}
+
+ steps:
+ - name: Setup apt
+ shell: bash
+ run: |
+ set -x
+ apt-get update -y
+ apt-get install -y ca-certificates debian-ports-archive-keyring
+
+ dpkg --add-architecture "${{ matrix.arch }}"
+
+ # Update the sources.list with the split of supported architectures.
+ bkplist="/etc/apt/sources.list.bkp"
+ mv /etc/apt/sources.list "${bkplist}"
+
+ newlist="/etc/apt/sources.list"
+ rm -f "${newlist}"
+
+ main_list="amd64,${{ matrix.arch }}"
+ port_list=""
+ if [[ "${{ matrix.arch }}" == "i386" ]]; then
+ main_list="amd64,i386"
+ else
+ port_list="${{ matrix.arch }}"
+ fi
+
+ grep -v -E '^#' "${bkplist}" |
+ sed -E "s;^deb (http[^ ]+) (.*)\$;deb [arch=${main_list}] \\1 \\2\ndeb-src [arch=${main_list}] \\1 \\2;" \
+ | tee -a "${newlist}"
+
+ - name: Install build deps
+ shell: bash
+ run: |
+ set -x
+ apt update
+ pkgs=(
+ # Build dependencies
+ cmake
+ doxygen
+ git
+ graphviz
+ ninja-build
+ pkg-config
+ qemu-user-static
+ xdg-utils
+ xvfb
+
+ # Toolchain for cross-compiling.
+ clang-11
+ g++-aarch64-linux-gnu
+ libc6-dev-${{ matrix.arch }}-cross
+ libstdc++-10-dev-${{ matrix.arch }}-cross
+ libstdc++-10-dev:${{ matrix.arch }}
+
+ # Dependencies
+ libbrotli-dev:${{ matrix.arch }}
+ libgif-dev:${{ matrix.arch }}
+ libjpeg-dev:${{ matrix.arch }}
+ libpng-dev:${{ matrix.arch }}
+ libwebp-dev:${{ matrix.arch }}
+
+ # For OpenEXR:
+ libilmbase-dev:${{ matrix.arch }}
+ libopenexr-dev:${{ matrix.arch }}
+
+ # GTK plugins
+ libgdk-pixbuf2.0-dev:${{ matrix.arch }}
+ libgtk2.0-dev:${{ matrix.arch }}
+
+ # QT
+ libqt5x11extras5-dev:${{ matrix.arch }}
+ qtbase5-dev:${{ matrix.arch }}
+ )
+ if [[ "${{ matrix.build_target }}" != "x86_64-linux-gnu" ]]; then
+ pkgs+=(
+ binutils-${{ matrix.build_target }}
+ gcc-${{ matrix.build_target }}
+ )
+ fi
+ if [[ "${{ matrix.arch }}" != "i386" ]]; then
+ pkgs+=(
+ # TCMalloc
+ libgoogle-perftools-dev:${{ matrix.arch }}
+ libgoogle-perftools4:${{ matrix.arch }}
+ libtcmalloc-minimal4:${{ matrix.arch }}
+ libunwind-dev:${{ matrix.arch }}
+ )
+ fi
+ DEBIAN_FRONTEND=noninteractive apt install -y "${pkgs[@]}"
+ echo "CC=${{ matrix.c_compiler || 'clang-11' }}" >> $GITHUB_ENV
+ echo "CXX=${{ matrix.cxx_compiler || 'clang++-11' }}" >> $GITHUB_ENV
+ - name: Checkout the source
+ uses: actions/checkout@v2
+ with:
+ submodules: true
+ fetch-depth: 1
+ - name: Build
+ run: |
+ CMAKE_FLAGS="${{ matrix.cmake_flags }}" ./ci.sh release \
+ -DJPEGXL_FORCE_SYSTEM_BROTLI=ON \
+ -DJPEGXL_ENABLE_JNI=OFF \
+ ${{ join(matrix.cmake_args, ' ') }}
+ env:
+ SKIP_TEST: 1
+ BUILD_TARGET: ${{ matrix.build_target }}
+ - name: Build stats ${{ matrix.build_target }}
+ run: |
+ tools/build_stats.py --save build/stats.json \
+ --binutils ${{ matrix.build_target }}- \
+ --max-stack ${{ matrix.max_stack || '0' }} \
+ cjxl djxl libjxl.so libjxl_dec.so
+ # Run the tests on push and when requested in pull_request.
+ - name: Test
+ if: env.WILL_RUN_TESTS == 'true'
+ run: |
+ ./ci.sh test
+ env:
+ BUILD_TARGET: ${{ matrix.build_target }}
diff --git a/third_party/jpeg-xl/.github/workflows/conformance.yml b/third_party/jpeg-xl/.github/workflows/conformance.yml
new file mode 100644
index 0000000000..9c85878f98
--- /dev/null
+++ b/third_party/jpeg-xl/.github/workflows/conformance.yml
@@ -0,0 +1,185 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Workflow for running conformance tests.
+
+name: Conformance
+on:
+ push:
+ branches:
+ - main
+ - v*.*.x
+ pull_request:
+ types: [opened, reopened, labeled, synchronize]
+
+env:
+ LIBJXL_VERSION: 0.8.0
+ LIBJXL_ABI_VERSION: 0.8
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
+ cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
+jobs:
+ warmup: # If necessary, fetch files just once, before tests are run.
+ name: Warmup caches
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout the conformance source
+ uses: actions/checkout@v2
+ with:
+ repository: libjxl/conformance
+ # TODO(eustas): move ref to a global variable / file?
+ ref: 43d8135b50e53167ee6fee0b842b9eb15cfc4aa2
+ path: conformance
+ - name: Cache
+ uses: actions/cache@v2
+ with:
+ path: ${{ github.workspace }}/conformance/.objects
+ key: conformance-refs
+ - name: Download and link conformance files
+ run: |
+ ${{ github.workspace }}/conformance/scripts/download_and_symlink.sh
+
+ build:
+ name: Conformance Build ${{ matrix.name }}
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - name: AVX3
+ cflags: -DHWY_DISABLED_TARGETS=HWY_AVX3-1
+ - name: AVX2
+ cflags: -DHWY_DISABLED_TARGETS=HWY_AVX2-1
+ - name: SSE4
+ cflags: -DHWY_DISABLED_TARGETS=HWY_SSE4-1
+ - name: SSSE3
+ cflags: -DHWY_DISABLED_TARGETS=HWY_SSSE3-1
+ - name: EMU128
+ cflags: -DHWY_COMPILE_ONLY_EMU128=1
+ - name: SCALAR
+ cflags: -DHWY_COMPILE_ONLY_SCALAR=1
+ - name: SCALAR_ASAN
+ cflags: -DHWY_COMPILE_ONLY_SCALAR=1
+ build_type: asan
+ env:
+ CCACHE_DIR: ${{ github.workspace }}/.ccache
+ steps:
+ - name: Install build deps
+ run: |
+ sudo apt update
+ sudo apt install -y \
+ ccache \
+ clang-7 \
+ cmake \
+ doxygen \
+ libbenchmark-dev \
+ libbenchmark-tools \
+ libbrotli-dev \
+ libgdk-pixbuf2.0-dev \
+ libgif-dev \
+ libgtest-dev \
+ libgtk2.0-dev \
+ libjpeg-dev \
+ libopenexr-dev \
+ libpng-dev \
+ libwebp-dev \
+ ninja-build \
+ pkg-config \
+ xvfb \
+ ${{ matrix.apt_pkgs }} \
+ #
+ echo "CC=clang-7" >> $GITHUB_ENV
+ echo "CXX=clang++-7" >> $GITHUB_ENV
+ - name: Checkout the jxl source
+ uses: actions/checkout@v2
+ with:
+ submodules: true
+ fetch-depth: 2
+ - name: Git environment
+ id: git-env
+ run: |
+ echo "::set-output name=parent::$(git rev-parse ${{ github.sha }}^)"
+ shell: bash
+ - name: ccache
+ uses: actions/cache@v2
+ with:
+ path: ${{ env.CCACHE_DIR }}
+ # When the cache hits the key it is not updated, so if this is a rebuild
+ # of the same Pull Request it will reuse the cache if still around. For
+ # either Pull Requests or new pushes to main, this will use the parent
+ # hash as the starting point from the restore-keys entry.
+ key: conformance-${{ runner.os }}-${{ github.sha }}-${{ matrix.name }}
+ restore-keys: |
+ conformance-${{ runner.os }}-${{ steps.git-env.outputs.parent }}-${{ matrix.name }}
+ - name: Build
+ run: |
+ mkdir -p ${CCACHE_DIR}
+ echo "max_size = 200M" > ${CCACHE_DIR}/ccache.conf
+ CMAKE_FLAGS="${{ matrix.cflags }}" \
+ ./ci.sh ${{ matrix.build_type || 'release' }} -DJPEGXL_FORCE_SYSTEM_BROTLI=ON \
+ -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
+ -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+ -DBUILD_TESTING=OFF
+ # Flatten the artifacts directory structure
+ cp tools/conformance/conformance.py build/tools/conformance
+ cp tools/conformance/lcms2.py build/tools/conformance
+ cp build/tools/djxl build/tools/conformance
+ cp build/libjxl.so.${{ env.LIBJXL_VERSION }} build/tools/conformance
+ cp build/libjxl_threads.so.${{ env.LIBJXL_VERSION }} build/tools/conformance
+ env:
+ SKIP_TEST: 1
+ - uses: actions/upload-artifact@v2
+ with:
+ name: conformance_binary-${{ matrix.name }}
+ path: |
+ build/tools/conformance/conformance.py
+ build/tools/conformance/lcms2.py
+ build/tools/conformance/djxl
+ build/tools/conformance/libjxl.so.${{ env.LIBJXL_VERSION }}
+ build/tools/conformance/libjxl_threads.so.${{ env.LIBJXL_VERSION }}
+ - name: ccache stats
+ run: ccache --show-stats
+
+ run:
+ name: Conformance Test ${{ matrix.name }} on ${{ matrix.target }}
+ needs: [warmup, build]
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ name: [main_level5, main_level10]
+ target: [AVX3, AVX2, SSE4, SSSE3, EMU128, SCALAR, SCALAR_ASAN]
+ steps:
+ - name: Install deps
+ run: |
+ pip install numpy
+ - name: Checkout the conformance source
+ uses: actions/checkout@v2
+ with:
+ repository: libjxl/conformance
+ ref: 43d8135b50e53167ee6fee0b842b9eb15cfc4aa2
+ path: conformance
+ - name: Cache
+ uses: actions/cache@v2
+ with:
+ path: ${{ github.workspace }}/conformance/.objects
+ key: conformance-refs
+ - name: Download and link conformance files
+ run: |
+ ${{ github.workspace }}/conformance/scripts/download_and_symlink.sh
+ - uses: actions/download-artifact@v2
+ with:
+ name: conformance_binary-${{ matrix.target }}
+ - name: Run conformance tests
+ run: |
+ chmod +x djxl
+ ln -s libjxl.so.${{ env.LIBJXL_VERSION }} libjxl.so.${{ env.LIBJXL_ABI_VERSION }}
+ ln -s libjxl_threads.so.${{ env.LIBJXL_VERSION }} libjxl_threads.so.${{ env.LIBJXL_ABI_VERSION }}
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`
+ python conformance.py \
+ --decoder=`pwd`/djxl \
+ --corpus=`pwd`/conformance/testcases/${{ matrix.name }}.txt
diff --git a/third_party/jpeg-xl/.github/workflows/debug_ci.yml b/third_party/jpeg-xl/.github/workflows/debug_ci.yml
new file mode 100644
index 0000000000..fb3522eb28
--- /dev/null
+++ b/third_party/jpeg-xl/.github/workflows/debug_ci.yml
@@ -0,0 +1,59 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Workflow for building and then debugging on a specific commit.
+
+name: Build and Test debugging
+on:
+ push:
+ branches:
+ - ci-*-debug
+
+jobs:
+ ubuntu_build:
+ name: Ubuntu Build and SSH
+ runs-on: [ubuntu-latest]
+
+ steps:
+ - name: Install build deps
+ run: |
+ sudo apt update
+ sudo apt install -y \
+ ccache \
+ clang-7 \
+ cmake \
+ doxygen \
+ libbrotli-dev \
+ libgdk-pixbuf2.0-dev \
+ libgif-dev \
+ libgtest-dev \
+ libgtk2.0-dev \
+ libjpeg-dev \
+ libopenexr-dev \
+ libpng-dev \
+ libwebp-dev \
+ ninja-build \
+ pkg-config \
+ xvfb \
+ ${{ matrix.apt_pkgs }} \
+ #
+ echo "CC=clang-7" >> $GITHUB_ENV
+ echo "CXX=clang++-7" >> $GITHUB_ENV
+ - name: Checkout the source
+ uses: actions/checkout@v2
+ with:
+ submodules: true
+ fetch-depth: 2
+ - name: Build
+ run: |
+ ./ci.sh $(echo ${{ github.ref }} | sed 's_refs/heads/ci-\([a-z_]*\)-debug_\1_') \
+ -DJPEGXL_FORCE_SYSTEM_BROTLI=ON
+ env:
+ SKIP_TEST: 1
+ - name: Setup tmate session
+ uses: mxschmitt/action-tmate@v3
+
+
+
diff --git a/third_party/jpeg-xl/.github/workflows/fuzz.yml b/third_party/jpeg-xl/.github/workflows/fuzz.yml
new file mode 100644
index 0000000000..188a4c79c7
--- /dev/null
+++ b/third_party/jpeg-xl/.github/workflows/fuzz.yml
@@ -0,0 +1,56 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# CI on pull-requests to run the fuzzer from oss-fuzz. See:
+#
+# https://google.github.io/oss-fuzz/getting-started/continuous-integration/
+
+name: CIFuzz
+on:
+ pull_request:
+ types: [opened, reopened, synchronize]
+ paths:
+ - '**.c'
+ - '**.cc'
+ - '**.cmake'
+ - '**.h'
+ - '**CMakeLists.txt'
+ - .github/workflows/fuzz.yml
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
+ cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
+jobs:
+ fuzzing:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout source
+ uses: actions/checkout@v2
+ id: checkout
+ with:
+ # The build_fuzzers action checks out the code to the storage/libjxl
+ # directory already, but doesn't check out the submodules. This step
+ # is a workaround for checking out the submodules.
+ path: storage/libjxl
+ submodules: true
+ - name: Build Fuzzers
+ id: build
+ uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
+ with:
+ oss-fuzz-project-name: 'libjxl'
+ language: c++
+ - name: Run Fuzzers
+ uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
+ with:
+ oss-fuzz-project-name: 'libjxl'
+ language: c++
+ fuzz-seconds: 600
+ - name: Upload Crash
+ uses: actions/upload-artifact@v1
+ if: failure() && steps.build.outcome == 'success'
+ with:
+ name: artifacts
+ path: ./out/artifacts
diff --git a/third_party/jpeg-xl/.github/workflows/pull_request.yml b/third_party/jpeg-xl/.github/workflows/pull_request.yml
new file mode 100644
index 0000000000..b1214e1061
--- /dev/null
+++ b/third_party/jpeg-xl/.github/workflows/pull_request.yml
@@ -0,0 +1,42 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Workflow to run pull-requests specific checks.
+
+name: PR
+on:
+ pull_request:
+ types: [opened, reopened, synchronize]
+
+jobs:
+ # Checks that the AUTHORS files is updated with new contributors.
+ authors:
+ runs-on: [ubuntu-latest]
+ steps:
+ - name: Checkout the source
+ uses: actions/checkout@v2
+ - name: Check AUTHORS file
+ run:
+ ./ci.sh authors
+
+ format:
+ runs-on: [ubuntu-latest]
+ steps:
+ - name: Install build deps
+ run: |
+ sudo apt update
+ sudo apt install -y \
+ clang-format \
+ clang-format-7 \
+ clang-format-8 \
+ clang-format-9 \
+ clang-format-10 \
+ clang-format-11 \
+ #
+ - name: Checkout the source
+ uses: actions/checkout@v2
+ - name: clang-format
+ run:
+ ./ci.sh lint >&2
diff --git a/third_party/jpeg-xl/.github/workflows/release.yaml b/third_party/jpeg-xl/.github/workflows/release.yaml
new file mode 100644
index 0000000000..4222266598
--- /dev/null
+++ b/third_party/jpeg-xl/.github/workflows/release.yaml
@@ -0,0 +1,378 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Workflow for building the release binaries.
+#
+# This workflow runs as a post-submit step, when pushing to main or the release
+# branches (v*.*.x), and when creating a release in GitHub.
+#
+# In the GitHub release case, in addition to build the release binaries it also
+# uploads the binaries to the given release automatically.
+
+name: Release build / deploy
+on:
+ push:
+ branches:
+ - main
+ - v*.*.x
+ release:
+ types: [ published ]
+
+jobs:
+ ubuntu_static_x86_64:
+ name: Release linux x86_64 static
+ runs-on: [ubuntu-latest]
+ steps:
+ - name: Install build deps
+ run: |
+ sudo apt update
+ sudo apt install -y \
+ asciidoc \
+ clang \
+ cmake \
+ doxygen \
+ libbrotli-dev \
+ libgdk-pixbuf2.0-dev \
+ libgif-dev \
+ libgtest-dev \
+ libgtk2.0-dev \
+ libjpeg-dev \
+ libopenexr-dev \
+ libpng-dev \
+ libwebp-dev \
+ ninja-build \
+ pkg-config \
+ #
+ echo "CC=clang" >> $GITHUB_ENV
+ echo "CXX=clang++" >> $GITHUB_ENV
+
+ - name: Checkout the source
+ uses: actions/checkout@v2
+ with:
+ submodules: true
+ fetch-depth: 1
+
+ - name: Build
+ env:
+ SKIP_TEST: 1
+ run: |
+ ./ci.sh release \
+ -DJPEGXL_DEP_LICENSE_DIR=/usr/share/doc \
+ -DJPEGXL_STATIC=ON \
+ -DBUILD_TESTING=OFF \
+ -DJPEGXL_ENABLE_VIEWERS=OFF \
+ -DJPEGXL_ENABLE_PLUGINS=OFF \
+ -DJPEGXL_ENABLE_OPENEXR=OFF \
+
+ - name: Package release tarball
+ run: |
+ cd build
+ tar -zcvf ${{ runner.workspace }}/release_file.tar.gz \
+ LICENSE* tools/{cjxl,djxl,benchmark_xl}
+ ln -s ${{ runner.workspace }}/release_file.tar.gz \
+ ${{ runner.workspace }}/jxl-linux-x86_64-static-${{ github.event.release.tag_name }}.tar.gz
+
+ - name: Upload artifacts
+ uses: actions/upload-artifact@v2
+ with:
+ name: jxl-linux-x86_64-static
+ path: ${{ runner.workspace }}/release_file.tar.gz
+
+ - name: Upload binaries to release
+ if: github.event_name == 'release'
+ uses: AButler/upload-release-assets@v2.0
+ with:
+ files: ${{ runner.workspace }}/jxl-linux-x86_64-static-${{ github.event.release.tag_name }}.tar.gz
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
+
+
+ # Build .deb packages Ubuntu/Debian
+ release_ubuntu_pkg:
+ name: .deb packages / ${{ matrix.os }}
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ matrix:
+ os:
+ - ubuntu:20.04
+ - ubuntu:18.04
+ - debian:buster
+ - debian:bullseye
+ - debian:bookworm
+ - debian:sid
+
+ container:
+ image: ${{ matrix.os }}
+
+ steps:
+ - name: Set env
+ shell: 'bash'
+ id: 'env'
+ run: |
+ artifact_name="jxl-debs-amd64-${matrix_os/:/-}"
+ echo ${artifact_name}
+ echo "::set-output name=artifact_name::${artifact_name}"
+ env:
+ matrix_os: ${{ matrix.os }}
+
+ - name: Install build deps
+ run: |
+ apt update
+ DEBIAN_FRONTEND=noninteractive apt install -y \
+ build-essential \
+ devscripts \
+ #
+
+ - name: Install git (only 18.04)
+ if: matrix.os == 'ubuntu:18.04'
+ # Ubuntu 18.04 ships with git 2.17 but we need 2.18 or newer for
+ # actions/checkout@v2 to work
+ shell: 'bash'
+ run: |
+ apt install -y \
+ libcurl4-openssl-dev \
+ libexpat1-dev \
+ libssl-dev \
+ wget \
+ zlib1g-dev \
+ #
+ git_version="2.32.0"
+ wget -nv \
+ "https://github.com/git/git/archive/refs/tags/v${git_version}.tar.gz"
+ tar -zxf "v${git_version}.tar.gz"
+ cd "git-${git_version}"
+ make prefix=/usr -j4 install
+
+ - name: Install gcc-8 (only 18.04)
+ if: matrix.os == 'ubuntu:18.04'
+ # Compiler bug workaround: install and use gcc-8
+ shell: 'bash'
+ run: |
+ apt install -y \
+ gcc-8 \
+ g++-8 \
+ #
+ update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 100
+ update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 100
+ update-alternatives --set g++ /usr/bin/g++-8
+ update-alternatives --set gcc /usr/bin/gcc-8
+
+ - name: Set git safe dir
+ run: |
+ export GIT_CEILING_DIRECTORIES=/__w # only work before git v2.35.2
+ git config --global --add safe.directory /__w/libjxl/libjxl
+
+ - name: Checkout the source
+ uses: actions/checkout@v2
+ with:
+ submodules: true
+ fetch-depth: 1
+
+ - name: Stamp non-release versions
+ # Stamps the built package with the commit date as part of the version
+ # after the version number so newer release candidates can override older
+ # ones.
+ if: github.event_name != 'release'
+ shell: 'bash'
+ run: |
+ # Committer timestamp.
+ set -x
+ commit_timestamp=$(git show -s --format=%ct)
+ commit_datetime=$(date --utc "--date=@${commit_timestamp}" '+%Y%m%d%H%M%S')
+ commit_ref=$(git rev-parse --short HEAD)
+ sem_version=$(dpkg-parsechangelog --show-field Version)
+ sem_version="${sem_version%%-*}"
+ deb_version="${sem_version}~alpha${commit_datetime}-0+git${commit_ref}"
+ dch -M --distribution unstable -b --newversion "${deb_version}" \
+ "Stamping build with version ${deb_version}"
+
+ - name: Stamp release versions
+ # Mark the version as released
+ if: github.event_name == 'release'
+ shell: 'bash'
+ run: |
+ if head -n1 debian/changelog | grep UNRELEASED; then
+ dch -M --distribution unstable --release ''
+ fi
+
+ - name: Install gtest (only 18.04)
+ if: matrix.os == 'ubuntu:18.04'
+ # In Ubuntu 18.04 no package installed the libgtest.a. libgtest-dev
+ # installs the source files only.
+ run: |
+ apt install -y libgtest-dev cmake
+ for prj in googletest googlemock; do
+ (cd /usr/src/googletest/${prj}/ &&
+ cmake CMakeLists.txt -DCMAKE_INSTALL_PREFIX=/usr &&
+ make all install)
+ done
+ # Remove libgmock-dev dependency in Ubuntu 18.04. It doesn't exist there.
+ sed '/libgmock-dev,/d' -i debian/control
+
+ - name: Install gmock-dev (debian:sid)
+ # gtest-dev cmake depends on gmock-dev, but it is not installed by the
+ # package.
+ if: matrix.os == 'debian:sid'
+ run: |
+ apt install -y libgmock-dev
+
+ - name: Remove libjxl-gimp-plugin package (only 18.04)
+ if: matrix.os == 'ubuntu:18.04'
+ run: |
+ # Gimp 2.8 is not supported.
+ sed -i '/Package: libjxl-gimp-plugin/,/^$/d' debian/control
+
+ - name: Build hwy
+ run: |
+ apt build-dep -y ./third_party/highway
+ ./ci.sh debian_build highway
+ dpkg -i build/debs/libhwy-dev_*_amd64.deb
+
+ - name: Build libjxl
+ run: |
+ apt build-dep -y .
+ ./ci.sh debian_build jpeg-xl
+
+ - name: Stats
+ run: |
+ ./ci.sh debian_stats
+
+ - name: Upload artifacts
+ uses: actions/upload-artifact@v2
+ with:
+ name: ${{ steps.env.outputs.artifact_name }}
+ path: |
+ build/debs/*jxl*.*
+
+ - name: Package release tarball
+ if: github.event_name == 'release'
+ run: |
+ (cd build/debs/; find -maxdepth 1 -name '*jxl*.*') | \
+ tar -zcvf release_file.tar.gz -C build/debs/ -T -
+ ln -s release_file.tar.gz \
+ ${{ steps.env.outputs.artifact_name }}-${{ github.event.release.tag_name }}.tar.gz
+
+ - name: Upload binaries to release
+ if: github.event_name == 'release'
+ uses: AButler/upload-release-assets@v2.0
+ with:
+ files: ${{ steps.env.outputs.artifact_name }}-${{ github.event.release.tag_name }}.tar.gz
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
+
+
+ windows_build:
+ name: Windows Build (vcpkg / ${{ matrix.triplet }})
+ runs-on: [windows-2019]
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - triplet: x86-windows-static
+ arch: '-A Win32'
+ - triplet: x64-windows-static
+ arch: '-A x64'
+
+ env:
+ VCPKG_VERSION: '2022.06.16.1'
+ VCPKG_ROOT: vcpkg
+ VCPKG_DISABLE_METRICS: 1
+
+ steps:
+ - name: Checkout the source
+ uses: actions/checkout@v2
+ with:
+ submodules: true
+ fetch-depth: 2
+
+ - uses: actions/cache@v2
+ id: cache-vcpkg
+ with:
+ path: vcpkg
+ key: release-${{ runner.os }}-vcpkg-${{ env.VCPKG_VERSION }}-${{ matrix.triplet }}
+
+ - name: Download vcpkg
+ if: steps.cache-vcpkg.outputs.cache-hit != 'true'
+ # wget doesn't seem to work under bash.
+ shell: 'powershell'
+ run: |
+ C:\msys64\usr\bin\wget.exe -nv `
+ https://github.com/microsoft/vcpkg/archive/refs/tags/${{ env.VCPKG_VERSION }}.zip `
+ -O vcpkg.zip
+ - name: Bootstrap vcpkg
+ if: steps.cache-vcpkg.outputs.cache-hit != 'true'
+ shell: 'bash'
+ run: |
+ set -x
+ unzip -q vcpkg.zip
+ rm -rf ${VCPKG_ROOT}
+ mv vcpkg-${VCPKG_VERSION} ${VCPKG_ROOT}
+ ${VCPKG_ROOT}/bootstrap-vcpkg.sh
+
+ - name: Install libraries with vcpkg
+ shell: 'bash'
+ run: |
+ set -x
+ ${VCPKG_ROOT}/vcpkg --triplet ${{ matrix.triplet }} install \
+ giflib \
+ libjpeg-turbo \
+ libpng \
+ libwebp \
+ #
+
+ - name: Configure
+ shell: 'bash'
+ run: |
+ set -x
+ mkdir build
+ cmake -Bbuild -H. ${{ matrix.arch }} \
+ -DBUILD_TESTING=OFF \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DCMAKE_INSTALL_PREFIX=`pwd`/prefix \
+ -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake \
+ -DJPEGXL_ENABLE_OPENEXR=OFF \
+ -DJPEGXL_ENABLE_PLUGINS=OFF \
+ -DJPEGXL_ENABLE_TCMALLOC=OFF \
+ -DJPEGXL_ENABLE_VIEWERS=OFF \
+ -DVCPKG_TARGET_TRIPLET=${{ matrix.triplet }} \
+ #
+ - name: Build
+ shell: 'bash'
+ run: |
+ set -x
+ cmake --build build --config Release
+ - name: Install
+ shell: 'bash'
+ run: |
+ set -x
+ cmake --build build --config Release --target install
+ for pkg in giflib libjpeg-turbo libpng libwebp zlib; do
+ cp vcpkg/installed/${{matrix.triplet}}/share/${pkg}/copyright \
+ prefix/bin/LICENSE.${pkg}
+ done
+ cp third_party/sjpeg/COPYING prefix/bin/LICENSE.sjpeg
+ cp third_party/skcms/LICENSE prefix/bin/LICENSE.skcms
+ cp third_party/highway/LICENSE prefix/bin/LICENSE.highway
+ cp third_party/brotli/LICENSE prefix/bin/LICENSE.brotli
+ cp LICENSE prefix/bin/LICENSE.libjxl
+ - name: Upload artifacts
+ uses: actions/upload-artifact@v2
+ with:
+ name: jxl-${{matrix.triplet}}
+ path: |
+ prefix/bin/*
+
+ - name: Package release zip
+ if: github.event_name == 'release'
+ shell: 'powershell'
+ run: |
+ Compress-Archive -Path prefix\bin\* `
+ -DestinationPath jxl-${{matrix.triplet}}.zip
+
+ - name: Upload binaries to release
+ if: github.event_name == 'release'
+ uses: AButler/upload-release-assets@v2.0
+ with:
+ files: jxl-${{matrix.triplet}}.zip
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/third_party/jpeg-xl/.readthedocs.yaml b/third_party/jpeg-xl/.readthedocs.yaml
new file mode 100644
index 0000000000..6d714ba1aa
--- /dev/null
+++ b/third_party/jpeg-xl/.readthedocs.yaml
@@ -0,0 +1,17 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+#
+# readthedocs.io configuration file. See:
+# https://docs.readthedocs.io/en/stable/config-file/v2.html
+
+version: 2
+
+sphinx:
+ configuration: doc/sphinx/conf.py
+
+python:
+ version: "3.7"
+ install:
+ - requirements: doc/sphinx/requirements.txt
diff --git a/third_party/jpeg-xl/AUTHORS b/third_party/jpeg-xl/AUTHORS
new file mode 100644
index 0000000000..2ff5d04f67
--- /dev/null
+++ b/third_party/jpeg-xl/AUTHORS
@@ -0,0 +1,67 @@
+# List of the project authors for copyright purposes. When contributing to the
+# project add your name or your organization's name to this list. See
+# CONTRIBUTING.md for details.
+#
+# For organizations:
+# Organization <email pattern: *@domain>
+#
+# For individuals:
+# Name <email address>
+#
+# Please keep each list sorted. If you wish to change your email address please
+# send a pull request.
+
+# Organizations:
+Cloudinary Ltd. <*@cloudinary.com>
+Google LLC <*@google.com>
+
+# Individuals:
+a-shvedov
+Alex Xu (Hello71) <alex_y_xu@yahoo.ca>
+Alexander Sago <cagelight@gmail.com>
+Alistair Barrow
+Andrius Lukas Narbutas <andrius4669@gmail.com>
+Aous Naman <aous@unsw.edu.au>
+Artem Selishchev
+Biswapriyo Nath <nathbappai@gmail.com>
+CanadianBaconBoi <beamconnor@gmail.com>
+Damiano Albani <damiano.albani@gmail.com>
+Daniel Novomeský <dnovomesky@gmail.com>
+David Burnett <vargolsoft@gmail.com>
+Dirk Lemstra <dirk@lemstra.org>
+Don Olmstead <don.j.olmstead@gmail.com>
+Dong Xu <xdong181@gmail.com>
+Even Rouault <even.rouault@spatialys.com>
+Fred Brennan <copypaste@kittens.ph>
+gi-man
+Heiko Becker <heirecka@exherbo.org>
+Jim Robinson <jimbo2150@gmail.com>
+Jon Sneyers <jon@cloudinary.com>
+Joshua Root <jmr@macports.org>
+Kai Hollberg <Schweinepriester@users.noreply.github.com>
+Kleis Auke Wolthuizen <github@kleisauke.nl>
+L. E. Segovia
+Leo Izen <leo.izen@gmail.com>
+Lovell Fuller
+Maarten DB <anonymous.maarten@gmail.com>
+Marcin Konicki <ahwayakchih@gmail.com>
+Martin Strunz
+Mathieu Malaterre <mathieu.malaterre@gmail.com>
+Mikk Leini <mikk.leini@krakul.eu>
+Misaki Kasumi <misakikasumi@outlook.com>
+Nicholas Hayes <0xC0000054@users.noreply.github.com>
+Nigel Tao <nigeltao@golang.org>
+Petr Diblík
+Pieter Wuille
+roland-rollo
+Samuel Leong <wvvwvvvvwvvw@gmail.com>
+Sandro <sandro.jaeckel@gmail.com>
+Sergey Fedorov <vital.had@gmail.com>
+Stephan T. Lavavej <stl@nuwen.net>
+Thomas Bonfort <thomas.bonfort@airbus.com>
+tmkk <tmkkmac@gmail.com>
+Vincent Torri <vincent.torri@gmail.com>
+xiota
+Yonatan Nebenzhal <yonatan.nebenzhl@gmail.com>
+Ziemowit Zabawa <ziemek.zabawa@outlook.com>
+源文雨 <41315874+fumiama@users.noreply.github.com>
diff --git a/third_party/jpeg-xl/BUILD.bazel b/third_party/jpeg-xl/BUILD.bazel
new file mode 100644
index 0000000000..0b81fc7b8a
--- /dev/null
+++ b/third_party/jpeg-xl/BUILD.bazel
@@ -0,0 +1,22 @@
+package(default_visibility = ["//:__subpackages__"])
+
+filegroup(
+ name = "testdata",
+ srcs = glob([
+ "testdata/**/*.icc",
+ "testdata/**/*.pam",
+ "testdata/**/*.pfm",
+ "testdata/**/*.pgm",
+ "testdata/**/*.pnm",
+ "testdata/**/*.ppm",
+ "testdata/**/*.png",
+ "testdata/**/*.jpg",
+ "testdata/**/*.jxl",
+ "testdata/**/*.gif",
+ "testdata/**/*.y4m",
+ "testdata/**/*.jxl",
+ "testdata/**/*.png",
+ "testdata/**/*.jpg",
+ "testdata/position_encoding/*.txt",
+ ]),
+)
diff --git a/third_party/jpeg-xl/BUILDING.md b/third_party/jpeg-xl/BUILDING.md
new file mode 100644
index 0000000000..8fc4561aa0
--- /dev/null
+++ b/third_party/jpeg-xl/BUILDING.md
@@ -0,0 +1,98 @@
+# Compilation
+
+For more details and other workflows see the "Advanced guide" below.
+
+## Checking out the code
+
+```bash
+git clone https://github.com/libjxl/libjxl.git --recursive --shallow-submodules
+```
+
+This repository uses git submodules to handle some third party dependencies
+under `third_party`, that's why it is important to pass `--recursive`. If you
+didn't check out with `--recursive`, or any submodule has changed, run:
+
+```bash
+git submodule update --init --recursive --depth 1 --recommend-shallow
+```
+
+The `--shallow-submodules` and `--depth 1 --recommend-shallow` options create
+shallow clones which only downloads the commits requested, and is all that is
+needed to build `libjxl`. Should full clones be necessary, you could always run:
+
+```bash
+git submodule foreach git fetch --unshallow
+git submodule update --init --recursive
+```
+
+which pulls the rest of the commits in the submodules.
+
+Important: If you downloaded a zip file or tarball from the web interface you
+won't get the needed submodules and the code will not compile. You can download
+these external dependencies from source running `./deps.sh`. The git workflow
+described above is recommended instead.
+
+## Installing dependencies
+
+Required dependencies for compiling the code, in a Debian/Ubuntu based
+distribution run:
+
+```bash
+sudo apt install cmake pkg-config libbrotli-dev
+```
+
+Optional dependencies for supporting other formats in the `cjxl`/`djxl` tools,
+in a Debian/Ubuntu based distribution run:
+
+```bash
+sudo apt install libgif-dev libjpeg-dev libopenexr-dev libpng-dev libwebp-dev
+```
+
+We recommend using a recent Clang compiler (version 7 or newer), for that
+install clang and set `CC` and `CXX` variables.
+
+```bash
+sudo apt install clang
+export CC=clang CXX=clang++
+```
+
+## Building
+
+```bash
+cd libjxl
+mkdir build
+cd build
+cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF ..
+cmake --build . -- -j$(nproc)
+```
+
+The encoder/decoder tools will be available in the `build/tools` directory.
+
+## <a name="installing"></a> Installing
+
+```bash
+sudo cmake --install .
+```
+
+
+
+## Advanced guide
+
+### Building with Docker
+
+We build a common environment based on Debian/Ubuntu using Docker. Other
+systems may have different combinations of versions and dependencies that
+have not been tested and may not work. For those cases we recommend using the
+Docker container as explained in the
+[step by step guide](doc/developing_in_docker.md).
+
+### Building JPEG XL for developers
+
+For experienced developers, we provide build instructions for several other environments:
+
+* [Building on Debian](doc/developing_in_debian.md)
+* Building on Windows with [vcpkg](doc/developing_in_windows_vcpkg.md) (Visual Studio 2019)
+* Building on Windows with [MSYS2](doc/developing_in_windows_msys.md)
+* [Cross Compiling for Windows with Crossroad](doc/developing_with_crossroad.md)
+
+If you encounter any difficulties, please use Docker instead.
diff --git a/third_party/jpeg-xl/BUILDING_Haiku.md b/third_party/jpeg-xl/BUILDING_Haiku.md
new file mode 100644
index 0000000000..1ffca1453c
--- /dev/null
+++ b/third_party/jpeg-xl/BUILDING_Haiku.md
@@ -0,0 +1,20 @@
+## Disclaimer
+
+Haiku builds are not officially supported, i.e. the build might not work at all,
+some tests may fail and some sub-projects are excluded from build.
+
+This manual outlines Haiku-specific setup. For general building and testing
+instructions see "[BUILDING](BUILDING.md)" and
+"[Building and Testing changes](doc/building_and_testing.md)".
+
+## Dependencies
+
+```shell
+pkgman install llvm9_clang ninja cmake doxygen libjpeg_turbo_devel giflib_devel
+```
+
+## Building
+
+```shell
+TEST_STACK_LIMIT=none CMAKE_FLAGS="-I/boot/system/develop/tools/lib/gcc/x86_64-unknown-haiku/8.3.0/include/c++ -I/boot/system/develop/tools/lib/gcc/x86_64-unknown-haiku/8.3.0/include/c++/x86_64-unknown-haiku" CMAKE_SHARED_LINKER_FLAGS="-shared -Xlinker -soname=libjpegxl.so -lpthread" ./ci.sh opt
+```
diff --git a/third_party/jpeg-xl/BUILDING_OSX.md b/third_party/jpeg-xl/BUILDING_OSX.md
new file mode 100644
index 0000000000..b5f5e34db7
--- /dev/null
+++ b/third_party/jpeg-xl/BUILDING_OSX.md
@@ -0,0 +1,41 @@
+## Disclaimer
+
+OSX builds have "best effort" support, i.e. build might not work at all, some
+tests may fail and some sub-projects are excluded from build.
+
+This manual outlines OSX specific setup. For general building and testing
+instructions see "[BUILDING](BUILDING.md)" and
+"[Building and Testing changes](doc/building_and_testing.md)".
+
+[Homebrew](https://brew.sh/) is a popular package manager. JPEG XL library and
+binaries could be installed using it:
+
+```bash
+brew install jpeg-xl
+```
+
+## Dependencies
+
+Make sure that `brew doctor` does not report serious problems and up-to-date
+version of XCode is installed.
+
+Installing (actually, building) `clang` might take a couple hours.
+
+```bash
+brew install llvm
+```
+
+```bash
+brew install coreutils cmake giflib jpeg-turbo libpng ninja zlib
+```
+
+Before building the project check that `which clang` is
+`/usr/local/opt/llvm/bin/clang`, not the one provided by XCode. If not, update
+`PATH` environment variable.
+
+Also, setting `CMAKE_PREFIX_PATH` might be necessary for correct include paths
+resolving, e.g.:
+
+```bash
+export CMAKE_PREFIX_PATH=`brew --prefix giflib`:`brew --prefix jpeg-turbo`:`brew --prefix libpng`:`brew --prefix zlib`
+``` \ No newline at end of file
diff --git a/third_party/jpeg-xl/CHANGELOG.md b/third_party/jpeg-xl/CHANGELOG.md
new file mode 100644
index 0000000000..20c974c55c
--- /dev/null
+++ b/third_party/jpeg-xl/CHANGELOG.md
@@ -0,0 +1,294 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## Unreleased
+
+### Added
+ - encoder API: add `JxlEncoderSetExtraChannelDistance` to adjust the quality
+ of extra channels (like alpha) separately.
+
+### Removed
+
+### Changed
+ - changed the name of the cjxl flag `photon_noise` to `photon_noise_iso`
+
+## [0.8.0] - 2023-01-18
+
+### Added
+ - decoder API: new function `JxlDecoderSetImageBitDepth` to set the bit depth
+ of the output buffer.
+ - decoder API proposal: add `JxlDecoderSetOutputColorProfile` and
+ `JxlDecoderSetCms` to enable decoding to desired colorspace; NB: not
+ implemented yet.
+ - encoder API: new function `JxlEncoderSetFrameBitDepth` to set the bit depth
+ of the input buffer.
+ - encoder API: add an effort 10 option for lossless compression; using this
+ setting requires calling `JxlEncoderAllowExpertOptions`.
+ - encoder API: new `JXL_ENC_FRAME_SETTING_JPEG_COMPRESS_BOXES` enum value to
+ allow explicit control of metadata compression
+
+### Removed
+ - common API: removed `JxlIntrinsicSizeHeader`
+ - decoder API: removed deprecated `JXL_DEC_NEED_DC_OUT_BUFFER` and
+ `JXL_DEC_DC_IMAGE` events, `JxlDecoderDCOutBufferSize` and
+ `JxlDecoderSetDCOutBuffer` functions
+
+### Changed / clarified
+ - encoder API: `JxlEncoderProcessOutput` requires at least 32 bytes of output
+ space to proceed and guarantees that at least one byte will be written
+## [0.7] - 2022-07-21
+
+### Added
+ - Export version information in headers.
+ - decoder API: Ability to decode the content of metadata boxes:
+ `JXL_DEC_BOX`, `JXL_DEC_BOX_NEED_MORE_OUTPUT`, `JxlDecoderSetBoxBuffer`,
+ `JxlDecoderGetBoxType`, `JxlDecoderGetBoxSizeRaw` and
+ `JxlDecoderSetDecompressBoxes`.
+ - decoder API: ability to mark the input is finished: `JxlDecoderCloseInput`.
+ - decoder API: ability to request updates on different progressive events using
+ `JxlDecoderSetProgressiveDetail`; currently supported events are
+ `kDC`, `kLastPasses` and `kPasses`.
+ - decoder API: ability to specify desired intensity target using
+ `JxlDecoderSetDesiredIntensityTarget`
+ - decoder API: new function `JxlDecoderSetCoalesced` to allow decoding
+ non-coalesced (unblended) frames, e.g. layers of a composite still image
+ or the cropped frames of a recompressed GIF/APNG.
+ - decoder API: new function `JxlDecoderSetUnpremultiplyAlpha` to set
+ preference for getting an associated alpha channel with premultiplied or
+ unpremultiplied colors.
+ - decoder API: field added to `JxlFrameHeader`: a `JxlLayerInfo` struct
+ that contains crop dimensions and offsets and blending information for
+ the non-coalesced case.
+ - decoder API: new function `JxlDecoderGetExtraChannelBlendInfo` to get
+ the blending information for extra channels in the non-coalesced case.
+ - decoder API: new function `JxlDecoderSetMultithreadedImageOutCallback`,
+ allowing output callbacks to receive more information about the number of
+ threads on which they are running.
+ - decoder API: new function `JxlDecoderSkipCurrentFrame` to skip processing
+ the current frame after a progressive detail is reached.
+ - decoder API: new function `JxlDecoderGetIntendedDownsamplingRatio` to get
+ the intended downsampling ratio of progressive steps, based on the
+ information in the frame header.
+ - decoder API: new function `JxlDecoderSetRenderSpotcolors` to allow disabling
+ rendering of spot colors.
+ - decoder/encoder API: add two fields to `JXLBasicInfo`: `intrinsic_xsize`
+ and `intrinsic_ysize` to signal the intrinsic size.
+ - encoder API: ability to add metadata boxes, added new functions
+ `JxlEncoderAddBox`, `JxlEncoderUseBoxes`, `JxlEncoderCloseBoxes` and
+ `JxlEncoderCloseFrames`.
+ - encoder API: added ability to set several encoder options / extra fields to
+ frames using `JxlEncoderSetFrameName`, `JxlEncoderFrameSettingsSetOption`,
+ `JxlEncoderFrameSettingsSetFloatOption`.
+ - encoder API: added ability to check required codestream compatibility level
+ and force specified using `JxlEncoderGetRequiredCodestreamLevel` and
+ `JxlEncoderSetCodestreamLevel`.
+ - encoder API: added ability to force emitting box-based container format
+ using `JxlEncoderUseContainer`.
+ - encoder API: added ability to store JPEG metadata for lossless reconstruction
+ using `JxlEncoderStoreJPEGMetadata`
+ - encoder API: new functions `JxlEncoderSetFrameHeader` and
+ `JxlEncoderSetExtraChannelBlendInfo` to set animation
+ and blending parameters of the frame, and `JxlEncoderInitFrameHeader` and
+ `JxlEncoderInitBlendInfo` to initialize the structs to set.
+ - encoder API: ability to encode arbitrary extra channels:
+ `JxlEncoderInitExtraChannelInfo`, `JxlEncoderSetExtraChannelInfo`,
+ `JxlEncoderSetExtraChannelName` and `JxlEncoderSetExtraChannelBuffer`.
+ - encoder API: ability to plug custom CMS implementation using
+ `JxlEncoderSetCms(JxlEncoder* enc, JxlCmsInterface cms)`
+ - encoder API: added `JxlEncoderGetError` to retrieve last encoder error.
+
+### Changed
+- decoder API: using `JxlDecoderCloseInput` at the end of all input is required
+ when using JXL_DEC_BOX, and is now also encouraged in other cases, but not
+ required in those other cases for backwards compatibility.
+- encoder API: `JxlEncoderCloseInput` now closes both frames and boxes input.
+- CLI: `cjxl` and `djxl` have been reimplemented on the base of public decoder
+ and encoder API; dropped dependency on `gflags` for argument parsing.
+
+### Deprecated
+- decoder API: `JXL_DEC_EXTENSIONS` event: use `JXL_DEC_BASIC_INFO`
+- decoder / encoder API: pixel types `JXL_TYPE_BOOLEAN` and `JXL_TYPE_UINT32`:
+ consider using `JXL_TYPE_UINT8` and `JXL_TYPE_FLOAT` correspondingly.
+- decoder API: pixel format parameter for `JxlDecoderGetColorAsEncodedProfile`
+ and `JxlDecoderGetICCProfileSize`: pass `NULL`.
+- decoder API: `JxlDecoderDefaultPixelFormat`
+- encoder API: `JxlEncoderOptions`: use `JxlEncoderFrameSettings` instead.
+- encoder API: `JxlEncoderOptionsCreate`: use `JxlEncoderFrameSettingsCreate`
+ instead.
+- encoder API: `JxlEncoderOptionsSetDistance`: use `JxlEncoderSetFrameDistance`
+ instead.
+- encoder API: `JxlEncoderOptionsSetLossless`: use `JxlEncoderSetFrameLossless`
+ instead.
+- encoder API: `JxlEncoderOptionsSetEffort`: use
+ `JxlEncoderFrameSettingsSetOption(frame_settings, JXL_ENC_FRAME_SETTING_EFFORT, effort)`
+ instead.
+- encoder API: `JxlEncoderOptionsSetDecodingSpeed`: use
+ `JxlEncoderFrameSettingsSetOption(frame_settings, JXL_ENC_FRAME_SETTING_DECODING_SPEED, tier)`
+ instead.
+- encoder API: deprecated `JXL_ENC_NOT_SUPPORTED`, the encoder returns
+ `JXL_ENC_ERROR` instead and there is no need to handle
+ `JXL_ENC_NOT_SUPPORTED`.
+
+## [0.6.1] - 2021-10-29
+### Changed
+ - Security: Fix OOB read in splines rendering (#735 -
+ [CVE-2021-22563](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-22563))
+ - Security: Fix OOB copy (read/write) in out-of-order/multi-threaded decoding
+ (#708 - [CVE-2021-22564](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-22564))
+ - Fix segfault in `djxl` tool with `--allow_partial_files` flag (#781).
+ - Fix border in extra channels when using upsampling (#796)
+
+## [0.6] - 2021-10-04
+### Added
+ - API: New functions to decode extra channels:
+ `JxlDecoderExtraChannelBufferSize` and `JxlDecoderSetExtraChannelBuffer`.
+ - API: New function `JxlEncoderInitBasicInfo` to initialize `JxlBasicInfo`
+ (only needed when encoding). NOTE: it is now required to call this function
+ when using the encoder. Padding was added to the struct for forward
+ compatibility.
+ - API: Support for encoding oriented images.
+ - API: FLOAT16 support in the encoder API.
+ - Rewrite of the GDK pixbuf loader plugin. Added proper color management and
+ animation support.
+ - Rewrite of GIMP plugin. Added compression parameters dialog and switched to
+ using the public C API.
+ - Debian packages for GDK pixbuf loader (`libjxl-gdk-pixbuf`) and GIMP
+ (`libjxl-gimp-plugin`) plugins.
+ - `cjxl`/`djxl` support for `stdin` and `stdout`.
+
+### Changed
+ - API: Renamed the field `alpha_associated` in `JxlExtraChannelInfo` to
+ `alpha_premultiplied`, to match the corresponding name in `JxlBasicInfo`.
+ - Improved the 2x2 downscaling method in the encoder for the optional color
+ channel resampling for low bit rates.
+ - Fixed: the combination of floating point original data, XYB color encoding,
+ and Modular mode was broken (in both encoder and decoder). It now works.
+ NOTE: this can cause the current encoder to write jxl bitstreams that do
+ not decode with the old decoder. In particular this will happen when using
+ cjxl with PFM, EXR, or floating point PSD input, and a combination of XYB
+ and modular mode is used (which caused an encoder error before), e.g.
+ using options like `-m -q 80` (lossy modular), `-d 4.5` or `--progressive_dc=1`
+ (modular DC frame), or default lossy encoding on an image where patches
+ end up being used. There is no problem when using cjxl with PNG, JPEG, GIF,
+ APNG, PPM, PGM, PGX, or integer (8-bit or 16-bit) PSD input.
+ - `libjxl` static library now bundles skcms, fixing static linking in
+ downstream projects when skcms is used.
+ - Spline rendering performance improvements.
+ - Butteraugli changes for less visual masking.
+
+## [0.5] - 2021-08-02
+### Added
+ - API: New function to decode the image using a callback outputting a part of a
+ row per call.
+ - API: 16-bit float output support.
+ - API: `JxlDecoderRewind` and `JxlDecoderSkipFrames` functions to skip more
+ efficiently to earlier animation frames.
+ - API: `JxlDecoderSetPreferredColorProfile` function to choose color profile in
+ certain circumstances.
+ - encoder: Adding `center_x` and `center_y` flags for more control of the tile
+ order.
+ - New encoder speeds `lightning` (1) and `thunder` (2).
+
+### Changed
+ - Re-licensed the project under a BSD 3-Clause license. See the
+ [LICENSE](LICENSE) and [PATENTS](PATENTS) files for details.
+ - Full JPEG XL part 1 specification support: Implemented all the spec required
+ to decode files to pixels, including cases that are not used by the encoder
+ yet. Part 2 of the spec (container format) is final but not fully implemented
+ here.
+ - Butteraugli metric improvements. Exact numbers are different from previous
+ versions.
+ - Memory reductions during decoding.
+ - Reduce the size of the jxl_dec library by removing dependencies.
+ - A few encoding speedups.
+ - Clarify the security policy.
+ - Significant encoding improvements (~5 %) and less ringing.
+ - Butteraugli metric to have some less masking.
+ - `cjxl` flag `--speed` is deprecated and replaced by the `--effort` synonym.
+
+### Removed
+- API for returning a downsampled DC was deprecated
+ (`JxlDecoderDCOutBufferSize` and `JxlDecoderSetDCOutBuffer`) and will be
+ removed in the next release.
+
+## [0.3.7] - 2021-03-29
+### Changed
+ - Fix a rounding issue in 8-bit decoding.
+
+## [0.3.6] - 2021-03-25
+### Changed
+ - Fix a bug that could result in the generation of invalid codestreams as
+ well as failure to decode valid streams.
+
+## [0.3.5] - 2021-03-23
+### Added
+ - New encode-time options for faster decoding at the cost of quality.
+ - Man pages for cjxl and djxl.
+
+### Changed
+ - Memory usage improvements.
+ - Faster decoding to 8-bit output with the C API.
+ - GIMP plugin: avoid the sRGB conversion dialog for sRGB images, do not show
+ a console window on Windows.
+ - Various bug fixes.
+
+## [0.3.4] - 2021-03-16
+### Changed
+ - Improved box parsing.
+ - Improved metadata handling.
+ - Performance and memory usage improvements.
+
+## [0.3.3] - 2021-03-05
+### Changed
+ - Performance improvements for small images.
+ - Add a (flag-protected) non-high-precision mode with better speed.
+ - Significantly speed up the PQ EOTF.
+ - Allow optional HDR tone mapping in djxl (--tone_map, --display_nits).
+ - Change the behavior of djxl -j to make it consistent with cjxl (#153).
+ - Improve image quality.
+ - Improve EXIF handling.
+
+## [0.3.2] - 2021-02-12
+### Changed
+ - Fix embedded ICC encoding regression
+ [#149](https://gitlab.com/wg1/jpeg-xl/-/issues/149).
+
+## [0.3.1] - 2021-02-10
+### Changed
+ - New experimental Butteraugli API (`jxl/butteraugli.h`).
+ - Encoder improvements to low quality settings.
+ - Bug fixes, including fuzzer-found potential security bug fixes.
+ - Fixed `-q 100` and `-d 0` not triggering lossless modes.
+
+## [0.3] - 2021-01-29
+### Changed
+ - Minor change to the Decoder C API to accommodate future work for other ways
+ to provide input.
+ - Future decoder C API changes will be backwards compatible.
+ - Lots of bug fixes since the previous version.
+
+## [0.2] - 2020-12-24
+### Added
+ - JPEG XL bitstream format is frozen. Files encoded with 0.2 will be supported
+ by future versions.
+
+### Changed
+ - Files encoded with previous versions are not supported.
+
+## [0.1.1] - 2020-12-01
+
+## [0.1] - 2020-11-14
+### Added
+ - Initial release of an encoder (`cjxl`) and decoder (`djxl`) that work
+ together as well as a benchmark tool for comparison with other codecs
+ (`benchmark_xl`).
+ - Note: JPEG XL format is in the final stages of standardization, minor changes
+ to the codestream format are still possible but we are not expecting any
+ changes beyond what is required by bug fixing.
+ - API: new decoder API in C, check the `examples/` directory for its example
+ usage. The C API is a work in progress and likely to change both in API and
+ ABI in future releases.
diff --git a/third_party/jpeg-xl/CMakeLists.txt b/third_party/jpeg-xl/CMakeLists.txt
new file mode 100644
index 0000000000..02af13f78e
--- /dev/null
+++ b/third_party/jpeg-xl/CMakeLists.txt
@@ -0,0 +1,494 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Ubuntu bionic ships with cmake 3.10.
+cmake_minimum_required(VERSION 3.10)
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
+
+# Honor VISIBILITY_INLINES_HIDDEN on all types of targets.
+if(POLICY CMP0063)
+ cmake_policy(SET CMP0063 NEW)
+endif()
+# Pass CMAKE_EXE_LINKER_FLAGS to CC and CXX compilers when testing if they work.
+if(POLICY CMP0065)
+ cmake_policy(SET CMP0065 NEW)
+endif()
+
+# Set PIE flags for POSITION_INDEPENDENT_CODE targets, added in 3.14.
+if(POLICY CMP0083)
+ cmake_policy(SET CMP0083 NEW)
+endif()
+
+project(LIBJXL LANGUAGES C CXX)
+
+include(CheckCXXSourceCompiles)
+check_cxx_source_compiles(
+ "int main() {
+ #if !defined(__EMSCRIPTEN__)
+ static_assert(false, \"__EMSCRIPTEN__ is not defined\");
+ #endif
+ return 0;
+ }"
+ JPEGXL_EMSCRIPTEN
+)
+
+message(STATUS "CMAKE_SYSTEM_PROCESSOR is ${CMAKE_SYSTEM_PROCESSOR}")
+include(CheckCXXCompilerFlag)
+check_cxx_compiler_flag("-fsanitize=fuzzer-no-link" CXX_FUZZERS_SUPPORTED)
+check_cxx_compiler_flag("-Xclang -mconstructor-aliases" CXX_CONSTRUCTOR_ALIASES_SUPPORTED)
+check_cxx_compiler_flag("-fmacro-prefix-map=OLD=NEW" CXX_MACRO_PREFIX_MAP)
+check_cxx_compiler_flag("-fno-rtti" CXX_NO_RTTI_SUPPORTED)
+
+# Enabled PIE binaries by default if supported.
+include(CheckPIESupported OPTIONAL RESULT_VARIABLE CHECK_PIE_SUPPORTED)
+if(CHECK_PIE_SUPPORTED)
+ check_pie_supported(LANGUAGES CXX)
+ if(CMAKE_CXX_LINK_PIE_SUPPORTED)
+ set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
+ endif()
+endif()
+
+### Project build options:
+if(CXX_FUZZERS_SUPPORTED)
+ # Enabled by default except on arm64, Windows and Apple builds.
+ set(ENABLE_FUZZERS_DEFAULT true)
+endif()
+find_package(PkgConfig)
+if(NOT APPLE AND NOT WIN32 AND NOT HAIKU AND CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
+ pkg_check_modules(TCMallocMinimalVersionCheck QUIET IMPORTED_TARGET
+ libtcmalloc_minimal)
+ if(TCMallocMinimalVersionCheck_FOUND AND
+ NOT TCMallocMinimalVersionCheck_VERSION VERSION_EQUAL 2.8.0)
+ # Enabled by default except on Windows and Apple builds for
+ # tcmalloc != 2.8.0. tcmalloc 2.8.1 already has a fix for this issue.
+ set(ENABLE_TCMALLOC_DEFAULT true)
+ else()
+ message(STATUS
+ "tcmalloc version ${TCMallocMinimalVersionCheck_VERSION} -- "
+ "tcmalloc 2.8.0 disabled due to "
+ "https://github.com/gperftools/gperftools/issues/1204")
+ endif()
+endif()
+
+check_cxx_source_compiles(
+ "int main() {
+ #if !defined(HWY_DISABLED_TARGETS)
+ static_assert(false, \"HWY_DISABLED_TARGETS is not defined\");
+ #endif
+ return 0;
+ }"
+ JXL_HWY_DISABLED_TARGETS_FORCED
+)
+
+set(WARNINGS_AS_ERRORS_DEFAULT false)
+
+if((SANITIZER STREQUAL "msan") OR JPEGXL_EMSCRIPTEN)
+ set(BUNDLE_LIBPNG_DEFAULT YES)
+else()
+ set(BUNDLE_LIBPNG_DEFAULT NO)
+endif()
+
+# Standard cmake naming for building shared libraries.
+get_property(SHARED_LIBS_SUPPORTED GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS)
+option(BUILD_SHARED_LIBS "Build shared libraries instead of static ones" ${SHARED_LIBS_SUPPORTED})
+
+set(JPEGXL_ENABLE_FUZZERS ${ENABLE_FUZZERS_DEFAULT} CACHE BOOL
+ "Build JPEGXL fuzzer targets.")
+set(JPEGXL_ENABLE_DEVTOOLS false CACHE BOOL
+ "Build JPEGXL developer tools.")
+set(JPEGXL_ENABLE_TOOLS true CACHE BOOL
+ "Build JPEGXL user tools: cjxl and djxl.")
+set(JPEGXL_ENABLE_JPEGLI true CACHE BOOL
+ "Build jpegli library.")
+set(JPEGXL_ENABLE_JPEGLI_LIBJPEG true CACHE BOOL
+ "Build libjpeg.so shared library based on jpegli.")
+set(JPEGLI_LIBJPEG_LIBRARY_VERSION "62.3.0" CACHE STRING
+ "Library version of the libjpeg.so shared library that we build.")
+set(JPEGLI_LIBJPEG_LIBRARY_SOVERSION "62" CACHE STRING
+ "Library so-version of the libjpeg.so shared library that we build.")
+set(JPEGXL_ENABLE_DOXYGEN true CACHE BOOL
+ "Generate C API documentation using Doxygen.")
+set(JPEGXL_ENABLE_MANPAGES true CACHE BOOL
+ "Build and install man pages for the command-line tools.")
+set(JPEGXL_ENABLE_BENCHMARK true CACHE BOOL
+ "Build JPEGXL benchmark tools.")
+set(JPEGXL_ENABLE_EXAMPLES true CACHE BOOL
+ "Build JPEGXL library usage examples.")
+set(JPEGXL_BUNDLE_LIBPNG ${BUNDLE_LIBPNG_DEFAULT} CACHE BOOL
+ "Build libpng from source and link it statically.")
+set(JPEGXL_ENABLE_JNI true CACHE BOOL
+ "Build JPEGXL JNI Java wrapper, if Java dependencies are installed.")
+set(JPEGXL_ENABLE_SJPEG true CACHE BOOL
+ "Build JPEGXL with support for encoding with sjpeg.")
+set(JPEGXL_ENABLE_OPENEXR true CACHE BOOL
+ "Build JPEGXL with support for OpenEXR if available.")
+set(JPEGXL_ENABLE_SKCMS true CACHE BOOL
+ "Build with skcms instead of lcms2.")
+set(JPEGXL_BUNDLE_SKCMS true CACHE BOOL
+ "When building with skcms, bundle it into libjxl.a.")
+set(JPEGXL_ENABLE_VIEWERS false CACHE BOOL
+ "Build JPEGXL viewer tools for evaluation.")
+set(JPEGXL_ENABLE_TCMALLOC ${ENABLE_TCMALLOC_DEFAULT} CACHE BOOL
+ "Build JPEGXL using gperftools (tcmalloc) allocator.")
+set(JPEGXL_ENABLE_PLUGINS false CACHE BOOL
+ "Build third-party plugins to support JPEG XL in other applications.")
+set(JPEGXL_ENABLE_COVERAGE false CACHE BOOL
+ "Enable code coverage tracking for libjxl. This also enables debug and disables optimizations.")
+set(JPEGXL_ENABLE_PROFILER false CACHE BOOL
+ "Builds in support for profiling (printed by tools if extra flags given)")
+set(JPEGXL_ENABLE_SIZELESS_VECTORS false CACHE BOOL
+ "Builds in support for SVE/RVV vectorization")
+set(JPEGXL_ENABLE_TRANSCODE_JPEG true CACHE BOOL
+ "Builds in support for decoding transcoded JXL files back to JPEG,\
+ disabling it makes the decoder reject JXL_DEC_JPEG_RECONSTRUCTION events,\
+ (default enabled)")
+set(JPEGXL_ENABLE_BOXES true CACHE BOOL
+ "Builds in support for decoding boxes in JXL files,\
+ disabling it makes the decoder reject JXL_DEC_BOX events,\
+ (default enabled)")
+set(JPEGXL_STATIC false CACHE BOOL
+ "Build tools as static binaries.")
+set(JPEGXL_WARNINGS_AS_ERRORS ${WARNINGS_AS_ERRORS_DEFAULT} CACHE BOOL
+ "Treat warnings as errors during compilation.")
+set(JPEGXL_DEP_LICENSE_DIR "" CACHE STRING
+ "Directory where to search for system dependencies \"copyright\" files.")
+set(JPEGXL_FORCE_NEON false CACHE BOOL
+ "Set flags to enable NEON in arm if not enabled by your toolchain.")
+set(JPEGXL_TEST_TOOLS false CACHE BOOL
+ "Run scripts that test the encoding / decoding tools.")
+
+# Force system dependencies.
+set(JPEGXL_FORCE_SYSTEM_BROTLI false CACHE BOOL
+ "Force using system installed brotli instead of third_party/brotli source.")
+set(JPEGXL_FORCE_SYSTEM_GTEST false CACHE BOOL
+ "Force using system installed googletest (gtest/gmock) instead of third_party/googletest source.")
+set(JPEGXL_FORCE_SYSTEM_LCMS2 false CACHE BOOL
+ "Force using system installed lcms2 instead of third_party/lcms source.")
+set(JPEGXL_FORCE_SYSTEM_HWY false CACHE BOOL
+ "Force using system installed highway (libhwy-dev) instead of third_party/highway source.")
+
+# Check minimum compiler versions. Older compilers are not supported and fail
+# with hard to understand errors.
+if (NOT CMAKE_C_COMPILER_ID STREQUAL CMAKE_CXX_COMPILER_ID)
+ message(FATAL_ERROR "Different C/C++ compilers set: "
+ "${CMAKE_C_COMPILER_ID} vs ${CMAKE_CXX_COMPILER_ID}")
+endif()
+if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ # Android NDK's toolchain.cmake fakes the clang version in
+ # CMAKE_CXX_COMPILER_VERSION with an incorrect number, so ignore this.
+ if (NOT CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION MATCHES "clang"
+ AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5)
+ message(FATAL_ERROR
+ "Minimum Clang version required is Clang 5, please update.")
+ endif()
+elseif (CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+ if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7)
+ message(FATAL_ERROR
+ "Minimum GCC version required is 7, please update.")
+ endif()
+endif()
+
+message(STATUS
+ "Compiled IDs C:${CMAKE_C_COMPILER_ID}, C++:${CMAKE_CXX_COMPILER_ID}")
+
+# CMAKE_EXPORT_COMPILE_COMMANDS is used to generate the compilation database
+# used by clang-tidy.
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+if(JPEGXL_STATIC)
+ set(BUILD_SHARED_LIBS 0)
+ # Clang developers say that in case to use "static" we have to build stdlib
+ # ourselves; for real use case we don't care about stdlib, as it is "granted",
+ # so just linking all other libraries is fine.
+ if (NOT MSVC AND NOT APPLE)
+ set(CMAKE_FIND_LIBRARY_SUFFIXES .a)
+ set(CMAKE_EXE_LINKER_FLAGS
+ "${CMAKE_EXE_LINKER_FLAGS} -static -static-libgcc -static-libstdc++")
+ endif()
+endif() # JPEGXL_STATIC
+
+# Threads
+set(THREADS_PREFER_PTHREAD_FLAG YES)
+find_package(Threads REQUIRED)
+
+# These settings are important to drive check_cxx_source_compiles
+# See CMP0067 (min cmake version is 3.10 anyway)
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_EXTENSIONS OFF)
+set(CMAKE_CXX_STANDARD_REQUIRED YES)
+
+# Atomics
+find_package(Atomics REQUIRED)
+
+if(JPEGXL_STATIC)
+ if (MINGW)
+ # In MINGW libstdc++ uses pthreads directly. When building statically a
+ # program (regardless of whether the source code uses pthread or not) the
+ # toolchain will add stdc++ and pthread to the linking step but stdc++ will
+ # be linked statically while pthread will be linked dynamically.
+ # To avoid this and have pthread statically linked with need to pass it in
+ # the command line with "-Wl,-Bstatic -lpthread -Wl,-Bdynamic" but the
+ # linker will discard it if not used by anything else up to that point in
+ # the linker command line. If the program or any dependency don't use
+ # pthread directly -lpthread is discarded and libstdc++ (added by the
+ # toolchain later) will then use the dynamic version. For this we also need
+ # to pass -lstdc++ explicitly before -lpthread. For pure C programs -lstdc++
+ # will be discarded anyway.
+ # This adds these flags as dependencies for *all* targets. Adding this to
+ # CMAKE_EXE_LINKER_FLAGS instead would cause them to be included before any
+ # object files and therefore discarded. This should be set in the
+ # INTERFACE_LINK_LIBRARIES of Threads::Threads but some third_part targets
+ # don't depend on it.
+ link_libraries(-Wl,-Bstatic -lstdc++ -lpthread -Wl,-Bdynamic)
+ elseif(CMAKE_USE_PTHREADS_INIT)
+ # "whole-archive" is not supported on OSX.
+ if (NOT APPLE)
+ # Set pthreads as a whole-archive, otherwise weak symbols in the static
+ # libraries will discard pthreads symbols leading to segmentation fault at
+ # runtime.
+ message(STATUS "Using -lpthread as --whole-archive")
+ set_target_properties(Threads::Threads PROPERTIES
+ INTERFACE_LINK_LIBRARIES
+ "-Wl,--whole-archive;-lpthread;-Wl,--no-whole-archive")
+ endif()
+ endif()
+endif() # JPEGXL_STATIC
+
+if (JPEGXL_EMSCRIPTEN)
+ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
+endif()
+
+if (CXX_MACRO_PREFIX_MAP)
+ add_compile_options(-fmacro-prefix-map=${CMAKE_CURRENT_SOURCE_DIR}=.)
+endif()
+
+if (CXX_NO_RTTI_SUPPORTED)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
+endif()
+
+# Internal flags for coverage builds:
+set(JPEGXL_COVERAGE_FLAGS)
+set(JPEGXL_COVERAGE_LINK_FLAGS)
+
+if (MSVC)
+ # TODO(janwas): add flags
+ add_definitions(-D_CRT_SECURE_NO_WARNINGS)
+else ()
+ # Global compiler flags for all targets here and in subdirectories.
+ add_definitions(
+ # Avoid changing the binary based on the current time and date.
+ -D__DATE__="redacted"
+ -D__TIMESTAMP__="redacted"
+ -D__TIME__="redacted"
+ )
+
+ # TODO(eustas): JXL currently compiles, but does not pass tests...
+ if (NOT JXL_HWY_DISABLED_TARGETS_FORCED AND NOT JPEGXL_ENABLE_SIZELESS_VECTORS)
+ add_definitions(-DHWY_DISABLED_TARGETS=\(HWY_SVE|HWY_SVE2|HWY_SVE_256|HWY_SVE2_128|HWY_RVV\))
+ message("Warning: HWY_SVE, HWY_SVE2, HWY_SVE_256, HWY_SVE2_128 and HWY_RVV CPU targets are disabled")
+ endif()
+
+ # In CMake before 3.12 it is problematic to pass repeated flags like -Xclang.
+ # For this reason we place them in CMAKE_CXX_FLAGS instead.
+ # See https://gitlab.kitware.com/cmake/cmake/issues/15826
+
+ # Machine flags.
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funwind-tables")
+ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -mrelax-all")
+ endif()
+ if (CXX_CONSTRUCTOR_ALIASES_SUPPORTED)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -mconstructor-aliases")
+ endif()
+
+ if(WIN32)
+ # Not supported by clang-cl, but frame pointers are default on Windows
+ else()
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer")
+ endif()
+
+ # CPU flags - remove once we have NEON dynamic dispatch
+
+ # TODO(janwas): this also matches M1, but only ARMv7 is intended/needed.
+ if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
+ if(JPEGXL_FORCE_NEON)
+ # GCC requires these flags, otherwise __ARM_NEON is undefined.
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
+ -mfpu=neon-vfpv4 -mfloat-abi=hard")
+ endif()
+ endif()
+
+ # Force build with optimizations in release mode.
+ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2")
+
+ add_compile_options(
+ # Ignore this to allow redefining __DATE__ and others.
+ -Wno-builtin-macro-redefined
+
+ # Global warning settings.
+ -Wall
+ )
+
+ if (JPEGXL_WARNINGS_AS_ERRORS)
+ add_compile_options(-Werror)
+ endif ()
+
+ if(JPEGXL_ENABLE_COVERAGE)
+ set(JPEGXL_COVERAGE_FLAGS
+ -g -O0 -fprofile-arcs -ftest-coverage
+ -DJXL_ENABLE_ASSERT=0 -DJXL_ENABLE_CHECK=0
+ )
+ set(JPEGXL_COVERAGE_LINK_FLAGS
+ --coverage
+ )
+ endif() # JPEGXL_ENABLE_COVERAGE
+endif () # !MSVC
+
+include(GNUInstallDirs)
+
+# Separately build/configure testing frameworks and other third_party libraries
+# to allow disabling tests in those libraries.
+include(third_party/testing.cmake)
+add_subdirectory(third_party)
+# Copy the JXL license file to the output build directory.
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/LICENSE"
+ ${PROJECT_BINARY_DIR}/LICENSE.jpeg-xl COPYONLY)
+
+# Enable tests regardless of where they are defined.
+enable_testing()
+include(CTest)
+# Specify default location of `testdata`:
+if(NOT DEFINED JPEGXL_TEST_DATA_PATH)
+ set(JPEGXL_TEST_DATA_PATH "${PROJECT_SOURCE_DIR}/testdata")
+endif()
+
+# Libraries.
+add_subdirectory(lib)
+
+if(BUILD_TESTING)
+ # Script to run tests over the source code in bash.
+ find_program (BASH_PROGRAM bash)
+ if(BASH_PROGRAM)
+ add_test(
+ NAME bash_test
+ COMMAND ${BASH_PROGRAM} ${CMAKE_CURRENT_SOURCE_DIR}/bash_test.sh)
+ endif()
+endif() # BUILD_TESTING
+
+# Documentation generated by Doxygen
+if(JPEGXL_ENABLE_DOXYGEN)
+ find_package(Doxygen)
+ if(DOXYGEN_FOUND)
+ set(DOXYGEN_GENERATE_HTML "YES")
+ set(DOXYGEN_GENERATE_XML "YES")
+ set(DOXYGEN_STRIP_FROM_PATH "${CMAKE_CURRENT_SOURCE_DIR}/lib/include")
+ set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "README.md")
+ if(JPEGXL_WARNINGS_AS_ERRORS)
+ set(DOXYGEN_WARN_AS_ERROR "YES")
+ endif()
+ set(DOXYGEN_QUIET "YES")
+ doxygen_add_docs(doc
+ "${CMAKE_CURRENT_SOURCE_DIR}/lib/include"
+ "${CMAKE_CURRENT_SOURCE_DIR}/doc/api.txt"
+ WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+ COMMENT "Generating C API documentation")
+
+ # Add sphinx doc build step for readthedocs.io (requires doxygen too).
+ find_program(SPHINX_BUILD_PROGRAM sphinx-build)
+ if(SPHINX_BUILD_PROGRAM)
+ add_custom_command(
+ OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/rtd/nonexistent"
+ COMMENT "Generating readthedocs.io output on ${CMAKE_CURRENT_BINARY_DIR}/rtd"
+ COMMAND ${SPHINX_BUILD_PROGRAM} -q -W -b html -j auto
+ ${CMAKE_SOURCE_DIR}/doc/sphinx
+ ${CMAKE_CURRENT_BINARY_DIR}/rtd
+ DEPENDS doc
+ )
+ # This command runs the documentation generation every time since the output
+ # target file doesn't exist.
+ add_custom_target(rtd-html
+ DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/rtd/nonexistent
+ )
+ else() # SPHINX_BUILD_PROGRAM\
+ message(WARNING "sphinx-build not found, skipping rtd documentation")
+ endif() # SPHINX_BUILD_PROGRAM
+
+ else()
+ # Create a "doc" target for compatibility since "doc" is not otherwise added to
+ # the build when doxygen is not installed.
+ add_custom_target(doc false
+ COMMENT "Error: Can't generate doc since Doxygen not installed.")
+ endif() # DOXYGEN_FOUND
+endif() # JPEGXL_ENABLE_DOXYGEN
+
+if(JPEGXL_ENABLE_MANPAGES)
+ find_program(ASCIIDOC a2x)
+ if(ASCIIDOC)
+ file(STRINGS "${ASCIIDOC}" ASCIIDOC_SHEBANG LIMIT_COUNT 1)
+ if(ASCIIDOC_SHEBANG MATCHES "/sh|/bash" OR MINGW)
+ set(ASCIIDOC_PY_FOUND ON)
+ # Run the program directly and set ASCIIDOC as empty.
+ set(ASCIIDOC_PY "${ASCIIDOC}")
+ set(ASCIIDOC "")
+ elseif(ASCIIDOC_SHEBANG MATCHES "python2")
+ find_package(Python2 COMPONENTS Interpreter)
+ set(ASCIIDOC_PY_FOUND "${Python2_Interpreter_FOUND}")
+ set(ASCIIDOC_PY Python2::Interpreter)
+ elseif(ASCIIDOC_SHEBANG MATCHES "python3")
+ find_package(Python3 COMPONENTS Interpreter)
+ set(ASCIIDOC_PY_FOUND "${Python3_Interpreter_FOUND}")
+ set(ASCIIDOC_PY Python3::Interpreter)
+ else()
+ find_package(Python COMPONENTS Interpreter QUIET)
+ if(NOT Python_Interpreter_FOUND)
+ find_program(ASCIIDOC_PY python)
+ if(ASCIIDOC_PY)
+ set(ASCIIDOC_PY_FOUND ON)
+ endif()
+ else()
+ set(ASCIIDOC_PY_FOUND "${Python_Interpreter_FOUND}")
+ set(ASCIIDOC_PY Python::Interpreter)
+ endif()
+ endif()
+
+ if (ASCIIDOC_PY_FOUND)
+ set(MANPAGE_FILES "")
+ set(MANPAGES "")
+ foreach(PAGE IN ITEMS cjxl djxl)
+ # Invoking the Python interpreter ourselves instead of running the a2x binary
+ # directly is necessary on MSYS2, otherwise it is run through cmd.exe which
+ # does not recognize it.
+ add_custom_command(
+ OUTPUT "${PAGE}.1"
+ COMMAND "${ASCIIDOC_PY}"
+ ARGS ${ASCIIDOC}
+ --format manpage --destination-dir="${CMAKE_CURRENT_BINARY_DIR}"
+ "${CMAKE_CURRENT_SOURCE_DIR}/doc/man/${PAGE}.txt"
+ MAIN_DEPENDENCY "${CMAKE_CURRENT_SOURCE_DIR}/doc/man/${PAGE}.txt")
+ list(APPEND MANPAGE_FILES "${CMAKE_CURRENT_BINARY_DIR}/${PAGE}.1")
+ list(APPEND MANPAGES "${PAGE}.1")
+ endforeach()
+ add_custom_target(manpages ALL DEPENDS ${MANPAGES})
+ install(FILES ${MANPAGE_FILES} DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)
+ endif() # ASCIIDOC_PY_FOUND
+ else()
+ message(WARNING "asciidoc was not found, the man pages will not be installed.")
+ endif() # ASCIIDOC
+endif() # JPEGXL_ENABLE_MANPAGES
+
+# Example usage code.
+if (JPEGXL_ENABLE_EXAMPLES)
+ include(examples/examples.cmake)
+endif ()
+
+# Plugins for third-party software
+if (JPEGXL_ENABLE_PLUGINS)
+ add_subdirectory(plugins)
+endif ()
+
+# Binary tools
+add_subdirectory(tools)
diff --git a/third_party/jpeg-xl/CODE_OF_CONDUCT.md b/third_party/jpeg-xl/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000000..b2d81a3214
--- /dev/null
+++ b/third_party/jpeg-xl/CODE_OF_CONDUCT.md
@@ -0,0 +1,93 @@
+# Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to making participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, gender identity and expression, level of
+experience, education, socio-economic status, nationality, personal appearance,
+race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or
+ advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+ address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+ professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or reject
+comments, commits, code, wiki edits, issues, and other contributions that are
+not aligned to this Code of Conduct, or to ban temporarily or permanently any
+contributor for other behaviors that they deem inappropriate, threatening,
+offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies both within project spaces and in public spaces
+when an individual is representing the project or its community. Examples of
+representing a project or community include using an official project e-mail
+address, posting via an official social media account, or acting as an appointed
+representative at an online or offline event. Representation of a project may be
+further defined and clarified by project maintainers.
+
+This Code of Conduct also applies outside the project spaces when the Project
+Steward has a reasonable belief that an individual's behavior may have a
+negative impact on the project or its community.
+
+## Conflict Resolution
+
+We do not believe that all conflict is bad; healthy debate and disagreement
+often yield positive results. However, it is never okay to be disrespectful or
+to engage in behavior that violates the project’s code of conduct.
+
+If you see someone violating the code of conduct, you are encouraged to address
+the behavior directly with those involved. Many issues can be resolved quickly
+and easily, and this gives people more control over the outcome of their
+dispute. If you are unable to resolve the matter for any reason, or if the
+behavior is threatening or harassing, report it. We are dedicated to providing
+an environment where participants feel welcome and safe.
+
+Reports should be directed to Jyrki Alakuijala <jyrki@google.com>, the
+Project Steward(s) for JPEG XL. It is the Project Steward’s duty to
+receive and address reported violations of the code of conduct. They will then
+work with a committee consisting of representatives from the Open Source
+Programs Office and the Google Open Source Strategy team. If for any reason you
+are uncomfortable reaching out to the Project Steward, please email
+opensource@google.com.
+
+We will investigate every complaint, but you may not receive a direct response.
+We will use our discretion in determining when and how to follow up on reported
+incidents, which may range from not taking action to permanent expulsion from
+the project and project-sponsored spaces. We will notify the accused of the
+report and provide them an opportunity to discuss it before any action is taken.
+The identity of the reporter will be omitted from the details of the report
+supplied to the accused. In potentially harmful situations, such as ongoing
+harassment or threats to anyone's safety, we may take action without notice.
+
+## Attribution
+
+This Code of Conduct is adapted from the Contributor Covenant, version 1.4,
+available at
+https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
diff --git a/third_party/jpeg-xl/CONTRIBUTING.md b/third_party/jpeg-xl/CONTRIBUTING.md
new file mode 100644
index 0000000000..cb6459797c
--- /dev/null
+++ b/third_party/jpeg-xl/CONTRIBUTING.md
@@ -0,0 +1,132 @@
+# Contributing to libjxl
+
+## Contributing with bug reports
+
+For security-related issues please see [SECURITY.md](SECURITY.md).
+
+We welcome suggestions, feature requests and bug reports. Before opening a new
+issue please take a look if there is already an existing one in the following
+link:
+
+ * https://github.com/libjxl/libjxl/issues
+
+## Contributing with patches and Pull Requests
+
+We'd love to accept your contributions to the JPEG XL Project. Please read
+through this section before sending a Pull Request.
+
+### Contributor License Agreements
+
+Our project is open source under the terms outlined in the [LICENSE](LICENSE)
+and [PATENTS](PATENTS) files. Before we can accept your contributions, even for
+small changes, there are just a few small guidelines you need to follow:
+
+Please fill out either the individual or corporate Contributor License Agreement
+(CLA) with Google. JPEG XL Project is an an effort by multiple individuals and
+companies, including the initial contributors Cloudinary and Google, but Google
+is the legal entity in charge of receiving these CLA and relicensing this
+software:
+
+ * If you are an individual writing original source code and you're sure you
+ own the intellectual property, then you'll need to sign an [individual
+ CLA](https://code.google.com/legal/individual-cla-v1.0.html).
+
+ * If you work for a company that wants to allow you to contribute your work,
+ then you'll need to sign a [corporate
+ CLA](https://code.google.com/legal/corporate-cla-v1.0.html).
+
+Follow either of the two links above to access the appropriate CLA and
+instructions for how to sign and return it. Once we receive it, we'll be able
+to accept your pull requests.
+
+***NOTE***: Only original source code from you and other people that have signed
+the CLA can be accepted into the main repository.
+
+### License
+
+Contributions are licensed under the project's [LICENSE](LICENSE). Each new
+file must include the following header when possible, with comment style adapted
+to the language as needed:
+
+```
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+```
+
+### Code Reviews
+
+All submissions, including submissions by project members, require review. We
+use GitHub pull requests for this purpose. Consult
+[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
+information on using pull requests.
+
+### Contribution philosophy
+
+ * Prefer small changes, even if they don't implement a complete feature. Small
+ changes are easier to review and can be submitted faster. Think about what's
+ the smallest unit you can send that makes sense to review and submit in
+ isolation. For example, new modules that are not yet used by the tools but
+ have their own unittests are ok. If you have unrelated changes that
+ you discovered while working on something else, please send them in a
+ different Pull Request. If your are refactoring code and changing
+ functionality try to send the refactor first without any change in
+ functionality. Reviewers may ask you to split a Pull Request and it is
+ easier to create a smaller change from the beginning.
+
+ * Describe your commits. Add a meaningful description to your commit message, explain what you are changing if it is not trivially obvious, but more importantly explain *why* you are making those changes. For example "Fix
+ build" is not a good commit message, describe what build and if it makes sense
+ why is this fixing it or why was it failing without this. It is very likely
+ that people far in the future without any context you have right now will be
+ looking at your commit trying to figure out why was the change introduced. If
+ related to an issue in this or another repository include a link to it.
+
+ * Code Style: We follow the [Google C++ Coding
+ Style](https://google.github.io/styleguide/cppguide.html). A
+ [clang-format](https://clang.llvm.org/docs/ClangFormat.html) configuration
+ file is available to automatically format your code, you can invoke it with
+ the `./ci.sh lint` helper tool.
+
+ * Testing: Test your change and explain in the commit message *how* your
+ commit was tested. For example adding unittests or in some cases just testing
+ with the existing ones is enough. In any case, mention what testing was
+ performed so reviewers can evaluate whether that's enough testing. In many
+ cases, testing that the Continuous Integration workflow passes is enough.
+
+ * Make one commit per Pull Request / review, unless there's a good reason not
+ to. If you have multiple changes send multiple Pull Requests and each one can
+ have its own review.
+
+ * When addressing comments from reviewers prefer to squash or fixup your
+ edits and force-push your commit. When merging changes into the repository we
+ don't want to include the history of code review back and forth changes or
+ typos. Reviewers can click on the "force-pushed" automatic comment on a Pull
+ Request to see the changes between versions. We use "Rebase and merge" policy
+ to keep a linear git history which is easier to reason about.
+
+ * Your change must pass the build and test workflows. There's a `ci.sh` script
+ to help building and testing these configurations. See [building and
+ testing](doc/building_and_testing.md) for more details.
+
+### Contributing checklist.
+
+ * Sign the CLA (only needed once per user, see above).
+
+ * AUTHORS: If this is your first contribution, add your name or your
+ company name to the [AUTHORS](AUTHORS) file for copyright tracking purposes.
+
+ * Style guide. Check `./ci.sh lint`.
+
+ * Meaningful commit description: What and *why*, links to issues, testing
+ procedure.
+
+ * Squashed multiple edits into a single commit.
+
+ * Upload your changes to your fork and [create a Pull
+ Request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request).
+
+# Community Guidelines
+
+This project follows [Google's Open Source Community
+Guidelines](https://opensource.google.com/conduct/).
diff --git a/third_party/jpeg-xl/CONTRIBUTORS b/third_party/jpeg-xl/CONTRIBUTORS
new file mode 100644
index 0000000000..848096f921
--- /dev/null
+++ b/third_party/jpeg-xl/CONTRIBUTORS
@@ -0,0 +1,23 @@
+# This files lists individuals who made significant contributions to the JPEG XL
+# code base, such as design, adding features, performing experiments, ...
+# Small changes such as a small bugfix or fixing spelling errors are not
+# included. If you'd like to be included in this file thanks to a significant
+# contribution, feel free to send a pull request changing this file.
+Alex Deymo
+Alexander Rhatushnyak
+Evgenii Kliuchnikov
+Iulia-Maria Comșa
+Jan Wassenberg
+Jon Sneyers
+Jyrki Alakuijala
+Krzysztof Potempa
+Lode Vandevenne
+Luca Versari
+Martin Bruse
+Moritz Firsching
+Renata Khasanova
+Robert Obryk
+Sami Boukortt
+Sebastian Gomez-Gonzalez
+Thomas Fischbacher
+Zoltan Szabadka
diff --git a/third_party/jpeg-xl/LICENSE b/third_party/jpeg-xl/LICENSE
new file mode 100644
index 0000000000..c66034b105
--- /dev/null
+++ b/third_party/jpeg-xl/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) the JPEG XL Project Authors.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/third_party/jpeg-xl/PATENTS b/third_party/jpeg-xl/PATENTS
new file mode 100644
index 0000000000..c95b8f4105
--- /dev/null
+++ b/third_party/jpeg-xl/PATENTS
@@ -0,0 +1,22 @@
+Additional IP Rights Grant (Patents)
+
+"This implementation" means the copyrightable works distributed by
+Google as part of the JPEG XL project.
+
+Google hereby grants to You a perpetual, worldwide, non-exclusive,
+no-charge, royalty-free, irrevocable (except as stated in this section)
+patent license to make, have made, use, offer to sell, sell, import,
+transfer and otherwise run, modify and propagate the contents of this
+implementation of JPEG XL, where such license applies only to those patent
+claims, both currently owned or controlled by Google and acquired in
+the future, licensable by Google that are necessarily infringed by this
+implementation of JPEG XL. This grant does not include claims that would be
+infringed only as a consequence of further modification of this
+implementation. If you or your agent or exclusive licensee institute or
+order or agree to the institution of patent litigation against any
+entity (including a cross-claim or counterclaim in a lawsuit) alleging
+that this implementation of JPEG XL or any code incorporated within this
+implementation of JPEG XL constitutes direct or contributory patent
+infringement, or inducement of patent infringement, then any patent
+rights granted to you under this License for this implementation of JPEG XL
+shall terminate as of the date such litigation is filed.
diff --git a/third_party/jpeg-xl/README.md b/third_party/jpeg-xl/README.md
new file mode 100644
index 0000000000..1e9a9adbd1
--- /dev/null
+++ b/third_party/jpeg-xl/README.md
@@ -0,0 +1,133 @@
+# JPEG XL reference implementation
+
+[![Build/Test](https://github.com/libjxl/libjxl/actions/workflows/build_test.yml/badge.svg)](
+https://github.com/libjxl/libjxl/actions/workflows/build_test.yml)
+[![Build/Test Cross](https://github.com/libjxl/libjxl/actions/workflows/build_test_cross.yml/badge.svg)](
+https://github.com/libjxl/libjxl/actions/workflows/build_test_cross.yml)
+[![Conformance](https://github.com/libjxl/libjxl/actions/workflows/conformance.yml/badge.svg)](
+https://github.com/libjxl/libjxl/actions/workflows/conformance.yml)
+[![CIFuzz](https://github.com/libjxl/libjxl/actions/workflows/fuzz.yml/badge.svg)](
+https://github.com/libjxl/libjxl/actions/workflows/fuzz.yml)
+[![Releases](https://github.com/libjxl/libjxl/actions/workflows/release.yaml/badge.svg)](
+https://github.com/libjxl/libjxl/actions/workflows/release.yaml)
+[![Doc](https://readthedocs.org/projects/libjxl/badge/?version=latest)](
+https://libjxl.readthedocs.io/en/latest/?badge=latest)
+[![codecov](https://codecov.io/gh/libjxl/libjxl/branch/main/graph/badge.svg)](
+https://codecov.io/gh/libjxl/libjxl)
+
+<img src="doc/jxl.svg" width="100" align="right" alt="JXL logo">
+
+This repository contains a reference implementation of JPEG XL (encoder and
+decoder), called `libjxl`. This software library is
+[used by many applications that support JPEG XL](doc/software_support.md).
+
+JPEG XL was standardized in 2022 as [ISO/IEC 18181](https://jpeg.org/jpegxl/workplan.html).
+The [core codestream](doc/format_overview.md#codestream-features) is specified in 18181-1,
+the [file format](doc/format_overview.md#file-format-features) in 18181-2.
+[Decoder conformance](https://github.com/libjxl/conformance) is defined in 18181-3,
+and 18181-4 is the [reference software](https://github.com/libjxl/libjxl).
+
+The library API, command line options, and tools in this repository are subject
+to change, however files encoded with `cjxl` conform to the JPEG XL specification
+and can be decoded with current and future `djxl` decoders or the `libjxl` decoding library.
+
+## Installation
+
+In most Linux distributions, installing `libjxl` is just a matter of using the package management system.
+For example in Debian-based distributions: `apt install libjxl-tools` will install `cjxl` and `djxl`
+and other tools like `benchmark_xl` are available in the package `libjxl-devtools`.
+On MacOS, you can use [Homebrew](https://brew.sh/): `brew install jpeg-xl`.
+
+[![libjxl packaging status](https://repology.org/badge/vertical-allrepos/libjxl.svg?exclude_unsupported=1&columns=3&exclude_sources=modules,site&header=libjxl%20packaging%20status)](https://repology.org/project/libjxl/versions)
+
+From the [releases page](https://github.com/libjxl/libjxl/releases/) the following can be downloaded:
+ - Windows binaries
+ - Debian and Ubuntu .deb packages
+
+Of course you can also [build libjxl from sources](BUILDING.md).
+
+
+## Usage
+
+To encode a source image to JPEG XL with default settings:
+
+```bash
+cjxl input.png output.jxl
+```
+
+The desired visual fidelity can be selected using the `--distance` parameter
+(in units of just-noticeable difference, where 0 is lossless and the most useful lossy range is 0.5 .. 3.0),
+or using `--quality` (on a scale from 0 to 100, roughly matching libjpeg).
+The [encode effort](doc/encode_effort.md) can be selected using the `--effort` parameter.
+
+For more settings run `cjxl --help` or for a full list of options
+run `cjxl -v -v --help`.
+
+To decode a JPEG XL file run:
+
+```bash
+djxl input.jxl output.png
+```
+
+When possible `cjxl`/`djxl` are able to read/write the following
+image formats: .exr, .gif, .jpeg/.jpg, .pfm, .pgm/.ppm, .pgx, .png.
+Specifically for JPEG files, the default `cjxl` behavior is to apply lossless
+recompression and the default `djxl` behavior is to reconstruct the original
+JPEG file (when the extension of the output file is .jpg).
+
+### Benchmarking
+
+For speed benchmarks on single images in single or multi-threaded decoding
+`djxl` can print decoding speed information. See `djxl --help` for details
+on the decoding options and note that the output image is optional for
+benchmarking purposes.
+
+For more comprehensive benchmarking options, see the
+[benchmarking guide](doc/benchmarking.md).
+
+### Library API
+
+Besides the `libjxl` library [API documentation](https://libjxl.readthedocs.io/en/latest/),
+there are [example applications](examples/) and [plugins](plugins/) that can be used as a reference or
+starting point for developers who wish to integrate `libjxl` in their project.
+
+
+## License
+
+This software is available under a 3-clause BSD license which can be found in
+the [LICENSE](LICENSE) file, with an "Additional IP Rights Grant" as outlined in
+the [PATENTS](PATENTS) file.
+
+Please note that the PATENTS file only mentions Google since Google is the legal
+entity receiving the Contributor License Agreements (CLA) from all contributors
+to the JPEG XL Project, including the initial main contributors to the JPEG XL
+format: Cloudinary and Google.
+
+## Additional documentation
+
+### Codec description
+
+* [JPEG XL Format Overview](doc/format_overview.md)
+* [Introductory paper](https://www.spiedigitallibrary.org/proceedings/Download?fullDOI=10.1117%2F12.2529237) (open-access)
+* [XL Overview](doc/xl_overview.md) - a brief introduction to the source code modules
+* [JPEG XL white paper](https://ds.jpeg.org/whitepapers/jpeg-xl-whitepaper.pdf)
+* [JPEG XL official website](https://jpeg.org/jpegxl)
+* [JPEG XL community website](https://jpegxl.info)
+
+### Development process
+
+* [More information on testing/build options](doc/building_and_testing.md)
+* [Git guide for JPEG XL](doc/developing_in_github.md) - for developers
+* [Fuzzing](doc/fuzzing.md) - for developers
+* [Building Web Assembly artifacts](doc/building_wasm.md)
+* [Test coverage on Codecov.io](https://app.codecov.io/gh/libjxl/libjxl) - for
+ developers
+* [libjxl documentation on readthedocs.io](https://libjxl.readthedocs.io/)
+
+### Contact
+
+If you encounter a bug or other issue with the software, please open an Issue here.
+
+There is a [subreddit about JPEG XL](https://www.reddit.com/r/jpegxl/), and
+informal chatting with developers and early adopters of `libjxl` can be done on the
+[JPEG XL Discord server](https://discord.gg/DqkQgDRTFu).
diff --git a/third_party/jpeg-xl/SECURITY.md b/third_party/jpeg-xl/SECURITY.md
new file mode 100644
index 0000000000..d03012a63a
--- /dev/null
+++ b/third_party/jpeg-xl/SECURITY.md
@@ -0,0 +1,73 @@
+# Security and Vulnerability Policy for libjxl
+
+## TL;DR:
+
+CPE prefix: `cpe:2.3:a:libjxl_project:libjxl`
+
+To report a security issue, please email libjxl-security@google.com.
+
+Include in your email a description of the issue, the steps you took to create
+the issue, affected versions, and if known, mitigations for the issue. Our
+vulnerability management team will acknowledge receiving your email within 3
+working days.
+
+This project follows a 90 day disclosure timeline.
+
+For all other bugs, where there are no security implications about disclosing
+the unpatched bug, open a [new issue](https://github.com/libjxl/libjxl/issues)
+checking first for existing similar issues. If in doubt about the security
+impact of a bug you discovered, email first.
+
+## Policy overview
+
+libjxl's Security Policy is based on the [Google Open Source program
+guidelines](https://github.com/google/oss-vulnerability-guide) for coordinated
+vulnerability disclosure.
+
+Early versions of `libjxl` had a different security policy that didn't provide
+security and vulnerability disclosure support. Versions up to and including
+0.3.7 are not covered and won't receive any security advisory.
+
+Only released versions, starting from version 0.5, are covered by this policy.
+Development branches, arbitrary commits from `main` branch or even releases with
+backported features externally patched on top are not covered. Only those
+versions with a release tag in `libjxl`'s repository are covered, starting from
+version 0.5.
+
+## What's a "Security bug"
+
+A security bug is a bug that can potentially be exploited to let an attacker
+gain unauthorized access or privileges such as disclosing information or
+arbitrary code execution. Not all fuzzer-found bugs and not all assert()
+failures are considered security bugs in libjxl. For a detailed explanation and
+examples see our [Security Vulnerabilities Playbook](doc/vuln_playbook.md).
+
+## What to expect
+
+To report a security issue, please email libjxl-security@google.com with all the
+details about the bug you encountered.
+
+ * Include a description of the issue, steps to reproduce, etc. Compiler
+ versions, flags, exact version used and even CPU are often relevant given our
+ usage of SIMD and run-time dispatch of SIMD instructions.
+
+ * A member of our security team will reply to you within 3 business days. Note
+ that business days are different in different countries.
+
+ * We will evaluate the issue and we may require more input from your side to
+ reproduce it.
+
+ * If the issue fits in the description of a security bug, we will issue a
+ CVE, publish a fix and make a new minor or patch release with it. There is
+ a maximum of 90 day disclosure timeline, we ask you to not publish the
+ details before the 90 day deadline or the release date (whichever comes
+ first).
+
+ * In the case that we publish a CVE we will credit the external researcher who
+ reported the issue. When reporting security issues please let us know if you
+ need to include specific information while doing so, like for example a
+ company affiliation.
+
+Our security team follows the [Security Vulnerabilities
+Playbook](doc/vuln_playbook.md). For more details about the process and policies
+please take a look at it.
diff --git a/third_party/jpeg-xl/WORKSPACE b/third_party/jpeg-xl/WORKSPACE
new file mode 100644
index 0000000000..f0c63df47d
--- /dev/null
+++ b/third_party/jpeg-xl/WORKSPACE
@@ -0,0 +1,742 @@
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository", "new_git_repository")
+
+http_archive(
+ name = "bazel_skylib",
+ sha256 = "74d544d96f4a5bb630d465ca8bbcfe231e3594e5aae57e1edbf17a6eb3ca2506",
+ urls = [
+ "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.3.0/bazel-skylib-1.3.0.tar.gz",
+ "https://github.com/bazelbuild/bazel-skylib/releases/download/1.3.0/bazel-skylib-1.3.0.tar.gz",
+ ],
+)
+
+load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace")
+
+bazel_skylib_workspace()
+
+local_repository(
+ name = "highway",
+ path = "third_party/highway",
+)
+
+local_repository(
+ name = "brotli",
+ path = "third_party/brotli",
+)
+
+new_local_repository(
+ name = "googletest",
+ build_file = "third_party/googletest/BUILD.bazel",
+ path = "third_party/googletest",
+)
+
+new_local_repository(
+ name = "skcms",
+ build_file_content = """
+cc_library(
+ name = "skcms",
+ srcs = [
+ "skcms.cc",
+ "skcms_internal.h",
+ "src/Transform_inl.h",
+ ],
+ hdrs = ["skcms.h"],
+ visibility = ["//visibility:public"],
+)
+ """,
+ path = "third_party/skcms",
+)
+
+new_git_repository(
+ name = "zlib",
+ build_file_content = """
+cc_library(
+ name = "zlib",
+ defines = ["HAVE_UNISTD_H"],
+ srcs = [
+ "adler32.c",
+ "compress.c",
+ "crc32.c",
+ "crc32.h",
+ "deflate.c",
+ "deflate.h",
+ "gzclose.c",
+ "gzguts.h",
+ "gzlib.c",
+ "gzread.c",
+ "gzwrite.c",
+ "infback.c",
+ "inffast.c",
+ "inffast.h",
+ "inffixed.h",
+ "inflate.c",
+ "inflate.h",
+ "inftrees.c",
+ "inftrees.h",
+ "trees.c",
+ "trees.h",
+ "uncompr.c",
+ "zconf.h",
+ "zutil.c",
+ "zutil.h",
+ ],
+ hdrs = ["zlib.h"],
+ includes = ["."],
+ visibility = ["//visibility:public"],
+)
+ """,
+ remote = "https://github.com/madler/zlib",
+ tag = "v1.2.13",
+)
+
+new_local_repository(
+ name = "png",
+ build_file_content = """
+genrule(
+ name = "pnglibconf",
+ srcs = ["scripts/pnglibconf.h.prebuilt"],
+ outs = ["pnglibconf.h"],
+ cmd = "cp -f $< $@",
+)
+cc_library(
+ name = "png",
+ srcs = [
+ "png.c",
+ "pngconf.h",
+ "pngdebug.h",
+ "pngerror.c",
+ "pngget.c",
+ "pnginfo.h",
+ ":pnglibconf",
+ "pngmem.c",
+ "pngpread.c",
+ "pngpriv.h",
+ "pngread.c",
+ "pngrio.c",
+ "pngrtran.c",
+ "pngrutil.c",
+ "pngset.c",
+ "pngstruct.h",
+ "pngtrans.c",
+ "pngwio.c",
+ "pngwrite.c",
+ "pngwtran.c",
+ "pngwutil.c",
+ ],
+ hdrs = ["png.h"],
+ includes = ["."],
+ linkopts = ["-lm"],
+ visibility = ["//visibility:public"],
+ deps = ["@zlib//:zlib"],
+)
+ """,
+ path = "third_party/libpng",
+)
+
+new_git_repository(
+ name = "libjpeg_turbo",
+ build_file_content = """
+load("@bazel_skylib//rules:expand_template.bzl", "expand_template")
+SUBSTITUTIONS = {
+ "@BITS_IN_JSAMPLE@" : "8",
+ "@BUILD@" : "20230125",
+ "@CMAKE_PROJECT_NAME@" : "libjpeg-turbo",
+ "@COPYRIGHT_YEAR@" : "2023",
+ "@INLINE@" : "__inline__",
+ "@JPEG_LIB_VERSION@" : "62",
+ "@LIBJPEG_TURBO_VERSION_NUMBER@" : "2001005",
+ "@SIZE_T@" : "8",
+ "@THREAD_LOCAL@" : "__thread",
+ "@VERSION@" : "2.1.5",
+}
+YES_DEFINES = [
+ "C_ARITH_CODING_SUPPORTED", "D_ARITH_CODING_SUPPORTED",
+ "MEM_SRCDST_SUPPORTED", "HAVE_LOCALE_H", "HAVE_STDDEF_H", "HAVE_STDLIB_H",
+ "NEED_SYS_TYPES_H", "HAVE_UNSIGNED_CHAR", "HAVE_UNSIGNED_SHORT",
+ "HAVE_BUILTIN_CTZL"
+]
+NO_DEFINES = [
+ "WITH_SIMD", "NEED_BSD_STRINGS", "INCOMPLETE_TYPES_BROKEN",
+ "RIGHT_SHIFT_IS_UNSIGNED", "HAVE_INTRIN_H"
+]
+SUBSTITUTIONS.update({
+ "#cmakedefine " + key : "#define " + key for key in YES_DEFINES
+})
+SUBSTITUTIONS.update({
+ "#cmakedefine " + key : "// #define " + key for key in NO_DEFINES
+})
+[
+ expand_template(
+ name = "expand_" + src,
+ template = src + ".in",
+ out = src,
+ substitutions = SUBSTITUTIONS,
+ ) for src in ["jconfig.h", "jconfigint.h", "jversion.h"]
+]
+cc_library(
+ name = "jpeg",
+ srcs = [
+ "jaricom.c",
+ "jcapimin.c",
+ "jcapistd.c",
+ "jcarith.c",
+ "jccoefct.c",
+ "jccolor.c",
+ "jcdctmgr.c",
+ "jchuff.c",
+ "jchuff.h",
+ "jcicc.c",
+ "jcinit.c",
+ "jcmainct.c",
+ "jcmarker.c",
+ "jcmaster.c",
+ "jcomapi.c",
+ "jconfig.h",
+ "jconfigint.h",
+ "jcparam.c",
+ "jcphuff.c",
+ "jcprepct.c",
+ "jcsample.c",
+ "jctrans.c",
+ "jdapimin.c",
+ "jdapistd.c",
+ "jdarith.c",
+ "jdatadst.c",
+ "jdatasrc.c",
+ "jdcoefct.c",
+ "jdcoefct.h",
+ "jdcolor.c",
+ "jdct.h",
+ "jddctmgr.c",
+ "jdhuff.c",
+ "jdhuff.h",
+ "jdicc.c",
+ "jdinput.c",
+ "jdmainct.c",
+ "jdmainct.h",
+ "jdmarker.c",
+ "jdmaster.c",
+ "jdmaster.h",
+ "jdmerge.c",
+ "jdmerge.h",
+ "jdphuff.c",
+ "jdpostct.c",
+ "jdsample.c",
+ "jdsample.h",
+ "jdtrans.c",
+ "jerror.c",
+ "jerror.h",
+ "jfdctflt.c",
+ "jfdctfst.c",
+ "jfdctint.c",
+ "jidctflt.c",
+ "jidctfst.c",
+ "jidctint.c",
+ "jidctred.c",
+ "jinclude.h",
+ "jmemmgr.c",
+ "jmemnobs.c",
+ "jmemsys.h",
+ "jmorecfg.h",
+ "jpeg_nbits_table.h",
+ "jpegcomp.h",
+ "jpegint.h",
+ "jpeglib.h",
+ "jquant1.c",
+ "jquant2.c",
+ "jsimd_none.c",
+ "jsimd.h",
+ "jsimddct.h",
+ "jutils.c",
+ "jversion.h",
+ ],
+ hdrs = [
+ "jccolext.c",
+ "jdcol565.c",
+ "jdcolext.c",
+ "jdmrg565.c",
+ "jdmrgext.c",
+ "jerror.h",
+ "jinclude.h",
+ "jpeglib.h",
+ "jstdhuff.c",
+ ],
+ includes = ["."],
+ visibility = ["//visibility:public"],
+)
+ """,
+ remote = "https://github.com/libjpeg-turbo/libjpeg-turbo.git",
+ tag = "2.1.4",
+)
+
+http_archive(
+ name = "gif",
+ build_file_content = """
+cc_library(
+ name = "gif",
+ srcs = [
+ "dgif_lib.c", "egif_lib.c", "gifalloc.c", "gif_err.c", "gif_font.c",
+ "gif_hash.c", "openbsd-reallocarray.c", "gif_hash.h",
+ "gif_lib_private.h"
+ ],
+ hdrs = ["gif_lib.h"],
+ includes = ["."],
+ visibility = ["//visibility:public"],
+)
+ """,
+ sha256 = "31da5562f44c5f15d63340a09a4fd62b48c45620cd302f77a6d9acf0077879bd",
+ strip_prefix = "giflib-5.2.1",
+ url = "https://netcologne.dl.sourceforge.net/project/giflib/giflib-5.2.1.tar.gz",
+)
+
+new_git_repository(
+ name = "imath",
+ build_file_content = """
+load("@bazel_skylib//rules:expand_template.bzl", "expand_template")
+SUBSTITUTIONS = {
+ "@IMATH_INTERNAL_NAMESPACE@": "Imath_3_1",
+ "@IMATH_LIB_VERSION@": "3.1.4",
+ "@IMATH_NAMESPACE_CUSTOM@": "0",
+ "@IMATH_NAMESPACE@": "Imath",
+ "@IMATH_PACKAGE_NAME@": "Imath 3.1.4",
+ "@IMATH_VERSION_MAJOR@": "3",
+ "@IMATH_VERSION_MINOR@": "1",
+ "@IMATH_VERSION_PATCH@": "4",
+ "@IMATH_VERSION@": "3.1.4",
+}
+YES_DEFINES = [
+ "IMATH_HALF_USE_LOOKUP_TABLE", "IMATH_ENABLE_API_VISIBILITY",
+]
+NO_DEFINES = [
+ "IMATH_HAVE_LARGE_STACK",
+]
+ONE_DEFINES = [
+ "IMATH_USE_NOEXCEPT",
+]
+SUBSTITUTIONS.update({
+ "#cmakedefine " + key : "#define " + key for key in YES_DEFINES
+})
+SUBSTITUTIONS.update({
+ "#cmakedefine " + key : "// #define " + key for key in NO_DEFINES
+})
+SUBSTITUTIONS.update({
+ "#cmakedefine01 " + key : "#define " + key + " 1" for key in ONE_DEFINES
+})
+expand_template(
+ name = "expand_ImathConfig",
+ template = "config/ImathConfig.h.in",
+ out = "src/Imath/ImathConfig.h",
+ substitutions = SUBSTITUTIONS,
+)
+cc_library(
+ name = "Imath",
+ srcs = [
+ "src/Imath/ImathColorAlgo.cpp",
+ ":src/Imath/ImathConfig.h",
+ "src/Imath/ImathFun.cpp",
+ "src/Imath/ImathMatrixAlgo.cpp",
+ "src/Imath/ImathRandom.cpp",
+ "src/Imath/half.cpp",
+ "src/Imath/toFloat.h",
+ ],
+ hdrs = [
+ "src/Imath/ImathBox.h",
+ "src/Imath/ImathBoxAlgo.h",
+ "src/Imath/ImathColor.h",
+ "src/Imath/ImathColorAlgo.h",
+ "src/Imath/ImathEuler.h",
+ "src/Imath/ImathExport.h",
+ "src/Imath/ImathForward.h",
+ "src/Imath/ImathFrame.h",
+ "src/Imath/ImathFrustum.h",
+ "src/Imath/ImathFrustumTest.h",
+ "src/Imath/ImathFun.h",
+ "src/Imath/ImathGL.h",
+ "src/Imath/ImathGLU.h",
+ "src/Imath/ImathInt64.h",
+ "src/Imath/ImathInterval.h",
+ "src/Imath/ImathLine.h",
+ "src/Imath/ImathLineAlgo.h",
+ "src/Imath/ImathMath.h",
+ "src/Imath/ImathMatrix.h",
+ "src/Imath/ImathMatrixAlgo.h",
+ "src/Imath/ImathNamespace.h",
+ "src/Imath/ImathPlane.h",
+ "src/Imath/ImathPlatform.h",
+ "src/Imath/ImathQuat.h",
+ "src/Imath/ImathRandom.h",
+ "src/Imath/ImathRoots.h",
+ "src/Imath/ImathShear.h",
+ "src/Imath/ImathSphere.h",
+ "src/Imath/ImathTypeTraits.h",
+ "src/Imath/ImathVec.h",
+ "src/Imath/ImathVecAlgo.h",
+ "src/Imath/half.h",
+ "src/Imath/halfFunction.h",
+ "src/Imath/halfLimits.h",
+ ],
+ includes = ["src/Imath"],
+ visibility = ["//visibility:public"],
+)
+""",
+ remote = "https://github.com/AcademySoftwareFoundation/imath",
+ tag = "v3.1.5",
+)
+
+new_git_repository(
+ name = "openexr",
+ build_file_content = """
+load("@bazel_skylib//rules:expand_template.bzl", "expand_template")
+SUBSTITUTIONS = {
+ "@IEX_INTERNAL_NAMESPACE@": "Iex_3_0",
+ "@IEX_NAMESPACE_CUSTOM@": "0",
+ "@IEX_NAMESPACE@": "Iex",
+ "@ILMTHREAD_INTERNAL_NAMESPACE@": "IlmThread_3_0",
+ "@ILMTHREAD_NAMESPACE_CUSTOM@": "0",
+ "@ILMTHREAD_NAMESPACE@": "IlmThread",
+ "@OPENEXR_IMF_NAMESPACE@": "Imf",
+ "@OPENEXR_INTERNAL_IMF_NAMESPACE@": "Imf_3_0",
+ "@OPENEXR_LIB_VERSION@": "3.0.4",
+ "@OPENEXR_NAMESPACE_CUSTOM@": "0",
+ "@OPENEXR_PACKAGE_NAME@": "OpenEXR 3.0.4",
+ "@OPENEXR_VERSION_EXTRA@": "",
+ "@OPENEXR_VERSION_MAJOR@": "3",
+ "@OPENEXR_VERSION_MINOR@": "0",
+ "@OPENEXR_VERSION_PATCH@": "4",
+ "@OPENEXR_VERSION@": "3.0.4",
+}
+YES_DEFINES = [
+ "OPENEXR_ENABLE_API_VISIBILITY", "OPENEXR_IMF_HAVE_COMPLETE_IOMANIP",
+ "OPENEXR_HAVE_LARGE_STACK",
+]
+NO_DEFINES = [
+ "HAVE_UCONTEXT_H", "IEX_HAVE_CONTROL_REGISTER_SUPPORT",
+ "IEX_HAVE_SIGCONTEXT_CONTROL_REGISTER_SUPPORT", "OPENEXR_IMF_HAVE_DARWIN",
+ "OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX", "OPENEXR_IMF_HAVE_LINUX_PROCFS",
+ "OPENEXR_IMF_HAVE_SYSCONF_NPROCESSORS_ONLN",
+]
+ONE_DEFINES = [
+ "ILMTHREAD_THREADING_ENABLED",
+]
+ZERO_DEFINES = [
+ "ILMTHREAD_HAVE_POSIX_SEMAPHORES",
+]
+SUBSTITUTIONS.update({
+ "#cmakedefine " + key : "#define " + key for key in YES_DEFINES
+})
+SUBSTITUTIONS.update({
+ "#cmakedefine " + key : "// #define " + key for key in NO_DEFINES
+})
+SUBSTITUTIONS.update({
+ "#cmakedefine01 " + key : "#define " + key + " 1" for key in ONE_DEFINES
+})
+SUBSTITUTIONS.update({
+ "#cmakedefine01 " + key : "#define " + key + " 0" for key in ZERO_DEFINES
+})
+[
+ expand_template(
+ name = "expand_" + item,
+ template = "cmake/" + item + ".h.in",
+ out = "src/lib/Iex/" + item + ".h",
+ substitutions = SUBSTITUTIONS,
+ ) for item in ["IexConfig", "IexConfigInternal"]
+]
+[
+expand_template(
+ name = "expand_" + item,
+ template = "cmake/" + item + ".h.in",
+ out = "src/lib/IlmThread/" + item + ".h",
+ substitutions = SUBSTITUTIONS,
+ ) for item in ["IlmThreadConfig"]
+]
+[
+expand_template(
+ name = "expand_" + item,
+ template = "cmake/" + item + ".h.in",
+ out = "src/lib/OpenEXR/" + item + ".h",
+ substitutions = SUBSTITUTIONS,
+ ) for item in ["OpenEXRConfig", "OpenEXRConfigInternal"]
+]
+cc_library(
+ name = "Iex",
+ srcs = [
+ "src/lib/Iex/IexBaseExc.cpp",
+ "src/lib/Iex/IexMathFloatExc.cpp",
+ "src/lib/Iex/IexMathFpu.cpp",
+ "src/lib/Iex/IexThrowErrnoExc.cpp",
+ ],
+ hdrs = [
+ "src/lib/Iex/Iex.h",
+ "src/lib/Iex/IexBaseExc.h",
+ ":src/lib/Iex/IexConfig.h",
+ ":src/lib/Iex/IexConfigInternal.h",
+ "src/lib/Iex/IexErrnoExc.h",
+ "src/lib/Iex/IexExport.h",
+ "src/lib/Iex/IexForward.h",
+ "src/lib/Iex/IexMacros.h",
+ "src/lib/Iex/IexMathExc.h",
+ "src/lib/Iex/IexMathFloatExc.h",
+ "src/lib/Iex/IexMathFpu.h",
+ "src/lib/Iex/IexMathIeeeExc.h",
+ "src/lib/Iex/IexNamespace.h",
+ "src/lib/Iex/IexThrowErrnoExc.h",
+ ":src/lib/OpenEXR/OpenEXRConfig.h",
+ ],
+ includes = [
+ "src/lib/Iex",
+ "src/lib/OpenEXR",
+ ],
+)
+
+cc_library(
+ name = "IlmThread",
+ srcs = [
+ "src/lib/IlmThread/IlmThread.cpp",
+ "src/lib/IlmThread/IlmThreadPool.cpp",
+ "src/lib/IlmThread/IlmThreadSemaphore.cpp",
+ "src/lib/IlmThread/IlmThreadSemaphoreOSX.cpp",
+ "src/lib/IlmThread/IlmThreadSemaphorePosix.cpp",
+ "src/lib/IlmThread/IlmThreadSemaphorePosixCompat.cpp",
+ "src/lib/IlmThread/IlmThreadSemaphoreWin32.cpp",
+ ],
+ hdrs = [
+ "src/lib/IlmThread/IlmThread.h",
+ ":src/lib/IlmThread/IlmThreadConfig.h",
+ "src/lib/IlmThread/IlmThreadExport.h",
+ "src/lib/IlmThread/IlmThreadForward.h",
+ "src/lib/IlmThread/IlmThreadMutex.h",
+ "src/lib/IlmThread/IlmThreadNamespace.h",
+ "src/lib/IlmThread/IlmThreadPool.h",
+ "src/lib/IlmThread/IlmThreadSemaphore.h",
+ ],
+ includes = ["src/lib/IlmThread"],
+ deps = [":Iex"],
+)
+cc_library(
+ name = "OpenEXR",
+ srcs = [
+ "src/lib/OpenEXR/ImfAcesFile.cpp",
+ "src/lib/OpenEXR/ImfAttribute.cpp",
+ "src/lib/OpenEXR/ImfB44Compressor.cpp",
+ "src/lib/OpenEXR/ImfBoxAttribute.cpp",
+ "src/lib/OpenEXR/ImfCRgbaFile.cpp",
+ "src/lib/OpenEXR/ImfChannelList.cpp",
+ "src/lib/OpenEXR/ImfChannelListAttribute.cpp",
+ "src/lib/OpenEXR/ImfChromaticities.cpp",
+ "src/lib/OpenEXR/ImfChromaticitiesAttribute.cpp",
+ "src/lib/OpenEXR/ImfCompositeDeepScanLine.cpp",
+ "src/lib/OpenEXR/ImfCompressionAttribute.cpp",
+ "src/lib/OpenEXR/ImfCompressor.cpp",
+ "src/lib/OpenEXR/ImfConvert.cpp",
+ "src/lib/OpenEXR/ImfDeepCompositing.cpp",
+ "src/lib/OpenEXR/ImfDeepFrameBuffer.cpp",
+ "src/lib/OpenEXR/ImfDeepImageStateAttribute.cpp",
+ "src/lib/OpenEXR/ImfDeepScanLineInputFile.cpp",
+ "src/lib/OpenEXR/ImfDeepScanLineInputPart.cpp",
+ "src/lib/OpenEXR/ImfDeepScanLineOutputFile.cpp",
+ "src/lib/OpenEXR/ImfDeepScanLineOutputPart.cpp",
+ "src/lib/OpenEXR/ImfDeepTiledInputFile.cpp",
+ "src/lib/OpenEXR/ImfDeepTiledInputPart.cpp",
+ "src/lib/OpenEXR/ImfDeepTiledOutputFile.cpp",
+ "src/lib/OpenEXR/ImfDeepTiledOutputPart.cpp",
+ "src/lib/OpenEXR/ImfDoubleAttribute.cpp",
+ "src/lib/OpenEXR/ImfDwaCompressor.cpp",
+ "src/lib/OpenEXR/ImfEnvmap.cpp",
+ "src/lib/OpenEXR/ImfEnvmapAttribute.cpp",
+ "src/lib/OpenEXR/ImfFastHuf.cpp",
+ "src/lib/OpenEXR/ImfFloatAttribute.cpp",
+ "src/lib/OpenEXR/ImfFloatVectorAttribute.cpp",
+ "src/lib/OpenEXR/ImfFrameBuffer.cpp",
+ "src/lib/OpenEXR/ImfFramesPerSecond.cpp",
+ "src/lib/OpenEXR/ImfGenericInputFile.cpp",
+ "src/lib/OpenEXR/ImfGenericOutputFile.cpp",
+ "src/lib/OpenEXR/ImfHeader.cpp",
+ "src/lib/OpenEXR/ImfHuf.cpp",
+ "src/lib/OpenEXR/ImfIDManifest.cpp",
+ "src/lib/OpenEXR/ImfIDManifestAttribute.cpp",
+ "src/lib/OpenEXR/ImfIO.cpp",
+ "src/lib/OpenEXR/ImfInputFile.cpp",
+ "src/lib/OpenEXR/ImfInputPart.cpp",
+ "src/lib/OpenEXR/ImfInputPartData.cpp",
+ "src/lib/OpenEXR/ImfIntAttribute.cpp",
+ "src/lib/OpenEXR/ImfKeyCode.cpp",
+ "src/lib/OpenEXR/ImfKeyCodeAttribute.cpp",
+ "src/lib/OpenEXR/ImfLineOrderAttribute.cpp",
+ "src/lib/OpenEXR/ImfLut.cpp",
+ "src/lib/OpenEXR/ImfMatrixAttribute.cpp",
+ "src/lib/OpenEXR/ImfMisc.cpp",
+ "src/lib/OpenEXR/ImfMultiPartInputFile.cpp",
+ "src/lib/OpenEXR/ImfMultiPartOutputFile.cpp",
+ "src/lib/OpenEXR/ImfMultiView.cpp",
+ "src/lib/OpenEXR/ImfOpaqueAttribute.cpp",
+ "src/lib/OpenEXR/ImfOutputFile.cpp",
+ "src/lib/OpenEXR/ImfOutputPart.cpp",
+ "src/lib/OpenEXR/ImfOutputPartData.cpp",
+ "src/lib/OpenEXR/ImfPartType.cpp",
+ "src/lib/OpenEXR/ImfPizCompressor.cpp",
+ "src/lib/OpenEXR/ImfPreviewImage.cpp",
+ "src/lib/OpenEXR/ImfPreviewImageAttribute.cpp",
+ "src/lib/OpenEXR/ImfPxr24Compressor.cpp",
+ "src/lib/OpenEXR/ImfRational.cpp",
+ "src/lib/OpenEXR/ImfRationalAttribute.cpp",
+ "src/lib/OpenEXR/ImfRgbaFile.cpp",
+ "src/lib/OpenEXR/ImfRgbaYca.cpp",
+ "src/lib/OpenEXR/ImfRle.cpp",
+ "src/lib/OpenEXR/ImfRleCompressor.cpp",
+ "src/lib/OpenEXR/ImfScanLineInputFile.cpp",
+ "src/lib/OpenEXR/ImfStandardAttributes.cpp",
+ "src/lib/OpenEXR/ImfStdIO.cpp",
+ "src/lib/OpenEXR/ImfStringAttribute.cpp",
+ "src/lib/OpenEXR/ImfStringVectorAttribute.cpp",
+ "src/lib/OpenEXR/ImfSystemSpecific.cpp",
+ "src/lib/OpenEXR/ImfTestFile.cpp",
+ "src/lib/OpenEXR/ImfThreading.cpp",
+ "src/lib/OpenEXR/ImfTileDescriptionAttribute.cpp",
+ "src/lib/OpenEXR/ImfTileOffsets.cpp",
+ "src/lib/OpenEXR/ImfTiledInputFile.cpp",
+ "src/lib/OpenEXR/ImfTiledInputPart.cpp",
+ "src/lib/OpenEXR/ImfTiledMisc.cpp",
+ "src/lib/OpenEXR/ImfTiledOutputFile.cpp",
+ "src/lib/OpenEXR/ImfTiledOutputPart.cpp",
+ "src/lib/OpenEXR/ImfTiledRgbaFile.cpp",
+ "src/lib/OpenEXR/ImfTimeCode.cpp",
+ "src/lib/OpenEXR/ImfTimeCodeAttribute.cpp",
+ "src/lib/OpenEXR/ImfVecAttribute.cpp",
+ "src/lib/OpenEXR/ImfVersion.cpp",
+ "src/lib/OpenEXR/ImfWav.cpp",
+ "src/lib/OpenEXR/ImfZip.cpp",
+ "src/lib/OpenEXR/ImfZipCompressor.cpp",
+ "src/lib/OpenEXR/b44ExpLogTable.h",
+ "src/lib/OpenEXR/dwaLookups.h",
+ ],
+ hdrs = [
+ ":src/lib/Iex/IexConfig.h",
+ ":src/lib/Iex/IexConfigInternal.h",
+ ":src/lib/IlmThread/IlmThreadConfig.h",
+ "src/lib/OpenEXR/ImfAcesFile.h",
+ "src/lib/OpenEXR/ImfArray.h",
+ "src/lib/OpenEXR/ImfAttribute.h",
+ "src/lib/OpenEXR/ImfAutoArray.h",
+ "src/lib/OpenEXR/ImfB44Compressor.h",
+ "src/lib/OpenEXR/ImfBoxAttribute.h",
+ "src/lib/OpenEXR/ImfCRgbaFile.h",
+ "src/lib/OpenEXR/ImfChannelList.h",
+ "src/lib/OpenEXR/ImfChannelListAttribute.h",
+ "src/lib/OpenEXR/ImfCheckedArithmetic.h",
+ "src/lib/OpenEXR/ImfChromaticities.h",
+ "src/lib/OpenEXR/ImfChromaticitiesAttribute.h",
+ "src/lib/OpenEXR/ImfCompositeDeepScanLine.h",
+ "src/lib/OpenEXR/ImfCompression.h",
+ "src/lib/OpenEXR/ImfCompressionAttribute.h",
+ "src/lib/OpenEXR/ImfCompressor.h",
+ "src/lib/OpenEXR/ImfConvert.h",
+ "src/lib/OpenEXR/ImfDeepCompositing.h",
+ "src/lib/OpenEXR/ImfDeepFrameBuffer.h",
+ "src/lib/OpenEXR/ImfDeepImageState.h",
+ "src/lib/OpenEXR/ImfDeepImageStateAttribute.h",
+ "src/lib/OpenEXR/ImfDeepScanLineInputFile.h",
+ "src/lib/OpenEXR/ImfDeepScanLineInputPart.h",
+ "src/lib/OpenEXR/ImfDeepScanLineOutputFile.h",
+ "src/lib/OpenEXR/ImfDeepScanLineOutputPart.h",
+ "src/lib/OpenEXR/ImfDeepTiledInputFile.h",
+ "src/lib/OpenEXR/ImfDeepTiledInputPart.h",
+ "src/lib/OpenEXR/ImfDeepTiledOutputFile.h",
+ "src/lib/OpenEXR/ImfDeepTiledOutputPart.h",
+ "src/lib/OpenEXR/ImfDoubleAttribute.h",
+ "src/lib/OpenEXR/ImfDwaCompressor.h",
+ "src/lib/OpenEXR/ImfDwaCompressorSimd.h",
+ "src/lib/OpenEXR/ImfEnvmap.h",
+ "src/lib/OpenEXR/ImfEnvmapAttribute.h",
+ "src/lib/OpenEXR/ImfExport.h",
+ "src/lib/OpenEXR/ImfFastHuf.h",
+ "src/lib/OpenEXR/ImfFloatAttribute.h",
+ "src/lib/OpenEXR/ImfFloatVectorAttribute.h",
+ "src/lib/OpenEXR/ImfForward.h",
+ "src/lib/OpenEXR/ImfFrameBuffer.h",
+ "src/lib/OpenEXR/ImfFramesPerSecond.h",
+ "src/lib/OpenEXR/ImfGenericInputFile.h",
+ "src/lib/OpenEXR/ImfGenericOutputFile.h",
+ "src/lib/OpenEXR/ImfHeader.h",
+ "src/lib/OpenEXR/ImfHuf.h",
+ "src/lib/OpenEXR/ImfIDManifest.h",
+ "src/lib/OpenEXR/ImfIDManifestAttribute.h",
+ "src/lib/OpenEXR/ImfIO.h",
+ "src/lib/OpenEXR/ImfInputFile.h",
+ "src/lib/OpenEXR/ImfInputPart.h",
+ "src/lib/OpenEXR/ImfInputPartData.h",
+ "src/lib/OpenEXR/ImfInputStreamMutex.h",
+ "src/lib/OpenEXR/ImfInt64.h",
+ "src/lib/OpenEXR/ImfIntAttribute.h",
+ "src/lib/OpenEXR/ImfKeyCode.h",
+ "src/lib/OpenEXR/ImfKeyCodeAttribute.h",
+ "src/lib/OpenEXR/ImfLineOrder.h",
+ "src/lib/OpenEXR/ImfLineOrderAttribute.h",
+ "src/lib/OpenEXR/ImfLut.h",
+ "src/lib/OpenEXR/ImfMatrixAttribute.h",
+ "src/lib/OpenEXR/ImfMisc.h",
+ "src/lib/OpenEXR/ImfMultiPartInputFile.h",
+ "src/lib/OpenEXR/ImfMultiPartOutputFile.h",
+ "src/lib/OpenEXR/ImfMultiView.h",
+ "src/lib/OpenEXR/ImfName.h",
+ "src/lib/OpenEXR/ImfNamespace.h",
+ "src/lib/OpenEXR/ImfOpaqueAttribute.h",
+ "src/lib/OpenEXR/ImfOptimizedPixelReading.h",
+ "src/lib/OpenEXR/ImfOutputFile.h",
+ "src/lib/OpenEXR/ImfOutputPart.h",
+ "src/lib/OpenEXR/ImfOutputPartData.h",
+ "src/lib/OpenEXR/ImfOutputStreamMutex.h",
+ "src/lib/OpenEXR/ImfPartHelper.h",
+ "src/lib/OpenEXR/ImfPartType.h",
+ "src/lib/OpenEXR/ImfPixelType.h",
+ "src/lib/OpenEXR/ImfPizCompressor.h",
+ "src/lib/OpenEXR/ImfPreviewImage.h",
+ "src/lib/OpenEXR/ImfPreviewImageAttribute.h",
+ "src/lib/OpenEXR/ImfPxr24Compressor.h",
+ "src/lib/OpenEXR/ImfRational.h",
+ "src/lib/OpenEXR/ImfRationalAttribute.h",
+ "src/lib/OpenEXR/ImfRgba.h",
+ "src/lib/OpenEXR/ImfRgbaFile.h",
+ "src/lib/OpenEXR/ImfRgbaYca.h",
+ "src/lib/OpenEXR/ImfRle.h",
+ "src/lib/OpenEXR/ImfRleCompressor.h",
+ "src/lib/OpenEXR/ImfScanLineInputFile.h",
+ "src/lib/OpenEXR/ImfSimd.h",
+ "src/lib/OpenEXR/ImfStandardAttributes.h",
+ "src/lib/OpenEXR/ImfStdIO.h",
+ "src/lib/OpenEXR/ImfStringAttribute.h",
+ "src/lib/OpenEXR/ImfStringVectorAttribute.h",
+ "src/lib/OpenEXR/ImfSystemSpecific.h",
+ "src/lib/OpenEXR/ImfTestFile.h",
+ "src/lib/OpenEXR/ImfThreading.h",
+ "src/lib/OpenEXR/ImfTileDescription.h",
+ "src/lib/OpenEXR/ImfTileDescriptionAttribute.h",
+ "src/lib/OpenEXR/ImfTileOffsets.h",
+ "src/lib/OpenEXR/ImfTiledInputFile.h",
+ "src/lib/OpenEXR/ImfTiledInputPart.h",
+ "src/lib/OpenEXR/ImfTiledMisc.h",
+ "src/lib/OpenEXR/ImfTiledOutputFile.h",
+ "src/lib/OpenEXR/ImfTiledOutputPart.h",
+ "src/lib/OpenEXR/ImfTiledRgbaFile.h",
+ "src/lib/OpenEXR/ImfTimeCode.h",
+ "src/lib/OpenEXR/ImfTimeCodeAttribute.h",
+ "src/lib/OpenEXR/ImfVecAttribute.h",
+ "src/lib/OpenEXR/ImfVersion.h",
+ "src/lib/OpenEXR/ImfWav.h",
+ "src/lib/OpenEXR/ImfXdr.h",
+ "src/lib/OpenEXR/ImfZip.h",
+ "src/lib/OpenEXR/ImfZipCompressor.h",
+ ":src/lib/OpenEXR/OpenEXRConfig.h",
+ ":src/lib/OpenEXR/OpenEXRConfigInternal.h",
+ ],
+ includes = ["src/lib/OpenEXR"],
+ deps = [
+ ":IlmThread",
+ "@imath//:Imath",
+ "@zlib//:zlib",
+ ],
+ visibility = ["//visibility:public"],
+)
+""",
+ remote = "https://github.com/AcademySoftwareFoundation/openexr",
+ tag = "v3.1.5",
+)
diff --git a/third_party/jpeg-xl/bash_test.sh b/third_party/jpeg-xl/bash_test.sh
new file mode 100755
index 0000000000..3dce72aa0b
--- /dev/null
+++ b/third_party/jpeg-xl/bash_test.sh
@@ -0,0 +1,320 @@
+#!/bin/bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Tests implemented in bash. These typically will run checks about the source
+# code rather than the compiled one.
+
+MYDIR=$(dirname $(realpath "$0"))
+
+set -u
+
+test_includes() {
+ local ret=0
+ local f
+ for f in $(git ls-files | grep -E '(\.cc|\.cpp|\.h)$'); do
+ if [ ! -e "$f" ]; then
+ continue
+ fi
+ # Check that the public files (in lib/include/ directory) don't use the full
+ # path to the public header since users of the library will include the
+ # library as: #include "jxl/foobar.h".
+ if [[ "${f#lib/include/}" != "${f}" ]]; then
+ if grep -i -H -n -E '#include\s*[<"]lib/include/jxl' "$f" >&2; then
+ echo "Don't add \"include/\" to the include path of public headers." >&2
+ ret=1
+ fi
+ fi
+
+ if [[ "${f#third_party/}" == "$f" ]]; then
+ # $f is not in third_party/
+
+ # Check that local files don't use the full path to third_party/
+ # directory since the installed versions will not have that path.
+ # Add an exception for third_party/dirent.h.
+ if grep -v -F 'third_party/dirent.h' "$f" | \
+ grep -i -H -n -E '#include\s*[<"]third_party/' >&2 &&
+ [[ $ret -eq 0 ]]; then
+ cat >&2 <<EOF
+$f: Don't add third_party/ to the include path of third_party projects. This \
+makes it harder to use installed system libraries instead of the third_party/ \
+ones.
+EOF
+ ret=1
+ fi
+ fi
+
+ done
+ return ${ret}
+}
+
+test_include_collision() {
+ local ret=0
+ local f
+ for f in $(git ls-files | grep -E '^lib/include/'); do
+ if [ ! -e "$f" ]; then
+ continue
+ fi
+ local base=${f#lib/include/}
+ if [[ -e "lib/${base}" ]]; then
+ echo "$f: Name collision, both $f and lib/${base} exist." >&2
+ ret=1
+ fi
+ done
+ return ${ret}
+}
+
+test_copyright() {
+ local ret=0
+ local f
+ for f in $(
+ git ls-files | grep -E \
+ '(Dockerfile.*|\.c|\.cc|\.cpp|\.gni|\.h|\.java|\.sh|\.m|\.py|\.ui|\.yml)$'); do
+ if [ ! -e "$f" ]; then
+ continue
+ fi
+ if [[ "${f#third_party/}" == "$f" ]]; then
+ # $f is not in third_party/
+ if ! head -n 10 "$f" |
+ grep -F 'Copyright (c) the JPEG XL Project Authors.' >/dev/null ; then
+ echo "$f: Missing Copyright blob near the top of the file." >&2
+ ret=1
+ fi
+ if ! head -n 10 "$f" |
+ grep -F 'Use of this source code is governed by a BSD-style' \
+ >/dev/null ; then
+ echo "$f: Missing License blob near the top of the file." >&2
+ ret=1
+ fi
+ fi
+ done
+ return ${ret}
+}
+
+# Check that we don't use "%zu" or "%zd" in format string for size_t.
+test_printf_size_t() {
+ local ret=0
+ if grep -n -E '%[0-9]*z[udx]' \
+ $(git ls-files | grep -E '(\.c|\.cc|\.cpp|\.h)$'); then
+ echo "Don't use '%zu' or '%zd' in a format string, instead use " \
+ "'%\" PRIuS \"' or '%\" PRIdS \"'." >&2
+ ret=1
+ fi
+
+ if grep -n -E 'gtest\.h' \
+ $(git ls-files | grep -E '(\.c|\.cc|\.cpp|\.h)$' | grep -v -F /testing.h); then
+ echo "Don't include gtest directly, instead include 'testing.h'. " >&2
+ ret=1
+ fi
+
+ if grep -n -E 'gmock\.h' \
+ $(git ls-files | grep -E '(\.c|\.cc|\.cpp|\.h)$' | grep -v -F /testing.h); then
+ echo "Don't include gmock directly, instead include 'testing.h'. " >&2
+ ret=1
+ fi
+
+ local f
+ for f in $(git ls-files | grep -E "\.cc$" | xargs grep 'PRI[udx]S' |
+ cut -f 1 -d : | uniq); do
+ if [ ! -e "$f" ]; then
+ continue
+ fi
+ if ! grep -F printf_macros.h "$f" >/dev/null; then
+ echo "$f: Add lib/jxl/base/printf_macros.h for PRI.S, or use other " \
+ "types for code outside lib/jxl library." >&2
+ ret=1
+ fi
+ done
+
+ for f in $(git ls-files | grep -E "\.h$" | grep -v -E '(printf_macros\.h|testing\.h)' |
+ xargs grep -n 'PRI[udx]S'); do
+ # Having PRIuS / PRIdS in a header file means that printf_macros.h may
+ # be included before a system header, in particular before gtest headers.
+ # those may re-define PRIuS unconditionally causing a compile error.
+ echo "$f: Don't use PRI.S in header files. Sorry."
+ ret=1
+ done
+
+ return ${ret}
+}
+
+# Check that "dec_" code doesn't depend on "enc_" headers.
+test_dec_enc_deps() {
+ local ret=0
+ local f
+ for f in $(git ls-files | grep -E '/dec_'); do
+ if [ ! -e "$f" ]; then
+ continue
+ fi
+ if [[ "${f#third_party/}" == "$f" ]]; then
+ # $f is not in third_party/
+ if grep -n -H -E "#include.*/enc_" "$f" >&2; then
+ echo "$f: Don't include \"enc_*\" files from \"dec_*\" files." >&2
+ ret=1
+ fi
+ fi
+ done
+ return ${ret}
+}
+
+# Check for git merge conflict markers.
+test_merge_conflict() {
+ local ret=0
+ TEXT_FILES='(\.cc|\.cpp|\.h|\.sh|\.m|\.py|\.md|\.txt|\.cmake)$'
+ for f in $(git ls-files | grep -E "${TEXT_FILES}"); do
+ if [ ! -e "$f" ]; then
+ continue
+ fi
+ if grep -E '^<<<<<<< ' "$f"; then
+ echo "$f: Found git merge conflict marker. Please resolve." >&2
+ ret=1
+ fi
+ done
+ return ${ret}
+}
+
+# Check that the library and the package have the same version. This prevents
+# accidentally having them out of sync.
+get_version() {
+ local varname=$1
+ local line=$(grep -F "set(${varname} " lib/CMakeLists.txt | head -n 1)
+ [[ -n "${line}" ]]
+ line="${line#set(${varname} }"
+ line="${line%)}"
+ echo "${line}"
+}
+
+test_version() {
+ local major=$(get_version JPEGXL_MAJOR_VERSION)
+ local minor=$(get_version JPEGXL_MINOR_VERSION)
+ local patch=$(get_version JPEGXL_PATCH_VERSION)
+ # Check that the version is not empty
+ if [[ -z "${major}${minor}${patch}" ]]; then
+ echo "Couldn't parse version from CMakeLists.txt" >&2
+ return 1
+ fi
+ local pkg_version=$(head -n 1 debian/changelog)
+ # Get only the part between the first "jpeg-xl (" and the following ")".
+ pkg_version="${pkg_version#jpeg-xl (}"
+ pkg_version="${pkg_version%%)*}"
+ if [[ -z "${pkg_version}" ]]; then
+ echo "Couldn't parse version from debian package" >&2
+ return 1
+ fi
+
+ local lib_version="${major}.${minor}.${patch}"
+ lib_version="${lib_version%.0}"
+ if [[ "${pkg_version}" != "${lib_version}"* ]]; then
+ echo "Debian package version (${pkg_version}) doesn't match library" \
+ "version (${lib_version})." >&2
+ return 1
+ fi
+ return 0
+}
+
+# Check that the SHA versions in deps.sh matches the git submodules.
+test_deps_version() {
+ while IFS= read -r line; do
+ if [[ "${line:0:10}" != "[submodule" ]]; then
+ continue
+ fi
+ line="${line#[submodule \"}"
+ line="${line%\"]}"
+ local varname=$(tr '[:lower:]' '[:upper:]' <<< "${line}")
+ varname="${varname/\//_}"
+ if ! grep -F "${varname}=" deps.sh >/dev/null; then
+ # Ignoring submodule not in deps.sh
+ continue
+ fi
+ local deps_sha=$(grep -F "${varname}=" deps.sh | cut -f 2 -d '"')
+ [[ -n "${deps_sha}" ]]
+ local git_sha=$(git ls-tree -r HEAD "${line}" | cut -f 1 | cut -f 3 -d ' ')
+ if [[ "${deps_sha}" != "${git_sha}" ]]; then
+ cat >&2 <<EOF
+deps.sh: SHA for project ${line} is at ${deps_sha} but the git submodule is at
+${git_sha}. Please update deps.sh
+
+If you did not intend to change the submodule's SHA value, it is possible that
+you accidentally included this change in your commit after a rebase or checkout
+without running "git submodule --init". To revert the submodule change run from
+the top checkout directory:
+
+ git -C ${line} checkout ${deps_sha}
+ git commit --amend ${line}
+
+EOF
+ return 1
+ fi
+ done < .gitmodules
+}
+
+# Make sure that all the Fields objects are fuzzed directly.
+test_fuzz_fields() {
+ local ret=0
+ # List all the classes of the form "ClassName : public Fields".
+ # This doesn't catch class names that are too long to fit.
+ local field_classes=$( git ls-files |
+ grep -E '\.(cc|h)' | grep -v 'test\.cc$' |
+ xargs grep -h -o -E '\b[^ ]+ : public Fields' | cut -f 1 -d ' ')
+ local classname
+ for classname in ${field_classes}; do
+ if [ ! -e "$classname" ]; then
+ continue
+ fi
+ if ! grep -E "\\b${classname}\\b" tools/fields_fuzzer.cc >/dev/null; then
+ cat >&2 <<EOF
+tools/fields_fuzzer.cc: Class ${classname} not found in the fields_fuzzer.
+EOF
+ ret=1
+ fi
+ done
+ return $ret
+}
+
+# Test that we don't use %n in C++ code to avoid using it in printf and scanf.
+# This test is not very precise but in cases where "module n" is needed we would
+# normally have "% n" instead of "%n". Using %n is not allowed in Android 10+.
+test_percent_n() {
+ local ret=0
+ local f
+ for f in $(git ls-files | grep -E '(\.cc|\.cpp|\.h)$'); do
+ if [ ! -e "$f" ]; then
+ continue
+ fi
+ if grep -i -H -n -E '%h*n' "$f" >&2; then
+ echo "Don't use \"%n\"." >&2
+ ret=1
+ fi
+ done
+ return ${ret}
+}
+
+main() {
+ local ret=0
+ cd "${MYDIR}"
+
+ if ! git rev-parse >/dev/null 2>/dev/null; then
+ echo "Not a git checkout, skipping bash_test"
+ return 0
+ fi
+
+ IFS=$'\n'
+ for f in $(declare -F); do
+ local test_name=$(echo "$f" | cut -f 3 -d ' ')
+ # Runs all the local bash functions that start with "test_".
+ if [[ "${test_name}" == test_* ]]; then
+ echo "Test ${test_name}: Start"
+ if ${test_name}; then
+ echo "Test ${test_name}: PASS"
+ else
+ echo "Test ${test_name}: FAIL"
+ ret=1
+ fi
+ fi
+ done
+ return ${ret}
+}
+
+main "$@"
diff --git a/third_party/jpeg-xl/ci.sh b/third_party/jpeg-xl/ci.sh
new file mode 100755
index 0000000000..e55f0f8a89
--- /dev/null
+++ b/third_party/jpeg-xl/ci.sh
@@ -0,0 +1,1552 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Continuous integration helper module. This module is meant to be called from
+# the .gitlab-ci.yml file during the continuous integration build, as well as
+# from the command line for developers.
+
+set -eu
+
+OS=`uname -s`
+
+MYDIR=$(dirname $(realpath "$0"))
+
+### Environment parameters:
+TEST_STACK_LIMIT="${TEST_STACK_LIMIT:-256}"
+CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-RelWithDebInfo}
+CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH:-}
+CMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER:-}
+CMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER:-}
+CMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM:-}
+SKIP_BUILD="${SKIP_BUILD:-0}"
+SKIP_TEST="${SKIP_TEST:-0}"
+TARGETS="${TARGETS:-all doc}"
+TEST_SELECTOR="${TEST_SELECTOR:-}"
+BUILD_TARGET="${BUILD_TARGET:-}"
+ENABLE_WASM_SIMD="${ENABLE_WASM_SIMD:-0}"
+if [[ -n "${BUILD_TARGET}" ]]; then
+ BUILD_DIR="${BUILD_DIR:-${MYDIR}/build-${BUILD_TARGET%%-*}}"
+else
+ BUILD_DIR="${BUILD_DIR:-${MYDIR}/build}"
+fi
+# Whether we should post a message in the MR when the build fails.
+POST_MESSAGE_ON_ERROR="${POST_MESSAGE_ON_ERROR:-1}"
+
+# Set default compilers to clang if not already set
+export CC=${CC:-clang}
+export CXX=${CXX:-clang++}
+
+# Time limit for the "fuzz" command in seconds (0 means no limit).
+FUZZER_MAX_TIME="${FUZZER_MAX_TIME:-0}"
+
+SANITIZER="none"
+
+
+if [[ "${BUILD_TARGET%%-*}" == "x86_64" ||
+ "${BUILD_TARGET%%-*}" == "i686" ]]; then
+ # Default to building all targets, even if compiler baseline is SSE4
+ HWY_BASELINE_TARGETS=${HWY_BASELINE_TARGETS:-HWY_EMU128}
+else
+ HWY_BASELINE_TARGETS=${HWY_BASELINE_TARGETS:-}
+fi
+
+# Convenience flag to pass both CMAKE_C_FLAGS and CMAKE_CXX_FLAGS
+CMAKE_FLAGS=${CMAKE_FLAGS:-}
+CMAKE_C_FLAGS="${CMAKE_C_FLAGS:-} ${CMAKE_FLAGS}"
+CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS:-} ${CMAKE_FLAGS}"
+
+CMAKE_CROSSCOMPILING_EMULATOR=${CMAKE_CROSSCOMPILING_EMULATOR:-}
+CMAKE_EXE_LINKER_FLAGS=${CMAKE_EXE_LINKER_FLAGS:-}
+CMAKE_FIND_ROOT_PATH=${CMAKE_FIND_ROOT_PATH:-}
+CMAKE_MODULE_LINKER_FLAGS=${CMAKE_MODULE_LINKER_FLAGS:-}
+CMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS:-}
+CMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE:-}
+
+if [[ "${ENABLE_WASM_SIMD}" -ne "0" ]]; then
+ CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -msimd128"
+ CMAKE_C_FLAGS="${CMAKE_C_FLAGS} -msimd128"
+ CMAKE_EXE_LINKER_FLAGS="${CMAKE_EXE_LINKER_FLAGS} -msimd128"
+fi
+
+if [[ "${ENABLE_WASM_SIMD}" -eq "2" ]]; then
+ CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -DHWY_WANT_WASM2"
+ CMAKE_C_FLAGS="${CMAKE_C_FLAGS} -DHWY_WANT_WASM2"
+fi
+
+if [[ ! -z "${HWY_BASELINE_TARGETS}" ]]; then
+ CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -DHWY_BASELINE_TARGETS=${HWY_BASELINE_TARGETS}"
+fi
+
+# Version inferred from the CI variables.
+CI_COMMIT_SHA=${CI_COMMIT_SHA:-${GITHUB_SHA:-}}
+JPEGXL_VERSION=${JPEGXL_VERSION:-${CI_COMMIT_SHA:0:8}}
+
+# Benchmark parameters
+STORE_IMAGES=${STORE_IMAGES:-1}
+BENCHMARK_CORPORA="${MYDIR}/third_party/corpora"
+
+# Local flags passed to sanitizers.
+UBSAN_FLAGS=(
+ -fsanitize=alignment
+ -fsanitize=bool
+ -fsanitize=bounds
+ -fsanitize=builtin
+ -fsanitize=enum
+ -fsanitize=float-cast-overflow
+ -fsanitize=float-divide-by-zero
+ -fsanitize=integer-divide-by-zero
+ -fsanitize=null
+ -fsanitize=object-size
+ -fsanitize=pointer-overflow
+ -fsanitize=return
+ -fsanitize=returns-nonnull-attribute
+ -fsanitize=shift-base
+ -fsanitize=shift-exponent
+ -fsanitize=unreachable
+ -fsanitize=vla-bound
+
+ -fno-sanitize-recover=undefined
+ # Brunsli uses unaligned accesses to uint32_t, so alignment is just a warning.
+ -fsanitize-recover=alignment
+)
+# -fsanitize=function doesn't work on aarch64 and arm.
+if [[ "${BUILD_TARGET%%-*}" != "aarch64" &&
+ "${BUILD_TARGET%%-*}" != "arm" ]]; then
+ UBSAN_FLAGS+=(
+ -fsanitize=function
+ )
+fi
+if [[ "${BUILD_TARGET%%-*}" != "arm" ]]; then
+ UBSAN_FLAGS+=(
+ -fsanitize=signed-integer-overflow
+ )
+fi
+
+CLANG_TIDY_BIN=$(which clang-tidy-6.0 clang-tidy-7 clang-tidy-8 clang-tidy | head -n 1)
+# Default to "cat" if "colordiff" is not installed or if stdout is not a tty.
+if [[ -t 1 ]]; then
+ COLORDIFF_BIN=$(which colordiff cat | head -n 1)
+else
+ COLORDIFF_BIN="cat"
+fi
+FIND_BIN=$(which gfind find | head -n 1)
+# "false" will disable wine64 when not installed. This won't allow
+# cross-compiling.
+WINE_BIN=$(which wine64 false | head -n 1)
+
+CLANG_VERSION="${CLANG_VERSION:-}"
+# Detect the clang version suffix and store it in CLANG_VERSION. For example,
+# "6.0" for clang 6 or "7" for clang 7.
+detect_clang_version() {
+ if [[ -n "${CLANG_VERSION}" ]]; then
+ return 0
+ fi
+ local clang_version=$("${CC:-clang}" --version | head -n1)
+ clang_version=${clang_version#"Debian "}
+ clang_version=${clang_version#"Ubuntu "}
+ local llvm_tag
+ case "${clang_version}" in
+ "clang version 6."*)
+ CLANG_VERSION="6.0"
+ ;;
+ "clang version "*)
+ # Any other clang version uses just the major version number.
+ local suffix="${clang_version#clang version }"
+ CLANG_VERSION="${suffix%%.*}"
+ ;;
+ "emcc"*)
+ # We can't use asan or msan in the emcc case.
+ ;;
+ *)
+ echo "Unknown clang version: ${clang_version}" >&2
+ return 1
+ esac
+}
+
+# Temporary files cleanup hooks.
+CLEANUP_FILES=()
+cleanup() {
+ if [[ ${#CLEANUP_FILES[@]} -ne 0 ]]; then
+ rm -fr "${CLEANUP_FILES[@]}"
+ fi
+}
+
+# Executed on exit.
+on_exit() {
+ local retcode="$1"
+ # Always cleanup the CLEANUP_FILES.
+ cleanup
+
+ # Post a message in the MR when requested with POST_MESSAGE_ON_ERROR but only
+ # if the run failed and we are not running from a MR pipeline.
+ if [[ ${retcode} -ne 0 && -n "${CI_BUILD_NAME:-}" &&
+ -n "${POST_MESSAGE_ON_ERROR}" && -z "${CI_MERGE_REQUEST_ID:-}" &&
+ "${CI_BUILD_REF_NAME}" = "master" ]]; then
+ load_mr_vars_from_commit
+ { set +xeu; } 2>/dev/null
+ local message="**Run ${CI_BUILD_NAME} @ ${CI_COMMIT_SHORT_SHA} failed.**
+
+Check the output of the job at ${CI_JOB_URL:-} to see if this was your problem.
+If it was, please rollback this change or fix the problem ASAP, broken builds
+slow down development. Check if the error already existed in the previous build
+as well.
+
+Pipeline: ${CI_PIPELINE_URL}
+
+Previous build commit: ${CI_COMMIT_BEFORE_SHA}
+"
+ cmd_post_mr_comment "${message}"
+ fi
+}
+
+trap 'retcode=$?; { set +x; } 2>/dev/null; on_exit ${retcode}' INT TERM EXIT
+
+
+# These variables are populated when calling merge_request_commits().
+
+# The current hash at the top of the current branch or merge request branch (if
+# running from a merge request pipeline).
+MR_HEAD_SHA=""
+# The common ancestor between the current commit and the tracked branch, such
+# as master. This includes a list
+MR_ANCESTOR_SHA=""
+
+# Populate MR_HEAD_SHA and MR_ANCESTOR_SHA.
+merge_request_commits() {
+ { set +x; } 2>/dev/null
+ # GITHUB_SHA is the current reference being build in GitHub Actions.
+ if [[ -n "${GITHUB_SHA:-}" ]]; then
+ # GitHub normally does a checkout of a merge commit on a shallow repository
+ # by default. We want to get a bit more of the history to be able to diff
+ # changes on the Pull Request if needed. This fetches 10 more commits which
+ # should be enough given that PR normally should have 1 commit.
+ git -C "${MYDIR}" fetch -q origin "${GITHUB_SHA}" --depth 10
+ MR_HEAD_SHA="$(git rev-parse "FETCH_HEAD^2" 2>/dev/null ||
+ echo "${GITHUB_SHA}")"
+ else
+ # CI_BUILD_REF is the reference currently being build in the CI workflow.
+ MR_HEAD_SHA=$(git -C "${MYDIR}" rev-parse -q "${CI_BUILD_REF:-HEAD}")
+ fi
+
+ if [[ -n "${CI_MERGE_REQUEST_IID:-}" ]]; then
+ # Merge request pipeline in CI. In this case the upstream is called "origin"
+ # but it refers to the forked project that's the source of the merge
+ # request. We need to get the target of the merge request, for which we need
+ # to query that repository using our CI_JOB_TOKEN.
+ echo "machine gitlab.com login gitlab-ci-token password ${CI_JOB_TOKEN}" \
+ >> "${HOME}/.netrc"
+ git -C "${MYDIR}" fetch "${CI_MERGE_REQUEST_PROJECT_URL}" \
+ "${CI_MERGE_REQUEST_TARGET_BRANCH_NAME}"
+ MR_ANCESTOR_SHA=$(git -C "${MYDIR}" rev-parse -q FETCH_HEAD)
+ elif [[ -n "${GITHUB_BASE_REF:-}" ]]; then
+ # Pull request workflow in GitHub Actions. GitHub checkout action uses
+ # "origin" as the remote for the git checkout.
+ git -C "${MYDIR}" fetch -q origin "${GITHUB_BASE_REF}"
+ MR_ANCESTOR_SHA=$(git -C "${MYDIR}" rev-parse -q FETCH_HEAD)
+ else
+ # We are in a local branch, not a merge request.
+ MR_ANCESTOR_SHA=$(git -C "${MYDIR}" rev-parse -q HEAD@{upstream} || true)
+ fi
+
+ if [[ -z "${MR_ANCESTOR_SHA}" ]]; then
+ echo "Warning, not tracking any branch, using the last commit in HEAD.">&2
+ # This prints the return value with just HEAD.
+ MR_ANCESTOR_SHA=$(git -C "${MYDIR}" rev-parse -q "${MR_HEAD_SHA}^")
+ else
+ # GitHub runs the pipeline on a merge commit, no need to look for the common
+ # ancestor in that case.
+ if [[ -z "${GITHUB_BASE_REF:-}" ]]; then
+ MR_ANCESTOR_SHA=$(git -C "${MYDIR}" merge-base \
+ "${MR_ANCESTOR_SHA}" "${MR_HEAD_SHA}")
+ fi
+ fi
+ set -x
+}
+
+# Load the MR iid from the landed commit message when running not from a
+# merge request workflow. This is useful to post back results at the merge
+# request when running pipelines from master.
+load_mr_vars_from_commit() {
+ { set +x; } 2>/dev/null
+ if [[ -z "${CI_MERGE_REQUEST_IID:-}" ]]; then
+ local mr_iid=$(git rev-list --format=%B --max-count=1 HEAD |
+ grep -F "${CI_PROJECT_URL}" | grep -F "/merge_requests" | head -n 1)
+ # mr_iid contains a string like this if it matched:
+ # Part-of: <https://gitlab.com/wg1/jpeg-xlm/merge_requests/123456>
+ if [[ -n "${mr_iid}" ]]; then
+ mr_iid=$(echo "${mr_iid}" |
+ sed -E 's,^.*merge_requests/([0-9]+)>.*$,\1,')
+ CI_MERGE_REQUEST_IID="${mr_iid}"
+ CI_MERGE_REQUEST_PROJECT_ID=${CI_PROJECT_ID}
+ fi
+ fi
+ set -x
+}
+
+# Posts a comment to the current merge request.
+cmd_post_mr_comment() {
+ { set +x; } 2>/dev/null
+ local comment="$1"
+ if [[ -n "${BOT_TOKEN:-}" && -n "${CI_MERGE_REQUEST_IID:-}" ]]; then
+ local url="${CI_API_V4_URL}/projects/${CI_MERGE_REQUEST_PROJECT_ID}/merge_requests/${CI_MERGE_REQUEST_IID}/notes"
+ curl -X POST -g \
+ -H "PRIVATE-TOKEN: ${BOT_TOKEN}" \
+ --data-urlencode "body=${comment}" \
+ --output /dev/null \
+ "${url}"
+ fi
+ set -x
+}
+
+# Set up and export the environment variables needed by the child processes.
+export_env() {
+ if [[ "${BUILD_TARGET}" == *mingw32 ]]; then
+ # Wine needs to know the paths to the mingw dlls. These should be
+ # separated by ';'.
+ WINEPATH=$("${CC:-clang}" -print-search-dirs --target="${BUILD_TARGET}" \
+ | grep -F 'libraries: =' | cut -f 2- -d '=' | tr ':' ';')
+ # We also need our own libraries in the wine path.
+ local real_build_dir=$(realpath "${BUILD_DIR}")
+ # Some library .dll dependencies are installed in /bin:
+ export WINEPATH="${WINEPATH};${real_build_dir};${real_build_dir}/third_party/brotli;/usr/${BUILD_TARGET}/bin"
+
+ local prefix="${BUILD_DIR}/wineprefix"
+ mkdir -p "${prefix}"
+ export WINEPREFIX=$(realpath "${prefix}")
+ fi
+ # Sanitizers need these variables to print and properly format the stack
+ # traces:
+ LLVM_SYMBOLIZER=$("${CC:-clang}" -print-prog-name=llvm-symbolizer || true)
+ if [[ -n "${LLVM_SYMBOLIZER}" ]]; then
+ export ASAN_SYMBOLIZER_PATH="${LLVM_SYMBOLIZER}"
+ export MSAN_SYMBOLIZER_PATH="${LLVM_SYMBOLIZER}"
+ export UBSAN_SYMBOLIZER_PATH="${LLVM_SYMBOLIZER}"
+ fi
+}
+
+cmake_configure() {
+ export_env
+
+ if [[ "${STACK_SIZE:-0}" == 1 ]]; then
+ # Dump the stack size of each function in the .stack_sizes section for
+ # analysis.
+ CMAKE_C_FLAGS+=" -fstack-size-section"
+ CMAKE_CXX_FLAGS+=" -fstack-size-section"
+ fi
+
+ local args=(
+ -B"${BUILD_DIR}" -H"${MYDIR}"
+ -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}"
+ -G Ninja
+ -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}"
+ -DCMAKE_C_FLAGS="${CMAKE_C_FLAGS}"
+ -DCMAKE_EXE_LINKER_FLAGS="${CMAKE_EXE_LINKER_FLAGS}"
+ -DCMAKE_MODULE_LINKER_FLAGS="${CMAKE_MODULE_LINKER_FLAGS}"
+ -DCMAKE_SHARED_LINKER_FLAGS="${CMAKE_SHARED_LINKER_FLAGS}"
+ -DJPEGXL_VERSION="${JPEGXL_VERSION}"
+ -DSANITIZER="${SANITIZER}"
+ # These are not enabled by default in cmake.
+ -DJPEGXL_ENABLE_VIEWERS=ON
+ -DJPEGXL_ENABLE_PLUGINS=ON
+ -DJPEGXL_ENABLE_DEVTOOLS=ON
+ # We always use libfuzzer in the ci.sh wrapper.
+ -DJPEGXL_FUZZER_LINK_FLAGS="-fsanitize=fuzzer"
+ )
+ if [[ "${BUILD_TARGET}" != *mingw32 ]]; then
+ args+=(
+ -DJPEGXL_WARNINGS_AS_ERRORS=ON
+ )
+ fi
+ if [[ -n "${BUILD_TARGET}" ]]; then
+ local system_name="Linux"
+ if [[ "${BUILD_TARGET}" == *mingw32 ]]; then
+ # When cross-compiling with mingw the target must be set to Windows and
+ # run programs with wine.
+ system_name="Windows"
+ args+=(
+ -DCMAKE_CROSSCOMPILING_EMULATOR="${WINE_BIN}"
+ # Normally CMake automatically defines MINGW=1 when building with the
+ # mingw compiler (x86_64-w64-mingw32-gcc) but we are normally compiling
+ # with clang.
+ -DMINGW=1
+ )
+ fi
+ # EMSCRIPTEN toolchain sets the right values itself
+ if [[ "${BUILD_TARGET}" != wasm* ]]; then
+ # If set, BUILD_TARGET must be the target triplet such as
+ # x86_64-unknown-linux-gnu.
+ args+=(
+ -DCMAKE_C_COMPILER_TARGET="${BUILD_TARGET}"
+ -DCMAKE_CXX_COMPILER_TARGET="${BUILD_TARGET}"
+ # Only the first element of the target triplet.
+ -DCMAKE_SYSTEM_PROCESSOR="${BUILD_TARGET%%-*}"
+ -DCMAKE_SYSTEM_NAME="${system_name}"
+ -DCMAKE_TOOLCHAIN_FILE="${CMAKE_TOOLCHAIN_FILE}"
+ )
+ else
+ args+=(
+ # sjpeg confuses WASM SIMD with SSE.
+ -DSJPEG_ENABLE_SIMD=OFF
+ # Building shared libs is not very useful for WASM.
+ -DBUILD_SHARED_LIBS=OFF
+ )
+ fi
+ args+=(
+ # These are needed to make googletest work when cross-compiling.
+ -DCMAKE_CROSSCOMPILING=1
+ -DHAVE_STD_REGEX=0
+ -DHAVE_POSIX_REGEX=0
+ -DHAVE_GNU_POSIX_REGEX=0
+ -DHAVE_STEADY_CLOCK=0
+ -DHAVE_THREAD_SAFETY_ATTRIBUTES=0
+ )
+ if [[ -z "${CMAKE_FIND_ROOT_PATH}" ]]; then
+ # find_package() will look in this prefix for libraries.
+ CMAKE_FIND_ROOT_PATH="/usr/${BUILD_TARGET}"
+ fi
+ if [[ -z "${CMAKE_PREFIX_PATH}" ]]; then
+ CMAKE_PREFIX_PATH="/usr/${BUILD_TARGET}"
+ fi
+ # Use pkg-config for the target. If there's no pkg-config available for the
+ # target we can set the PKG_CONFIG_PATH to the appropriate path in most
+ # linux distributions.
+ local pkg_config=$(which "${BUILD_TARGET}-pkg-config" || true)
+ if [[ -z "${pkg_config}" ]]; then
+ pkg_config=$(which pkg-config)
+ export PKG_CONFIG_LIBDIR="/usr/${BUILD_TARGET}/lib/pkgconfig"
+ fi
+ if [[ -n "${pkg_config}" ]]; then
+ args+=(-DPKG_CONFIG_EXECUTABLE="${pkg_config}")
+ fi
+ fi
+ if [[ -n "${CMAKE_CROSSCOMPILING_EMULATOR}" ]]; then
+ args+=(
+ -DCMAKE_CROSSCOMPILING_EMULATOR="${CMAKE_CROSSCOMPILING_EMULATOR}"
+ )
+ fi
+ if [[ -n "${CMAKE_FIND_ROOT_PATH}" ]]; then
+ args+=(
+ -DCMAKE_FIND_ROOT_PATH="${CMAKE_FIND_ROOT_PATH}"
+ )
+ fi
+ if [[ -n "${CMAKE_PREFIX_PATH}" ]]; then
+ args+=(
+ -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}"
+ )
+ fi
+ if [[ -n "${CMAKE_C_COMPILER_LAUNCHER}" ]]; then
+ args+=(
+ -DCMAKE_C_COMPILER_LAUNCHER="${CMAKE_C_COMPILER_LAUNCHER}"
+ )
+ fi
+ if [[ -n "${CMAKE_CXX_COMPILER_LAUNCHER}" ]]; then
+ args+=(
+ -DCMAKE_CXX_COMPILER_LAUNCHER="${CMAKE_CXX_COMPILER_LAUNCHER}"
+ )
+ fi
+ if [[ -n "${CMAKE_MAKE_PROGRAM}" ]]; then
+ args+=(
+ -DCMAKE_MAKE_PROGRAM="${CMAKE_MAKE_PROGRAM}"
+ )
+ fi
+ if [[ "${BUILD_TARGET}" == wasm* ]]; then
+ emcmake cmake "${args[@]}" "$@"
+ else
+ cmake "${args[@]}" "$@"
+ fi
+}
+
+cmake_build_and_test() {
+ if [[ "${SKIP_BUILD}" -eq "1" ]]; then
+ return 0
+ fi
+ # gtest_discover_tests() runs the test binaries to discover the list of tests
+ # at build time, which fails under qemu.
+ ASAN_OPTIONS=detect_leaks=0 cmake --build "${BUILD_DIR}" -- $TARGETS
+ # Pack test binaries if requested.
+ if [[ "${PACK_TEST:-}" == "1" ]]; then
+ (cd "${BUILD_DIR}"
+ ${FIND_BIN} -name '*.cmake' -a '!' -path '*CMakeFiles*'
+ # gtest / gmock / gtest_main shared libs
+ ${FIND_BIN} lib/ -name 'libg*.so*'
+ ${FIND_BIN} -type d -name tests -a '!' -path '*CMakeFiles*'
+ ) | tar -C "${BUILD_DIR}" -cf "${BUILD_DIR}/tests.tar.xz" -T - \
+ --use-compress-program="xz --threads=$(nproc --all || echo 1) -6"
+ du -h "${BUILD_DIR}/tests.tar.xz"
+ # Pack coverage data if also available.
+ touch "${BUILD_DIR}/gcno.sentinel"
+ (cd "${BUILD_DIR}"; echo gcno.sentinel; ${FIND_BIN} -name '*gcno') | \
+ tar -C "${BUILD_DIR}" -cvf "${BUILD_DIR}/gcno.tar.xz" -T - \
+ --use-compress-program="xz --threads=$(nproc --all || echo 1) -6"
+ fi
+
+ if [[ "${SKIP_TEST}" -ne "1" ]]; then
+ (cd "${BUILD_DIR}"
+ export UBSAN_OPTIONS=print_stacktrace=1
+ [[ "${TEST_STACK_LIMIT}" == "none" ]] || ulimit -s "${TEST_STACK_LIMIT}"
+ ctest -j $(nproc --all || echo 1) ${TEST_SELECTOR} --output-on-failure)
+ fi
+}
+
+# Configure the build to strip unused functions. This considerably reduces the
+# output size, specially for tests which only use a small part of the whole
+# library.
+strip_dead_code() {
+ # Emscripten does tree shaking without any extra flags.
+ if [[ "${BUILD_TARGET}" == wasm* ]]; then
+ return 0
+ fi
+ # -ffunction-sections, -fdata-sections and -Wl,--gc-sections effectively
+ # discard all unreachable code, reducing the code size. For this to work, we
+ # need to also pass --no-export-dynamic to prevent it from exporting all the
+ # internal symbols (like functions) making them all reachable and thus not a
+ # candidate for removal.
+ CMAKE_CXX_FLAGS+=" -ffunction-sections -fdata-sections"
+ CMAKE_C_FLAGS+=" -ffunction-sections -fdata-sections"
+ if [[ "${OS}" == "Darwin" ]]; then
+ CMAKE_EXE_LINKER_FLAGS+=" -dead_strip"
+ CMAKE_SHARED_LINKER_FLAGS+=" -dead_strip"
+ else
+ CMAKE_EXE_LINKER_FLAGS+=" -Wl,--gc-sections -Wl,--no-export-dynamic"
+ CMAKE_SHARED_LINKER_FLAGS+=" -Wl,--gc-sections -Wl,--no-export-dynamic"
+ fi
+}
+
+### Externally visible commands
+
+cmd_debug() {
+ CMAKE_BUILD_TYPE="Debug"
+ cmake_configure "$@"
+ cmake_build_and_test
+}
+
+cmd_release() {
+ CMAKE_BUILD_TYPE="Release"
+ strip_dead_code
+ cmake_configure "$@"
+ cmake_build_and_test
+}
+
+cmd_opt() {
+ CMAKE_BUILD_TYPE="RelWithDebInfo"
+ CMAKE_CXX_FLAGS+=" -DJXL_DEBUG_WARNING -DJXL_DEBUG_ON_ERROR"
+ cmake_configure "$@"
+ cmake_build_and_test
+}
+
+cmd_coverage() {
+ # -O0 prohibits stack space reuse -> causes stack-overflow on dozens of tests.
+ TEST_STACK_LIMIT="none"
+
+ cmd_release -DJPEGXL_ENABLE_COVERAGE=ON "$@"
+
+ if [[ "${SKIP_TEST}" -ne "1" ]]; then
+ # If we didn't run the test we also don't print a coverage report.
+ cmd_coverage_report
+ fi
+}
+
+cmd_coverage_report() {
+ LLVM_COV=$("${CC:-clang}" -print-prog-name=llvm-cov)
+ local real_build_dir=$(realpath "${BUILD_DIR}")
+ local gcovr_args=(
+ -r "${real_build_dir}"
+ --gcov-executable "${LLVM_COV} gcov"
+ # Only print coverage information for the libjxl directories. The rest
+ # is not part of the code under test.
+ --filter '.*jxl/.*'
+ --exclude '.*_gbench.cc'
+ --exclude '.*_test.cc'
+ --exclude '.*_testonly..*'
+ --exclude '.*_debug.*'
+ --exclude '.*test_utils..*'
+ --object-directory "${real_build_dir}"
+ )
+
+ (
+ cd "${real_build_dir}"
+ gcovr "${gcovr_args[@]}" --html --html-details \
+ --output="${real_build_dir}/coverage.html"
+ gcovr "${gcovr_args[@]}" --print-summary |
+ tee "${real_build_dir}/coverage.txt"
+ gcovr "${gcovr_args[@]}" --xml --output="${real_build_dir}/coverage.xml"
+ )
+}
+
+cmd_test() {
+ export_env
+ # Unpack tests if needed.
+ if [[ -e "${BUILD_DIR}/tests.tar.xz" && ! -d "${BUILD_DIR}/tests" ]]; then
+ tar -C "${BUILD_DIR}" -Jxvf "${BUILD_DIR}/tests.tar.xz"
+ fi
+ if [[ -e "${BUILD_DIR}/gcno.tar.xz" && ! -d "${BUILD_DIR}/gcno.sentinel" ]]; then
+ tar -C "${BUILD_DIR}" -Jxvf "${BUILD_DIR}/gcno.tar.xz"
+ fi
+ (cd "${BUILD_DIR}"
+ export UBSAN_OPTIONS=print_stacktrace=1
+ [[ "${TEST_STACK_LIMIT}" == "none" ]] || ulimit -s "${TEST_STACK_LIMIT}"
+ ctest -j $(nproc --all || echo 1) ${TEST_SELECTOR} --output-on-failure "$@")
+}
+
+cmd_gbench() {
+ export_env
+ (cd "${BUILD_DIR}"
+ export UBSAN_OPTIONS=print_stacktrace=1
+ lib/jxl_gbench \
+ --benchmark_counters_tabular=true \
+ --benchmark_out_format=json \
+ --benchmark_out=gbench.json "$@"
+ )
+}
+
+cmd_asanfuzz() {
+ CMAKE_CXX_FLAGS+=" -fsanitize=fuzzer-no-link -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1"
+ CMAKE_C_FLAGS+=" -fsanitize=fuzzer-no-link -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1"
+ cmd_asan -DJPEGXL_ENABLE_FUZZERS=ON "$@"
+}
+
+cmd_msanfuzz() {
+ # Install msan if needed before changing the flags.
+ detect_clang_version
+ local msan_prefix="${HOME}/.msan/${CLANG_VERSION}"
+ if [[ ! -d "${msan_prefix}" || -e "${msan_prefix}/lib/libc++abi.a" ]]; then
+ # Install msan libraries for this version if needed or if an older version
+ # with libc++abi was installed.
+ cmd_msan_install
+ fi
+
+ CMAKE_CXX_FLAGS+=" -fsanitize=fuzzer-no-link -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1"
+ CMAKE_C_FLAGS+=" -fsanitize=fuzzer-no-link -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1"
+ cmd_msan -DJPEGXL_ENABLE_FUZZERS=ON "$@"
+}
+
+cmd_asan() {
+ SANITIZER="asan"
+ CMAKE_C_FLAGS+=" -DJXL_ENABLE_ASSERT=1 -g -DADDRESS_SANITIZER \
+ -fsanitize=address ${UBSAN_FLAGS[@]}"
+ CMAKE_CXX_FLAGS+=" -DJXL_ENABLE_ASSERT=1 -g -DADDRESS_SANITIZER \
+ -fsanitize=address ${UBSAN_FLAGS[@]}"
+ strip_dead_code
+ cmake_configure "$@" -DJPEGXL_ENABLE_TCMALLOC=OFF
+ cmake_build_and_test
+}
+
+cmd_tsan() {
+ SANITIZER="tsan"
+ local tsan_args=(
+ -DJXL_ENABLE_ASSERT=1
+ -g
+ -DTHREAD_SANITIZER
+ ${UBSAN_FLAGS[@]}
+ -fsanitize=thread
+ )
+ CMAKE_C_FLAGS+=" ${tsan_args[@]}"
+ CMAKE_CXX_FLAGS+=" ${tsan_args[@]}"
+
+ CMAKE_BUILD_TYPE="RelWithDebInfo"
+ cmake_configure "$@" -DJPEGXL_ENABLE_TCMALLOC=OFF
+ cmake_build_and_test
+}
+
+cmd_msan() {
+ SANITIZER="msan"
+ detect_clang_version
+ local msan_prefix="${HOME}/.msan/${CLANG_VERSION}"
+ if [[ ! -d "${msan_prefix}" || -e "${msan_prefix}/lib/libc++abi.a" ]]; then
+ # Install msan libraries for this version if needed or if an older version
+ # with libc++abi was installed.
+ cmd_msan_install
+ fi
+
+ local msan_c_flags=(
+ -fsanitize=memory
+ -fno-omit-frame-pointer
+ -fsanitize-memory-track-origins
+
+ -DJXL_ENABLE_ASSERT=1
+ -g
+ -DMEMORY_SANITIZER
+
+ # Force gtest to not use the cxxbai.
+ -DGTEST_HAS_CXXABI_H_=0
+ )
+ local msan_cxx_flags=(
+ "${msan_c_flags[@]}"
+
+ # Some C++ sources don't use the std at all, so the -stdlib=libc++ is unused
+ # in those cases. Ignore the warning.
+ -Wno-unused-command-line-argument
+ -stdlib=libc++
+
+ # We include the libc++ from the msan directory instead, so we don't want
+ # the std includes.
+ -nostdinc++
+ -cxx-isystem"${msan_prefix}/include/c++/v1"
+ )
+
+ local msan_linker_flags=(
+ -L"${msan_prefix}"/lib
+ -Wl,-rpath -Wl,"${msan_prefix}"/lib/
+ )
+
+ CMAKE_C_FLAGS+=" ${msan_c_flags[@]} ${UBSAN_FLAGS[@]}"
+ CMAKE_CXX_FLAGS+=" ${msan_cxx_flags[@]} ${UBSAN_FLAGS[@]}"
+ CMAKE_EXE_LINKER_FLAGS+=" ${msan_linker_flags[@]}"
+ CMAKE_MODULE_LINKER_FLAGS+=" ${msan_linker_flags[@]}"
+ CMAKE_SHARED_LINKER_FLAGS+=" ${msan_linker_flags[@]}"
+ strip_dead_code
+ cmake_configure "$@" \
+ -DCMAKE_CROSSCOMPILING=1 -DRUN_HAVE_STD_REGEX=0 -DRUN_HAVE_POSIX_REGEX=0 \
+ -DJPEGXL_ENABLE_TCMALLOC=OFF -DJPEGXL_WARNINGS_AS_ERRORS=OFF \
+ -DCMAKE_REQUIRED_LINK_OPTIONS="${msan_linker_flags[@]}"
+ cmake_build_and_test
+}
+
+# Install libc++ libraries compiled with msan in the msan_prefix for the current
+# compiler version.
+cmd_msan_install() {
+ local tmpdir=$(mktemp -d)
+ CLEANUP_FILES+=("${tmpdir}")
+ # Detect the llvm to install:
+ export CC="${CC:-clang}"
+ export CXX="${CXX:-clang++}"
+ detect_clang_version
+ # Allow overriding the LLVM checkout.
+ local llvm_root="${LLVM_ROOT:-}"
+ if [ -z "${llvm_root}" ]; then
+ local llvm_tag="llvmorg-${CLANG_VERSION}.0.0"
+ case "${CLANG_VERSION}" in
+ "6.0")
+ llvm_tag="llvmorg-6.0.1"
+ ;;
+ "7")
+ llvm_tag="llvmorg-7.0.1"
+ ;;
+ esac
+ local llvm_targz="${tmpdir}/${llvm_tag}.tar.gz"
+ curl -L --show-error -o "${llvm_targz}" \
+ "https://github.com/llvm/llvm-project/archive/${llvm_tag}.tar.gz"
+ tar -C "${tmpdir}" -zxf "${llvm_targz}"
+ llvm_root="${tmpdir}/llvm-project-${llvm_tag}"
+ fi
+
+ local msan_prefix="${HOME}/.msan/${CLANG_VERSION}"
+ rm -rf "${msan_prefix}"
+
+ declare -A CMAKE_EXTRAS
+ CMAKE_EXTRAS[libcxx]="\
+ -DLIBCXX_CXX_ABI=libstdc++ \
+ -DLIBCXX_INSTALL_EXPERIMENTAL_LIBRARY=ON"
+
+ for project in libcxx; do
+ local proj_build="${tmpdir}/build-${project}"
+ local proj_dir="${llvm_root}/${project}"
+ mkdir -p "${proj_build}"
+ cmake -B"${proj_build}" -H"${proj_dir}" \
+ -G Ninja \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DLLVM_USE_SANITIZER=Memory \
+ -DLLVM_PATH="${llvm_root}/llvm" \
+ -DLLVM_CONFIG_PATH="$(which llvm-config llvm-config-7 llvm-config-6.0 | \
+ head -n1)" \
+ -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}" \
+ -DCMAKE_C_FLAGS="${CMAKE_C_FLAGS}" \
+ -DCMAKE_EXE_LINKER_FLAGS="${CMAKE_EXE_LINKER_FLAGS}" \
+ -DCMAKE_SHARED_LINKER_FLAGS="${CMAKE_SHARED_LINKER_FLAGS}" \
+ -DCMAKE_INSTALL_PREFIX="${msan_prefix}" \
+ ${CMAKE_EXTRAS[${project}]}
+ cmake --build "${proj_build}"
+ ninja -C "${proj_build}" install
+ done
+}
+
+# Internal build step shared between all cmd_ossfuzz_* commands.
+_cmd_ossfuzz() {
+ local sanitizer="$1"
+ shift
+ mkdir -p "${BUILD_DIR}"
+ local real_build_dir=$(realpath "${BUILD_DIR}")
+
+ # oss-fuzz defines three directories:
+ # * /work, with the working directory to do re-builds
+ # * /src, with the source code to build
+ # * /out, with the output directory where to copy over the built files.
+ # We use $BUILD_DIR as the /work and the script directory as the /src. The
+ # /out directory is ignored as developers are used to look for the fuzzers in
+ # $BUILD_DIR/tools/ directly.
+
+ if [[ "${sanitizer}" = "memory" && ! -d "${BUILD_DIR}/msan" ]]; then
+ sudo docker run --rm -i \
+ --user $(id -u):$(id -g) \
+ -v "${real_build_dir}":/work \
+ gcr.io/oss-fuzz-base/msan-libs-builder \
+ bash -c "cp -r /msan /work"
+ fi
+
+ # Args passed to ninja. These will be evaluated as a string separated by
+ # spaces.
+ local jpegxl_extra_args="$@"
+
+ sudo docker run --rm -i \
+ -e JPEGXL_UID=$(id -u) \
+ -e JPEGXL_GID=$(id -g) \
+ -e FUZZING_ENGINE="${FUZZING_ENGINE:-libfuzzer}" \
+ -e SANITIZER="${sanitizer}" \
+ -e ARCHITECTURE=x86_64 \
+ -e FUZZING_LANGUAGE=c++ \
+ -e MSAN_LIBS_PATH="/work/msan" \
+ -e JPEGXL_EXTRA_ARGS="${jpegxl_extra_args}" \
+ -v "${MYDIR}":/src/libjxl \
+ -v "${MYDIR}/tools/scripts/ossfuzz-build.sh":/src/build.sh \
+ -v "${real_build_dir}":/work \
+ gcr.io/oss-fuzz/libjxl
+}
+
+cmd_ossfuzz_asan() {
+ _cmd_ossfuzz address "$@"
+}
+cmd_ossfuzz_msan() {
+ _cmd_ossfuzz memory "$@"
+}
+cmd_ossfuzz_ubsan() {
+ _cmd_ossfuzz undefined "$@"
+}
+
+cmd_ossfuzz_ninja() {
+ [[ -e "${BUILD_DIR}/build.ninja" ]]
+ local real_build_dir=$(realpath "${BUILD_DIR}")
+
+ if [[ -e "${BUILD_DIR}/msan" ]]; then
+ echo "ossfuzz_ninja doesn't work with msan builds. Use ossfuzz_msan." >&2
+ exit 1
+ fi
+
+ sudo docker run --rm -i \
+ --user $(id -u):$(id -g) \
+ -v "${MYDIR}":/src/libjxl \
+ -v "${real_build_dir}":/work \
+ gcr.io/oss-fuzz/libjxl \
+ ninja -C /work "$@"
+}
+
+cmd_fast_benchmark() {
+ local small_corpus_tar="${BENCHMARK_CORPORA}/jyrki-full.tar"
+ mkdir -p "${BENCHMARK_CORPORA}"
+ curl --show-error -o "${small_corpus_tar}" -z "${small_corpus_tar}" \
+ "https://storage.googleapis.com/artifacts.jpegxl.appspot.com/corpora/jyrki-full.tar"
+
+ local tmpdir=$(mktemp -d)
+ CLEANUP_FILES+=("${tmpdir}")
+ tar -xf "${small_corpus_tar}" -C "${tmpdir}"
+
+ run_benchmark "${tmpdir}" 1048576
+}
+
+cmd_benchmark() {
+ local nikon_corpus_tar="${BENCHMARK_CORPORA}/nikon-subset.tar"
+ mkdir -p "${BENCHMARK_CORPORA}"
+ curl --show-error -o "${nikon_corpus_tar}" -z "${nikon_corpus_tar}" \
+ "https://storage.googleapis.com/artifacts.jpegxl.appspot.com/corpora/nikon-subset.tar"
+
+ local tmpdir=$(mktemp -d)
+ CLEANUP_FILES+=("${tmpdir}")
+ tar -xvf "${nikon_corpus_tar}" -C "${tmpdir}"
+
+ local sem_id="jpegxl_benchmark-$$"
+ local nprocs=$(nproc --all || echo 1)
+ images=()
+ local filename
+ while IFS= read -r filename; do
+ # This removes the './'
+ filename="${filename:2}"
+ local mode
+ if [[ "${filename:0:4}" == "srgb" ]]; then
+ mode="RGB_D65_SRG_Rel_SRG"
+ elif [[ "${filename:0:5}" == "adobe" ]]; then
+ mode="RGB_D65_Ado_Rel_Ado"
+ else
+ echo "Unknown image colorspace: ${filename}" >&2
+ exit 1
+ fi
+ png_filename="${filename%.ppm}.png"
+ png_filename=$(echo "${png_filename}" | tr '/' '_')
+ sem --bg --id "${sem_id}" -j"${nprocs}" -- \
+ "${BUILD_DIR}/tools/decode_and_encode" \
+ "${tmpdir}/${filename}" "${mode}" "${tmpdir}/${png_filename}"
+ images+=( "${png_filename}" )
+ done < <(cd "${tmpdir}"; ${FIND_BIN} . -name '*.ppm' -type f)
+ sem --id "${sem_id}" --wait
+
+ # We need about 10 GiB per thread on these images.
+ run_benchmark "${tmpdir}" 10485760
+}
+
+get_mem_available() {
+ if [[ "${OS}" == "Darwin" ]]; then
+ echo $(vm_stat | grep -F 'Pages free:' | awk '{print $3 * 4}')
+ else
+ echo $(grep -F MemAvailable: /proc/meminfo | awk '{print $2}')
+ fi
+}
+
+run_benchmark() {
+ local src_img_dir="$1"
+ local mem_per_thread="${2:-10485760}"
+
+ local output_dir="${BUILD_DIR}/benchmark_results"
+ mkdir -p "${output_dir}"
+
+ # The memory available at the beginning of the benchmark run in kB. The number
+ # of threads depends on the available memory, and the passed memory per
+ # thread. We also add a 2 GiB of constant memory.
+ local mem_available="$(get_mem_available)"
+ # Check that we actually have a MemAvailable value.
+ [[ -n "${mem_available}" ]]
+ local num_threads=$(( (${mem_available} - 1048576) / ${mem_per_thread} ))
+ if [[ ${num_threads} -le 0 ]]; then
+ num_threads=1
+ fi
+
+ local benchmark_args=(
+ --input "${src_img_dir}/*.png"
+ --codec=jpeg:yuv420:q85,webp:q80,jxl:d1:6,jxl:d1:6:downsampling=8,jxl:d5:6,jxl:d5:6:downsampling=8,jxl:m:d0:2,jxl:m:d0:3,jxl:m:d2:2
+ --output_dir "${output_dir}"
+ --noprofiler --show_progress
+ --num_threads="${num_threads}"
+ )
+ if [[ "${STORE_IMAGES}" == "1" ]]; then
+ benchmark_args+=(--save_decompressed --save_compressed)
+ fi
+ (
+ [[ "${TEST_STACK_LIMIT}" == "none" ]] || ulimit -s "${TEST_STACK_LIMIT}"
+ "${BUILD_DIR}/tools/benchmark_xl" "${benchmark_args[@]}" | \
+ tee "${output_dir}/results.txt"
+
+ # Check error code for benckmark_xl command. This will exit if not.
+ return ${PIPESTATUS[0]}
+ )
+
+ if [[ -n "${CI_BUILD_NAME:-}" ]]; then
+ { set +x; } 2>/dev/null
+ local message="Results for ${CI_BUILD_NAME} @ ${CI_COMMIT_SHORT_SHA} (job ${CI_JOB_URL:-}):
+
+$(cat "${output_dir}/results.txt")
+"
+ cmd_post_mr_comment "${message}"
+ set -x
+ fi
+}
+
+# Helper function to wait for the CPU temperature to cool down on ARM.
+wait_for_temp() {
+ { set +x; } 2>/dev/null
+ local temp_limit=${1:-38000}
+ if [[ -z "${THERMAL_FILE:-}" ]]; then
+ echo "Must define the THERMAL_FILE with the thermal_zoneX/temp file" \
+ "to read the temperature from. This is normally set in the runner." >&2
+ exit 1
+ fi
+ local org_temp=$(cat "${THERMAL_FILE}")
+ if [[ "${org_temp}" -ge "${temp_limit}" ]]; then
+ echo -n "Waiting for temp to get down from ${org_temp}... "
+ fi
+ local temp="${org_temp}"
+ local secs=0
+ while [[ "${temp}" -ge "${temp_limit}" ]]; do
+ sleep 1
+ temp=$(cat "${THERMAL_FILE}")
+ echo -n "${temp} "
+ secs=$((secs + 1))
+ if [[ ${secs} -ge 5 ]]; then
+ break
+ fi
+ done
+ if [[ "${org_temp}" -ge "${temp_limit}" ]]; then
+ echo "Done, temp=${temp}"
+ fi
+ set -x
+}
+
+# Helper function to set the cpuset restriction of the current process.
+cmd_cpuset() {
+ [[ "${SKIP_CPUSET:-}" != "1" ]] || return 0
+ local newset="$1"
+ local mycpuset=$(cat /proc/self/cpuset)
+ mycpuset="/dev/cpuset${mycpuset}"
+ # Check that the directory exists:
+ [[ -d "${mycpuset}" ]]
+ if [[ -e "${mycpuset}/cpuset.cpus" ]]; then
+ echo "${newset}" >"${mycpuset}/cpuset.cpus"
+ else
+ echo "${newset}" >"${mycpuset}/cpus"
+ fi
+}
+
+# Return the encoding/decoding speed from the Stats output.
+_speed_from_output() {
+ local speed="$1"
+ local unit="${2:-MP/s}"
+ if [[ "${speed}" == *"${unit}"* ]]; then
+ speed="${speed%% ${unit}*}"
+ speed="${speed##* }"
+ echo "${speed}"
+ fi
+}
+
+
+# Run benchmarks on ARM for the big and little CPUs.
+cmd_arm_benchmark() {
+ # Flags used for cjxl encoder with .png inputs
+ local jxl_png_benchmarks=(
+ # Lossy options:
+ "--epf=0 --distance=1.0 --speed=cheetah"
+ "--epf=2 --distance=1.0 --speed=cheetah"
+ "--epf=0 --distance=8.0 --speed=cheetah"
+ "--epf=1 --distance=8.0 --speed=cheetah"
+ "--epf=2 --distance=8.0 --speed=cheetah"
+ "--epf=3 --distance=8.0 --speed=cheetah"
+ "--modular -Q 90"
+ "--modular -Q 50"
+ # Lossless options:
+ "--modular"
+ "--modular -E 0 -I 0"
+ "--modular -P 5"
+ "--modular --responsive=1"
+ # Near-lossless options:
+ "--epf=0 --distance=0.3 --speed=fast"
+ "--modular -Q 97"
+ )
+
+ # Flags used for cjxl encoder with .jpg inputs. These should do lossless
+ # JPEG recompression (of pixels or full jpeg).
+ local jxl_jpeg_benchmarks=(
+ "--num_reps=3"
+ )
+
+ local images=(
+ "testdata/jxl/flower/flower.png"
+ )
+
+ local jpg_images=(
+ "testdata/jxl/flower/flower.png.im_q85_420.jpg"
+ )
+
+ if [[ "${SKIP_CPUSET:-}" == "1" ]]; then
+ # Use a single cpu config in this case.
+ local cpu_confs=("?")
+ else
+ # Otherwise the CPU config comes from the environment:
+ local cpu_confs=(
+ "${RUNNER_CPU_LITTLE}"
+ "${RUNNER_CPU_BIG}"
+ # The CPU description is something like 3-7, so these configurations only
+ # take the first CPU of the group.
+ "${RUNNER_CPU_LITTLE%%-*}"
+ "${RUNNER_CPU_BIG%%-*}"
+ )
+ # Check that RUNNER_CPU_ALL is defined. In the SKIP_CPUSET=1 case this will
+ # be ignored but still evaluated when calling cmd_cpuset.
+ [[ -n "${RUNNER_CPU_ALL}" ]]
+ fi
+
+ local jpg_dirname="third_party/corpora/jpeg"
+ mkdir -p "${jpg_dirname}"
+ local jpg_qualities=( 50 80 95 )
+ for src_img in "${images[@]}"; do
+ for q in "${jpg_qualities[@]}"; do
+ local jpeg_name="${jpg_dirname}/"$(basename "${src_img}" .png)"-q${q}.jpg"
+ convert -sampling-factor 1x1 -quality "${q}" \
+ "${src_img}" "${jpeg_name}"
+ jpg_images+=("${jpeg_name}")
+ done
+ done
+
+ local output_dir="${BUILD_DIR}/benchmark_results"
+ mkdir -p "${output_dir}"
+ local runs_file="${output_dir}/runs.txt"
+
+ if [[ ! -e "${runs_file}" ]]; then
+ echo -e "binary\tflags\tsrc_img\tsrc size\tsrc pixels\tcpuset\tenc size (B)\tenc speed (MP/s)\tdec speed (MP/s)\tJPG dec speed (MP/s)\tJPG dec speed (MB/s)" |
+ tee -a "${runs_file}"
+ fi
+
+ mkdir -p "${BUILD_DIR}/arm_benchmark"
+ local flags
+ local src_img
+ for src_img in "${jpg_images[@]}" "${images[@]}"; do
+ local src_img_hash=$(sha1sum "${src_img}" | cut -f 1 -d ' ')
+ local enc_binaries=("${BUILD_DIR}/tools/cjxl")
+ local src_ext="${src_img##*.}"
+ for enc_binary in "${enc_binaries[@]}"; do
+ local enc_binary_base=$(basename "${enc_binary}")
+
+ # Select the list of flags to use for the current encoder/image pair.
+ local img_benchmarks
+ if [[ "${src_ext}" == "jpg" ]]; then
+ img_benchmarks=("${jxl_jpeg_benchmarks[@]}")
+ else
+ img_benchmarks=("${jxl_png_benchmarks[@]}")
+ fi
+
+ for flags in "${img_benchmarks[@]}"; do
+ # Encoding step.
+ local enc_file_hash="${enc_binary_base} || $flags || ${src_img} || ${src_img_hash}"
+ enc_file_hash=$(echo "${enc_file_hash}" | sha1sum | cut -f 1 -d ' ')
+ local enc_file="${BUILD_DIR}/arm_benchmark/${enc_file_hash}.jxl"
+
+ for cpu_conf in "${cpu_confs[@]}"; do
+ cmd_cpuset "${cpu_conf}"
+ # nproc returns the number of active CPUs, which is given by the cpuset
+ # mask.
+ local num_threads="$(nproc)"
+
+ echo "Encoding with: ${enc_binary_base} img=${src_img} cpus=${cpu_conf} enc_flags=${flags}"
+ local enc_output
+ if [[ "${flags}" == *"modular"* ]]; then
+ # We don't benchmark encoding speed in this case.
+ if [[ ! -f "${enc_file}" ]]; then
+ cmd_cpuset "${RUNNER_CPU_ALL:-}"
+ "${enc_binary}" ${flags} "${src_img}" "${enc_file}.tmp"
+ mv "${enc_file}.tmp" "${enc_file}"
+ cmd_cpuset "${cpu_conf}"
+ fi
+ enc_output=" ?? MP/s"
+ else
+ wait_for_temp
+ enc_output=$("${enc_binary}" ${flags} "${src_img}" "${enc_file}.tmp" \
+ 2>&1 | tee /dev/stderr | grep -F "MP/s [")
+ mv "${enc_file}.tmp" "${enc_file}"
+ fi
+ local enc_speed=$(_speed_from_output "${enc_output}")
+ local enc_size=$(stat -c "%s" "${enc_file}")
+
+ echo "Decoding with: img=${src_img} cpus=${cpu_conf} enc_flags=${flags}"
+
+ local dec_output
+ wait_for_temp
+ dec_output=$("${BUILD_DIR}/tools/djxl" "${enc_file}" \
+ --num_reps=5 --num_threads="${num_threads}" 2>&1 | tee /dev/stderr |
+ grep -E "M[BP]/s \[")
+ local img_size=$(echo "${dec_output}" | cut -f 1 -d ',')
+ local img_size_x=$(echo "${img_size}" | cut -f 1 -d ' ')
+ local img_size_y=$(echo "${img_size}" | cut -f 3 -d ' ')
+ local img_size_px=$(( ${img_size_x} * ${img_size_y} ))
+ local dec_speed=$(_speed_from_output "${dec_output}")
+
+ # For JPEG lossless recompression modes (where the original is a JPEG)
+ # decode to JPG as well.
+ local jpeg_dec_mps_speed=""
+ local jpeg_dec_mbs_speed=""
+ if [[ "${src_ext}" == "jpg" ]]; then
+ wait_for_temp
+ local dec_file="${BUILD_DIR}/arm_benchmark/${enc_file_hash}.jpg"
+ dec_output=$("${BUILD_DIR}/tools/djxl" "${enc_file}" \
+ "${dec_file}" --num_reps=5 --num_threads="${num_threads}" 2>&1 | \
+ tee /dev/stderr | grep -E "M[BP]/s \[")
+ local jpeg_dec_mps_speed=$(_speed_from_output "${dec_output}")
+ local jpeg_dec_mbs_speed=$(_speed_from_output "${dec_output}" MB/s)
+ if ! cmp --quiet "${src_img}" "${dec_file}"; then
+ # Add a start at the end to signal that the files are different.
+ jpeg_dec_mbs_speed+="*"
+ fi
+ fi
+
+ # Record entry in a tab-separated file.
+ local src_img_base=$(basename "${src_img}")
+ echo -e "${enc_binary_base}\t${flags}\t${src_img_base}\t${img_size}\t${img_size_px}\t${cpu_conf}\t${enc_size}\t${enc_speed}\t${dec_speed}\t${jpeg_dec_mps_speed}\t${jpeg_dec_mbs_speed}" |
+ tee -a "${runs_file}"
+ done
+ done
+ done
+ done
+ cmd_cpuset "${RUNNER_CPU_ALL:-}"
+ cat "${runs_file}"
+
+ if [[ -n "${CI_BUILD_NAME:-}" ]]; then
+ load_mr_vars_from_commit
+ { set +x; } 2>/dev/null
+ local message="Results for ${CI_BUILD_NAME} @ ${CI_COMMIT_SHORT_SHA} (job ${CI_JOB_URL:-}):
+
+\`\`\`
+$(column -t -s " " "${runs_file}")
+\`\`\`
+"
+ cmd_post_mr_comment "${message}"
+ set -x
+ fi
+}
+
+# Generate a corpus and run the fuzzer on that corpus.
+cmd_fuzz() {
+ local corpus_dir=$(realpath "${BUILD_DIR}/fuzzer_corpus")
+ local fuzzer_crash_dir=$(realpath "${BUILD_DIR}/fuzzer_crash")
+ mkdir -p "${corpus_dir}" "${fuzzer_crash_dir}"
+ # Generate step.
+ "${BUILD_DIR}/tools/fuzzer_corpus" "${corpus_dir}"
+ # Run step:
+ local nprocs=$(nproc --all || echo 1)
+ (
+ cd "${BUILD_DIR}"
+ "tools/djxl_fuzzer" "${fuzzer_crash_dir}" "${corpus_dir}" \
+ -max_total_time="${FUZZER_MAX_TIME}" -jobs=${nprocs} \
+ -artifact_prefix="${fuzzer_crash_dir}/"
+ )
+}
+
+# Runs the linters (clang-format, build_cleaner, buildirier) on the pending CLs.
+cmd_lint() {
+ merge_request_commits
+ { set +x; } 2>/dev/null
+ local versions=(${1:-16 15 14 13 12 11 10 9 8 7 6.0})
+ local clang_format_bins=("${versions[@]/#/clang-format-}" clang-format)
+ local tmpdir=$(mktemp -d)
+ CLEANUP_FILES+=("${tmpdir}")
+
+ local ret=0
+ local build_patch="${tmpdir}/build_cleaner.patch"
+ if ! "${MYDIR}/tools/scripts/build_cleaner.py" >"${build_patch}"; then
+ ret=1
+ echo "build_cleaner.py findings:" >&2
+ "${COLORDIFF_BIN}" <"${build_patch}"
+ echo "Run \`tools/scripts/build_cleaner.py --update\` to apply them" >&2
+ fi
+
+ # It is ok, if buildifier is not installed.
+ if which buildifier >/dev/null; then
+ local buildifier_patch="${tmpdir}/buildifier.patch"
+ local bazel_files=`git -C ${MYDIR} ls-files | grep -E "/BUILD$|WORKSPACE|.bzl$"`
+ set -x
+ buildifier -d ${bazel_files} >"${buildifier_patch}"|| true
+ { set +x; } 2>/dev/null
+ if [ -s "${buildifier_patch}" ]; then
+ ret=1
+ echo 'buildifier have found some problems in Bazel build files:' >&2
+ "${COLORDIFF_BIN}" <"${buildifier_patch}"
+ echo 'To fix them run (from the base directory):' >&2
+ echo ' buildifier `git ls-files | grep -E "/BUILD$|WORKSPACE|.bzl$"`' >&2
+ fi
+ fi
+
+ local installed=()
+ local clang_patch
+ local clang_format
+ for clang_format in "${clang_format_bins[@]}"; do
+ if ! which "${clang_format}" >/dev/null; then
+ continue
+ fi
+ installed+=("${clang_format}")
+ local tmppatch="${tmpdir}/${clang_format}.patch"
+ # We include in this linter all the changes including the uncommitted changes
+ # to avoid printing changes already applied.
+ set -x
+ # Ignoring the error that git-clang-format outputs.
+ git -C "${MYDIR}" "${clang_format}" --binary "${clang_format}" \
+ --style=file --diff "${MR_ANCESTOR_SHA}" -- >"${tmppatch}" || true
+ { set +x; } 2>/dev/null
+ if grep -E '^--- ' "${tmppatch}">/dev/null; then
+ if [[ -n "${LINT_OUTPUT:-}" ]]; then
+ cp "${tmppatch}" "${LINT_OUTPUT}"
+ fi
+ clang_patch="${tmppatch}"
+ else
+ echo "clang-format check OK" >&2
+ return ${ret}
+ fi
+ done
+
+ if [[ ${#installed[@]} -eq 0 ]]; then
+ echo "You must install clang-format for \"git clang-format\"" >&2
+ exit 1
+ fi
+
+ # clang-format is installed but found problems.
+ echo "clang-format findings:" >&2
+ "${COLORDIFF_BIN}" < "${clang_patch}"
+
+ echo "clang-format found issues in your patches from ${MR_ANCESTOR_SHA}" \
+ "to the current patch. Run \`./ci.sh lint | patch -p1\` from the base" \
+ "directory to apply them." >&2
+ exit 1
+}
+
+# Runs clang-tidy on the pending CLs. If the "all" argument is passed it runs
+# clang-tidy over all the source files instead.
+cmd_tidy() {
+ local what="${1:-}"
+
+ if [[ -z "${CLANG_TIDY_BIN}" ]]; then
+ echo "ERROR: You must install clang-tidy-7 or newer to use ci.sh tidy" >&2
+ exit 1
+ fi
+
+ local git_args=()
+ if [[ "${what}" == "all" ]]; then
+ git_args=(ls-files)
+ shift
+ else
+ merge_request_commits
+ git_args=(
+ diff-tree --no-commit-id --name-only -r "${MR_ANCESTOR_SHA}"
+ "${MR_HEAD_SHA}"
+ )
+ fi
+
+ # Clang-tidy needs the compilation database generated by cmake.
+ if [[ ! -e "${BUILD_DIR}/compile_commands.json" ]]; then
+ # Generate the build options in debug mode, since we need the debug asserts
+ # enabled for the clang-tidy analyzer to use them.
+ CMAKE_BUILD_TYPE="Debug"
+ cmake_configure
+ # Build the autogen targets to generate the .h files from the .ui files.
+ local autogen_targets=(
+ $(ninja -C "${BUILD_DIR}" -t targets | grep -F _autogen: |
+ cut -f 1 -d :)
+ )
+ if [[ ${#autogen_targets[@]} != 0 ]]; then
+ ninja -C "${BUILD_DIR}" "${autogen_targets[@]}"
+ fi
+ fi
+
+ cd "${MYDIR}"
+ local nprocs=$(nproc --all || echo 1)
+ local ret=0
+ if ! parallel -j"${nprocs}" --keep-order -- \
+ "${CLANG_TIDY_BIN}" -p "${BUILD_DIR}" -format-style=file -quiet "$@" {} \
+ < <(git "${git_args[@]}" | grep -E '(\.cc|\.cpp)$') \
+ >"${BUILD_DIR}/clang-tidy.txt"; then
+ ret=1
+ fi
+ { set +x; } 2>/dev/null
+ echo "Findings statistics:" >&2
+ grep -E ' \[[A-Za-z\.,\-]+\]' -o "${BUILD_DIR}/clang-tidy.txt" | sort \
+ | uniq -c >&2
+
+ if [[ $ret -ne 0 ]]; then
+ cat >&2 <<EOF
+Errors found, see ${BUILD_DIR}/clang-tidy.txt for details.
+To automatically fix them, run:
+
+ SKIP_TEST=1 ./ci.sh debug
+ ${CLANG_TIDY_BIN} -p ${BUILD_DIR} -fix -format-style=file -quiet $@ \$(git ${git_args[@]} | grep -E '(\.cc|\.cpp)\$')
+EOF
+ fi
+
+ return ${ret}
+}
+
+# Print stats about all the packages built in ${BUILD_DIR}/debs/.
+cmd_debian_stats() {
+ { set +x; } 2>/dev/null
+ local debsdir="${BUILD_DIR}/debs"
+ local f
+ while IFS='' read -r -d '' f; do
+ echo "====================================================================="
+ echo "Package $f:"
+ dpkg --info $f
+ dpkg --contents $f
+ done < <(find "${BUILD_DIR}/debs" -maxdepth 1 -mindepth 1 -type f \
+ -name '*.deb' -print0)
+}
+
+build_debian_pkg() {
+ local srcdir="$1"
+ local srcpkg="$2"
+
+ local debsdir="${BUILD_DIR}/debs"
+ local builddir="${debsdir}/${srcpkg}"
+
+ # debuild doesn't have an easy way to build out of tree, so we make a copy
+ # of with all symlinks on the first level.
+ mkdir -p "${builddir}"
+ for f in $(find "${srcdir}" -mindepth 1 -maxdepth 1 -printf '%P\n'); do
+ if [[ ! -L "${builddir}/$f" ]]; then
+ rm -f "${builddir}/$f"
+ ln -s "${srcdir}/$f" "${builddir}/$f"
+ fi
+ done
+ (
+ cd "${builddir}"
+ debuild -b -uc -us
+ )
+}
+
+cmd_debian_build() {
+ local srcpkg="${1:-}"
+
+ case "${srcpkg}" in
+ jpeg-xl)
+ build_debian_pkg "${MYDIR}" "jpeg-xl"
+ ;;
+ highway)
+ build_debian_pkg "${MYDIR}/third_party/highway" "highway"
+ ;;
+ *)
+ echo "ERROR: Must pass a valid source package name to build." >&2
+ ;;
+ esac
+}
+
+get_version() {
+ local varname=$1
+ local line=$(grep -F "set(${varname} " lib/CMakeLists.txt | head -n 1)
+ [[ -n "${line}" ]]
+ line="${line#set(${varname} }"
+ line="${line%)}"
+ echo "${line}"
+}
+
+cmd_bump_version() {
+ local newver="${1:-}"
+
+ if ! which dch >/dev/null; then
+ echo "Missing dch\nTo install it run:\n sudo apt install devscripts"
+ exit 1
+ fi
+
+ if [[ -z "${newver}" ]]; then
+ local major=$(get_version JPEGXL_MAJOR_VERSION)
+ local minor=$(get_version JPEGXL_MINOR_VERSION)
+ local patch=0
+ minor=$(( ${minor} + 1))
+ else
+ local major="${newver%%.*}"
+ newver="${newver#*.}"
+ local minor="${newver%%.*}"
+ newver="${newver#${minor}}"
+ local patch="${newver#.}"
+ if [[ -z "${patch}" ]]; then
+ patch=0
+ fi
+ fi
+
+ newver="${major}.${minor}.${patch}"
+
+ echo "Bumping version to ${newver} (${major}.${minor}.${patch})"
+ sed -E \
+ -e "s/(set\\(JPEGXL_MAJOR_VERSION) [0-9]+\\)/\\1 ${major})/" \
+ -e "s/(set\\(JPEGXL_MINOR_VERSION) [0-9]+\\)/\\1 ${minor})/" \
+ -e "s/(set\\(JPEGXL_PATCH_VERSION) [0-9]+\\)/\\1 ${patch})/" \
+ -i lib/CMakeLists.txt
+ sed -E \
+ -e "s/(LIBJXL_VERSION: )[0-9\\.]+/\\1 ${major}.${minor}.${patch}/" \
+ -e "s/(LIBJXL_ABI_VERSION: )[0-9\\.]+/\\1 ${major}.${minor}/" \
+ -i .github/workflows/conformance.yml
+
+ # Update lib.gni
+ tools/scripts/build_cleaner.py --update
+
+ # Mark the previous version as "unstable".
+ DEBCHANGE_RELEASE_HEURISTIC=log dch -M --distribution unstable --release ''
+ DEBCHANGE_RELEASE_HEURISTIC=log dch -M \
+ --newversion "${newver}" \
+ "Bump JPEG XL version to ${newver}."
+}
+
+# Check that the AUTHORS file contains the email of the committer.
+cmd_authors() {
+ merge_request_commits
+ local emails
+ local names
+ readarray -t emails < <(git log --format='%ae' "${MR_ANCESTOR_SHA}..${MR_HEAD_SHA}")
+ readarray -t names < <(git log --format='%an' "${MR_ANCESTOR_SHA}..${MR_HEAD_SHA}")
+ for i in "${!names[@]}"; do
+ echo "Checking name '${names[$i]}' with email '${emails[$i]}' ..."
+ "${MYDIR}"/tools/scripts/check_author.py "${emails[$i]}" "${names[$i]}"
+ done
+}
+
+main() {
+ local cmd="${1:-}"
+ if [[ -z "${cmd}" ]]; then
+ cat >&2 <<EOF
+Use: $0 CMD
+
+Where cmd is one of:
+ opt Build and test a Release with symbols build.
+ debug Build and test a Debug build (NDEBUG is not defined).
+ release Build and test a striped Release binary without debug information.
+ asan Build and test an ASan (AddressSanitizer) build.
+ msan Build and test an MSan (MemorySanitizer) build. Needs to have msan
+ c++ libs installed with msan_install first.
+ tsan Build and test a TSan (ThreadSanitizer) build.
+ asanfuzz Build and test an ASan (AddressSanitizer) build for fuzzing.
+ msanfuzz Build and test an MSan (MemorySanitizer) build for fuzzing.
+ test Run the tests build by opt, debug, release, asan or msan. Useful when
+ building with SKIP_TEST=1.
+ gbench Run the Google benchmark tests.
+ fuzz Generate the fuzzer corpus and run the fuzzer on it. Useful after
+ building with asan or msan.
+ benchmark Run the benchmark over the default corpus.
+ fast_benchmark Run the benchmark over the small corpus.
+
+ coverage Build and run tests with coverage support. Runs coverage_report as
+ well.
+ coverage_report Generate HTML, XML and text coverage report after a coverage
+ run.
+
+ lint Run the linter checks on the current commit or merge request.
+ tidy Run clang-tidy on the current commit or merge request.
+ authors Check that the last commit's author is listed in the AUTHORS file.
+
+ msan_install Install the libc++ libraries required to build in msan mode. This
+ needs to be done once.
+
+ debian_build <srcpkg> Build the given source package.
+ debian_stats Print stats about the built packages.
+
+oss-fuzz commands:
+ ossfuzz_asan Build the local source inside oss-fuzz docker with asan.
+ ossfuzz_msan Build the local source inside oss-fuzz docker with msan.
+ ossfuzz_ubsan Build the local source inside oss-fuzz docker with ubsan.
+ ossfuzz_ninja Run ninja on the BUILD_DIR inside the oss-fuzz docker. Extra
+ parameters are passed to ninja, for example "djxl_fuzzer" will
+ only build that ninja target. Use for faster build iteration
+ after one of the ossfuzz_*san commands.
+
+You can pass some optional environment variables as well:
+ - BUILD_DIR: The output build directory (by default "$$repo/build")
+ - BUILD_TARGET: The target triplet used when cross-compiling.
+ - CMAKE_FLAGS: Convenience flag to pass both CMAKE_C_FLAGS and CMAKE_CXX_FLAGS.
+ - CMAKE_PREFIX_PATH: Installation prefixes to be searched by the find_package.
+ - ENABLE_WASM_SIMD=1: enable experimental SIMD in WASM build (only).
+ - FUZZER_MAX_TIME: "fuzz" command fuzzer running timeout in seconds.
+ - LINT_OUTPUT: Path to the output patch from the "lint" command.
+ - SKIP_CPUSET=1: Skip modifying the cpuset in the arm_benchmark.
+ - SKIP_BUILD=1: Skip the build stage, cmake configure only.
+ - SKIP_TEST=1: Skip the test stage.
+ - STORE_IMAGES=0: Makes the benchmark discard the computed images.
+ - TEST_STACK_LIMIT: Stack size limit (ulimit -s) during tests, in KiB.
+ - TEST_SELECTOR: pass additional arguments to ctest, e.g. "-R .Resample.".
+ - STACK_SIZE=1: Generate binaries with the .stack_sizes sections.
+
+These optional environment variables are forwarded to the cmake call as
+parameters:
+ - CMAKE_BUILD_TYPE
+ - CMAKE_C_FLAGS
+ - CMAKE_CXX_FLAGS
+ - CMAKE_C_COMPILER_LAUNCHER
+ - CMAKE_CXX_COMPILER_LAUNCHER
+ - CMAKE_CROSSCOMPILING_EMULATOR
+ - CMAKE_FIND_ROOT_PATH
+ - CMAKE_EXE_LINKER_FLAGS
+ - CMAKE_MAKE_PROGRAM
+ - CMAKE_MODULE_LINKER_FLAGS
+ - CMAKE_SHARED_LINKER_FLAGS
+ - CMAKE_TOOLCHAIN_FILE
+
+Example:
+ BUILD_DIR=/tmp/build $0 opt
+EOF
+ exit 1
+ fi
+
+ cmd="cmd_${cmd}"
+ shift
+ set -x
+ "${cmd}" "$@"
+}
+
+main "$@"
diff --git a/third_party/jpeg-xl/cmake/FindAtomics.cmake b/third_party/jpeg-xl/cmake/FindAtomics.cmake
new file mode 100644
index 0000000000..9a6cdc39ec
--- /dev/null
+++ b/third_party/jpeg-xl/cmake/FindAtomics.cmake
@@ -0,0 +1,53 @@
+# Original issue:
+# * https://gitlab.kitware.com/cmake/cmake/-/issues/23021#note_1098733
+#
+# For reference:
+# * https://gcc.gnu.org/wiki/Atomic/GCCMM
+#
+# riscv64 specific:
+# * https://lists.debian.org/debian-riscv/2022/01/msg00009.html
+#
+# ATOMICS_FOUND - system has c++ atomics
+# ATOMICS_LIBRARIES - libraries needed to use c++ atomics
+
+include(CheckCXXSourceCompiles)
+
+# RISC-V only has 32-bit and 64-bit atomic instructions. GCC is supposed
+# to convert smaller atomics to those larger ones via masking and
+# shifting like LLVM, but it’s a known bug that it does not. This means
+# anything that wants to use atomics on 1-byte or 2-byte types needs
+# -latomic, but not 4-byte or 8-byte (though it does no harm).
+set(atomic_code
+ "
+ #include <atomic>
+ #include <cstdint>
+ std::atomic<uint8_t> n8 (0); // riscv64
+ std::atomic<uint64_t> n64 (0); // armel, mipsel, powerpc
+ int main() {
+ ++n8;
+ ++n64;
+ return 0;
+ }")
+
+check_cxx_source_compiles("${atomic_code}" ATOMICS_LOCK_FREE_INSTRUCTIONS)
+
+if(ATOMICS_LOCK_FREE_INSTRUCTIONS)
+ set(ATOMICS_FOUND TRUE)
+ set(ATOMICS_LIBRARIES)
+else()
+ set(CMAKE_REQUIRED_LIBRARIES "-latomic")
+ check_cxx_source_compiles("${atomic_code}" ATOMICS_IN_LIBRARY)
+ set(CMAKE_REQUIRED_LIBRARIES)
+ if(ATOMICS_IN_LIBRARY)
+ set(ATOMICS_LIBRARY atomic)
+ include(FindPackageHandleStandardArgs)
+ find_package_handle_standard_args(Atomics DEFAULT_MSG ATOMICS_LIBRARY)
+ set(ATOMICS_LIBRARIES ${ATOMICS_LIBRARY})
+ unset(ATOMICS_LIBRARY)
+ else()
+ if(Atomics_FIND_REQUIRED)
+ message(FATAL_ERROR "Neither lock free instructions nor -latomic found.")
+ endif()
+ endif()
+endif()
+unset(atomic_code)
diff --git a/third_party/jpeg-xl/cmake/FindBrotli.cmake b/third_party/jpeg-xl/cmake/FindBrotli.cmake
new file mode 100644
index 0000000000..9fb78e47d8
--- /dev/null
+++ b/third_party/jpeg-xl/cmake/FindBrotli.cmake
@@ -0,0 +1,75 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+set(brlibs brotlicommon brotlienc brotlidec)
+
+find_package(PkgConfig QUIET)
+if (PkgConfig_FOUND)
+ foreach(brlib IN ITEMS ${brlibs})
+ string(TOUPPER "${brlib}" BRPREFIX)
+ pkg_check_modules("PC_${BRPREFIX}" lib${brlib})
+ endforeach()
+endif()
+
+find_path(BROTLI_INCLUDE_DIR
+ NAMES brotli/decode.h
+ HINTS ${PC_BROTLICOMMON_INCLUDEDIR} ${PC_BROTLICOMMON_INCLUDE_DIRS}
+)
+
+foreach(brlib IN ITEMS ${brlibs})
+ string(TOUPPER "${brlib}" BRPREFIX)
+ find_library(${BRPREFIX}_LIBRARY
+ NAMES ${${BRPREFIX}_NAMES} ${brlib}
+ HINTS ${PC_${BRPREFIX}_LIBDIR} ${PC_${BRPREFIX}_LIBRARY_DIRS}
+ )
+
+ if (${BRPREFIX}_LIBRARY AND NOT TARGET ${brlib})
+ if(CMAKE_VERSION VERSION_LESS "3.13.5")
+ add_library(${brlib} INTERFACE IMPORTED GLOBAL)
+ set_property(TARGET ${brlib} PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${BROTLI_INCLUDE_DIR})
+ target_link_libraries(${brlib} INTERFACE ${${BRPREFIX}_LIBRARY})
+ set_property(TARGET ${brlib} PROPERTY INTERFACE_COMPILE_OPTIONS ${PC_${BRPREFIX}_CFLAGS_OTHER})
+ else()
+ add_library(${brlib} INTERFACE IMPORTED GLOBAL)
+ target_include_directories(${brlib}
+ INTERFACE ${BROTLI_INCLUDE_DIR})
+ target_link_libraries(${brlib}
+ INTERFACE ${${BRPREFIX}_LIBRARY})
+ target_link_options(${brlib}
+ INTERFACE ${PC_${BRPREFIX}_LDFLAGS_OTHER})
+ target_compile_options(${brlib}
+ INTERFACE ${PC_${BRPREFIX}_CFLAGS_OTHER})
+ endif()
+ endif()
+endforeach()
+
+if (BROTLICOMMON_FOUND AND BROTLIENC_FOUND AND BROTLIDEC_FOUND)
+ set(Brotli_FOUND ON)
+else ()
+ set(Brotli_FOUND OFF)
+endif()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(Brotli
+ FOUND_VAR Brotli_FOUND
+ REQUIRED_VARS
+ BROTLI_INCLUDE_DIR
+ BROTLICOMMON_LIBRARY
+ BROTLIENC_LIBRARY
+ BROTLIDEC_LIBRARY
+ VERSION_VAR Brotli_VERSION
+)
+
+mark_as_advanced(
+ BROTLI_INCLUDE_DIR
+ BROTLICOMMON_LIBRARY
+ BROTLIENC_LIBRARY
+ BROTLIDEC_LIBRARY
+)
+
+if (Brotli_FOUND)
+ set(Brotli_LIBRARIES ${BROTLICOMMON_LIBRARY} ${BROTLIENC_LIBRARY} ${BROTLIDEC_LIBRARY})
+ set(Brotli_INCLUDE_DIRS ${BROTLI_INCLUDE_DIR})
+endif()
diff --git a/third_party/jpeg-xl/cmake/FindHWY.cmake b/third_party/jpeg-xl/cmake/FindHWY.cmake
new file mode 100644
index 0000000000..c1deb9b851
--- /dev/null
+++ b/third_party/jpeg-xl/cmake/FindHWY.cmake
@@ -0,0 +1,66 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+find_package(PkgConfig QUIET)
+if (PkgConfig_FOUND)
+ pkg_check_modules(PC_HWY QUIET libhwy)
+ set(HWY_VERSION ${PC_HWY_VERSION})
+endif ()
+
+find_path(HWY_INCLUDE_DIR
+ NAMES hwy/highway.h
+ HINTS ${PC_HWY_INCLUDEDIR} ${PC_HWY_INCLUDE_DIRS}
+)
+
+find_library(HWY_LIBRARY
+ NAMES ${HWY_NAMES} hwy
+ HINTS ${PC_HWY_LIBDIR} ${PC_HWY_LIBRARY_DIRS}
+)
+
+if (HWY_INCLUDE_DIR AND NOT HWY_VERSION)
+ if (EXISTS "${HWY_INCLUDE_DIR}/hwy/highway.h")
+ file(READ "${HWY_INCLUDE_DIR}/hwy/highway.h" HWY_VERSION_CONTENT)
+
+ string(REGEX MATCH "#define HWY_MAJOR +([0-9]+)" _dummy "${HWY_VERSION_CONTENT}")
+ set(HWY_VERSION_MAJOR "${CMAKE_MATCH_1}")
+
+ string(REGEX MATCH "#define +HWY_MINOR +([0-9]+)" _dummy "${HWY_VERSION_CONTENT}")
+ set(HWY_VERSION_MINOR "${CMAKE_MATCH_1}")
+
+ string(REGEX MATCH "#define +HWY_PATCH +([0-9]+)" _dummy "${HWY_VERSION_CONTENT}")
+ set(HWY_VERSION_PATCH "${CMAKE_MATCH_1}")
+
+ set(HWY_VERSION "${HWY_VERSION_MAJOR}.${HWY_VERSION_MINOR}.${HWY_VERSION_PATCH}")
+ endif ()
+endif ()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(HWY
+ FOUND_VAR HWY_FOUND
+ REQUIRED_VARS HWY_LIBRARY HWY_INCLUDE_DIR
+ VERSION_VAR HWY_VERSION
+)
+
+if (HWY_LIBRARY AND NOT TARGET hwy)
+ add_library(hwy INTERFACE IMPORTED GLOBAL)
+
+ if(CMAKE_VERSION VERSION_LESS "3.13.5")
+ set_property(TARGET hwy PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${HWY_INCLUDE_DIR})
+ target_link_libraries(hwy INTERFACE ${HWY_LIBRARY})
+ set_property(TARGET hwy PROPERTY INTERFACE_COMPILE_OPTIONS ${PC_HWY_CFLAGS_OTHER})
+ else()
+ target_include_directories(hwy INTERFACE ${HWY_INCLUDE_DIR})
+ target_link_libraries(hwy INTERFACE ${HWY_LIBRARY})
+ target_link_options(hwy INTERFACE ${PC_HWY_LDFLAGS_OTHER})
+ target_compile_options(hwy INTERFACE ${PC_HWY_CFLAGS_OTHER})
+ endif()
+endif()
+
+mark_as_advanced(HWY_INCLUDE_DIR HWY_LIBRARY)
+
+if (HWY_FOUND)
+ set(HWY_LIBRARIES ${HWY_LIBRARY})
+ set(HWY_INCLUDE_DIRS ${HWY_INCLUDE_DIR})
+endif ()
diff --git a/third_party/jpeg-xl/cmake/FindLCMS2.cmake b/third_party/jpeg-xl/cmake/FindLCMS2.cmake
new file mode 100644
index 0000000000..0a7b54eb96
--- /dev/null
+++ b/third_party/jpeg-xl/cmake/FindLCMS2.cmake
@@ -0,0 +1,59 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+find_package(PkgConfig QUIET)
+if (PkgConfig_FOUND)
+ pkg_check_modules(PC_LCMS2 QUIET libLCMS2)
+ set(LCMS2_VERSION ${PC_LCMS2_VERSION})
+endif ()
+
+find_path(LCMS2_INCLUDE_DIR
+ NAMES lcms2.h
+ HINTS ${PC_LCMS2_INCLUDEDIR} ${PC_LCMS2_INCLUDE_DIRS}
+)
+
+find_library(LCMS2_LIBRARY
+ NAMES ${LCMS2_NAMES} lcms2 liblcms2 lcms-2 liblcms-2
+ HINTS ${PC_LCMS2_LIBDIR} ${PC_LCMS2_LIBRARY_DIRS}
+)
+
+if (LCMS2_INCLUDE_DIR AND NOT LCMS_VERSION)
+ file(READ ${LCMS2_INCLUDE_DIR}/lcms2.h LCMS2_VERSION_CONTENT)
+ string(REGEX MATCH "#define[ \t]+LCMS_VERSION[ \t]+([0-9]+)[ \t]*\n" LCMS2_VERSION_MATCH ${LCMS2_VERSION_CONTENT})
+ if (LCMS2_VERSION_MATCH)
+ string(SUBSTRING ${CMAKE_MATCH_1} 0 1 LCMS2_VERSION_MAJOR)
+ string(SUBSTRING ${CMAKE_MATCH_1} 1 2 LCMS2_VERSION_MINOR)
+ set(LCMS2_VERSION "${LCMS2_VERSION_MAJOR}.${LCMS2_VERSION_MINOR}")
+ endif ()
+endif ()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(LCMS2
+ FOUND_VAR LCMS2_FOUND
+ REQUIRED_VARS LCMS2_LIBRARY LCMS2_INCLUDE_DIR
+ VERSION_VAR LCMS2_VERSION
+)
+
+if (LCMS2_LIBRARY AND NOT TARGET lcms2)
+ add_library(lcms2 INTERFACE IMPORTED GLOBAL)
+
+ if(CMAKE_VERSION VERSION_LESS "3.13.5")
+ set_property(TARGET lcms2 PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${LCMS2_INCLUDE_DIR})
+ target_link_libraries(lcms2 INTERFACE ${LCMS2_LIBRARY})
+ set_property(TARGET lcms2 PROPERTY INTERFACE_COMPILE_OPTIONS ${PC_LCMS2_CFLAGS_OTHER})
+ else()
+ target_include_directories(lcms2 INTERFACE ${LCMS2_INCLUDE_DIR})
+ target_link_libraries(lcms2 INTERFACE ${LCMS2_LIBRARY})
+ target_link_options(lcms2 INTERFACE ${PC_LCMS2_LDFLAGS_OTHER})
+ target_compile_options(lcms2 INTERFACE ${PC_LCMS2_CFLAGS_OTHER})
+ endif()
+endif()
+
+mark_as_advanced(LCMS2_INCLUDE_DIR LCMS2_LIBRARY)
+
+if (LCMS2_FOUND)
+ set(LCMS2_LIBRARIES ${LCMS2_LIBRARY})
+ set(LCMS2_INCLUDE_DIRS ${LCMS2_INCLUDE_DIR})
+endif ()
diff --git a/third_party/jpeg-xl/debian/changelog b/third_party/jpeg-xl/debian/changelog
new file mode 100644
index 0000000000..6fbaddf68a
--- /dev/null
+++ b/third_party/jpeg-xl/debian/changelog
@@ -0,0 +1,95 @@
+jpeg-xl (0.9.0) UNRELEASED; urgency=medium
+
+ * Bump JPEG XL version to 0.9.0.
+
+ -- JPEG XL Maintainers <jpegxl@google.com> Wed, 11 Jan 2023 16:12:35 +0000
+
+jpeg-xl (0.8) unstable; urgency=medium
+
+ * Bump JPEG XL version to 0.8.
+
+ -- JPEG XL Maintainers <jpegxl@google.com> Wed, 11 Jan 2023 16:12:34 +0000
+
+jpeg-xl (0.7) unstable; urgency=medium
+
+ * Bump JPEG XL version to 0.7.
+
+ -- JPEG XL Maintainers <jpegxl@google.com> Mon, 08 Aug 2022 14:43:58 +0000
+
+jpeg-xl (0.6) unstable; urgency=medium
+
+ * Bump JPEG XL version to 0.6.
+
+ -- JPEG XL Maintainers <jpegxl@google.com> Fri, 10 Sep 2021 16:08:17 +0200
+
+jpeg-xl (0.5.0) unstable; urgency=medium
+
+ * Bump JPEG XL version to 0.5.0.
+
+ -- JPEG XL Maintainers <jpegxl@google.com> Thu, 12 Aug 2021 23:49:40 +0200
+
+jpeg-xl (0.3.7) UNRELEASED; urgency=medium
+
+ * Bump JPEG XL version to 0.3.7.
+
+ -- Sami Boukortt <sboukortt@google.com> Mon, 29 Mar 2021 12:14:20 +0200
+
+jpeg-xl (0.3.6) UNRELEASED; urgency=medium
+
+ * Bump JPEG XL version to 0.3.6.
+
+ -- Sami Boukortt <sboukortt@google.com> Thu, 25 Mar 2021 17:40:58 +0100
+
+jpeg-xl (0.3.5) UNRELEASED; urgency=medium
+
+ * Bump JPEG XL version to 0.3.5.
+
+ -- Sami Boukortt <sboukortt@google.com> Tue, 23 Mar 2021 15:20:44 +0100
+
+jpeg-xl (0.3.4) UNRELEASED; urgency=medium
+
+ * Bump JPEG XL version to 0.3.4.
+
+ -- Sami Boukortt <sboukortt@google.com> Tue, 16 Mar 2021 12:13:59 +0100
+
+jpeg-xl (0.3.3) UNRELEASED; urgency=medium
+
+ * Bump JPEG XL version to 0.3.3.
+
+ -- Sami Boukortt <sboukortt@google.com> Fri, 5 Mar 2021 19:15:26 +0100
+
+jpeg-xl (0.3.2) UNRELEASED; urgency=medium
+
+ * Bump JPEG XL version to 0.3.2.
+
+ -- Alex Deymo <deymo@google.com> Fri, 12 Feb 2021 21:00:12 +0100
+
+jpeg-xl (0.3.1) UNRELEASED; urgency=medium
+
+ * Bump JPEG XL version to 0.3.1.
+
+ -- Alex Deymo <deymo@google.com> Tue, 09 Feb 2021 09:48:43 +0100
+
+jpeg-xl (0.3) UNRELEASED; urgency=medium
+
+ * Bump JPEG XL version to 0.3.
+
+ -- Alex Deymo <deymo@google.com> Wed, 27 Jan 2021 22:36:32 +0100
+
+jpeg-xl (0.2) UNRELEASED; urgency=medium
+
+ * Bump JPEG XL version to 0.2.
+
+ -- Alex Deymo <deymo@google.com> Wed, 23 Nov 2020 20:42:10 +0100
+
+jpeg-xl (0.1) UNRELEASED; urgency=medium
+
+ * JPEG XL format release candidate.
+
+ -- Alex Deymo <deymo@google.com> Fri, 13 Nov 2020 17:42:24 +0100
+
+jpeg-xl (0.0.2-1) UNRELEASED; urgency=medium
+
+ * Initial debian package.
+
+ -- Alex Deymo <deymo@google.com> Tue, 27 Oct 2020 15:27:59 +0100
diff --git a/third_party/jpeg-xl/debian/compat b/third_party/jpeg-xl/debian/compat
new file mode 100644
index 0000000000..f599e28b8a
--- /dev/null
+++ b/third_party/jpeg-xl/debian/compat
@@ -0,0 +1 @@
+10
diff --git a/third_party/jpeg-xl/debian/control b/third_party/jpeg-xl/debian/control
new file mode 100644
index 0000000000..f5dc5ce0cc
--- /dev/null
+++ b/third_party/jpeg-xl/debian/control
@@ -0,0 +1,88 @@
+Source: jpeg-xl
+Maintainer: JPEG XL Maintainers <jpegxl@google.com>
+Section: misc
+Priority: optional
+Standards-Version: 3.9.8
+Build-Depends:
+ asciidoc,
+ cmake,
+ debhelper (>= 9),
+ libbrotli-dev,
+ libgdk-pixbuf-2.0-dev | libgdk-pixbuf2.0-dev,
+ libgif-dev,
+ libgimp2.0-dev,
+ libgmock-dev,
+ libgoogle-perftools-dev,
+ libgtest-dev,
+ libhwy-dev (>= 1.0.0),
+ libjpeg-dev,
+ libopenexr-dev,
+ libpng-dev,
+ libwebp-dev,
+ pkg-config,
+ xdg-utils,
+ xmlto,
+Homepage: https://github.com/libjxl/libjxl
+Rules-Requires-Root: no
+
+Package: jxl
+Architecture: any
+Section: utils
+Depends: ${misc:Depends}, ${shlibs:Depends}
+Description: JPEG XL Image Coding System - "JXL" (command line utility)
+ The JPEG XL Image Coding System (ISO/IEC 18181) is a lossy and
+ lossless image compression format. It has a rich feature set and is
+ particularly optimized for responsive web environments, so that
+ content renders well on a wide range of devices. Moreover, it includes
+ several features that help transition from the legacy JPEG format.
+ .
+ This package installs the command line utilities.
+
+Package: libjxl-dev
+Architecture: any
+Section: libdevel
+Depends: libjxl (= ${binary:Version}), ${misc:Depends}
+ libhwy-dev,
+Description: JPEG XL Image Coding System - "JXL" (development files)
+ The JPEG XL Image Coding System (ISO/IEC 18181) is a lossy and
+ lossless image compression format. It has a rich feature set and is
+ particularly optimized for responsive web environments, so that
+ content renders well on a wide range of devices. Moreover, it includes
+ several features that help transition from the legacy JPEG format.
+ .
+ This package installs development files.
+
+Package: libjxl
+Architecture: any
+Multi-Arch: same
+Section: libs
+Depends: ${shlibs:Depends}, ${misc:Depends}
+Pre-Depends: ${misc:Pre-Depends}
+Description: JPEG XL Image Coding System - "JXL" (shared libraries)
+ The JPEG XL Image Coding System (ISO/IEC 18181) is a lossy and
+ lossless image compression format. It has a rich feature set and is
+ particularly optimized for responsive web environments, so that
+ content renders well on a wide range of devices. Moreover, it includes
+ several features that help transition from the legacy JPEG format.
+ .
+ This package installs shared libraries.
+
+Package: libjxl-gdk-pixbuf
+Architecture: any
+Multi-Arch: same
+Section: libs
+Depends: ${shlibs:Depends}, ${misc:Depends}
+Pre-Depends: ${misc:Pre-Depends}
+Description: JPEG XL Plugin for gdk-pixbuf
+ This package installs the required files for reading JPEG XL files in
+ GTK applications.
+
+Package: libjxl-gimp-plugin
+Architecture: any
+Multi-Arch: same
+Section: graphics
+Depends: ${shlibs:Depends}, ${misc:Depends}
+Pre-Depends: ${misc:Pre-Depends}
+Enhances: gimp
+Description: JPEG XL Import and Export Plugin for GIMP
+ This is a plugin for GIMP version 2.10.x to import and export JPEG XL images.
diff --git a/third_party/jpeg-xl/debian/copyright b/third_party/jpeg-xl/debian/copyright
new file mode 100644
index 0000000000..20225a9209
--- /dev/null
+++ b/third_party/jpeg-xl/debian/copyright
@@ -0,0 +1,194 @@
+Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: jpeg-xl
+
+Files: *
+Copyright: 2020 the JPEG XL Project
+License: BSD-3-clause
+
+Files: third_party/sjpeg/*
+Copyright: 2017 Google, Inc
+License: Apache-2.0
+
+Files: third_party/skcms/*
+Copyright: 2018 Google Inc.
+License: BSD-3-clause
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+ .
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Google Inc. nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+ .
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Files: testdata/external/pngsuite/*
+Copyright: Willem van Schaik, 1996, 2011
+License: PngSuite License
+ See http://www.schaik.com/pngsuite/ for details.
+ .
+ Permission to use, copy, modify and distribute these images for any
+ purpose and without fee is hereby granted.
+
+Files: testdata/external/raw.pixls/*
+Copyright: their respective owners listed in https://raw.pixls.us/
+License: CC0-1.0
+
+Files: testdata/external/wesaturate/*
+Copyright: their respective owners listed in https://www.wesaturate.com/
+License: CC0-1.0
+
+Files: testdata/external/wide-gamut-tests/
+Copyright: github.com/codelogic/wide-gamut-tests authors.
+License: Apache-2.0
+
+License: Apache-2.0
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ .
+ http://www.apache.org/licenses/LICENSE-2.0
+ .
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ .
+ On Debian systems, the complete text of the Apache License, Version 2
+ can be found in "/usr/share/common-licenses/Apache-2.0".
+
+License: CC0
+ Creative Commons Zero v1.0 Universal
+ .
+ CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL
+ SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT
+ RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS"
+ BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS
+ DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS
+ LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE
+ INFORMATION OR WORKS PROVIDED HEREUNDER.
+ .
+ Statement of Purpose
+ .
+ The laws of most jurisdictions throughout the world automatically confer
+ exclusive Copyright and Related Rights (defined below) upon the creator and
+ subsequent owner(s) (each and all, an "owner") of an original work of
+ authorship and/or a database (each, a "Work").
+ .
+ Certain owners wish to permanently relinquish those rights to a Work for the
+ purpose of contributing to a commons of creative, cultural and scientific
+ works ("Commons") that the public can reliably and without fear of later
+ claims of infringement build upon, modify, incorporate in other works, reuse
+ and redistribute as freely as possible in any form whatsoever and for any
+ purposes, including without limitation commercial purposes. These owners may
+ contribute to the Commons to promote the ideal of a free culture and the
+ further production of creative, cultural and scientific works, or to gain
+ reputation or greater distribution for their Work in part through the use
+ and efforts of others.
+ .
+ For these and/or other purposes and motivations, and without any expectation
+ of additional consideration or compensation, the person associating CC0 with
+ a Work (the "Affirmer"), to the extent that he or she is an owner of
+ Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to
+ the Work and publicly distribute the Work under its terms, with knowledge of
+ his or her Copyright and Related Rights in the Work and the meaning and
+ intended legal effect of CC0 on those rights.
+ .
+ 1. Copyright and Related Rights. A Work made available under CC0 may be
+ protected by copyright and related or neighboring rights ("Copyright and
+ Related Rights"). Copyright and Related Rights include, but are not limited
+ to, the following:
+ i. the right to reproduce, adapt, distribute, perform, display,
+ communicate, and translate a Work;
+ ii. moral rights retained by the original author(s) and/or performer(s);
+ iii. publicity and privacy rights pertaining to a person's image or
+ likeness depicted in a Work;
+ iv. rights protecting against unfair competition in regards to a Work,
+ subject to the limitations in paragraph 4(a), below;
+ v. rights protecting the extraction, dissemination, use and reuse of data
+ in a Work;
+ vi. database rights (such as those arising under Directive 96/9/EC of the
+ European Parliament and of the Council of 11 March 1996 on the legal
+ protection of databases, and under any national implementation thereof,
+ including any amended or successor version of such directive); and
+ vii. other similar, equivalent or corresponding rights throughout the
+ world based on applicable law or treaty, and any national implementations
+ thereof.
+ .
+ 2. Waiver. To the greatest extent permitted by, but not in contravention of,
+ applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
+ unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
+ and Related Rights and associated claims and causes of action, whether now
+ known or unknown (including existing as well as future claims and causes of
+ action), in the Work (i) in all territories worldwide, (ii) for the maximum
+ duration provided by applicable law or treaty (including future time
+ extensions), (iii) in any current or future medium and for any number of
+ copies, and (iv) for any purpose whatsoever, including without limitation
+ commercial, advertising or promotional purposes (the "Waiver"). Affirmer
+ makes the Waiver for the benefit of each member of the public at large and
+ to the detriment of Affirmer's heirs and successors, fully intending that
+ such Waiver shall not be subject to revocation, rescission, cancellation,
+ termination, or any other legal or equitable action to disrupt the quiet
+ enjoyment of the Work by the public as contemplated by Affirmer's express
+ Statement of Purpose.
+ .
+ 3. Public License Fallback. Should any part of the Waiver for any reason be
+ judged legally invalid or ineffective under applicable law, then the Waiver
+ shall be preserved to the maximum extent permitted taking into account
+ Affirmer's express Statement of Purpose. In addition, to the extent the
+ Waiver is so judged Affirmer hereby grants to each affected person a
+ royalty-free, non transferable, non sublicensable, non exclusive,
+ irrevocable and unconditional license to exercise Affirmer's Copyright and
+ Related Rights in the Work (i) in all territories worldwide, (ii) for the
+ maximum duration provided by applicable law or treaty (including future time
+ extensions), (iii) in any current or future medium and for any number of
+ copies, and (iv) for any purpose whatsoever, including without limitation
+ commercial, advertising or promotional purposes (the "License"). The License
+ shall be deemed effective as of the date CC0 was applied by Affirmer to the
+ Work. Should any part of the License for any reason be judged legally
+ invalid or ineffective under applicable law, such partial invalidity or
+ ineffectiveness shall not invalidate the remainder of the License, and in
+ such case Affirmer hereby affirms that he or she will not (i) exercise any
+ of his or her remaining Copyright and Related Rights in the Work or (ii)
+ assert any associated claims and causes of action with respect to the Work,
+ in either case contrary to Affirmer's express Statement of Purpose.
+ .
+ 4. Limitations and Disclaimers.
+ a. No trademark or patent rights held by Affirmer are waived, abandoned,
+ surrendered, licensed or otherwise affected by this document.
+ b. Affirmer offers the Work as-is and makes no representations or
+ warranties of any kind concerning the Work, express, implied, statutory or
+ otherwise, including without limitation warranties of title,
+ merchantability, fitness for a particular purpose, non infringement, or the
+ absence of latent or other defects, accuracy, or the present or absence of
+ errors, whether or not discoverable, all to the greatest extent permissible
+ under applicable law.
+ c. Affirmer disclaims responsibility for clearing rights of other persons
+ that may apply to the Work or any use thereof, including without limitation
+ any person's Copyright and Related Rights in the Work. Further, Affirmer
+ disclaims responsibility for obtaining any necessary consents, permissions
+ or other rights required for any use of the Work.
+ d. Affirmer understands and acknowledges that Creative Commons is not a
+ party to this document and has no duty or obligation with respect to this
+ CC0 or use of the Work.
+ .
+ For more information, please see:
+ http://creativecommons.org/publicdomain/zero/1.0/>
+
diff --git a/third_party/jpeg-xl/debian/jxl.install b/third_party/jpeg-xl/debian/jxl.install
new file mode 100644
index 0000000000..c3bae3ed10
--- /dev/null
+++ b/third_party/jpeg-xl/debian/jxl.install
@@ -0,0 +1,3 @@
+usr/bin/*
+usr/share/man/man1/cjxl.1
+usr/share/man/man1/djxl.1
diff --git a/third_party/jpeg-xl/debian/libjxl-dev.install b/third_party/jpeg-xl/debian/libjxl-dev.install
new file mode 100644
index 0000000000..b735ec2c26
--- /dev/null
+++ b/third_party/jpeg-xl/debian/libjxl-dev.install
@@ -0,0 +1,4 @@
+usr/include/jxl/*.h
+usr/lib/*/*.a
+usr/lib/*/*.so
+usr/lib/*/pkgconfig/*.pc
diff --git a/third_party/jpeg-xl/debian/libjxl-gdk-pixbuf.install b/third_party/jpeg-xl/debian/libjxl-gdk-pixbuf.install
new file mode 100644
index 0000000000..12d2ab250f
--- /dev/null
+++ b/third_party/jpeg-xl/debian/libjxl-gdk-pixbuf.install
@@ -0,0 +1,3 @@
+usr/lib/*/gdk-pixbuf-*/*/loaders/*
+usr/share/mime/packages/image-jxl.xml
+usr/share/thumbnailers/jxl.thumbnailer
diff --git a/third_party/jpeg-xl/debian/libjxl-gimp-plugin.install b/third_party/jpeg-xl/debian/libjxl-gimp-plugin.install
new file mode 100644
index 0000000000..353431dba3
--- /dev/null
+++ b/third_party/jpeg-xl/debian/libjxl-gimp-plugin.install
@@ -0,0 +1 @@
+usr/lib/gimp
diff --git a/third_party/jpeg-xl/debian/libjxl.install b/third_party/jpeg-xl/debian/libjxl.install
new file mode 100644
index 0000000000..cd157a7a5c
--- /dev/null
+++ b/third_party/jpeg-xl/debian/libjxl.install
@@ -0,0 +1 @@
+usr/lib/*/libjxl*.so.*
diff --git a/third_party/jpeg-xl/debian/rules b/third_party/jpeg-xl/debian/rules
new file mode 100755
index 0000000000..6259dbfc61
--- /dev/null
+++ b/third_party/jpeg-xl/debian/rules
@@ -0,0 +1,21 @@
+#!/usr/bin/make -f
+
+include /usr/share/dpkg/pkg-info.mk
+
+%:
+ dh $@ --buildsystem=cmake
+
+override_dh_auto_configure:
+ # TODO(deymo): Remove the DCMAKE_BUILD_TYPE once builds without NDEBUG
+ # are as useful as Release builds.
+ # TODO(szabadka) Re-enable jpegli after tests are fixed on Ubuntu 20.04,
+ # and debian:buster
+ dh_auto_configure -- \
+ -DJPEGXL_VERSION=$(DEB_VERSION) \
+ -DCMAKE_BUILD_TYPE=RelWithDebInfo \
+ -DJPEGXL_FORCE_SYSTEM_GTEST=ON \
+ -DJPEGXL_FORCE_SYSTEM_BROTLI=ON \
+ -DJPEGXL_FORCE_SYSTEM_HWY=ON \
+ -DJPEGXL_ENABLE_JPEGLI=OFF \
+ -DJPEGXL_ENABLE_JPEGLI_LIBJPEG=OFF \
+ -DJPEGXL_ENABLE_PLUGINS=ON
diff --git a/third_party/jpeg-xl/debian/source/format b/third_party/jpeg-xl/debian/source/format
new file mode 100644
index 0000000000..163aaf8d82
--- /dev/null
+++ b/third_party/jpeg-xl/debian/source/format
@@ -0,0 +1 @@
+3.0 (quilt)
diff --git a/third_party/jpeg-xl/deps.sh b/third_party/jpeg-xl/deps.sh
new file mode 100755
index 0000000000..b6555e30ca
--- /dev/null
+++ b/third_party/jpeg-xl/deps.sh
@@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# This file downloads the dependencies needed to build JPEG XL into third_party.
+# These dependencies are normally pulled by gtest.
+
+set -eu
+
+MYDIR=$(dirname $(realpath "$0"))
+
+# Git revisions we use for the given submodules. Update these whenever you
+# update a git submodule.
+THIRD_PARTY_BROTLI="36533a866ed1ca4b75cf049f4521e4ec5fe24727"
+THIRD_PARTY_HIGHWAY="46e365d6770f5d7a4240d8ac9d8e928a520478ea"
+THIRD_PARTY_SKCMS="b25b07b4b07990811de121c0356155b2ba0f4318"
+THIRD_PARTY_SJPEG="868ab558fad70fcbe8863ba4e85179eeb81cc840"
+THIRD_PARTY_ZLIB="cacf7f1d4e3d44d871b605da3b647f07d718623f"
+THIRD_PARTY_LIBPNG="a40189cf881e9f0db80511c382292a5604c3c3d1"
+
+# Download the target revision from GitHub.
+download_github() {
+ local path="$1"
+ local project="$2"
+
+ local varname=`echo "$path" | tr '[:lower:]' '[:upper:]'`
+ varname="${varname/\//_}"
+ local sha
+ eval "sha=\${${varname}}"
+
+ local down_dir="${MYDIR}/downloads"
+ local local_fn="${down_dir}/${sha}.tar.gz"
+ if [[ -e "${local_fn}" && -d "${MYDIR}/${path}" ]]; then
+ echo "${path} already up to date." >&2
+ return 0
+ fi
+
+ local url
+ local strip_components=0
+ if [[ "${project:0:4}" == "http" ]]; then
+ # "project" is a googlesource.com base url.
+ url="${project}${sha}.tar.gz"
+ else
+ # GitHub files have a top-level directory
+ strip_components=1
+ url="https://github.com/${project}/tarball/${sha}"
+ fi
+
+ echo "Downloading ${path} version ${sha}..." >&2
+ mkdir -p "${down_dir}"
+ curl -L --show-error -o "${local_fn}.tmp" "${url}"
+ mkdir -p "${MYDIR}/${path}"
+ tar -zxf "${local_fn}.tmp" -C "${MYDIR}/${path}" \
+ --strip-components="${strip_components}"
+ mv "${local_fn}.tmp" "${local_fn}"
+}
+
+
+main() {
+ if git -C "${MYDIR}" rev-parse; then
+ cat >&2 <<EOF
+Current directory is a git repository, downloading dependencies via git:
+
+ git submodule update --init --recursive
+
+EOF
+ git -C "${MYDIR}" submodule update --init --recursive --depth 1 --recommend-shallow
+ return 0
+ fi
+
+ # Sources downloaded from a tarball.
+ download_github third_party/brotli google/brotli
+ download_github third_party/highway google/highway
+ download_github third_party/sjpeg webmproject/sjpeg
+ download_github third_party/skcms \
+ "https://skia.googlesource.com/skcms/+archive/"
+ download_github third_party/zlib madler/zlib
+ download_github third_party/libpng glennrp/libpng
+ echo "Done."
+}
+
+main "$@"
diff --git a/third_party/jpeg-xl/docker/Dockerfile.jpegxl-builder b/third_party/jpeg-xl/docker/Dockerfile.jpegxl-builder
new file mode 100644
index 0000000000..16e0077eea
--- /dev/null
+++ b/third_party/jpeg-xl/docker/Dockerfile.jpegxl-builder
@@ -0,0 +1,21 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Build an Ubuntu-based docker image with the installed software needed to
+# develop and test JPEG XL.
+
+FROM ubuntu:bionic
+
+# Set a prompt for when using it locally.
+ENV PS1="\[\033[01;33m\]\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ "
+
+COPY scripts/99_norecommends /etc/apt/apt.conf.d/99_norecommends
+
+COPY scripts /jpegxl_scripts
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN /jpegxl_scripts/jpegxl_builder.sh && \
+ rm -rf /jpegxl_scripts
diff --git a/third_party/jpeg-xl/docker/Dockerfile.jpegxl-builder-run-aarch64 b/third_party/jpeg-xl/docker/Dockerfile.jpegxl-builder-run-aarch64
new file mode 100644
index 0000000000..b4f2918375
--- /dev/null
+++ b/third_party/jpeg-xl/docker/Dockerfile.jpegxl-builder-run-aarch64
@@ -0,0 +1,36 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Build an Ubuntu-based docker image for aarch64 with the installed software
+# needed to run JPEG XL. This is only useful when running on actual aarch64
+# hardware.
+
+FROM arm64v8/ubuntu:bionic
+
+COPY scripts/99_norecommends /etc/apt/apt.conf.d/99_norecommends
+
+# Set a prompt for when using it locally.
+ENV PS1="\[\033[01;33m\]\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ "
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN set -ex; \
+ apt-get update -y; \
+ apt-get install -y \
+ bsdmainutils \
+ cmake \
+ curl \
+ ca-certificates \
+ extra-cmake-modules \
+ git \
+ imagemagick \
+ libjpeg8 \
+ libgif7 \
+ libgoogle-perftools4 \
+ libopenexr22 \
+ libpng16-16 \
+ libsdl2-2.0-0 \
+ parallel; \
+ rm -rf /var/lib/apt/lists/*;
diff --git a/third_party/jpeg-xl/docker/README.md b/third_party/jpeg-xl/docker/README.md
new file mode 100644
index 0000000000..874df1cb80
--- /dev/null
+++ b/third_party/jpeg-xl/docker/README.md
@@ -0,0 +1,7 @@
+### Docker container infrastructure for JPEG XL
+
+This directory contains the requirements to build a docker image for the
+JPEG XL project builder.
+
+Docker images need to be created and upload manually. See ./build.sh for
+details.
diff --git a/third_party/jpeg-xl/docker/build.sh b/third_party/jpeg-xl/docker/build.sh
new file mode 100755
index 0000000000..3d4727f6a4
--- /dev/null
+++ b/third_party/jpeg-xl/docker/build.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+set -eu
+
+MYDIR=$(dirname $(realpath "$0"))
+
+declare -a TARGETS
+
+load_targets() {
+ # Built-in OSX "find" does not support "-m".
+ FIND=$(which "gfind" || which "find")
+ for f in $(${FIND} -maxdepth 1 -name 'Dockerfile.*' | sort); do
+ local target="${f#*Dockerfile.}"
+ TARGETS+=("${target}")
+ done
+}
+
+usage() {
+ cat >&2 <<EOF
+Use: $1 [targets]
+
+Available targets:
+ * all
+EOF
+ for target in "${TARGETS[@]}"; do
+ echo " * ${target}" >&2
+ done
+}
+
+build_target() {
+ local target="$1"
+
+ local dockerfile="${MYDIR}/Dockerfile.${target}"
+ # JPEG XL builder images are stored in the gcr.io/jpegxl project.
+ local tag="gcr.io/jpegxl/${target}"
+
+ echo "Building ${target}"
+ if ! sudo docker build --no-cache -t "${tag}" -f "${dockerfile}" "${MYDIR}" \
+ >"${target}.log" 2>&1; then
+ echo "${target} failed. See ${target}.log" >&2
+ else
+ echo "Done, to upload image run:" >&2
+ echo " sudo docker push ${tag}"
+ if [[ "${JPEGXL_PUSH:-}" == "1" ]]; then
+ echo "sudo docker push ${tag}" >&2
+ sudo docker push "${tag}"
+ # The RepoDigest is only created after it is pushed.
+ local fulltag=$(sudo docker inspect --format="{{.RepoDigests}}" "${tag}")
+ fulltag="${fulltag#[}"
+ fulltag="${fulltag%]}"
+ echo "Updating .gitlab-ci.yml to ${fulltag}" >&2
+ sed -E "s;${tag}@sha256:[0-9a-f]+;${fulltag};" \
+ -i "${MYDIR}/../.gitlab-ci.yml"
+ fi
+ fi
+}
+
+main() {
+ cd "${MYDIR}"
+ local target="${1:-}"
+
+ load_targets
+ if [[ -z "${target}" ]]; then
+ usage $0
+ exit 1
+ fi
+
+ if [[ "${target}" == "all" ]]; then
+ for target in "${TARGETS[@]}"; do
+ build_target "${target}"
+ done
+ else
+ for target in "$@"; do
+ build_target "${target}"
+ done
+ fi
+}
+
+main "$@"
diff --git a/third_party/jpeg-xl/docker/scripts/99_norecommends b/third_party/jpeg-xl/docker/scripts/99_norecommends
new file mode 100644
index 0000000000..96d672811d
--- /dev/null
+++ b/third_party/jpeg-xl/docker/scripts/99_norecommends
@@ -0,0 +1 @@
+APT::Install-Recommends "false";
diff --git a/third_party/jpeg-xl/docker/scripts/binutils_align_fix.patch b/third_party/jpeg-xl/docker/scripts/binutils_align_fix.patch
new file mode 100644
index 0000000000..6066252db8
--- /dev/null
+++ b/third_party/jpeg-xl/docker/scripts/binutils_align_fix.patch
@@ -0,0 +1,28 @@
+Description: fix lack of alignment in relocations (crashes on mingw)
+See https://sourceware.org/git/?p=binutils-gdb.git;a=patch;h=73af69e74974eaa155eec89867e3ccc77ab39f6d
+From: Marc <marc@groundctl.com>
+Date: Fri, 9 Nov 2018 11:13:50 +0000
+Subject: [PATCH] Allow for compilers that do not produce aligned .rdat
+ sections in PE format files.
+
+--- a/upstream/ld/scripttempl/pe.sc 2020-05-12 18:45:12.000000000 +0200
++++ b/upstream/ld/scripttempl/pe.sc 2020-05-12 18:47:12.000000000 +0200
+@@ -143,6 +143,7 @@
+ .rdata ${RELOCATING+BLOCK(__section_alignment__)} :
+ {
+ ${R_RDATA}
++ . = ALIGN(4);
+ ${RELOCATING+__rt_psrelocs_start = .;}
+ ${RELOCATING+KEEP(*(.rdata_runtime_pseudo_reloc))}
+ ${RELOCATING+__rt_psrelocs_end = .;}
+--- a/upstream/ld/scripttempl/pep.sc 2020-05-12 18:45:19.000000000 +0200
++++ b/upstream/ld/scripttempl/pep.sc 2020-05-12 18:47:18.000000000 +0200
+@@ -143,6 +143,7 @@
+ .rdata ${RELOCATING+BLOCK(__section_alignment__)} :
+ {
+ ${R_RDATA}
++ . = ALIGN(4);
+ ${RELOCATING+__rt_psrelocs_start = .;}
+ ${RELOCATING+KEEP(*(.rdata_runtime_pseudo_reloc))}
+ ${RELOCATING+__rt_psrelocs_end = .;}
+
diff --git a/third_party/jpeg-xl/docker/scripts/emsdk_install.sh b/third_party/jpeg-xl/docker/scripts/emsdk_install.sh
new file mode 100755
index 0000000000..6cf225a9d9
--- /dev/null
+++ b/third_party/jpeg-xl/docker/scripts/emsdk_install.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+EMSDK_URL="https://github.com/emscripten-core/emsdk/archive/main.tar.gz"
+EMSDK_DIR="/opt/emsdk"
+
+EMSDK_RELEASE="2.0.23"
+
+set -eu -x
+
+# Temporary files cleanup hooks.
+CLEANUP_FILES=()
+cleanup() {
+ if [[ ${#CLEANUP_FILES[@]} -ne 0 ]]; then
+ rm -fr "${CLEANUP_FILES[@]}"
+ fi
+}
+trap "{ set +x; } 2>/dev/null; cleanup" INT TERM EXIT
+
+main() {
+ local workdir=$(mktemp -d --suffix=emsdk)
+ CLEANUP_FILES+=("${workdir}")
+
+ local emsdktar="${workdir}/emsdk.tar.gz"
+ curl --output "${emsdktar}" "${EMSDK_URL}" --location
+ mkdir -p "${EMSDK_DIR}"
+ tar -zxf "${emsdktar}" -C "${EMSDK_DIR}" --strip-components=1
+
+ cd "${EMSDK_DIR}"
+ ./emsdk install --shallow "${EMSDK_RELEASE}"
+ ./emsdk activate --embedded "${EMSDK_RELEASE}"
+}
+
+main "$@"
diff --git a/third_party/jpeg-xl/docker/scripts/jpegxl_builder.sh b/third_party/jpeg-xl/docker/scripts/jpegxl_builder.sh
new file mode 100755
index 0000000000..949c811eae
--- /dev/null
+++ b/third_party/jpeg-xl/docker/scripts/jpegxl_builder.sh
@@ -0,0 +1,516 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Main entry point for all the Dockerfile for jpegxl-builder. This centralized
+# file helps sharing code and configuration between Dockerfiles.
+
+set -eux
+
+MYDIR=$(dirname $(realpath "$0"))
+
+# libjpeg-turbo.
+JPEG_TURBO_RELEASE="2.0.4"
+JPEG_TURBO_URL="https://github.com/libjpeg-turbo/libjpeg-turbo/archive/${JPEG_TURBO_RELEASE}.tar.gz"
+JPEG_TURBO_SHA256="7777c3c19762940cff42b3ba4d7cd5c52d1671b39a79532050c85efb99079064"
+
+# zlib (dependency of libpng)
+ZLIB_RELEASE="1.2.11"
+ZLIB_URL="https://www.zlib.net/zlib-${ZLIB_RELEASE}.tar.gz"
+ZLIB_SHA256="c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1"
+# The name in the .pc and the .dll generated don't match in zlib for Windows
+# because they use different .dll names in Windows. We avoid that by defining
+# UNIX=1. We also install all the .dll files to ${prefix}/lib instead of the
+# default ${prefix}/bin.
+ZLIB_FLAGS='-DUNIX=1 -DINSTALL_PKGCONFIG_DIR=/${CMAKE_INSTALL_PREFIX}/lib/pkgconfig -DINSTALL_BIN_DIR=/${CMAKE_INSTALL_PREFIX}/lib'
+
+# libpng
+LIBPNG_RELEASE="1.6.37"
+LIBPNG_URL="https://github.com/glennrp/libpng/archive/v${LIBPNG_RELEASE}.tar.gz"
+LIBPNG_SHA256="ca74a0dace179a8422187671aee97dd3892b53e168627145271cad5b5ac81307"
+
+# giflib
+GIFLIB_RELEASE="5.2.1"
+GIFLIB_URL="https://netcologne.dl.sourceforge.net/project/giflib/giflib-${GIFLIB_RELEASE}.tar.gz"
+GIFLIB_SHA256="31da5562f44c5f15d63340a09a4fd62b48c45620cd302f77a6d9acf0077879bd"
+
+# A patch needed to compile GIFLIB in mingw.
+GIFLIB_PATCH_URL="https://github.com/msys2/MINGW-packages/raw/3afde38fcee7b3ba2cafd97d76cca8f06934504f/mingw-w64-giflib/001-mingw-build.patch"
+GIFLIB_PATCH_SHA256="2b2262ddea87fc07be82e10aeb39eb699239f883c899aa18a16e4d4e40af8ec8"
+
+# webp
+WEBP_RELEASE="1.0.2"
+WEBP_URL="https://codeload.github.com/webmproject/libwebp/tar.gz/v${WEBP_RELEASE}"
+WEBP_SHA256="347cf85ddc3497832b5fa9eee62164a37b249c83adae0ba583093e039bf4881f"
+
+# Google benchmark
+BENCHMARK_RELEASE="1.5.2"
+BENCHMARK_URL="https://github.com/google/benchmark/archive/v${BENCHMARK_RELEASE}.tar.gz"
+BENCHMARK_SHA256="dccbdab796baa1043f04982147e67bb6e118fe610da2c65f88912d73987e700c"
+BENCHMARK_FLAGS="-DGOOGLETEST_PATH=${MYDIR}/../../third_party/googletest"
+# attribute(format(__MINGW_PRINTF_FORMAT, ...)) doesn't work in our
+# environment, so we disable the warning.
+BENCHMARK_FLAGS="-DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_TESTING=OFF \
+ -DCMAKE_CXX_FLAGS=-Wno-ignored-attributes \
+ -DCMAKE_POSITION_INDEPENDENT_CODE=ON"
+
+# V8
+V8_VERSION="9.3.22"
+
+# Temporary files cleanup hooks.
+CLEANUP_FILES=()
+cleanup() {
+ if [[ ${#CLEANUP_FILES[@]} -ne 0 ]]; then
+ rm -fr "${CLEANUP_FILES[@]}"
+ fi
+}
+trap "{ set +x; } 2>/dev/null; cleanup" INT TERM EXIT
+
+# List of Ubuntu arch names supported by the builder (such as "i386").
+LIST_ARCHS=(
+ amd64
+ i386
+ arm64
+ armhf
+)
+
+# List of target triplets supported by the builder.
+LIST_TARGETS=(
+ x86_64-linux-gnu
+ i686-linux-gnu
+ arm-linux-gnueabihf
+ aarch64-linux-gnu
+)
+LIST_MINGW_TARGETS=(
+ i686-w64-mingw32
+ x86_64-w64-mingw32
+)
+LIST_WASM_TARGETS=(
+ wasm32
+)
+
+# Setup the apt repositories and supported architectures.
+setup_apt() {
+ apt-get update -y
+ apt-get install -y curl gnupg ca-certificates
+
+ apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 1E9377A2BA9EF27F
+
+ # node sources.
+ cat >/etc/apt/sources.list.d/nodesource.list <<EOF
+ deb https://deb.nodesource.com/node_14.x bionic main
+ deb-src https://deb.nodesource.com/node_14.x bionic main
+EOF
+ curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add -
+
+ local port_list=()
+ local main_list=()
+ local ubarch
+ for ubarch in "${LIST_ARCHS[@]}"; do
+ if [[ "${ubarch}" != "amd64" && "${ubarch}" != "i386" ]]; then
+ # other archs are not part of the main mirrors, but available in
+ # ports.ubuntu.com.
+ port_list+=("${ubarch}")
+ else
+ main_list+=("${ubarch}")
+ fi
+ # Add the arch to the system.
+ if [[ "${ubarch}" != "amd64" ]]; then
+ dpkg --add-architecture "${ubarch}"
+ fi
+ done
+
+ # Update the sources.list with the split of supported architectures.
+ local bkplist="/etc/apt/sources.list.bkp"
+ [[ -e "${bkplist}" ]] || \
+ mv /etc/apt/sources.list "${bkplist}"
+
+ local newlist="/etc/apt/sources.list.tmp"
+ rm -f "${newlist}"
+ port_list=$(echo "${port_list[@]}" | tr ' ' ,)
+ if [[ -n "${port_list}" ]]; then
+ local port_url="http://ports.ubuntu.com/ubuntu-ports/"
+ grep -v -E '^#' "${bkplist}" |
+ sed -E "s;^deb (http[^ ]+) (.*)\$;deb [arch=${port_list}] ${port_url} \\2;" \
+ >>"${newlist}"
+ fi
+
+ main_list=$(echo "${main_list[@]}" | tr ' ' ,)
+ grep -v -E '^#' "${bkplist}" |
+ sed -E "s;^deb (http[^ ]+) (.*)\$;deb [arch=${main_list}] \\1 \\2\ndeb-src [arch=${main_list}] \\1 \\2;" \
+ >>"${newlist}"
+ mv "${newlist}" /etc/apt/sources.list
+}
+
+install_pkgs() {
+ packages=(
+ # Native compilers (minimum for SIMD is clang-7)
+ clang-7 clang-format-7 clang-tidy-7
+
+ # TODO: Consider adding clang-8 to every builder:
+ # clang-8 clang-format-8 clang-tidy-8
+
+ # For cross-compiling to Windows with mingw.
+ mingw-w64
+ wine64
+ wine-binfmt
+
+ # Native tools.
+ bsdmainutils
+ cmake
+ extra-cmake-modules
+ git
+ llvm
+ nasm
+ ninja-build
+ parallel
+ pkg-config
+
+ # For compiling / testing JNI wrapper. JDK8 is almost 2x smaller than JDK11
+ # openjdk-8-jdk-headless would be 50MB smaller, unfortunately, CMake
+ # does mistakenly thinks it does not contain JNI feature.
+ openjdk-8-jdk
+
+ # These are used by the ./ci.sh lint in the native builder.
+ clang-format-7
+ clang-format-8
+
+ # For coverage builds
+ gcovr
+
+ # For compiling giflib documentation.
+ xmlto
+
+ # Common libraries.
+ libstdc++-8-dev
+
+ # We don't use tcmalloc on archs other than amd64. This installs
+ # libgoogle-perftools4:amd64.
+ google-perftools
+
+ # NodeJS for running WASM tests
+ nodejs
+
+ # To generate API documentation.
+ doxygen
+
+ # Freezes version that builds (passes tests). Newer version
+ # (2.30-21ubuntu1~18.04.4) claims to fix "On Intel Skylake
+ # (-march=native) generated avx512 instruction can be wrong",
+ # but newly added tests does not pass. Perhaps the problem is
+ # that mingw package is not updated.
+ binutils-source=2.30-15ubuntu1
+ )
+
+ # Install packages that are arch-dependent.
+ local ubarch
+ for ubarch in "${LIST_ARCHS[@]}"; do
+ packages+=(
+ # Library dependencies. These normally depend on the target architecture
+ # we are compiling for and can't usually be installed for multiple
+ # architectures at the same time.
+ libgif7:"${ubarch}"
+ libjpeg-dev:"${ubarch}"
+ libpng-dev:"${ubarch}"
+
+ libstdc++-8-dev:"${ubarch}"
+
+ # For OpenEXR:
+ libilmbase12:"${ubarch}"
+ libopenexr22:"${ubarch}"
+
+ # TCMalloc dependency
+ libunwind-dev:"${ubarch}"
+
+ # Cross-compiling tools per arch.
+ libc6-dev-"${ubarch}"-cross
+ libstdc++-8-dev-"${ubarch}"-cross
+ )
+ done
+
+ local target
+ for target in "${LIST_TARGETS[@]}"; do
+ # Per target cross-compiling tools.
+ if [[ "${target}" != "x86_64-linux-gnu" ]]; then
+ packages+=(
+ binutils-"${target}"
+ gcc-"${target}"
+ )
+ fi
+ done
+
+ # Install all the manual packages via "apt install" for the main arch. These
+ # will be installed for other archs via manual download and unpack.
+ apt install -y "${packages[@]}" "${UNPACK_PKGS[@]}"
+}
+
+# binutils <2.32 need a patch.
+install_binutils() {
+ local workdir=$(mktemp -d --suffix=_install)
+ CLEANUP_FILES+=("${workdir}")
+ pushd "${workdir}"
+ apt source binutils-mingw-w64
+ apt -y build-dep binutils-mingw-w64
+ cd binutils-mingw-w64-8ubuntu1
+ cp "${MYDIR}/binutils_align_fix.patch" debian/patches
+ echo binutils_align_fix.patch >> debian/patches/series
+ dpkg-buildpackage -b
+ cd ..
+ dpkg -i *deb
+ popd
+}
+
+# Install a library from the source code for multiple targets.
+# Usage: install_from_source <tar_url> <sha256> <target> [<target...>]
+install_from_source() {
+ local package="$1"
+ shift
+
+ local url
+ eval "url=\${${package}_URL}"
+ local sha256
+ eval "sha256=\${${package}_SHA256}"
+ # Optional package flags
+ local pkgflags
+ eval "pkgflags=\${${package}_FLAGS:-}"
+
+ local workdir=$(mktemp -d --suffix=_install)
+ CLEANUP_FILES+=("${workdir}")
+
+ local tarfile="${workdir}"/$(basename "${url}")
+ curl -L --output "${tarfile}" "${url}"
+ if ! echo "${sha256} ${tarfile}" | sha256sum -c --status -; then
+ echo "SHA256 mismatch for ${url}: expected ${sha256} but found:"
+ sha256sum "${tarfile}"
+ exit 1
+ fi
+
+ local target
+ for target in "$@"; do
+ echo "Installing ${package} for target ${target} from ${url}"
+
+ local srcdir="${workdir}/source-${target}"
+ mkdir -p "${srcdir}"
+ tar -zxf "${tarfile}" -C "${srcdir}" --strip-components=1
+
+ local prefix="/usr"
+ if [[ "${target}" != "x86_64-linux-gnu" ]]; then
+ prefix="/usr/${target}"
+ fi
+
+ # Apply patches to buildfiles.
+ if [[ "${package}" == "GIFLIB" && "${target}" == *mingw32 ]]; then
+ # GIFLIB Makefile has several problems so we need to fix them here. We are
+ # using a patch from MSYS2 that already fixes the compilation for mingw.
+ local make_patch="${srcdir}/libgif.patch"
+ curl -L "${GIFLIB_PATCH_URL}" -o "${make_patch}"
+ echo "${GIFLIB_PATCH_SHA256} ${make_patch}" | sha256sum -c --status -
+ patch "${srcdir}/Makefile" < "${make_patch}"
+ elif [[ "${package}" == "LIBPNG" && "${target}" == wasm* ]]; then
+ # Cut the dependency to libm; there is pull request to fix it, so this
+ # might not be needed in the future.
+ sed -i 's/APPLE/EMSCRIPTEN/g' "${srcdir}/CMakeLists.txt"
+ fi
+
+ local cmake_args=()
+ local export_args=("CC=clang-7" "CXX=clang++-7")
+ local cmake="cmake"
+ local make="make"
+ local system_name="Linux"
+ if [[ "${target}" == *mingw32 ]]; then
+ system_name="Windows"
+ # When compiling with clang, CMake doesn't detect that we are using mingw.
+ cmake_args+=(
+ -DMINGW=1
+ # Googletest needs this when cross-compiling to windows
+ -DCMAKE_CROSSCOMPILING=1
+ -DHAVE_STD_REGEX=0
+ -DHAVE_POSIX_REGEX=0
+ -DHAVE_GNU_POSIX_REGEX=0
+ )
+ local windres=$(which ${target}-windres || true)
+ if [[ -n "${windres}" ]]; then
+ cmake_args+=(-DCMAKE_RC_COMPILER="${windres}")
+ fi
+ fi
+ if [[ "${target}" == wasm* ]]; then
+ system_name="WASM"
+ cmake="emcmake cmake"
+ make="emmake make"
+ export_args=()
+ cmake_args+=(
+ -DCMAKE_FIND_ROOT_PATH="${prefix}"
+ -DCMAKE_PREFIX_PATH="${prefix}"
+ )
+ # Static and shared library link to the same file -> race condition.
+ nproc=1
+ else
+ nproc=`nproc --all`
+ fi
+ cmake_args+=(-DCMAKE_SYSTEM_NAME="${system_name}")
+
+ if [[ "${target}" != "x86_64-linux-gnu" ]]; then
+ # Cross-compiling.
+ cmake_args+=(
+ -DCMAKE_C_COMPILER_TARGET="${target}"
+ -DCMAKE_CXX_COMPILER_TARGET="${target}"
+ -DCMAKE_SYSTEM_PROCESSOR="${target%%-*}"
+ )
+ fi
+
+ if [[ -e "${srcdir}/CMakeLists.txt" ]]; then
+ # Most packages use cmake for building which is easier to configure for
+ # cross-compiling.
+ if [[ "${package}" == "JPEG_TURBO" && "${target}" == wasm* ]]; then
+ # JT erroneously detects WASM CPU as i386 and tries to use asm.
+ # Wasm/Emscripten support for dynamic linking is incomplete; disable
+ # to avoid CMake warning.
+ cmake_args+=(-DWITH_SIMD=0 -DENABLE_SHARED=OFF)
+ fi
+ (
+ cd "${srcdir}"
+ export ${export_args[@]}
+ ${cmake} \
+ -DCMAKE_INSTALL_PREFIX="${prefix}" \
+ "${cmake_args[@]}" ${pkgflags}
+ ${make} -j${nproc}
+ ${make} install
+ )
+ elif [[ "${package}" == "GIFLIB" ]]; then
+ # GIFLIB doesn't yet have a cmake build system. There is a pull
+ # request in giflib for adding CMakeLists.txt so this might not be
+ # needed in the future.
+ (
+ cd "${srcdir}"
+ local giflib_make_flags=(
+ CFLAGS="-O2 --target=${target} -std=gnu99"
+ PREFIX="${prefix}"
+ )
+ if [[ "${target}" != wasm* ]]; then
+ giflib_make_flags+=(CC=clang-7)
+ fi
+ # giflib make dependencies are not properly set up so parallel building
+ # doesn't work for everything.
+ ${make} -j${nproc} libgif.a "${giflib_make_flags[@]}"
+ ${make} -j${nproc} all "${giflib_make_flags[@]}"
+ ${make} install "${giflib_make_flags[@]}"
+ )
+ else
+ echo "Don't know how to install ${package}"
+ exit 1
+ fi
+
+ # CMake mistakenly uses ".so" libraries and EMCC fails to link properly.
+ if [[ "${target}" == wasm* ]]; then
+ rm -f "${prefix}/lib"/*.so*
+ fi
+ done
+}
+
+# Packages that are manually unpacked for each architecture.
+UNPACK_PKGS=(
+ libgif-dev
+ libclang-common-7-dev
+
+ # For OpenEXR:
+ libilmbase-dev
+ libopenexr-dev
+
+ # TCMalloc
+ libgoogle-perftools-dev
+ libtcmalloc-minimal4
+ libgoogle-perftools4
+)
+
+# Main script entry point.
+main() {
+ cd "${MYDIR}"
+
+ # Configure the repositories with the sources for multi-arch cross
+ # compilation.
+ setup_apt
+ apt-get update -y
+ apt-get dist-upgrade -y
+
+ install_pkgs
+ install_binutils
+ apt clean
+
+ # Remove prebuilt Java classes cache.
+ rm /usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/server/classes.jsa
+
+ # Manually extract packages for the target arch that can't install it directly
+ # at the same time as the native ones.
+ local ubarch
+ for ubarch in "${LIST_ARCHS[@]}"; do
+ if [[ "${ubarch}" != "amd64" ]]; then
+ local pkg
+ for pkg in "${UNPACK_PKGS[@]}"; do
+ apt download "${pkg}":"${ubarch}"
+ dpkg -x "${pkg}"_*_"${ubarch}".deb /
+ done
+ fi
+ done
+ # TODO: Add clang from the llvm repos. This is problematic since we are
+ # installing libclang-common-7-dev:"${ubarch}" from the ubuntu ports repos
+ # which is not available in the llvm repos so it might have a different
+ # version than the ubuntu ones.
+
+ # Remove the win32 libgcc version. The gcc-mingw-w64-x86-64 (and i686)
+ # packages install two libgcc versions:
+ # /usr/lib/gcc/x86_64-w64-mingw32/7.3-posix
+ # /usr/lib/gcc/x86_64-w64-mingw32/7.3-win32
+ # (exact libgcc version number depends on the package version).
+ #
+ # Clang will pick the best libgcc, sorting by version, but it doesn't
+ # seem to be a way to specify one or the other one, except by passing
+ # -nostdlib and setting all the include paths from the command line.
+ # To check which one is being used you can run:
+ # clang++-7 --target=x86_64-w64-mingw32 -v -print-libgcc-file-name
+ # We need to use the "posix" versions for thread support, so here we
+ # just remove the other one.
+ local target
+ for target in "${LIST_MINGW_TARGETS[@]}"; do
+ update-alternatives --set "${target}-gcc" $(which "${target}-gcc-posix")
+ local gcc_win32_path=$("${target}-cpp-win32" -print-libgcc-file-name)
+ rm -rf $(dirname "${gcc_win32_path}")
+ done
+
+ # TODO: Add msan for the target when cross-compiling. This only installs it
+ # for amd64.
+ ./msan_install.sh
+
+ # Build and install qemu user-linux targets.
+ ./qemu_install.sh
+
+ # Install emscripten SDK.
+ ./emsdk_install.sh
+
+ # Setup environment for building WASM libraries from sources.
+ source /opt/emsdk/emsdk_env.sh
+
+ # Install some dependency libraries manually for the different targets.
+
+ install_from_source JPEG_TURBO "${LIST_MINGW_TARGETS[@]}" "${LIST_WASM_TARGETS[@]}"
+ install_from_source ZLIB "${LIST_MINGW_TARGETS[@]}" "${LIST_WASM_TARGETS[@]}"
+ install_from_source LIBPNG "${LIST_MINGW_TARGETS[@]}" "${LIST_WASM_TARGETS[@]}"
+ install_from_source GIFLIB "${LIST_MINGW_TARGETS[@]}" "${LIST_WASM_TARGETS[@]}"
+ # webp in Ubuntu is relatively old so we install it from source for everybody.
+ install_from_source WEBP "${LIST_TARGETS[@]}" "${LIST_MINGW_TARGETS[@]}"
+
+ install_from_source BENCHMARK "${LIST_TARGETS[@]}" "${LIST_MINGW_TARGETS[@]}"
+
+ # Install v8. v8 has better WASM SIMD support than NodeJS 14 (LTS).
+ # First we need the installer to install v8.
+ npm install jsvu -g
+ # install specific version;
+ HOME=/opt jsvu --os=linux64 "v8@${V8_VERSION}"
+ ln -s "/opt/.jsvu/v8-${V8_VERSION}" "/opt/.jsvu/v8"
+
+ # Cleanup.
+ find /var/lib/apt/lists/ -mindepth 1 -delete
+}
+
+main "$@"
diff --git a/third_party/jpeg-xl/docker/scripts/msan_install.sh b/third_party/jpeg-xl/docker/scripts/msan_install.sh
new file mode 100755
index 0000000000..0216f62b04
--- /dev/null
+++ b/third_party/jpeg-xl/docker/scripts/msan_install.sh
@@ -0,0 +1,131 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+set -eu
+
+MYDIR=$(dirname $(realpath "$0"))
+
+# Convenience flag to pass both CMAKE_C_FLAGS and CMAKE_CXX_FLAGS
+CMAKE_FLAGS=${CMAKE_FLAGS:-}
+CMAKE_C_FLAGS=${CMAKE_C_FLAGS:-${CMAKE_FLAGS}}
+CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS:-${CMAKE_FLAGS}}
+CMAKE_EXE_LINKER_FLAGS=${CMAKE_EXE_LINKER_FLAGS:-}
+
+CLANG_VERSION="${CLANG_VERSION:-}"
+# Detect the clang version suffix and store it in CLANG_VERSION. For example,
+# "6.0" for clang 6 or "7" for clang 7.
+detect_clang_version() {
+ if [[ -n "${CLANG_VERSION}" ]]; then
+ return 0
+ fi
+ local clang_version=$("${CC:-clang}" --version | head -n1)
+ local llvm_tag
+ case "${clang_version}" in
+ "clang version 6."*)
+ CLANG_VERSION="6.0"
+ ;;
+ "clang version 7."*)
+ CLANG_VERSION="7"
+ ;;
+ "clang version 8."*)
+ CLANG_VERSION="8"
+ ;;
+ "clang version 9."*)
+ CLANG_VERSION="9"
+ ;;
+ *)
+ echo "Unknown clang version: ${clang_version}" >&2
+ return 1
+ esac
+}
+
+# Temporary files cleanup hooks.
+CLEANUP_FILES=()
+cleanup() {
+ if [[ ${#CLEANUP_FILES[@]} -ne 0 ]]; then
+ rm -fr "${CLEANUP_FILES[@]}"
+ fi
+}
+trap "{ set +x; } 2>/dev/null; cleanup" INT TERM EXIT
+
+# Install libc++ libraries compiled with msan in the msan_prefix for the current
+# compiler version.
+cmd_msan_install() {
+ local tmpdir=$(mktemp -d)
+ CLEANUP_FILES+=("${tmpdir}")
+ # Detect the llvm to install:
+ export CC="${CC:-clang}"
+ export CXX="${CXX:-clang++}"
+ detect_clang_version
+ local llvm_tag
+ case "${CLANG_VERSION}" in
+ "6.0")
+ llvm_tag="llvmorg-6.0.1"
+ ;;
+ "7")
+ llvm_tag="llvmorg-7.0.1"
+ ;;
+ "8")
+ llvm_tag="llvmorg-8.0.0"
+ ;;
+ *)
+ echo "Unknown clang version: ${clang_version}" >&2
+ return 1
+ esac
+ local llvm_targz="${tmpdir}/${llvm_tag}.tar.gz"
+ curl -L --show-error -o "${llvm_targz}" \
+ "https://github.com/llvm/llvm-project/archive/${llvm_tag}.tar.gz"
+ tar -C "${tmpdir}" -zxf "${llvm_targz}"
+ local llvm_root="${tmpdir}/llvm-project-${llvm_tag}"
+
+ local msan_prefix="${HOME}/.msan/${CLANG_VERSION}"
+ rm -rf "${msan_prefix}"
+
+ declare -A CMAKE_EXTRAS
+ CMAKE_EXTRAS[libcxx]="\
+ -DLIBCXX_CXX_ABI=libstdc++ \
+ -DLIBCXX_INSTALL_EXPERIMENTAL_LIBRARY=ON"
+
+ for project in libcxx; do
+ local proj_build="${tmpdir}/build-${project}"
+ local proj_dir="${llvm_root}/${project}"
+ mkdir -p "${proj_build}"
+ cmake -B"${proj_build}" -H"${proj_dir}" \
+ -G Ninja \
+ -DCMAKE_BUILD_TYPE=Release \
+ -DLLVM_USE_SANITIZER=Memory \
+ -DLLVM_PATH="${llvm_root}/llvm" \
+ -DLLVM_CONFIG_PATH="$(which llvm-config llvm-config-7 llvm-config-6.0 | \
+ head -n1)" \
+ -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}" \
+ -DCMAKE_C_FLAGS="${CMAKE_C_FLAGS}" \
+ -DCMAKE_EXE_LINKER_FLAGS="${CMAKE_EXE_LINKER_FLAGS}" \
+ -DCMAKE_INSTALL_PREFIX="${msan_prefix}" \
+ ${CMAKE_EXTRAS[${project}]}
+ cmake --build "${proj_build}"
+ ninja -C "${proj_build}" install
+ done
+}
+
+main() {
+ set -x
+ for version in 6.0 7 8; do
+ if ! which "clang-${version}" >/dev/null; then
+ echo "Skipping msan install for clang version ${version}"
+ continue
+ fi
+ (
+ trap "{ set +x; } 2>/dev/null; cleanup" INT TERM EXIT
+ export CLANG_VERSION=${version}
+ export CC=clang-${version}
+ export CXX=clang++-${version}
+ cmd_msan_install
+ ) &
+ done
+ wait
+}
+
+main "$@"
diff --git a/third_party/jpeg-xl/docker/scripts/qemu_install.sh b/third_party/jpeg-xl/docker/scripts/qemu_install.sh
new file mode 100755
index 0000000000..8106c4471d
--- /dev/null
+++ b/third_party/jpeg-xl/docker/scripts/qemu_install.sh
@@ -0,0 +1,83 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+QEMU_RELEASE="4.1.0"
+QEMU_URL="https://download.qemu.org/qemu-${QEMU_RELEASE}.tar.xz"
+QEMU_ARCHS=(
+ aarch64
+ arm
+ i386
+ # TODO: Consider adding these:
+ # aarch64_be
+ # mips64el
+ # mips64
+ # mips
+ # ppc64
+ # ppc
+)
+
+# Ubuntu packages not installed that are needed to build qemu.
+QEMU_BUILD_DEPS=(
+ libglib2.0-dev
+ libpixman-1-dev
+ flex
+ bison
+)
+
+set -eu -x
+
+# Temporary files cleanup hooks.
+CLEANUP_FILES=()
+cleanup() {
+ if [[ ${#CLEANUP_FILES[@]} -ne 0 ]]; then
+ rm -fr "${CLEANUP_FILES[@]}"
+ fi
+}
+trap "{ set +x; } 2>/dev/null; cleanup" INT TERM EXIT
+
+main() {
+ local workdir=$(mktemp -d --suffix=qemu)
+ CLEANUP_FILES+=("${workdir}")
+
+ apt install -y "${QEMU_BUILD_DEPS[@]}"
+
+ local qemutar="${workdir}/qemu.tar.gz"
+ curl --output "${qemutar}" "${QEMU_URL}"
+ tar -Jxf "${qemutar}" -C "${workdir}"
+ local srcdir="${workdir}/qemu-${QEMU_RELEASE}"
+
+ local builddir="${workdir}/build"
+ local prefixdir="${workdir}/prefix"
+ mkdir -p "${builddir}"
+
+ # List of targets to build.
+ local targets=""
+ local make_targets=()
+ local target
+ for target in "${QEMU_ARCHS[@]}"; do
+ targets="${targets} ${target}-linux-user"
+ # Build just the linux-user targets.
+ make_targets+=("${target}-linux-user/all")
+ done
+
+ cd "${builddir}"
+ "${srcdir}/configure" \
+ --prefix="${prefixdir}" \
+ --static --disable-system --enable-linux-user \
+ --target-list="${targets}"
+
+ make -j $(nproc --all || echo 1) "${make_targets[@]}"
+
+ # Manually install these into the non-standard location. This script runs as
+ # root anyway.
+ for target in "${QEMU_ARCHS[@]}"; do
+ cp "${target}-linux-user/qemu-${target}" "/usr/bin/qemu-${target}-static"
+ done
+
+ apt autoremove -y --purge "${QEMU_BUILD_DEPS[@]}"
+}
+
+main "$@"
diff --git a/third_party/jpeg-xl/examples/CMakeLists.txt b/third_party/jpeg-xl/examples/CMakeLists.txt
new file mode 100644
index 0000000000..88dc27c49f
--- /dev/null
+++ b/third_party/jpeg-xl/examples/CMakeLists.txt
@@ -0,0 +1,56 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Example project using libjxl.
+
+cmake_minimum_required(VERSION 3.10)
+
+project(SAMPLE_LIBJXL LANGUAGES C CXX)
+
+# Use pkg-config to find libjxl.
+find_package(PkgConfig)
+pkg_check_modules(Jxl REQUIRED IMPORTED_TARGET libjxl)
+pkg_check_modules(JxlThreads REQUIRED IMPORTED_TARGET libjxl_threads)
+
+# Build the example encoder/decoder binaries using the default shared libraries
+# installed.
+add_executable(decode_oneshot decode_oneshot.cc)
+target_link_libraries(decode_oneshot PkgConfig::Jxl PkgConfig::JxlThreads)
+
+add_executable(decode_progressive decode_progressive.cc)
+target_link_libraries(decode_progressive PkgConfig::Jxl PkgConfig::JxlThreads)
+
+add_executable(encode_oneshot encode_oneshot.cc)
+target_link_libraries(encode_oneshot PkgConfig::Jxl PkgConfig::JxlThreads)
+
+
+# Building a static binary with the static libjxl dependencies. How to load
+# static library configs from pkg-config and how to build static binaries
+# depends on the platform, and building static binaries in general has problems.
+# If you don't need static binaries you can remove this section.
+add_library(StaticJxl INTERFACE IMPORTED GLOBAL)
+set_target_properties(StaticJxl PROPERTIES
+ INTERFACE_INCLUDE_DIRECTORIES "${Jxl_STATIC_INCLUDE_DIR}"
+ INTERFACE_COMPILE_OPTIONS "${Jxl_STATIC_CFLAGS_OTHER}"
+ INTERFACE_LINK_LIBRARIES "${Jxl_STATIC_LDFLAGS}"
+)
+add_library(StaticJxlThreads INTERFACE IMPORTED GLOBAL)
+set_target_properties(StaticJxlThreads PROPERTIES
+ INTERFACE_INCLUDE_DIRECTORIES "${JxlThreads_STATIC_INCLUDE_DIR}"
+ INTERFACE_COMPILE_OPTIONS "${JxlThreads_STATIC_CFLAGS_OTHER}"
+ # libgcc uses weak symbols for pthread which means that -lpthread is not
+ # linked when compiling a static binary. This is a platform-specific fix for
+ # that.
+ INTERFACE_LINK_LIBRARIES
+ "${JxlThreads_STATIC_LDFLAGS} -Wl,--whole-archive -lpthread -Wl,--no-whole-archive"
+)
+
+add_executable(decode_oneshot_static decode_oneshot.cc)
+target_link_libraries(decode_oneshot_static
+ -static StaticJxl StaticJxlThreads)
+
+add_executable(encode_oneshot_static encode_oneshot.cc)
+target_link_libraries(encode_oneshot_static
+ -static StaticJxl StaticJxlThreads)
diff --git a/third_party/jpeg-xl/examples/decode_exif_metadata.cc b/third_party/jpeg-xl/examples/decode_exif_metadata.cc
new file mode 100644
index 0000000000..97b0e52703
--- /dev/null
+++ b/third_party/jpeg-xl/examples/decode_exif_metadata.cc
@@ -0,0 +1,172 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This C++ example decodes a JPEG XL image in one shot (all input bytes
+// available at once). The example outputs the pixels and color information to a
+// floating point image and an ICC profile on disk.
+
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <vector>
+
+bool DecodeJpegXlExif(const uint8_t* jxl, size_t size,
+ std::vector<uint8_t>* exif) {
+ auto dec = JxlDecoderMake(nullptr);
+
+ // We're only interested in the Exif boxes in this example, so don't
+ // subscribe to events related to pixel data.
+ if (JXL_DEC_SUCCESS != JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_BOX)) {
+ fprintf(stderr, "JxlDecoderSubscribeEvents failed\n");
+ return false;
+ }
+ bool support_decompression = true;
+ if (JXL_DEC_SUCCESS != JxlDecoderSetDecompressBoxes(dec.get(), JXL_TRUE)) {
+ fprintf(stderr,
+ "NOTE: decompressing brob boxes not supported with the currently "
+ "used jxl library.\n");
+ support_decompression = false;
+ }
+
+ JxlDecoderSetInput(dec.get(), jxl, size);
+ JxlDecoderCloseInput(dec.get());
+
+ const constexpr size_t kChunkSize = 65536;
+ size_t output_pos = 0;
+
+ for (;;) {
+ JxlDecoderStatus status = JxlDecoderProcessInput(dec.get());
+ if (status == JXL_DEC_ERROR) {
+ fprintf(stderr, "Decoder error\n");
+ return false;
+ } else if (status == JXL_DEC_NEED_MORE_INPUT) {
+ fprintf(stderr, "Error, already provided all input\n");
+ return false;
+ } else if (status == JXL_DEC_BOX) {
+ if (!exif->empty()) {
+ size_t remaining = JxlDecoderReleaseBoxBuffer(dec.get());
+ exif->resize(exif->size() - remaining);
+ // No need to wait for JXL_DEC_SUCCESS or decode other boxes.
+ return true;
+ }
+ JxlBoxType type;
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderGetBoxType(dec.get(), type, support_decompression)) {
+ fprintf(stderr, "Error, failed to get box type\n");
+ return false;
+ }
+ if (!memcmp(type, "Exif", 4)) {
+ exif->resize(kChunkSize);
+ JxlDecoderSetBoxBuffer(dec.get(), exif->data(), exif->size());
+ }
+ } else if (status == JXL_DEC_BOX_NEED_MORE_OUTPUT) {
+ size_t remaining = JxlDecoderReleaseBoxBuffer(dec.get());
+ output_pos += kChunkSize - remaining;
+ exif->resize(exif->size() + kChunkSize);
+ JxlDecoderSetBoxBuffer(dec.get(), exif->data() + output_pos,
+ exif->size() - output_pos);
+ } else if (status == JXL_DEC_SUCCESS) {
+ if (!exif->empty()) {
+ size_t remaining = JxlDecoderReleaseBoxBuffer(dec.get());
+ exif->resize(exif->size() - remaining);
+ return true;
+ }
+ return true;
+ } else {
+ fprintf(stderr, "Unknown decoder status\n");
+ return false;
+ }
+ }
+}
+
+bool LoadFile(const char* filename, std::vector<uint8_t>* out) {
+ FILE* file = fopen(filename, "rb");
+ if (!file) {
+ return false;
+ }
+
+ if (fseek(file, 0, SEEK_END) != 0) {
+ fclose(file);
+ return false;
+ }
+
+ long size = ftell(file);
+ // Avoid invalid file or directory.
+ if (size >= LONG_MAX || size < 0) {
+ fclose(file);
+ return false;
+ }
+
+ if (fseek(file, 0, SEEK_SET) != 0) {
+ fclose(file);
+ return false;
+ }
+
+ out->resize(size);
+ size_t readsize = fread(out->data(), 1, size, file);
+ if (fclose(file) != 0) {
+ return false;
+ }
+
+ return readsize == static_cast<size_t>(size);
+}
+
+bool WriteFile(const char* filename, const uint8_t* data, size_t size) {
+ FILE* file = fopen(filename, "wb");
+ if (!file) {
+ fprintf(stderr, "Could not open %s for writing", filename);
+ return false;
+ }
+ fwrite(data, 1, size, file);
+ if (fclose(file) != 0) {
+ return false;
+ }
+ return true;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc != 3) {
+ fprintf(stderr,
+ "Usage: %s <jxl> <exif>\n"
+ "Where:\n"
+ " jxl = input JPEG XL image filename\n"
+ " exif = output exif filename\n"
+ "Output files will be overwritten.\n",
+ argv[0]);
+ return 1;
+ }
+
+ const char* jxl_filename = argv[1];
+ const char* exif_filename = argv[2];
+
+ std::vector<uint8_t> jxl;
+ if (!LoadFile(jxl_filename, &jxl)) {
+ fprintf(stderr, "couldn't load %s\n", jxl_filename);
+ return 1;
+ }
+
+ std::vector<uint8_t> exif;
+ if (!DecodeJpegXlExif(jxl.data(), jxl.size(), &exif)) {
+ fprintf(stderr, "Error while decoding the jxl file\n");
+ return 1;
+ }
+ if (exif.empty()) {
+ printf("No exif data present in this image\n");
+ } else {
+ // TODO(lode): the exif box data contains the 4-byte TIFF header at the
+ // beginning, check whether this is desired to be part of the output, or
+ // should be removed.
+ if (!WriteFile(exif_filename, exif.data(), exif.size())) {
+ fprintf(stderr, "Error while writing the exif file\n");
+ return 1;
+ }
+ printf("Successfully wrote %s\n", exif_filename);
+ }
+ return 0;
+}
diff --git a/third_party/jpeg-xl/examples/decode_oneshot.cc b/third_party/jpeg-xl/examples/decode_oneshot.cc
new file mode 100644
index 0000000000..8cf9d4f3a6
--- /dev/null
+++ b/third_party/jpeg-xl/examples/decode_oneshot.cc
@@ -0,0 +1,251 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This C++ example decodes a JPEG XL image in one shot (all input bytes
+// available at once). The example outputs the pixels and color information to a
+// floating point image and an ICC profile on disk.
+
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS
+#endif
+
+#include <inttypes.h>
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+#include <jxl/resizable_parallel_runner.h>
+#include <jxl/resizable_parallel_runner_cxx.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <vector>
+
+/** Decodes JPEG XL image to floating point pixels and ICC Profile. Pixel are
+ * stored as floating point, as interleaved RGBA (4 floating point values per
+ * pixel), line per line from top to bottom. Pixel values have nominal range
+ * 0..1 but may go beyond this range for HDR or wide gamut. The ICC profile
+ * describes the color format of the pixel data.
+ */
+bool DecodeJpegXlOneShot(const uint8_t* jxl, size_t size,
+ std::vector<float>* pixels, size_t* xsize,
+ size_t* ysize, std::vector<uint8_t>* icc_profile) {
+ // Multi-threaded parallel runner.
+ auto runner = JxlResizableParallelRunnerMake(nullptr);
+
+ auto dec = JxlDecoderMake(nullptr);
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_BASIC_INFO |
+ JXL_DEC_COLOR_ENCODING |
+ JXL_DEC_FULL_IMAGE)) {
+ fprintf(stderr, "JxlDecoderSubscribeEvents failed\n");
+ return false;
+ }
+
+ if (JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(dec.get(),
+ JxlResizableParallelRunner,
+ runner.get())) {
+ fprintf(stderr, "JxlDecoderSetParallelRunner failed\n");
+ return false;
+ }
+
+ JxlBasicInfo info;
+ JxlPixelFormat format = {4, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0};
+
+ JxlDecoderSetInput(dec.get(), jxl, size);
+ JxlDecoderCloseInput(dec.get());
+
+ for (;;) {
+ JxlDecoderStatus status = JxlDecoderProcessInput(dec.get());
+
+ if (status == JXL_DEC_ERROR) {
+ fprintf(stderr, "Decoder error\n");
+ return false;
+ } else if (status == JXL_DEC_NEED_MORE_INPUT) {
+ fprintf(stderr, "Error, already provided all input\n");
+ return false;
+ } else if (status == JXL_DEC_BASIC_INFO) {
+ if (JXL_DEC_SUCCESS != JxlDecoderGetBasicInfo(dec.get(), &info)) {
+ fprintf(stderr, "JxlDecoderGetBasicInfo failed\n");
+ return false;
+ }
+ *xsize = info.xsize;
+ *ysize = info.ysize;
+ JxlResizableParallelRunnerSetThreads(
+ runner.get(),
+ JxlResizableParallelRunnerSuggestThreads(info.xsize, info.ysize));
+ } else if (status == JXL_DEC_COLOR_ENCODING) {
+ // Get the ICC color profile of the pixel data
+ size_t icc_size;
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderGetICCProfileSize(
+ dec.get(), &format, JXL_COLOR_PROFILE_TARGET_DATA, &icc_size)) {
+ fprintf(stderr, "JxlDecoderGetICCProfileSize failed\n");
+ return false;
+ }
+ icc_profile->resize(icc_size);
+ if (JXL_DEC_SUCCESS != JxlDecoderGetColorAsICCProfile(
+ dec.get(), &format,
+ JXL_COLOR_PROFILE_TARGET_DATA,
+ icc_profile->data(), icc_profile->size())) {
+ fprintf(stderr, "JxlDecoderGetColorAsICCProfile failed\n");
+ return false;
+ }
+ } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+ size_t buffer_size;
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderImageOutBufferSize(dec.get(), &format, &buffer_size)) {
+ fprintf(stderr, "JxlDecoderImageOutBufferSize failed\n");
+ return false;
+ }
+ if (buffer_size != *xsize * *ysize * 16) {
+ fprintf(stderr, "Invalid out buffer size %" PRIu64 " %" PRIu64 "\n",
+ static_cast<uint64_t>(buffer_size),
+ static_cast<uint64_t>(*xsize * *ysize * 16));
+ return false;
+ }
+ pixels->resize(*xsize * *ysize * 4);
+ void* pixels_buffer = (void*)pixels->data();
+ size_t pixels_buffer_size = pixels->size() * sizeof(float);
+ if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(dec.get(), &format,
+ pixels_buffer,
+ pixels_buffer_size)) {
+ fprintf(stderr, "JxlDecoderSetImageOutBuffer failed\n");
+ return false;
+ }
+ } else if (status == JXL_DEC_FULL_IMAGE) {
+ // Nothing to do. Do not yet return. If the image is an animation, more
+ // full frames may be decoded. This example only keeps the last one.
+ } else if (status == JXL_DEC_SUCCESS) {
+ // All decoding successfully finished.
+ // It's not required to call JxlDecoderReleaseInput(dec.get()) here since
+ // the decoder will be destroyed.
+ return true;
+ } else {
+ fprintf(stderr, "Unknown decoder status\n");
+ return false;
+ }
+ }
+}
+
+/** Writes to .pfm file (Portable FloatMap). Gimp, tev viewer and ImageMagick
+ * support viewing this format.
+ * The input pixels are given as 32-bit floating point with 4-channel RGBA.
+ * The alpha channel will not be written since .pfm does not support it.
+ */
+bool WritePFM(const char* filename, const float* pixels, size_t xsize,
+ size_t ysize) {
+ FILE* file = fopen(filename, "wb");
+ if (!file) {
+ fprintf(stderr, "Could not open %s for writing", filename);
+ return false;
+ }
+ uint32_t endian_test = 1;
+ uint8_t little_endian[4];
+ memcpy(little_endian, &endian_test, 4);
+
+ fprintf(file, "PF\n%d %d\n%s\n", (int)xsize, (int)ysize,
+ little_endian[0] ? "-1.0" : "1.0");
+ for (int y = ysize - 1; y >= 0; y--) {
+ for (size_t x = 0; x < xsize; x++) {
+ for (size_t c = 0; c < 3; c++) {
+ const float* f = &pixels[(y * xsize + x) * 4 + c];
+ fwrite(f, 4, 1, file);
+ }
+ }
+ }
+ if (fclose(file) != 0) {
+ return false;
+ }
+ return true;
+}
+
+bool LoadFile(const char* filename, std::vector<uint8_t>* out) {
+ FILE* file = fopen(filename, "rb");
+ if (!file) {
+ return false;
+ }
+
+ if (fseek(file, 0, SEEK_END) != 0) {
+ fclose(file);
+ return false;
+ }
+
+ long size = ftell(file);
+ // Avoid invalid file or directory.
+ if (size >= LONG_MAX || size < 0) {
+ fclose(file);
+ return false;
+ }
+
+ if (fseek(file, 0, SEEK_SET) != 0) {
+ fclose(file);
+ return false;
+ }
+
+ out->resize(size);
+ size_t readsize = fread(out->data(), 1, size, file);
+ if (fclose(file) != 0) {
+ return false;
+ }
+
+ return readsize == static_cast<size_t>(size);
+}
+
+bool WriteFile(const char* filename, const uint8_t* data, size_t size) {
+ FILE* file = fopen(filename, "wb");
+ if (!file) {
+ fprintf(stderr, "Could not open %s for writing", filename);
+ return false;
+ }
+ fwrite(data, 1, size, file);
+ if (fclose(file) != 0) {
+ return false;
+ }
+ return true;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc != 4) {
+ fprintf(stderr,
+ "Usage: %s <jxl> <pfm> <icc>\n"
+ "Where:\n"
+ " jxl = input JPEG XL image filename\n"
+ " pfm = output Portable FloatMap image filename\n"
+ " icc = output ICC color profile filename\n"
+ "Output files will be overwritten.\n",
+ argv[0]);
+ return 1;
+ }
+
+ const char* jxl_filename = argv[1];
+ const char* pfm_filename = argv[2];
+ const char* icc_filename = argv[3];
+
+ std::vector<uint8_t> jxl;
+ if (!LoadFile(jxl_filename, &jxl)) {
+ fprintf(stderr, "couldn't load %s\n", jxl_filename);
+ return 1;
+ }
+
+ std::vector<float> pixels;
+ std::vector<uint8_t> icc_profile;
+ size_t xsize = 0, ysize = 0;
+ if (!DecodeJpegXlOneShot(jxl.data(), jxl.size(), &pixels, &xsize, &ysize,
+ &icc_profile)) {
+ fprintf(stderr, "Error while decoding the jxl file\n");
+ return 1;
+ }
+ if (!WritePFM(pfm_filename, pixels.data(), xsize, ysize)) {
+ fprintf(stderr, "Error while writing the PFM image file\n");
+ return 1;
+ }
+ if (!WriteFile(icc_filename, icc_profile.data(), icc_profile.size())) {
+ fprintf(stderr, "Error while writing the ICC profile file\n");
+ return 1;
+ }
+ printf("Successfully wrote %s and %s\n", pfm_filename, icc_filename);
+ return 0;
+}
diff --git a/third_party/jpeg-xl/examples/decode_progressive.cc b/third_party/jpeg-xl/examples/decode_progressive.cc
new file mode 100644
index 0000000000..0d035121e4
--- /dev/null
+++ b/third_party/jpeg-xl/examples/decode_progressive.cc
@@ -0,0 +1,245 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This C++ example decodes a JPEG XL image progressively (input bytes are
+// passed in chunks). The example outputs the intermediate steps to PAM files.
+
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS
+#endif
+
+#include <inttypes.h>
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+#include <jxl/resizable_parallel_runner.h>
+#include <jxl/resizable_parallel_runner_cxx.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <vector>
+
+bool WritePAM(const char* filename, const uint8_t* buffer, size_t w, size_t h) {
+ FILE* fp = fopen(filename, "wb");
+ if (!fp) {
+ fprintf(stderr, "Could not open %s for writing", filename);
+ return false;
+ }
+ fprintf(fp,
+ "P7\nWIDTH %" PRIu64 "\nHEIGHT %" PRIu64
+ "\nDEPTH 4\nMAXVAL 255\nTUPLTYPE "
+ "RGB_ALPHA\nENDHDR\n",
+ static_cast<uint64_t>(w), static_cast<uint64_t>(h));
+ size_t num_bytes = w * h * 4;
+ if (fwrite(buffer, 1, num_bytes, fp) != num_bytes) {
+ fclose(fp);
+ return false;
+ };
+ if (fclose(fp) != 0) {
+ return false;
+ }
+ return true;
+}
+
+/** Decodes JPEG XL image to 8-bit integer RGBA pixels and an ICC Profile, in a
+ * progressive way, saving the intermediate steps.
+ */
+bool DecodeJpegXlProgressive(const uint8_t* jxl, size_t size,
+ const char* filename, size_t chunksize) {
+ std::vector<uint8_t> pixels;
+ std::vector<uint8_t> icc_profile;
+ size_t xsize = 0, ysize = 0;
+
+ // Multi-threaded parallel runner.
+ auto runner = JxlResizableParallelRunnerMake(nullptr);
+
+ auto dec = JxlDecoderMake(nullptr);
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_BASIC_INFO |
+ JXL_DEC_COLOR_ENCODING |
+ JXL_DEC_FULL_IMAGE)) {
+ fprintf(stderr, "JxlDecoderSubscribeEvents failed\n");
+ return false;
+ }
+
+ if (JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(dec.get(),
+ JxlResizableParallelRunner,
+ runner.get())) {
+ fprintf(stderr, "JxlDecoderSetParallelRunner failed\n");
+ return false;
+ }
+
+ JxlBasicInfo info;
+ JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0};
+
+ size_t seen = 0;
+ JxlDecoderSetInput(dec.get(), jxl, chunksize);
+ size_t remaining = chunksize;
+
+ for (;;) {
+ JxlDecoderStatus status = JxlDecoderProcessInput(dec.get());
+
+ if (status == JXL_DEC_ERROR) {
+ fprintf(stderr, "Decoder error\n");
+ return false;
+ } else if (status == JXL_DEC_NEED_MORE_INPUT || status == JXL_DEC_SUCCESS ||
+ status == JXL_DEC_FULL_IMAGE) {
+ seen += remaining - JxlDecoderReleaseInput(dec.get());
+ printf("Flushing after %" PRIu64 " bytes\n", static_cast<uint64_t>(seen));
+ if (status == JXL_DEC_NEED_MORE_INPUT &&
+ JXL_DEC_SUCCESS != JxlDecoderFlushImage(dec.get())) {
+ printf("flush error (no preview yet)\n");
+ } else {
+ char fname[1024];
+ if (snprintf(fname, 1024, "%s-%" PRIu64 ".pam", filename,
+ static_cast<uint64_t>(seen)) >= 1024) {
+ fprintf(stderr, "Filename too long\n");
+ return false;
+ };
+ if (!WritePAM(fname, pixels.data(), xsize, ysize)) {
+ fprintf(stderr, "Error writing progressive output\n");
+ }
+ }
+ remaining = size - seen;
+ if (remaining > chunksize) remaining = chunksize;
+ if (remaining == 0) {
+ if (status == JXL_DEC_NEED_MORE_INPUT) {
+ fprintf(stderr, "Error, already provided all input\n");
+ return false;
+ } else {
+ return true;
+ }
+ }
+ JxlDecoderSetInput(dec.get(), jxl + seen, remaining);
+ } else if (status == JXL_DEC_BASIC_INFO) {
+ if (JXL_DEC_SUCCESS != JxlDecoderGetBasicInfo(dec.get(), &info)) {
+ fprintf(stderr, "JxlDecoderGetBasicInfo failed\n");
+ return false;
+ }
+ xsize = info.xsize;
+ ysize = info.ysize;
+ JxlResizableParallelRunnerSetThreads(
+ runner.get(),
+ JxlResizableParallelRunnerSuggestThreads(info.xsize, info.ysize));
+ } else if (status == JXL_DEC_COLOR_ENCODING) {
+ // Get the ICC color profile of the pixel data
+ size_t icc_size;
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderGetICCProfileSize(dec.get(), &format,
+ JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+ &icc_size)) {
+ fprintf(stderr, "JxlDecoderGetICCProfileSize failed\n");
+ return false;
+ }
+ icc_profile.resize(icc_size);
+ if (JXL_DEC_SUCCESS != JxlDecoderGetColorAsICCProfile(
+ dec.get(), &format,
+ JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+ icc_profile.data(), icc_profile.size())) {
+ fprintf(stderr, "JxlDecoderGetColorAsICCProfile failed\n");
+ return false;
+ }
+ } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+ size_t buffer_size;
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderImageOutBufferSize(dec.get(), &format, &buffer_size)) {
+ fprintf(stderr, "JxlDecoderImageOutBufferSize failed\n");
+ return false;
+ }
+ if (buffer_size != xsize * ysize * 4) {
+ fprintf(stderr, "Invalid out buffer size %" PRIu64 " != %" PRIu64 "\n",
+ static_cast<uint64_t>(buffer_size),
+ static_cast<uint64_t>(xsize * ysize * 4));
+ return false;
+ }
+ pixels.resize(xsize * ysize * 4);
+ void* pixels_buffer = (void*)pixels.data();
+ size_t pixels_buffer_size = pixels.size() * sizeof(float);
+ if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(dec.get(), &format,
+ pixels_buffer,
+ pixels_buffer_size)) {
+ fprintf(stderr, "JxlDecoderSetImageOutBuffer failed\n");
+ return false;
+ }
+ } else {
+ fprintf(stderr, "Unknown decoder status\n");
+ return false;
+ }
+ }
+}
+
+bool LoadFile(const char* filename, std::vector<uint8_t>* out) {
+ FILE* file = fopen(filename, "rb");
+ if (!file) {
+ return false;
+ }
+
+ if (fseek(file, 0, SEEK_END) != 0) {
+ fclose(file);
+ return false;
+ }
+
+ long size = ftell(file);
+ // Avoid invalid file or directory.
+ if (size >= LONG_MAX || size < 0) {
+ fclose(file);
+ return false;
+ }
+
+ if (fseek(file, 0, SEEK_SET) != 0) {
+ fclose(file);
+ return false;
+ }
+
+ out->resize(size);
+ size_t readsize = fread(out->data(), 1, size, file);
+ if (fclose(file) != 0) {
+ return false;
+ }
+
+ return readsize == static_cast<size_t>(size);
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 3) {
+ fprintf(
+ stderr,
+ "Usage: %s <jxl> <basename> [chunksize]\n"
+ "Where:\n"
+ " jxl = input JPEG XL image filename\n"
+ " basename = prefix of output filenames\n"
+ " chunksize = loads chunksize bytes at a time and writes\n"
+ " intermediate results to basename-[bytes loaded].pam\n"
+ "Output files will be overwritten.\n",
+ argv[0]);
+ return 1;
+ }
+
+ const char* jxl_filename = argv[1];
+ const char* png_filename = argv[2];
+
+ std::vector<uint8_t> jxl;
+ if (!LoadFile(jxl_filename, &jxl)) {
+ fprintf(stderr, "couldn't load %s\n", jxl_filename);
+ return 1;
+ }
+ size_t chunksize = jxl.size();
+ if (argc > 3) {
+ long cs = atol(argv[3]);
+ if (cs < 100) {
+ fprintf(stderr, "Chunk size is too low, try at least 100 bytes\n");
+ return 1;
+ }
+ chunksize = cs;
+ }
+
+ if (!DecodeJpegXlProgressive(jxl.data(), jxl.size(), png_filename,
+ chunksize)) {
+ fprintf(stderr, "Error while decoding the jxl file\n");
+ return 1;
+ }
+ return 0;
+}
diff --git a/third_party/jpeg-xl/examples/encode_oneshot.cc b/third_party/jpeg-xl/examples/encode_oneshot.cc
new file mode 100644
index 0000000000..49b360ce3b
--- /dev/null
+++ b/third_party/jpeg-xl/examples/encode_oneshot.cc
@@ -0,0 +1,276 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This example encodes a file containing a floating point image to another
+// file containing JPEG XL image with a single frame.
+
+#include <jxl/encode.h>
+#include <jxl/encode_cxx.h>
+#include <jxl/thread_parallel_runner.h>
+#include <jxl/thread_parallel_runner_cxx.h>
+#include <limits.h>
+#include <string.h>
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+/**
+ * Reads from .pfm file (Portable FloatMap)
+ *
+ * @param filename name of the file to read
+ * @param pixels vector to fill with loaded pixels as 32-bit floating point with
+ * 3-channel RGB
+ * @param xsize set to width of loaded image
+ * @param ysize set to height of loaded image
+ */
+bool ReadPFM(const char* filename, std::vector<float>* pixels, uint32_t* xsize,
+ uint32_t* ysize) {
+ FILE* file = fopen(filename, "rb");
+ if (!file) {
+ fprintf(stderr, "Could not open %s for reading.\n", filename);
+ return false;
+ }
+ uint32_t endian_test = 1;
+ uint8_t little_endian[4];
+ memcpy(little_endian, &endian_test, 4);
+
+ if (fseek(file, 0, SEEK_END) != 0) {
+ fclose(file);
+ return false;
+ }
+
+ long size = ftell(file);
+ // Avoid invalid file or directory.
+ if (size >= LONG_MAX || size < 0) {
+ fclose(file);
+ return false;
+ }
+
+ if (fseek(file, 0, SEEK_SET) != 0) {
+ fclose(file);
+ return false;
+ }
+
+ std::vector<char> data;
+ data.resize(size);
+
+ size_t readsize = fread(data.data(), 1, size, file);
+ if ((long)readsize != size) {
+ return false;
+ }
+ if (fclose(file) != 0) {
+ return false;
+ }
+
+ std::stringstream datastream;
+ std::string datastream_content(data.data(), data.size());
+ datastream.str(datastream_content);
+
+ std::string pf_token;
+ getline(datastream, pf_token, '\n');
+ if (pf_token != "PF") {
+ fprintf(stderr,
+ "%s doesn't seem to be a 3 channel Portable FloatMap file (missing "
+ "'PF\\n' "
+ "bytes).\n",
+ filename);
+ return false;
+ }
+
+ std::string xsize_token;
+ getline(datastream, xsize_token, ' ');
+ *xsize = std::stoi(xsize_token);
+
+ std::string ysize_token;
+ getline(datastream, ysize_token, '\n');
+ *ysize = std::stoi(ysize_token);
+
+ std::string endianness_token;
+ getline(datastream, endianness_token, '\n');
+ bool input_little_endian;
+ if (endianness_token == "1.0") {
+ input_little_endian = false;
+ } else if (endianness_token == "-1.0") {
+ input_little_endian = true;
+ } else {
+ fprintf(stderr,
+ "%s doesn't seem to be a Portable FloatMap file (endianness token "
+ "isn't '1.0' or '-1.0').\n",
+ filename);
+ return false;
+ }
+
+ size_t offset = pf_token.size() + 1 + xsize_token.size() + 1 +
+ ysize_token.size() + 1 + endianness_token.size() + 1;
+
+ if (data.size() != *ysize * *xsize * 3 * 4 + offset) {
+ fprintf(stderr,
+ "%s doesn't seem to be a Portable FloatMap file (pixel data bytes "
+ "are %d, but expected %d * %d * 3 * 4 + %d (%d).\n",
+ filename, (int)data.size(), (int)*ysize, (int)*xsize, (int)offset,
+ (int)(*ysize * *xsize * 3 * 4 + offset));
+ return false;
+ }
+
+ if (!!little_endian[0] != input_little_endian) {
+ fprintf(stderr,
+ "%s has a different endianness than we do, conversion is not "
+ "supported.\n",
+ filename);
+ return false;
+ }
+
+ pixels->resize(*ysize * *xsize * 3);
+
+ for (int y = *ysize - 1; y >= 0; y--) {
+ for (int x = 0; x < (int)*xsize; x++) {
+ for (int c = 0; c < 3; c++) {
+ memcpy(pixels->data() + (y * *xsize + x) * 3 + c, data.data() + offset,
+ sizeof(float));
+ offset += sizeof(float);
+ }
+ }
+ }
+
+ return true;
+}
+
+/**
+ * Compresses the provided pixels.
+ *
+ * @param pixels input pixels
+ * @param xsize width of the input image
+ * @param ysize height of the input image
+ * @param compressed will be populated with the compressed bytes
+ */
+bool EncodeJxlOneshot(const std::vector<float>& pixels, const uint32_t xsize,
+ const uint32_t ysize, std::vector<uint8_t>* compressed) {
+ auto enc = JxlEncoderMake(/*memory_manager=*/nullptr);
+ auto runner = JxlThreadParallelRunnerMake(
+ /*memory_manager=*/nullptr,
+ JxlThreadParallelRunnerDefaultNumWorkerThreads());
+ if (JXL_ENC_SUCCESS != JxlEncoderSetParallelRunner(enc.get(),
+ JxlThreadParallelRunner,
+ runner.get())) {
+ fprintf(stderr, "JxlEncoderSetParallelRunner failed\n");
+ return false;
+ }
+
+ JxlPixelFormat pixel_format = {3, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0};
+
+ JxlBasicInfo basic_info;
+ JxlEncoderInitBasicInfo(&basic_info);
+ basic_info.xsize = xsize;
+ basic_info.ysize = ysize;
+ basic_info.bits_per_sample = 32;
+ basic_info.exponent_bits_per_sample = 8;
+ basic_info.uses_original_profile = JXL_FALSE;
+ if (JXL_ENC_SUCCESS != JxlEncoderSetBasicInfo(enc.get(), &basic_info)) {
+ fprintf(stderr, "JxlEncoderSetBasicInfo failed\n");
+ return false;
+ }
+
+ JxlColorEncoding color_encoding = {};
+ JxlColorEncodingSetToSRGB(&color_encoding,
+ /*is_gray=*/pixel_format.num_channels < 3);
+ if (JXL_ENC_SUCCESS !=
+ JxlEncoderSetColorEncoding(enc.get(), &color_encoding)) {
+ fprintf(stderr, "JxlEncoderSetColorEncoding failed\n");
+ return false;
+ }
+
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), nullptr);
+
+ if (JXL_ENC_SUCCESS !=
+ JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+ (void*)pixels.data(),
+ sizeof(float) * pixels.size())) {
+ fprintf(stderr, "JxlEncoderAddImageFrame failed\n");
+ return false;
+ }
+ JxlEncoderCloseInput(enc.get());
+
+ compressed->resize(64);
+ uint8_t* next_out = compressed->data();
+ size_t avail_out = compressed->size() - (next_out - compressed->data());
+ JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+ while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out);
+ if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ size_t offset = next_out - compressed->data();
+ compressed->resize(compressed->size() * 2);
+ next_out = compressed->data() + offset;
+ avail_out = compressed->size() - offset;
+ }
+ }
+ compressed->resize(next_out - compressed->data());
+ if (JXL_ENC_SUCCESS != process_result) {
+ fprintf(stderr, "JxlEncoderProcessOutput failed\n");
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * Writes bytes to file.
+ */
+bool WriteFile(const std::vector<uint8_t>& bytes, const char* filename) {
+ FILE* file = fopen(filename, "wb");
+ if (!file) {
+ fprintf(stderr, "Could not open %s for writing\n", filename);
+ return false;
+ }
+ if (fwrite(bytes.data(), sizeof(uint8_t), bytes.size(), file) !=
+ bytes.size()) {
+ fprintf(stderr, "Could not write bytes to %s\n", filename);
+ fclose(file);
+ return false;
+ }
+ if (fclose(file) != 0) {
+ fprintf(stderr, "Could not close %s\n", filename);
+ return false;
+ }
+ return true;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc != 3) {
+ fprintf(stderr,
+ "Usage: %s <pfm> <jxl>\n"
+ "Where:\n"
+ " pfm = input Portable FloatMap image filename\n"
+ " jxl = output JPEG XL image filename\n"
+ "Output files will be overwritten.\n",
+ argv[0]);
+ return 1;
+ }
+
+ const char* pfm_filename = argv[1];
+ const char* jxl_filename = argv[2];
+
+ std::vector<float> pixels;
+ uint32_t xsize;
+ uint32_t ysize;
+ if (!ReadPFM(pfm_filename, &pixels, &xsize, &ysize)) {
+ fprintf(stderr, "Couldn't load %s\n", pfm_filename);
+ return 2;
+ }
+
+ std::vector<uint8_t> compressed;
+ if (!EncodeJxlOneshot(pixels, xsize, ysize, &compressed)) {
+ fprintf(stderr, "Couldn't encode jxl\n");
+ return 3;
+ }
+
+ if (!WriteFile(compressed, jxl_filename)) {
+ fprintf(stderr, "Couldn't write jxl file\n");
+ return 4;
+ }
+
+ return 0;
+}
diff --git a/third_party/jpeg-xl/examples/examples.cmake b/third_party/jpeg-xl/examples/examples.cmake
new file mode 100644
index 0000000000..fd159578bc
--- /dev/null
+++ b/third_party/jpeg-xl/examples/examples.cmake
@@ -0,0 +1,11 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+add_executable(decode_oneshot ${CMAKE_CURRENT_LIST_DIR}/decode_oneshot.cc)
+target_link_libraries(decode_oneshot jxl_dec jxl_threads)
+add_executable(decode_progressive ${CMAKE_CURRENT_LIST_DIR}/decode_progressive.cc)
+target_link_libraries(decode_progressive jxl_dec jxl_threads)
+add_executable(encode_oneshot ${CMAKE_CURRENT_LIST_DIR}/encode_oneshot.cc)
+target_link_libraries(encode_oneshot jxl jxl_threads)
diff --git a/third_party/jpeg-xl/experimental/fast_lossless/.gitignore b/third_party/jpeg-xl/experimental/fast_lossless/.gitignore
new file mode 100644
index 0000000000..567609b123
--- /dev/null
+++ b/third_party/jpeg-xl/experimental/fast_lossless/.gitignore
@@ -0,0 +1 @@
+build/
diff --git a/third_party/jpeg-xl/experimental/fast_lossless/README.md b/third_party/jpeg-xl/experimental/fast_lossless/README.md
new file mode 100644
index 0000000000..5f99c133d8
--- /dev/null
+++ b/third_party/jpeg-xl/experimental/fast_lossless/README.md
@@ -0,0 +1,10 @@
+# Fast-lossless
+This is a script to compile a standalone version of a JXL encoder that supports
+lossless compression, up to 16 bits, of 1- to 4-channel images and animations; it is
+very fast and compression is slightly worse than PNG for 8-bit nonphoto content
+and better or much better than PNG for all other situations.
+
+The main encoder is made out of two files, `lib/jxl/enc_fast_lossless.{cc,h}`;
+it automatically selects and runs a SIMD implementation supported by your CPU.
+
+This folder contains an example build script and `main` file.
diff --git a/third_party/jpeg-xl/experimental/fast_lossless/build-android.sh b/third_party/jpeg-xl/experimental/fast_lossless/build-android.sh
new file mode 100755
index 0000000000..c155b2169a
--- /dev/null
+++ b/third_party/jpeg-xl/experimental/fast_lossless/build-android.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+set -e
+
+DIR=$(realpath "$(dirname "$0")")
+
+mkdir -p /tmp/build-android
+cd /tmp/build-android
+
+CXX="$ANDROID_NDK"/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android30-clang++
+if ! command -v "$CXX" >/dev/null ; then
+ printf >&2 '%s: Android C++ compiler not found, is ANDROID_NDK set properly?\n' "${0##*/}"
+ exit 1
+fi
+
+[ -f lodepng.cpp ] || curl -o lodepng.cpp --url 'https://raw.githubusercontent.com/lvandeve/lodepng/8c6a9e30576f07bf470ad6f09458a2dcd7a6a84a/lodepng.cpp'
+[ -f lodepng.h ] || curl -o lodepng.h --url 'https://raw.githubusercontent.com/lvandeve/lodepng/8c6a9e30576f07bf470ad6f09458a2dcd7a6a84a/lodepng.h'
+[ -f lodepng.o ] || "$CXX" lodepng.cpp -O3 -o lodepng.o -c
+
+"$CXX" -O3 \
+ -I. lodepng.o \
+ -I"${DIR}"/../../ \
+ "${DIR}"/../../lib/jxl/enc_fast_lossless.cc "${DIR}"/fast_lossless_main.cc \
+ -o fast_lossless
diff --git a/third_party/jpeg-xl/experimental/fast_lossless/build.sh b/third_party/jpeg-xl/experimental/fast_lossless/build.sh
new file mode 100755
index 0000000000..e2c0aa3fd0
--- /dev/null
+++ b/third_party/jpeg-xl/experimental/fast_lossless/build.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+set -e
+
+DIR=$(realpath "$(dirname "$0")")
+mkdir -p "$DIR"/build
+cd "$DIR"/build
+
+# set CXX to clang++ if not set in the environment
+CXX="${CXX-clang++}"
+if ! command -v "$CXX" >/dev/null ; then
+ printf >&2 '%s: C++ compiler not found\n' "${0##*/}"
+ exit 1
+fi
+
+[ -f lodepng.cpp ] || curl -o lodepng.cpp --url 'https://raw.githubusercontent.com/lvandeve/lodepng/8c6a9e30576f07bf470ad6f09458a2dcd7a6a84a/lodepng.cpp'
+[ -f lodepng.h ] || curl -o lodepng.h --url 'https://raw.githubusercontent.com/lvandeve/lodepng/8c6a9e30576f07bf470ad6f09458a2dcd7a6a84a/lodepng.h'
+[ -f lodepng.o ] || "$CXX" lodepng.cpp -O3 -o lodepng.o -c
+
+"$CXX" -O3 \
+ -I. -g lodepng.o \
+ -I"$DIR"/../../ \
+ "$DIR"/../../lib/jxl/enc_fast_lossless.cc "$DIR"/fast_lossless_main.cc \
+ -o fast_lossless
diff --git a/third_party/jpeg-xl/experimental/fast_lossless/cross_compile_aarch64.sh b/third_party/jpeg-xl/experimental/fast_lossless/cross_compile_aarch64.sh
new file mode 100755
index 0000000000..a5e6aa2a52
--- /dev/null
+++ b/third_party/jpeg-xl/experimental/fast_lossless/cross_compile_aarch64.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+set -e
+
+DIR=$(realpath "$(dirname "$0")")
+mkdir -p "$DIR"/build-aarch64
+cd "$DIR"/build-aarch64
+
+CXX="${CXX-aarch64-linux-gnu-c++}"
+if ! command -v "$CXX" >/dev/null ; then
+ printf >&2 '%s: C++ compiler not found\n' "${0##*/}"
+ exit 1
+fi
+
+[ -f lodepng.cpp ] || curl -o lodepng.cpp --url 'https://raw.githubusercontent.com/lvandeve/lodepng/8c6a9e30576f07bf470ad6f09458a2dcd7a6a84a/lodepng.cpp'
+[ -f lodepng.h ] || curl -o lodepng.h --url 'https://raw.githubusercontent.com/lvandeve/lodepng/8c6a9e30576f07bf470ad6f09458a2dcd7a6a84a/lodepng.h'
+[ -f lodepng.o ] || "$CXX" lodepng.cpp -O3 -o lodepng.o -c
+
+"$CXX" -O3 -static \
+ -I. lodepng.o \
+ -I"$DIR"/../../ \
+ "$DIR"/../../lib/jxl/enc_fast_lossless.cc "$DIR"/fast_lossless_main.cc \
+ -o fast_lossless
diff --git a/third_party/jpeg-xl/experimental/fast_lossless/fast_lossless_main.cc b/third_party/jpeg-xl/experimental/fast_lossless/fast_lossless_main.cc
new file mode 100644
index 0000000000..b59051d4e2
--- /dev/null
+++ b/third_party/jpeg-xl/experimental/fast_lossless/fast_lossless_main.cc
@@ -0,0 +1,113 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <atomic>
+#include <chrono>
+#include <thread>
+#include <vector>
+
+#include "lib/jxl/enc_fast_lossless.h"
+#include "lodepng.h"
+#include "pam-input.h"
+
+int main(int argc, char** argv) {
+ if (argc < 3) {
+ fprintf(stderr,
+ "Usage: %s in.png out.jxl [effort] [num_reps] [num_threads]\n",
+ argv[0]);
+ return 1;
+ }
+
+ const char* in = argv[1];
+ const char* out = argv[2];
+ int effort = argc >= 4 ? atoi(argv[3]) : 2;
+ size_t num_reps = argc >= 5 ? atoi(argv[4]) : 1;
+ size_t num_threads = argc >= 6 ? atoi(argv[5]) : 0;
+
+ if (effort < 0 || effort > 127) {
+ fprintf(
+ stderr,
+ "Effort should be between 0 and 127 (default is 2, more is slower)\n");
+ return 1;
+ }
+
+ unsigned char* png;
+ unsigned w, h;
+ size_t nb_chans = 4, bitdepth = 8;
+
+ unsigned error = lodepng_decode32_file(&png, &w, &h, in);
+
+ size_t width = w, height = h;
+ if (error && !DecodePAM(in, &png, &width, &height, &nb_chans, &bitdepth)) {
+ fprintf(stderr, "lodepng error %u: %s\n", error, lodepng_error_text(error));
+ return 1;
+ }
+
+ auto parallel_runner = [](void* num_threads_ptr, void* opaque,
+ void fun(void*, size_t), size_t count) {
+ size_t num_threads = *(size_t*)num_threads_ptr;
+ if (num_threads == 0) {
+ num_threads = std::thread::hardware_concurrency();
+ }
+ if (num_threads > count) {
+ num_threads = count;
+ }
+ if (num_threads == 1) {
+ for (size_t i = 0; i < count; i++) {
+ fun(opaque, i);
+ }
+ } else {
+ std::atomic<int> task{0};
+ std::vector<std::thread> threads;
+ for (size_t i = 0; i < num_threads; i++) {
+ threads.push_back(std::thread([count, opaque, fun, &task]() {
+ while (true) {
+ int t = task++;
+ if (t >= count) break;
+ fun(opaque, t);
+ }
+ }));
+ }
+ for (auto& t : threads) t.join();
+ }
+ };
+
+ size_t encoded_size = 0;
+ unsigned char* encoded = nullptr;
+ size_t stride = width * nb_chans * (bitdepth > 8 ? 2 : 1);
+
+ auto start = std::chrono::high_resolution_clock::now();
+ for (size_t _ = 0; _ < num_reps; _++) {
+ free(encoded);
+ encoded_size = JxlFastLosslessEncode(
+ png, width, stride, height, nb_chans, bitdepth,
+ /*big_endian=*/true, effort, &encoded, &num_threads, +parallel_runner);
+ }
+ auto stop = std::chrono::high_resolution_clock::now();
+ if (num_reps > 1) {
+ float us =
+ std::chrono::duration_cast<std::chrono::microseconds>(stop - start)
+ .count();
+ size_t pixels = size_t{width} * size_t{height} * num_reps;
+ float mps = pixels / us;
+ fprintf(stderr, "%10.3f MP/s\n", mps);
+ fprintf(stderr, "%10.3f bits/pixel\n",
+ encoded_size * 8.0 / float(width) / float(height));
+ }
+
+ FILE* o = fopen(out, "wb");
+ if (!o) {
+ fprintf(stderr, "error opening %s: %s\n", out, strerror(errno));
+ return 1;
+ }
+ if (fwrite(encoded, 1, encoded_size, o) != encoded_size) {
+ fprintf(stderr, "error writing to %s: %s\n", out, strerror(errno));
+ }
+ fclose(o);
+}
diff --git a/third_party/jpeg-xl/experimental/fast_lossless/pam-input.h b/third_party/jpeg-xl/experimental/fast_lossless/pam-input.h
new file mode 100644
index 0000000000..4ecbe6b72d
--- /dev/null
+++ b/third_party/jpeg-xl/experimental/fast_lossless/pam-input.h
@@ -0,0 +1,289 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+bool error_msg(const char* message) {
+ fprintf(stderr, "%s\n", message);
+ return false;
+}
+#define return_on_error(X) \
+ if (!X) return false;
+
+size_t Log2(uint32_t value) { return 31 - __builtin_clz(value); }
+
+struct HeaderPNM {
+ size_t xsize;
+ size_t ysize;
+ bool is_gray; // PGM
+ bool has_alpha; // PAM
+ size_t bits_per_sample;
+};
+
+class Parser {
+ public:
+ explicit Parser(uint8_t* data, size_t length)
+ : pos_(data), end_(data + length) {}
+
+ // Sets "pos" to the first non-header byte/pixel on success.
+ bool ParseHeader(HeaderPNM* header, const uint8_t** pos) {
+ // codec.cc ensures we have at least two bytes => no range check here.
+ if (pos_[0] != 'P') return false;
+ const uint8_t type = pos_[1];
+ pos_ += 2;
+
+ switch (type) {
+ case '5':
+ header->is_gray = true;
+ return ParseHeaderPNM(header, pos);
+
+ case '6':
+ header->is_gray = false;
+ return ParseHeaderPNM(header, pos);
+
+ case '7':
+ return ParseHeaderPAM(header, pos);
+ }
+ return false;
+ }
+
+ // Exposed for testing
+ bool ParseUnsigned(size_t* number) {
+ if (pos_ == end_) return error_msg("PNM: reached end before number");
+ if (!IsDigit(*pos_)) return error_msg("PNM: expected unsigned number");
+
+ *number = 0;
+ while (pos_ < end_ && *pos_ >= '0' && *pos_ <= '9') {
+ *number *= 10;
+ *number += *pos_ - '0';
+ ++pos_;
+ }
+
+ return true;
+ }
+
+ bool ParseSigned(double* number) {
+ if (pos_ == end_) return error_msg("PNM: reached end before signed");
+
+ if (*pos_ != '-' && *pos_ != '+' && !IsDigit(*pos_)) {
+ return error_msg("PNM: expected signed number");
+ }
+
+ // Skip sign
+ const bool is_neg = *pos_ == '-';
+ if (is_neg || *pos_ == '+') {
+ ++pos_;
+ if (pos_ == end_) return error_msg("PNM: reached end before digits");
+ }
+
+ // Leading digits
+ *number = 0.0;
+ while (pos_ < end_ && *pos_ >= '0' && *pos_ <= '9') {
+ *number *= 10;
+ *number += *pos_ - '0';
+ ++pos_;
+ }
+
+ // Decimal places?
+ if (pos_ < end_ && *pos_ == '.') {
+ ++pos_;
+ double place = 0.1;
+ while (pos_ < end_ && *pos_ >= '0' && *pos_ <= '9') {
+ *number += (*pos_ - '0') * place;
+ place *= 0.1;
+ ++pos_;
+ }
+ }
+
+ if (is_neg) *number = -*number;
+ return true;
+ }
+
+ private:
+ static bool IsDigit(const uint8_t c) { return '0' <= c && c <= '9'; }
+ static bool IsLineBreak(const uint8_t c) { return c == '\r' || c == '\n'; }
+ static bool IsWhitespace(const uint8_t c) {
+ return IsLineBreak(c) || c == '\t' || c == ' ';
+ }
+
+ bool SkipBlank() {
+ if (pos_ == end_) return error_msg("PNM: reached end before blank");
+ const uint8_t c = *pos_;
+ if (c != ' ' && c != '\n') return error_msg("PNM: expected blank");
+ ++pos_;
+ return true;
+ }
+
+ bool SkipSingleWhitespace() {
+ if (pos_ == end_) return error_msg("PNM: reached end before whitespace");
+ if (!IsWhitespace(*pos_)) return error_msg("PNM: expected whitespace");
+ ++pos_;
+ return true;
+ }
+
+ bool SkipWhitespace() {
+ if (pos_ == end_) return error_msg("PNM: reached end before whitespace");
+ if (!IsWhitespace(*pos_) && *pos_ != '#') {
+ return error_msg("PNM: expected whitespace/comment");
+ }
+
+ while (pos_ < end_ && IsWhitespace(*pos_)) {
+ ++pos_;
+ }
+
+ // Comment(s)
+ while (pos_ != end_ && *pos_ == '#') {
+ while (pos_ != end_ && !IsLineBreak(*pos_)) {
+ ++pos_;
+ }
+ // Newline(s)
+ while (pos_ != end_ && IsLineBreak(*pos_)) pos_++;
+ }
+
+ while (pos_ < end_ && IsWhitespace(*pos_)) {
+ ++pos_;
+ }
+ return true;
+ }
+
+ bool MatchString(const char* keyword) {
+ const uint8_t* ppos = pos_;
+ while (*keyword) {
+ if (ppos >= end_) return error_msg("PAM: unexpected end of input");
+ if (*keyword != *ppos) return false;
+ ppos++;
+ keyword++;
+ }
+ pos_ = ppos;
+ return_on_error(SkipWhitespace());
+ return true;
+ }
+
+ bool ParseHeaderPAM(HeaderPNM* header, const uint8_t** pos) {
+ size_t num_channels = 3;
+ size_t max_val = 255;
+ while (!MatchString("ENDHDR")) {
+ return_on_error(SkipWhitespace());
+ if (MatchString("WIDTH")) {
+ return_on_error(ParseUnsigned(&header->xsize));
+ } else if (MatchString("HEIGHT")) {
+ return_on_error(ParseUnsigned(&header->ysize));
+ } else if (MatchString("DEPTH")) {
+ return_on_error(ParseUnsigned(&num_channels));
+ } else if (MatchString("MAXVAL")) {
+ return_on_error(ParseUnsigned(&max_val));
+ } else if (MatchString("TUPLTYPE")) {
+ if (MatchString("RGB_ALPHA")) {
+ header->has_alpha = true;
+ } else if (MatchString("RGB")) {
+ } else if (MatchString("GRAYSCALE_ALPHA")) {
+ header->has_alpha = true;
+ header->is_gray = true;
+ } else if (MatchString("GRAYSCALE")) {
+ header->is_gray = true;
+ } else if (MatchString("BLACKANDWHITE_ALPHA")) {
+ header->has_alpha = true;
+ header->is_gray = true;
+ max_val = 1;
+ } else if (MatchString("BLACKANDWHITE")) {
+ header->is_gray = true;
+ max_val = 1;
+ } else {
+ return error_msg("PAM: unknown TUPLTYPE");
+ }
+ } else {
+ return error_msg("PAM: unknown header keyword");
+ }
+ }
+ if (num_channels !=
+ (header->has_alpha ? 1 : 0) + (header->is_gray ? 1 : 3)) {
+ return error_msg("PAM: bad DEPTH");
+ }
+ if (max_val == 0 || max_val >= 65536) {
+ return error_msg("PAM: bad MAXVAL");
+ }
+ header->bits_per_sample = Log2(max_val + 1);
+
+ *pos = pos_;
+ return true;
+ }
+
+ bool ParseHeaderPNM(HeaderPNM* header, const uint8_t** pos) {
+ return_on_error(SkipWhitespace());
+ return_on_error(ParseUnsigned(&header->xsize));
+
+ return_on_error(SkipWhitespace());
+ return_on_error(ParseUnsigned(&header->ysize));
+
+ return_on_error(SkipWhitespace());
+ size_t max_val;
+ return_on_error(ParseUnsigned(&max_val));
+ if (max_val == 0 || max_val >= 65536) {
+ return error_msg("PNM: bad MaxVal");
+ }
+ header->bits_per_sample = Log2(max_val + 1);
+
+ return_on_error(SkipSingleWhitespace());
+
+ *pos = pos_;
+ return true;
+ }
+
+ const uint8_t* pos_;
+ const uint8_t* const end_;
+};
+
+bool load_file(unsigned char** out, size_t* outsize, const char* filename) {
+ FILE* file;
+ file = fopen(filename, "rb");
+ if (!file) return false;
+ if (fseek(file, 0, SEEK_END) != 0) {
+ fclose(file);
+ return false;
+ }
+ *outsize = ftell(file);
+ if (*outsize == LONG_MAX || *outsize < 9 || fseek(file, 0, SEEK_SET)) {
+ fclose(file);
+ return false;
+ }
+ *out = (unsigned char*)malloc(*outsize);
+ if (!(*out)) return false;
+ size_t readsize;
+ readsize = fread(*out, 1, *outsize, file);
+ fclose(file);
+ if (readsize != *outsize) return false;
+ return true;
+}
+
+bool DecodePAM(const char* filename, uint8_t** buffer, size_t* w, size_t* h,
+ size_t* nb_chans, size_t* bitdepth) {
+ unsigned char* in_file;
+ size_t in_size;
+ if (!load_file(&in_file, &in_size, filename))
+ return error_msg("Could not read input file");
+ Parser parser(in_file, in_size);
+ HeaderPNM header = {};
+ const uint8_t* pos = nullptr;
+ if (!parser.ParseHeader(&header, &pos)) return false;
+
+ if (header.bits_per_sample == 0 || header.bits_per_sample > 16) {
+ return error_msg("PNM: bits_per_sample invalid (can do at most 16-bit)");
+ }
+ *w = header.xsize;
+ *h = header.ysize;
+ *bitdepth = header.bits_per_sample;
+ *nb_chans = (header.is_gray ? 1 : 3) + (header.has_alpha ? 1 : 0);
+
+ size_t pnm_remaining_size = in_file + in_size - pos;
+ size_t buffer_size = *w * *h * *nb_chans * (*bitdepth > 8 ? 2 : 1);
+ if (pnm_remaining_size < buffer_size) {
+ return error_msg("PNM file too small");
+ }
+ *buffer = (uint8_t*)malloc(buffer_size);
+ memcpy(*buffer, pos, buffer_size);
+ return true;
+}
diff --git a/third_party/jpeg-xl/lib/BUILD b/third_party/jpeg-xl/lib/BUILD
new file mode 100644
index 0000000000..1707a36f77
--- /dev/null
+++ b/third_party/jpeg-xl/lib/BUILD
@@ -0,0 +1,256 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Load sources/headers/tests lists.
+load(
+ "jxl_lists.bzl",
+ "libjxl_base_sources",
+ "libjxl_codec_apng_sources",
+ "libjxl_codec_exr_sources",
+ "libjxl_codec_gif_sources",
+ "libjxl_codec_jpegli_sources",
+ "libjxl_codec_jpg_sources",
+ "libjxl_codec_jxl_sources",
+ "libjxl_codec_npy_sources",
+ "libjxl_codec_pgx_sources",
+ "libjxl_codec_pnm_sources",
+ "libjxl_dec_box_sources",
+ "libjxl_dec_jpeg_sources",
+ "libjxl_dec_sources",
+ "libjxl_enc_sources",
+ "libjxl_extras_for_tools_sources",
+ "libjxl_extras_sources",
+ #'libjxl_gbench_sources',
+ "libjxl_jpegli_sources",
+ "libjxl_jpegli_testlib_files",
+ "libjxl_jpegli_tests",
+ "libjxl_major_version",
+ "libjxl_minor_version",
+ "libjxl_patch_version",
+ "libjxl_public_headers",
+ "libjxl_testlib_files",
+ "libjxl_tests",
+ "libjxl_threads_public_headers",
+ "libjxl_threads_sources",
+)
+load(
+ "jxl_vars.bzl",
+ "libjxl_deps_brotli",
+ "libjxl_deps_exr",
+ "libjxl_deps_gif",
+ "libjxl_deps_gtest",
+ "libjxl_deps_hwy",
+ "libjxl_deps_hwy_nanobenchmark",
+ "libjxl_deps_hwy_test_util",
+ "libjxl_deps_jpeg",
+ "libjxl_deps_jxl_box",
+ "libjxl_deps_png",
+ "libjxl_deps_runfiles",
+ "libjxl_deps_skcms",
+ "libjxl_deps_testdata",
+ "libjxl_root_package",
+ "libjxl_test_shards",
+ "libjxl_test_timeouts",
+)
+load("@bazel_skylib//rules:expand_template.bzl", "expand_template")
+
+DEFAULT_VISIBILITY = ["//:__subpackages__"]
+
+DEFAULT_COMPATIBILITY = []
+
+INCLUDES_DIR = "include"
+
+package(
+ default_visibility = ["//:__subpackages__"],
+)
+
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+EXPORT_TEMPLATE = """
+#ifndef @_EXPORT_H
+#define @_EXPORT_H
+
+#define @_EXPORT
+#define @_NO_EXPORT
+
+#ifndef @_DEPRECATED
+# define @_DEPRECATED __attribute__ ((__deprecated__))
+#endif
+
+#endif
+"""
+
+JXL_EXPORT_H = INCLUDES_DIR + "/jxl/jxl_export.h"
+
+genrule(
+ name = "create_jxl_export",
+ outs = [JXL_EXPORT_H],
+ cmd = "echo '" + EXPORT_TEMPLATE.replace("@", "JXL") + "' > $@",
+ compatible_with = DEFAULT_COMPATIBILITY,
+)
+
+JXL_THREADS_EXPORT_H = INCLUDES_DIR + "/jxl/jxl_threads_export.h"
+
+genrule(
+ name = "create_jxl_threads_export",
+ outs = [JXL_THREADS_EXPORT_H],
+ cmd = "echo '" + EXPORT_TEMPLATE.replace("@", "JXL_THREADS") + "' > $@",
+ compatible_with = DEFAULT_COMPATIBILITY,
+)
+
+JXL_VERSION_H = INCLUDES_DIR + "/jxl/version.h"
+
+expand_template(
+ name = "expand_jxl_version",
+ out = JXL_VERSION_H,
+ compatible_with = DEFAULT_COMPATIBILITY,
+ substitutions = {
+ "@JPEGXL_MAJOR_VERSION@": str(libjxl_major_version),
+ "@JPEGXL_MINOR_VERSION@": str(libjxl_minor_version),
+ "@JPEGXL_PATCH_VERSION@": str(libjxl_patch_version),
+ },
+ template = "jxl/version.h.in",
+)
+
+cc_library(
+ name = "jxl_version",
+ hdrs = [JXL_VERSION_H],
+ compatible_with = DEFAULT_COMPATIBILITY,
+ strip_include_prefix = INCLUDES_DIR,
+)
+
+cc_library(
+ name = "includes",
+ hdrs = libjxl_public_headers + [JXL_EXPORT_H],
+ compatible_with = DEFAULT_COMPATIBILITY,
+ strip_include_prefix = INCLUDES_DIR,
+ deps = [":jxl_version"],
+)
+
+cc_library(
+ name = "base",
+ srcs = [path for path in libjxl_base_sources if path.endswith(".cc")],
+ hdrs = [path for path in libjxl_base_sources if path.endswith(".h")],
+ compatible_with = DEFAULT_COMPATIBILITY,
+ deps = [
+ ":includes",
+ ] + libjxl_deps_hwy,
+)
+
+cc_library(
+ name = "jpegxl",
+ srcs = libjxl_dec_sources + libjxl_dec_box_sources + libjxl_dec_jpeg_sources + libjxl_enc_sources,
+ compatible_with = DEFAULT_COMPATIBILITY,
+ defines = ["JPEGXL_ENABLE_SKCMS=1"],
+ deps = [
+ ":base",
+ ":includes",
+ ] + libjxl_deps_brotli + libjxl_deps_hwy + libjxl_deps_skcms,
+)
+
+cc_library(
+ name = "jpegxl_private",
+ hdrs = [
+ path
+ for path in libjxl_dec_sources + libjxl_dec_box_sources + libjxl_dec_jpeg_sources + libjxl_enc_sources
+ if path.endswith(".h") and not path.endswith("-inl.h")
+ ],
+ compatible_with = DEFAULT_COMPATIBILITY,
+ deps = [":jpegxl"],
+)
+
+cc_library(
+ name = "jpegxl_threads",
+ srcs = libjxl_threads_sources,
+ hdrs = libjxl_threads_public_headers + [JXL_THREADS_EXPORT_H],
+ compatible_with = DEFAULT_COMPATIBILITY,
+ strip_include_prefix = INCLUDES_DIR,
+ deps = [
+ ":base",
+ ":includes",
+ ],
+)
+
+CODEC_FILES = libjxl_codec_apng_sources + libjxl_codec_exr_sources + libjxl_codec_gif_sources + libjxl_codec_jpegli_sources + libjxl_codec_jpg_sources + libjxl_codec_jxl_sources + libjxl_codec_npy_sources + libjxl_codec_pgx_sources + libjxl_codec_pnm_sources
+
+CODEC_SRCS = [path for path in CODEC_FILES if path.endswith(".cc")]
+
+CODEC_HDRS = [path for path in CODEC_FILES if path.endswith(".h")]
+
+cc_library(
+ name = "jpegli",
+ srcs = libjxl_jpegli_sources,
+ hdrs = [
+ "jpegli/common_internal.h", # TODO(eustas): should not be here
+ ],
+ compatible_with = DEFAULT_COMPATIBILITY,
+ deps = [
+ ":jpegxl_private",
+ ] + libjxl_deps_hwy + libjxl_deps_jpeg,
+)
+
+# TODO(eustas): build codecs separately?
+cc_library(
+ name = "jpegxl_extras",
+ srcs = libjxl_extras_sources + libjxl_extras_for_tools_sources + CODEC_SRCS,
+ hdrs = CODEC_HDRS,
+ compatible_with = DEFAULT_COMPATIBILITY,
+ defines = [
+ "JPEGXL_ENABLE_APNG=1",
+ "JPEGXL_ENABLE_EXR=1",
+ "JPEGXL_ENABLE_GIF=1",
+ "JPEGXL_ENABLE_JPEG=1",
+ "JPEGXL_ENABLE_JPEGLI=1",
+ ],
+ deps = [
+ ":jpegli",
+ ":jpegxl_private",
+ ":jpegxl_threads",
+ ":jxl_version",
+ ] + libjxl_deps_exr + libjxl_deps_gif + libjxl_deps_jpeg + libjxl_deps_png,
+)
+
+TESTLIB_FILES = libjxl_testlib_files + libjxl_jpegli_testlib_files
+
+cc_library(
+ name = "test_utils",
+ testonly = 1,
+ srcs = [path for path in TESTLIB_FILES if not path.endswith(".h")],
+ hdrs = [path for path in TESTLIB_FILES if path.endswith(".h")],
+ compatible_with = DEFAULT_COMPATIBILITY,
+ defines = [
+ 'JPEGXL_ROOT_PACKAGE=\'"' + libjxl_root_package + '"\'',
+ ],
+ deps = [
+ ":jpegli",
+ ":jpegxl_extras",
+ ":jpegxl_private",
+ ] + libjxl_deps_runfiles,
+)
+
+TESTS = [path.partition(".")[0] for path in libjxl_tests + libjxl_jpegli_tests]
+
+[
+ cc_test(
+ name = test,
+ timeout = libjxl_test_timeouts.get(test, "moderate"),
+ srcs = [
+ test + ".cc",
+ "jpegli/testing.h",
+ "jxl/testing.h",
+ ],
+ data = ["//:testdata"],
+ shard_count = libjxl_test_shards.get(test, 1),
+ deps = [
+ ":jpegxl_extras",
+ ":jpegxl_private",
+ ":jpegxl_threads",
+ ":test_utils",
+ ] + libjxl_deps_gtest + libjxl_deps_hwy_test_util + libjxl_deps_hwy_nanobenchmark + libjxl_deps_jxl_box,
+ )
+ for test in TESTS
+]
diff --git a/third_party/jpeg-xl/lib/CMakeLists.txt b/third_party/jpeg-xl/lib/CMakeLists.txt
new file mode 100644
index 0000000000..a340288726
--- /dev/null
+++ b/third_party/jpeg-xl/lib/CMakeLists.txt
@@ -0,0 +1,168 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+set(JPEGXL_MAJOR_VERSION 0)
+set(JPEGXL_MINOR_VERSION 9)
+set(JPEGXL_PATCH_VERSION 0)
+set(JPEGXL_LIBRARY_VERSION
+ "${JPEGXL_MAJOR_VERSION}.${JPEGXL_MINOR_VERSION}.${JPEGXL_PATCH_VERSION}")
+
+# This is the library API/ABI compatibility version. Changing this value makes
+# the shared library incompatible with previous version. A program linked
+# against this shared library SOVERSION will not run with an older SOVERSION.
+# It is important to update this value when making incompatible API/ABI changes
+# so that programs that depend on libjxl can update their dependencies. Semantic
+# versioning allows 0.y.z to have incompatible changes in minor versions.
+set(JPEGXL_SO_MINOR_VERSION 9)
+if (JPEGXL_MAJOR_VERSION EQUAL 0)
+ set(JPEGXL_LIBRARY_SOVERSION
+ "${JPEGXL_MAJOR_VERSION}.${JPEGXL_SO_MINOR_VERSION}")
+else()
+ set(JPEGXL_LIBRARY_SOVERSION "${JPEGXL_MAJOR_VERSION}")
+endif()
+
+
+# List of warning and feature flags for our library and tests.
+if (MSVC)
+ set(JPEGXL_INTERNAL_FLAGS
+ # TODO(janwas): add flags
+ )
+else ()
+ set(JPEGXL_INTERNAL_FLAGS
+ # F_FLAGS
+ -fmerge-all-constants
+ -fno-builtin-fwrite
+ -fno-builtin-fread
+
+ # WARN_FLAGS
+ -Wall
+ -Wextra
+ -Wc++11-compat
+ -Warray-bounds
+ -Wformat-security
+ -Wimplicit-fallthrough
+ -Wno-register # Needed by public headers in lcms
+ -Wno-unused-function
+ -Wno-unused-parameter
+ -Wnon-virtual-dtor
+ -Woverloaded-virtual
+ -Wvla
+ )
+
+ # Warning flags supported by clang.
+ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ list(APPEND JPEGXL_INTERNAL_FLAGS
+ -Wdeprecated-increment-bool
+ # TODO(deymo): Add -Wextra-semi once we update third_party/highway.
+ # -Wextra-semi
+ -Wfloat-overflow-conversion
+ -Wfloat-zero-conversion
+ -Wfor-loop-analysis
+ -Wgnu-redeclared-enum
+ -Winfinite-recursion
+ -Wliteral-conversion
+ -Wno-c++98-compat
+ -Wno-unused-command-line-argument
+ -Wprivate-header
+ -Wself-assign
+ -Wstring-conversion
+ -Wtautological-overlap-compare
+ -Wthread-safety-analysis
+ -Wundefined-func-template
+ -Wunreachable-code
+ -Wunused-comparison
+ )
+ if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0)
+ list(APPEND HWY_FLAGS -Wc++2a-extensions)
+ endif()
+ endif() # Clang
+
+ if (WIN32)
+ list(APPEND JPEGXL_INTERNAL_FLAGS
+ -Wno-cast-align
+ -Wno-double-promotion
+ -Wno-float-equal
+ -Wno-format-nonliteral
+ -Wno-shadow
+ -Wno-sign-conversion
+ -Wno-zero-as-null-pointer-constant
+ )
+
+ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ list(APPEND JPEGXL_INTERNAL_FLAGS
+ -Wno-used-but-marked-unused
+ -Wno-unused-template
+ -Wno-unused-member-function
+ -Wno-shadow-field-in-constructor
+ -Wno-language-extension-token
+ -Wno-global-constructors
+ -Wno-c++98-compat-pedantic
+ )
+ endif() # Clang
+ else() # WIN32
+ list(APPEND JPEGXL_INTERNAL_FLAGS
+ -fsized-deallocation
+ -fno-exceptions
+
+ # Language flags
+ -fmath-errno
+ )
+
+ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ list(APPEND JPEGXL_INTERNAL_FLAGS
+ -fnew-alignment=8
+ -fno-cxx-exceptions
+ -fno-slp-vectorize
+ -fno-vectorize
+
+ -disable-free
+ -disable-llvm-verifier
+ )
+ endif() # Clang
+ endif() # WIN32
+endif() #!MSVC
+
+# strips the -static suffix from all the elements in LIST
+function(strip_static OUTPUT_VAR LIB_LIST)
+ foreach(lib IN LISTS ${LIB_LIST})
+ string(REGEX REPLACE "-static$" "" lib "${lib}")
+ list(APPEND out_list "${lib}")
+ endforeach()
+ set(${OUTPUT_VAR} ${out_list} PARENT_SCOPE)
+endfunction()
+
+# The jxl library definition.
+include(jxl.cmake)
+
+# Other libraries outside the core jxl library.
+if(JPEGXL_ENABLE_TOOLS)
+ include(jxl_extras.cmake)
+endif()
+include(jxl_threads.cmake)
+find_package(JPEG)
+if (JPEG_FOUND AND JPEGXL_ENABLE_JPEGLI)
+ include(jpegli.cmake)
+endif()
+
+# Install all the library headers from the source and the generated ones. There
+# is no distinction on which libraries use which header since it is expected
+# that all developer libraries are available together at build time.
+install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/jxl
+ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
+install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/jxl
+ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
+
+if(BUILD_TESTING)
+ cmake_policy(SET CMP0057 NEW) # https://gitlab.kitware.com/cmake/cmake/issues/18198
+ include(GoogleTest)
+endif()
+
+# Tests for the jxl library.
+include(jxl_tests.cmake)
+
+if(BUILD_TESTING)
+ # Google benchmark for the jxl library
+ include(jxl_benchmark.cmake)
+endif()
diff --git a/third_party/jpeg-xl/lib/compatibility.cmake b/third_party/jpeg-xl/lib/compatibility.cmake
new file mode 100644
index 0000000000..9d99d29482
--- /dev/null
+++ b/third_party/jpeg-xl/lib/compatibility.cmake
@@ -0,0 +1,30 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+function(jxl_discover_tests TESTNAME)
+ if (CMAKE_VERSION VERSION_LESS "3.10.3")
+ gtest_discover_tests(${TESTNAME} TIMEOUT 240)
+ else ()
+ gtest_discover_tests(${TESTNAME} DISCOVERY_TIMEOUT 240)
+ endif ()
+endfunction()
+
+function(jxl_link_libraries DST SRC)
+ if (CMAKE_VERSION VERSION_LESS "3.13.5")
+ target_include_directories(${DST} SYSTEM PUBLIC
+ $<BUILD_INTERFACE:$<TARGET_PROPERTY:${SRC},INTERFACE_SYSTEM_INCLUDE_DIRECTORIES>>
+ )
+ add_dependencies(${DST} ${SRC})
+ else()
+ target_link_libraries(${DST} PUBLIC ${SRC})
+ endif()
+endfunction()
+
+
+if (CMAKE_VERSION VERSION_LESS "3.12.4")
+ set(JXL_HWY_INCLUDE_DIRS "$<BUILD_INTERFACE:$<TARGET_PROPERTY:hwy,INTERFACE_INCLUDE_DIRECTORIES>>")
+else()
+ set(JXL_HWY_INCLUDE_DIRS "$<BUILD_INTERFACE:$<TARGET_PROPERTY:$<IF:$<TARGET_EXISTS:hwy::hwy>,hwy::hwy,hwy>,INTERFACE_INCLUDE_DIRECTORIES>>")
+endif()
diff --git a/third_party/jpeg-xl/lib/extras/LICENSE.apngdis b/third_party/jpeg-xl/lib/extras/LICENSE.apngdis
new file mode 100644
index 0000000000..eb0ba7c07b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/LICENSE.apngdis
@@ -0,0 +1,27 @@
+APNG Disassembler 2.8
+
+Deconstructs APNG files into individual frames.
+
+http://apngdis.sourceforge.net
+
+Copyright (c) 2010-2015 Max Stepin
+maxst at users.sourceforge.net
+
+zlib license
+------------
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
diff --git a/third_party/jpeg-xl/lib/extras/README.md b/third_party/jpeg-xl/lib/extras/README.md
new file mode 100644
index 0000000000..06a9b5ea07
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/README.md
@@ -0,0 +1,5 @@
+## JPEG XL "extras"
+
+The files in this directory do not form part of the library or codec and are
+only used by tests or specific internal tools that have access to the internals
+of the library.
diff --git a/third_party/jpeg-xl/lib/extras/codec.cc b/third_party/jpeg-xl/lib/extras/codec.cc
new file mode 100644
index 0000000000..5d3f00706e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/codec.cc
@@ -0,0 +1,191 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/codec.h"
+
+#include <jxl/decode.h>
+#include <jxl/types.h>
+
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+
+#if JPEGXL_ENABLE_APNG
+#include "lib/extras/enc/apng.h"
+#endif
+#if JPEGXL_ENABLE_JPEG
+#include "lib/extras/enc/jpg.h"
+#endif
+#if JPEGXL_ENABLE_EXR
+#include "lib/extras/enc/exr.h"
+#endif
+
+#include "lib/extras/dec/decode.h"
+#include "lib/extras/enc/pgx.h"
+#include "lib/extras/enc/pnm.h"
+#include "lib/extras/packed_image_convert.h"
+#include "lib/jxl/base/file_io.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+namespace {
+
+// Any valid encoding is larger (ensures codecs can read the first few bytes)
+constexpr size_t kMinBytes = 9;
+
+} // namespace
+
+Status SetFromBytes(const Span<const uint8_t> bytes,
+ const extras::ColorHints& color_hints, CodecInOut* io,
+ ThreadPool* pool, const SizeConstraints* constraints,
+ extras::Codec* orig_codec) {
+ if (bytes.size() < kMinBytes) return JXL_FAILURE("Too few bytes");
+
+ extras::PackedPixelFile ppf;
+ if (extras::DecodeBytes(bytes, color_hints, &ppf, constraints, orig_codec)) {
+ return ConvertPackedPixelFileToCodecInOut(ppf, pool, io);
+ }
+ return JXL_FAILURE("Codecs failed to decode");
+}
+
+Status SetFromFile(const std::string& pathname,
+ const extras::ColorHints& color_hints, CodecInOut* io,
+ ThreadPool* pool, const SizeConstraints* constraints,
+ extras::Codec* orig_codec) {
+ std::vector<uint8_t> encoded;
+ JXL_RETURN_IF_ERROR(ReadFile(pathname, &encoded));
+ JXL_RETURN_IF_ERROR(SetFromBytes(Span<const uint8_t>(encoded), color_hints,
+ io, pool, constraints, orig_codec));
+ return true;
+}
+
+Status Encode(const CodecInOut& io, const extras::Codec codec,
+ const ColorEncoding& c_desired, size_t bits_per_sample,
+ std::vector<uint8_t>* bytes, ThreadPool* pool) {
+ JXL_CHECK(!io.Main().c_current().ICC().empty());
+ JXL_CHECK(!c_desired.ICC().empty());
+ io.CheckMetadata();
+ if (io.Main().IsJPEG()) {
+ JXL_WARNING("Writing JPEG data as pixels");
+ }
+ JxlPixelFormat format = {
+ 0, // num_channels is ignored by the converter
+ bits_per_sample <= 8 ? JXL_TYPE_UINT8 : JXL_TYPE_UINT16, JXL_BIG_ENDIAN,
+ 0};
+ const bool floating_point = bits_per_sample > 16;
+ std::unique_ptr<extras::Encoder> encoder;
+ std::ostringstream os;
+ switch (codec) {
+ case extras::Codec::kPNG:
+#if JPEGXL_ENABLE_APNG
+ encoder = extras::GetAPNGEncoder();
+ break;
+#else
+ return JXL_FAILURE("JPEG XL was built without (A)PNG support");
+#endif
+ case extras::Codec::kJPG:
+#if JPEGXL_ENABLE_JPEG
+ format.data_type = JXL_TYPE_UINT8;
+ encoder = extras::GetJPEGEncoder();
+ os << io.jpeg_quality;
+ encoder->SetOption("q", os.str());
+ break;
+#else
+ return JXL_FAILURE("JPEG XL was built without JPEG support");
+#endif
+ case extras::Codec::kPNM:
+ if (io.Main().HasAlpha()) {
+ encoder = extras::GetPAMEncoder();
+ } else if (io.Main().IsGray()) {
+ encoder = extras::GetPGMEncoder();
+ } else if (!floating_point) {
+ encoder = extras::GetPPMEncoder();
+ } else {
+ format.data_type = JXL_TYPE_FLOAT;
+ format.endianness = JXL_LITTLE_ENDIAN;
+ encoder = extras::GetPFMEncoder();
+ }
+ break;
+ case extras::Codec::kPGX:
+ encoder = extras::GetPGXEncoder();
+ break;
+ case extras::Codec::kGIF:
+ return JXL_FAILURE("Encoding to GIF is not implemented");
+ case extras::Codec::kEXR:
+#if JPEGXL_ENABLE_EXR
+ format.data_type = JXL_TYPE_FLOAT;
+ encoder = extras::GetEXREncoder();
+ break;
+#else
+ return JXL_FAILURE("JPEG XL was built without OpenEXR support");
+#endif
+ case extras::Codec::kUnknown:
+ return JXL_FAILURE("Cannot encode using Codec::kUnknown");
+ }
+
+ if (!encoder) {
+ return JXL_FAILURE("Invalid codec.");
+ }
+
+ extras::PackedPixelFile ppf;
+ JXL_RETURN_IF_ERROR(
+ ConvertCodecInOutToPackedPixelFile(io, format, c_desired, pool, &ppf));
+ ppf.info.bits_per_sample = bits_per_sample;
+ if (format.data_type == JXL_TYPE_FLOAT) {
+ ppf.info.bits_per_sample = 32;
+ ppf.info.exponent_bits_per_sample = 8;
+ }
+ extras::EncodedImage encoded_image;
+ JXL_RETURN_IF_ERROR(encoder->Encode(ppf, &encoded_image, pool));
+ JXL_ASSERT(encoded_image.bitstreams.size() == 1);
+ *bytes = encoded_image.bitstreams[0];
+
+ return true;
+}
+
+Status EncodeToFile(const CodecInOut& io, const ColorEncoding& c_desired,
+ size_t bits_per_sample, const std::string& pathname,
+ ThreadPool* pool) {
+ const std::string extension = Extension(pathname);
+ const extras::Codec codec =
+ extras::CodecFromExtension(extension, &bits_per_sample);
+
+ // Warn about incorrect usage of PGM/PGX/PPM - only the latter supports
+ // color, but CodecFromExtension lumps them all together.
+ if (codec == extras::Codec::kPNM && extension != ".pfm") {
+ if (io.Main().HasAlpha() && extension != ".pam") {
+ JXL_WARNING(
+ "For images with alpha, the filename should end with .pam.\n");
+ } else if (!io.Main().IsGray() && extension == ".pgm") {
+ JXL_WARNING("For color images, the filename should end with .ppm.\n");
+ } else if (io.Main().IsGray() && extension == ".ppm") {
+ JXL_WARNING(
+ "For grayscale images, the filename should not end with .ppm.\n");
+ }
+ if (bits_per_sample > 16) {
+ JXL_WARNING("PPM only supports up to 16 bits per sample");
+ bits_per_sample = 16;
+ }
+ } else if (codec == extras::Codec::kPGX && !io.Main().IsGray()) {
+ JXL_WARNING("Storing color image to PGX - use .ppm extension instead.\n");
+ }
+ if (bits_per_sample > 16 && codec == extras::Codec::kPNG) {
+ JXL_WARNING("PNG only supports up to 16 bits per sample");
+ bits_per_sample = 16;
+ }
+
+ std::vector<uint8_t> encoded;
+ return Encode(io, codec, c_desired, bits_per_sample, &encoded, pool) &&
+ WriteFile(encoded, pathname);
+}
+
+Status EncodeToFile(const CodecInOut& io, const std::string& pathname,
+ ThreadPool* pool) {
+ // TODO(lode): need to take the floating_point_sample field into account
+ return EncodeToFile(io, io.metadata.m.color_encoding,
+ io.metadata.m.bit_depth.bits_per_sample, pathname, pool);
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/codec.h b/third_party/jpeg-xl/lib/extras/codec.h
new file mode 100644
index 0000000000..80a42f926c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/codec.h
@@ -0,0 +1,70 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_CODEC_H_
+#define LIB_EXTRAS_CODEC_H_
+
+// Facade for image encoders/decoders (PNG, PNM, ...).
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/dec/decode.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/field_encodings.h" // MakeBit
+
+namespace jxl {
+
+struct SizeConstraints;
+
+// Decodes "bytes" and sets io->metadata.m.
+// color_space_hint may specify the color space, otherwise, defaults to sRGB.
+Status SetFromBytes(Span<const uint8_t> bytes,
+ const extras::ColorHints& color_hints, CodecInOut* io,
+ ThreadPool* pool = nullptr,
+ const SizeConstraints* constraints = nullptr,
+ extras::Codec* orig_codec = nullptr);
+// Helper function to use no color_space_hint.
+JXL_INLINE Status SetFromBytes(const Span<const uint8_t> bytes, CodecInOut* io,
+ ThreadPool* pool = nullptr,
+ const SizeConstraints* constraints = nullptr,
+ extras::Codec* orig_codec = nullptr) {
+ return SetFromBytes(bytes, extras::ColorHints(), io, pool, constraints,
+ orig_codec);
+}
+
+// Reads from file and calls SetFromBytes.
+Status SetFromFile(const std::string& pathname,
+ const extras::ColorHints& color_hints, CodecInOut* io,
+ ThreadPool* pool = nullptr,
+ const SizeConstraints* constraints = nullptr,
+ extras::Codec* orig_codec = nullptr);
+
+// Replaces "bytes" with an encoding of pixels transformed from c_current
+// color space to c_desired.
+Status Encode(const CodecInOut& io, extras::Codec codec,
+ const ColorEncoding& c_desired, size_t bits_per_sample,
+ std::vector<uint8_t>* bytes, ThreadPool* pool = nullptr);
+
+// Deduces codec, calls Encode and writes to file.
+Status EncodeToFile(const CodecInOut& io, const ColorEncoding& c_desired,
+ size_t bits_per_sample, const std::string& pathname,
+ ThreadPool* pool = nullptr);
+// Same, but defaults to metadata.original color_encoding and bits_per_sample.
+Status EncodeToFile(const CodecInOut& io, const std::string& pathname,
+ ThreadPool* pool = nullptr);
+
+} // namespace jxl
+
+#endif // LIB_EXTRAS_CODEC_H_
diff --git a/third_party/jpeg-xl/lib/extras/codec_test.cc b/third_party/jpeg-xl/lib/extras/codec_test.cc
new file mode 100644
index 0000000000..66a8563639
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/codec_test.cc
@@ -0,0 +1,645 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/codec.h"
+
+#include <stddef.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/dec/decode.h"
+#include "lib/extras/dec/pgx.h"
+#include "lib/extras/dec/pnm.h"
+#include "lib/extras/enc/encode.h"
+#include "lib/extras/packed_image_convert.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+
+using test::ThreadPoolForTests;
+
+namespace extras {
+namespace {
+
+using ::testing::AllOf;
+using ::testing::Contains;
+using ::testing::Field;
+using ::testing::IsEmpty;
+using ::testing::NotNull;
+using ::testing::SizeIs;
+
+std::string ExtensionFromCodec(Codec codec, const bool is_gray,
+ const bool has_alpha,
+ const size_t bits_per_sample) {
+ switch (codec) {
+ case Codec::kJPG:
+ return ".jpg";
+ case Codec::kPGX:
+ return ".pgx";
+ case Codec::kPNG:
+ return ".png";
+ case Codec::kPNM:
+ if (bits_per_sample == 32) return ".pfm";
+ if (has_alpha) return ".pam";
+ return is_gray ? ".pgm" : ".ppm";
+ case Codec::kGIF:
+ return ".gif";
+ case Codec::kEXR:
+ return ".exr";
+ case Codec::kUnknown:
+ return std::string();
+ }
+ JXL_UNREACHABLE;
+ return std::string();
+}
+
+void VerifySameImage(const PackedImage& im0, size_t bits_per_sample0,
+ const PackedImage& im1, size_t bits_per_sample1,
+ bool lossless = true) {
+ ASSERT_EQ(im0.xsize, im1.xsize);
+ ASSERT_EQ(im0.ysize, im1.ysize);
+ ASSERT_EQ(im0.format.num_channels, im1.format.num_channels);
+ auto get_factor = [](JxlPixelFormat f, size_t bits) -> double {
+ return 1.0 / ((1u << std::min(test::GetPrecision(f.data_type), bits)) - 1);
+ };
+ double factor0 = get_factor(im0.format, bits_per_sample0);
+ double factor1 = get_factor(im1.format, bits_per_sample1);
+ auto pixels0 = static_cast<const uint8_t*>(im0.pixels());
+ auto pixels1 = static_cast<const uint8_t*>(im1.pixels());
+ auto rgba0 =
+ test::ConvertToRGBA32(pixels0, im0.xsize, im0.ysize, im0.format, factor0);
+ auto rgba1 =
+ test::ConvertToRGBA32(pixels1, im1.xsize, im1.ysize, im1.format, factor1);
+ double tolerance =
+ lossless ? 0.5 * std::min(factor0, factor1) : 3.0f / 255.0f;
+ if (bits_per_sample0 == 32 || bits_per_sample1 == 32) {
+ tolerance = 0.5 * std::max(factor0, factor1);
+ }
+ for (size_t y = 0; y < im0.ysize; ++y) {
+ for (size_t x = 0; x < im0.xsize; ++x) {
+ for (size_t c = 0; c < im0.format.num_channels; ++c) {
+ size_t ix = (y * im0.xsize + x) * 4 + c;
+ double val0 = rgba0[ix];
+ double val1 = rgba1[ix];
+ ASSERT_NEAR(val1, val0, tolerance)
+ << "y = " << y << " x = " << x << " c = " << c;
+ }
+ }
+ }
+}
+
+JxlColorEncoding CreateTestColorEncoding(bool is_gray) {
+ JxlColorEncoding c;
+ c.color_space = is_gray ? JXL_COLOR_SPACE_GRAY : JXL_COLOR_SPACE_RGB;
+ c.white_point = JXL_WHITE_POINT_D65;
+ c.primaries = JXL_PRIMARIES_P3;
+ c.rendering_intent = JXL_RENDERING_INTENT_RELATIVE;
+ c.transfer_function = JXL_TRANSFER_FUNCTION_LINEAR;
+ // Roundtrip through internal color encoding to fill in primaries and white
+ // point CIE xy coordinates.
+ ColorEncoding c_internal;
+ JXL_CHECK(ConvertExternalToInternalColorEncoding(c, &c_internal));
+ ConvertInternalToExternalColorEncoding(c_internal, &c);
+ return c;
+}
+
+std::vector<uint8_t> GenerateICC(JxlColorEncoding color_encoding) {
+ ColorEncoding c;
+ JXL_CHECK(ConvertExternalToInternalColorEncoding(color_encoding, &c));
+ JXL_CHECK(c.CreateICC());
+ PaddedBytes icc = c.ICC();
+ return std::vector<uint8_t>(icc.begin(), icc.end());
+}
+
+void StoreRandomValue(uint8_t* out, Rng* rng, JxlPixelFormat format,
+ size_t bits_per_sample) {
+ uint64_t max_val = (1ull << bits_per_sample) - 1;
+ if (format.data_type == JXL_TYPE_UINT8) {
+ *out = rng->UniformU(0, max_val);
+ } else if (format.data_type == JXL_TYPE_UINT16) {
+ uint32_t val = rng->UniformU(0, max_val);
+ if (format.endianness == JXL_BIG_ENDIAN) {
+ StoreBE16(val, out);
+ } else {
+ StoreLE16(val, out);
+ }
+ } else {
+ ASSERT_EQ(format.data_type, JXL_TYPE_FLOAT);
+ float val = rng->UniformF(0.0, 1.0);
+ uint32_t uval;
+ memcpy(&uval, &val, 4);
+ if (format.endianness == JXL_BIG_ENDIAN) {
+ StoreBE32(uval, out);
+ } else {
+ StoreLE32(uval, out);
+ }
+ }
+}
+
+void FillPackedImage(size_t bits_per_sample, PackedImage* image) {
+ JxlPixelFormat format = image->format;
+ size_t bytes_per_channel = PackedImage::BitsPerChannel(format.data_type) / 8;
+ uint8_t* out = static_cast<uint8_t*>(image->pixels());
+ size_t stride = image->xsize * format.num_channels * bytes_per_channel;
+ ASSERT_EQ(image->pixels_size, image->ysize * stride);
+ Rng rng(129);
+ for (size_t y = 0; y < image->ysize; ++y) {
+ for (size_t x = 0; x < image->xsize; ++x) {
+ for (size_t c = 0; c < format.num_channels; ++c) {
+ StoreRandomValue(out, &rng, format, bits_per_sample);
+ out += bytes_per_channel;
+ }
+ }
+ }
+}
+
+struct TestImageParams {
+ Codec codec;
+ size_t xsize;
+ size_t ysize;
+ size_t bits_per_sample;
+ bool is_gray;
+ bool add_alpha;
+ bool big_endian;
+ bool add_extra_channels;
+
+ bool ShouldTestRoundtrip() const {
+ if (codec == Codec::kPNG) {
+ return bits_per_sample <= 16;
+ } else if (codec == Codec::kPNM) {
+ // TODO(szabadka) Make PNM encoder endianness-aware.
+ return ((bits_per_sample <= 16 && big_endian) ||
+ (bits_per_sample == 32 && !add_alpha && !big_endian));
+ } else if (codec == Codec::kPGX) {
+ return ((bits_per_sample == 8 || bits_per_sample == 16) && is_gray &&
+ !add_alpha);
+ } else if (codec == Codec::kEXR) {
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+ defined(THREAD_SANITIZER)
+ // OpenEXR 2.3 has a memory leak in IlmThread_2_3::ThreadPool
+ return false;
+#else
+ return bits_per_sample == 32 && !is_gray;
+#endif
+ } else if (codec == Codec::kJPG) {
+ return bits_per_sample == 8 && !add_alpha;
+ } else {
+ return false;
+ }
+ }
+
+ JxlPixelFormat PixelFormat() const {
+ JxlPixelFormat format;
+ format.num_channels = (is_gray ? 1 : 3) + (add_alpha ? 1 : 0);
+ format.data_type = (bits_per_sample == 32 ? JXL_TYPE_FLOAT
+ : bits_per_sample > 8 ? JXL_TYPE_UINT16
+ : JXL_TYPE_UINT8);
+ format.endianness = big_endian ? JXL_BIG_ENDIAN : JXL_LITTLE_ENDIAN;
+ format.align = 0;
+ return format;
+ }
+
+ std::string DebugString() const {
+ std::ostringstream os;
+ os << "bps:" << bits_per_sample << " gr:" << is_gray << " al:" << add_alpha
+ << " be: " << big_endian << " ec: " << add_extra_channels;
+ return os.str();
+ }
+};
+
+void CreateTestImage(const TestImageParams& params, PackedPixelFile* ppf) {
+ ppf->info.xsize = params.xsize;
+ ppf->info.ysize = params.ysize;
+ ppf->info.bits_per_sample = params.bits_per_sample;
+ ppf->info.exponent_bits_per_sample = params.bits_per_sample == 32 ? 8 : 0;
+ ppf->info.num_color_channels = params.is_gray ? 1 : 3;
+ ppf->info.alpha_bits = params.add_alpha ? params.bits_per_sample : 0;
+ ppf->info.alpha_premultiplied = (params.codec == Codec::kEXR);
+
+ JxlColorEncoding color_encoding = CreateTestColorEncoding(params.is_gray);
+ ppf->icc = GenerateICC(color_encoding);
+ ppf->color_encoding = color_encoding;
+
+ PackedFrame frame(params.xsize, params.ysize, params.PixelFormat());
+ FillPackedImage(params.bits_per_sample, &frame.color);
+ if (params.add_extra_channels) {
+ for (size_t i = 0; i < 7; ++i) {
+ JxlPixelFormat ec_format = params.PixelFormat();
+ ec_format.num_channels = 1;
+ PackedImage ec(params.xsize, params.ysize, ec_format);
+ FillPackedImage(params.bits_per_sample, &ec);
+ frame.extra_channels.emplace_back(std::move(ec));
+ PackedExtraChannel pec;
+ pec.ec_info.bits_per_sample = params.bits_per_sample;
+ pec.ec_info.type = static_cast<JxlExtraChannelType>(i);
+ ppf->extra_channels_info.emplace_back(std::move(pec));
+ }
+ }
+ ppf->frames.emplace_back(std::move(frame));
+}
+
+// Ensures reading a newly written file leads to the same image pixels.
+void TestRoundTrip(const TestImageParams& params, ThreadPool* pool) {
+ if (!params.ShouldTestRoundtrip()) return;
+
+ std::string extension = ExtensionFromCodec(
+ params.codec, params.is_gray, params.add_alpha, params.bits_per_sample);
+ printf("Codec %s %s\n", extension.c_str(), params.DebugString().c_str());
+
+ PackedPixelFile ppf_in;
+ CreateTestImage(params, &ppf_in);
+
+ EncodedImage encoded;
+ auto encoder = Encoder::FromExtension(extension);
+ ASSERT_TRUE(encoder.get());
+ ASSERT_TRUE(encoder->Encode(ppf_in, &encoded, pool));
+ ASSERT_EQ(encoded.bitstreams.size(), 1);
+
+ PackedPixelFile ppf_out;
+ ColorHints color_hints;
+ if (params.codec == Codec::kPNM || params.codec == Codec::kPGX) {
+ color_hints.Add("color_space",
+ params.is_gray ? "Gra_D65_Rel_SRG" : "RGB_D65_SRG_Rel_SRG");
+ }
+ ASSERT_TRUE(DecodeBytes(Span<const uint8_t>(encoded.bitstreams[0]),
+ color_hints, &ppf_out));
+ if (params.codec == Codec::kPNG && ppf_out.icc.empty()) {
+ // Decoding a PNG may drop the ICC profile if there's a valid cICP chunk.
+ // Rendering intent is not preserved in this case.
+ EXPECT_EQ(ppf_in.color_encoding.color_space,
+ ppf_out.color_encoding.color_space);
+ EXPECT_EQ(ppf_in.color_encoding.white_point,
+ ppf_out.color_encoding.white_point);
+ if (ppf_in.color_encoding.color_space != JXL_COLOR_SPACE_GRAY) {
+ EXPECT_EQ(ppf_in.color_encoding.primaries,
+ ppf_out.color_encoding.primaries);
+ }
+ EXPECT_EQ(ppf_in.color_encoding.transfer_function,
+ ppf_out.color_encoding.transfer_function);
+ EXPECT_EQ(ppf_out.color_encoding.rendering_intent,
+ JXL_RENDERING_INTENT_RELATIVE);
+ } else if (params.codec != Codec::kPNM && params.codec != Codec::kPGX &&
+ params.codec != Codec::kEXR) {
+ EXPECT_EQ(ppf_in.icc, ppf_out.icc);
+ }
+
+ ASSERT_EQ(ppf_out.frames.size(), 1);
+ const auto& frame_in = ppf_in.frames[0];
+ const auto& frame_out = ppf_out.frames[0];
+ VerifySameImage(frame_in.color, ppf_in.info.bits_per_sample, frame_out.color,
+ ppf_out.info.bits_per_sample,
+ /*lossless=*/params.codec != Codec::kJPG);
+ ASSERT_EQ(frame_in.extra_channels.size(), frame_out.extra_channels.size());
+ ASSERT_EQ(ppf_out.extra_channels_info.size(),
+ frame_out.extra_channels.size());
+ for (size_t i = 0; i < frame_in.extra_channels.size(); ++i) {
+ VerifySameImage(frame_in.extra_channels[i], ppf_in.info.bits_per_sample,
+ frame_out.extra_channels[i], ppf_out.info.bits_per_sample,
+ /*lossless=*/true);
+ EXPECT_EQ(ppf_out.extra_channels_info[i].ec_info.type,
+ ppf_in.extra_channels_info[i].ec_info.type);
+ }
+}
+
+TEST(CodecTest, TestRoundTrip) {
+ ThreadPoolForTests pool(12);
+
+ TestImageParams params;
+ params.xsize = 7;
+ params.ysize = 4;
+
+ for (Codec codec : AvailableCodecs()) {
+ for (int bits_per_sample : {4, 8, 10, 12, 16, 32}) {
+ for (bool is_gray : {false, true}) {
+ for (bool add_alpha : {false, true}) {
+ for (bool big_endian : {false, true}) {
+ params.codec = codec;
+ params.bits_per_sample = static_cast<size_t>(bits_per_sample);
+ params.is_gray = is_gray;
+ params.add_alpha = add_alpha;
+ params.big_endian = big_endian;
+ params.add_extra_channels = false;
+ TestRoundTrip(params, &pool);
+ if (codec == Codec::kPNM && add_alpha) {
+ params.add_extra_channels = true;
+ TestRoundTrip(params, &pool);
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(CodecTest, LosslessPNMRoundtrip) {
+ ThreadPoolForTests pool(12);
+
+ static const char* kChannels[] = {"", "g", "ga", "rgb", "rgba"};
+ static const char* kExtension[] = {"", ".pgm", ".pam", ".ppm", ".pam"};
+ for (size_t bit_depth = 1; bit_depth <= 16; ++bit_depth) {
+ for (size_t channels = 1; channels <= 4; ++channels) {
+ if (bit_depth == 1 && (channels == 2 || channels == 4)) continue;
+ std::string extension(kExtension[channels]);
+ std::string filename = "jxl/flower/flower_small." +
+ std::string(kChannels[channels]) + ".depth" +
+ std::to_string(bit_depth) + extension;
+ const PaddedBytes orig = jxl::test::ReadTestData(filename);
+
+ PackedPixelFile ppf;
+ ColorHints color_hints;
+ color_hints.Add("color_space",
+ channels < 3 ? "Gra_D65_Rel_SRG" : "RGB_D65_SRG_Rel_SRG");
+ ASSERT_TRUE(DecodeBytes(Span<const uint8_t>(orig.data(), orig.size()),
+ color_hints, &ppf));
+
+ EncodedImage encoded;
+ auto encoder = Encoder::FromExtension(extension);
+ ASSERT_TRUE(encoder.get());
+ ASSERT_TRUE(encoder->Encode(ppf, &encoded, &pool));
+ ASSERT_EQ(encoded.bitstreams.size(), 1);
+ ASSERT_EQ(orig.size(), encoded.bitstreams[0].size());
+ EXPECT_EQ(0,
+ memcmp(orig.data(), encoded.bitstreams[0].data(), orig.size()));
+ }
+ }
+}
+
+void DecodeRoundtrip(const std::string& pathname, ThreadPool* pool,
+ CodecInOut& io,
+ const ColorHints& color_hints = ColorHints()) {
+ const PaddedBytes orig = jxl::test::ReadTestData(pathname);
+ JXL_CHECK(SetFromBytes(Span<const uint8_t>(orig), color_hints, &io, pool));
+ const ImageBundle& ib1 = io.Main();
+
+ // Encode/Decode again to make sure Encode carries through all metadata.
+ std::vector<uint8_t> encoded;
+ JXL_CHECK(Encode(io, Codec::kPNG, io.metadata.m.color_encoding,
+ io.metadata.m.bit_depth.bits_per_sample, &encoded, pool));
+
+ CodecInOut io2;
+ JXL_CHECK(
+ SetFromBytes(Span<const uint8_t>(encoded), color_hints, &io2, pool));
+ const ImageBundle& ib2 = io2.Main();
+ EXPECT_EQ(Description(ib1.metadata()->color_encoding),
+ Description(ib2.metadata()->color_encoding));
+ EXPECT_EQ(Description(ib1.c_current()), Description(ib2.c_current()));
+
+ size_t bits_per_sample = io2.metadata.m.bit_depth.bits_per_sample;
+
+ // "Same" pixels?
+ double max_l1 = bits_per_sample <= 12 ? 1.3 : 2E-3;
+ double max_rel = bits_per_sample <= 12 ? 6E-3 : 1E-4;
+ if (ib1.metadata()->color_encoding.IsGray()) {
+ max_rel *= 2.0;
+ } else if (ib1.metadata()->color_encoding.primaries != Primaries::kSRGB) {
+ // Need more tolerance for large gamuts (anything but sRGB)
+ max_l1 *= 1.5;
+ max_rel *= 3.0;
+ }
+ JXL_ASSERT_OK(
+ VerifyRelativeError(ib1.color(), ib2.color(), max_l1, max_rel, _));
+
+ // Simulate the encoder removing profile and decoder restoring it.
+ if (!ib2.metadata()->color_encoding.WantICC()) {
+ io2.metadata.m.color_encoding.InternalRemoveICC();
+ EXPECT_TRUE(io2.metadata.m.color_encoding.CreateICC());
+ }
+}
+
+#if 0
+TEST(CodecTest, TestMetadataSRGB) {
+ ThreadPoolForTests pool(12);
+
+ const char* paths[] = {"external/raw.pixls/DJI-FC6310-16bit_srgb8_v4_krita.png",
+ "external/raw.pixls/Google-Pixel2XL-16bit_srgb8_v4_krita.png",
+ "external/raw.pixls/HUAWEI-EVA-L09-16bit_srgb8_dt.png",
+ "external/raw.pixls/Nikon-D300-12bit_srgb8_dt.png",
+ "external/raw.pixls/Sony-DSC-RX1RM2-14bit_srgb8_v4_krita.png"};
+ for (const char* relative_pathname : paths) {
+ CodecInOut io;
+ DecodeRoundtrip(relative_pathname, Codec::kPNG, &pool, io);
+ EXPECT_EQ(8, io.metadata.m.bit_depth.bits_per_sample);
+ EXPECT_FALSE(io.metadata.m.bit_depth.floating_point_sample);
+ EXPECT_EQ(0, io.metadata.m.bit_depth.exponent_bits_per_sample);
+
+ EXPECT_EQ(64, io.xsize());
+ EXPECT_EQ(64, io.ysize());
+ EXPECT_FALSE(io.metadata.m.HasAlpha());
+
+ const ColorEncoding& c_original = io.metadata.m.color_encoding;
+ EXPECT_FALSE(c_original.ICC().empty());
+ EXPECT_EQ(ColorSpace::kRGB, c_original.GetColorSpace());
+ EXPECT_EQ(WhitePoint::kD65, c_original.white_point);
+ EXPECT_EQ(Primaries::kSRGB, c_original.primaries);
+ EXPECT_TRUE(c_original.tf.IsSRGB());
+ }
+}
+
+TEST(CodecTest, TestMetadataLinear) {
+ ThreadPoolForTests pool(12);
+
+ const char* paths[3] = {
+ "external/raw.pixls/Google-Pixel2XL-16bit_acescg_g1_v4_krita.png",
+ "external/raw.pixls/HUAWEI-EVA-L09-16bit_709_g1_dt.png",
+ "external/raw.pixls/Nikon-D300-12bit_2020_g1_dt.png",
+ };
+ const WhitePoint white_points[3] = {WhitePoint::kCustom, WhitePoint::kD65,
+ WhitePoint::kD65};
+ const Primaries primaries[3] = {Primaries::kCustom, Primaries::kSRGB,
+ Primaries::k2100};
+
+ for (size_t i = 0; i < 3; ++i) {
+ CodecInOut io;
+ DecodeRoundtrip(paths[i], Codec::kPNG, &pool, io);
+ EXPECT_EQ(16, io.metadata.m.bit_depth.bits_per_sample);
+ EXPECT_FALSE(io.metadata.m.bit_depth.floating_point_sample);
+ EXPECT_EQ(0, io.metadata.m.bit_depth.exponent_bits_per_sample);
+
+ EXPECT_EQ(64, io.xsize());
+ EXPECT_EQ(64, io.ysize());
+ EXPECT_FALSE(io.metadata.m.HasAlpha());
+
+ const ColorEncoding& c_original = io.metadata.m.color_encoding;
+ EXPECT_FALSE(c_original.ICC().empty());
+ EXPECT_EQ(ColorSpace::kRGB, c_original.GetColorSpace());
+ EXPECT_EQ(white_points[i], c_original.white_point);
+ EXPECT_EQ(primaries[i], c_original.primaries);
+ EXPECT_TRUE(c_original.tf.IsLinear());
+ }
+}
+
+TEST(CodecTest, TestMetadataICC) {
+ ThreadPoolForTests pool(12);
+
+ const char* paths[] = {
+ "external/raw.pixls/DJI-FC6310-16bit_709_v4_krita.png",
+ "external/raw.pixls/Sony-DSC-RX1RM2-14bit_709_v4_krita.png",
+ };
+ for (const char* relative_pathname : paths) {
+ CodecInOut io;
+ DecodeRoundtrip(relative_pathname, Codec::kPNG, &pool, io);
+ EXPECT_GE(16, io.metadata.m.bit_depth.bits_per_sample);
+ EXPECT_LE(14, io.metadata.m.bit_depth.bits_per_sample);
+
+ EXPECT_EQ(64, io.xsize());
+ EXPECT_EQ(64, io.ysize());
+ EXPECT_FALSE(io.metadata.m.HasAlpha());
+
+ const ColorEncoding& c_original = io.metadata.m.color_encoding;
+ EXPECT_FALSE(c_original.ICC().empty());
+ EXPECT_EQ(RenderingIntent::kPerceptual, c_original.rendering_intent);
+ EXPECT_EQ(ColorSpace::kRGB, c_original.GetColorSpace());
+ EXPECT_EQ(WhitePoint::kD65, c_original.white_point);
+ EXPECT_EQ(Primaries::kSRGB, c_original.primaries);
+ EXPECT_EQ(TransferFunction::k709, c_original.tf.GetTransferFunction());
+ }
+}
+
+TEST(CodecTest, Testexternal/pngsuite) {
+ ThreadPoolForTests pool(12);
+
+ // Ensure we can load PNG with text, japanese UTF-8, compressed text.
+ CodecInOut tmp1;
+ DecodeRoundtrip("external/pngsuite/ct1n0g04.png", Codec::kPNG, &pool, tmp1);
+ CodecInOut tmp2;
+ DecodeRoundtrip("external/pngsuite/ctjn0g04.png", Codec::kPNG, &pool, tmp2);
+ CodecInOut tmp3;
+ DecodeRoundtrip("external/pngsuite/ctzn0g04.png", Codec::kPNG, &pool, tmp3);
+
+ // Extract gAMA
+ CodecInOut b1;
+ DecodeRoundtrip("external/pngsuite/g10n3p04.png", Codec::kPNG, &pool, b1);
+ EXPECT_TRUE(b1.metadata.color_encoding.tf.IsLinear());
+
+ // Extract cHRM
+ CodecInOut b_p;
+ DecodeRoundtrip("external/pngsuite/ccwn2c08.png", Codec::kPNG, &pool, b_p);
+ EXPECT_EQ(Primaries::kSRGB, b_p.metadata.color_encoding.primaries);
+ EXPECT_EQ(WhitePoint::kD65, b_p.metadata.color_encoding.white_point);
+
+ // Extract EXIF from (new-style) dedicated chunk
+ CodecInOut b_exif;
+ DecodeRoundtrip("external/pngsuite/exif2c08.png", Codec::kPNG, &pool, b_exif);
+ EXPECT_EQ(978, b_exif.blobs.exif.size());
+}
+#endif
+
+void VerifyWideGamutMetadata(const std::string& relative_pathname,
+ const Primaries primaries, ThreadPool* pool) {
+ CodecInOut io;
+ DecodeRoundtrip(relative_pathname, pool, io);
+
+ EXPECT_EQ(8u, io.metadata.m.bit_depth.bits_per_sample);
+ EXPECT_FALSE(io.metadata.m.bit_depth.floating_point_sample);
+ EXPECT_EQ(0u, io.metadata.m.bit_depth.exponent_bits_per_sample);
+
+ const ColorEncoding& c_original = io.metadata.m.color_encoding;
+ EXPECT_FALSE(c_original.ICC().empty());
+ EXPECT_EQ(RenderingIntent::kAbsolute, c_original.rendering_intent);
+ EXPECT_EQ(ColorSpace::kRGB, c_original.GetColorSpace());
+ EXPECT_EQ(WhitePoint::kD65, c_original.white_point);
+ EXPECT_EQ(primaries, c_original.primaries);
+}
+
+TEST(CodecTest, TestWideGamut) {
+ ThreadPoolForTests pool(12);
+ // VerifyWideGamutMetadata("external/wide-gamut-tests/P3-sRGB-color-bars.png",
+ // Primaries::kP3, &pool);
+ VerifyWideGamutMetadata("external/wide-gamut-tests/P3-sRGB-color-ring.png",
+ Primaries::kP3, &pool);
+ // VerifyWideGamutMetadata("external/wide-gamut-tests/R2020-sRGB-color-bars.png",
+ // Primaries::k2100, &pool);
+ // VerifyWideGamutMetadata("external/wide-gamut-tests/R2020-sRGB-color-ring.png",
+ // Primaries::k2100, &pool);
+}
+
+TEST(CodecTest, TestPNM) { TestCodecPNM(); }
+
+TEST(CodecTest, FormatNegotiation) {
+ const std::vector<JxlPixelFormat> accepted_formats = {
+ {/*num_channels=*/4,
+ /*data_type=*/JXL_TYPE_UINT16,
+ /*endianness=*/JXL_NATIVE_ENDIAN,
+ /*align=*/0},
+ {/*num_channels=*/3,
+ /*data_type=*/JXL_TYPE_UINT8,
+ /*endianness=*/JXL_NATIVE_ENDIAN,
+ /*align=*/0},
+ {/*num_channels=*/3,
+ /*data_type=*/JXL_TYPE_UINT16,
+ /*endianness=*/JXL_NATIVE_ENDIAN,
+ /*align=*/0},
+ {/*num_channels=*/1,
+ /*data_type=*/JXL_TYPE_UINT8,
+ /*endianness=*/JXL_NATIVE_ENDIAN,
+ /*align=*/0},
+ };
+
+ JxlBasicInfo info;
+ JxlEncoderInitBasicInfo(&info);
+ info.bits_per_sample = 12;
+ info.num_color_channels = 2;
+
+ JxlPixelFormat format;
+ EXPECT_FALSE(SelectFormat(accepted_formats, info, &format));
+
+ info.num_color_channels = 3;
+ ASSERT_TRUE(SelectFormat(accepted_formats, info, &format));
+ EXPECT_EQ(format.num_channels, info.num_color_channels);
+ // 16 is the smallest accepted format that can accommodate the 12-bit data.
+ EXPECT_EQ(format.data_type, JXL_TYPE_UINT16);
+}
+
+TEST(CodecTest, EncodeToPNG) {
+ ThreadPool* const pool = nullptr;
+
+ std::unique_ptr<Encoder> png_encoder = Encoder::FromExtension(".png");
+ ASSERT_THAT(png_encoder, NotNull());
+
+ const PaddedBytes original_png = jxl::test::ReadTestData(
+ "external/wesaturate/500px/tmshre_riaphotographs_srgb8.png");
+ PackedPixelFile ppf;
+ ASSERT_TRUE(extras::DecodeBytes(Span<const uint8_t>(original_png),
+ ColorHints(), &ppf));
+
+ const JxlPixelFormat& format = ppf.frames.front().color.format;
+ ASSERT_THAT(
+ png_encoder->AcceptedFormats(),
+ Contains(AllOf(Field(&JxlPixelFormat::num_channels, format.num_channels),
+ Field(&JxlPixelFormat::data_type, format.data_type),
+ Field(&JxlPixelFormat::endianness, format.endianness))));
+ EncodedImage encoded_png;
+ ASSERT_TRUE(png_encoder->Encode(ppf, &encoded_png, pool));
+ EXPECT_THAT(encoded_png.icc, IsEmpty());
+ ASSERT_THAT(encoded_png.bitstreams, SizeIs(1));
+
+ PackedPixelFile decoded_ppf;
+ ASSERT_TRUE(
+ extras::DecodeBytes(Span<const uint8_t>(encoded_png.bitstreams.front()),
+ ColorHints(), &decoded_ppf));
+
+ ASSERT_EQ(decoded_ppf.info.bits_per_sample, ppf.info.bits_per_sample);
+ ASSERT_EQ(decoded_ppf.frames.size(), 1);
+ VerifySameImage(ppf.frames[0].color, ppf.info.bits_per_sample,
+ decoded_ppf.frames[0].color,
+ decoded_ppf.info.bits_per_sample);
+}
+
+} // namespace
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/dec/apng.cc b/third_party/jpeg-xl/lib/extras/dec/apng.cc
new file mode 100644
index 0000000000..c16fb5c81f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/apng.cc
@@ -0,0 +1,962 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/apng.h"
+
+// Parts of this code are taken from apngdis, which has the following license:
+/* APNG Disassembler 2.8
+ *
+ * Deconstructs APNG files into individual frames.
+ *
+ * http://apngdis.sourceforge.net
+ *
+ * Copyright (c) 2010-2015 Max Stepin
+ * maxst at users.sourceforge.net
+ *
+ * zlib license
+ * ------------
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ *
+ */
+
+#include <jxl/codestream_header.h>
+#include <jxl/encode.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/size_constraints.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/scope_guard.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/sanitizers.h"
+#include "png.h" /* original (unpatched) libpng is ok */
+
+namespace jxl {
+namespace extras {
+
+namespace {
+
+constexpr unsigned char kExifSignature[6] = {0x45, 0x78, 0x69,
+ 0x66, 0x00, 0x00};
+
+/* hIST chunk tail is not proccesed properly; skip this chunk completely;
+ see https://github.com/glennrp/libpng/pull/413 */
+const png_byte kIgnoredPngChunks[] = {
+ 104, 73, 83, 84, '\0' /* hIST */
+};
+
+// Returns floating-point value from the PNG encoding (times 10^5).
+static double F64FromU32(const uint32_t x) {
+ return static_cast<int32_t>(x) * 1E-5;
+}
+
+Status DecodeSRGB(const unsigned char* payload, const size_t payload_size,
+ JxlColorEncoding* color_encoding) {
+ if (payload_size != 1) return JXL_FAILURE("Wrong sRGB size");
+ // (PNG uses the same values as ICC.)
+ if (payload[0] >= 4) return JXL_FAILURE("Invalid Rendering Intent");
+ color_encoding->white_point = JXL_WHITE_POINT_D65;
+ color_encoding->primaries = JXL_PRIMARIES_SRGB;
+ color_encoding->transfer_function = JXL_TRANSFER_FUNCTION_SRGB;
+ color_encoding->rendering_intent =
+ static_cast<JxlRenderingIntent>(payload[0]);
+ return true;
+}
+
+// If the cICP profile is not fully supported, return false and leave
+// color_encoding unmodified.
+Status DecodeCICP(const unsigned char* payload, const size_t payload_size,
+ JxlColorEncoding* color_encoding) {
+ if (payload_size != 4) return JXL_FAILURE("Wrong cICP size");
+ JxlColorEncoding color_enc = *color_encoding;
+
+ // From https://www.itu.int/rec/T-REC-H.273-202107-I/en
+ if (payload[0] == 1) {
+ // IEC 61966-2-1 sRGB
+ color_enc.primaries = JXL_PRIMARIES_SRGB;
+ color_enc.white_point = JXL_WHITE_POINT_D65;
+ } else if (payload[0] == 4) {
+ // Rec. ITU-R BT.470-6 System M
+ color_enc.primaries = JXL_PRIMARIES_CUSTOM;
+ color_enc.primaries_red_xy[0] = 0.67;
+ color_enc.primaries_red_xy[1] = 0.33;
+ color_enc.primaries_green_xy[0] = 0.21;
+ color_enc.primaries_green_xy[1] = 0.71;
+ color_enc.primaries_blue_xy[0] = 0.14;
+ color_enc.primaries_blue_xy[1] = 0.08;
+ color_enc.white_point = JXL_WHITE_POINT_CUSTOM;
+ color_enc.white_point_xy[0] = 0.310;
+ color_enc.white_point_xy[1] = 0.316;
+ } else if (payload[0] == 5) {
+ // Rec. ITU-R BT.1700-0 625 PAL and 625 SECAM
+ color_enc.primaries = JXL_PRIMARIES_CUSTOM;
+ color_enc.primaries_red_xy[0] = 0.64;
+ color_enc.primaries_red_xy[1] = 0.33;
+ color_enc.primaries_green_xy[0] = 0.29;
+ color_enc.primaries_green_xy[1] = 0.60;
+ color_enc.primaries_blue_xy[0] = 0.15;
+ color_enc.primaries_blue_xy[1] = 0.06;
+ color_enc.white_point = JXL_WHITE_POINT_D65;
+ } else if (payload[0] == 6 || payload[0] == 7) {
+ // SMPTE ST 170 (2004) / SMPTE ST 240 (1999)
+ color_enc.primaries = JXL_PRIMARIES_CUSTOM;
+ color_enc.primaries_red_xy[0] = 0.630;
+ color_enc.primaries_red_xy[1] = 0.340;
+ color_enc.primaries_green_xy[0] = 0.310;
+ color_enc.primaries_green_xy[1] = 0.595;
+ color_enc.primaries_blue_xy[0] = 0.155;
+ color_enc.primaries_blue_xy[1] = 0.070;
+ color_enc.white_point = JXL_WHITE_POINT_D65;
+ } else if (payload[0] == 8) {
+ // Generic film (colour filters using Illuminant C)
+ color_enc.primaries = JXL_PRIMARIES_CUSTOM;
+ color_enc.primaries_red_xy[0] = 0.681;
+ color_enc.primaries_red_xy[1] = 0.319;
+ color_enc.primaries_green_xy[0] = 0.243;
+ color_enc.primaries_green_xy[1] = 0.692;
+ color_enc.primaries_blue_xy[0] = 0.145;
+ color_enc.primaries_blue_xy[1] = 0.049;
+ color_enc.white_point = JXL_WHITE_POINT_CUSTOM;
+ color_enc.white_point_xy[0] = 0.310;
+ color_enc.white_point_xy[1] = 0.316;
+ } else if (payload[0] == 9) {
+ // Rec. ITU-R BT.2100-2
+ color_enc.primaries = JXL_PRIMARIES_2100;
+ color_enc.white_point = JXL_WHITE_POINT_D65;
+ } else if (payload[0] == 10) {
+ // CIE 1931 XYZ
+ color_enc.primaries = JXL_PRIMARIES_CUSTOM;
+ color_enc.primaries_red_xy[0] = 1;
+ color_enc.primaries_red_xy[1] = 0;
+ color_enc.primaries_green_xy[0] = 0;
+ color_enc.primaries_green_xy[1] = 1;
+ color_enc.primaries_blue_xy[0] = 0;
+ color_enc.primaries_blue_xy[1] = 0;
+ color_enc.white_point = JXL_WHITE_POINT_E;
+ } else if (payload[0] == 11) {
+ // SMPTE RP 431-2 (2011)
+ color_enc.primaries = JXL_PRIMARIES_P3;
+ color_enc.white_point = JXL_WHITE_POINT_DCI;
+ } else if (payload[0] == 12) {
+ // SMPTE EG 432-1 (2010)
+ color_enc.primaries = JXL_PRIMARIES_P3;
+ color_enc.white_point = JXL_WHITE_POINT_D65;
+ } else if (payload[0] == 22) {
+ color_enc.primaries = JXL_PRIMARIES_CUSTOM;
+ color_enc.primaries_red_xy[0] = 0.630;
+ color_enc.primaries_red_xy[1] = 0.340;
+ color_enc.primaries_green_xy[0] = 0.295;
+ color_enc.primaries_green_xy[1] = 0.605;
+ color_enc.primaries_blue_xy[0] = 0.155;
+ color_enc.primaries_blue_xy[1] = 0.077;
+ color_enc.white_point = JXL_WHITE_POINT_D65;
+ } else {
+ JXL_WARNING("Unsupported primaries specified in cICP chunk: %d",
+ static_cast<int>(payload[0]));
+ return false;
+ }
+
+ if (payload[1] == 1 || payload[1] == 6 || payload[1] == 14 ||
+ payload[1] == 15) {
+ // Rec. ITU-R BT.709-6
+ color_enc.transfer_function = JXL_TRANSFER_FUNCTION_709;
+ } else if (payload[1] == 4) {
+ // Rec. ITU-R BT.1700-0 625 PAL and 625 SECAM
+ color_enc.transfer_function = JXL_TRANSFER_FUNCTION_GAMMA;
+ color_enc.gamma = 1 / 2.2;
+ } else if (payload[1] == 5) {
+ // Rec. ITU-R BT.470-6 System B, G
+ color_enc.transfer_function = JXL_TRANSFER_FUNCTION_GAMMA;
+ color_enc.gamma = 1 / 2.8;
+ } else if (payload[1] == 8 || payload[1] == 13 || payload[1] == 16 ||
+ payload[1] == 17 || payload[1] == 18) {
+ // These codes all match the corresponding JXL enum values
+ color_enc.transfer_function = static_cast<JxlTransferFunction>(payload[1]);
+ } else {
+ JXL_WARNING("Unsupported transfer function specified in cICP chunk: %d",
+ static_cast<int>(payload[1]));
+ return false;
+ }
+
+ if (payload[2] != 0) {
+ JXL_WARNING("Unsupported color space specified in cICP chunk: %d",
+ static_cast<int>(payload[2]));
+ return false;
+ }
+ if (payload[3] != 1) {
+ JXL_WARNING("Unsupported full-range flag specified in cICP chunk: %d",
+ static_cast<int>(payload[3]));
+ return false;
+ }
+ // cICP has no rendering intent, so use the default
+ color_enc.rendering_intent = JXL_RENDERING_INTENT_RELATIVE;
+ *color_encoding = color_enc;
+ return true;
+}
+
+Status DecodeGAMA(const unsigned char* payload, const size_t payload_size,
+ JxlColorEncoding* color_encoding) {
+ if (payload_size != 4) return JXL_FAILURE("Wrong gAMA size");
+ color_encoding->transfer_function = JXL_TRANSFER_FUNCTION_GAMMA;
+ color_encoding->gamma = F64FromU32(LoadBE32(payload));
+ return true;
+}
+
+Status DecodeCHRM(const unsigned char* payload, const size_t payload_size,
+ JxlColorEncoding* color_encoding) {
+ if (payload_size != 32) return JXL_FAILURE("Wrong cHRM size");
+
+ color_encoding->white_point = JXL_WHITE_POINT_CUSTOM;
+ color_encoding->white_point_xy[0] = F64FromU32(LoadBE32(payload + 0));
+ color_encoding->white_point_xy[1] = F64FromU32(LoadBE32(payload + 4));
+
+ color_encoding->primaries = JXL_PRIMARIES_CUSTOM;
+ color_encoding->primaries_red_xy[0] = F64FromU32(LoadBE32(payload + 8));
+ color_encoding->primaries_red_xy[1] = F64FromU32(LoadBE32(payload + 12));
+ color_encoding->primaries_green_xy[0] = F64FromU32(LoadBE32(payload + 16));
+ color_encoding->primaries_green_xy[1] = F64FromU32(LoadBE32(payload + 20));
+ color_encoding->primaries_blue_xy[0] = F64FromU32(LoadBE32(payload + 24));
+ color_encoding->primaries_blue_xy[1] = F64FromU32(LoadBE32(payload + 28));
+ return true;
+}
+
+// Retrieves XMP and EXIF/IPTC from itext and text.
+class BlobsReaderPNG {
+ public:
+ static Status Decode(const png_text_struct& info, PackedMetadata* metadata) {
+ // We trust these are properly null-terminated by libpng.
+ const char* key = info.key;
+ const char* value = info.text;
+ if (strstr(key, "XML:com.adobe.xmp")) {
+ metadata->xmp.resize(strlen(value)); // safe, see above
+ memcpy(metadata->xmp.data(), value, metadata->xmp.size());
+ }
+
+ std::string type;
+ std::vector<uint8_t> bytes;
+
+ // Handle text chunks annotated with key "Raw profile type ####", with
+ // #### a type, which may contain metadata.
+ const char* kKey = "Raw profile type ";
+ if (strncmp(key, kKey, strlen(kKey)) != 0) return false;
+
+ if (!MaybeDecodeBase16(key, value, &type, &bytes)) {
+ JXL_WARNING("Couldn't parse 'Raw format type' text chunk");
+ return false;
+ }
+ if (type == "exif") {
+ // Remove "Exif\0\0" prefix if present
+ if (bytes.size() >= sizeof kExifSignature &&
+ memcmp(bytes.data(), kExifSignature, sizeof kExifSignature) == 0) {
+ bytes.erase(bytes.begin(), bytes.begin() + sizeof kExifSignature);
+ }
+ if (!metadata->exif.empty()) {
+ JXL_WARNING("overwriting EXIF (%" PRIuS " bytes) with base16 (%" PRIuS
+ " bytes)",
+ metadata->exif.size(), bytes.size());
+ }
+ metadata->exif = std::move(bytes);
+ } else if (type == "iptc") {
+ // TODO (jon): Deal with IPTC in some way
+ } else if (type == "8bim") {
+ // TODO (jon): Deal with 8bim in some way
+ } else if (type == "xmp") {
+ if (!metadata->xmp.empty()) {
+ JXL_WARNING("overwriting XMP (%" PRIuS " bytes) with base16 (%" PRIuS
+ " bytes)",
+ metadata->xmp.size(), bytes.size());
+ }
+ metadata->xmp = std::move(bytes);
+ } else {
+ JXL_WARNING("Unknown type in 'Raw format type' text chunk: %s: %" PRIuS
+ " bytes",
+ type.c_str(), bytes.size());
+ }
+ return true;
+ }
+
+ private:
+ // Returns false if invalid.
+ static JXL_INLINE Status DecodeNibble(const char c,
+ uint32_t* JXL_RESTRICT nibble) {
+ if ('a' <= c && c <= 'f') {
+ *nibble = 10 + c - 'a';
+ } else if ('0' <= c && c <= '9') {
+ *nibble = c - '0';
+ } else {
+ *nibble = 0;
+ return JXL_FAILURE("Invalid metadata nibble");
+ }
+ JXL_ASSERT(*nibble < 16);
+ return true;
+ }
+
+ // Returns false if invalid.
+ static JXL_INLINE Status DecodeDecimal(const char** pos, const char* end,
+ uint32_t* JXL_RESTRICT value) {
+ size_t len = 0;
+ *value = 0;
+ while (*pos < end) {
+ char next = **pos;
+ if (next >= '0' && next <= '9') {
+ *value = (*value * 10) + static_cast<uint32_t>(next - '0');
+ len++;
+ if (len > 8) {
+ break;
+ }
+ } else {
+ // Do not consume terminator (non-decimal digit).
+ break;
+ }
+ (*pos)++;
+ }
+ if (len == 0 || len > 8) {
+ return JXL_FAILURE("Failed to parse decimal");
+ }
+ return true;
+ }
+
+ // Parses a PNG text chunk with key of the form "Raw profile type ####", with
+ // #### a type.
+ // Returns whether it could successfully parse the content.
+ // We trust key and encoded are null-terminated because they come from
+ // libpng.
+ static Status MaybeDecodeBase16(const char* key, const char* encoded,
+ std::string* type,
+ std::vector<uint8_t>* bytes) {
+ const char* encoded_end = encoded + strlen(encoded);
+
+ const char* kKey = "Raw profile type ";
+ if (strncmp(key, kKey, strlen(kKey)) != 0) return false;
+ *type = key + strlen(kKey);
+ const size_t kMaxTypeLen = 20;
+ if (type->length() > kMaxTypeLen) return false; // Type too long
+
+ // Header: freeform string and number of bytes
+ // Expected format is:
+ // \n
+ // profile name/description\n
+ // 40\n (the number of bytes after hex-decoding)
+ // 01234566789abcdef....\n (72 bytes per line max).
+ // 012345667\n (last line)
+ const char* pos = encoded;
+
+ if (*(pos++) != '\n') return false;
+ while (pos < encoded_end && *pos != '\n') {
+ pos++;
+ }
+ if (pos == encoded_end) return false;
+ // We parsed so far a \n, some number of non \n characters and are now
+ // pointing at a \n.
+ if (*(pos++) != '\n') return false;
+ // Skip leading spaces
+ while (pos < encoded_end && *pos == ' ') {
+ pos++;
+ }
+ uint32_t bytes_to_decode = 0;
+ JXL_RETURN_IF_ERROR(DecodeDecimal(&pos, encoded_end, &bytes_to_decode));
+
+ // We need 2*bytes for the hex values plus 1 byte every 36 values,
+ // plus terminal \n for length.
+ const unsigned long needed_bytes =
+ bytes_to_decode * 2 + 1 + DivCeil(bytes_to_decode, 36);
+ if (needed_bytes != static_cast<size_t>(encoded_end - pos)) {
+ return JXL_FAILURE("Not enough bytes to parse %d bytes in hex",
+ bytes_to_decode);
+ }
+ JXL_ASSERT(bytes->empty());
+ bytes->reserve(bytes_to_decode);
+
+ // Encoding: base16 with newline after 72 chars.
+ // pos points to the \n before the first line of hex values.
+ for (size_t i = 0; i < bytes_to_decode; ++i) {
+ if (i % 36 == 0) {
+ if (pos + 1 >= encoded_end) return false; // Truncated base16 1
+ if (*pos != '\n') return false; // Expected newline
+ ++pos;
+ }
+
+ if (pos + 2 >= encoded_end) return false; // Truncated base16 2;
+ uint32_t nibble0, nibble1;
+ JXL_RETURN_IF_ERROR(DecodeNibble(pos[0], &nibble0));
+ JXL_RETURN_IF_ERROR(DecodeNibble(pos[1], &nibble1));
+ bytes->push_back(static_cast<uint8_t>((nibble0 << 4) + nibble1));
+ pos += 2;
+ }
+ if (pos + 1 != encoded_end) return false; // Too many encoded bytes
+ if (pos[0] != '\n') return false; // Incorrect metadata terminator
+ return true;
+ }
+};
+
+constexpr bool isAbc(char c) {
+ return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+}
+
+constexpr uint32_t kId_IHDR = 0x52444849;
+constexpr uint32_t kId_acTL = 0x4C546361;
+constexpr uint32_t kId_fcTL = 0x4C546366;
+constexpr uint32_t kId_IDAT = 0x54414449;
+constexpr uint32_t kId_fdAT = 0x54416466;
+constexpr uint32_t kId_IEND = 0x444E4549;
+constexpr uint32_t kId_cICP = 0x50434963;
+constexpr uint32_t kId_iCCP = 0x50434369;
+constexpr uint32_t kId_sRGB = 0x42475273;
+constexpr uint32_t kId_gAMA = 0x414D4167;
+constexpr uint32_t kId_cHRM = 0x4D524863;
+constexpr uint32_t kId_eXIf = 0x66495865;
+
+struct APNGFrame {
+ std::vector<uint8_t> pixels;
+ std::vector<uint8_t*> rows;
+ unsigned int w, h, delay_num, delay_den;
+};
+
+struct Reader {
+ const uint8_t* next;
+ const uint8_t* last;
+ bool Read(void* data, size_t len) {
+ size_t cap = last - next;
+ size_t to_copy = std::min(cap, len);
+ memcpy(data, next, to_copy);
+ next += to_copy;
+ return (len == to_copy);
+ }
+ bool Eof() { return next == last; }
+};
+
+const unsigned long cMaxPNGSize = 1000000UL;
+const size_t kMaxPNGChunkSize = 1lu << 30; // 1 GB
+
+void info_fn(png_structp png_ptr, png_infop info_ptr) {
+ png_set_expand(png_ptr);
+ png_set_palette_to_rgb(png_ptr);
+ png_set_tRNS_to_alpha(png_ptr);
+ (void)png_set_interlace_handling(png_ptr);
+ png_read_update_info(png_ptr, info_ptr);
+}
+
+void row_fn(png_structp png_ptr, png_bytep new_row, png_uint_32 row_num,
+ int pass) {
+ APNGFrame* frame = (APNGFrame*)png_get_progressive_ptr(png_ptr);
+ JXL_CHECK(frame);
+ JXL_CHECK(row_num < frame->rows.size());
+ JXL_CHECK(frame->rows[row_num] < frame->pixels.data() + frame->pixels.size());
+ png_progressive_combine_row(png_ptr, frame->rows[row_num], new_row);
+}
+
+inline unsigned int read_chunk(Reader* r, std::vector<uint8_t>* pChunk) {
+ unsigned char len[4];
+ if (r->Read(&len, 4)) {
+ const auto size = png_get_uint_32(len);
+ // Check first, to avoid overflow.
+ if (size > kMaxPNGChunkSize) {
+ JXL_WARNING("APNG chunk size is too big");
+ return 0;
+ }
+ pChunk->resize(size + 12);
+ memcpy(pChunk->data(), len, 4);
+ if (r->Read(pChunk->data() + 4, pChunk->size() - 4)) {
+ return LoadLE32(pChunk->data() + 4);
+ }
+ }
+ return 0;
+}
+
+int processing_start(png_structp& png_ptr, png_infop& info_ptr, void* frame_ptr,
+ bool hasInfo, std::vector<uint8_t>& chunkIHDR,
+ std::vector<std::vector<uint8_t>>& chunksInfo) {
+ unsigned char header[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+
+ // Cleanup prior decoder, if any.
+ png_destroy_read_struct(&png_ptr, &info_ptr, 0);
+ // Just in case. Not all versions on libpng wipe-out the pointers.
+ png_ptr = nullptr;
+ info_ptr = nullptr;
+
+ png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+ info_ptr = png_create_info_struct(png_ptr);
+ if (!png_ptr || !info_ptr) return 1;
+
+ if (setjmp(png_jmpbuf(png_ptr))) {
+ return 1;
+ }
+
+ png_set_keep_unknown_chunks(png_ptr, 1, kIgnoredPngChunks,
+ (int)sizeof(kIgnoredPngChunks) / 5);
+
+ png_set_crc_action(png_ptr, PNG_CRC_QUIET_USE, PNG_CRC_QUIET_USE);
+ png_set_progressive_read_fn(png_ptr, frame_ptr, info_fn, row_fn, NULL);
+
+ png_process_data(png_ptr, info_ptr, header, 8);
+ png_process_data(png_ptr, info_ptr, chunkIHDR.data(), chunkIHDR.size());
+
+ if (hasInfo) {
+ for (unsigned int i = 0; i < chunksInfo.size(); i++) {
+ png_process_data(png_ptr, info_ptr, chunksInfo[i].data(),
+ chunksInfo[i].size());
+ }
+ }
+ return 0;
+}
+
+int processing_data(png_structp png_ptr, png_infop info_ptr, unsigned char* p,
+ unsigned int size) {
+ if (!png_ptr || !info_ptr) return 1;
+
+ if (setjmp(png_jmpbuf(png_ptr))) {
+ return 1;
+ }
+
+ png_process_data(png_ptr, info_ptr, p, size);
+ return 0;
+}
+
+int processing_finish(png_structp png_ptr, png_infop info_ptr,
+ PackedMetadata* metadata) {
+ unsigned char footer[12] = {0, 0, 0, 0, 73, 69, 78, 68, 174, 66, 96, 130};
+
+ if (!png_ptr || !info_ptr) return 1;
+
+ if (setjmp(png_jmpbuf(png_ptr))) {
+ return 1;
+ }
+
+ png_process_data(png_ptr, info_ptr, footer, 12);
+ // before destroying: check if we encountered any metadata chunks
+ png_textp text_ptr;
+ int num_text;
+ png_get_text(png_ptr, info_ptr, &text_ptr, &num_text);
+ for (int i = 0; i < num_text; i++) {
+ (void)BlobsReaderPNG::Decode(text_ptr[i], metadata);
+ }
+
+ return 0;
+}
+
+} // namespace
+
+Status DecodeImageAPNG(const Span<const uint8_t> bytes,
+ const ColorHints& color_hints, PackedPixelFile* ppf,
+ const SizeConstraints* constraints) {
+ Reader r;
+ unsigned int id, j, w, h, w0, h0, x0, y0;
+ unsigned int delay_num, delay_den, dop, bop, rowbytes, imagesize;
+ unsigned char sig[8];
+ png_structp png_ptr = nullptr;
+ png_infop info_ptr = nullptr;
+ std::vector<uint8_t> chunk;
+ std::vector<uint8_t> chunkIHDR;
+ std::vector<std::vector<uint8_t>> chunksInfo;
+ bool isAnimated = false;
+ bool hasInfo = false;
+ APNGFrame frameRaw = {};
+ uint32_t num_channels;
+ JxlPixelFormat format;
+ unsigned int bytes_per_pixel = 0;
+
+ struct FrameInfo {
+ PackedImage data;
+ uint32_t duration;
+ size_t x0, xsize;
+ size_t y0, ysize;
+ uint32_t dispose_op;
+ uint32_t blend_op;
+ };
+
+ std::vector<FrameInfo> frames;
+
+ // Make sure png memory is released in any case.
+ auto scope_guard = MakeScopeGuard([&]() {
+ png_destroy_read_struct(&png_ptr, &info_ptr, 0);
+ // Just in case. Not all versions on libpng wipe-out the pointers.
+ png_ptr = nullptr;
+ info_ptr = nullptr;
+ });
+
+ r = {bytes.data(), bytes.data() + bytes.size()};
+ // Not a PNG => not an error
+ unsigned char png_signature[8] = {137, 80, 78, 71, 13, 10, 26, 10};
+ if (!r.Read(sig, 8) || memcmp(sig, png_signature, 8) != 0) {
+ return false;
+ }
+ id = read_chunk(&r, &chunkIHDR);
+
+ ppf->info.exponent_bits_per_sample = 0;
+ ppf->info.alpha_exponent_bits = 0;
+ ppf->info.orientation = JXL_ORIENT_IDENTITY;
+
+ ppf->frames.clear();
+
+ bool have_color = false;
+ bool have_cicp = false, have_iccp = false, have_srgb = false;
+ bool errorstate = true;
+ if (id == kId_IHDR && chunkIHDR.size() == 25) {
+ x0 = 0;
+ y0 = 0;
+ delay_num = 1;
+ delay_den = 10;
+ dop = 0;
+ bop = 0;
+
+ w0 = w = png_get_uint_32(chunkIHDR.data() + 8);
+ h0 = h = png_get_uint_32(chunkIHDR.data() + 12);
+ if (w > cMaxPNGSize || h > cMaxPNGSize) {
+ return false;
+ }
+
+ // default settings in case e.g. only gAMA is given
+ ppf->color_encoding.color_space = JXL_COLOR_SPACE_RGB;
+ ppf->color_encoding.white_point = JXL_WHITE_POINT_D65;
+ ppf->color_encoding.primaries = JXL_PRIMARIES_SRGB;
+ ppf->color_encoding.transfer_function = JXL_TRANSFER_FUNCTION_SRGB;
+ ppf->color_encoding.rendering_intent = JXL_RENDERING_INTENT_RELATIVE;
+
+ if (!processing_start(png_ptr, info_ptr, (void*)&frameRaw, hasInfo,
+ chunkIHDR, chunksInfo)) {
+ while (!r.Eof()) {
+ id = read_chunk(&r, &chunk);
+ if (!id) break;
+
+ if (id == kId_acTL && !hasInfo && !isAnimated) {
+ isAnimated = true;
+ ppf->info.have_animation = true;
+ ppf->info.animation.tps_numerator = 1000;
+ ppf->info.animation.tps_denominator = 1;
+ } else if (id == kId_IEND ||
+ (id == kId_fcTL && (!hasInfo || isAnimated))) {
+ if (hasInfo) {
+ if (!processing_finish(png_ptr, info_ptr, &ppf->metadata)) {
+ // Allocates the frame buffer.
+ uint32_t duration = delay_num * 1000 / delay_den;
+ frames.push_back(FrameInfo{PackedImage(w0, h0, format), duration,
+ x0, w0, y0, h0, dop, bop});
+ auto& frame = frames.back().data;
+ for (size_t y = 0; y < h0; ++y) {
+ memcpy(static_cast<uint8_t*>(frame.pixels()) + frame.stride * y,
+ frameRaw.rows[y], bytes_per_pixel * w0);
+ }
+ } else {
+ break;
+ }
+ }
+
+ if (id == kId_IEND) {
+ errorstate = false;
+ break;
+ }
+ if (chunk.size() < 34) {
+ return JXL_FAILURE("Received a chunk that is too small (%" PRIuS
+ "B)",
+ chunk.size());
+ }
+ // At this point the old frame is done. Let's start a new one.
+ w0 = png_get_uint_32(chunk.data() + 12);
+ h0 = png_get_uint_32(chunk.data() + 16);
+ x0 = png_get_uint_32(chunk.data() + 20);
+ y0 = png_get_uint_32(chunk.data() + 24);
+ delay_num = png_get_uint_16(chunk.data() + 28);
+ delay_den = png_get_uint_16(chunk.data() + 30);
+ dop = chunk[32];
+ bop = chunk[33];
+
+ if (!delay_den) delay_den = 100;
+
+ if (w0 > cMaxPNGSize || h0 > cMaxPNGSize || x0 > cMaxPNGSize ||
+ y0 > cMaxPNGSize || x0 + w0 > w || y0 + h0 > h || dop > 2 ||
+ bop > 1) {
+ break;
+ }
+
+ if (hasInfo) {
+ memcpy(chunkIHDR.data() + 8, chunk.data() + 12, 8);
+ if (processing_start(png_ptr, info_ptr, (void*)&frameRaw, hasInfo,
+ chunkIHDR, chunksInfo)) {
+ break;
+ }
+ }
+ } else if (id == kId_IDAT) {
+ // First IDAT chunk means we now have all header info
+ hasInfo = true;
+ JXL_CHECK(w == png_get_image_width(png_ptr, info_ptr));
+ JXL_CHECK(h == png_get_image_height(png_ptr, info_ptr));
+ int colortype = png_get_color_type(png_ptr, info_ptr);
+ int png_bit_depth = png_get_bit_depth(png_ptr, info_ptr);
+ ppf->info.bits_per_sample = png_bit_depth;
+ png_color_8p sigbits = NULL;
+ png_get_sBIT(png_ptr, info_ptr, &sigbits);
+ if (colortype & 1) {
+ // palette will actually be 8-bit regardless of the index bitdepth
+ ppf->info.bits_per_sample = 8;
+ }
+ if (colortype & 2) {
+ ppf->info.num_color_channels = 3;
+ ppf->color_encoding.color_space = JXL_COLOR_SPACE_RGB;
+ if (sigbits && sigbits->red == sigbits->green &&
+ sigbits->green == sigbits->blue)
+ ppf->info.bits_per_sample = sigbits->red;
+ } else {
+ ppf->info.num_color_channels = 1;
+ ppf->color_encoding.color_space = JXL_COLOR_SPACE_GRAY;
+ if (sigbits) ppf->info.bits_per_sample = sigbits->gray;
+ }
+ if (colortype & 4 ||
+ png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS)) {
+ ppf->info.alpha_bits = ppf->info.bits_per_sample;
+ if (sigbits) {
+ if (sigbits->alpha &&
+ sigbits->alpha != ppf->info.bits_per_sample) {
+ return JXL_FAILURE("Unsupported alpha bit-depth");
+ }
+ ppf->info.alpha_bits = sigbits->alpha;
+ }
+ } else {
+ ppf->info.alpha_bits = 0;
+ }
+ ppf->color_encoding.color_space =
+ (ppf->info.num_color_channels == 1 ? JXL_COLOR_SPACE_GRAY
+ : JXL_COLOR_SPACE_RGB);
+ ppf->info.xsize = w;
+ ppf->info.ysize = h;
+ JXL_RETURN_IF_ERROR(VerifyDimensions(constraints, w, h));
+ num_channels =
+ ppf->info.num_color_channels + (ppf->info.alpha_bits ? 1 : 0);
+ format = {
+ /*num_channels=*/num_channels,
+ /*data_type=*/ppf->info.bits_per_sample > 8 ? JXL_TYPE_UINT16
+ : JXL_TYPE_UINT8,
+ /*endianness=*/JXL_BIG_ENDIAN,
+ /*align=*/0,
+ };
+ if (png_bit_depth > 8 && format.data_type == JXL_TYPE_UINT8) {
+ png_set_strip_16(png_ptr);
+ }
+ bytes_per_pixel =
+ num_channels * (format.data_type == JXL_TYPE_UINT16 ? 2 : 1);
+ rowbytes = w * bytes_per_pixel;
+ imagesize = h * rowbytes;
+ frameRaw.pixels.resize(imagesize);
+ frameRaw.rows.resize(h);
+ for (j = 0; j < h; j++)
+ frameRaw.rows[j] = frameRaw.pixels.data() + j * rowbytes;
+
+ if (processing_data(png_ptr, info_ptr, chunk.data(), chunk.size())) {
+ break;
+ }
+ } else if (id == kId_fdAT && isAnimated) {
+ png_save_uint_32(chunk.data() + 4, chunk.size() - 16);
+ memcpy(chunk.data() + 8, "IDAT", 4);
+ if (processing_data(png_ptr, info_ptr, chunk.data() + 4,
+ chunk.size() - 4)) {
+ break;
+ }
+ } else if (id == kId_cICP) {
+ // Color profile chunks: cICP has the highest priority, followed by
+ // iCCP and sRGB (which shouldn't co-exist, but if they do, we use
+ // iCCP), followed finally by gAMA and cHRM.
+ if (DecodeCICP(chunk.data() + 8, chunk.size() - 12,
+ &ppf->color_encoding)) {
+ have_cicp = true;
+ have_color = true;
+ ppf->icc.clear();
+ }
+ } else if (!have_cicp && id == kId_iCCP) {
+ if (processing_data(png_ptr, info_ptr, chunk.data(), chunk.size())) {
+ JXL_WARNING("Corrupt iCCP chunk");
+ break;
+ }
+
+ // TODO(jon): catch special case of PQ and synthesize color encoding
+ // in that case
+ int compression_type;
+ png_bytep profile;
+ png_charp name;
+ png_uint_32 proflen = 0;
+ auto ok = png_get_iCCP(png_ptr, info_ptr, &name, &compression_type,
+ &profile, &proflen);
+ if (ok && proflen) {
+ ppf->icc.assign(profile, profile + proflen);
+ have_color = true;
+ have_iccp = true;
+ } else {
+ // TODO(eustas): JXL_WARNING?
+ }
+ } else if (!have_cicp && !have_iccp && id == kId_sRGB) {
+ JXL_RETURN_IF_ERROR(DecodeSRGB(chunk.data() + 8, chunk.size() - 12,
+ &ppf->color_encoding));
+ have_srgb = true;
+ have_color = true;
+ } else if (!have_cicp && !have_srgb && !have_iccp && id == kId_gAMA) {
+ JXL_RETURN_IF_ERROR(DecodeGAMA(chunk.data() + 8, chunk.size() - 12,
+ &ppf->color_encoding));
+ have_color = true;
+ } else if (!have_cicp && !have_srgb && !have_iccp && id == kId_cHRM) {
+ JXL_RETURN_IF_ERROR(DecodeCHRM(chunk.data() + 8, chunk.size() - 12,
+ &ppf->color_encoding));
+ have_color = true;
+ } else if (id == kId_eXIf) {
+ ppf->metadata.exif.resize(chunk.size() - 12);
+ memcpy(ppf->metadata.exif.data(), chunk.data() + 8,
+ chunk.size() - 12);
+ } else if (!isAbc(chunk[4]) || !isAbc(chunk[5]) || !isAbc(chunk[6]) ||
+ !isAbc(chunk[7])) {
+ break;
+ } else {
+ if (processing_data(png_ptr, info_ptr, chunk.data(), chunk.size())) {
+ break;
+ }
+ if (!hasInfo) {
+ chunksInfo.push_back(chunk);
+ continue;
+ }
+ }
+ }
+ }
+
+ JXL_RETURN_IF_ERROR(ApplyColorHints(
+ color_hints, have_color, ppf->info.num_color_channels == 1, ppf));
+ }
+
+ if (errorstate) return false;
+
+ bool has_nontrivial_background = false;
+ bool previous_frame_should_be_cleared = false;
+ enum {
+ DISPOSE_OP_NONE = 0,
+ DISPOSE_OP_BACKGROUND = 1,
+ DISPOSE_OP_PREVIOUS = 2,
+ };
+ enum {
+ BLEND_OP_SOURCE = 0,
+ BLEND_OP_OVER = 1,
+ };
+ for (size_t i = 0; i < frames.size(); i++) {
+ auto& frame = frames[i];
+ JXL_ASSERT(frame.data.xsize == frame.xsize);
+ JXL_ASSERT(frame.data.ysize == frame.ysize);
+
+ // Before encountering a DISPOSE_OP_NONE frame, the canvas is filled with 0,
+ // so DISPOSE_OP_BACKGROUND and DISPOSE_OP_PREVIOUS are equivalent.
+ if (frame.dispose_op == DISPOSE_OP_NONE) {
+ has_nontrivial_background = true;
+ }
+ bool should_blend = frame.blend_op == BLEND_OP_OVER;
+ bool use_for_next_frame =
+ has_nontrivial_background && frame.dispose_op != DISPOSE_OP_PREVIOUS;
+ size_t x0 = frame.x0;
+ size_t y0 = frame.y0;
+ size_t xsize = frame.data.xsize;
+ size_t ysize = frame.data.ysize;
+ if (previous_frame_should_be_cleared) {
+ size_t px0 = frames[i - 1].x0;
+ size_t py0 = frames[i - 1].y0;
+ size_t pxs = frames[i - 1].xsize;
+ size_t pys = frames[i - 1].ysize;
+ if (px0 >= x0 && py0 >= y0 && px0 + pxs <= x0 + xsize &&
+ py0 + pys <= y0 + ysize && frame.blend_op == BLEND_OP_SOURCE &&
+ use_for_next_frame) {
+ // If the previous frame is entirely contained in the current frame and
+ // we are using BLEND_OP_SOURCE, nothing special needs to be done.
+ ppf->frames.emplace_back(std::move(frame.data));
+ } else if (px0 == x0 && py0 == y0 && px0 + pxs == x0 + xsize &&
+ py0 + pys == y0 + ysize && use_for_next_frame) {
+ // If the new frame has the same size as the old one, but we are
+ // blending, we can instead just not blend.
+ should_blend = false;
+ ppf->frames.emplace_back(std::move(frame.data));
+ } else if (px0 <= x0 && py0 <= y0 && px0 + pxs >= x0 + xsize &&
+ py0 + pys >= y0 + ysize && use_for_next_frame) {
+ // If the new frame is contained within the old frame, we can pad the
+ // new frame with zeros and not blend.
+ PackedImage new_data(pxs, pys, frame.data.format);
+ memset(new_data.pixels(), 0, new_data.pixels_size);
+ for (size_t y = 0; y < ysize; y++) {
+ size_t bytes_per_pixel =
+ PackedImage::BitsPerChannel(new_data.format.data_type) *
+ new_data.format.num_channels / 8;
+ memcpy(static_cast<uint8_t*>(new_data.pixels()) +
+ new_data.stride * (y + y0 - py0) +
+ bytes_per_pixel * (x0 - px0),
+ static_cast<const uint8_t*>(frame.data.pixels()) +
+ frame.data.stride * y,
+ xsize * bytes_per_pixel);
+ }
+
+ x0 = px0;
+ y0 = py0;
+ xsize = pxs;
+ ysize = pys;
+ should_blend = false;
+ ppf->frames.emplace_back(std::move(new_data));
+ } else {
+ // If all else fails, insert a dummy blank frame with kReplace.
+ PackedImage blank(pxs, pys, frame.data.format);
+ memset(blank.pixels(), 0, blank.pixels_size);
+ ppf->frames.emplace_back(std::move(blank));
+ auto& pframe = ppf->frames.back();
+ pframe.frame_info.layer_info.crop_x0 = px0;
+ pframe.frame_info.layer_info.crop_y0 = py0;
+ pframe.frame_info.layer_info.xsize = pxs;
+ pframe.frame_info.layer_info.ysize = pys;
+ pframe.frame_info.duration = 0;
+ bool is_full_size = px0 == 0 && py0 == 0 && pxs == ppf->info.xsize &&
+ pys == ppf->info.ysize;
+ pframe.frame_info.layer_info.have_crop = is_full_size ? 0 : 1;
+ pframe.frame_info.layer_info.blend_info.blendmode = JXL_BLEND_REPLACE;
+ pframe.frame_info.layer_info.blend_info.source = 1;
+ pframe.frame_info.layer_info.save_as_reference = 1;
+ ppf->frames.emplace_back(std::move(frame.data));
+ }
+ } else {
+ ppf->frames.emplace_back(std::move(frame.data));
+ }
+
+ auto& pframe = ppf->frames.back();
+ pframe.frame_info.layer_info.crop_x0 = x0;
+ pframe.frame_info.layer_info.crop_y0 = y0;
+ pframe.frame_info.layer_info.xsize = xsize;
+ pframe.frame_info.layer_info.ysize = ysize;
+ pframe.frame_info.duration = frame.duration;
+ pframe.frame_info.layer_info.blend_info.blendmode =
+ should_blend ? JXL_BLEND_BLEND : JXL_BLEND_REPLACE;
+ bool is_full_size = x0 == 0 && y0 == 0 && xsize == ppf->info.xsize &&
+ ysize == ppf->info.ysize;
+ pframe.frame_info.layer_info.have_crop = is_full_size ? 0 : 1;
+ pframe.frame_info.layer_info.blend_info.source = 1;
+ pframe.frame_info.layer_info.blend_info.alpha = 0;
+ pframe.frame_info.layer_info.save_as_reference = use_for_next_frame ? 1 : 0;
+
+ previous_frame_should_be_cleared =
+ has_nontrivial_background && frame.dispose_op == DISPOSE_OP_BACKGROUND;
+ }
+ if (ppf->frames.empty()) return JXL_FAILURE("No frames decoded");
+ ppf->frames.back().frame_info.is_last = true;
+
+ return true;
+}
+
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/dec/apng.h b/third_party/jpeg-xl/lib/extras/dec/apng.h
new file mode 100644
index 0000000000..6502ac80c0
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/apng.h
@@ -0,0 +1,34 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_APNG_H_
+#define LIB_EXTRAS_DEC_APNG_H_
+
+// Decodes APNG images in memory.
+
+#include <stdint.h>
+
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+struct SizeConstraints;
+
+namespace extras {
+
+// Decodes `bytes` into `ppf`.
+Status DecodeImageAPNG(Span<const uint8_t> bytes, const ColorHints& color_hints,
+ PackedPixelFile* ppf,
+ const SizeConstraints* constraints = nullptr);
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_DEC_APNG_H_
diff --git a/third_party/jpeg-xl/lib/extras/dec/color_description.cc b/third_party/jpeg-xl/lib/extras/dec/color_description.cc
new file mode 100644
index 0000000000..54f6aa4206
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/color_description.cc
@@ -0,0 +1,218 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/color_description.h"
+
+#include <errno.h>
+
+#include <cmath>
+
+namespace jxl {
+
+namespace {
+
+template <typename T>
+struct EnumName {
+ const char* name;
+ T value;
+};
+
+const EnumName<JxlColorSpace> kJxlColorSpaceNames[] = {
+ {"RGB", JXL_COLOR_SPACE_RGB},
+ {"Gra", JXL_COLOR_SPACE_GRAY},
+ {"XYB", JXL_COLOR_SPACE_XYB},
+ {"CS?", JXL_COLOR_SPACE_UNKNOWN},
+};
+
+const EnumName<JxlWhitePoint> kJxlWhitePointNames[] = {
+ {"D65", JXL_WHITE_POINT_D65},
+ {"Cst", JXL_WHITE_POINT_CUSTOM},
+ {"EER", JXL_WHITE_POINT_E},
+ {"DCI", JXL_WHITE_POINT_DCI},
+};
+
+const EnumName<JxlPrimaries> kJxlPrimariesNames[] = {
+ {"SRG", JXL_PRIMARIES_SRGB},
+ {"Cst", JXL_PRIMARIES_CUSTOM},
+ {"202", JXL_PRIMARIES_2100},
+ {"DCI", JXL_PRIMARIES_P3},
+};
+
+const EnumName<JxlTransferFunction> kJxlTransferFunctionNames[] = {
+ {"709", JXL_TRANSFER_FUNCTION_709},
+ {"TF?", JXL_TRANSFER_FUNCTION_UNKNOWN},
+ {"Lin", JXL_TRANSFER_FUNCTION_LINEAR},
+ {"SRG", JXL_TRANSFER_FUNCTION_SRGB},
+ {"PeQ", JXL_TRANSFER_FUNCTION_PQ},
+ {"DCI", JXL_TRANSFER_FUNCTION_DCI},
+ {"HLG", JXL_TRANSFER_FUNCTION_HLG},
+ {"", JXL_TRANSFER_FUNCTION_GAMMA},
+};
+
+const EnumName<JxlRenderingIntent> kJxlRenderingIntentNames[] = {
+ {"Per", JXL_RENDERING_INTENT_PERCEPTUAL},
+ {"Rel", JXL_RENDERING_INTENT_RELATIVE},
+ {"Sat", JXL_RENDERING_INTENT_SATURATION},
+ {"Abs", JXL_RENDERING_INTENT_ABSOLUTE},
+};
+
+template <typename T>
+Status ParseEnum(const std::string& token, const EnumName<T>* enum_values,
+ size_t enum_len, T* value) {
+ for (size_t i = 0; i < enum_len; i++) {
+ if (enum_values[i].name == token) {
+ *value = enum_values[i].value;
+ return true;
+ }
+ }
+ return false;
+}
+#define ARRAY_SIZE(X) (sizeof(X) / sizeof((X)[0]))
+#define PARSE_ENUM(type, token, value) \
+ ParseEnum<type>(token, k##type##Names, ARRAY_SIZE(k##type##Names), value)
+
+class Tokenizer {
+ public:
+ Tokenizer(const std::string* input, char separator)
+ : input_(input), separator_(separator) {}
+
+ Status Next(std::string* next) {
+ const size_t end = input_->find(separator_, start_);
+ if (end == std::string::npos) {
+ *next = input_->substr(start_); // rest of string
+ } else {
+ *next = input_->substr(start_, end - start_);
+ }
+ if (next->empty()) return JXL_FAILURE("Missing token");
+ start_ = end + 1;
+ return true;
+ }
+
+ private:
+ const std::string* const input_; // not owned
+ const char separator_;
+ size_t start_ = 0; // of next token
+};
+
+Status ParseDouble(const std::string& num, double* d) {
+ char* end;
+ errno = 0;
+ *d = strtod(num.c_str(), &end);
+ if (*d == 0.0 && end == num.c_str()) {
+ return JXL_FAILURE("Invalid double: %s", num.c_str());
+ }
+ if (std::isnan(*d)) {
+ return JXL_FAILURE("Invalid double: %s", num.c_str());
+ }
+ if (errno == ERANGE) {
+ return JXL_FAILURE("Double out of range: %s", num.c_str());
+ }
+ return true;
+}
+
+Status ParseDouble(Tokenizer* tokenizer, double* d) {
+ std::string num;
+ JXL_RETURN_IF_ERROR(tokenizer->Next(&num));
+ return ParseDouble(num, d);
+}
+
+Status ParseColorSpace(Tokenizer* tokenizer, JxlColorEncoding* c) {
+ std::string str;
+ JXL_RETURN_IF_ERROR(tokenizer->Next(&str));
+ JxlColorSpace cs;
+ if (PARSE_ENUM(JxlColorSpace, str, &cs)) {
+ c->color_space = cs;
+ return true;
+ }
+
+ return JXL_FAILURE("Unknown ColorSpace %s", str.c_str());
+}
+
+Status ParseWhitePoint(Tokenizer* tokenizer, JxlColorEncoding* c) {
+ if (c->color_space == JXL_COLOR_SPACE_XYB) {
+ // Implicit white point.
+ c->white_point = JXL_WHITE_POINT_D65;
+ return true;
+ }
+
+ std::string str;
+ JXL_RETURN_IF_ERROR(tokenizer->Next(&str));
+ if (PARSE_ENUM(JxlWhitePoint, str, &c->white_point)) return true;
+
+ Tokenizer xy_tokenizer(&str, ';');
+ c->white_point = JXL_WHITE_POINT_CUSTOM;
+ JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->white_point_xy + 0));
+ JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->white_point_xy + 1));
+ return true;
+}
+
+Status ParsePrimaries(Tokenizer* tokenizer, JxlColorEncoding* c) {
+ if (c->color_space == JXL_COLOR_SPACE_GRAY ||
+ c->color_space == JXL_COLOR_SPACE_XYB) {
+ // No primaries case.
+ return true;
+ }
+
+ std::string str;
+ JXL_RETURN_IF_ERROR(tokenizer->Next(&str));
+ if (PARSE_ENUM(JxlPrimaries, str, &c->primaries)) return true;
+
+ Tokenizer xy_tokenizer(&str, ';');
+ JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->primaries_red_xy + 0));
+ JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->primaries_red_xy + 1));
+ JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->primaries_green_xy + 0));
+ JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->primaries_green_xy + 1));
+ JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->primaries_blue_xy + 0));
+ JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->primaries_blue_xy + 1));
+ c->primaries = JXL_PRIMARIES_CUSTOM;
+
+ return JXL_FAILURE("Invalid primaries %s", str.c_str());
+}
+
+Status ParseRenderingIntent(Tokenizer* tokenizer, JxlColorEncoding* c) {
+ std::string str;
+ JXL_RETURN_IF_ERROR(tokenizer->Next(&str));
+ if (PARSE_ENUM(JxlRenderingIntent, str, &c->rendering_intent)) return true;
+
+ return JXL_FAILURE("Invalid RenderingIntent %s\n", str.c_str());
+}
+
+Status ParseTransferFunction(Tokenizer* tokenizer, JxlColorEncoding* c) {
+ if (c->color_space == JXL_COLOR_SPACE_XYB) {
+ // Implicit TF.
+ c->transfer_function = JXL_TRANSFER_FUNCTION_GAMMA;
+ c->gamma = 1 / 3.;
+ return true;
+ }
+
+ std::string str;
+ JXL_RETURN_IF_ERROR(tokenizer->Next(&str));
+ if (PARSE_ENUM(JxlTransferFunction, str, &c->transfer_function)) {
+ return true;
+ }
+
+ if (str[0] == 'g') {
+ JXL_RETURN_IF_ERROR(ParseDouble(str.substr(1), &c->gamma));
+ c->transfer_function = JXL_TRANSFER_FUNCTION_GAMMA;
+ return true;
+ }
+
+ return JXL_FAILURE("Invalid gamma %s", str.c_str());
+}
+
+} // namespace
+
+Status ParseDescription(const std::string& description, JxlColorEncoding* c) {
+ *c = {};
+ Tokenizer tokenizer(&description, '_');
+ JXL_RETURN_IF_ERROR(ParseColorSpace(&tokenizer, c));
+ JXL_RETURN_IF_ERROR(ParseWhitePoint(&tokenizer, c));
+ JXL_RETURN_IF_ERROR(ParsePrimaries(&tokenizer, c));
+ JXL_RETURN_IF_ERROR(ParseRenderingIntent(&tokenizer, c));
+ JXL_RETURN_IF_ERROR(ParseTransferFunction(&tokenizer, c));
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/dec/color_description.h b/third_party/jpeg-xl/lib/extras/dec/color_description.h
new file mode 100644
index 0000000000..23680ff7c6
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/color_description.h
@@ -0,0 +1,23 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_COLOR_DESCRIPTION_H_
+#define LIB_EXTRAS_COLOR_DESCRIPTION_H_
+
+#include <jxl/color_encoding.h>
+
+#include <string>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Parse the color description into a JxlColorEncoding "RGB_D65_SRG_Rel_Lin".
+Status ParseDescription(const std::string& description,
+ JxlColorEncoding* JXL_RESTRICT c);
+
+} // namespace jxl
+
+#endif // LIB_EXTRAS_COLOR_DESCRIPTION_H_
diff --git a/third_party/jpeg-xl/lib/extras/dec/color_description_test.cc b/third_party/jpeg-xl/lib/extras/dec/color_description_test.cc
new file mode 100644
index 0000000000..a1c04a94e4
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/color_description_test.cc
@@ -0,0 +1,38 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/color_description.h"
+
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+
+// Verify ParseDescription(Description) yields the same ColorEncoding
+TEST(ColorDescriptionTest, RoundTripAll) {
+ for (const auto& cdesc : test::AllEncodings()) {
+ const ColorEncoding c_original = test::ColorEncodingFromDescriptor(cdesc);
+ const std::string description = Description(c_original);
+ printf("%s\n", description.c_str());
+
+ JxlColorEncoding c_external = {};
+ EXPECT_TRUE(ParseDescription(description, &c_external));
+ ColorEncoding c_internal;
+ EXPECT_TRUE(
+ ConvertExternalToInternalColorEncoding(c_external, &c_internal));
+ EXPECT_TRUE(c_original.SameColorEncoding(c_internal))
+ << "Where c_original=" << c_original
+ << " and c_internal=" << c_internal;
+ }
+}
+
+TEST(ColorDescriptionTest, NanGamma) {
+ const std::string description = "Gra_2_Per_gnan";
+ JxlColorEncoding c;
+ EXPECT_FALSE(ParseDescription(description, &c));
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/dec/color_hints.cc b/third_party/jpeg-xl/lib/extras/dec/color_hints.cc
new file mode 100644
index 0000000000..53f7cd0543
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/color_hints.cc
@@ -0,0 +1,66 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/color_hints.h"
+
+#include <jxl/encode.h>
+
+#include "lib/extras/dec/color_description.h"
+#include "lib/jxl/base/file_io.h"
+
+namespace jxl {
+namespace extras {
+
+Status ApplyColorHints(const ColorHints& color_hints,
+ const bool color_already_set, const bool is_gray,
+ PackedPixelFile* ppf) {
+ if (color_already_set) {
+ return color_hints.Foreach(
+ [](const std::string& key, const std::string& /*value*/) {
+ JXL_WARNING("Decoder ignoring %s hint", key.c_str());
+ return true;
+ });
+ }
+
+ bool got_color_space = false;
+
+ JXL_RETURN_IF_ERROR(color_hints.Foreach(
+ [is_gray, ppf, &got_color_space](const std::string& key,
+ const std::string& value) -> Status {
+ if (key == "color_space") {
+ JxlColorEncoding c_original_external;
+ if (!ParseDescription(value, &c_original_external)) {
+ return JXL_FAILURE("Failed to apply color_space");
+ }
+ ppf->color_encoding = c_original_external;
+
+ if (is_gray !=
+ (ppf->color_encoding.color_space == JXL_COLOR_SPACE_GRAY)) {
+ return JXL_FAILURE("mismatch between file and color_space hint");
+ }
+
+ got_color_space = true;
+ } else if (key == "icc_pathname") {
+ JXL_RETURN_IF_ERROR(ReadFile(value, &ppf->icc));
+ got_color_space = true;
+ } else {
+ JXL_WARNING("Ignoring %s hint", key.c_str());
+ }
+ return true;
+ }));
+
+ if (!got_color_space) {
+ ppf->color_encoding.color_space =
+ is_gray ? JXL_COLOR_SPACE_GRAY : JXL_COLOR_SPACE_RGB;
+ ppf->color_encoding.white_point = JXL_WHITE_POINT_D65;
+ ppf->color_encoding.primaries = JXL_PRIMARIES_SRGB;
+ ppf->color_encoding.transfer_function = JXL_TRANSFER_FUNCTION_SRGB;
+ }
+
+ return true;
+}
+
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/dec/color_hints.h b/third_party/jpeg-xl/lib/extras/dec/color_hints.h
new file mode 100644
index 0000000000..9c7de884f9
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/color_hints.h
@@ -0,0 +1,72 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_COLOR_HINTS_H_
+#define LIB_EXTRAS_COLOR_HINTS_H_
+
+// Not all the formats implemented in the extras lib support bundling color
+// information into the file, and those that support it may not have it.
+// To allow attaching color information to those file formats the caller can
+// define these color hints.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace extras {
+
+class ColorHints {
+ public:
+ // key=color_space, value=Description(c/pp): specify the ColorEncoding of
+ // the pixels for decoding. Otherwise, if the codec did not obtain an ICC
+ // profile from the image, assume sRGB.
+ //
+ // Strings are taken from the command line, so avoid spaces for convenience.
+ void Add(const std::string& key, const std::string& value) {
+ kv_.emplace_back(key, value);
+ }
+
+ // Calls `func(key, value)` for each key/value in the order they were added,
+ // returning false immediately if `func` returns false.
+ template <class Func>
+ Status Foreach(const Func& func) const {
+ for (const KeyValue& kv : kv_) {
+ Status ok = func(kv.key, kv.value);
+ if (!ok) {
+ return JXL_FAILURE("ColorHints::Foreach returned false");
+ }
+ }
+ return true;
+ }
+
+ private:
+ // Splitting into key/value avoids parsing in each codec.
+ struct KeyValue {
+ KeyValue(std::string key, std::string value)
+ : key(std::move(key)), value(std::move(value)) {}
+
+ std::string key;
+ std::string value;
+ };
+
+ std::vector<KeyValue> kv_;
+};
+
+// Apply the color hints to the decoded image in PackedPixelFile if any.
+// color_already_set tells whether the color encoding was already set, in which
+// case the hints are ignored if any hint is passed.
+Status ApplyColorHints(const ColorHints& color_hints, bool color_already_set,
+ bool is_gray, PackedPixelFile* ppf);
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_COLOR_HINTS_H_
diff --git a/third_party/jpeg-xl/lib/extras/dec/decode.cc b/third_party/jpeg-xl/lib/extras/dec/decode.cc
new file mode 100644
index 0000000000..e1b0365274
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/decode.cc
@@ -0,0 +1,132 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/decode.h"
+
+#include <locale>
+
+#if JPEGXL_ENABLE_APNG
+#include "lib/extras/dec/apng.h"
+#endif
+#if JPEGXL_ENABLE_EXR
+#include "lib/extras/dec/exr.h"
+#endif
+#if JPEGXL_ENABLE_GIF
+#include "lib/extras/dec/gif.h"
+#endif
+#if JPEGXL_ENABLE_JPEG
+#include "lib/extras/dec/jpg.h"
+#endif
+#include "lib/extras/dec/pgx.h"
+#include "lib/extras/dec/pnm.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+// Any valid encoding is larger (ensures codecs can read the first few bytes)
+constexpr size_t kMinBytes = 9;
+
+} // namespace
+
+std::vector<Codec> AvailableCodecs() {
+ std::vector<Codec> out;
+#if JPEGXL_ENABLE_APNG
+ out.push_back(Codec::kPNG);
+#endif
+#if JPEGXL_ENABLE_EXR
+ out.push_back(Codec::kEXR);
+#endif
+#if JPEGXL_ENABLE_GIF
+ out.push_back(Codec::kGIF);
+#endif
+#if JPEGXL_ENABLE_JPEG
+ out.push_back(Codec::kJPG);
+#endif
+ out.push_back(Codec::kPGX);
+ out.push_back(Codec::kPNM);
+ return out;
+}
+
+Codec CodecFromExtension(std::string extension,
+ size_t* JXL_RESTRICT bits_per_sample) {
+ std::transform(
+ extension.begin(), extension.end(), extension.begin(),
+ [](char c) { return std::tolower(c, std::locale::classic()); });
+ if (extension == ".png") return Codec::kPNG;
+
+ if (extension == ".jpg") return Codec::kJPG;
+ if (extension == ".jpeg") return Codec::kJPG;
+
+ if (extension == ".pgx") return Codec::kPGX;
+
+ if (extension == ".pam") return Codec::kPNM;
+ if (extension == ".pnm") return Codec::kPNM;
+ if (extension == ".pgm") return Codec::kPNM;
+ if (extension == ".ppm") return Codec::kPNM;
+ if (extension == ".pfm") {
+ if (bits_per_sample != nullptr) *bits_per_sample = 32;
+ return Codec::kPNM;
+ }
+
+ if (extension == ".gif") return Codec::kGIF;
+
+ if (extension == ".exr") return Codec::kEXR;
+
+ return Codec::kUnknown;
+}
+
+Status DecodeBytes(const Span<const uint8_t> bytes,
+ const ColorHints& color_hints, extras::PackedPixelFile* ppf,
+ const SizeConstraints* constraints, Codec* orig_codec) {
+ if (bytes.size() < kMinBytes) return JXL_FAILURE("Too few bytes");
+
+ *ppf = extras::PackedPixelFile();
+
+ // Default values when not set by decoders.
+ ppf->info.uses_original_profile = true;
+ ppf->info.orientation = JXL_ORIENT_IDENTITY;
+
+ const auto choose_codec = [&]() -> Codec {
+#if JPEGXL_ENABLE_APNG
+ if (DecodeImageAPNG(bytes, color_hints, ppf, constraints)) {
+ return Codec::kPNG;
+ }
+#endif
+ if (DecodeImagePGX(bytes, color_hints, ppf, constraints)) {
+ return Codec::kPGX;
+ }
+ if (DecodeImagePNM(bytes, color_hints, ppf, constraints)) {
+ return Codec::kPNM;
+ }
+#if JPEGXL_ENABLE_GIF
+ if (DecodeImageGIF(bytes, color_hints, ppf, constraints)) {
+ return Codec::kGIF;
+ }
+#endif
+#if JPEGXL_ENABLE_JPEG
+ if (DecodeImageJPG(bytes, color_hints, ppf, constraints)) {
+ return Codec::kJPG;
+ }
+#endif
+#if JPEGXL_ENABLE_EXR
+ if (DecodeImageEXR(bytes, color_hints, ppf, constraints)) {
+ return Codec::kEXR;
+ }
+#endif
+ return Codec::kUnknown;
+ };
+
+ Codec codec = choose_codec();
+ if (codec == Codec::kUnknown) {
+ return JXL_FAILURE("Codecs failed to decode");
+ }
+ if (orig_codec) *orig_codec = codec;
+
+ return true;
+}
+
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/dec/decode.h b/third_party/jpeg-xl/lib/extras/dec/decode.h
new file mode 100644
index 0000000000..f802041026
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/decode.h
@@ -0,0 +1,55 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_DECODE_H_
+#define LIB_EXTRAS_DEC_DECODE_H_
+
+// Facade for image decoders (PNG, PNM, ...).
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/extras/dec/color_hints.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+struct SizeConstraints;
+
+namespace extras {
+
+// Codecs supported by CodecInOut::Encode.
+enum class Codec : uint32_t {
+ kUnknown, // for CodecFromExtension
+ kPNG,
+ kPNM,
+ kPGX,
+ kJPG,
+ kGIF,
+ kEXR
+};
+
+std::vector<Codec> AvailableCodecs();
+
+// If and only if extension is ".pfm", *bits_per_sample is updated to 32 so
+// that Encode() would encode to PFM instead of PPM.
+Codec CodecFromExtension(std::string extension,
+ size_t* JXL_RESTRICT bits_per_sample = nullptr);
+
+// Decodes "bytes" info *ppf.
+// color_space_hint may specify the color space, otherwise, defaults to sRGB.
+Status DecodeBytes(Span<const uint8_t> bytes, const ColorHints& color_hints,
+ extras::PackedPixelFile* ppf,
+ const SizeConstraints* constraints = nullptr,
+ Codec* orig_codec = nullptr);
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_DEC_DECODE_H_
diff --git a/third_party/jpeg-xl/lib/extras/dec/exr.cc b/third_party/jpeg-xl/lib/extras/dec/exr.cc
new file mode 100644
index 0000000000..f174ccd0c9
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/exr.cc
@@ -0,0 +1,184 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/exr.h"
+
+#include <ImfChromaticitiesAttribute.h>
+#include <ImfIO.h>
+#include <ImfRgbaFile.h>
+#include <ImfStandardAttributes.h>
+
+#include <vector>
+
+namespace jxl {
+namespace extras {
+
+namespace {
+
+namespace OpenEXR = OPENEXR_IMF_NAMESPACE;
+namespace Imath = IMATH_NAMESPACE;
+
+// OpenEXR::Int64 is deprecated in favor of using uint64_t directly, but using
+// uint64_t as recommended causes build failures with previous OpenEXR versions
+// on macOS, where the definition for OpenEXR::Int64 was actually not equivalent
+// to uint64_t. This alternative should work in all cases.
+using ExrInt64 = decltype(std::declval<OpenEXR::IStream>().tellg());
+
+constexpr int kExrBitsPerSample = 16;
+constexpr int kExrAlphaBits = 16;
+
+class InMemoryIStream : public OpenEXR::IStream {
+ public:
+ // The data pointed to by `bytes` must outlive the InMemoryIStream.
+ explicit InMemoryIStream(const Span<const uint8_t> bytes)
+ : IStream(/*fileName=*/""), bytes_(bytes) {}
+
+ bool isMemoryMapped() const override { return true; }
+ char* readMemoryMapped(const int n) override {
+ JXL_ASSERT(pos_ + n <= bytes_.size());
+ char* const result =
+ const_cast<char*>(reinterpret_cast<const char*>(bytes_.data() + pos_));
+ pos_ += n;
+ return result;
+ }
+ bool read(char c[], const int n) override {
+ std::copy_n(readMemoryMapped(n), n, c);
+ return pos_ < bytes_.size();
+ }
+
+ ExrInt64 tellg() override { return pos_; }
+ void seekg(const ExrInt64 pos) override {
+ JXL_ASSERT(pos + 1 <= bytes_.size());
+ pos_ = pos;
+ }
+
+ private:
+ const Span<const uint8_t> bytes_;
+ size_t pos_ = 0;
+};
+
+} // namespace
+
+Status DecodeImageEXR(Span<const uint8_t> bytes, const ColorHints& color_hints,
+ PackedPixelFile* ppf,
+ const SizeConstraints* constraints) {
+ InMemoryIStream is(bytes);
+
+#ifdef __EXCEPTIONS
+ std::unique_ptr<OpenEXR::RgbaInputFile> input_ptr;
+ try {
+ input_ptr.reset(new OpenEXR::RgbaInputFile(is));
+ } catch (...) {
+ return JXL_FAILURE("OpenEXR failed to parse input");
+ }
+ OpenEXR::RgbaInputFile& input = *input_ptr;
+#else
+ OpenEXR::RgbaInputFile input(is);
+#endif
+
+ if ((input.channels() & OpenEXR::RgbaChannels::WRITE_RGB) !=
+ OpenEXR::RgbaChannels::WRITE_RGB) {
+ return JXL_FAILURE("only RGB OpenEXR files are supported");
+ }
+ const bool has_alpha = (input.channels() & OpenEXR::RgbaChannels::WRITE_A) ==
+ OpenEXR::RgbaChannels::WRITE_A;
+
+ const float intensity_target = OpenEXR::hasWhiteLuminance(input.header())
+ ? OpenEXR::whiteLuminance(input.header())
+ : 0;
+
+ auto image_size = input.displayWindow().size();
+ // Size is computed as max - min, but both bounds are inclusive.
+ ++image_size.x;
+ ++image_size.y;
+
+ ppf->info.xsize = image_size.x;
+ ppf->info.ysize = image_size.y;
+ ppf->info.num_color_channels = 3;
+
+ const JxlDataType data_type =
+ kExrBitsPerSample == 16 ? JXL_TYPE_FLOAT16 : JXL_TYPE_FLOAT;
+ const JxlPixelFormat format{
+ /*num_channels=*/3u + (has_alpha ? 1u : 0u),
+ /*data_type=*/data_type,
+ /*endianness=*/JXL_NATIVE_ENDIAN,
+ /*align=*/0,
+ };
+ ppf->frames.clear();
+ // Allocates the frame buffer.
+ ppf->frames.emplace_back(image_size.x, image_size.y, format);
+ const auto& frame = ppf->frames.back();
+
+ const int row_size = input.dataWindow().size().x + 1;
+ // Number of rows to read at a time.
+ // https://www.openexr.com/documentation/ReadingAndWritingImageFiles.pdf
+ // recommends reading the whole file at once.
+ const int y_chunk_size = input.displayWindow().size().y + 1;
+ std::vector<OpenEXR::Rgba> input_rows(row_size * y_chunk_size);
+ for (int start_y =
+ std::max(input.dataWindow().min.y, input.displayWindow().min.y);
+ start_y <=
+ std::min(input.dataWindow().max.y, input.displayWindow().max.y);
+ start_y += y_chunk_size) {
+ // Inclusive.
+ const int end_y = std::min(
+ start_y + y_chunk_size - 1,
+ std::min(input.dataWindow().max.y, input.displayWindow().max.y));
+ input.setFrameBuffer(
+ input_rows.data() - input.dataWindow().min.x - start_y * row_size,
+ /*xStride=*/1, /*yStride=*/row_size);
+ input.readPixels(start_y, end_y);
+ for (int exr_y = start_y; exr_y <= end_y; ++exr_y) {
+ const int image_y = exr_y - input.displayWindow().min.y;
+ const OpenEXR::Rgba* const JXL_RESTRICT input_row =
+ &input_rows[(exr_y - start_y) * row_size];
+ uint8_t* row = static_cast<uint8_t*>(frame.color.pixels()) +
+ frame.color.stride * image_y;
+ const uint32_t pixel_size =
+ (3 + (has_alpha ? 1 : 0)) * kExrBitsPerSample / 8;
+ for (int exr_x =
+ std::max(input.dataWindow().min.x, input.displayWindow().min.x);
+ exr_x <=
+ std::min(input.dataWindow().max.x, input.displayWindow().max.x);
+ ++exr_x) {
+ const int image_x = exr_x - input.displayWindow().min.x;
+ memcpy(row + image_x * pixel_size,
+ input_row + (exr_x - input.dataWindow().min.x), pixel_size);
+ }
+ }
+ }
+
+ ppf->color_encoding.transfer_function = JXL_TRANSFER_FUNCTION_LINEAR;
+ ppf->color_encoding.color_space = JXL_COLOR_SPACE_RGB;
+ ppf->color_encoding.primaries = JXL_PRIMARIES_SRGB;
+ ppf->color_encoding.white_point = JXL_WHITE_POINT_D65;
+ if (OpenEXR::hasChromaticities(input.header())) {
+ ppf->color_encoding.primaries = JXL_PRIMARIES_CUSTOM;
+ ppf->color_encoding.white_point = JXL_WHITE_POINT_CUSTOM;
+ const auto& chromaticities = OpenEXR::chromaticities(input.header());
+ ppf->color_encoding.primaries_red_xy[0] = chromaticities.red.x;
+ ppf->color_encoding.primaries_red_xy[1] = chromaticities.red.y;
+ ppf->color_encoding.primaries_green_xy[0] = chromaticities.green.x;
+ ppf->color_encoding.primaries_green_xy[1] = chromaticities.green.y;
+ ppf->color_encoding.primaries_blue_xy[0] = chromaticities.blue.x;
+ ppf->color_encoding.primaries_blue_xy[1] = chromaticities.blue.y;
+ ppf->color_encoding.white_point_xy[0] = chromaticities.white.x;
+ ppf->color_encoding.white_point_xy[1] = chromaticities.white.y;
+ }
+
+ // EXR uses binary16 or binary32 floating point format.
+ ppf->info.bits_per_sample = kExrBitsPerSample;
+ ppf->info.exponent_bits_per_sample = kExrBitsPerSample == 16 ? 5 : 8;
+ if (has_alpha) {
+ ppf->info.alpha_bits = kExrAlphaBits;
+ ppf->info.alpha_exponent_bits = ppf->info.exponent_bits_per_sample;
+ ppf->info.alpha_premultiplied = true;
+ }
+ ppf->info.intensity_target = intensity_target;
+ return true;
+}
+
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/dec/exr.h b/third_party/jpeg-xl/lib/extras/dec/exr.h
new file mode 100644
index 0000000000..6b7c5b714d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/exr.h
@@ -0,0 +1,32 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_EXR_H_
+#define LIB_EXTRAS_DEC_EXR_H_
+
+// Decodes OpenEXR images in memory.
+
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+struct SizeConstraints;
+
+namespace extras {
+
+// Decodes `bytes` into `ppf`. color_hints are ignored.
+Status DecodeImageEXR(Span<const uint8_t> bytes, const ColorHints& color_hints,
+ PackedPixelFile* ppf,
+ const SizeConstraints* constraints = nullptr);
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_DEC_EXR_H_
diff --git a/third_party/jpeg-xl/lib/extras/dec/gif.cc b/third_party/jpeg-xl/lib/extras/dec/gif.cc
new file mode 100644
index 0000000000..4593382b92
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/gif.cc
@@ -0,0 +1,400 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/gif.h"
+
+#include <gif_lib.h>
+#include <jxl/codestream_header.h>
+#include <string.h>
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/size_constraints.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+namespace extras {
+
+namespace {
+
+struct ReadState {
+ Span<const uint8_t> bytes;
+};
+
+struct DGifCloser {
+ void operator()(GifFileType* const ptr) const { DGifCloseFile(ptr, nullptr); }
+};
+using GifUniquePtr = std::unique_ptr<GifFileType, DGifCloser>;
+
+struct PackedRgba {
+ uint8_t r, g, b, a;
+};
+
+struct PackedRgb {
+ uint8_t r, g, b;
+};
+
+void ensure_have_alpha(PackedFrame* frame) {
+ if (!frame->extra_channels.empty()) return;
+ const JxlPixelFormat alpha_format{
+ /*num_channels=*/1u,
+ /*data_type=*/JXL_TYPE_UINT8,
+ /*endianness=*/JXL_NATIVE_ENDIAN,
+ /*align=*/0,
+ };
+ frame->extra_channels.emplace_back(frame->color.xsize, frame->color.ysize,
+ alpha_format);
+ // We need to set opaque-by-default.
+ std::fill_n(static_cast<uint8_t*>(frame->extra_channels[0].pixels()),
+ frame->color.xsize * frame->color.ysize, 255u);
+}
+
+} // namespace
+
+Status DecodeImageGIF(Span<const uint8_t> bytes, const ColorHints& color_hints,
+ PackedPixelFile* ppf,
+ const SizeConstraints* constraints) {
+ int error = GIF_OK;
+ ReadState state = {bytes};
+ const auto ReadFromSpan = [](GifFileType* const gif, GifByteType* const bytes,
+ int n) {
+ ReadState* const state = reinterpret_cast<ReadState*>(gif->UserData);
+ // giflib API requires the input size `n` to be signed int.
+ if (static_cast<size_t>(n) > state->bytes.size()) {
+ n = state->bytes.size();
+ }
+ memcpy(bytes, state->bytes.data(), n);
+ state->bytes.remove_prefix(n);
+ return n;
+ };
+ GifUniquePtr gif(DGifOpen(&state, ReadFromSpan, &error));
+ if (gif == nullptr) {
+ if (error == D_GIF_ERR_NOT_GIF_FILE) {
+ // Not an error.
+ return false;
+ } else {
+ return JXL_FAILURE("Failed to read GIF: %s", GifErrorString(error));
+ }
+ }
+ error = DGifSlurp(gif.get());
+ if (error != GIF_OK) {
+ return JXL_FAILURE("Failed to read GIF: %s", GifErrorString(gif->Error));
+ }
+
+ msan::UnpoisonMemory(gif.get(), sizeof(*gif));
+ if (gif->SColorMap) {
+ msan::UnpoisonMemory(gif->SColorMap, sizeof(*gif->SColorMap));
+ msan::UnpoisonMemory(
+ gif->SColorMap->Colors,
+ sizeof(*gif->SColorMap->Colors) * gif->SColorMap->ColorCount);
+ }
+ msan::UnpoisonMemory(gif->SavedImages,
+ sizeof(*gif->SavedImages) * gif->ImageCount);
+
+ JXL_RETURN_IF_ERROR(
+ VerifyDimensions<uint32_t>(constraints, gif->SWidth, gif->SHeight));
+ uint64_t total_pixel_count =
+ static_cast<uint64_t>(gif->SWidth) * gif->SHeight;
+ for (int i = 0; i < gif->ImageCount; ++i) {
+ const SavedImage& image = gif->SavedImages[i];
+ uint32_t w = image.ImageDesc.Width;
+ uint32_t h = image.ImageDesc.Height;
+ JXL_RETURN_IF_ERROR(VerifyDimensions<uint32_t>(constraints, w, h));
+ uint64_t pixel_count = static_cast<uint64_t>(w) * h;
+ if (total_pixel_count + pixel_count < total_pixel_count) {
+ return JXL_FAILURE("Image too big");
+ }
+ total_pixel_count += pixel_count;
+ if (constraints && (total_pixel_count > constraints->dec_max_pixels)) {
+ return JXL_FAILURE("Image too big");
+ }
+ }
+
+ if (!gif->SColorMap) {
+ for (int i = 0; i < gif->ImageCount; ++i) {
+ if (!gif->SavedImages[i].ImageDesc.ColorMap) {
+ return JXL_FAILURE("Missing GIF color map");
+ }
+ }
+ }
+
+ if (gif->ImageCount > 1) {
+ ppf->info.have_animation = true;
+ // Delays in GIF are specified in 100ths of a second.
+ ppf->info.animation.tps_numerator = 100;
+ ppf->info.animation.tps_denominator = 1;
+ }
+
+ ppf->frames.clear();
+ ppf->frames.reserve(gif->ImageCount);
+
+ ppf->info.xsize = gif->SWidth;
+ ppf->info.ysize = gif->SHeight;
+ ppf->info.bits_per_sample = 8;
+ ppf->info.exponent_bits_per_sample = 0;
+ // alpha_bits is later set to 8 if we find a frame with transparent pixels.
+ ppf->info.alpha_bits = 0;
+ ppf->info.alpha_exponent_bits = 0;
+ JXL_RETURN_IF_ERROR(ApplyColorHints(color_hints, /*color_already_set=*/false,
+ /*is_gray=*/false, ppf));
+
+ ppf->info.num_color_channels = 3;
+
+ // Pixel format for the 'canvas' onto which we paint
+ // the (potentially individually cropped) GIF frames
+ // of an animation.
+ const JxlPixelFormat canvas_format{
+ /*num_channels=*/4u,
+ /*data_type=*/JXL_TYPE_UINT8,
+ /*endianness=*/JXL_NATIVE_ENDIAN,
+ /*align=*/0,
+ };
+
+ // Pixel format for the JXL PackedFrame that goes into the
+ // PackedPixelFile. Here, we use 3 color channels, and provide
+ // the alpha channel as an extra_channel wherever it is used.
+ const JxlPixelFormat packed_frame_format{
+ /*num_channels=*/3u,
+ /*data_type=*/JXL_TYPE_UINT8,
+ /*endianness=*/JXL_NATIVE_ENDIAN,
+ /*align=*/0,
+ };
+
+ GifColorType background_color;
+ if (gif->SColorMap == nullptr ||
+ gif->SBackGroundColor >= gif->SColorMap->ColorCount) {
+ background_color = {0, 0, 0};
+ } else {
+ background_color = gif->SColorMap->Colors[gif->SBackGroundColor];
+ }
+ const PackedRgba background_rgba{background_color.Red, background_color.Green,
+ background_color.Blue, 0};
+ PackedFrame canvas(gif->SWidth, gif->SHeight, canvas_format);
+ std::fill_n(static_cast<PackedRgba*>(canvas.color.pixels()),
+ canvas.color.xsize * canvas.color.ysize, background_rgba);
+ Rect canvas_rect{0, 0, canvas.color.xsize, canvas.color.ysize};
+
+ Rect previous_rect_if_restore_to_background;
+
+ bool replace = true;
+ bool last_base_was_none = true;
+ for (int i = 0; i < gif->ImageCount; ++i) {
+ const SavedImage& image = gif->SavedImages[i];
+ msan::UnpoisonMemory(image.RasterBits, sizeof(*image.RasterBits) *
+ image.ImageDesc.Width *
+ image.ImageDesc.Height);
+ const Rect image_rect(image.ImageDesc.Left, image.ImageDesc.Top,
+ image.ImageDesc.Width, image.ImageDesc.Height);
+
+ Rect total_rect;
+ if (previous_rect_if_restore_to_background.xsize() != 0 ||
+ previous_rect_if_restore_to_background.ysize() != 0) {
+ const size_t xbegin = std::min(
+ image_rect.x0(), previous_rect_if_restore_to_background.x0());
+ const size_t ybegin = std::min(
+ image_rect.y0(), previous_rect_if_restore_to_background.y0());
+ const size_t xend =
+ std::max(image_rect.x0() + image_rect.xsize(),
+ previous_rect_if_restore_to_background.x0() +
+ previous_rect_if_restore_to_background.xsize());
+ const size_t yend =
+ std::max(image_rect.y0() + image_rect.ysize(),
+ previous_rect_if_restore_to_background.y0() +
+ previous_rect_if_restore_to_background.ysize());
+ total_rect = Rect(xbegin, ybegin, xend - xbegin, yend - ybegin);
+ previous_rect_if_restore_to_background = Rect();
+ replace = true;
+ } else {
+ total_rect = image_rect;
+ replace = false;
+ }
+ if (!image_rect.IsInside(canvas_rect)) {
+ return JXL_FAILURE("GIF frame extends outside of the canvas");
+ }
+
+ // Allocates the frame buffer.
+ ppf->frames.emplace_back(total_rect.xsize(), total_rect.ysize(),
+ packed_frame_format);
+ PackedFrame* frame = &ppf->frames.back();
+
+ // We cannot tell right from the start whether there will be a
+ // need for an alpha channel. This is discovered only as soon as
+ // we see a transparent pixel. We hence initialize alpha lazily.
+ auto set_pixel_alpha = [&frame](size_t x, size_t y, uint8_t a) {
+ // If we do not have an alpha-channel and a==255 (fully opaque),
+ // we can skip setting this pixel-value and rely on
+ // "no alpha channel = no transparency".
+ if (a == 255 && !frame->extra_channels.empty()) return;
+ ensure_have_alpha(frame);
+ static_cast<uint8_t*>(
+ frame->extra_channels[0].pixels())[y * frame->color.xsize + x] = a;
+ };
+
+ const ColorMapObject* const color_map =
+ image.ImageDesc.ColorMap ? image.ImageDesc.ColorMap : gif->SColorMap;
+ JXL_CHECK(color_map);
+ msan::UnpoisonMemory(color_map, sizeof(*color_map));
+ msan::UnpoisonMemory(color_map->Colors,
+ sizeof(*color_map->Colors) * color_map->ColorCount);
+ GraphicsControlBlock gcb;
+ DGifSavedExtensionToGCB(gif.get(), i, &gcb);
+ msan::UnpoisonMemory(&gcb, sizeof(gcb));
+ bool is_full_size = total_rect.x0() == 0 && total_rect.y0() == 0 &&
+ total_rect.xsize() == canvas.color.xsize &&
+ total_rect.ysize() == canvas.color.ysize;
+ if (ppf->info.have_animation) {
+ frame->frame_info.duration = gcb.DelayTime;
+ frame->frame_info.layer_info.have_crop = static_cast<int>(!is_full_size);
+ frame->frame_info.layer_info.crop_x0 = total_rect.x0();
+ frame->frame_info.layer_info.crop_y0 = total_rect.y0();
+ frame->frame_info.layer_info.xsize = frame->color.xsize;
+ frame->frame_info.layer_info.ysize = frame->color.ysize;
+ if (last_base_was_none) {
+ replace = true;
+ }
+ frame->frame_info.layer_info.blend_info.blendmode =
+ replace ? JXL_BLEND_REPLACE : JXL_BLEND_BLEND;
+ // We always only reference at most the last frame
+ frame->frame_info.layer_info.blend_info.source =
+ last_base_was_none ? 0u : 1u;
+ frame->frame_info.layer_info.blend_info.clamp = 1;
+ frame->frame_info.layer_info.blend_info.alpha = 0;
+ // TODO(veluca): this could in principle be implemented.
+ if (last_base_was_none &&
+ (total_rect.x0() != 0 || total_rect.y0() != 0 ||
+ total_rect.xsize() != canvas.color.xsize ||
+ total_rect.ysize() != canvas.color.ysize || !replace)) {
+ return JXL_FAILURE(
+ "GIF with dispose-to-0 is not supported for non-full or "
+ "blended frames");
+ }
+ switch (gcb.DisposalMode) {
+ case DISPOSE_DO_NOT:
+ case DISPOSE_BACKGROUND:
+ frame->frame_info.layer_info.save_as_reference = 1u;
+ last_base_was_none = false;
+ break;
+ case DISPOSE_PREVIOUS:
+ frame->frame_info.layer_info.save_as_reference = 0u;
+ break;
+ default:
+ frame->frame_info.layer_info.save_as_reference = 0u;
+ last_base_was_none = true;
+ }
+ }
+
+ // Update the canvas by creating a copy first.
+ PackedImage new_canvas_image(canvas.color.xsize, canvas.color.ysize,
+ canvas.color.format);
+ memcpy(new_canvas_image.pixels(), canvas.color.pixels(),
+ new_canvas_image.pixels_size);
+ for (size_t y = 0, byte_index = 0; y < image_rect.ysize(); ++y) {
+ // Assumes format.align == 0. row points to the beginning of the y row in
+ // the image_rect.
+ PackedRgba* row = static_cast<PackedRgba*>(new_canvas_image.pixels()) +
+ (y + image_rect.y0()) * new_canvas_image.xsize +
+ image_rect.x0();
+ for (size_t x = 0; x < image_rect.xsize(); ++x, ++byte_index) {
+ const GifByteType byte = image.RasterBits[byte_index];
+ if (byte >= color_map->ColorCount) {
+ return JXL_FAILURE("GIF color is out of bounds");
+ }
+
+ if (byte == gcb.TransparentColor) continue;
+ GifColorType color = color_map->Colors[byte];
+ row[x].r = color.Red;
+ row[x].g = color.Green;
+ row[x].b = color.Blue;
+ row[x].a = 255;
+ }
+ }
+ const PackedImage& sub_frame_image = frame->color;
+ if (replace) {
+ // Copy from the new canvas image to the subframe
+ for (size_t y = 0; y < total_rect.ysize(); ++y) {
+ const PackedRgba* row_in =
+ static_cast<const PackedRgba*>(new_canvas_image.pixels()) +
+ (y + total_rect.y0()) * new_canvas_image.xsize + total_rect.x0();
+ PackedRgb* row_out = static_cast<PackedRgb*>(sub_frame_image.pixels()) +
+ y * sub_frame_image.xsize;
+ for (size_t x = 0; x < sub_frame_image.xsize; ++x) {
+ row_out[x].r = row_in[x].r;
+ row_out[x].g = row_in[x].g;
+ row_out[x].b = row_in[x].b;
+ set_pixel_alpha(x, y, row_in[x].a);
+ }
+ }
+ } else {
+ for (size_t y = 0, byte_index = 0; y < image_rect.ysize(); ++y) {
+ // Assumes format.align == 0
+ PackedRgb* row = static_cast<PackedRgb*>(sub_frame_image.pixels()) +
+ y * sub_frame_image.xsize;
+ for (size_t x = 0; x < image_rect.xsize(); ++x, ++byte_index) {
+ const GifByteType byte = image.RasterBits[byte_index];
+ if (byte > color_map->ColorCount) {
+ return JXL_FAILURE("GIF color is out of bounds");
+ }
+ if (byte == gcb.TransparentColor) {
+ row[x].r = 0;
+ row[x].g = 0;
+ row[x].b = 0;
+ set_pixel_alpha(x, y, 0);
+ continue;
+ }
+ GifColorType color = color_map->Colors[byte];
+ row[x].r = color.Red;
+ row[x].g = color.Green;
+ row[x].b = color.Blue;
+ set_pixel_alpha(x, y, 255);
+ }
+ }
+ }
+
+ if (!frame->extra_channels.empty()) {
+ ppf->info.alpha_bits = 8;
+ }
+
+ switch (gcb.DisposalMode) {
+ case DISPOSE_DO_NOT:
+ canvas.color = std::move(new_canvas_image);
+ break;
+
+ case DISPOSE_BACKGROUND:
+ std::fill_n(static_cast<PackedRgba*>(canvas.color.pixels()),
+ canvas.color.xsize * canvas.color.ysize, background_rgba);
+ previous_rect_if_restore_to_background = image_rect;
+ break;
+
+ case DISPOSE_PREVIOUS:
+ break;
+
+ case DISPOSAL_UNSPECIFIED:
+ default:
+ std::fill_n(static_cast<PackedRgba*>(canvas.color.pixels()),
+ canvas.color.xsize * canvas.color.ysize, background_rgba);
+ }
+ }
+ // Finally, if any frame has an alpha-channel, every frame will need
+ // to have an alpha-channel.
+ bool seen_alpha = false;
+ for (const PackedFrame& frame : ppf->frames) {
+ if (!frame.extra_channels.empty()) {
+ seen_alpha = true;
+ break;
+ }
+ }
+ if (seen_alpha) {
+ for (PackedFrame& frame : ppf->frames) {
+ ensure_have_alpha(&frame);
+ }
+ }
+ return true;
+}
+
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/dec/gif.h b/third_party/jpeg-xl/lib/extras/dec/gif.h
new file mode 100644
index 0000000000..e217d617a7
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/gif.h
@@ -0,0 +1,33 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_GIF_H_
+#define LIB_EXTRAS_DEC_GIF_H_
+
+// Decodes GIF images in memory.
+
+#include <stdint.h>
+
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+struct SizeConstraints;
+
+namespace extras {
+
+// Decodes `bytes` into `ppf`. color_hints are ignored.
+Status DecodeImageGIF(Span<const uint8_t> bytes, const ColorHints& color_hints,
+ PackedPixelFile* ppf,
+ const SizeConstraints* constraints = nullptr);
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_DEC_GIF_H_
diff --git a/third_party/jpeg-xl/lib/extras/dec/jpegli.cc b/third_party/jpeg-xl/lib/extras/dec/jpegli.cc
new file mode 100644
index 0000000000..ffa1b79c25
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/jpegli.cc
@@ -0,0 +1,271 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/jpegli.h"
+
+#include <setjmp.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <numeric>
+#include <utility>
+#include <vector>
+
+#include "lib/jpegli/decode.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+namespace extras {
+
+namespace {
+
+constexpr unsigned char kExifSignature[6] = {0x45, 0x78, 0x69,
+ 0x66, 0x00, 0x00};
+constexpr int kExifMarker = JPEG_APP0 + 1;
+constexpr int kICCMarker = JPEG_APP0 + 2;
+
+static inline bool IsJPG(const std::vector<uint8_t>& bytes) {
+ if (bytes.size() < 2) return false;
+ if (bytes[0] != 0xFF || bytes[1] != 0xD8) return false;
+ return true;
+}
+
+bool MarkerIsExif(const jpeg_saved_marker_ptr marker) {
+ return marker->marker == kExifMarker &&
+ marker->data_length >= sizeof kExifSignature + 2 &&
+ std::equal(std::begin(kExifSignature), std::end(kExifSignature),
+ marker->data);
+}
+
+Status ReadICCProfile(jpeg_decompress_struct* const cinfo,
+ std::vector<uint8_t>* const icc) {
+ uint8_t* icc_data_ptr;
+ unsigned int icc_data_len;
+ if (jpegli_read_icc_profile(cinfo, &icc_data_ptr, &icc_data_len)) {
+ icc->assign(icc_data_ptr, icc_data_ptr + icc_data_len);
+ free(icc_data_ptr);
+ return true;
+ }
+ return false;
+}
+
+void ReadExif(jpeg_decompress_struct* const cinfo,
+ std::vector<uint8_t>* const exif) {
+ constexpr size_t kExifSignatureSize = sizeof kExifSignature;
+ for (jpeg_saved_marker_ptr marker = cinfo->marker_list; marker != nullptr;
+ marker = marker->next) {
+ // marker is initialized by libjpeg, which we are not instrumenting with
+ // msan.
+ msan::UnpoisonMemory(marker, sizeof(*marker));
+ msan::UnpoisonMemory(marker->data, marker->data_length);
+ if (!MarkerIsExif(marker)) continue;
+ size_t marker_length = marker->data_length - kExifSignatureSize;
+ exif->resize(marker_length);
+ std::copy_n(marker->data + kExifSignatureSize, marker_length, exif->data());
+ return;
+ }
+}
+
+JpegliDataType ConvertDataType(JxlDataType type) {
+ switch (type) {
+ case JXL_TYPE_UINT8:
+ return JPEGLI_TYPE_UINT8;
+ case JXL_TYPE_UINT16:
+ return JPEGLI_TYPE_UINT16;
+ case JXL_TYPE_FLOAT:
+ return JPEGLI_TYPE_FLOAT;
+ default:
+ return JPEGLI_TYPE_UINT8;
+ }
+}
+
+JpegliEndianness ConvertEndianness(JxlEndianness type) {
+ switch (type) {
+ case JXL_NATIVE_ENDIAN:
+ return JPEGLI_NATIVE_ENDIAN;
+ case JXL_BIG_ENDIAN:
+ return JPEGLI_BIG_ENDIAN;
+ case JXL_LITTLE_ENDIAN:
+ return JPEGLI_LITTLE_ENDIAN;
+ default:
+ return JPEGLI_NATIVE_ENDIAN;
+ }
+}
+
+JxlColorSpace ConvertColorSpace(J_COLOR_SPACE colorspace) {
+ switch (colorspace) {
+ case JCS_GRAYSCALE:
+ return JXL_COLOR_SPACE_GRAY;
+ case JCS_RGB:
+ return JXL_COLOR_SPACE_RGB;
+ default:
+ return JXL_COLOR_SPACE_UNKNOWN;
+ }
+}
+
+void MyErrorExit(j_common_ptr cinfo) {
+ jmp_buf* env = static_cast<jmp_buf*>(cinfo->client_data);
+ (*cinfo->err->output_message)(cinfo);
+ jpegli_destroy_decompress(reinterpret_cast<j_decompress_ptr>(cinfo));
+ longjmp(*env, 1);
+}
+
+void MyOutputMessage(j_common_ptr cinfo) {
+#if JXL_DEBUG_WARNING == 1
+ char buf[JMSG_LENGTH_MAX + 1];
+ (*cinfo->err->format_message)(cinfo, buf);
+ buf[JMSG_LENGTH_MAX] = 0;
+ JXL_WARNING("%s", buf);
+#endif
+}
+
+void UnmapColors(uint8_t* row, size_t xsize, int components,
+ JSAMPARRAY colormap, size_t num_colors) {
+ JXL_CHECK(colormap != nullptr);
+ std::vector<uint8_t> tmp(xsize * components);
+ for (size_t x = 0; x < xsize; ++x) {
+ JXL_CHECK(row[x] < num_colors);
+ for (int c = 0; c < components; ++c) {
+ tmp[x * components + c] = colormap[c][row[x]];
+ }
+ }
+ memcpy(row, tmp.data(), tmp.size());
+}
+
+} // namespace
+
+Status DecodeJpeg(const std::vector<uint8_t>& compressed,
+ const JpegDecompressParams& dparams, ThreadPool* pool,
+ PackedPixelFile* ppf) {
+ // Don't do anything for non-JPEG files (no need to report an error)
+ if (!IsJPG(compressed)) return false;
+
+ // TODO(veluca): use JPEGData also for pixels?
+
+ // We need to declare all the non-trivial destructor local variables before
+ // the call to setjmp().
+ std::unique_ptr<JSAMPLE[]> row;
+
+ jpeg_decompress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ // Setup error handling in jpeg library so we can deal with broken jpegs in
+ // the fuzzer.
+ jpeg_error_mgr jerr;
+ jmp_buf env;
+ cinfo.err = jpegli_std_error(&jerr);
+ jerr.error_exit = &MyErrorExit;
+ jerr.output_message = &MyOutputMessage;
+ if (setjmp(env)) {
+ return false;
+ }
+ cinfo.client_data = static_cast<void*>(&env);
+
+ jpegli_create_decompress(&cinfo);
+ jpegli_mem_src(&cinfo,
+ reinterpret_cast<const unsigned char*>(compressed.data()),
+ compressed.size());
+ jpegli_save_markers(&cinfo, kICCMarker, 0xFFFF);
+ jpegli_save_markers(&cinfo, kExifMarker, 0xFFFF);
+ const auto failure = [&cinfo](const char* str) -> Status {
+ jpegli_abort_decompress(&cinfo);
+ jpegli_destroy_decompress(&cinfo);
+ return JXL_FAILURE("%s", str);
+ };
+ jpegli_read_header(&cinfo, TRUE);
+ // Might cause CPU-zip bomb.
+ if (cinfo.arith_code) {
+ return failure("arithmetic code JPEGs are not supported");
+ }
+ int nbcomp = cinfo.num_components;
+ if (nbcomp != 1 && nbcomp != 3) {
+ return failure("unsupported number of components in JPEG");
+ }
+ if (dparams.force_rgb) {
+ cinfo.out_color_space = JCS_RGB;
+ } else if (dparams.force_grayscale) {
+ cinfo.out_color_space = JCS_GRAYSCALE;
+ }
+ if (!ReadICCProfile(&cinfo, &ppf->icc)) {
+ ppf->icc.clear();
+ // Default to SRGB
+ ppf->color_encoding.color_space =
+ ConvertColorSpace(cinfo.out_color_space);
+ ppf->color_encoding.white_point = JXL_WHITE_POINT_D65;
+ ppf->color_encoding.primaries = JXL_PRIMARIES_SRGB;
+ ppf->color_encoding.transfer_function = JXL_TRANSFER_FUNCTION_SRGB;
+ ppf->color_encoding.rendering_intent = JXL_RENDERING_INTENT_PERCEPTUAL;
+ }
+ ReadExif(&cinfo, &ppf->metadata.exif);
+
+ ppf->info.xsize = cinfo.image_width;
+ ppf->info.ysize = cinfo.image_height;
+ if (dparams.output_data_type == JXL_TYPE_UINT8) {
+ ppf->info.bits_per_sample = 8;
+ ppf->info.exponent_bits_per_sample = 0;
+ } else if (dparams.output_data_type == JXL_TYPE_UINT16) {
+ ppf->info.bits_per_sample = 16;
+ ppf->info.exponent_bits_per_sample = 0;
+ } else if (dparams.output_data_type == JXL_TYPE_FLOAT) {
+ ppf->info.bits_per_sample = 32;
+ ppf->info.exponent_bits_per_sample = 8;
+ } else {
+ return failure("unsupported data type");
+ }
+ ppf->info.uses_original_profile = true;
+
+ // No alpha in JPG
+ ppf->info.alpha_bits = 0;
+ ppf->info.alpha_exponent_bits = 0;
+ ppf->info.orientation = JXL_ORIENT_IDENTITY;
+
+ jpegli_set_output_format(&cinfo, ConvertDataType(dparams.output_data_type),
+ ConvertEndianness(dparams.output_endianness));
+
+ if (dparams.num_colors > 0) {
+ cinfo.quantize_colors = TRUE;
+ cinfo.desired_number_of_colors = dparams.num_colors;
+ cinfo.two_pass_quantize = dparams.two_pass_quant;
+ cinfo.dither_mode = (J_DITHER_MODE)dparams.dither_mode;
+ }
+
+ jpegli_start_decompress(&cinfo);
+
+ ppf->info.num_color_channels = cinfo.out_color_components;
+ const JxlPixelFormat format{
+ /*num_channels=*/static_cast<uint32_t>(cinfo.out_color_components),
+ dparams.output_data_type,
+ dparams.output_endianness,
+ /*align=*/0,
+ };
+ ppf->frames.clear();
+ // Allocates the frame buffer.
+ ppf->frames.emplace_back(cinfo.image_width, cinfo.image_height, format);
+ const auto& frame = ppf->frames.back();
+ JXL_ASSERT(sizeof(JSAMPLE) * cinfo.out_color_components *
+ cinfo.image_width <=
+ frame.color.stride);
+
+ for (size_t y = 0; y < cinfo.image_height; ++y) {
+ JSAMPROW rows[] = {reinterpret_cast<JSAMPLE*>(
+ static_cast<uint8_t*>(frame.color.pixels()) +
+ frame.color.stride * y)};
+ jpegli_read_scanlines(&cinfo, rows, 1);
+ if (dparams.num_colors > 0) {
+ UnmapColors(rows[0], cinfo.output_width, cinfo.out_color_components,
+ cinfo.colormap, cinfo.actual_number_of_colors);
+ }
+ }
+
+ jpegli_finish_decompress(&cinfo);
+ return true;
+ };
+ bool success = try_catch_block();
+ jpegli_destroy_decompress(&cinfo);
+ return success;
+}
+
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/dec/jpegli.h b/third_party/jpeg-xl/lib/extras/dec/jpegli.h
new file mode 100644
index 0000000000..574df54c8e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/jpegli.h
@@ -0,0 +1,41 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_JPEGLI_H_
+#define LIB_EXTRAS_DEC_JPEGLI_H_
+
+// Decodes JPG pixels and metadata in memory using the libjpegli library.
+
+#include <jxl/types.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace extras {
+
+struct JpegDecompressParams {
+ JxlDataType output_data_type = JXL_TYPE_UINT8;
+ JxlEndianness output_endianness = JXL_NATIVE_ENDIAN;
+ bool force_rgb = false;
+ bool force_grayscale = false;
+ int num_colors = 0;
+ bool two_pass_quant = true;
+ // 0 = none, 1 = ordered, 2 = Floyd-Steinberg
+ int dither_mode = 2;
+};
+
+Status DecodeJpeg(const std::vector<uint8_t>& compressed,
+ const JpegDecompressParams& dparams, ThreadPool* pool,
+ PackedPixelFile* ppf);
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_DEC_JPEGLI_H_
diff --git a/third_party/jpeg-xl/lib/extras/dec/jpg.cc b/third_party/jpeg-xl/lib/extras/dec/jpg.cc
new file mode 100644
index 0000000000..b3c568b87b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/jpg.cc
@@ -0,0 +1,322 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/jpg.h"
+
+#include <jpeglib.h>
+#include <setjmp.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <numeric>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/size_constraints.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+namespace extras {
+
+namespace {
+
+constexpr unsigned char kICCSignature[12] = {
+ 0x49, 0x43, 0x43, 0x5F, 0x50, 0x52, 0x4F, 0x46, 0x49, 0x4C, 0x45, 0x00};
+constexpr int kICCMarker = JPEG_APP0 + 2;
+
+constexpr unsigned char kExifSignature[6] = {0x45, 0x78, 0x69,
+ 0x66, 0x00, 0x00};
+constexpr int kExifMarker = JPEG_APP0 + 1;
+
+static inline bool IsJPG(const Span<const uint8_t> bytes) {
+ if (bytes.size() < 2) return false;
+ if (bytes[0] != 0xFF || bytes[1] != 0xD8) return false;
+ return true;
+}
+
+bool MarkerIsICC(const jpeg_saved_marker_ptr marker) {
+ return marker->marker == kICCMarker &&
+ marker->data_length >= sizeof kICCSignature + 2 &&
+ std::equal(std::begin(kICCSignature), std::end(kICCSignature),
+ marker->data);
+}
+bool MarkerIsExif(const jpeg_saved_marker_ptr marker) {
+ return marker->marker == kExifMarker &&
+ marker->data_length >= sizeof kExifSignature + 2 &&
+ std::equal(std::begin(kExifSignature), std::end(kExifSignature),
+ marker->data);
+}
+
+Status ReadICCProfile(jpeg_decompress_struct* const cinfo,
+ std::vector<uint8_t>* const icc) {
+ constexpr size_t kICCSignatureSize = sizeof kICCSignature;
+ // ICC signature + uint8_t index + uint8_t max_index.
+ constexpr size_t kICCHeadSize = kICCSignatureSize + 2;
+ // Markers are 1-indexed, and we keep them that way in this vector to get a
+ // convenient 0 at the front for when we compute the offsets later.
+ std::vector<size_t> marker_lengths;
+ int num_markers = 0;
+ int seen_markers_count = 0;
+ bool has_num_markers = false;
+ for (jpeg_saved_marker_ptr marker = cinfo->marker_list; marker != nullptr;
+ marker = marker->next) {
+ // marker is initialized by libjpeg, which we are not instrumenting with
+ // msan.
+ msan::UnpoisonMemory(marker, sizeof(*marker));
+ msan::UnpoisonMemory(marker->data, marker->data_length);
+ if (!MarkerIsICC(marker)) continue;
+
+ const int current_marker = marker->data[kICCSignatureSize];
+ if (current_marker == 0) {
+ return JXL_FAILURE("inconsistent JPEG ICC marker numbering");
+ }
+ const int current_num_markers = marker->data[kICCSignatureSize + 1];
+ if (current_marker > current_num_markers) {
+ return JXL_FAILURE("inconsistent JPEG ICC marker numbering");
+ }
+ if (has_num_markers) {
+ if (current_num_markers != num_markers) {
+ return JXL_FAILURE("inconsistent numbers of JPEG ICC markers");
+ }
+ } else {
+ num_markers = current_num_markers;
+ has_num_markers = true;
+ marker_lengths.resize(num_markers + 1);
+ }
+
+ size_t marker_length = marker->data_length - kICCHeadSize;
+
+ if (marker_length == 0) {
+ // NB: if we allow empty chunks, then the next check is incorrect.
+ return JXL_FAILURE("Empty ICC chunk");
+ }
+
+ if (marker_lengths[current_marker] != 0) {
+ return JXL_FAILURE("duplicate JPEG ICC marker number");
+ }
+ marker_lengths[current_marker] = marker_length;
+ seen_markers_count++;
+ }
+
+ if (marker_lengths.empty()) {
+ // Not an error.
+ return false;
+ }
+
+ if (seen_markers_count != num_markers) {
+ JXL_DASSERT(has_num_markers);
+ return JXL_FAILURE("Incomplete set of ICC chunks");
+ }
+
+ std::vector<size_t> offsets = std::move(marker_lengths);
+ std::partial_sum(offsets.begin(), offsets.end(), offsets.begin());
+ icc->resize(offsets.back());
+
+ for (jpeg_saved_marker_ptr marker = cinfo->marker_list; marker != nullptr;
+ marker = marker->next) {
+ if (!MarkerIsICC(marker)) continue;
+ const uint8_t* first = marker->data + kICCHeadSize;
+ uint8_t current_marker = marker->data[kICCSignatureSize];
+ size_t offset = offsets[current_marker - 1];
+ size_t marker_length = offsets[current_marker] - offset;
+ std::copy_n(first, marker_length, icc->data() + offset);
+ }
+
+ return true;
+}
+
+void ReadExif(jpeg_decompress_struct* const cinfo,
+ std::vector<uint8_t>* const exif) {
+ constexpr size_t kExifSignatureSize = sizeof kExifSignature;
+ for (jpeg_saved_marker_ptr marker = cinfo->marker_list; marker != nullptr;
+ marker = marker->next) {
+ // marker is initialized by libjpeg, which we are not instrumenting with
+ // msan.
+ msan::UnpoisonMemory(marker, sizeof(*marker));
+ msan::UnpoisonMemory(marker->data, marker->data_length);
+ if (!MarkerIsExif(marker)) continue;
+ size_t marker_length = marker->data_length - kExifSignatureSize;
+ exif->resize(marker_length);
+ std::copy_n(marker->data + kExifSignatureSize, marker_length, exif->data());
+ return;
+ }
+}
+
+void MyErrorExit(j_common_ptr cinfo) {
+ jmp_buf* env = static_cast<jmp_buf*>(cinfo->client_data);
+ (*cinfo->err->output_message)(cinfo);
+ jpeg_destroy_decompress(reinterpret_cast<j_decompress_ptr>(cinfo));
+ longjmp(*env, 1);
+}
+
+void MyOutputMessage(j_common_ptr cinfo) {
+#if JXL_DEBUG_WARNING == 1
+ char buf[JMSG_LENGTH_MAX + 1];
+ (*cinfo->err->format_message)(cinfo, buf);
+ buf[JMSG_LENGTH_MAX] = 0;
+ JXL_WARNING("%s", buf);
+#endif
+}
+
+void UnmapColors(uint8_t* row, size_t xsize, int components,
+ JSAMPARRAY colormap, size_t num_colors) {
+ JXL_CHECK(colormap != nullptr);
+ std::vector<uint8_t> tmp(xsize * components);
+ for (size_t x = 0; x < xsize; ++x) {
+ JXL_CHECK(row[x] < num_colors);
+ for (int c = 0; c < components; ++c) {
+ tmp[x * components + c] = colormap[c][row[x]];
+ }
+ }
+ memcpy(row, tmp.data(), tmp.size());
+}
+
+} // namespace
+
+Status DecodeImageJPG(const Span<const uint8_t> bytes,
+ const ColorHints& color_hints, PackedPixelFile* ppf,
+ const SizeConstraints* constraints,
+ const JPGDecompressParams* dparams) {
+ // Don't do anything for non-JPEG files (no need to report an error)
+ if (!IsJPG(bytes)) return false;
+
+ // TODO(veluca): use JPEGData also for pixels?
+
+ // We need to declare all the non-trivial destructor local variables before
+ // the call to setjmp().
+ std::unique_ptr<JSAMPLE[]> row;
+
+ const auto try_catch_block = [&]() -> bool {
+ jpeg_decompress_struct cinfo = {};
+ // Setup error handling in jpeg library so we can deal with broken jpegs in
+ // the fuzzer.
+ jpeg_error_mgr jerr;
+ jmp_buf env;
+ cinfo.err = jpeg_std_error(&jerr);
+ jerr.error_exit = &MyErrorExit;
+ jerr.output_message = &MyOutputMessage;
+ if (setjmp(env)) {
+ return false;
+ }
+ cinfo.client_data = static_cast<void*>(&env);
+
+ jpeg_create_decompress(&cinfo);
+ jpeg_mem_src(&cinfo, reinterpret_cast<const unsigned char*>(bytes.data()),
+ bytes.size());
+ jpeg_save_markers(&cinfo, kICCMarker, 0xFFFF);
+ jpeg_save_markers(&cinfo, kExifMarker, 0xFFFF);
+ const auto failure = [&cinfo](const char* str) -> Status {
+ jpeg_abort_decompress(&cinfo);
+ jpeg_destroy_decompress(&cinfo);
+ return JXL_FAILURE("%s", str);
+ };
+ int read_header_result = jpeg_read_header(&cinfo, TRUE);
+ // TODO(eustas): what about JPEG_HEADER_TABLES_ONLY?
+ if (read_header_result == JPEG_SUSPENDED) {
+ return failure("truncated JPEG input");
+ }
+ if (!VerifyDimensions(constraints, cinfo.image_width, cinfo.image_height)) {
+ return failure("image too big");
+ }
+ // Might cause CPU-zip bomb.
+ if (cinfo.arith_code) {
+ return failure("arithmetic code JPEGs are not supported");
+ }
+ int nbcomp = cinfo.num_components;
+ if (nbcomp != 1 && nbcomp != 3) {
+ return failure("unsupported number of components in JPEG");
+ }
+ if (!ReadICCProfile(&cinfo, &ppf->icc)) {
+ ppf->icc.clear();
+ // Default to SRGB
+ // Actually, (cinfo.output_components == nbcomp) will be checked after
+ // `jpeg_start_decompress`.
+ ppf->color_encoding.color_space =
+ (nbcomp == 1) ? JXL_COLOR_SPACE_GRAY : JXL_COLOR_SPACE_RGB;
+ ppf->color_encoding.white_point = JXL_WHITE_POINT_D65;
+ ppf->color_encoding.primaries = JXL_PRIMARIES_SRGB;
+ ppf->color_encoding.transfer_function = JXL_TRANSFER_FUNCTION_SRGB;
+ ppf->color_encoding.rendering_intent = JXL_RENDERING_INTENT_PERCEPTUAL;
+ }
+ ReadExif(&cinfo, &ppf->metadata.exif);
+ if (!ApplyColorHints(color_hints, /*color_already_set=*/true,
+ /*is_gray=*/false, ppf)) {
+ return failure("ApplyColorHints failed");
+ }
+
+ ppf->info.xsize = cinfo.image_width;
+ ppf->info.ysize = cinfo.image_height;
+ // Original data is uint, so exponent_bits_per_sample = 0.
+ ppf->info.bits_per_sample = BITS_IN_JSAMPLE;
+ JXL_ASSERT(BITS_IN_JSAMPLE == 8 || BITS_IN_JSAMPLE == 16);
+ ppf->info.exponent_bits_per_sample = 0;
+ ppf->info.uses_original_profile = true;
+
+ // No alpha in JPG
+ ppf->info.alpha_bits = 0;
+ ppf->info.alpha_exponent_bits = 0;
+
+ ppf->info.num_color_channels = nbcomp;
+ ppf->info.orientation = JXL_ORIENT_IDENTITY;
+
+ if (dparams && dparams->num_colors > 0) {
+ cinfo.quantize_colors = TRUE;
+ cinfo.desired_number_of_colors = dparams->num_colors;
+ cinfo.two_pass_quantize = dparams->two_pass_quant;
+ cinfo.dither_mode = (J_DITHER_MODE)dparams->dither_mode;
+ }
+
+ jpeg_start_decompress(&cinfo);
+ JXL_ASSERT(cinfo.out_color_components == nbcomp);
+ JxlDataType data_type =
+ ppf->info.bits_per_sample <= 8 ? JXL_TYPE_UINT8 : JXL_TYPE_UINT16;
+
+ const JxlPixelFormat format{
+ /*num_channels=*/static_cast<uint32_t>(nbcomp),
+ data_type,
+ /*endianness=*/JXL_NATIVE_ENDIAN,
+ /*align=*/0,
+ };
+ ppf->frames.clear();
+ // Allocates the frame buffer.
+ ppf->frames.emplace_back(cinfo.image_width, cinfo.image_height, format);
+ const auto& frame = ppf->frames.back();
+ JXL_ASSERT(sizeof(JSAMPLE) * cinfo.out_color_components *
+ cinfo.image_width <=
+ frame.color.stride);
+
+ if (cinfo.quantize_colors) {
+ jxl::msan::UnpoisonMemory(cinfo.colormap, cinfo.out_color_components *
+ sizeof(cinfo.colormap[0]));
+ for (int c = 0; c < cinfo.out_color_components; ++c) {
+ jxl::msan::UnpoisonMemory(
+ cinfo.colormap[c],
+ cinfo.actual_number_of_colors * sizeof(cinfo.colormap[c][0]));
+ }
+ }
+ for (size_t y = 0; y < cinfo.image_height; ++y) {
+ JSAMPROW rows[] = {reinterpret_cast<JSAMPLE*>(
+ static_cast<uint8_t*>(frame.color.pixels()) +
+ frame.color.stride * y)};
+ jpeg_read_scanlines(&cinfo, rows, 1);
+ msan::UnpoisonMemory(rows[0], sizeof(JSAMPLE) * cinfo.output_components *
+ cinfo.image_width);
+ if (dparams && dparams->num_colors > 0) {
+ UnmapColors(rows[0], cinfo.output_width, cinfo.out_color_components,
+ cinfo.colormap, cinfo.actual_number_of_colors);
+ }
+ }
+
+ jpeg_finish_decompress(&cinfo);
+ jpeg_destroy_decompress(&cinfo);
+ return true;
+ };
+
+ return try_catch_block();
+}
+
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/dec/jpg.h b/third_party/jpeg-xl/lib/extras/dec/jpg.h
new file mode 100644
index 0000000000..e3de2536ac
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/jpg.h
@@ -0,0 +1,44 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_JPG_H_
+#define LIB_EXTRAS_DEC_JPG_H_
+
+// Decodes JPG pixels and metadata in memory.
+
+#include <stdint.h>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/dec/color_hints.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+struct SizeConstraints;
+
+namespace extras {
+
+struct JPGDecompressParams {
+ int num_colors = 0;
+ bool two_pass_quant = false;
+ // 0 = none, 1 = ordered, 2 = Floyd-Steinberg
+ int dither_mode = 0;
+};
+
+// Decodes `bytes` into `ppf`. color_hints are ignored.
+// `elapsed_deinterleave`, if non-null, will be set to the time (in seconds)
+// that it took to deinterleave the raw JSAMPLEs to planar floats.
+Status DecodeImageJPG(Span<const uint8_t> bytes, const ColorHints& color_hints,
+ PackedPixelFile* ppf,
+ const SizeConstraints* constraints = nullptr,
+ const JPGDecompressParams* dparams = nullptr);
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_DEC_JPG_H_
diff --git a/third_party/jpeg-xl/lib/extras/dec/jxl.cc b/third_party/jpeg-xl/lib/extras/dec/jxl.cc
new file mode 100644
index 0000000000..224f7c7bf9
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/jxl.cc
@@ -0,0 +1,561 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/jxl.h"
+
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+#include <jxl/types.h>
+
+#include "lib/extras/dec/color_description.h"
+#include "lib/extras/enc/encode.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/exif.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+struct BoxProcessor {
+ BoxProcessor(JxlDecoder* dec) : dec_(dec) { Reset(); }
+
+ void InitializeOutput(std::vector<uint8_t>* out) {
+ box_data_ = out;
+ AddMoreOutput();
+ }
+
+ bool AddMoreOutput() {
+ Flush();
+ static const size_t kBoxOutputChunkSize = 1 << 16;
+ box_data_->resize(box_data_->size() + kBoxOutputChunkSize);
+ next_out_ = box_data_->data() + total_size_;
+ avail_out_ = box_data_->size() - total_size_;
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderSetBoxBuffer(dec_, next_out_, avail_out_)) {
+ fprintf(stderr, "JxlDecoderSetBoxBuffer failed\n");
+ return false;
+ }
+ return true;
+ }
+
+ void FinalizeOutput() {
+ if (box_data_ == nullptr) return;
+ Flush();
+ box_data_->resize(total_size_);
+ Reset();
+ }
+
+ private:
+ JxlDecoder* dec_;
+ std::vector<uint8_t>* box_data_;
+ uint8_t* next_out_;
+ size_t avail_out_;
+ size_t total_size_;
+
+ void Reset() {
+ box_data_ = nullptr;
+ next_out_ = nullptr;
+ avail_out_ = 0;
+ total_size_ = 0;
+ }
+ void Flush() {
+ if (box_data_ == nullptr) return;
+ size_t remaining = JxlDecoderReleaseBoxBuffer(dec_);
+ size_t bytes_written = avail_out_ - remaining;
+ next_out_ += bytes_written;
+ avail_out_ -= bytes_written;
+ total_size_ += bytes_written;
+ }
+};
+
+void SetBitDepthFromDataType(JxlDataType data_type, uint32_t* bits_per_sample,
+ uint32_t* exponent_bits_per_sample) {
+ switch (data_type) {
+ case JXL_TYPE_UINT8:
+ *bits_per_sample = 8;
+ *exponent_bits_per_sample = 0;
+ break;
+ case JXL_TYPE_UINT16:
+ *bits_per_sample = 16;
+ *exponent_bits_per_sample = 0;
+ break;
+ case JXL_TYPE_FLOAT16:
+ *bits_per_sample = 16;
+ *exponent_bits_per_sample = 5;
+ break;
+ case JXL_TYPE_FLOAT:
+ *bits_per_sample = 32;
+ *exponent_bits_per_sample = 8;
+ break;
+ }
+}
+
+template <typename T>
+void UpdateBitDepth(JxlBitDepth bit_depth, JxlDataType data_type, T* info) {
+ if (bit_depth.type == JXL_BIT_DEPTH_FROM_PIXEL_FORMAT) {
+ SetBitDepthFromDataType(data_type, &info->bits_per_sample,
+ &info->exponent_bits_per_sample);
+ } else if (bit_depth.type == JXL_BIT_DEPTH_CUSTOM) {
+ info->bits_per_sample = bit_depth.bits_per_sample;
+ info->exponent_bits_per_sample = bit_depth.exponent_bits_per_sample;
+ }
+}
+
+} // namespace
+
+bool DecodeImageJXL(const uint8_t* bytes, size_t bytes_size,
+ const JXLDecompressParams& dparams, size_t* decoded_bytes,
+ PackedPixelFile* ppf, std::vector<uint8_t>* jpeg_bytes) {
+ auto decoder = JxlDecoderMake(/*memory_manager=*/nullptr);
+ JxlDecoder* dec = decoder.get();
+ ppf->frames.clear();
+
+ if (dparams.runner_opaque != nullptr &&
+ JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(dec, dparams.runner,
+ dparams.runner_opaque)) {
+ fprintf(stderr, "JxlEncoderSetParallelRunner failed\n");
+ return false;
+ }
+
+ JxlPixelFormat format;
+ std::vector<JxlPixelFormat> accepted_formats = dparams.accepted_formats;
+ if (accepted_formats.empty()) {
+ for (const uint32_t num_channels : {1, 2, 3, 4}) {
+ accepted_formats.push_back(
+ {num_channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, /*align=*/0});
+ }
+ }
+ JxlColorEncoding color_encoding;
+ size_t num_color_channels = 0;
+ if (!dparams.color_space.empty()) {
+ if (!jxl::ParseDescription(dparams.color_space, &color_encoding)) {
+ fprintf(stderr, "Failed to parse color space %s.\n",
+ dparams.color_space.c_str());
+ return false;
+ }
+ num_color_channels =
+ color_encoding.color_space == JXL_COLOR_SPACE_GRAY ? 1 : 3;
+ }
+
+ bool can_reconstruct_jpeg = false;
+ std::vector<uint8_t> jpeg_data_chunk;
+ if (jpeg_bytes != nullptr) {
+ jpeg_data_chunk.resize(16384);
+ jpeg_bytes->resize(0);
+ }
+
+ int events = (JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE);
+
+ bool max_passes_defined =
+ (dparams.max_passes < std::numeric_limits<uint32_t>::max());
+ if (max_passes_defined || dparams.max_downsampling > 1) {
+ events |= JXL_DEC_FRAME_PROGRESSION;
+ if (max_passes_defined) {
+ JxlDecoderSetProgressiveDetail(dec, JxlProgressiveDetail::kPasses);
+ } else {
+ JxlDecoderSetProgressiveDetail(dec, JxlProgressiveDetail::kLastPasses);
+ }
+ }
+ if (jpeg_bytes != nullptr) {
+ events |= JXL_DEC_JPEG_RECONSTRUCTION;
+ } else {
+ events |= (JXL_DEC_COLOR_ENCODING | JXL_DEC_FRAME | JXL_DEC_PREVIEW_IMAGE |
+ JXL_DEC_BOX);
+ }
+ if (JXL_DEC_SUCCESS != JxlDecoderSubscribeEvents(dec, events)) {
+ fprintf(stderr, "JxlDecoderSubscribeEvents failed\n");
+ return false;
+ }
+ if (jpeg_bytes == nullptr) {
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderSetRenderSpotcolors(dec, dparams.render_spotcolors)) {
+ fprintf(stderr, "JxlDecoderSetRenderSpotColors failed\n");
+ return false;
+ }
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderSetKeepOrientation(dec, dparams.keep_orientation)) {
+ fprintf(stderr, "JxlDecoderSetKeepOrientation failed\n");
+ return false;
+ }
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderSetUnpremultiplyAlpha(dec, dparams.unpremultiply_alpha)) {
+ fprintf(stderr, "JxlDecoderSetUnpremultiplyAlpha failed\n");
+ return false;
+ }
+ if (dparams.display_nits > 0 &&
+ JXL_DEC_SUCCESS !=
+ JxlDecoderSetDesiredIntensityTarget(dec, dparams.display_nits)) {
+ fprintf(stderr, "Decoder failed to set desired intensity target\n");
+ return false;
+ }
+ if (JXL_DEC_SUCCESS != JxlDecoderSetDecompressBoxes(dec, JXL_TRUE)) {
+ fprintf(stderr, "JxlDecoderSetDecompressBoxes failed\n");
+ return false;
+ }
+ }
+ if (JXL_DEC_SUCCESS != JxlDecoderSetInput(dec, bytes, bytes_size)) {
+ fprintf(stderr, "Decoder failed to set input\n");
+ return false;
+ }
+ uint32_t progression_index = 0;
+ bool codestream_done = false;
+ BoxProcessor boxes(dec);
+ for (;;) {
+ JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+ if (status == JXL_DEC_ERROR) {
+ fprintf(stderr, "Failed to decode image\n");
+ return false;
+ } else if (status == JXL_DEC_NEED_MORE_INPUT) {
+ if (codestream_done) {
+ break;
+ }
+ if (dparams.allow_partial_input) {
+ if (JXL_DEC_SUCCESS != JxlDecoderFlushImage(dec)) {
+ fprintf(stderr,
+ "Input file is truncated and there is no preview "
+ "available yet.\n");
+ return false;
+ }
+ break;
+ }
+ size_t released_size = JxlDecoderReleaseInput(dec);
+ fprintf(stderr,
+ "Input file is truncated (total bytes: %" PRIuS
+ ", processed bytes: %" PRIuS
+ ") and --allow_partial_files is not present.\n",
+ bytes_size, bytes_size - released_size);
+ return false;
+ } else if (status == JXL_DEC_BOX) {
+ boxes.FinalizeOutput();
+ JxlBoxType box_type;
+ if (JXL_DEC_SUCCESS != JxlDecoderGetBoxType(dec, box_type, JXL_TRUE)) {
+ fprintf(stderr, "JxlDecoderGetBoxType failed\n");
+ return false;
+ }
+ std::vector<uint8_t>* box_data = nullptr;
+ if (memcmp(box_type, "Exif", 4) == 0) {
+ box_data = &ppf->metadata.exif;
+ } else if (memcmp(box_type, "iptc", 4) == 0) {
+ box_data = &ppf->metadata.iptc;
+ } else if (memcmp(box_type, "jumb", 4) == 0) {
+ box_data = &ppf->metadata.jumbf;
+ } else if (memcmp(box_type, "xml ", 4) == 0) {
+ box_data = &ppf->metadata.xmp;
+ }
+ if (box_data) {
+ boxes.InitializeOutput(box_data);
+ }
+ } else if (status == JXL_DEC_BOX_NEED_MORE_OUTPUT) {
+ boxes.AddMoreOutput();
+ } else if (status == JXL_DEC_JPEG_RECONSTRUCTION) {
+ can_reconstruct_jpeg = true;
+ // Decoding to JPEG.
+ if (JXL_DEC_SUCCESS != JxlDecoderSetJPEGBuffer(dec,
+ jpeg_data_chunk.data(),
+ jpeg_data_chunk.size())) {
+ fprintf(stderr, "Decoder failed to set JPEG Buffer\n");
+ return false;
+ }
+ } else if (status == JXL_DEC_JPEG_NEED_MORE_OUTPUT) {
+ // Decoded a chunk to JPEG.
+ size_t used_jpeg_output =
+ jpeg_data_chunk.size() - JxlDecoderReleaseJPEGBuffer(dec);
+ jpeg_bytes->insert(jpeg_bytes->end(), jpeg_data_chunk.data(),
+ jpeg_data_chunk.data() + used_jpeg_output);
+ if (used_jpeg_output == 0) {
+ // Chunk is too small.
+ jpeg_data_chunk.resize(jpeg_data_chunk.size() * 2);
+ }
+ if (JXL_DEC_SUCCESS != JxlDecoderSetJPEGBuffer(dec,
+ jpeg_data_chunk.data(),
+ jpeg_data_chunk.size())) {
+ fprintf(stderr, "Decoder failed to set JPEG Buffer\n");
+ return false;
+ }
+ } else if (status == JXL_DEC_BASIC_INFO) {
+ if (JXL_DEC_SUCCESS != JxlDecoderGetBasicInfo(dec, &ppf->info)) {
+ fprintf(stderr, "JxlDecoderGetBasicInfo failed\n");
+ return false;
+ }
+ if (num_color_channels != 0) {
+ // Mark the change in number of color channels due to the requested
+ // color space.
+ ppf->info.num_color_channels = num_color_channels;
+ }
+ if (dparams.output_bitdepth.type == JXL_BIT_DEPTH_CUSTOM) {
+ // Select format based on custom bits per sample.
+ ppf->info.bits_per_sample = dparams.output_bitdepth.bits_per_sample;
+ }
+ // Select format according to accepted formats.
+ if (!jxl::extras::SelectFormat(accepted_formats, ppf->info, &format)) {
+ fprintf(stderr, "SelectFormat failed\n");
+ return false;
+ }
+ bool have_alpha = (format.num_channels == 2 || format.num_channels == 4);
+ if (!have_alpha) {
+ // Mark in the basic info that alpha channel was dropped.
+ ppf->info.alpha_bits = 0;
+ } else {
+ if (dparams.unpremultiply_alpha) {
+ // Mark in the basic info that alpha was unpremultiplied.
+ ppf->info.alpha_premultiplied = false;
+ }
+ }
+ bool alpha_found = false;
+ for (uint32_t i = 0; i < ppf->info.num_extra_channels; ++i) {
+ JxlExtraChannelInfo eci;
+ if (JXL_DEC_SUCCESS != JxlDecoderGetExtraChannelInfo(dec, i, &eci)) {
+ fprintf(stderr, "JxlDecoderGetExtraChannelInfo failed\n");
+ return false;
+ }
+ if (eci.type == JXL_CHANNEL_ALPHA && have_alpha && !alpha_found) {
+ // Skip the first alpha channels because it is already present in the
+ // interleaved image.
+ alpha_found = true;
+ continue;
+ }
+ std::string name(eci.name_length + 1, 0);
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderGetExtraChannelName(dec, i, &name[0], name.size())) {
+ fprintf(stderr, "JxlDecoderGetExtraChannelName failed\n");
+ return false;
+ }
+ name.resize(eci.name_length);
+ ppf->extra_channels_info.push_back({eci, i, name});
+ }
+ } else if (status == JXL_DEC_COLOR_ENCODING) {
+ if (!dparams.color_space.empty()) {
+ if (ppf->info.uses_original_profile) {
+ fprintf(stderr,
+ "Warning: --color_space ignored because the image is "
+ "not XYB encoded.\n");
+ } else {
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderSetPreferredColorProfile(dec, &color_encoding)) {
+ fprintf(stderr, "Failed to set color space.\n");
+ return false;
+ }
+ }
+ }
+ size_t icc_size = 0;
+ JxlColorProfileTarget target = JXL_COLOR_PROFILE_TARGET_DATA;
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderGetICCProfileSize(dec, nullptr, target, &icc_size)) {
+ fprintf(stderr, "JxlDecoderGetICCProfileSize failed\n");
+ }
+ if (icc_size != 0) {
+ ppf->icc.resize(icc_size);
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderGetColorAsICCProfile(dec, nullptr, target,
+ ppf->icc.data(), icc_size)) {
+ fprintf(stderr, "JxlDecoderGetColorAsICCProfile failed\n");
+ return false;
+ }
+ }
+ if (JXL_DEC_SUCCESS != JxlDecoderGetColorAsEncodedProfile(
+ dec, nullptr, target, &ppf->color_encoding)) {
+ ppf->color_encoding.color_space = JXL_COLOR_SPACE_UNKNOWN;
+ }
+ icc_size = 0;
+ target = JXL_COLOR_PROFILE_TARGET_ORIGINAL;
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderGetICCProfileSize(dec, nullptr, target, &icc_size)) {
+ fprintf(stderr, "JxlDecoderGetICCProfileSize failed\n");
+ }
+ if (icc_size != 0) {
+ ppf->orig_icc.resize(icc_size);
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderGetColorAsICCProfile(dec, nullptr, target,
+ ppf->orig_icc.data(), icc_size)) {
+ fprintf(stderr, "JxlDecoderGetColorAsICCProfile failed\n");
+ return false;
+ }
+ }
+ } else if (status == JXL_DEC_FRAME) {
+ jxl::extras::PackedFrame frame(ppf->info.xsize, ppf->info.ysize, format);
+ if (JXL_DEC_SUCCESS != JxlDecoderGetFrameHeader(dec, &frame.frame_info)) {
+ fprintf(stderr, "JxlDecoderGetFrameHeader failed\n");
+ return false;
+ }
+ frame.name.resize(frame.frame_info.name_length + 1, 0);
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderGetFrameName(dec, &frame.name[0], frame.name.size())) {
+ fprintf(stderr, "JxlDecoderGetFrameName failed\n");
+ return false;
+ }
+ frame.name.resize(frame.frame_info.name_length);
+ ppf->frames.emplace_back(std::move(frame));
+ progression_index = 0;
+ } else if (status == JXL_DEC_FRAME_PROGRESSION) {
+ size_t downsampling = JxlDecoderGetIntendedDownsamplingRatio(dec);
+ if ((max_passes_defined && progression_index >= dparams.max_passes) ||
+ (!max_passes_defined && downsampling <= dparams.max_downsampling)) {
+ if (JXL_DEC_SUCCESS != JxlDecoderFlushImage(dec)) {
+ fprintf(stderr, "JxlDecoderFlushImage failed\n");
+ return false;
+ }
+ if (ppf->frames.back().frame_info.is_last) {
+ break;
+ }
+ if (JXL_DEC_SUCCESS != JxlDecoderSkipCurrentFrame(dec)) {
+ fprintf(stderr, "JxlDecoderSkipCurrentFrame failed\n");
+ return false;
+ }
+ }
+ ++progression_index;
+ } else if (status == JXL_DEC_NEED_PREVIEW_OUT_BUFFER) {
+ size_t buffer_size;
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderPreviewOutBufferSize(dec, &format, &buffer_size)) {
+ fprintf(stderr, "JxlDecoderPreviewOutBufferSize failed\n");
+ return false;
+ }
+ ppf->preview_frame = std::unique_ptr<jxl::extras::PackedFrame>(
+ new jxl::extras::PackedFrame(ppf->info.preview.xsize,
+ ppf->info.preview.ysize, format));
+ if (buffer_size != ppf->preview_frame->color.pixels_size) {
+ fprintf(stderr, "Invalid out buffer size %" PRIuS " %" PRIuS "\n",
+ buffer_size, ppf->preview_frame->color.pixels_size);
+ return false;
+ }
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderSetPreviewOutBuffer(
+ dec, &format, ppf->preview_frame->color.pixels(), buffer_size)) {
+ fprintf(stderr, "JxlDecoderSetPreviewOutBuffer failed\n");
+ return false;
+ }
+ } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+ if (jpeg_bytes != nullptr) {
+ break;
+ }
+ size_t buffer_size;
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)) {
+ fprintf(stderr, "JxlDecoderImageOutBufferSize failed\n");
+ return false;
+ }
+ jxl::extras::PackedFrame& frame = ppf->frames.back();
+ if (buffer_size != frame.color.pixels_size) {
+ fprintf(stderr, "Invalid out buffer size %" PRIuS " %" PRIuS "\n",
+ buffer_size, frame.color.pixels_size);
+ return false;
+ }
+
+ if (dparams.use_image_callback) {
+ auto callback = [](void* opaque, size_t x, size_t y, size_t num_pixels,
+ const void* pixels) {
+ auto* ppf = reinterpret_cast<jxl::extras::PackedPixelFile*>(opaque);
+ jxl::extras::PackedImage& color = ppf->frames.back().color;
+ uint8_t* pixels_buffer = reinterpret_cast<uint8_t*>(color.pixels());
+ size_t sample_size = color.pixel_stride();
+ memcpy(pixels_buffer + (color.stride * y + sample_size * x), pixels,
+ num_pixels * sample_size);
+ };
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderSetImageOutCallback(dec, &format, callback, ppf)) {
+ fprintf(stderr, "JxlDecoderSetImageOutCallback failed\n");
+ return false;
+ }
+ } else {
+ if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(dec, &format,
+ frame.color.pixels(),
+ buffer_size)) {
+ fprintf(stderr, "JxlDecoderSetImageOutBuffer failed\n");
+ return false;
+ }
+ }
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderSetImageOutBitDepth(dec, &dparams.output_bitdepth)) {
+ fprintf(stderr, "JxlDecoderSetImageOutBitDepth failed\n");
+ return false;
+ }
+ UpdateBitDepth(dparams.output_bitdepth, format.data_type, &ppf->info);
+ bool have_alpha = (format.num_channels == 2 || format.num_channels == 4);
+ if (have_alpha) {
+ // Interleaved alpha channels has the same bit depth as color channels.
+ ppf->info.alpha_bits = ppf->info.bits_per_sample;
+ ppf->info.alpha_exponent_bits = ppf->info.exponent_bits_per_sample;
+ }
+ JxlPixelFormat ec_format = format;
+ ec_format.num_channels = 1;
+ for (auto& eci : ppf->extra_channels_info) {
+ frame.extra_channels.emplace_back(jxl::extras::PackedImage(
+ ppf->info.xsize, ppf->info.ysize, ec_format));
+ auto& ec = frame.extra_channels.back();
+ size_t buffer_size;
+ if (JXL_DEC_SUCCESS != JxlDecoderExtraChannelBufferSize(
+ dec, &ec_format, &buffer_size, eci.index)) {
+ fprintf(stderr, "JxlDecoderExtraChannelBufferSize failed\n");
+ return false;
+ }
+ if (buffer_size != ec.pixels_size) {
+ fprintf(stderr,
+ "Invalid extra channel buffer size"
+ " %" PRIuS " %" PRIuS "\n",
+ buffer_size, ec.pixels_size);
+ return false;
+ }
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderSetExtraChannelBuffer(dec, &ec_format, ec.pixels(),
+ buffer_size, eci.index)) {
+ fprintf(stderr, "JxlDecoderSetExtraChannelBuffer failed\n");
+ return false;
+ }
+ UpdateBitDepth(dparams.output_bitdepth, ec_format.data_type,
+ &eci.ec_info);
+ }
+ } else if (status == JXL_DEC_SUCCESS) {
+ // Decoding finished successfully.
+ break;
+ } else if (status == JXL_DEC_PREVIEW_IMAGE) {
+ // Nothing to do.
+ } else if (status == JXL_DEC_FULL_IMAGE) {
+ if (jpeg_bytes != nullptr || ppf->frames.back().frame_info.is_last) {
+ codestream_done = true;
+ }
+ } else {
+ fprintf(stderr, "Error: unexpected status: %d\n",
+ static_cast<int>(status));
+ return false;
+ }
+ }
+ boxes.FinalizeOutput();
+ if (!ppf->metadata.exif.empty()) {
+ // Verify that Exif box has a valid TIFF header at the specified offset.
+ // Discard bytes preceding the header.
+ if (ppf->metadata.exif.size() >= 4) {
+ uint32_t offset = LoadBE32(ppf->metadata.exif.data());
+ if (offset <= ppf->metadata.exif.size() - 8) {
+ std::vector<uint8_t> exif(ppf->metadata.exif.begin() + 4 + offset,
+ ppf->metadata.exif.end());
+ bool bigendian;
+ if (IsExif(exif, &bigendian)) {
+ ppf->metadata.exif = std::move(exif);
+ } else {
+ fprintf(stderr, "Warning: invalid TIFF header in Exif\n");
+ }
+ } else {
+ fprintf(stderr, "Warning: invalid Exif offset: %" PRIu32 "\n", offset);
+ }
+ } else {
+ fprintf(stderr, "Warning: invalid Exif length: %" PRIuS "\n",
+ ppf->metadata.exif.size());
+ }
+ }
+ if (jpeg_bytes != nullptr) {
+ if (!can_reconstruct_jpeg) return false;
+ size_t used_jpeg_output =
+ jpeg_data_chunk.size() - JxlDecoderReleaseJPEGBuffer(dec);
+ jpeg_bytes->insert(jpeg_bytes->end(), jpeg_data_chunk.data(),
+ jpeg_data_chunk.data() + used_jpeg_output);
+ }
+ if (decoded_bytes) {
+ *decoded_bytes = bytes_size - JxlDecoderReleaseInput(dec);
+ }
+ return true;
+}
+
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/dec/jxl.h b/third_party/jpeg-xl/lib/extras/dec/jxl.h
new file mode 100644
index 0000000000..5f4ed7f683
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/jxl.h
@@ -0,0 +1,69 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_JXL_H_
+#define LIB_EXTRAS_DEC_JXL_H_
+
+// Decodes JPEG XL images in memory.
+
+#include <jxl/parallel_runner.h>
+#include <jxl/types.h>
+#include <stdint.h>
+
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "lib/extras/packed_image.h"
+
+namespace jxl {
+namespace extras {
+
+struct JXLDecompressParams {
+ // If empty, little endian float formats will be accepted.
+ std::vector<JxlPixelFormat> accepted_formats;
+
+ // Requested output color space description.
+ std::string color_space;
+ // If set, performs tone mapping to this intensity target luminance.
+ float display_nits = 0.0;
+ // Whether spot colors are rendered on the image.
+ bool render_spotcolors = true;
+ // Whether to keep or undo the orientation given in the header.
+ bool keep_orientation = false;
+
+ // If runner_opaque is set, the decoder uses this parallel runner.
+ JxlParallelRunner runner;
+ void* runner_opaque = nullptr;
+
+ // Whether truncated input should be treated as an error.
+ bool allow_partial_input = false;
+
+ // How many passes to decode at most. By default, decode everything.
+ uint32_t max_passes = std::numeric_limits<uint32_t>::max();
+
+ // Alternatively, one can specify the maximum tolerable downscaling factor
+ // with respect to the full size of the image. By default, nothing less than
+ // the full size is requested.
+ size_t max_downsampling = 1;
+
+ // Whether to use the image callback or the image buffer to get the output.
+ bool use_image_callback = true;
+ // Whether to unpremultiply colors for associated alpha channels.
+ bool unpremultiply_alpha = false;
+
+ // Controls the effective bit depth of the output pixels.
+ JxlBitDepth output_bitdepth = {JXL_BIT_DEPTH_FROM_PIXEL_FORMAT, 0, 0};
+};
+
+bool DecodeImageJXL(const uint8_t* bytes, size_t bytes_size,
+ const JXLDecompressParams& dparams, size_t* decoded_bytes,
+ PackedPixelFile* ppf,
+ std::vector<uint8_t>* jpeg_bytes = nullptr);
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_DEC_JXL_H_
diff --git a/third_party/jpeg-xl/lib/extras/dec/pgx.cc b/third_party/jpeg-xl/lib/extras/dec/pgx.cc
new file mode 100644
index 0000000000..a99eb0f4ee
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/pgx.cc
@@ -0,0 +1,202 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/pgx.h"
+
+#include <string.h>
+
+#include "lib/extras/size_constraints.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+struct HeaderPGX {
+ // NOTE: PGX is always grayscale
+ size_t xsize;
+ size_t ysize;
+ size_t bits_per_sample;
+ bool big_endian;
+ bool is_signed;
+};
+
+class Parser {
+ public:
+ explicit Parser(const Span<const uint8_t> bytes)
+ : pos_(bytes.data()), end_(pos_ + bytes.size()) {}
+
+ // Sets "pos" to the first non-header byte/pixel on success.
+ Status ParseHeader(HeaderPGX* header, const uint8_t** pos) {
+ // codec.cc ensures we have at least two bytes => no range check here.
+ if (pos_[0] != 'P' || pos_[1] != 'G') return false;
+ pos_ += 2;
+ return ParseHeaderPGX(header, pos);
+ }
+
+ // Exposed for testing
+ Status ParseUnsigned(size_t* number) {
+ if (pos_ == end_) return JXL_FAILURE("PGX: reached end before number");
+ if (!IsDigit(*pos_)) return JXL_FAILURE("PGX: expected unsigned number");
+
+ *number = 0;
+ while (pos_ < end_ && *pos_ >= '0' && *pos_ <= '9') {
+ *number *= 10;
+ *number += *pos_ - '0';
+ ++pos_;
+ }
+
+ return true;
+ }
+
+ private:
+ static bool IsDigit(const uint8_t c) { return '0' <= c && c <= '9'; }
+ static bool IsLineBreak(const uint8_t c) { return c == '\r' || c == '\n'; }
+ static bool IsWhitespace(const uint8_t c) {
+ return IsLineBreak(c) || c == '\t' || c == ' ';
+ }
+
+ Status SkipSpace() {
+ if (pos_ == end_) return JXL_FAILURE("PGX: reached end before space");
+ const uint8_t c = *pos_;
+ if (c != ' ') return JXL_FAILURE("PGX: expected space");
+ ++pos_;
+ return true;
+ }
+
+ Status SkipLineBreak() {
+ if (pos_ == end_) return JXL_FAILURE("PGX: reached end before line break");
+ // Line break can be either "\n" (0a) or "\r\n" (0d 0a).
+ if (*pos_ == '\n') {
+ pos_++;
+ return true;
+ } else if (*pos_ == '\r' && pos_ + 1 != end_ && *(pos_ + 1) == '\n') {
+ pos_ += 2;
+ return true;
+ }
+ return JXL_FAILURE("PGX: expected line break");
+ }
+
+ Status SkipSingleWhitespace() {
+ if (pos_ == end_) return JXL_FAILURE("PGX: reached end before whitespace");
+ if (!IsWhitespace(*pos_)) return JXL_FAILURE("PGX: expected whitespace");
+ ++pos_;
+ return true;
+ }
+
+ Status ParseHeaderPGX(HeaderPGX* header, const uint8_t** pos) {
+ JXL_RETURN_IF_ERROR(SkipSpace());
+ if (pos_ + 2 > end_) return JXL_FAILURE("PGX: header too small");
+ if (*pos_ == 'M' && *(pos_ + 1) == 'L') {
+ header->big_endian = true;
+ } else if (*pos_ == 'L' && *(pos_ + 1) == 'M') {
+ header->big_endian = false;
+ } else {
+ return JXL_FAILURE("PGX: invalid endianness");
+ }
+ pos_ += 2;
+ JXL_RETURN_IF_ERROR(SkipSpace());
+ if (pos_ == end_) return JXL_FAILURE("PGX: header too small");
+ if (*pos_ == '+') {
+ header->is_signed = false;
+ } else if (*pos_ == '-') {
+ header->is_signed = true;
+ } else {
+ return JXL_FAILURE("PGX: invalid signedness");
+ }
+ pos_++;
+ // Skip optional space
+ if (pos_ < end_ && *pos_ == ' ') pos_++;
+ JXL_RETURN_IF_ERROR(ParseUnsigned(&header->bits_per_sample));
+ JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+ JXL_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
+ JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+ JXL_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
+ // 0xa, or 0xd 0xa.
+ JXL_RETURN_IF_ERROR(SkipLineBreak());
+
+ // TODO(jon): could do up to 24-bit by converting the values to
+ // JXL_TYPE_FLOAT.
+ if (header->bits_per_sample > 16) {
+ return JXL_FAILURE("PGX: >16 bits not yet supported");
+ }
+ // TODO(lode): support signed integers. This may require changing the way
+ // external_image works.
+ if (header->is_signed) {
+ return JXL_FAILURE("PGX: signed not yet supported");
+ }
+
+ size_t numpixels = header->xsize * header->ysize;
+ size_t bytes_per_pixel = header->bits_per_sample <= 8 ? 1 : 2;
+ if (pos_ + numpixels * bytes_per_pixel > end_) {
+ return JXL_FAILURE("PGX: data too small");
+ }
+
+ *pos = pos_;
+ return true;
+ }
+
+ const uint8_t* pos_;
+ const uint8_t* const end_;
+};
+
+} // namespace
+
+Status DecodeImagePGX(const Span<const uint8_t> bytes,
+ const ColorHints& color_hints, PackedPixelFile* ppf,
+ const SizeConstraints* constraints) {
+ Parser parser(bytes);
+ HeaderPGX header = {};
+ const uint8_t* pos;
+ if (!parser.ParseHeader(&header, &pos)) return false;
+ JXL_RETURN_IF_ERROR(
+ VerifyDimensions(constraints, header.xsize, header.ysize));
+ if (header.bits_per_sample == 0 || header.bits_per_sample > 32) {
+ return JXL_FAILURE("PGX: bits_per_sample invalid");
+ }
+
+ JXL_RETURN_IF_ERROR(ApplyColorHints(color_hints, /*color_already_set=*/false,
+ /*is_gray=*/true, ppf));
+ ppf->info.xsize = header.xsize;
+ ppf->info.ysize = header.ysize;
+ // Original data is uint, so exponent_bits_per_sample = 0.
+ ppf->info.bits_per_sample = header.bits_per_sample;
+ ppf->info.exponent_bits_per_sample = 0;
+ ppf->info.uses_original_profile = true;
+
+ // No alpha in PGX
+ ppf->info.alpha_bits = 0;
+ ppf->info.alpha_exponent_bits = 0;
+ ppf->info.num_color_channels = 1; // Always grayscale
+ ppf->info.orientation = JXL_ORIENT_IDENTITY;
+
+ JxlDataType data_type;
+ if (header.bits_per_sample > 8) {
+ data_type = JXL_TYPE_UINT16;
+ } else {
+ data_type = JXL_TYPE_UINT8;
+ }
+
+ const JxlPixelFormat format{
+ /*num_channels=*/1,
+ /*data_type=*/data_type,
+ /*endianness=*/header.big_endian ? JXL_BIG_ENDIAN : JXL_LITTLE_ENDIAN,
+ /*align=*/0,
+ };
+ ppf->frames.clear();
+ // Allocates the frame buffer.
+ ppf->frames.emplace_back(header.xsize, header.ysize, format);
+ const auto& frame = ppf->frames.back();
+ size_t pgx_remaining_size = bytes.data() + bytes.size() - pos;
+ if (pgx_remaining_size < frame.color.pixels_size) {
+ return JXL_FAILURE("PGX file too small");
+ }
+ memcpy(frame.color.pixels(), pos, frame.color.pixels_size);
+ return true;
+}
+
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/dec/pgx.h b/third_party/jpeg-xl/lib/extras/dec/pgx.h
new file mode 100644
index 0000000000..2cbd3b4dcf
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/pgx.h
@@ -0,0 +1,35 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_PGX_H_
+#define LIB_EXTRAS_DEC_PGX_H_
+
+// Decodes PGX pixels in memory.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+struct SizeConstraints;
+
+namespace extras {
+
+// Decodes `bytes` into `ppf`.
+Status DecodeImagePGX(Span<const uint8_t> bytes, const ColorHints& color_hints,
+ PackedPixelFile* ppf,
+ const SizeConstraints* constraints = nullptr);
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_DEC_PGX_H_
diff --git a/third_party/jpeg-xl/lib/extras/dec/pgx_test.cc b/third_party/jpeg-xl/lib/extras/dec/pgx_test.cc
new file mode 100644
index 0000000000..78ed689d07
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/pgx_test.cc
@@ -0,0 +1,79 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/pgx.h"
+
+#include "lib/extras/packed_image_convert.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+Span<const uint8_t> MakeSpan(const char* str) {
+ return Span<const uint8_t>(reinterpret_cast<const uint8_t*>(str),
+ strlen(str));
+}
+
+TEST(CodecPGXTest, Test8bits) {
+ std::string pgx = "PG ML + 8 2 3\npixels";
+
+ PackedPixelFile ppf;
+ ThreadPool* pool = nullptr;
+
+ EXPECT_TRUE(DecodeImagePGX(MakeSpan(pgx.c_str()), ColorHints(), &ppf));
+ CodecInOut io;
+ EXPECT_TRUE(ConvertPackedPixelFileToCodecInOut(ppf, pool, &io));
+
+ ScaleImage(255.f, io.Main().color());
+
+ EXPECT_FALSE(io.metadata.m.bit_depth.floating_point_sample);
+ EXPECT_EQ(8u, io.metadata.m.bit_depth.bits_per_sample);
+ EXPECT_TRUE(io.metadata.m.color_encoding.IsGray());
+ EXPECT_EQ(2u, io.xsize());
+ EXPECT_EQ(3u, io.ysize());
+
+ float eps = 1e-5;
+ EXPECT_NEAR('p', io.Main().color()->Plane(0).Row(0)[0], eps);
+ EXPECT_NEAR('i', io.Main().color()->Plane(0).Row(0)[1], eps);
+ EXPECT_NEAR('x', io.Main().color()->Plane(0).Row(1)[0], eps);
+ EXPECT_NEAR('e', io.Main().color()->Plane(0).Row(1)[1], eps);
+ EXPECT_NEAR('l', io.Main().color()->Plane(0).Row(2)[0], eps);
+ EXPECT_NEAR('s', io.Main().color()->Plane(0).Row(2)[1], eps);
+}
+
+TEST(CodecPGXTest, Test16bits) {
+ std::string pgx = "PG ML + 16 2 3\np_i_x_e_l_s_";
+
+ PackedPixelFile ppf;
+ ThreadPool* pool = nullptr;
+
+ EXPECT_TRUE(DecodeImagePGX(MakeSpan(pgx.c_str()), ColorHints(), &ppf));
+ CodecInOut io;
+ EXPECT_TRUE(ConvertPackedPixelFileToCodecInOut(ppf, pool, &io));
+
+ ScaleImage(255.f, io.Main().color());
+
+ EXPECT_FALSE(io.metadata.m.bit_depth.floating_point_sample);
+ EXPECT_EQ(16u, io.metadata.m.bit_depth.bits_per_sample);
+ EXPECT_TRUE(io.metadata.m.color_encoding.IsGray());
+ EXPECT_EQ(2u, io.xsize());
+ EXPECT_EQ(3u, io.ysize());
+
+ // Comparing ~16-bit numbers in floats, only ~7 bits left.
+ float eps = 1e-3;
+ const auto& plane = io.Main().color()->Plane(0);
+ EXPECT_NEAR(256.0f * 'p' + '_', plane.Row(0)[0] * 257, eps);
+ EXPECT_NEAR(256.0f * 'i' + '_', plane.Row(0)[1] * 257, eps);
+ EXPECT_NEAR(256.0f * 'x' + '_', plane.Row(1)[0] * 257, eps);
+ EXPECT_NEAR(256.0f * 'e' + '_', plane.Row(1)[1] * 257, eps);
+ EXPECT_NEAR(256.0f * 'l' + '_', plane.Row(2)[0] * 257, eps);
+ EXPECT_NEAR(256.0f * 's' + '_', plane.Row(2)[1] * 257, eps);
+}
+
+} // namespace
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/dec/pnm.cc b/third_party/jpeg-xl/lib/extras/dec/pnm.cc
new file mode 100644
index 0000000000..c3c2247769
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/pnm.cc
@@ -0,0 +1,474 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/pnm.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <cmath>
+
+#include "lib/extras/size_constraints.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+struct HeaderPNM {
+ size_t xsize;
+ size_t ysize;
+ bool is_gray; // PGM
+ bool has_alpha; // PAM
+ size_t bits_per_sample;
+ bool floating_point;
+ bool big_endian;
+ std::vector<JxlExtraChannelType> ec_types; // PAM
+};
+
+class Parser {
+ public:
+ explicit Parser(const Span<const uint8_t> bytes)
+ : pos_(bytes.data()), end_(pos_ + bytes.size()) {}
+
+ // Sets "pos" to the first non-header byte/pixel on success.
+ Status ParseHeader(HeaderPNM* header, const uint8_t** pos) {
+ // codec.cc ensures we have at least two bytes => no range check here.
+ if (pos_[0] != 'P') return false;
+ const uint8_t type = pos_[1];
+ pos_ += 2;
+
+ switch (type) {
+ case '4':
+ return JXL_FAILURE("pbm not supported");
+
+ case '5':
+ header->is_gray = true;
+ return ParseHeaderPNM(header, pos);
+
+ case '6':
+ header->is_gray = false;
+ return ParseHeaderPNM(header, pos);
+
+ case '7':
+ return ParseHeaderPAM(header, pos);
+
+ case 'F':
+ header->is_gray = false;
+ return ParseHeaderPFM(header, pos);
+
+ case 'f':
+ header->is_gray = true;
+ return ParseHeaderPFM(header, pos);
+ }
+ return false;
+ }
+
+ // Exposed for testing
+ Status ParseUnsigned(size_t* number) {
+ if (pos_ == end_) return JXL_FAILURE("PNM: reached end before number");
+ if (!IsDigit(*pos_)) return JXL_FAILURE("PNM: expected unsigned number");
+
+ *number = 0;
+ while (pos_ < end_ && *pos_ >= '0' && *pos_ <= '9') {
+ *number *= 10;
+ *number += *pos_ - '0';
+ ++pos_;
+ }
+
+ return true;
+ }
+
+ Status ParseSigned(double* number) {
+ if (pos_ == end_) return JXL_FAILURE("PNM: reached end before signed");
+
+ if (*pos_ != '-' && *pos_ != '+' && !IsDigit(*pos_)) {
+ return JXL_FAILURE("PNM: expected signed number");
+ }
+
+ // Skip sign
+ const bool is_neg = *pos_ == '-';
+ if (is_neg || *pos_ == '+') {
+ ++pos_;
+ if (pos_ == end_) return JXL_FAILURE("PNM: reached end before digits");
+ }
+
+ // Leading digits
+ *number = 0.0;
+ while (pos_ < end_ && *pos_ >= '0' && *pos_ <= '9') {
+ *number *= 10;
+ *number += *pos_ - '0';
+ ++pos_;
+ }
+
+ // Decimal places?
+ if (pos_ < end_ && *pos_ == '.') {
+ ++pos_;
+ double place = 0.1;
+ while (pos_ < end_ && *pos_ >= '0' && *pos_ <= '9') {
+ *number += (*pos_ - '0') * place;
+ place *= 0.1;
+ ++pos_;
+ }
+ }
+
+ if (is_neg) *number = -*number;
+ return true;
+ }
+
+ private:
+ static bool IsDigit(const uint8_t c) { return '0' <= c && c <= '9'; }
+ static bool IsLineBreak(const uint8_t c) { return c == '\r' || c == '\n'; }
+ static bool IsWhitespace(const uint8_t c) {
+ return IsLineBreak(c) || c == '\t' || c == ' ';
+ }
+
+ Status SkipBlank() {
+ if (pos_ == end_) return JXL_FAILURE("PNM: reached end before blank");
+ const uint8_t c = *pos_;
+ if (c != ' ' && c != '\n') return JXL_FAILURE("PNM: expected blank");
+ ++pos_;
+ return true;
+ }
+
+ Status SkipSingleWhitespace() {
+ if (pos_ == end_) return JXL_FAILURE("PNM: reached end before whitespace");
+ if (!IsWhitespace(*pos_)) return JXL_FAILURE("PNM: expected whitespace");
+ ++pos_;
+ return true;
+ }
+
+ Status SkipWhitespace() {
+ if (pos_ == end_) return JXL_FAILURE("PNM: reached end before whitespace");
+ if (!IsWhitespace(*pos_) && *pos_ != '#') {
+ return JXL_FAILURE("PNM: expected whitespace/comment");
+ }
+
+ while (pos_ < end_ && IsWhitespace(*pos_)) {
+ ++pos_;
+ }
+
+ // Comment(s)
+ while (pos_ != end_ && *pos_ == '#') {
+ while (pos_ != end_ && !IsLineBreak(*pos_)) {
+ ++pos_;
+ }
+ // Newline(s)
+ while (pos_ != end_ && IsLineBreak(*pos_)) pos_++;
+ }
+
+ while (pos_ < end_ && IsWhitespace(*pos_)) {
+ ++pos_;
+ }
+ return true;
+ }
+
+ Status MatchString(const char* keyword, bool skipws = true) {
+ const uint8_t* ppos = pos_;
+ while (*keyword) {
+ if (ppos >= end_) return JXL_FAILURE("PAM: unexpected end of input");
+ if (*keyword != *ppos) return false;
+ ppos++;
+ keyword++;
+ }
+ pos_ = ppos;
+ if (skipws) {
+ JXL_RETURN_IF_ERROR(SkipWhitespace());
+ } else {
+ JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+ }
+ return true;
+ }
+
+ Status ParseHeaderPAM(HeaderPNM* header, const uint8_t** pos) {
+ size_t depth = 3;
+ size_t max_val = 255;
+ JXL_RETURN_IF_ERROR(SkipWhitespace());
+ while (!MatchString("ENDHDR", /*skipws=*/false)) {
+ if (MatchString("WIDTH")) {
+ JXL_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
+ JXL_RETURN_IF_ERROR(SkipWhitespace());
+ } else if (MatchString("HEIGHT")) {
+ JXL_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
+ JXL_RETURN_IF_ERROR(SkipWhitespace());
+ } else if (MatchString("DEPTH")) {
+ JXL_RETURN_IF_ERROR(ParseUnsigned(&depth));
+ JXL_RETURN_IF_ERROR(SkipWhitespace());
+ } else if (MatchString("MAXVAL")) {
+ JXL_RETURN_IF_ERROR(ParseUnsigned(&max_val));
+ JXL_RETURN_IF_ERROR(SkipWhitespace());
+ } else if (MatchString("TUPLTYPE")) {
+ if (MatchString("RGB_ALPHA")) {
+ header->has_alpha = true;
+ } else if (MatchString("RGB")) {
+ } else if (MatchString("GRAYSCALE_ALPHA")) {
+ header->has_alpha = true;
+ header->is_gray = true;
+ } else if (MatchString("GRAYSCALE")) {
+ header->is_gray = true;
+ } else if (MatchString("BLACKANDWHITE_ALPHA")) {
+ header->has_alpha = true;
+ header->is_gray = true;
+ max_val = 1;
+ } else if (MatchString("BLACKANDWHITE")) {
+ header->is_gray = true;
+ max_val = 1;
+ } else if (MatchString("Alpha")) {
+ header->ec_types.push_back(JXL_CHANNEL_ALPHA);
+ } else if (MatchString("Depth")) {
+ header->ec_types.push_back(JXL_CHANNEL_DEPTH);
+ } else if (MatchString("SpotColor")) {
+ header->ec_types.push_back(JXL_CHANNEL_SPOT_COLOR);
+ } else if (MatchString("SelectionMask")) {
+ header->ec_types.push_back(JXL_CHANNEL_SELECTION_MASK);
+ } else if (MatchString("Black")) {
+ header->ec_types.push_back(JXL_CHANNEL_BLACK);
+ } else if (MatchString("CFA")) {
+ header->ec_types.push_back(JXL_CHANNEL_CFA);
+ } else if (MatchString("Thermal")) {
+ header->ec_types.push_back(JXL_CHANNEL_THERMAL);
+ } else {
+ return JXL_FAILURE("PAM: unknown TUPLTYPE");
+ }
+ } else {
+ constexpr size_t kMaxHeaderLength = 20;
+ char unknown_header[kMaxHeaderLength + 1];
+ size_t len = std::min<size_t>(kMaxHeaderLength, end_ - pos_);
+ strncpy(unknown_header, reinterpret_cast<const char*>(pos_), len);
+ unknown_header[len] = 0;
+ return JXL_FAILURE("PAM: unknown header keyword: %s", unknown_header);
+ }
+ }
+ size_t num_channels = header->is_gray ? 1 : 3;
+ if (header->has_alpha) num_channels++;
+ if (num_channels + header->ec_types.size() != depth) {
+ return JXL_FAILURE("PAM: bad DEPTH");
+ }
+ if (max_val == 0 || max_val >= 65536) {
+ return JXL_FAILURE("PAM: bad MAXVAL");
+ }
+ // e.g. When `max_val` is 1 , we want 1 bit:
+ header->bits_per_sample = FloorLog2Nonzero(max_val) + 1;
+ if ((1u << header->bits_per_sample) - 1 != max_val)
+ return JXL_FAILURE("PNM: unsupported MaxVal (expected 2^n - 1)");
+ // PAM does not pack bits as in PBM.
+
+ header->floating_point = false;
+ header->big_endian = true;
+ *pos = pos_;
+ return true;
+ }
+
+ Status ParseHeaderPNM(HeaderPNM* header, const uint8_t** pos) {
+ JXL_RETURN_IF_ERROR(SkipWhitespace());
+ JXL_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
+
+ JXL_RETURN_IF_ERROR(SkipWhitespace());
+ JXL_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
+
+ JXL_RETURN_IF_ERROR(SkipWhitespace());
+ size_t max_val;
+ JXL_RETURN_IF_ERROR(ParseUnsigned(&max_val));
+ if (max_val == 0 || max_val >= 65536) {
+ return JXL_FAILURE("PNM: bad MaxVal");
+ }
+ header->bits_per_sample = FloorLog2Nonzero(max_val) + 1;
+ if ((1u << header->bits_per_sample) - 1 != max_val)
+ return JXL_FAILURE("PNM: unsupported MaxVal (expected 2^n - 1)");
+ header->floating_point = false;
+ header->big_endian = true;
+
+ JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+
+ *pos = pos_;
+ return true;
+ }
+
+ Status ParseHeaderPFM(HeaderPNM* header, const uint8_t** pos) {
+ JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+ JXL_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
+
+ JXL_RETURN_IF_ERROR(SkipBlank());
+ JXL_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
+
+ JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+ // The scale has no meaning as multiplier, only its sign is used to
+ // indicate endianness. All software expects nominal range 0..1.
+ double scale;
+ JXL_RETURN_IF_ERROR(ParseSigned(&scale));
+ if (scale == 0.0) {
+ return JXL_FAILURE("PFM: bad scale factor value.");
+ } else if (std::abs(scale) != 1.0) {
+ JXL_WARNING("PFM: Discarding non-unit scale factor");
+ }
+ header->big_endian = scale > 0.0;
+ header->bits_per_sample = 32;
+ header->floating_point = true;
+
+ JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+
+ *pos = pos_;
+ return true;
+ }
+
+ const uint8_t* pos_;
+ const uint8_t* const end_;
+};
+
+Span<const uint8_t> MakeSpan(const char* str) {
+ return Span<const uint8_t>(reinterpret_cast<const uint8_t*>(str),
+ strlen(str));
+}
+
+} // namespace
+
+Status DecodeImagePNM(const Span<const uint8_t> bytes,
+ const ColorHints& color_hints, PackedPixelFile* ppf,
+ const SizeConstraints* constraints) {
+ Parser parser(bytes);
+ HeaderPNM header = {};
+ const uint8_t* pos = nullptr;
+ if (!parser.ParseHeader(&header, &pos)) return false;
+ JXL_RETURN_IF_ERROR(
+ VerifyDimensions(constraints, header.xsize, header.ysize));
+
+ if (header.bits_per_sample == 0 || header.bits_per_sample > 32) {
+ return JXL_FAILURE("PNM: bits_per_sample invalid");
+ }
+
+ // PPM specify that in the raster, the sample values are "nonlinear" (BP.709,
+ // with gamma number of 2.2). Deviate from the specification and assume
+ // `sRGB` in our implementation.
+ JXL_RETURN_IF_ERROR(ApplyColorHints(color_hints, /*color_already_set=*/false,
+ header.is_gray, ppf));
+
+ ppf->info.xsize = header.xsize;
+ ppf->info.ysize = header.ysize;
+ if (header.floating_point) {
+ ppf->info.bits_per_sample = 32;
+ ppf->info.exponent_bits_per_sample = 8;
+ } else {
+ ppf->info.bits_per_sample = header.bits_per_sample;
+ ppf->info.exponent_bits_per_sample = 0;
+ }
+
+ ppf->info.orientation = JXL_ORIENT_IDENTITY;
+
+ // No alpha in PNM and PFM
+ ppf->info.alpha_bits = (header.has_alpha ? ppf->info.bits_per_sample : 0);
+ ppf->info.alpha_exponent_bits = 0;
+ ppf->info.num_color_channels = (header.is_gray ? 1 : 3);
+ uint32_t num_alpha_channels = (header.has_alpha ? 1 : 0);
+ uint32_t num_interleaved_channels =
+ ppf->info.num_color_channels + num_alpha_channels;
+ ppf->info.num_extra_channels = num_alpha_channels + header.ec_types.size();
+
+ for (auto type : header.ec_types) {
+ PackedExtraChannel pec;
+ pec.ec_info.bits_per_sample = ppf->info.bits_per_sample;
+ pec.ec_info.type = type;
+ ppf->extra_channels_info.emplace_back(std::move(pec));
+ }
+
+ JxlDataType data_type;
+ if (header.floating_point) {
+ // There's no float16 pnm version.
+ data_type = JXL_TYPE_FLOAT;
+ } else {
+ if (header.bits_per_sample > 8) {
+ data_type = JXL_TYPE_UINT16;
+ } else {
+ data_type = JXL_TYPE_UINT8;
+ }
+ }
+
+ const JxlPixelFormat format{
+ /*num_channels=*/num_interleaved_channels,
+ /*data_type=*/data_type,
+ /*endianness=*/header.big_endian ? JXL_BIG_ENDIAN : JXL_LITTLE_ENDIAN,
+ /*align=*/0,
+ };
+ const JxlPixelFormat ec_format{1, format.data_type, format.endianness, 0};
+ ppf->frames.clear();
+ ppf->frames.emplace_back(header.xsize, header.ysize, format);
+ auto* frame = &ppf->frames.back();
+ for (size_t i = 0; i < header.ec_types.size(); ++i) {
+ frame->extra_channels.emplace_back(header.xsize, header.ysize, ec_format);
+ }
+ size_t pnm_remaining_size = bytes.data() + bytes.size() - pos;
+ if (pnm_remaining_size < frame->color.pixels_size) {
+ return JXL_FAILURE("PNM file too small");
+ }
+
+ uint8_t* out = reinterpret_cast<uint8_t*>(frame->color.pixels());
+ std::vector<uint8_t*> ec_out(header.ec_types.size());
+ for (size_t i = 0; i < ec_out.size(); ++i) {
+ ec_out[i] = reinterpret_cast<uint8_t*>(frame->extra_channels[i].pixels());
+ }
+ if (ec_out.empty()) {
+ const bool flipped_y = header.bits_per_sample == 32; // PFMs are flipped
+ for (size_t y = 0; y < header.ysize; ++y) {
+ size_t y_in = flipped_y ? header.ysize - 1 - y : y;
+ const uint8_t* row_in = &pos[y_in * frame->color.stride];
+ uint8_t* row_out = &out[y * frame->color.stride];
+ memcpy(row_out, row_in, frame->color.stride);
+ }
+ } else {
+ size_t pwidth = PackedImage::BitsPerChannel(data_type) / 8;
+ for (size_t y = 0; y < header.ysize; ++y) {
+ for (size_t x = 0; x < header.xsize; ++x) {
+ memcpy(out, pos, frame->color.pixel_stride());
+ out += frame->color.pixel_stride();
+ pos += frame->color.pixel_stride();
+ for (auto& p : ec_out) {
+ memcpy(p, pos, pwidth);
+ pos += pwidth;
+ p += pwidth;
+ }
+ }
+ }
+ }
+ return true;
+}
+
+void TestCodecPNM() {
+ size_t u = 77777; // Initialized to wrong value.
+ double d = 77.77;
+// Failing to parse invalid strings results in a crash if `JXL_CRASH_ON_ERROR`
+// is defined and hence the tests fail. Therefore we only run these tests if
+// `JXL_CRASH_ON_ERROR` is not defined.
+#ifndef JXL_CRASH_ON_ERROR
+ JXL_CHECK(false == Parser(MakeSpan("")).ParseUnsigned(&u));
+ JXL_CHECK(false == Parser(MakeSpan("+")).ParseUnsigned(&u));
+ JXL_CHECK(false == Parser(MakeSpan("-")).ParseUnsigned(&u));
+ JXL_CHECK(false == Parser(MakeSpan("A")).ParseUnsigned(&u));
+
+ JXL_CHECK(false == Parser(MakeSpan("")).ParseSigned(&d));
+ JXL_CHECK(false == Parser(MakeSpan("+")).ParseSigned(&d));
+ JXL_CHECK(false == Parser(MakeSpan("-")).ParseSigned(&d));
+ JXL_CHECK(false == Parser(MakeSpan("A")).ParseSigned(&d));
+#endif
+ JXL_CHECK(true == Parser(MakeSpan("1")).ParseUnsigned(&u));
+ JXL_CHECK(u == 1);
+
+ JXL_CHECK(true == Parser(MakeSpan("32")).ParseUnsigned(&u));
+ JXL_CHECK(u == 32);
+
+ JXL_CHECK(true == Parser(MakeSpan("1")).ParseSigned(&d));
+ JXL_CHECK(d == 1.0);
+ JXL_CHECK(true == Parser(MakeSpan("+2")).ParseSigned(&d));
+ JXL_CHECK(d == 2.0);
+ JXL_CHECK(true == Parser(MakeSpan("-3")).ParseSigned(&d));
+ JXL_CHECK(std::abs(d - -3.0) < 1E-15);
+ JXL_CHECK(true == Parser(MakeSpan("3.141592")).ParseSigned(&d));
+ JXL_CHECK(std::abs(d - 3.141592) < 1E-15);
+ JXL_CHECK(true == Parser(MakeSpan("-3.141592")).ParseSigned(&d));
+ JXL_CHECK(std::abs(d - -3.141592) < 1E-15);
+}
+
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/dec/pnm.h b/third_party/jpeg-xl/lib/extras/dec/pnm.h
new file mode 100644
index 0000000000..0745b2f20d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/dec/pnm.h
@@ -0,0 +1,41 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_PNM_H_
+#define LIB_EXTRAS_DEC_PNM_H_
+
+// Decodes PBM/PGM/PPM/PFM pixels in memory.
+
+#include <stddef.h>
+#include <stdint.h>
+
+// TODO(janwas): workaround for incorrect Win64 codegen (cause unknown)
+#include <hwy/highway.h>
+
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+struct SizeConstraints;
+
+namespace extras {
+
+// Decodes `bytes` into `ppf`. color_hints may specify "color_space", which
+// defaults to sRGB.
+Status DecodeImagePNM(Span<const uint8_t> bytes, const ColorHints& color_hints,
+ PackedPixelFile* ppf,
+ const SizeConstraints* constraints = nullptr);
+
+void TestCodecPNM();
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_DEC_PNM_H_
diff --git a/third_party/jpeg-xl/lib/extras/enc/apng.cc b/third_party/jpeg-xl/lib/extras/enc/apng.cc
new file mode 100644
index 0000000000..79d083349d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/enc/apng.cc
@@ -0,0 +1,371 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/enc/apng.h"
+
+// Parts of this code are taken from apngdis, which has the following license:
+/* APNG Disassembler 2.8
+ *
+ * Deconstructs APNG files into individual frames.
+ *
+ * http://apngdis.sourceforge.net
+ *
+ * Copyright (c) 2010-2015 Max Stepin
+ * maxst at users.sourceforge.net
+ *
+ * zlib license
+ * ------------
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/extras/exif.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "png.h" /* original (unpatched) libpng is ok */
+
+namespace jxl {
+namespace extras {
+
+namespace {
+
+constexpr unsigned char kExifSignature[6] = {0x45, 0x78, 0x69,
+ 0x66, 0x00, 0x00};
+
+class APNGEncoder : public Encoder {
+ public:
+ std::vector<JxlPixelFormat> AcceptedFormats() const override {
+ std::vector<JxlPixelFormat> formats;
+ for (const uint32_t num_channels : {1, 2, 3, 4}) {
+ for (const JxlDataType data_type : {JXL_TYPE_UINT8, JXL_TYPE_UINT16}) {
+ for (JxlEndianness endianness : {JXL_BIG_ENDIAN, JXL_LITTLE_ENDIAN}) {
+ formats.push_back(
+ JxlPixelFormat{num_channels, data_type, endianness, /*align=*/0});
+ }
+ }
+ }
+ return formats;
+ }
+ Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image,
+ ThreadPool* pool) const override {
+ JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info));
+ encoded_image->icc.clear();
+ encoded_image->bitstreams.resize(1);
+ return EncodePackedPixelFileToAPNG(ppf, pool,
+ &encoded_image->bitstreams.front());
+ }
+
+ private:
+ Status EncodePackedPixelFileToAPNG(const PackedPixelFile& ppf,
+ ThreadPool* pool,
+ std::vector<uint8_t>* bytes) const;
+};
+
+static void PngWrite(png_structp png_ptr, png_bytep data, png_size_t length) {
+ std::vector<uint8_t>* bytes =
+ static_cast<std::vector<uint8_t>*>(png_get_io_ptr(png_ptr));
+ bytes->insert(bytes->end(), data, data + length);
+}
+
+// Stores XMP and EXIF/IPTC into key/value strings for PNG
+class BlobsWriterPNG {
+ public:
+ static Status Encode(const PackedMetadata& blobs,
+ std::vector<std::string>* strings) {
+ if (!blobs.exif.empty()) {
+ // PNG viewers typically ignore Exif orientation but not all of them do
+ // (and e.g. cjxl doesn't), so we overwrite the Exif orientation to the
+ // identity to avoid repeated orientation.
+ std::vector<uint8_t> exif = blobs.exif;
+ ResetExifOrientation(exif);
+ // By convention, the data is prefixed with "Exif\0\0" when stored in
+ // the legacy (and non-standard) "Raw profile type exif" text chunk
+ // currently used here.
+ // TODO: Store Exif data in an eXIf chunk instead, which always begins
+ // with the TIFF header.
+ if (exif.size() >= sizeof kExifSignature &&
+ memcmp(exif.data(), kExifSignature, sizeof kExifSignature) != 0) {
+ exif.insert(exif.begin(), kExifSignature,
+ kExifSignature + sizeof kExifSignature);
+ }
+ JXL_RETURN_IF_ERROR(EncodeBase16("exif", exif, strings));
+ }
+ if (!blobs.iptc.empty()) {
+ JXL_RETURN_IF_ERROR(EncodeBase16("iptc", blobs.iptc, strings));
+ }
+ if (!blobs.xmp.empty()) {
+ // TODO: Store XMP data in an "XML:com.adobe.xmp" text chunk instead.
+ JXL_RETURN_IF_ERROR(EncodeBase16("xmp", blobs.xmp, strings));
+ }
+ return true;
+ }
+
+ private:
+ static JXL_INLINE char EncodeNibble(const uint8_t nibble) {
+ JXL_ASSERT(nibble < 16);
+ return (nibble < 10) ? '0' + nibble : 'a' + nibble - 10;
+ }
+
+ static Status EncodeBase16(const std::string& type,
+ const std::vector<uint8_t>& bytes,
+ std::vector<std::string>* strings) {
+ // Encoding: base16 with newline after 72 chars.
+ const size_t base16_size =
+ 2 * bytes.size() + DivCeil(bytes.size(), size_t(36)) + 1;
+ std::string base16;
+ base16.reserve(base16_size);
+ for (size_t i = 0; i < bytes.size(); ++i) {
+ if (i % 36 == 0) base16.push_back('\n');
+ base16.push_back(EncodeNibble(bytes[i] >> 4));
+ base16.push_back(EncodeNibble(bytes[i] & 0x0F));
+ }
+ base16.push_back('\n');
+ JXL_ASSERT(base16.length() == base16_size);
+
+ char key[30];
+ snprintf(key, sizeof(key), "Raw profile type %s", type.c_str());
+
+ char header[30];
+ snprintf(header, sizeof(header), "\n%s\n%8" PRIuS, type.c_str(),
+ bytes.size());
+
+ strings->push_back(std::string(key));
+ strings->push_back(std::string(header) + base16);
+ return true;
+ }
+};
+
+void MaybeAddCICP(JxlColorEncoding c_enc, png_structp png_ptr,
+ png_infop info_ptr) {
+ png_byte cicp_data[4] = {};
+ png_unknown_chunk cicp_chunk;
+ if (c_enc.color_space != JXL_COLOR_SPACE_RGB) {
+ return;
+ }
+ if (c_enc.primaries == JXL_PRIMARIES_P3) {
+ if (c_enc.white_point == JXL_WHITE_POINT_D65) {
+ cicp_data[0] = 12;
+ } else if (c_enc.white_point == JXL_WHITE_POINT_DCI) {
+ cicp_data[0] = 11;
+ } else {
+ return;
+ }
+ } else if (c_enc.primaries != JXL_PRIMARIES_CUSTOM &&
+ c_enc.white_point == JXL_WHITE_POINT_D65) {
+ cicp_data[0] = static_cast<png_byte>(c_enc.primaries);
+ } else {
+ return;
+ }
+ if (c_enc.transfer_function == JXL_TRANSFER_FUNCTION_UNKNOWN ||
+ c_enc.transfer_function == JXL_TRANSFER_FUNCTION_GAMMA) {
+ return;
+ }
+ cicp_data[1] = static_cast<png_byte>(c_enc.transfer_function);
+ cicp_data[2] = 0;
+ cicp_data[3] = 1;
+ cicp_chunk.data = cicp_data;
+ cicp_chunk.size = sizeof(cicp_data);
+ cicp_chunk.location = PNG_HAVE_PLTE;
+ memcpy(cicp_chunk.name, "cICP", 5);
+ png_set_keep_unknown_chunks(png_ptr, 3,
+ reinterpret_cast<const png_byte*>("cICP"), 1);
+ png_set_unknown_chunks(png_ptr, info_ptr, &cicp_chunk, 1);
+}
+
+Status APNGEncoder::EncodePackedPixelFileToAPNG(
+ const PackedPixelFile& ppf, ThreadPool* pool,
+ std::vector<uint8_t>* bytes) const {
+ size_t xsize = ppf.info.xsize;
+ size_t ysize = ppf.info.ysize;
+ bool has_alpha = ppf.info.alpha_bits != 0;
+ bool is_gray = ppf.info.num_color_channels == 1;
+ size_t color_channels = ppf.info.num_color_channels;
+ size_t num_channels = color_channels + (has_alpha ? 1 : 0);
+ size_t num_samples = num_channels * xsize * ysize;
+
+ if (!ppf.info.have_animation && ppf.frames.size() != 1) {
+ return JXL_FAILURE("Invalid number of frames");
+ }
+
+ size_t count = 0;
+ size_t anim_chunks = 0;
+
+ for (const auto& frame : ppf.frames) {
+ JXL_RETURN_IF_ERROR(VerifyPackedImage(frame.color, ppf.info));
+
+ const PackedImage& color = frame.color;
+ const JxlPixelFormat format = color.format;
+ const uint8_t* in = reinterpret_cast<const uint8_t*>(color.pixels());
+ size_t data_bits_per_sample = PackedImage::BitsPerChannel(format.data_type);
+ size_t bytes_per_sample = data_bits_per_sample / 8;
+ size_t out_bytes_per_sample = bytes_per_sample > 1 ? 2 : 1;
+ size_t out_stride = xsize * num_channels * out_bytes_per_sample;
+ size_t out_size = ysize * out_stride;
+ std::vector<uint8_t> out(out_size);
+
+ if (format.data_type == JXL_TYPE_UINT8) {
+ if (ppf.info.bits_per_sample < 8) {
+ float mul = 255.0 / ((1u << ppf.info.bits_per_sample) - 1);
+ for (size_t i = 0; i < num_samples; ++i) {
+ out[i] = static_cast<uint8_t>(in[i] * mul + 0.5);
+ }
+ } else {
+ memcpy(&out[0], in, out_size);
+ }
+ } else if (format.data_type == JXL_TYPE_UINT16) {
+ if (ppf.info.bits_per_sample < 16 ||
+ format.endianness != JXL_BIG_ENDIAN) {
+ float mul = 65535.0 / ((1u << ppf.info.bits_per_sample) - 1);
+ const uint8_t* p_in = in;
+ uint8_t* p_out = out.data();
+ for (size_t i = 0; i < num_samples; ++i, p_in += 2, p_out += 2) {
+ uint32_t val = (format.endianness == JXL_BIG_ENDIAN ? LoadBE16(p_in)
+ : LoadLE16(p_in));
+ StoreBE16(static_cast<uint32_t>(val * mul + 0.5), p_out);
+ }
+ } else {
+ memcpy(&out[0], in, out_size);
+ }
+ }
+ png_structp png_ptr;
+ png_infop info_ptr;
+
+ png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+
+ if (!png_ptr) return JXL_FAILURE("Could not init png encoder");
+
+ info_ptr = png_create_info_struct(png_ptr);
+ if (!info_ptr) return JXL_FAILURE("Could not init png info struct");
+
+ png_set_write_fn(png_ptr, bytes, PngWrite, NULL);
+ png_set_flush(png_ptr, 0);
+
+ int width = xsize;
+ int height = ysize;
+
+ png_byte color_type = (is_gray ? PNG_COLOR_TYPE_GRAY : PNG_COLOR_TYPE_RGB);
+ if (has_alpha) color_type |= PNG_COLOR_MASK_ALPHA;
+ png_byte bit_depth = out_bytes_per_sample * 8;
+
+ png_set_IHDR(png_ptr, info_ptr, width, height, bit_depth, color_type,
+ PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE,
+ PNG_FILTER_TYPE_BASE);
+ if (count == 0) {
+ MaybeAddCICP(ppf.color_encoding, png_ptr, info_ptr);
+ if (!ppf.icc.empty()) {
+ png_set_benign_errors(png_ptr, 1);
+ png_set_iCCP(png_ptr, info_ptr, "1", 0, ppf.icc.data(), ppf.icc.size());
+ }
+ std::vector<std::string> textstrings;
+ JXL_RETURN_IF_ERROR(BlobsWriterPNG::Encode(ppf.metadata, &textstrings));
+ for (size_t kk = 0; kk + 1 < textstrings.size(); kk += 2) {
+ png_text text;
+ text.key = const_cast<png_charp>(textstrings[kk].c_str());
+ text.text = const_cast<png_charp>(textstrings[kk + 1].c_str());
+ text.compression = PNG_TEXT_COMPRESSION_zTXt;
+ png_set_text(png_ptr, info_ptr, &text, 1);
+ }
+
+ png_write_info(png_ptr, info_ptr);
+ } else {
+ // fake writing a header, otherwise libpng gets confused
+ size_t pos = bytes->size();
+ png_write_info(png_ptr, info_ptr);
+ bytes->resize(pos);
+ }
+
+ if (ppf.info.have_animation) {
+ if (count == 0) {
+ png_byte adata[8];
+ png_save_uint_32(adata, ppf.frames.size());
+ png_save_uint_32(adata + 4, ppf.info.animation.num_loops);
+ png_byte actl[5] = "acTL";
+ png_write_chunk(png_ptr, actl, adata, 8);
+ }
+ png_byte fdata[26];
+ // TODO(jon): also make this work for the non-coalesced case
+ png_save_uint_32(fdata, anim_chunks++);
+ png_save_uint_32(fdata + 4, width);
+ png_save_uint_32(fdata + 8, height);
+ png_save_uint_32(fdata + 12, 0);
+ png_save_uint_32(fdata + 16, 0);
+ png_save_uint_16(fdata + 20, frame.frame_info.duration *
+ ppf.info.animation.tps_denominator);
+ png_save_uint_16(fdata + 22, ppf.info.animation.tps_numerator);
+ fdata[24] = 1;
+ fdata[25] = 0;
+ png_byte fctl[5] = "fcTL";
+ png_write_chunk(png_ptr, fctl, fdata, 26);
+ }
+
+ std::vector<uint8_t*> rows(height);
+ for (int y = 0; y < height; ++y) {
+ rows[y] = out.data() + y * out_stride;
+ }
+
+ png_write_flush(png_ptr);
+ const size_t pos = bytes->size();
+ png_write_image(png_ptr, &rows[0]);
+ png_write_flush(png_ptr);
+ if (count > 0) {
+ std::vector<uint8_t> fdata(4);
+ png_save_uint_32(fdata.data(), anim_chunks++);
+ size_t p = pos;
+ while (p + 8 < bytes->size()) {
+ size_t len = png_get_uint_32(bytes->data() + p);
+ JXL_ASSERT(bytes->operator[](p + 4) == 'I');
+ JXL_ASSERT(bytes->operator[](p + 5) == 'D');
+ JXL_ASSERT(bytes->operator[](p + 6) == 'A');
+ JXL_ASSERT(bytes->operator[](p + 7) == 'T');
+ fdata.insert(fdata.end(), bytes->data() + p + 8,
+ bytes->data() + p + 8 + len);
+ p += len + 12;
+ }
+ bytes->resize(pos);
+
+ png_byte fdat[5] = "fdAT";
+ png_write_chunk(png_ptr, fdat, fdata.data(), fdata.size());
+ }
+
+ count++;
+ if (count == ppf.frames.size() || !ppf.info.have_animation) {
+ png_write_end(png_ptr, NULL);
+ }
+
+ png_destroy_write_struct(&png_ptr, &info_ptr);
+ }
+
+ return true;
+}
+
+} // namespace
+
+std::unique_ptr<Encoder> GetAPNGEncoder() {
+ return jxl::make_unique<APNGEncoder>();
+}
+
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/enc/apng.h b/third_party/jpeg-xl/lib/extras/enc/apng.h
new file mode 100644
index 0000000000..2a2139c8fa
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/enc/apng.h
@@ -0,0 +1,23 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_ENC_APNG_H_
+#define LIB_EXTRAS_ENC_APNG_H_
+
+// Encodes APNG images in memory.
+
+#include <memory>
+
+#include "lib/extras/enc/encode.h"
+
+namespace jxl {
+namespace extras {
+
+std::unique_ptr<Encoder> GetAPNGEncoder();
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_ENC_APNG_H_
diff --git a/third_party/jpeg-xl/lib/extras/enc/encode.cc b/third_party/jpeg-xl/lib/extras/enc/encode.cc
new file mode 100644
index 0000000000..9ffba9d0dd
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/enc/encode.cc
@@ -0,0 +1,170 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/enc/encode.h"
+
+#include <locale>
+
+#if JPEGXL_ENABLE_APNG
+#include "lib/extras/enc/apng.h"
+#endif
+#if JPEGXL_ENABLE_EXR
+#include "lib/extras/enc/exr.h"
+#endif
+#if JPEGXL_ENABLE_JPEG
+#include "lib/extras/enc/jpg.h"
+#endif
+#include "lib/extras/enc/npy.h"
+#include "lib/extras/enc/pgx.h"
+#include "lib/extras/enc/pnm.h"
+#include "lib/jxl/base/printf_macros.h"
+
+namespace jxl {
+namespace extras {
+
+Status Encoder::VerifyBasicInfo(const JxlBasicInfo& info) {
+ if (info.xsize == 0 || info.ysize == 0) {
+ return JXL_FAILURE("Empty image");
+ }
+ if (info.num_color_channels != 1 && info.num_color_channels != 3) {
+ return JXL_FAILURE("Invalid number of color channels");
+ }
+ if (info.alpha_bits > 0 && info.alpha_bits != info.bits_per_sample) {
+ return JXL_FAILURE("Alpha bit depth does not match image bit depth");
+ }
+ if (info.orientation != JXL_ORIENT_IDENTITY) {
+ return JXL_FAILURE("Orientation must be identity");
+ }
+ return true;
+}
+
+Status Encoder::VerifyFormat(const JxlPixelFormat& format) const {
+ for (auto f : AcceptedFormats()) {
+ if (f.num_channels != format.num_channels) continue;
+ if (f.data_type != format.data_type) continue;
+ if (f.data_type == JXL_TYPE_UINT8 || f.endianness == format.endianness) {
+ return true;
+ }
+ }
+ return JXL_FAILURE("Format is not in the list of accepted formats.");
+}
+
+Status Encoder::VerifyBitDepth(JxlDataType data_type, uint32_t bits_per_sample,
+ uint32_t exponent_bits) {
+ if ((data_type == JXL_TYPE_UINT8 &&
+ (bits_per_sample == 0 || bits_per_sample > 8 || exponent_bits != 0)) ||
+ (data_type == JXL_TYPE_UINT16 &&
+ (bits_per_sample <= 8 || bits_per_sample > 16 || exponent_bits != 0)) ||
+ (data_type == JXL_TYPE_FLOAT16 &&
+ (bits_per_sample != 16 || exponent_bits != 5)) ||
+ (data_type == JXL_TYPE_FLOAT &&
+ (bits_per_sample != 32 || exponent_bits != 8))) {
+ return JXL_FAILURE(
+ "Incompatible data_type %d and bit depth %u with exponent bits %u",
+ (int)data_type, bits_per_sample, exponent_bits);
+ }
+ return true;
+}
+
+Status Encoder::VerifyImageSize(const PackedImage& image,
+ const JxlBasicInfo& info) {
+ if (image.pixels() == nullptr) {
+ return JXL_FAILURE("Invalid image.");
+ }
+ if (image.stride != image.xsize * image.pixel_stride()) {
+ return JXL_FAILURE("Invalid image stride.");
+ }
+ if (image.pixels_size != image.ysize * image.stride) {
+ return JXL_FAILURE("Invalid image size.");
+ }
+ size_t info_num_channels =
+ (info.num_color_channels + (info.alpha_bits > 0 ? 1 : 0));
+ if (image.xsize != info.xsize || image.ysize != info.ysize ||
+ image.format.num_channels != info_num_channels) {
+ return JXL_FAILURE("Frame size does not match image size");
+ }
+ return true;
+}
+
+Status Encoder::VerifyPackedImage(const PackedImage& image,
+ const JxlBasicInfo& info) const {
+ JXL_RETURN_IF_ERROR(VerifyImageSize(image, info));
+ JXL_RETURN_IF_ERROR(VerifyFormat(image.format));
+ JXL_RETURN_IF_ERROR(VerifyBitDepth(image.format.data_type,
+ info.bits_per_sample,
+ info.exponent_bits_per_sample));
+ return true;
+}
+
+Status SelectFormat(const std::vector<JxlPixelFormat>& accepted_formats,
+ const JxlBasicInfo& basic_info, JxlPixelFormat* format) {
+ const size_t original_bit_depth = basic_info.bits_per_sample;
+ size_t current_bit_depth = 0;
+ size_t num_alpha_channels = (basic_info.alpha_bits != 0 ? 1 : 0);
+ size_t num_channels = basic_info.num_color_channels + num_alpha_channels;
+ for (;;) {
+ for (const JxlPixelFormat& candidate : accepted_formats) {
+ if (candidate.num_channels != num_channels) continue;
+ const size_t candidate_bit_depth =
+ PackedImage::BitsPerChannel(candidate.data_type);
+ if (
+ // Candidate bit depth is less than what we have and still enough
+ (original_bit_depth <= candidate_bit_depth &&
+ candidate_bit_depth < current_bit_depth) ||
+ // Or larger than the too-small bit depth we currently have
+ (current_bit_depth < candidate_bit_depth &&
+ current_bit_depth < original_bit_depth)) {
+ *format = candidate;
+ current_bit_depth = candidate_bit_depth;
+ }
+ }
+ if (current_bit_depth == 0) {
+ if (num_channels > basic_info.num_color_channels) {
+ // Try dropping the alpha channel.
+ --num_channels;
+ continue;
+ }
+ return JXL_FAILURE("no appropriate format found");
+ }
+ break;
+ }
+ if (current_bit_depth < original_bit_depth) {
+ JXL_WARNING("encoding %" PRIuS "-bit original to %" PRIuS " bits",
+ original_bit_depth, current_bit_depth);
+ }
+ return true;
+}
+
+std::unique_ptr<Encoder> Encoder::FromExtension(std::string extension) {
+ std::transform(
+ extension.begin(), extension.end(), extension.begin(),
+ [](char c) { return std::tolower(c, std::locale::classic()); });
+#if JPEGXL_ENABLE_APNG
+ if (extension == ".png" || extension == ".apng") return GetAPNGEncoder();
+#endif
+
+#if JPEGXL_ENABLE_JPEG
+ if (extension == ".jpg") return GetJPEGEncoder();
+ if (extension == ".jpeg") return GetJPEGEncoder();
+#endif
+
+ if (extension == ".npy") return GetNumPyEncoder();
+
+ if (extension == ".pgx") return GetPGXEncoder();
+
+ if (extension == ".pam") return GetPAMEncoder();
+ if (extension == ".pgm") return GetPGMEncoder();
+ if (extension == ".ppm") return GetPPMEncoder();
+ if (extension == ".pfm") return GetPFMEncoder();
+
+#if JPEGXL_ENABLE_EXR
+ if (extension == ".exr") return GetEXREncoder();
+#endif
+
+ return nullptr;
+}
+
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/enc/encode.h b/third_party/jpeg-xl/lib/extras/enc/encode.h
new file mode 100644
index 0000000000..63cabaf30f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/enc/encode.h
@@ -0,0 +1,83 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_ENC_ENCODE_H_
+#define LIB_EXTRAS_ENC_ENCODE_H_
+
+// Facade for image encoders.
+
+#include <string>
+#include <unordered_map>
+
+#include "lib/extras/dec/decode.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace extras {
+
+struct EncodedImage {
+ // One (if the format supports animations or the image has only one frame) or
+ // more sequential bitstreams.
+ std::vector<std::vector<uint8_t>> bitstreams;
+
+ // For each extra channel one or more sequential bitstreams.
+ std::vector<std::vector<std::vector<uint8_t>>> extra_channel_bitstreams;
+
+ std::vector<uint8_t> preview_bitstream;
+
+ // If the format does not support embedding color profiles into the bitstreams
+ // above, it will be present here, to be written as a separate file. If it
+ // does support them, this field will be empty.
+ std::vector<uint8_t> icc;
+
+ // Additional output for conformance testing, only filled in by NumPyEncoder.
+ std::vector<uint8_t> metadata;
+};
+
+class Encoder {
+ public:
+ static std::unique_ptr<Encoder> FromExtension(std::string extension);
+
+ virtual ~Encoder() = default;
+
+ virtual std::vector<JxlPixelFormat> AcceptedFormats() const = 0;
+
+ // Any existing data in encoded_image is discarded.
+ virtual Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image,
+ ThreadPool* pool = nullptr) const = 0;
+
+ void SetOption(std::string name, std::string value) {
+ options_[std::move(name)] = std::move(value);
+ }
+
+ static Status VerifyBasicInfo(const JxlBasicInfo& info);
+ static Status VerifyImageSize(const PackedImage& image,
+ const JxlBasicInfo& info);
+ static Status VerifyBitDepth(JxlDataType data_type, uint32_t bits_per_sample,
+ uint32_t exponent_bits);
+
+ protected:
+ const std::unordered_map<std::string, std::string>& options() const {
+ return options_;
+ }
+
+ Status VerifyFormat(const JxlPixelFormat& format) const;
+
+ Status VerifyPackedImage(const PackedImage& image,
+ const JxlBasicInfo& info) const;
+
+ private:
+ std::unordered_map<std::string, std::string> options_;
+};
+
+// TODO(sboukortt): consider exposing this as part of the C API.
+Status SelectFormat(const std::vector<JxlPixelFormat>& accepted_formats,
+ const JxlBasicInfo& basic_info, JxlPixelFormat* format);
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_ENC_ENCODE_H_
diff --git a/third_party/jpeg-xl/lib/extras/enc/exr.cc b/third_party/jpeg-xl/lib/extras/enc/exr.cc
new file mode 100644
index 0000000000..1d70913936
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/enc/exr.cc
@@ -0,0 +1,200 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/enc/exr.h"
+
+#include <ImfChromaticitiesAttribute.h>
+#include <ImfIO.h>
+#include <ImfRgbaFile.h>
+#include <ImfStandardAttributes.h>
+#include <jxl/codestream_header.h>
+
+#include <vector>
+
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/byte_order.h"
+
+namespace jxl {
+namespace extras {
+
+namespace {
+
+namespace OpenEXR = OPENEXR_IMF_NAMESPACE;
+namespace Imath = IMATH_NAMESPACE;
+
+// OpenEXR::Int64 is deprecated in favor of using uint64_t directly, but using
+// uint64_t as recommended causes build failures with previous OpenEXR versions
+// on macOS, where the definition for OpenEXR::Int64 was actually not equivalent
+// to uint64_t. This alternative should work in all cases.
+using ExrInt64 = decltype(std::declval<OpenEXR::IStream>().tellg());
+
+class InMemoryOStream : public OpenEXR::OStream {
+ public:
+ // `bytes` must outlive the InMemoryOStream.
+ explicit InMemoryOStream(std::vector<uint8_t>* const bytes)
+ : OStream(/*fileName=*/""), bytes_(*bytes) {}
+
+ void write(const char c[], const int n) override {
+ if (bytes_.size() < pos_ + n) {
+ bytes_.resize(pos_ + n);
+ }
+ std::copy_n(c, n, bytes_.begin() + pos_);
+ pos_ += n;
+ }
+
+ ExrInt64 tellp() override { return pos_; }
+ void seekp(const ExrInt64 pos) override {
+ if (bytes_.size() + 1 < pos) {
+ bytes_.resize(pos - 1);
+ }
+ pos_ = pos;
+ }
+
+ private:
+ std::vector<uint8_t>& bytes_;
+ size_t pos_ = 0;
+};
+
+// Loads a Big-Endian float
+float LoadBEFloat(const uint8_t* p) {
+ uint32_t u = LoadBE32(p);
+ float result;
+ memcpy(&result, &u, 4);
+ return result;
+}
+
+// Loads a Little-Endian float
+float LoadLEFloat(const uint8_t* p) {
+ uint32_t u = LoadLE32(p);
+ float result;
+ memcpy(&result, &u, 4);
+ return result;
+}
+
+Status EncodeImageEXR(const PackedImage& image, const JxlBasicInfo& info,
+ const JxlColorEncoding& c_enc, ThreadPool* pool,
+ std::vector<uint8_t>* bytes) {
+ OpenEXR::setGlobalThreadCount(0);
+
+ const size_t xsize = info.xsize;
+ const size_t ysize = info.ysize;
+ const bool has_alpha = info.alpha_bits > 0;
+ const bool alpha_is_premultiplied = info.alpha_premultiplied;
+
+ if (info.num_color_channels != 3 ||
+ c_enc.color_space != JXL_COLOR_SPACE_RGB ||
+ c_enc.transfer_function != JXL_TRANSFER_FUNCTION_LINEAR) {
+ return JXL_FAILURE("Unsupported color encoding for OpenEXR output.");
+ }
+
+ const size_t num_channels = 3 + (has_alpha ? 1 : 0);
+ const JxlPixelFormat format = image.format;
+
+ if (format.data_type != JXL_TYPE_FLOAT) {
+ return JXL_FAILURE("Unsupported pixel format for OpenEXR output");
+ }
+
+ const uint8_t* in = reinterpret_cast<const uint8_t*>(image.pixels());
+ size_t in_stride = num_channels * 4 * xsize;
+
+ OpenEXR::Header header(xsize, ysize);
+ OpenEXR::Chromaticities chromaticities;
+ chromaticities.red =
+ Imath::V2f(c_enc.primaries_red_xy[0], c_enc.primaries_red_xy[1]);
+ chromaticities.green =
+ Imath::V2f(c_enc.primaries_green_xy[0], c_enc.primaries_green_xy[1]);
+ chromaticities.blue =
+ Imath::V2f(c_enc.primaries_blue_xy[0], c_enc.primaries_blue_xy[1]);
+ chromaticities.white =
+ Imath::V2f(c_enc.white_point_xy[0], c_enc.white_point_xy[1]);
+ OpenEXR::addChromaticities(header, chromaticities);
+ OpenEXR::addWhiteLuminance(header, info.intensity_target);
+
+ auto loadFloat =
+ format.endianness == JXL_BIG_ENDIAN ? LoadBEFloat : LoadLEFloat;
+ auto loadAlpha =
+ has_alpha ? loadFloat : [](const uint8_t* p) -> float { return 1.0f; };
+
+ // Ensure that the destructor of RgbaOutputFile has run before we look at the
+ // size of `bytes`.
+ {
+ InMemoryOStream os(bytes);
+ OpenEXR::RgbaOutputFile output(
+ os, header, has_alpha ? OpenEXR::WRITE_RGBA : OpenEXR::WRITE_RGB);
+ // How many rows to write at once. Again, the OpenEXR documentation
+ // recommends writing the whole image in one call.
+ const int y_chunk_size = ysize;
+ std::vector<OpenEXR::Rgba> output_rows(xsize * y_chunk_size);
+
+ for (size_t start_y = 0; start_y < ysize; start_y += y_chunk_size) {
+ // Inclusive.
+ const size_t end_y = std::min(start_y + y_chunk_size - 1, ysize - 1);
+ output.setFrameBuffer(output_rows.data() - start_y * xsize,
+ /*xStride=*/1, /*yStride=*/xsize);
+ for (size_t y = start_y; y <= end_y; ++y) {
+ const uint8_t* in_row = &in[(y - start_y) * in_stride];
+ OpenEXR::Rgba* const JXL_RESTRICT row_data =
+ &output_rows[(y - start_y) * xsize];
+ for (size_t x = 0; x < xsize; ++x) {
+ const uint8_t* in_pixel = &in_row[4 * num_channels * x];
+ float r = loadFloat(&in_pixel[0]);
+ float g = loadFloat(&in_pixel[4]);
+ float b = loadFloat(&in_pixel[8]);
+ const float alpha = loadAlpha(&in_pixel[12]);
+ if (!alpha_is_premultiplied) {
+ r *= alpha;
+ g *= alpha;
+ b *= alpha;
+ }
+ row_data[x] = OpenEXR::Rgba(r, g, b, alpha);
+ }
+ }
+ output.writePixels(/*numScanLines=*/end_y - start_y + 1);
+ }
+ }
+
+ return true;
+}
+
+class EXREncoder : public Encoder {
+ std::vector<JxlPixelFormat> AcceptedFormats() const override {
+ std::vector<JxlPixelFormat> formats;
+ for (const uint32_t num_channels : {1, 2, 3, 4}) {
+ for (const JxlDataType data_type : {JXL_TYPE_FLOAT, JXL_TYPE_FLOAT16}) {
+ for (JxlEndianness endianness : {JXL_BIG_ENDIAN, JXL_LITTLE_ENDIAN}) {
+ formats.push_back(JxlPixelFormat{/*num_channels=*/num_channels,
+ /*data_type=*/data_type,
+ /*endianness=*/endianness,
+ /*align=*/0});
+ }
+ }
+ }
+ return formats;
+ }
+ Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image,
+ ThreadPool* pool = nullptr) const override {
+ JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info));
+ encoded_image->icc.clear();
+ encoded_image->bitstreams.clear();
+ encoded_image->bitstreams.reserve(ppf.frames.size());
+ for (const auto& frame : ppf.frames) {
+ JXL_RETURN_IF_ERROR(VerifyPackedImage(frame.color, ppf.info));
+ encoded_image->bitstreams.emplace_back();
+ JXL_RETURN_IF_ERROR(EncodeImageEXR(frame.color, ppf.info,
+ ppf.color_encoding, pool,
+ &encoded_image->bitstreams.back()));
+ }
+ return true;
+ }
+};
+
+} // namespace
+
+std::unique_ptr<Encoder> GetEXREncoder() {
+ return jxl::make_unique<EXREncoder>();
+}
+
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/enc/exr.h b/third_party/jpeg-xl/lib/extras/enc/exr.h
new file mode 100644
index 0000000000..1baaa0272f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/enc/exr.h
@@ -0,0 +1,23 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_ENC_EXR_H_
+#define LIB_EXTRAS_ENC_EXR_H_
+
+// Encodes OpenEXR images in memory.
+
+#include <memory>
+
+#include "lib/extras/enc/encode.h"
+
+namespace jxl {
+namespace extras {
+
+std::unique_ptr<Encoder> GetEXREncoder();
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_ENC_EXR_H_
diff --git a/third_party/jpeg-xl/lib/extras/enc/jpegli.cc b/third_party/jpeg-xl/lib/extras/enc/jpegli.cc
new file mode 100644
index 0000000000..43cf32a19c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/enc/jpegli.cc
@@ -0,0 +1,503 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/enc/jpegli.h"
+
+#include <jxl/codestream_header.h>
+#include <setjmp.h>
+#include <stdint.h>
+
+#include "lib/extras/enc/encode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_xyb.h"
+
+namespace jxl {
+namespace extras {
+
+namespace {
+
+void MyErrorExit(j_common_ptr cinfo) {
+ jmp_buf* env = static_cast<jmp_buf*>(cinfo->client_data);
+ (*cinfo->err->output_message)(cinfo);
+ jpegli_destroy_compress(reinterpret_cast<j_compress_ptr>(cinfo));
+ longjmp(*env, 1);
+}
+
+Status VerifyInput(const PackedPixelFile& ppf) {
+ const JxlBasicInfo& info = ppf.info;
+ JXL_RETURN_IF_ERROR(Encoder::VerifyBasicInfo(info));
+ if (info.alpha_bits > 0) {
+ return JXL_FAILURE("Alpha is not supported for JPEG output.");
+ }
+ if (ppf.frames.size() != 1) {
+ return JXL_FAILURE("JPEG input must have exactly one frame.");
+ }
+ const PackedImage& image = ppf.frames[0].color;
+ JXL_RETURN_IF_ERROR(Encoder::VerifyImageSize(image, info));
+ if (image.format.data_type == JXL_TYPE_FLOAT16) {
+ return JXL_FAILURE("FLOAT16 input is not supported.");
+ }
+ JXL_RETURN_IF_ERROR(Encoder::VerifyBitDepth(image.format.data_type,
+ info.bits_per_sample,
+ info.exponent_bits_per_sample));
+ if ((image.format.data_type == JXL_TYPE_UINT8 && info.bits_per_sample != 8) ||
+ (image.format.data_type == JXL_TYPE_UINT16 &&
+ info.bits_per_sample != 16)) {
+ return JXL_FAILURE("Only full bit depth unsigned types are supported.");
+ }
+ return true;
+}
+
+Status GetColorEncoding(const PackedPixelFile& ppf,
+ ColorEncoding* color_encoding) {
+ if (!ppf.icc.empty()) {
+ PaddedBytes icc;
+ icc.assign(ppf.icc.data(), ppf.icc.data() + ppf.icc.size());
+ JXL_RETURN_IF_ERROR(color_encoding->SetICC(std::move(icc)));
+ } else {
+ JXL_RETURN_IF_ERROR(ConvertExternalToInternalColorEncoding(
+ ppf.color_encoding, color_encoding));
+ }
+ if (color_encoding->ICC().empty()) {
+ return JXL_FAILURE("Invalid color encoding.");
+ }
+ return true;
+}
+
+bool HasICCProfile(const std::vector<uint8_t>& app_data) {
+ size_t pos = 0;
+ while (pos < app_data.size()) {
+ if (pos + 16 > app_data.size()) return false;
+ uint8_t marker = app_data[pos + 1];
+ size_t marker_len = (app_data[pos + 2] << 8) + app_data[pos + 3] + 2;
+ if (marker == 0xe2 && memcmp(&app_data[pos + 4], "ICC_PROFILE", 12) == 0) {
+ return true;
+ }
+ pos += marker_len;
+ }
+ return false;
+}
+
+Status WriteAppData(j_compress_ptr cinfo,
+ const std::vector<uint8_t>& app_data) {
+ size_t pos = 0;
+ while (pos < app_data.size()) {
+ if (pos + 4 > app_data.size()) {
+ return JXL_FAILURE("Incomplete APP header.");
+ }
+ uint8_t marker = app_data[pos + 1];
+ size_t marker_len = (app_data[pos + 2] << 8) + app_data[pos + 3] + 2;
+ if (app_data[pos] != 0xff || marker < 0xe0 || marker > 0xef) {
+ return JXL_FAILURE("Invalid APP marker %02x %02x", app_data[pos], marker);
+ }
+ if (marker_len <= 4) {
+ return JXL_FAILURE("Invalid APP marker length.");
+ }
+ if (pos + marker_len > app_data.size()) {
+ return JXL_FAILURE("Incomplete APP data");
+ }
+ jpegli_write_marker(cinfo, marker, &app_data[pos + 4], marker_len - 4);
+ pos += marker_len;
+ }
+ return true;
+}
+
+static constexpr int kICCMarker = 0xe2;
+constexpr unsigned char kICCSignature[12] = {
+ 0x49, 0x43, 0x43, 0x5F, 0x50, 0x52, 0x4F, 0x46, 0x49, 0x4C, 0x45, 0x00};
+static constexpr uint8_t kUnknownTf = 2;
+static constexpr unsigned char kCICPTagSignature[4] = {0x63, 0x69, 0x63, 0x70};
+static constexpr size_t kCICPTagSize = 12;
+
+bool FindCICPTag(const uint8_t* icc_data, size_t len, bool is_first_chunk,
+ size_t* cicp_offset, size_t* cicp_length, uint8_t* cicp_tag,
+ size_t* cicp_pos) {
+ if (is_first_chunk) {
+ // Look up the offset of the CICP tag from the first chunk of ICC data.
+ if (len < 132) {
+ return false;
+ }
+ uint32_t tag_count = LoadBE32(&icc_data[128]);
+ if (len < 132 + 12 * tag_count) {
+ return false;
+ }
+ for (uint32_t i = 0; i < tag_count; ++i) {
+ if (memcmp(&icc_data[132 + 12 * i], kCICPTagSignature, 4) == 0) {
+ *cicp_offset = LoadBE32(&icc_data[136 + 12 * i]);
+ *cicp_length = LoadBE32(&icc_data[140 + 12 * i]);
+ }
+ }
+ if (*cicp_length < kCICPTagSize) {
+ return false;
+ }
+ }
+ if (*cicp_offset < len) {
+ size_t n_bytes = std::min(len - *cicp_offset, kCICPTagSize - *cicp_pos);
+ memcpy(&cicp_tag[*cicp_pos], &icc_data[*cicp_offset], n_bytes);
+ *cicp_pos += n_bytes;
+ *cicp_offset = 0;
+ } else {
+ *cicp_offset -= len;
+ }
+ return true;
+}
+
+uint8_t LookupCICPTransferFunctionFromAppData(const uint8_t* app_data,
+ size_t len) {
+ size_t last_index = 0;
+ size_t cicp_offset = 0;
+ size_t cicp_length = 0;
+ uint8_t cicp_tag[kCICPTagSize] = {};
+ size_t cicp_pos = 0;
+ size_t pos = 0;
+ while (pos < len) {
+ const uint8_t* marker = &app_data[pos];
+ if (pos + 4 > len) {
+ return kUnknownTf;
+ }
+ size_t marker_size = (marker[2] << 8) + marker[3] + 2;
+ if (pos + marker_size > len) {
+ return kUnknownTf;
+ }
+ if (marker_size < 18 || marker[0] != 0xff || marker[1] != kICCMarker ||
+ memcmp(&marker[4], kICCSignature, 12) != 0) {
+ pos += marker_size;
+ continue;
+ }
+ uint8_t index = marker[16];
+ uint8_t total = marker[17];
+ const uint8_t* payload = marker + 18;
+ const size_t payload_size = marker_size - 18;
+ if (index != last_index + 1 || index > total) {
+ return kUnknownTf;
+ }
+ if (!FindCICPTag(payload, payload_size, last_index == 0, &cicp_offset,
+ &cicp_length, &cicp_tag[0], &cicp_pos)) {
+ return kUnknownTf;
+ }
+ if (cicp_pos == kCICPTagSize) {
+ break;
+ }
+ ++last_index;
+ }
+ if (cicp_pos >= kCICPTagSize && memcmp(cicp_tag, kCICPTagSignature, 4) == 0) {
+ return cicp_tag[9];
+ }
+ return kUnknownTf;
+}
+
+uint8_t LookupCICPTransferFunctionFromICCProfile(const uint8_t* icc_data,
+ size_t len) {
+ size_t cicp_offset = 0;
+ size_t cicp_length = 0;
+ uint8_t cicp_tag[kCICPTagSize] = {};
+ size_t cicp_pos = 0;
+ if (!FindCICPTag(icc_data, len, true, &cicp_offset, &cicp_length,
+ &cicp_tag[0], &cicp_pos)) {
+ return kUnknownTf;
+ }
+ if (cicp_pos >= kCICPTagSize && memcmp(cicp_tag, kCICPTagSignature, 4) == 0) {
+ return cicp_tag[9];
+ }
+ return kUnknownTf;
+}
+
+JpegliDataType ConvertDataType(JxlDataType type) {
+ switch (type) {
+ case JXL_TYPE_UINT8:
+ return JPEGLI_TYPE_UINT8;
+ case JXL_TYPE_UINT16:
+ return JPEGLI_TYPE_UINT16;
+ case JXL_TYPE_FLOAT:
+ return JPEGLI_TYPE_FLOAT;
+ default:
+ return JPEGLI_TYPE_UINT8;
+ }
+}
+
+JpegliEndianness ConvertEndianness(JxlEndianness endianness) {
+ switch (endianness) {
+ case JXL_NATIVE_ENDIAN:
+ return JPEGLI_NATIVE_ENDIAN;
+ case JXL_LITTLE_ENDIAN:
+ return JPEGLI_LITTLE_ENDIAN;
+ case JXL_BIG_ENDIAN:
+ return JPEGLI_BIG_ENDIAN;
+ default:
+ return JPEGLI_NATIVE_ENDIAN;
+ }
+}
+
+void ToFloatRow(const uint8_t* row_in, JxlPixelFormat format, size_t len,
+ float* row_out) {
+ bool is_little_endian =
+ (format.endianness == JXL_LITTLE_ENDIAN ||
+ (format.endianness == JXL_NATIVE_ENDIAN && IsLittleEndian()));
+ static constexpr double kMul8 = 1.0 / 255.0;
+ static constexpr double kMul16 = 1.0 / 65535.0;
+ if (format.data_type == JXL_TYPE_UINT8) {
+ for (size_t x = 0; x < len; ++x) {
+ row_out[x] = row_in[x] * kMul8;
+ }
+ } else if (format.data_type == JXL_TYPE_UINT16 && is_little_endian) {
+ for (size_t x = 0; x < len; ++x) {
+ row_out[x] = LoadLE16(&row_in[2 * x]) * kMul16;
+ }
+ } else if (format.data_type == JXL_TYPE_UINT16 && !is_little_endian) {
+ for (size_t x = 0; x < len; ++x) {
+ row_out[x] = LoadBE16(&row_in[2 * x]) * kMul16;
+ }
+ } else if (format.data_type == JXL_TYPE_FLOAT && is_little_endian) {
+ for (size_t x = 0; x < len; ++x) {
+ row_out[x] = LoadLEFloat(&row_in[4 * x]);
+ }
+ } else if (format.data_type == JXL_TYPE_FLOAT && !is_little_endian) {
+ for (size_t x = 0; x < len; ++x) {
+ row_out[x] = LoadBEFloat(&row_in[4 * x]);
+ }
+ }
+}
+
+Status EncodeJpegToTargetSize(const PackedPixelFile& ppf,
+ const JpegSettings& jpeg_settings,
+ size_t target_size, ThreadPool* pool,
+ std::vector<uint8_t>* output) {
+ output->clear();
+ size_t best_error = std::numeric_limits<size_t>::max();
+ float distance0 = -1.0f;
+ float distance1 = -1.0f;
+ float distance = 1.0f;
+ for (int step = 0; step < 15; ++step) {
+ JpegSettings settings = jpeg_settings;
+ settings.libjpeg_quality = 0;
+ settings.distance = distance;
+ settings.target_size = 0;
+ std::vector<uint8_t> compressed;
+ JXL_RETURN_IF_ERROR(EncodeJpeg(ppf, settings, pool, &compressed));
+ size_t size = compressed.size();
+ // prefer being under the target size to being over it
+ size_t error = size < target_size
+ ? target_size - size
+ : static_cast<size_t>(1.2f * (size - target_size));
+ if (error < best_error) {
+ best_error = error;
+ std::swap(*output, compressed);
+ }
+ float rel_error = size * 1.0f / target_size;
+ if (std::abs(rel_error - 1.0f) < 0.002f) {
+ break;
+ }
+ if (size < target_size) {
+ distance1 = distance;
+ } else {
+ distance0 = distance;
+ }
+ if (distance1 == -1) {
+ distance *= std::pow(rel_error, 1.5) * 1.05;
+ } else if (distance0 == -1) {
+ distance *= std::pow(rel_error, 1.5) * 0.95;
+ } else {
+ distance = 0.5 * (distance0 + distance1);
+ }
+ }
+ return true;
+}
+
+} // namespace
+
+Status EncodeJpeg(const PackedPixelFile& ppf, const JpegSettings& jpeg_settings,
+ ThreadPool* pool, std::vector<uint8_t>* compressed) {
+ if (jpeg_settings.libjpeg_quality > 0) {
+ auto encoder = Encoder::FromExtension(".jpg");
+ encoder->SetOption("q", std::to_string(jpeg_settings.libjpeg_quality));
+ if (!jpeg_settings.libjpeg_chroma_subsampling.empty()) {
+ encoder->SetOption("chroma_subsampling",
+ jpeg_settings.libjpeg_chroma_subsampling);
+ }
+ EncodedImage encoded;
+ JXL_RETURN_IF_ERROR(encoder->Encode(ppf, &encoded, pool));
+ size_t target_size = encoded.bitstreams[0].size();
+ return EncodeJpegToTargetSize(ppf, jpeg_settings, target_size, pool,
+ compressed);
+ }
+ if (jpeg_settings.target_size > 0) {
+ return EncodeJpegToTargetSize(ppf, jpeg_settings, jpeg_settings.target_size,
+ pool, compressed);
+ }
+ JXL_RETURN_IF_ERROR(VerifyInput(ppf));
+
+ ColorEncoding color_encoding;
+ JXL_RETURN_IF_ERROR(GetColorEncoding(ppf, &color_encoding));
+
+ ColorSpaceTransform c_transform(GetJxlCms());
+ ColorEncoding xyb_encoding;
+ if (jpeg_settings.xyb) {
+ if (ppf.info.num_color_channels != 3) {
+ return JXL_FAILURE("Only RGB input is supported in XYB mode.");
+ }
+ if (HasICCProfile(jpeg_settings.app_data)) {
+ return JXL_FAILURE("APP data ICC profile is not supported in XYB mode.");
+ }
+ const ColorEncoding& c_desired = ColorEncoding::LinearSRGB(false);
+ JXL_RETURN_IF_ERROR(
+ c_transform.Init(color_encoding, c_desired, 255.0f, ppf.info.xsize, 1));
+ xyb_encoding.SetColorSpace(jxl::ColorSpace::kXYB);
+ xyb_encoding.rendering_intent = jxl::RenderingIntent::kPerceptual;
+ JXL_RETURN_IF_ERROR(xyb_encoding.CreateICC());
+ }
+ const ColorEncoding& output_encoding =
+ jpeg_settings.xyb ? xyb_encoding : color_encoding;
+
+ // We need to declare all the non-trivial destructor local variables
+ // before the call to setjmp().
+ std::vector<uint8_t> pixels;
+ unsigned char* output_buffer = nullptr;
+ unsigned long output_size = 0;
+ std::vector<uint8_t> row_bytes;
+ size_t rowlen = RoundUpTo(ppf.info.xsize, VectorSize());
+ hwy::AlignedFreeUniquePtr<float[]> xyb_tmp =
+ hwy::AllocateAligned<float>(6 * rowlen);
+ hwy::AlignedFreeUniquePtr<float[]> premul_absorb =
+ hwy::AllocateAligned<float>(VectorSize() * 12);
+ ComputePremulAbsorb(255.0f, premul_absorb.get());
+
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ jpeg_error_mgr jerr;
+ jmp_buf env;
+ cinfo.err = jpegli_std_error(&jerr);
+ jerr.error_exit = &MyErrorExit;
+ if (setjmp(env)) {
+ return false;
+ }
+ cinfo.client_data = static_cast<void*>(&env);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &output_buffer, &output_size);
+ const JxlBasicInfo& info = ppf.info;
+ cinfo.image_width = info.xsize;
+ cinfo.image_height = info.ysize;
+ cinfo.input_components = info.num_color_channels;
+ cinfo.in_color_space =
+ cinfo.input_components == 1 ? JCS_GRAYSCALE : JCS_RGB;
+ if (jpeg_settings.xyb) {
+ jpegli_set_xyb_mode(&cinfo);
+ } else if (jpeg_settings.use_std_quant_tables) {
+ jpegli_use_standard_quant_tables(&cinfo);
+ }
+ uint8_t cicp_tf = kUnknownTf;
+ if (!jpeg_settings.app_data.empty()) {
+ cicp_tf = LookupCICPTransferFunctionFromAppData(
+ jpeg_settings.app_data.data(), jpeg_settings.app_data.size());
+ } else if (!output_encoding.IsSRGB()) {
+ cicp_tf = LookupCICPTransferFunctionFromICCProfile(
+ output_encoding.ICC().data(), output_encoding.ICC().size());
+ }
+ jpegli_set_cicp_transfer_function(&cinfo, cicp_tf);
+ jpegli_set_defaults(&cinfo);
+ if (!jpeg_settings.chroma_subsampling.empty()) {
+ if (jpeg_settings.chroma_subsampling == "444") {
+ cinfo.comp_info[0].h_samp_factor = 1;
+ cinfo.comp_info[0].v_samp_factor = 1;
+ } else if (jpeg_settings.chroma_subsampling == "440") {
+ cinfo.comp_info[0].h_samp_factor = 1;
+ cinfo.comp_info[0].v_samp_factor = 2;
+ } else if (jpeg_settings.chroma_subsampling == "422") {
+ cinfo.comp_info[0].h_samp_factor = 2;
+ cinfo.comp_info[0].v_samp_factor = 1;
+ } else if (jpeg_settings.chroma_subsampling == "420") {
+ cinfo.comp_info[0].h_samp_factor = 2;
+ cinfo.comp_info[0].v_samp_factor = 2;
+ } else {
+ return false;
+ }
+ for (int i = 1; i < cinfo.num_components; ++i) {
+ cinfo.comp_info[i].h_samp_factor = 1;
+ cinfo.comp_info[i].v_samp_factor = 1;
+ }
+ }
+ jpegli_enable_adaptive_quantization(
+ &cinfo, jpeg_settings.use_adaptive_quantization);
+ jpegli_set_distance(&cinfo, jpeg_settings.distance, TRUE);
+ jpegli_set_progressive_level(&cinfo, jpeg_settings.progressive_level);
+ cinfo.optimize_coding = jpeg_settings.optimize_coding;
+ if (!jpeg_settings.app_data.empty()) {
+ // Make sure jpegli_start_compress() does not write any APP markers.
+ cinfo.write_JFIF_header = false;
+ cinfo.write_Adobe_marker = false;
+ }
+ const PackedImage& image = ppf.frames[0].color;
+ if (jpeg_settings.xyb) {
+ jpegli_set_input_format(&cinfo, JPEGLI_TYPE_FLOAT, JPEGLI_NATIVE_ENDIAN);
+ } else {
+ jpegli_set_input_format(&cinfo, ConvertDataType(image.format.data_type),
+ ConvertEndianness(image.format.endianness));
+ }
+ jpegli_start_compress(&cinfo, TRUE);
+ if (!jpeg_settings.app_data.empty()) {
+ JXL_RETURN_IF_ERROR(WriteAppData(&cinfo, jpeg_settings.app_data));
+ }
+ if ((jpeg_settings.app_data.empty() && !output_encoding.IsSRGB()) ||
+ jpeg_settings.xyb) {
+ jpegli_write_icc_profile(&cinfo, output_encoding.ICC().data(),
+ output_encoding.ICC().size());
+ }
+ const uint8_t* pixels = reinterpret_cast<const uint8_t*>(image.pixels());
+ if (jpeg_settings.xyb) {
+ float* src_buf = c_transform.BufSrc(0);
+ float* dst_buf = c_transform.BufDst(0);
+ for (size_t y = 0; y < image.ysize; ++y) {
+ // convert to float
+ ToFloatRow(&pixels[y * image.stride], image.format, 3 * image.xsize,
+ src_buf);
+ // convert to linear srgb
+ if (!c_transform.Run(0, src_buf, dst_buf)) {
+ return false;
+ }
+ // deinterleave channels
+ float* row0 = &xyb_tmp[0];
+ float* row1 = &xyb_tmp[rowlen];
+ float* row2 = &xyb_tmp[2 * rowlen];
+ for (size_t x = 0; x < image.xsize; ++x) {
+ row0[x] = dst_buf[3 * x + 0];
+ row1[x] = dst_buf[3 * x + 1];
+ row2[x] = dst_buf[3 * x + 2];
+ }
+ // convert to xyb
+ LinearRGBRowToXYB(row0, row1, row2, premul_absorb.get(), image.xsize);
+ // scale xyb
+ ScaleXYBRow(row0, row1, row2, image.xsize);
+ // interleave channels
+ float* row_out = &xyb_tmp[3 * rowlen];
+ for (size_t x = 0; x < image.xsize; ++x) {
+ row_out[3 * x + 0] = row0[x];
+ row_out[3 * x + 1] = row1[x];
+ row_out[3 * x + 2] = row2[x];
+ }
+ // feed to jpegli as native endian floats
+ JSAMPROW row[] = {reinterpret_cast<uint8_t*>(row_out)};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ }
+ } else {
+ row_bytes.resize(image.stride);
+ for (size_t y = 0; y < info.ysize; ++y) {
+ memcpy(&row_bytes[0], pixels + y * image.stride, image.stride);
+ JSAMPROW row[] = {row_bytes.data()};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ }
+ }
+ jpegli_finish_compress(&cinfo);
+ compressed->resize(output_size);
+ std::copy_n(output_buffer, output_size, compressed->data());
+ return true;
+ };
+ bool success = try_catch_block();
+ jpegli_destroy_compress(&cinfo);
+ if (output_buffer) free(output_buffer);
+ return success;
+}
+
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/enc/jpegli.h b/third_party/jpeg-xl/lib/extras/enc/jpegli.h
new file mode 100644
index 0000000000..194b9f7e48
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/enc/jpegli.h
@@ -0,0 +1,47 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_ENC_JPEGLI_H_
+#define LIB_EXTRAS_ENC_JPEGLI_H_
+
+// Encodes JPG pixels and metadata in memory using the libjpegli library.
+
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace extras {
+
+struct JpegSettings {
+ bool xyb = false;
+ size_t target_size = 0;
+ float distance = 1.f;
+ bool use_adaptive_quantization = true;
+ bool use_std_quant_tables = false;
+ int progressive_level = 2;
+ bool optimize_coding = true;
+ std::string chroma_subsampling;
+ int libjpeg_quality = 0;
+ std::string libjpeg_chroma_subsampling;
+ // If not empty, must contain concatenated APP marker segments. In this case,
+ // these and only these APP marker segments will be written to the JPEG
+ // output. In xyb mode app_data must not contain an ICC profile, in this
+ // case an additional APP2 ICC profile for the XYB colorspace will be emitted.
+ std::vector<uint8_t> app_data;
+};
+
+Status EncodeJpeg(const PackedPixelFile& ppf, const JpegSettings& jpeg_settings,
+ ThreadPool* pool, std::vector<uint8_t>* compressed);
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_ENC_JPEGLI_H_
diff --git a/third_party/jpeg-xl/lib/extras/enc/jpg.cc b/third_party/jpeg-xl/lib/extras/enc/jpg.cc
new file mode 100644
index 0000000000..179bcbe777
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/enc/jpg.cc
@@ -0,0 +1,427 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/enc/jpg.h"
+
+#include <jpeglib.h>
+#include <setjmp.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <iterator>
+#include <numeric>
+#include <sstream>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/exif.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/sanitizers.h"
+#if JPEGXL_ENABLE_SJPEG
+#include "sjpeg.h"
+#endif
+
+namespace jxl {
+namespace extras {
+
+namespace {
+
+constexpr unsigned char kICCSignature[12] = {
+ 0x49, 0x43, 0x43, 0x5F, 0x50, 0x52, 0x4F, 0x46, 0x49, 0x4C, 0x45, 0x00};
+constexpr int kICCMarker = JPEG_APP0 + 2;
+constexpr size_t kMaxBytesInMarker = 65533;
+
+constexpr unsigned char kExifSignature[6] = {0x45, 0x78, 0x69,
+ 0x66, 0x00, 0x00};
+constexpr int kExifMarker = JPEG_APP0 + 1;
+
+enum class JpegEncoder {
+ kLibJpeg,
+ kSJpeg,
+};
+
+#define ARRAY_SIZE(X) (sizeof(X) / sizeof((X)[0]))
+
+// Popular jpeg scan scripts
+// The fields of the individual scans are:
+// comps_in_scan, component_index[], Ss, Se, Ah, Al
+static constexpr jpeg_scan_info kScanScript1[] = {
+ {1, {0}, 0, 0, 0, 0}, //
+ {1, {1}, 0, 0, 0, 0}, //
+ {1, {2}, 0, 0, 0, 0}, //
+ {1, {0}, 1, 8, 0, 0}, //
+ {1, {0}, 9, 63, 0, 0}, //
+ {1, {1}, 1, 63, 0, 0}, //
+ {1, {2}, 1, 63, 0, 0}, //
+};
+static constexpr size_t kNumScans1 = ARRAY_SIZE(kScanScript1);
+
+static constexpr jpeg_scan_info kScanScript2[] = {
+ {1, {0}, 0, 0, 0, 0}, //
+ {1, {1}, 0, 0, 0, 0}, //
+ {1, {2}, 0, 0, 0, 0}, //
+ {1, {0}, 1, 2, 0, 1}, //
+ {1, {0}, 3, 63, 0, 1}, //
+ {1, {0}, 1, 63, 1, 0}, //
+ {1, {1}, 1, 63, 0, 0}, //
+ {1, {2}, 1, 63, 0, 0}, //
+};
+static constexpr size_t kNumScans2 = ARRAY_SIZE(kScanScript2);
+
+static constexpr jpeg_scan_info kScanScript3[] = {
+ {1, {0}, 0, 0, 0, 0}, //
+ {1, {1}, 0, 0, 0, 0}, //
+ {1, {2}, 0, 0, 0, 0}, //
+ {1, {0}, 1, 63, 0, 2}, //
+ {1, {0}, 1, 63, 2, 1}, //
+ {1, {0}, 1, 63, 1, 0}, //
+ {1, {1}, 1, 63, 0, 0}, //
+ {1, {2}, 1, 63, 0, 0}, //
+};
+static constexpr size_t kNumScans3 = ARRAY_SIZE(kScanScript3);
+
+static constexpr jpeg_scan_info kScanScript4[] = {
+ {3, {0, 1, 2}, 0, 0, 0, 1}, //
+ {1, {0}, 1, 5, 0, 2}, //
+ {1, {2}, 1, 63, 0, 1}, //
+ {1, {1}, 1, 63, 0, 1}, //
+ {1, {0}, 6, 63, 0, 2}, //
+ {1, {0}, 1, 63, 2, 1}, //
+ {3, {0, 1, 2}, 0, 0, 1, 0}, //
+ {1, {2}, 1, 63, 1, 0}, //
+ {1, {1}, 1, 63, 1, 0}, //
+ {1, {0}, 1, 63, 1, 0}, //
+};
+static constexpr size_t kNumScans4 = ARRAY_SIZE(kScanScript4);
+
+static constexpr jpeg_scan_info kScanScript5[] = {
+ {3, {0, 1, 2}, 0, 0, 0, 1}, //
+ {1, {0}, 1, 5, 0, 2}, //
+ {1, {1}, 1, 5, 0, 2}, //
+ {1, {2}, 1, 5, 0, 2}, //
+ {1, {1}, 6, 63, 0, 2}, //
+ {1, {2}, 6, 63, 0, 2}, //
+ {1, {0}, 6, 63, 0, 2}, //
+ {1, {0}, 1, 63, 2, 1}, //
+ {1, {1}, 1, 63, 2, 1}, //
+ {1, {2}, 1, 63, 2, 1}, //
+ {3, {0, 1, 2}, 0, 0, 1, 0}, //
+ {1, {0}, 1, 63, 1, 0}, //
+ {1, {1}, 1, 63, 1, 0}, //
+ {1, {2}, 1, 63, 1, 0}, //
+};
+static constexpr size_t kNumScans5 = ARRAY_SIZE(kScanScript5);
+
+// Adapt RGB scan info to grayscale jpegs.
+void FilterScanComponents(const jpeg_compress_struct* cinfo,
+ jpeg_scan_info* si) {
+ const int all_comps_in_scan = si->comps_in_scan;
+ si->comps_in_scan = 0;
+ for (int j = 0; j < all_comps_in_scan; ++j) {
+ const int component = si->component_index[j];
+ if (component < cinfo->input_components) {
+ si->component_index[si->comps_in_scan++] = component;
+ }
+ }
+}
+
+Status SetJpegProgression(int progressive_id,
+ std::vector<jpeg_scan_info>* scan_infos,
+ jpeg_compress_struct* cinfo) {
+ if (progressive_id < 0) {
+ return true;
+ }
+ if (progressive_id == 0) {
+ jpeg_simple_progression(cinfo);
+ return true;
+ }
+ constexpr const jpeg_scan_info* kScanScripts[] = {
+ kScanScript1, kScanScript2, kScanScript3, kScanScript4, kScanScript5,
+ };
+ constexpr size_t kNumScans[] = {kNumScans1, kNumScans2, kNumScans3,
+ kNumScans4, kNumScans5};
+ if (progressive_id > static_cast<int>(ARRAY_SIZE(kNumScans))) {
+ return JXL_FAILURE("Unknown jpeg scan script id %d", progressive_id);
+ }
+ const jpeg_scan_info* scan_script = kScanScripts[progressive_id - 1];
+ const size_t num_scans = kNumScans[progressive_id - 1];
+ // filter scan script for number of components
+ for (size_t i = 0; i < num_scans; ++i) {
+ jpeg_scan_info scan_info = scan_script[i];
+ FilterScanComponents(cinfo, &scan_info);
+ if (scan_info.comps_in_scan > 0) {
+ scan_infos->emplace_back(std::move(scan_info));
+ }
+ }
+ cinfo->scan_info = scan_infos->data();
+ cinfo->num_scans = scan_infos->size();
+ return true;
+}
+
+bool IsSRGBEncoding(const JxlColorEncoding& c) {
+ return ((c.color_space == JXL_COLOR_SPACE_RGB ||
+ c.color_space == JXL_COLOR_SPACE_GRAY) &&
+ c.primaries == JXL_PRIMARIES_SRGB &&
+ c.white_point == JXL_WHITE_POINT_D65 &&
+ c.transfer_function == JXL_TRANSFER_FUNCTION_SRGB);
+}
+
+void WriteICCProfile(jpeg_compress_struct* const cinfo,
+ const std::vector<uint8_t>& icc) {
+ constexpr size_t kMaxIccBytesInMarker =
+ kMaxBytesInMarker - sizeof kICCSignature - 2;
+ const int num_markers =
+ static_cast<int>(DivCeil(icc.size(), kMaxIccBytesInMarker));
+ size_t begin = 0;
+ for (int current_marker = 0; current_marker < num_markers; ++current_marker) {
+ const size_t length = std::min(kMaxIccBytesInMarker, icc.size() - begin);
+ jpeg_write_m_header(
+ cinfo, kICCMarker,
+ static_cast<unsigned int>(length + sizeof kICCSignature + 2));
+ for (const unsigned char c : kICCSignature) {
+ jpeg_write_m_byte(cinfo, c);
+ }
+ jpeg_write_m_byte(cinfo, current_marker + 1);
+ jpeg_write_m_byte(cinfo, num_markers);
+ for (size_t i = 0; i < length; ++i) {
+ jpeg_write_m_byte(cinfo, icc[begin]);
+ ++begin;
+ }
+ }
+}
+void WriteExif(jpeg_compress_struct* const cinfo,
+ const std::vector<uint8_t>& exif) {
+ jpeg_write_m_header(
+ cinfo, kExifMarker,
+ static_cast<unsigned int>(exif.size() + sizeof kExifSignature));
+ for (const unsigned char c : kExifSignature) {
+ jpeg_write_m_byte(cinfo, c);
+ }
+ for (size_t i = 0; i < exif.size(); ++i) {
+ jpeg_write_m_byte(cinfo, exif[i]);
+ }
+}
+
+Status SetChromaSubsampling(const std::string& subsampling,
+ jpeg_compress_struct* const cinfo) {
+ const std::pair<const char*,
+ std::pair<std::array<uint8_t, 3>, std::array<uint8_t, 3>>>
+ options[] = {{"444", {{{1, 1, 1}}, {{1, 1, 1}}}},
+ {"420", {{{2, 1, 1}}, {{2, 1, 1}}}},
+ {"422", {{{2, 1, 1}}, {{1, 1, 1}}}},
+ {"440", {{{1, 1, 1}}, {{2, 1, 1}}}}};
+ for (const auto& option : options) {
+ if (subsampling == option.first) {
+ for (size_t i = 0; i < 3; i++) {
+ cinfo->comp_info[i].h_samp_factor = option.second.first[i];
+ cinfo->comp_info[i].v_samp_factor = option.second.second[i];
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+Status EncodeWithLibJpeg(const PackedImage& image, const JxlBasicInfo& info,
+ const JxlColorEncoding& color_encoding,
+ const std::vector<uint8_t>& icc,
+ std::vector<uint8_t> exif, size_t quality,
+ const std::string& chroma_subsampling,
+ int progressive_id, bool optimize_coding,
+ std::vector<uint8_t>* bytes) {
+ if (BITS_IN_JSAMPLE != 8 || sizeof(JSAMPLE) != 1) {
+ return JXL_FAILURE("Only 8 bit JSAMPLE is supported.");
+ }
+ jpeg_compress_struct cinfo = {};
+ jpeg_error_mgr jerr;
+ cinfo.err = jpeg_std_error(&jerr);
+ jpeg_create_compress(&cinfo);
+ unsigned char* buffer = nullptr;
+ unsigned long size = 0;
+ jpeg_mem_dest(&cinfo, &buffer, &size);
+ cinfo.image_width = image.xsize;
+ cinfo.image_height = image.ysize;
+ cinfo.input_components = info.num_color_channels;
+ cinfo.in_color_space = info.num_color_channels == 1 ? JCS_GRAYSCALE : JCS_RGB;
+ jpeg_set_defaults(&cinfo);
+ cinfo.optimize_coding = optimize_coding;
+ if (cinfo.input_components == 3) {
+ JXL_RETURN_IF_ERROR(SetChromaSubsampling(chroma_subsampling, &cinfo));
+ }
+ if (color_encoding.color_space == JXL_COLOR_SPACE_XYB) {
+ // Tell libjpeg not to convert XYB data to YCbCr.
+ jpeg_set_colorspace(&cinfo, JCS_RGB);
+ }
+ jpeg_set_quality(&cinfo, quality, TRUE);
+ std::vector<jpeg_scan_info> scan_infos;
+ JXL_RETURN_IF_ERROR(SetJpegProgression(progressive_id, &scan_infos, &cinfo));
+ jpeg_start_compress(&cinfo, TRUE);
+ if (!icc.empty()) {
+ WriteICCProfile(&cinfo, icc);
+ }
+ if (!exif.empty()) {
+ ResetExifOrientation(exif);
+ WriteExif(&cinfo, exif);
+ }
+ if (cinfo.input_components > 3 || cinfo.input_components < 0)
+ return JXL_FAILURE("invalid numbers of components");
+
+ std::vector<uint8_t> raw_bytes(image.pixels_size);
+ memcpy(&raw_bytes[0], reinterpret_cast<const uint8_t*>(image.pixels()),
+ image.pixels_size);
+ for (size_t y = 0; y < info.ysize; ++y) {
+ JSAMPROW row[] = {raw_bytes.data() + y * image.stride};
+
+ jpeg_write_scanlines(&cinfo, row, 1);
+ }
+ jpeg_finish_compress(&cinfo);
+ jpeg_destroy_compress(&cinfo);
+ bytes->resize(size);
+ // Compressed image data is initialized by libjpeg, which we are not
+ // instrumenting with msan.
+ msan::UnpoisonMemory(buffer, size);
+ std::copy_n(buffer, size, bytes->data());
+ std::free(buffer);
+ return true;
+}
+
+Status EncodeWithSJpeg(const PackedImage& image, const JxlBasicInfo& info,
+ const std::vector<uint8_t>& icc,
+ std::vector<uint8_t> exif, size_t quality,
+ const std::string& chroma_subsampling,
+ std::vector<uint8_t>* bytes) {
+#if !JPEGXL_ENABLE_SJPEG
+ return JXL_FAILURE("JPEG XL was built without sjpeg support");
+#else
+ sjpeg::EncoderParam param(quality);
+ if (!icc.empty()) {
+ param.iccp.assign(icc.begin(), icc.end());
+ }
+ if (!exif.empty()) {
+ ResetExifOrientation(exif);
+ param.exif.assign(exif.begin(), exif.end());
+ }
+ if (chroma_subsampling == "444") {
+ param.yuv_mode = SJPEG_YUV_444;
+ } else if (chroma_subsampling == "420") {
+ param.yuv_mode = SJPEG_YUV_SHARP;
+ } else {
+ return JXL_FAILURE("sjpeg does not support this chroma subsampling mode");
+ }
+ size_t stride = info.xsize * 3;
+ const uint8_t* pixels = reinterpret_cast<const uint8_t*>(image.pixels());
+ std::string output;
+ JXL_RETURN_IF_ERROR(
+ sjpeg::Encode(pixels, image.xsize, image.ysize, stride, param, &output));
+ bytes->assign(
+ reinterpret_cast<const uint8_t*>(output.data()),
+ reinterpret_cast<const uint8_t*>(output.data() + output.size()));
+ return true;
+#endif
+}
+
+Status EncodeImageJPG(const PackedImage& image, const JxlBasicInfo& info,
+ const JxlColorEncoding& color_encoding,
+ const std::vector<uint8_t>& icc,
+ std::vector<uint8_t> exif, JpegEncoder encoder,
+ size_t quality, const std::string& chroma_subsampling,
+ int progressive_id, bool optimize_coding,
+ ThreadPool* pool, std::vector<uint8_t>* bytes) {
+ if (image.format.data_type != JXL_TYPE_UINT8) {
+ return JXL_FAILURE("Unsupported pixel data type");
+ }
+ if (info.alpha_bits > 0) {
+ return JXL_FAILURE("alpha is not supported");
+ }
+ if (quality > 100) {
+ return JXL_FAILURE("please specify a 0-100 JPEG quality");
+ }
+
+ switch (encoder) {
+ case JpegEncoder::kLibJpeg:
+ JXL_RETURN_IF_ERROR(EncodeWithLibJpeg(
+ image, info, color_encoding, icc, std::move(exif), quality,
+ chroma_subsampling, progressive_id, optimize_coding, bytes));
+ break;
+ case JpegEncoder::kSJpeg:
+ JXL_RETURN_IF_ERROR(EncodeWithSJpeg(image, info, icc, std::move(exif),
+ quality, chroma_subsampling, bytes));
+ break;
+ default:
+ return JXL_FAILURE("tried to use an unknown JPEG encoder");
+ }
+
+ return true;
+}
+
+class JPEGEncoder : public Encoder {
+ std::vector<JxlPixelFormat> AcceptedFormats() const override {
+ std::vector<JxlPixelFormat> formats;
+ for (const uint32_t num_channels : {1, 3}) {
+ for (JxlEndianness endianness : {JXL_BIG_ENDIAN, JXL_LITTLE_ENDIAN}) {
+ formats.push_back(JxlPixelFormat{/*num_channels=*/num_channels,
+ /*data_type=*/JXL_TYPE_UINT8,
+ /*endianness=*/endianness,
+ /*align=*/0});
+ }
+ }
+ return formats;
+ }
+ Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image,
+ ThreadPool* pool = nullptr) const override {
+ JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info));
+ int quality = 100;
+ std::string chroma_subsampling = "444";
+ JpegEncoder jpeg_encoder = JpegEncoder::kLibJpeg;
+ int progressive_id = -1;
+ bool optimize_coding = true;
+ for (const auto& it : options()) {
+ if (it.first == "q") {
+ std::istringstream is(it.second);
+ JXL_RETURN_IF_ERROR(static_cast<bool>(is >> quality));
+ } else if (it.first == "chroma_subsampling") {
+ chroma_subsampling = it.second;
+ } else if (it.first == "jpeg_encoder") {
+ if (it.second == "libjpeg") {
+ jpeg_encoder = JpegEncoder::kLibJpeg;
+ } else if (it.second == "sjpeg") {
+ jpeg_encoder = JpegEncoder::kSJpeg;
+ } else {
+ return JXL_FAILURE("unknown jpeg encoder \"%s\"", it.second.c_str());
+ }
+ } else if (it.first == "progressive") {
+ std::istringstream is(it.second);
+ JXL_RETURN_IF_ERROR(static_cast<bool>(is >> progressive_id));
+ } else if (it.first == "optimize" && it.second == "OFF") {
+ optimize_coding = false;
+ }
+ }
+ std::vector<uint8_t> icc;
+ if (!IsSRGBEncoding(ppf.color_encoding)) {
+ icc = ppf.icc;
+ }
+ encoded_image->bitstreams.clear();
+ encoded_image->bitstreams.reserve(ppf.frames.size());
+ for (const auto& frame : ppf.frames) {
+ JXL_RETURN_IF_ERROR(VerifyPackedImage(frame.color, ppf.info));
+ encoded_image->bitstreams.emplace_back();
+ JXL_RETURN_IF_ERROR(EncodeImageJPG(
+ frame.color, ppf.info, ppf.color_encoding, icc, ppf.metadata.exif,
+ jpeg_encoder, quality, chroma_subsampling, progressive_id,
+ optimize_coding, pool, &encoded_image->bitstreams.back()));
+ }
+ return true;
+ }
+};
+
+} // namespace
+
+std::unique_ptr<Encoder> GetJPEGEncoder() {
+ return jxl::make_unique<JPEGEncoder>();
+}
+
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/enc/jpg.h b/third_party/jpeg-xl/lib/extras/enc/jpg.h
new file mode 100644
index 0000000000..20b37cd168
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/enc/jpg.h
@@ -0,0 +1,23 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_ENC_JPG_H_
+#define LIB_EXTRAS_ENC_JPG_H_
+
+// Encodes JPG pixels and metadata in memory.
+
+#include <memory>
+
+#include "lib/extras/enc/encode.h"
+
+namespace jxl {
+namespace extras {
+
+std::unique_ptr<Encoder> GetJPEGEncoder();
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_ENC_JPG_H_
diff --git a/third_party/jpeg-xl/lib/extras/enc/jxl.cc b/third_party/jpeg-xl/lib/extras/enc/jxl.cc
new file mode 100644
index 0000000000..633c6f2ade
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/enc/jxl.cc
@@ -0,0 +1,276 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/enc/jxl.h"
+
+#include <jxl/encode_cxx.h>
+
+#include "lib/jxl/exif.h"
+
+namespace jxl {
+namespace extras {
+
+JxlEncoderStatus SetOption(const JXLOption& opt,
+ JxlEncoderFrameSettings* settings) {
+ return opt.is_float
+ ? JxlEncoderFrameSettingsSetFloatOption(settings, opt.id, opt.fval)
+ : JxlEncoderFrameSettingsSetOption(settings, opt.id, opt.ival);
+}
+
+bool SetFrameOptions(const std::vector<JXLOption>& options, size_t frame_index,
+ size_t* option_idx, JxlEncoderFrameSettings* settings) {
+ while (*option_idx < options.size()) {
+ const auto& opt = options[*option_idx];
+ if (opt.frame_index > frame_index) {
+ break;
+ }
+ if (JXL_ENC_SUCCESS != SetOption(opt, settings)) {
+ fprintf(stderr, "Setting option id %d failed.\n", opt.id);
+ return false;
+ }
+ (*option_idx)++;
+ }
+ return true;
+}
+
+bool EncodeImageJXL(const JXLCompressParams& params, const PackedPixelFile& ppf,
+ const std::vector<uint8_t>* jpeg_bytes,
+ std::vector<uint8_t>* compressed) {
+ auto encoder = JxlEncoderMake(/*memory_manager=*/nullptr);
+ JxlEncoder* enc = encoder.get();
+
+ if (params.allow_expert_options) {
+ JxlEncoderAllowExpertOptions(enc);
+ }
+
+ if (params.runner_opaque != nullptr &&
+ JXL_ENC_SUCCESS != JxlEncoderSetParallelRunner(enc, params.runner,
+ params.runner_opaque)) {
+ fprintf(stderr, "JxlEncoderSetParallelRunner failed\n");
+ return false;
+ }
+
+ auto settings = JxlEncoderFrameSettingsCreate(enc, nullptr);
+ size_t option_idx = 0;
+ if (!SetFrameOptions(params.options, 0, &option_idx, settings)) {
+ return false;
+ }
+ if (JXL_ENC_SUCCESS !=
+ JxlEncoderSetFrameDistance(settings, params.distance)) {
+ fprintf(stderr, "Setting frame distance failed.\n");
+ return false;
+ }
+
+ bool use_boxes = !ppf.metadata.exif.empty() || !ppf.metadata.xmp.empty() ||
+ !ppf.metadata.jumbf.empty() || !ppf.metadata.iptc.empty();
+ bool use_container = params.use_container || use_boxes ||
+ (jpeg_bytes && params.jpeg_store_metadata);
+
+ if (JXL_ENC_SUCCESS !=
+ JxlEncoderUseContainer(enc, static_cast<int>(use_container))) {
+ fprintf(stderr, "JxlEncoderUseContainer failed.\n");
+ return false;
+ }
+
+ if (jpeg_bytes) {
+ if (params.jpeg_store_metadata &&
+ JXL_ENC_SUCCESS != JxlEncoderStoreJPEGMetadata(enc, JXL_TRUE)) {
+ fprintf(stderr, "Storing JPEG metadata failed.\n");
+ return false;
+ }
+ if (JXL_ENC_SUCCESS != JxlEncoderAddJPEGFrame(settings, jpeg_bytes->data(),
+ jpeg_bytes->size())) {
+ fprintf(stderr, "JxlEncoderAddJPEGFrame() failed.\n");
+ return false;
+ }
+ } else {
+ size_t num_alpha_channels = 0; // Adjusted below.
+ JxlBasicInfo basic_info = ppf.info;
+ if (basic_info.alpha_bits > 0) num_alpha_channels = 1;
+ if (params.intensity_target > 0) {
+ basic_info.intensity_target = params.intensity_target;
+ }
+ basic_info.num_extra_channels =
+ std::max<uint32_t>(num_alpha_channels, ppf.info.num_extra_channels);
+ basic_info.num_color_channels = ppf.info.num_color_channels;
+ const bool lossless = params.distance == 0;
+ basic_info.uses_original_profile = lossless;
+ if (params.override_bitdepth != 0) {
+ basic_info.bits_per_sample = params.override_bitdepth;
+ basic_info.exponent_bits_per_sample =
+ params.override_bitdepth == 32 ? 8 : 0;
+ }
+ if (JXL_ENC_SUCCESS !=
+ JxlEncoderSetCodestreamLevel(enc, params.codestream_level)) {
+ fprintf(stderr, "Setting --codestream_level failed.\n");
+ return false;
+ }
+ if (JXL_ENC_SUCCESS != JxlEncoderSetBasicInfo(enc, &basic_info)) {
+ fprintf(stderr, "JxlEncoderSetBasicInfo() failed.\n");
+ return false;
+ }
+ if (JXL_ENC_SUCCESS !=
+ JxlEncoderSetFrameBitDepth(settings, &params.input_bitdepth)) {
+ fprintf(stderr, "JxlEncoderSetFrameBitDepth() failed.\n");
+ return false;
+ }
+ if (num_alpha_channels != 0 &&
+ JXL_ENC_SUCCESS != JxlEncoderSetExtraChannelDistance(
+ settings, 0, params.alpha_distance)) {
+ fprintf(stderr, "Setting alpha distance failed.\n");
+ return false;
+ }
+ if (lossless &&
+ JXL_ENC_SUCCESS != JxlEncoderSetFrameLossless(settings, JXL_TRUE)) {
+ fprintf(stderr, "JxlEncoderSetFrameLossless() failed.\n");
+ return false;
+ }
+ if (!ppf.icc.empty()) {
+ if (JXL_ENC_SUCCESS !=
+ JxlEncoderSetICCProfile(enc, ppf.icc.data(), ppf.icc.size())) {
+ fprintf(stderr, "JxlEncoderSetICCProfile() failed.\n");
+ return false;
+ }
+ } else {
+ if (JXL_ENC_SUCCESS !=
+ JxlEncoderSetColorEncoding(enc, &ppf.color_encoding)) {
+ fprintf(stderr, "JxlEncoderSetColorEncoding() failed.\n");
+ return false;
+ }
+ }
+
+ if (use_boxes) {
+ if (JXL_ENC_SUCCESS != JxlEncoderUseBoxes(enc)) {
+ fprintf(stderr, "JxlEncoderUseBoxes() failed.\n");
+ return false;
+ }
+ // Prepend 4 zero bytes to exif for tiff header offset
+ std::vector<uint8_t> exif_with_offset;
+ bool bigendian;
+ if (IsExif(ppf.metadata.exif, &bigendian)) {
+ exif_with_offset.resize(ppf.metadata.exif.size() + 4);
+ memcpy(exif_with_offset.data() + 4, ppf.metadata.exif.data(),
+ ppf.metadata.exif.size());
+ }
+ const struct BoxInfo {
+ const char* type;
+ const std::vector<uint8_t>& bytes;
+ } boxes[] = {
+ {"Exif", exif_with_offset},
+ {"xml ", ppf.metadata.xmp},
+ {"jumb", ppf.metadata.jumbf},
+ {"xml ", ppf.metadata.iptc},
+ };
+ for (size_t i = 0; i < sizeof boxes / sizeof *boxes; ++i) {
+ const BoxInfo& box = boxes[i];
+ if (!box.bytes.empty() &&
+ JXL_ENC_SUCCESS != JxlEncoderAddBox(enc, box.type, box.bytes.data(),
+ box.bytes.size(),
+ params.compress_boxes)) {
+ fprintf(stderr, "JxlEncoderAddBox() failed (%s).\n", box.type);
+ return false;
+ }
+ }
+ JxlEncoderCloseBoxes(enc);
+ }
+
+ for (size_t num_frame = 0; num_frame < ppf.frames.size(); ++num_frame) {
+ const jxl::extras::PackedFrame& pframe = ppf.frames[num_frame];
+ const jxl::extras::PackedImage& pimage = pframe.color;
+ JxlPixelFormat ppixelformat = pimage.format;
+ if (JXL_ENC_SUCCESS !=
+ JxlEncoderSetFrameHeader(settings, &pframe.frame_info)) {
+ fprintf(stderr, "JxlEncoderSetFrameHeader() failed.\n");
+ return false;
+ }
+ if (!SetFrameOptions(params.options, num_frame, &option_idx, settings)) {
+ return false;
+ }
+ if (num_alpha_channels > 0) {
+ JxlExtraChannelInfo extra_channel_info;
+ JxlEncoderInitExtraChannelInfo(JXL_CHANNEL_ALPHA, &extra_channel_info);
+ extra_channel_info.bits_per_sample = ppf.info.alpha_bits;
+ extra_channel_info.exponent_bits_per_sample =
+ ppf.info.alpha_exponent_bits;
+ if (params.premultiply != -1) {
+ if (params.premultiply != 0 && params.premultiply != 1) {
+ fprintf(stderr, "premultiply must be one of: -1, 0, 1.\n");
+ return false;
+ }
+ extra_channel_info.alpha_premultiplied = params.premultiply;
+ }
+ if (JXL_ENC_SUCCESS !=
+ JxlEncoderSetExtraChannelInfo(enc, 0, &extra_channel_info)) {
+ fprintf(stderr, "JxlEncoderSetExtraChannelInfo() failed.\n");
+ return false;
+ }
+ // We take the extra channel blend info frame_info, but don't do
+ // clamping.
+ JxlBlendInfo extra_channel_blend_info =
+ pframe.frame_info.layer_info.blend_info;
+ extra_channel_blend_info.clamp = JXL_FALSE;
+ JxlEncoderSetExtraChannelBlendInfo(settings, 0,
+ &extra_channel_blend_info);
+ }
+ size_t num_interleaved_alpha =
+ (ppixelformat.num_channels - ppf.info.num_color_channels);
+ // Add extra channel info for the rest of the extra channels.
+ for (size_t i = 0; i < ppf.info.num_extra_channels; ++i) {
+ if (i < ppf.extra_channels_info.size()) {
+ const auto& ec_info = ppf.extra_channels_info[i].ec_info;
+ if (JXL_ENC_SUCCESS !=
+ JxlEncoderSetExtraChannelInfo(enc, num_interleaved_alpha + i,
+ &ec_info)) {
+ fprintf(stderr, "JxlEncoderSetExtraChannelInfo() failed.\n");
+ return false;
+ }
+ }
+ }
+ if (JXL_ENC_SUCCESS != JxlEncoderAddImageFrame(settings, &ppixelformat,
+ pimage.pixels(),
+ pimage.pixels_size)) {
+ fprintf(stderr, "JxlEncoderAddImageFrame() failed.\n");
+ return false;
+ }
+ // Only set extra channel buffer if it is provided non-interleaved.
+ for (size_t i = 0; i < pframe.extra_channels.size(); ++i) {
+ if (JXL_ENC_SUCCESS !=
+ JxlEncoderSetExtraChannelBuffer(settings, &ppixelformat,
+ pframe.extra_channels[i].pixels(),
+ pframe.extra_channels[i].stride *
+ pframe.extra_channels[i].ysize,
+ num_interleaved_alpha + i)) {
+ fprintf(stderr, "JxlEncoderSetExtraChannelBuffer() failed.\n");
+ return false;
+ }
+ }
+ }
+ }
+ JxlEncoderCloseInput(enc);
+ // Reading compressed output
+ compressed->clear();
+ compressed->resize(4096);
+ uint8_t* next_out = compressed->data();
+ size_t avail_out = compressed->size() - (next_out - compressed->data());
+ JxlEncoderStatus result = JXL_ENC_NEED_MORE_OUTPUT;
+ while (result == JXL_ENC_NEED_MORE_OUTPUT) {
+ result = JxlEncoderProcessOutput(enc, &next_out, &avail_out);
+ if (result == JXL_ENC_NEED_MORE_OUTPUT) {
+ size_t offset = next_out - compressed->data();
+ compressed->resize(compressed->size() * 2);
+ next_out = compressed->data() + offset;
+ avail_out = compressed->size() - offset;
+ }
+ }
+ compressed->resize(next_out - compressed->data());
+ if (result != JXL_ENC_SUCCESS) {
+ fprintf(stderr, "JxlEncoderProcessOutput failed.\n");
+ return false;
+ }
+ return true;
+}
+
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/enc/jxl.h b/third_party/jpeg-xl/lib/extras/enc/jxl.h
new file mode 100644
index 0000000000..3e77fce3c1
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/enc/jxl.h
@@ -0,0 +1,78 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_ENC_JXL_H_
+#define LIB_EXTRAS_ENC_JXL_H_
+
+#include <jxl/encode.h>
+#include <jxl/parallel_runner.h>
+#include <jxl/thread_parallel_runner.h>
+#include <jxl/types.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/extras/packed_image.h"
+
+namespace jxl {
+namespace extras {
+
+struct JXLOption {
+ JXLOption(JxlEncoderFrameSettingId id, int64_t val, size_t frame_index)
+ : id(id), is_float(false), ival(val), frame_index(frame_index) {}
+ JXLOption(JxlEncoderFrameSettingId id, float val, size_t frame_index)
+ : id(id), is_float(true), fval(val), frame_index(frame_index) {}
+
+ JxlEncoderFrameSettingId id;
+ bool is_float;
+ union {
+ int64_t ival;
+ float fval;
+ };
+ size_t frame_index;
+};
+
+struct JXLCompressParams {
+ std::vector<JXLOption> options;
+ // Target butteraugli distance, 0.0 means lossless.
+ float distance = 1.0f;
+ float alpha_distance = 1.0f;
+ // If set to true, forces container mode.
+ bool use_container = false;
+ // Whether to enable/disable byte-exact jpeg reconstruction for jpeg inputs.
+ bool jpeg_store_metadata = true;
+ // Whether to create brob boxes.
+ bool compress_boxes = true;
+ // Upper bound on the intensity level present in the image in nits (zero means
+ // that the library chooses a default).
+ float intensity_target = 0;
+ // Overrides for bitdepth, codestream level and alpha premultiply.
+ size_t override_bitdepth = 0;
+ int32_t codestream_level = -1;
+ int32_t premultiply = -1;
+ // Override input buffer interpretation.
+ JxlBitDepth input_bitdepth = {JXL_BIT_DEPTH_FROM_PIXEL_FORMAT, 0, 0};
+ // If runner_opaque is set, the decoder uses this parallel runner.
+ JxlParallelRunner runner = JxlThreadParallelRunner;
+ void* runner_opaque = nullptr;
+
+ bool allow_expert_options = false;
+
+ void AddOption(JxlEncoderFrameSettingId id, int64_t val) {
+ options.emplace_back(JXLOption(id, val, 0));
+ }
+ void AddFloatOption(JxlEncoderFrameSettingId id, float val) {
+ options.emplace_back(JXLOption(id, val, 0));
+ }
+};
+
+bool EncodeImageJXL(const JXLCompressParams& params, const PackedPixelFile& ppf,
+ const std::vector<uint8_t>* jpeg_bytes,
+ std::vector<uint8_t>* compressed);
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_ENC_JXL_H_
diff --git a/third_party/jpeg-xl/lib/extras/enc/npy.cc b/third_party/jpeg-xl/lib/extras/enc/npy.cc
new file mode 100644
index 0000000000..e7a659184b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/enc/npy.cc
@@ -0,0 +1,322 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/enc/npy.h"
+
+#include <jxl/types.h>
+#include <stdio.h>
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "lib/extras/packed_image.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+// JSON value writing
+
+class JSONField {
+ public:
+ virtual ~JSONField() = default;
+ virtual void Write(std::ostream& o, uint32_t indent) const = 0;
+
+ protected:
+ JSONField() = default;
+};
+
+class JSONValue : public JSONField {
+ public:
+ template <typename T>
+ explicit JSONValue(const T& value) : value_(std::to_string(value)) {}
+
+ explicit JSONValue(const std::string& value) : value_("\"" + value + "\"") {}
+
+ explicit JSONValue(bool value) : value_(value ? "true" : "false") {}
+
+ void Write(std::ostream& o, uint32_t indent) const override { o << value_; }
+
+ private:
+ std::string value_;
+};
+
+class JSONDict : public JSONField {
+ public:
+ JSONDict() = default;
+
+ template <typename T>
+ T* AddEmpty(const std::string& key) {
+ static_assert(std::is_convertible<T*, JSONField*>::value,
+ "T must be a JSONField");
+ T* ret = new T();
+ values_.emplace_back(
+ key, std::unique_ptr<JSONField>(static_cast<JSONField*>(ret)));
+ return ret;
+ }
+
+ template <typename T>
+ void Add(const std::string& key, const T& value) {
+ values_.emplace_back(key, std::unique_ptr<JSONField>(new JSONValue(value)));
+ }
+
+ void Write(std::ostream& o, uint32_t indent) const override {
+ std::string indent_str(indent, ' ');
+ o << "{";
+ bool is_first = true;
+ for (const auto& key_value : values_) {
+ if (!is_first) {
+ o << ",";
+ }
+ is_first = false;
+ o << std::endl << indent_str << " \"" << key_value.first << "\": ";
+ key_value.second->Write(o, indent + 2);
+ }
+ if (!values_.empty()) {
+ o << std::endl << indent_str;
+ }
+ o << "}";
+ }
+
+ private:
+ // Dictionary with order.
+ std::vector<std::pair<std::string, std::unique_ptr<JSONField>>> values_;
+};
+
+class JSONArray : public JSONField {
+ public:
+ JSONArray() = default;
+
+ template <typename T>
+ T* AddEmpty() {
+ static_assert(std::is_convertible<T*, JSONField*>::value,
+ "T must be a JSONField");
+ T* ret = new T();
+ values_.emplace_back(ret);
+ return ret;
+ }
+
+ template <typename T>
+ void Add(const T& value) {
+ values_.emplace_back(new JSONValue(value));
+ }
+
+ void Write(std::ostream& o, uint32_t indent) const override {
+ std::string indent_str(indent, ' ');
+ o << "[";
+ bool is_first = true;
+ for (const auto& value : values_) {
+ if (!is_first) {
+ o << ",";
+ }
+ is_first = false;
+ o << std::endl << indent_str << " ";
+ value->Write(o, indent + 2);
+ }
+ if (!values_.empty()) {
+ o << std::endl << indent_str;
+ }
+ o << "]";
+ }
+
+ private:
+ std::vector<std::unique_ptr<JSONField>> values_;
+};
+
+void GenerateMetadata(const PackedPixelFile& ppf, std::vector<uint8_t>* out) {
+ JSONDict meta;
+ // Same order as in 18181-3 CD.
+
+ // Frames.
+ auto* meta_frames = meta.AddEmpty<JSONArray>("frames");
+ for (size_t i = 0; i < ppf.frames.size(); i++) {
+ auto* frame_i = meta_frames->AddEmpty<JSONDict>();
+ if (ppf.info.have_animation) {
+ frame_i->Add("duration",
+ JSONValue(ppf.frames[i].frame_info.duration * 1.0f *
+ ppf.info.animation.tps_denominator /
+ ppf.info.animation.tps_numerator));
+ }
+
+ frame_i->Add("name", JSONValue(ppf.frames[i].name));
+
+ if (ppf.info.animation.have_timecodes) {
+ frame_i->Add("timecode", JSONValue(ppf.frames[i].frame_info.timecode));
+ }
+ }
+
+#define METADATA(FIELD) meta.Add(#FIELD, ppf.info.FIELD)
+
+ METADATA(intensity_target);
+ METADATA(min_nits);
+ METADATA(relative_to_max_display);
+ METADATA(linear_below);
+
+ if (ppf.info.have_preview) {
+ meta.AddEmpty<JSONDict>("preview");
+ // TODO(veluca): can we have duration/name/timecode here?
+ }
+
+ {
+ auto ectype = meta.AddEmpty<JSONArray>("extra_channel_type");
+ auto bps = meta.AddEmpty<JSONArray>("bits_per_sample");
+ auto ebps = meta.AddEmpty<JSONArray>("exp_bits_per_sample");
+ bps->Add(ppf.info.bits_per_sample);
+ ebps->Add(ppf.info.exponent_bits_per_sample);
+ for (size_t i = 0; i < ppf.extra_channels_info.size(); i++) {
+ switch (ppf.extra_channels_info[i].ec_info.type) {
+ case JXL_CHANNEL_ALPHA: {
+ ectype->Add(std::string("Alpha"));
+ break;
+ }
+ case JXL_CHANNEL_DEPTH: {
+ ectype->Add(std::string("Depth"));
+ break;
+ }
+ case JXL_CHANNEL_SPOT_COLOR: {
+ ectype->Add(std::string("SpotColor"));
+ break;
+ }
+ case JXL_CHANNEL_SELECTION_MASK: {
+ ectype->Add(std::string("SelectionMask"));
+ break;
+ }
+ case JXL_CHANNEL_BLACK: {
+ ectype->Add(std::string("Black"));
+ break;
+ }
+ case JXL_CHANNEL_CFA: {
+ ectype->Add(std::string("CFA"));
+ break;
+ }
+ case JXL_CHANNEL_THERMAL: {
+ ectype->Add(std::string("Thermal"));
+ break;
+ }
+ default: {
+ ectype->Add(std::string("UNKNOWN"));
+ break;
+ }
+ }
+ bps->Add(ppf.extra_channels_info[i].ec_info.bits_per_sample);
+ ebps->Add(ppf.extra_channels_info[i].ec_info.exponent_bits_per_sample);
+ }
+ }
+
+ std::ostringstream os;
+ meta.Write(os, 0);
+ out->resize(os.str().size());
+ memcpy(out->data(), os.str().data(), os.str().size());
+}
+
+void Append(std::vector<uint8_t>* out, const void* data, size_t size) {
+ size_t pos = out->size();
+ out->resize(pos + size);
+ memcpy(out->data() + pos, data, size);
+}
+
+void WriteNPYHeader(size_t xsize, size_t ysize, uint32_t num_channels,
+ size_t num_frames, std::vector<uint8_t>* out) {
+ const uint8_t header[] = "\x93NUMPY\x01\x00";
+ Append(out, header, 8);
+ std::stringstream ss;
+ ss << "{'descr': '<f4', 'fortran_order': False, 'shape': (" << num_frames
+ << ", " << ysize << ", " << xsize << ", " << num_channels << "), }\n";
+ // 16-bit little endian header length.
+ uint8_t header_len[2] = {static_cast<uint8_t>(ss.str().size() % 256),
+ static_cast<uint8_t>(ss.str().size() / 256)};
+ Append(out, header_len, 2);
+ Append(out, ss.str().data(), ss.str().size());
+}
+
+bool WriteFrameToNPYArray(size_t xsize, size_t ysize, const PackedFrame& frame,
+ std::vector<uint8_t>* out) {
+ const auto& color = frame.color;
+ if (color.xsize != xsize || color.ysize != ysize) {
+ return false;
+ }
+ for (const auto& ec : frame.extra_channels) {
+ if (ec.xsize != xsize || ec.ysize != ysize) {
+ return false;
+ }
+ }
+ // interleave the samples from color and extra channels
+ for (size_t y = 0; y < ysize; ++y) {
+ for (size_t x = 0; x < xsize; ++x) {
+ {
+ size_t sample_size = color.pixel_stride();
+ size_t offset = y * color.stride + x * sample_size;
+ uint8_t* pixels = reinterpret_cast<uint8_t*>(color.pixels());
+ JXL_ASSERT(offset + sample_size <= color.pixels_size);
+ Append(out, pixels + offset, sample_size);
+ }
+ for (const auto& ec : frame.extra_channels) {
+ size_t sample_size = ec.pixel_stride();
+ size_t offset = y * ec.stride + x * sample_size;
+ uint8_t* pixels = reinterpret_cast<uint8_t*>(ec.pixels());
+ JXL_ASSERT(offset + sample_size <= ec.pixels_size);
+ Append(out, pixels + offset, sample_size);
+ }
+ }
+ }
+ return true;
+}
+
+// Writes a PackedPixelFile as a numpy 4D ndarray in binary format.
+bool WriteNPYArray(const PackedPixelFile& ppf, std::vector<uint8_t>* out) {
+ size_t xsize = ppf.info.xsize;
+ size_t ysize = ppf.info.ysize;
+ WriteNPYHeader(xsize, ysize,
+ ppf.info.num_color_channels + ppf.extra_channels_info.size(),
+ ppf.frames.size(), out);
+ for (const auto& frame : ppf.frames) {
+ if (!WriteFrameToNPYArray(xsize, ysize, frame, out)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+class NumPyEncoder : public Encoder {
+ public:
+ Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image,
+ ThreadPool* pool = nullptr) const override {
+ JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info));
+ GenerateMetadata(ppf, &encoded_image->metadata);
+ encoded_image->bitstreams.emplace_back();
+ if (!WriteNPYArray(ppf, &encoded_image->bitstreams.back())) {
+ return false;
+ }
+ if (ppf.preview_frame) {
+ size_t xsize = ppf.info.preview.xsize;
+ size_t ysize = ppf.info.preview.ysize;
+ WriteNPYHeader(xsize, ysize, ppf.info.num_color_channels, 1,
+ &encoded_image->preview_bitstream);
+ if (!WriteFrameToNPYArray(xsize, ysize, *ppf.preview_frame,
+ &encoded_image->preview_bitstream)) {
+ return false;
+ }
+ }
+ return true;
+ }
+ std::vector<JxlPixelFormat> AcceptedFormats() const override {
+ std::vector<JxlPixelFormat> formats;
+ for (const uint32_t num_channels : {1, 3}) {
+ formats.push_back(JxlPixelFormat{num_channels, JXL_TYPE_FLOAT,
+ JXL_LITTLE_ENDIAN, /*align=*/0});
+ }
+ return formats;
+ }
+};
+
+} // namespace
+
+std::unique_ptr<Encoder> GetNumPyEncoder() {
+ return jxl::make_unique<NumPyEncoder>();
+}
+
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/enc/npy.h b/third_party/jpeg-xl/lib/extras/enc/npy.h
new file mode 100644
index 0000000000..3ee6208ec2
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/enc/npy.h
@@ -0,0 +1,23 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_ENC_NPY_H_
+#define LIB_EXTRAS_ENC_NPY_H_
+
+// Encodes pixels to numpy array, used for conformance testing.
+
+#include <memory>
+
+#include "lib/extras/enc/encode.h"
+
+namespace jxl {
+namespace extras {
+
+std::unique_ptr<Encoder> GetNumPyEncoder();
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_ENC_NPY_H_
diff --git a/third_party/jpeg-xl/lib/extras/enc/pgx.cc b/third_party/jpeg-xl/lib/extras/enc/pgx.cc
new file mode 100644
index 0000000000..201c8b4189
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/enc/pgx.cc
@@ -0,0 +1,123 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/enc/pgx.h"
+
+#include <jxl/codestream_header.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/byte_order.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+constexpr size_t kMaxHeaderSize = 200;
+
+Status EncodeHeader(const JxlBasicInfo& info, char* header,
+ int* chars_written) {
+ if (info.alpha_bits > 0) {
+ return JXL_FAILURE("PGX: can't store alpha");
+ }
+ if (info.num_color_channels != 1) {
+ return JXL_FAILURE("PGX: must be grayscale");
+ }
+ // TODO(lode): verify other bit depths: for other bit depths such as 1 or 4
+ // bits, have a test case to verify it works correctly. For bits > 16, we may
+ // need to change the way external_image works.
+ if (info.bits_per_sample != 8 && info.bits_per_sample != 16) {
+ return JXL_FAILURE("PGX: bits other than 8 or 16 not yet supported");
+ }
+
+ // Use ML (Big Endian), LM may not be well supported by all decoders.
+ *chars_written = snprintf(header, kMaxHeaderSize, "PG ML + %u %u %u\n",
+ info.bits_per_sample, info.xsize, info.ysize);
+ JXL_RETURN_IF_ERROR(static_cast<unsigned int>(*chars_written) <
+ kMaxHeaderSize);
+ return true;
+}
+
+Status EncodeImagePGX(const PackedFrame& frame, const JxlBasicInfo& info,
+ std::vector<uint8_t>* bytes) {
+ char header[kMaxHeaderSize];
+ int header_size = 0;
+ JXL_RETURN_IF_ERROR(EncodeHeader(info, header, &header_size));
+
+ const PackedImage& color = frame.color;
+ const JxlPixelFormat format = color.format;
+ const uint8_t* in = reinterpret_cast<const uint8_t*>(color.pixels());
+ size_t data_bits_per_sample = PackedImage::BitsPerChannel(format.data_type);
+ size_t bytes_per_sample = data_bits_per_sample / kBitsPerByte;
+ size_t num_samples = info.xsize * info.ysize;
+
+ if (info.bits_per_sample != data_bits_per_sample) {
+ return JXL_FAILURE("Bit depth does not match pixel data type");
+ }
+
+ std::vector<uint8_t> pixels(num_samples * bytes_per_sample);
+
+ if (format.data_type == JXL_TYPE_UINT8) {
+ memcpy(&pixels[0], in, num_samples * bytes_per_sample);
+ } else if (format.data_type == JXL_TYPE_UINT16) {
+ if (format.endianness != JXL_BIG_ENDIAN) {
+ const uint8_t* p_in = in;
+ uint8_t* p_out = pixels.data();
+ for (size_t i = 0; i < num_samples; ++i, p_in += 2, p_out += 2) {
+ StoreBE16(LoadLE16(p_in), p_out);
+ }
+ } else {
+ memcpy(&pixels[0], in, num_samples * bytes_per_sample);
+ }
+ } else {
+ return JXL_FAILURE("Unsupported pixel data type");
+ }
+
+ bytes->resize(static_cast<size_t>(header_size) + pixels.size());
+ memcpy(bytes->data(), header, static_cast<size_t>(header_size));
+ memcpy(bytes->data() + header_size, pixels.data(), pixels.size());
+
+ return true;
+}
+
+class PGXEncoder : public Encoder {
+ public:
+ std::vector<JxlPixelFormat> AcceptedFormats() const override {
+ std::vector<JxlPixelFormat> formats;
+ for (const JxlDataType data_type : {JXL_TYPE_UINT8, JXL_TYPE_UINT16}) {
+ for (JxlEndianness endianness : {JXL_BIG_ENDIAN, JXL_LITTLE_ENDIAN}) {
+ formats.push_back(JxlPixelFormat{/*num_channels=*/1,
+ /*data_type=*/data_type,
+ /*endianness=*/endianness,
+ /*align=*/0});
+ }
+ }
+ return formats;
+ }
+ Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image,
+ ThreadPool* pool) const override {
+ JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info));
+ encoded_image->icc.assign(ppf.icc.begin(), ppf.icc.end());
+ encoded_image->bitstreams.clear();
+ encoded_image->bitstreams.reserve(ppf.frames.size());
+ for (const auto& frame : ppf.frames) {
+ JXL_RETURN_IF_ERROR(VerifyPackedImage(frame.color, ppf.info));
+ encoded_image->bitstreams.emplace_back();
+ JXL_RETURN_IF_ERROR(
+ EncodeImagePGX(frame, ppf.info, &encoded_image->bitstreams.back()));
+ }
+ return true;
+ }
+};
+
+} // namespace
+
+std::unique_ptr<Encoder> GetPGXEncoder() {
+ return jxl::make_unique<PGXEncoder>();
+}
+
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/enc/pgx.h b/third_party/jpeg-xl/lib/extras/enc/pgx.h
new file mode 100644
index 0000000000..f24e391b09
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/enc/pgx.h
@@ -0,0 +1,24 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_ENC_PGX_H_
+#define LIB_EXTRAS_ENC_PGX_H_
+
+// Encodes PGX pixels in memory.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/extras/enc/encode.h"
+
+namespace jxl {
+namespace extras {
+
+std::unique_ptr<Encoder> GetPGXEncoder();
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_ENC_PGX_H_
diff --git a/third_party/jpeg-xl/lib/extras/enc/pnm.cc b/third_party/jpeg-xl/lib/extras/enc/pnm.cc
new file mode 100644
index 0000000000..7cebcc2a1f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/enc/pnm.cc
@@ -0,0 +1,303 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/enc/pnm.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/file_io.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/enc_image_bundle.h"
+#include "lib/jxl/fields.h" // AllDefault
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+constexpr size_t kMaxHeaderSize = 200;
+
+class PNMEncoder : public Encoder {
+ public:
+ Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image,
+ ThreadPool* pool = nullptr) const override {
+ JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info));
+ if (!ppf.metadata.exif.empty() || !ppf.metadata.iptc.empty() ||
+ !ppf.metadata.jumbf.empty() || !ppf.metadata.xmp.empty()) {
+ JXL_WARNING("PNM encoder ignoring metadata - use a different codec");
+ }
+ encoded_image->icc = ppf.icc;
+ encoded_image->bitstreams.clear();
+ encoded_image->bitstreams.reserve(ppf.frames.size());
+ for (const auto& frame : ppf.frames) {
+ JXL_RETURN_IF_ERROR(VerifyPackedImage(frame.color, ppf.info));
+ encoded_image->bitstreams.emplace_back();
+ JXL_RETURN_IF_ERROR(
+ EncodeFrame(ppf, frame, &encoded_image->bitstreams.back()));
+ }
+ for (size_t i = 0; i < ppf.extra_channels_info.size(); ++i) {
+ const auto& ec_info = ppf.extra_channels_info[i].ec_info;
+ encoded_image->extra_channel_bitstreams.emplace_back();
+ auto& ec_bitstreams = encoded_image->extra_channel_bitstreams.back();
+ for (const auto& frame : ppf.frames) {
+ ec_bitstreams.emplace_back();
+ JXL_RETURN_IF_ERROR(EncodeExtraChannel(frame.extra_channels[i],
+ ec_info.bits_per_sample,
+ &ec_bitstreams.back()));
+ }
+ }
+ return true;
+ }
+
+ protected:
+ virtual Status EncodeFrame(const PackedPixelFile& ppf,
+ const PackedFrame& frame,
+ std::vector<uint8_t>* bytes) const = 0;
+ virtual Status EncodeExtraChannel(const PackedImage& image,
+ size_t bits_per_sample,
+ std::vector<uint8_t>* bytes) const = 0;
+};
+
+class PPMEncoder : public PNMEncoder {
+ public:
+ std::vector<JxlPixelFormat> AcceptedFormats() const override {
+ return {JxlPixelFormat{3, JXL_TYPE_UINT8, JXL_BIG_ENDIAN, 0},
+ JxlPixelFormat{3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}};
+ }
+ Status EncodeFrame(const PackedPixelFile& ppf, const PackedFrame& frame,
+ std::vector<uint8_t>* bytes) const override {
+ return EncodeImage(frame.color, ppf.info.bits_per_sample, bytes);
+ }
+ Status EncodeExtraChannel(const PackedImage& image, size_t bits_per_sample,
+ std::vector<uint8_t>* bytes) const override {
+ return EncodeImage(image, bits_per_sample, bytes);
+ }
+
+ private:
+ Status EncodeImage(const PackedImage& image, size_t bits_per_sample,
+ std::vector<uint8_t>* bytes) const {
+ uint32_t maxval = (1u << bits_per_sample) - 1;
+ char type = image.format.num_channels == 1 ? '5' : '6';
+ char header[kMaxHeaderSize];
+ size_t header_size =
+ snprintf(header, kMaxHeaderSize, "P%c\n%" PRIuS " %" PRIuS "\n%u\n",
+ type, image.xsize, image.ysize, maxval);
+ JXL_RETURN_IF_ERROR(header_size < kMaxHeaderSize);
+ bytes->resize(header_size + image.pixels_size);
+ memcpy(bytes->data(), header, header_size);
+ memcpy(bytes->data() + header_size,
+ reinterpret_cast<uint8_t*>(image.pixels()), image.pixels_size);
+ return true;
+ }
+};
+
+class PGMEncoder : public PPMEncoder {
+ public:
+ std::vector<JxlPixelFormat> AcceptedFormats() const override {
+ return {JxlPixelFormat{1, JXL_TYPE_UINT8, JXL_BIG_ENDIAN, 0},
+ JxlPixelFormat{1, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}};
+ }
+};
+
+class PFMEncoder : public PNMEncoder {
+ public:
+ std::vector<JxlPixelFormat> AcceptedFormats() const override {
+ std::vector<JxlPixelFormat> formats;
+ for (const uint32_t num_channels : {1, 3}) {
+ for (JxlEndianness endianness : {JXL_BIG_ENDIAN, JXL_LITTLE_ENDIAN}) {
+ formats.push_back(JxlPixelFormat{/*num_channels=*/num_channels,
+ /*data_type=*/JXL_TYPE_FLOAT,
+ /*endianness=*/endianness,
+ /*align=*/0});
+ }
+ }
+ return formats;
+ }
+ Status EncodeFrame(const PackedPixelFile& ppf, const PackedFrame& frame,
+ std::vector<uint8_t>* bytes) const override {
+ return EncodeImage(frame.color, bytes);
+ }
+ Status EncodeExtraChannel(const PackedImage& image, size_t bits_per_sample,
+ std::vector<uint8_t>* bytes) const override {
+ return EncodeImage(image, bytes);
+ }
+
+ private:
+ Status EncodeImage(const PackedImage& image,
+ std::vector<uint8_t>* bytes) const {
+ char type = image.format.num_channels == 1 ? 'f' : 'F';
+ double scale = image.format.endianness == JXL_LITTLE_ENDIAN ? -1.0 : 1.0;
+ char header[kMaxHeaderSize];
+ size_t header_size =
+ snprintf(header, kMaxHeaderSize, "P%c\n%" PRIuS " %" PRIuS "\n%.1f\n",
+ type, image.xsize, image.ysize, scale);
+ JXL_RETURN_IF_ERROR(header_size < kMaxHeaderSize);
+ bytes->resize(header_size + image.pixels_size);
+ memcpy(bytes->data(), header, header_size);
+ const uint8_t* in = reinterpret_cast<const uint8_t*>(image.pixels());
+ uint8_t* out = bytes->data() + header_size;
+ for (size_t y = 0; y < image.ysize; ++y) {
+ size_t y_out = image.ysize - 1 - y;
+ const uint8_t* row_in = &in[y * image.stride];
+ uint8_t* row_out = &out[y_out * image.stride];
+ memcpy(row_out, row_in, image.stride);
+ }
+ return true;
+ }
+};
+
+class PAMEncoder : public PNMEncoder {
+ public:
+ std::vector<JxlPixelFormat> AcceptedFormats() const override {
+ std::vector<JxlPixelFormat> formats;
+ for (const uint32_t num_channels : {1, 2, 3, 4}) {
+ for (const JxlDataType data_type : {JXL_TYPE_UINT8, JXL_TYPE_UINT16}) {
+ formats.push_back(JxlPixelFormat{/*num_channels=*/num_channels,
+ /*data_type=*/data_type,
+ /*endianness=*/JXL_BIG_ENDIAN,
+ /*align=*/0});
+ }
+ }
+ return formats;
+ }
+ Status EncodeFrame(const PackedPixelFile& ppf, const PackedFrame& frame,
+ std::vector<uint8_t>* bytes) const override {
+ const PackedImage& color = frame.color;
+ const auto& ec_info = ppf.extra_channels_info;
+ JXL_RETURN_IF_ERROR(frame.extra_channels.size() == ec_info.size());
+ for (const auto& ec : frame.extra_channels) {
+ if (ec.xsize != color.xsize || ec.ysize != color.ysize) {
+ return JXL_FAILURE("Extra channel and color size mismatch.");
+ }
+ if (ec.format.data_type != color.format.data_type ||
+ ec.format.endianness != color.format.endianness) {
+ return JXL_FAILURE("Extra channel and color format mismatch.");
+ }
+ }
+ if (ppf.info.bits_per_sample != ppf.info.alpha_bits) {
+ return JXL_FAILURE("Alpha bit depth does not match image bit depth");
+ }
+ for (const auto& it : ec_info) {
+ if (it.ec_info.bits_per_sample != ppf.info.bits_per_sample) {
+ return JXL_FAILURE(
+ "Extra channel bit depth does not match image bit depth");
+ }
+ }
+ const char* kColorTypes[4] = {"GRAYSCALE", "GRAYSCALE_ALPHA", "RGB",
+ "RGB_ALPHA"};
+ uint32_t maxval = (1u << ppf.info.bits_per_sample) - 1;
+ uint32_t depth = color.format.num_channels + ec_info.size();
+ char header[kMaxHeaderSize];
+ size_t pos = 0;
+ pos += snprintf(header + pos, kMaxHeaderSize - pos,
+ "P7\nWIDTH %" PRIuS "\nHEIGHT %" PRIuS
+ "\nDEPTH %u\n"
+ "MAXVAL %u\nTUPLTYPE %s\n",
+ color.xsize, color.ysize, depth, maxval,
+ kColorTypes[color.format.num_channels - 1]);
+ JXL_RETURN_IF_ERROR(pos < kMaxHeaderSize);
+ for (const auto& info : ec_info) {
+ pos += snprintf(header + pos, kMaxHeaderSize - pos, "TUPLTYPE %s\n",
+ ExtraChannelTypeName(info.ec_info.type).c_str());
+ JXL_RETURN_IF_ERROR(pos < kMaxHeaderSize);
+ }
+ pos += snprintf(header + pos, kMaxHeaderSize - pos, "ENDHDR\n");
+ JXL_RETURN_IF_ERROR(pos < kMaxHeaderSize);
+ size_t total_size = color.pixels_size;
+ for (const auto& ec : frame.extra_channels) {
+ total_size += ec.pixels_size;
+ }
+ bytes->resize(pos + total_size);
+ memcpy(bytes->data(), header, pos);
+ // If we have no extra channels, just copy color pixel data over.
+ if (frame.extra_channels.empty()) {
+ memcpy(bytes->data() + pos, reinterpret_cast<uint8_t*>(color.pixels()),
+ color.pixels_size);
+ return true;
+ }
+ // Interleave color and extra channels.
+ const uint8_t* in = reinterpret_cast<const uint8_t*>(color.pixels());
+ std::vector<const uint8_t*> ec_in(frame.extra_channels.size());
+ for (size_t i = 0; i < frame.extra_channels.size(); ++i) {
+ ec_in[i] =
+ reinterpret_cast<const uint8_t*>(frame.extra_channels[i].pixels());
+ }
+ uint8_t* out = bytes->data() + pos;
+ size_t pwidth = PackedImage::BitsPerChannel(color.format.data_type) / 8;
+ for (size_t y = 0; y < color.ysize; ++y) {
+ for (size_t x = 0; x < color.xsize; ++x) {
+ memcpy(out, in, color.pixel_stride());
+ out += color.pixel_stride();
+ in += color.pixel_stride();
+ for (auto& p : ec_in) {
+ memcpy(out, p, pwidth);
+ out += pwidth;
+ p += pwidth;
+ }
+ }
+ }
+ return true;
+ }
+ Status EncodeExtraChannel(const PackedImage& image, size_t bits_per_sample,
+ std::vector<uint8_t>* bytes) const override {
+ return true;
+ }
+
+ private:
+ static std::string ExtraChannelTypeName(JxlExtraChannelType type) {
+ switch (type) {
+ case JXL_CHANNEL_ALPHA:
+ return std::string("Alpha");
+ case JXL_CHANNEL_DEPTH:
+ return std::string("Depth");
+ case JXL_CHANNEL_SPOT_COLOR:
+ return std::string("SpotColor");
+ case JXL_CHANNEL_SELECTION_MASK:
+ return std::string("SelectionMask");
+ case JXL_CHANNEL_BLACK:
+ return std::string("Black");
+ case JXL_CHANNEL_CFA:
+ return std::string("CFA");
+ case JXL_CHANNEL_THERMAL:
+ return std::string("Thermal");
+ default:
+ return std::string("UNKNOWN");
+ }
+ }
+};
+
+} // namespace
+
+std::unique_ptr<Encoder> GetPPMEncoder() {
+ return jxl::make_unique<PPMEncoder>();
+}
+
+std::unique_ptr<Encoder> GetPFMEncoder() {
+ return jxl::make_unique<PFMEncoder>();
+}
+
+std::unique_ptr<Encoder> GetPGMEncoder() {
+ return jxl::make_unique<PGMEncoder>();
+}
+
+std::unique_ptr<Encoder> GetPAMEncoder() {
+ return jxl::make_unique<PAMEncoder>();
+}
+
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/enc/pnm.h b/third_party/jpeg-xl/lib/extras/enc/pnm.h
new file mode 100644
index 0000000000..403208cecd
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/enc/pnm.h
@@ -0,0 +1,28 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_ENC_PNM_H_
+#define LIB_EXTRAS_ENC_PNM_H_
+
+// Encodes/decodes PBM/PGM/PPM/PFM pixels in memory.
+
+// TODO(janwas): workaround for incorrect Win64 codegen (cause unknown)
+#include <hwy/highway.h>
+#include <memory>
+
+#include "lib/extras/enc/encode.h"
+
+namespace jxl {
+namespace extras {
+
+std::unique_ptr<Encoder> GetPAMEncoder();
+std::unique_ptr<Encoder> GetPGMEncoder();
+std::unique_ptr<Encoder> GetPPMEncoder();
+std::unique_ptr<Encoder> GetPFMEncoder();
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_ENC_PNM_H_
diff --git a/third_party/jpeg-xl/lib/extras/exif.cc b/third_party/jpeg-xl/lib/extras/exif.cc
new file mode 100644
index 0000000000..7d926558c3
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/exif.cc
@@ -0,0 +1,55 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/exif.h"
+
+#include "lib/jxl/base/byte_order.h"
+
+namespace jxl {
+
+constexpr uint16_t kExifOrientationTag = 274;
+
+void ResetExifOrientation(std::vector<uint8_t>& exif) {
+ if (exif.size() < 12) return; // not enough bytes for a valid exif blob
+ bool bigendian;
+ uint8_t* t = exif.data();
+ if (LoadLE32(t) == 0x2A004D4D) {
+ bigendian = true;
+ } else if (LoadLE32(t) == 0x002A4949) {
+ bigendian = false;
+ } else {
+ return; // not a valid tiff header
+ }
+ t += 4;
+ uint32_t offset = (bigendian ? LoadBE32(t) : LoadLE32(t));
+ if (exif.size() < 12 + offset + 2 || offset < 8) return;
+ t += offset - 4;
+ uint16_t nb_tags = (bigendian ? LoadBE16(t) : LoadLE16(t));
+ t += 2;
+ while (nb_tags > 0) {
+ if (t + 12 >= exif.data() + exif.size()) return;
+ uint16_t tag = (bigendian ? LoadBE16(t) : LoadLE16(t));
+ t += 2;
+ if (tag == kExifOrientationTag) {
+ uint16_t type = (bigendian ? LoadBE16(t) : LoadLE16(t));
+ t += 2;
+ uint32_t count = (bigendian ? LoadBE32(t) : LoadLE32(t));
+ t += 4;
+ if (type == 3 && count == 1) {
+ if (bigendian) {
+ StoreBE16(1, t);
+ } else {
+ StoreLE16(1, t);
+ }
+ }
+ return;
+ } else {
+ t += 10;
+ nb_tags--;
+ }
+ }
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/exif.h b/third_party/jpeg-xl/lib/extras/exif.h
new file mode 100644
index 0000000000..f22b2ccef5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/exif.h
@@ -0,0 +1,20 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_EXIF_H_
+#define LIB_EXTRAS_EXIF_H_
+
+#include <stdint.h>
+
+#include <vector>
+
+namespace jxl {
+
+// Sets the Exif orientation to the identity, to avoid repeated orientation
+void ResetExifOrientation(std::vector<uint8_t>& exif);
+
+} // namespace jxl
+
+#endif // LIB_EXTRAS_EXIF_H_
diff --git a/third_party/jpeg-xl/lib/extras/hlg.cc b/third_party/jpeg-xl/lib/extras/hlg.cc
new file mode 100644
index 0000000000..e39a0807f5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/hlg.cc
@@ -0,0 +1,56 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/hlg.h"
+
+#include <cmath>
+
+#include "lib/jxl/enc_color_management.h"
+
+namespace jxl {
+
+float GetHlgGamma(const float peak_luminance, const float surround_luminance) {
+ return 1.2f * std::pow(1.111f, std::log2(peak_luminance / 1000.f)) *
+ std::pow(0.98f, std::log2(surround_luminance / 5.f));
+}
+
+Status HlgOOTF(ImageBundle* ib, const float gamma, ThreadPool* pool) {
+ ColorEncoding linear_rec2020;
+ linear_rec2020.SetColorSpace(ColorSpace::kRGB);
+ linear_rec2020.primaries = Primaries::k2100;
+ linear_rec2020.white_point = WhitePoint::kD65;
+ linear_rec2020.tf.SetTransferFunction(TransferFunction::kLinear);
+ JXL_RETURN_IF_ERROR(linear_rec2020.CreateICC());
+ JXL_RETURN_IF_ERROR(ib->TransformTo(linear_rec2020, GetJxlCms(), pool));
+
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, ib->ysize(), ThreadPool::NoInit,
+ [&](const int y, const int thread) {
+ float* const JXL_RESTRICT rows[3] = {ib->color()->PlaneRow(0, y),
+ ib->color()->PlaneRow(1, y),
+ ib->color()->PlaneRow(2, y)};
+ for (size_t x = 0; x < ib->xsize(); ++x) {
+ float& red = rows[0][x];
+ float& green = rows[1][x];
+ float& blue = rows[2][x];
+ const float luminance =
+ 0.2627f * red + 0.6780f * green + 0.0593f * blue;
+ const float ratio = std::pow(luminance, gamma - 1);
+ if (std::isfinite(ratio)) {
+ red *= ratio;
+ green *= ratio;
+ blue *= ratio;
+ }
+ }
+ },
+ "HlgOOTF"));
+ return true;
+}
+
+Status HlgInverseOOTF(ImageBundle* ib, const float gamma, ThreadPool* pool) {
+ return HlgOOTF(ib, 1.f / gamma, pool);
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/hlg.h b/third_party/jpeg-xl/lib/extras/hlg.h
new file mode 100644
index 0000000000..4cfec444f4
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/hlg.h
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_HLG_H_
+#define LIB_EXTRAS_HLG_H_
+
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+float GetHlgGamma(float peak_luminance, float surround_luminance = 5.f);
+
+Status HlgOOTF(ImageBundle* ib, float gamma, ThreadPool* pool = nullptr);
+
+Status HlgInverseOOTF(ImageBundle* ib, float gamma, ThreadPool* pool = nullptr);
+
+} // namespace jxl
+
+#endif // LIB_EXTRAS_HLG_H_
diff --git a/third_party/jpeg-xl/lib/extras/jpegli_test.cc b/third_party/jpeg-xl/lib/extras/jpegli_test.cc
new file mode 100644
index 0000000000..6aa8afe4c0
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/jpegli_test.cc
@@ -0,0 +1,405 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if JPEGXL_ENABLE_JPEG && JPEGXL_ENABLE_JPEGLI
+
+#include "lib/extras/dec/jpegli.h"
+
+#include <jxl/color_encoding.h>
+#include <stdint.h>
+
+#include <memory>
+#include <string>
+
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/dec/decode.h"
+#include "lib/extras/dec/jpg.h"
+#include "lib/extras/enc/encode.h"
+#include "lib/extras/enc/jpegli.h"
+#include "lib/extras/enc/jpg.h"
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/test_image.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+using test::Butteraugli3Norm;
+using test::ButteraugliDistance;
+using test::TestImage;
+
+Status ReadTestImage(const std::string& pathname, PackedPixelFile* ppf) {
+ const PaddedBytes encoded = jxl::test::ReadTestData(pathname);
+ ColorHints color_hints;
+ if (pathname.find(".ppm") != std::string::npos) {
+ color_hints.Add("color_space", "RGB_D65_SRG_Rel_SRG");
+ } else if (pathname.find(".pgm") != std::string::npos) {
+ color_hints.Add("color_space", "Gra_D65_Rel_SRG");
+ }
+ return DecodeBytes(Span<const uint8_t>(encoded), color_hints, ppf);
+}
+
+std::vector<uint8_t> GetAppData(const std::vector<uint8_t>& compressed) {
+ std::vector<uint8_t> result;
+ size_t pos = 2; // After SOI
+ while (pos + 4 < compressed.size()) {
+ if (compressed[pos] != 0xff || compressed[pos + 1] < 0xe0 ||
+ compressed[pos + 1] > 0xf0) {
+ break;
+ }
+ size_t len = (compressed[pos + 2] << 8) + compressed[pos + 3] + 2;
+ if (pos + len > compressed.size()) {
+ break;
+ }
+ result.insert(result.end(), &compressed[pos], &compressed[pos] + len);
+ pos += len;
+ }
+ return result;
+}
+
+Status DecodeWithLibjpeg(const std::vector<uint8_t>& compressed,
+ PackedPixelFile* ppf,
+ const JPGDecompressParams* dparams = nullptr) {
+ return DecodeImageJPG(Span<const uint8_t>(compressed), ColorHints(), ppf,
+ /*constraints=*/nullptr, dparams);
+}
+
+Status EncodeWithLibjpeg(const PackedPixelFile& ppf, int quality,
+ std::vector<uint8_t>* compressed) {
+ std::unique_ptr<Encoder> encoder = GetJPEGEncoder();
+ encoder->SetOption("q", std::to_string(quality));
+ EncodedImage encoded;
+ JXL_RETURN_IF_ERROR(encoder->Encode(ppf, &encoded));
+ JXL_RETURN_IF_ERROR(!encoded.bitstreams.empty());
+ *compressed = std::move(encoded.bitstreams[0]);
+ return true;
+}
+
+std::string Description(const JxlColorEncoding& color_encoding) {
+ ColorEncoding c_enc;
+ JXL_CHECK(ConvertExternalToInternalColorEncoding(color_encoding, &c_enc));
+ return Description(c_enc);
+}
+
+float BitsPerPixel(const PackedPixelFile& ppf,
+ const std::vector<uint8_t>& compressed) {
+ const size_t num_pixels = ppf.info.xsize * ppf.info.ysize;
+ return compressed.size() * 8.0 / num_pixels;
+}
+
+TEST(JpegliTest, JpegliSRGBDecodeTest) {
+ std::string testimage = "jxl/flower/flower_small.rgb.depth8.ppm";
+ PackedPixelFile ppf0;
+ ASSERT_TRUE(ReadTestImage(testimage, &ppf0));
+ EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(ppf0.color_encoding));
+ EXPECT_EQ(8, ppf0.info.bits_per_sample);
+
+ std::vector<uint8_t> compressed;
+ ASSERT_TRUE(EncodeWithLibjpeg(ppf0, 90, &compressed));
+
+ PackedPixelFile ppf1;
+ ASSERT_TRUE(DecodeWithLibjpeg(compressed, &ppf1));
+ PackedPixelFile ppf2;
+ JpegDecompressParams dparams;
+ ASSERT_TRUE(DecodeJpeg(compressed, dparams, nullptr, &ppf2));
+ EXPECT_LT(ButteraugliDistance(ppf0, ppf2), ButteraugliDistance(ppf0, ppf1));
+}
+
+TEST(JpegliTest, JpegliGrayscaleDecodeTest) {
+ std::string testimage = "jxl/flower/flower_small.g.depth8.pgm";
+ PackedPixelFile ppf0;
+ ASSERT_TRUE(ReadTestImage(testimage, &ppf0));
+ EXPECT_EQ("Gra_D65_Rel_SRG", Description(ppf0.color_encoding));
+ EXPECT_EQ(8, ppf0.info.bits_per_sample);
+
+ std::vector<uint8_t> compressed;
+ ASSERT_TRUE(EncodeWithLibjpeg(ppf0, 90, &compressed));
+
+ PackedPixelFile ppf1;
+ ASSERT_TRUE(DecodeWithLibjpeg(compressed, &ppf1));
+ PackedPixelFile ppf2;
+ JpegDecompressParams dparams;
+ ASSERT_TRUE(DecodeJpeg(compressed, dparams, nullptr, &ppf2));
+ EXPECT_LT(ButteraugliDistance(ppf0, ppf2), ButteraugliDistance(ppf0, ppf1));
+}
+
+TEST(JpegliTest, JpegliXYBEncodeTest) {
+ std::string testimage = "jxl/flower/flower_small.rgb.depth8.ppm";
+ PackedPixelFile ppf_in;
+ ASSERT_TRUE(ReadTestImage(testimage, &ppf_in));
+ EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(ppf_in.color_encoding));
+ EXPECT_EQ(8, ppf_in.info.bits_per_sample);
+
+ std::vector<uint8_t> compressed;
+ JpegSettings settings;
+ settings.xyb = true;
+ ASSERT_TRUE(EncodeJpeg(ppf_in, settings, nullptr, &compressed));
+
+ PackedPixelFile ppf_out;
+ ASSERT_TRUE(DecodeWithLibjpeg(compressed, &ppf_out));
+ EXPECT_THAT(BitsPerPixel(ppf_in, compressed), IsSlightlyBelow(1.45f));
+ EXPECT_THAT(ButteraugliDistance(ppf_in, ppf_out), IsSlightlyBelow(1.32f));
+}
+
+TEST(JpegliTest, JpegliDecodeTestLargeSmoothArea) {
+ TestImage t;
+ const size_t xsize = 2070;
+ const size_t ysize = 1063;
+ t.SetDimensions(xsize, ysize).SetChannels(3);
+ t.SetAllBitDepths(8).SetEndianness(JXL_NATIVE_ENDIAN);
+ TestImage::Frame frame = t.AddFrame();
+ frame.RandomFill();
+ // Create a large smooth area in the top half of the image. This is to test
+ // that the bias statistics calculation can handle many blocks with all-zero
+ // AC coefficients.
+ for (size_t y = 0; y < ysize / 2; ++y) {
+ for (size_t x = 0; x < xsize; ++x) {
+ for (size_t c = 0; c < 3; ++c) {
+ frame.SetValue(y, x, c, 0.5f);
+ }
+ }
+ }
+ const PackedPixelFile& ppf0 = t.ppf();
+
+ std::vector<uint8_t> compressed;
+ ASSERT_TRUE(EncodeWithLibjpeg(ppf0, 90, &compressed));
+
+ PackedPixelFile ppf1;
+ JpegDecompressParams dparams;
+ ASSERT_TRUE(DecodeJpeg(compressed, dparams, nullptr, &ppf1));
+ EXPECT_LT(ButteraugliDistance(ppf0, ppf1), 3.0f);
+}
+
+TEST(JpegliTest, JpegliYUVEncodeTest) {
+ std::string testimage = "jxl/flower/flower_small.rgb.depth8.ppm";
+ PackedPixelFile ppf_in;
+ ASSERT_TRUE(ReadTestImage(testimage, &ppf_in));
+ EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(ppf_in.color_encoding));
+ EXPECT_EQ(8, ppf_in.info.bits_per_sample);
+
+ std::vector<uint8_t> compressed;
+ JpegSettings settings;
+ settings.xyb = false;
+ ASSERT_TRUE(EncodeJpeg(ppf_in, settings, nullptr, &compressed));
+
+ PackedPixelFile ppf_out;
+ ASSERT_TRUE(DecodeWithLibjpeg(compressed, &ppf_out));
+ EXPECT_THAT(BitsPerPixel(ppf_in, compressed), IsSlightlyBelow(1.7f));
+ EXPECT_THAT(ButteraugliDistance(ppf_in, ppf_out), IsSlightlyBelow(1.32f));
+}
+
+TEST(JpegliTest, JpegliYUVChromaSubsamplingEncodeTest) {
+ std::string testimage = "jxl/flower/flower_small.rgb.depth8.ppm";
+ PackedPixelFile ppf_in;
+ ASSERT_TRUE(ReadTestImage(testimage, &ppf_in));
+ EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(ppf_in.color_encoding));
+ EXPECT_EQ(8, ppf_in.info.bits_per_sample);
+
+ std::vector<uint8_t> compressed;
+ JpegSettings settings;
+ for (const char* sampling : {"440", "422", "420"}) {
+ settings.xyb = false;
+ settings.chroma_subsampling = std::string(sampling);
+ ASSERT_TRUE(EncodeJpeg(ppf_in, settings, nullptr, &compressed));
+
+ PackedPixelFile ppf_out;
+ ASSERT_TRUE(DecodeWithLibjpeg(compressed, &ppf_out));
+ EXPECT_LE(BitsPerPixel(ppf_in, compressed), 1.55f);
+ EXPECT_LE(ButteraugliDistance(ppf_in, ppf_out), 1.82f);
+ }
+}
+
+TEST(JpegliTest, JpegliYUVEncodeTestNoAq) {
+ std::string testimage = "jxl/flower/flower_small.rgb.depth8.ppm";
+ PackedPixelFile ppf_in;
+ ASSERT_TRUE(ReadTestImage(testimage, &ppf_in));
+ EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(ppf_in.color_encoding));
+ EXPECT_EQ(8, ppf_in.info.bits_per_sample);
+
+ std::vector<uint8_t> compressed;
+ JpegSettings settings;
+ settings.xyb = false;
+ settings.use_adaptive_quantization = false;
+ ASSERT_TRUE(EncodeJpeg(ppf_in, settings, nullptr, &compressed));
+
+ PackedPixelFile ppf_out;
+ ASSERT_TRUE(DecodeWithLibjpeg(compressed, &ppf_out));
+ EXPECT_THAT(BitsPerPixel(ppf_in, compressed), IsSlightlyBelow(1.85f));
+ EXPECT_THAT(ButteraugliDistance(ppf_in, ppf_out), IsSlightlyBelow(1.25f));
+}
+
+TEST(JpegliTest, JpegliHDRRoundtripTest) {
+ std::string testimage = "jxl/hdr_room.png";
+ PackedPixelFile ppf_in;
+ ASSERT_TRUE(ReadTestImage(testimage, &ppf_in));
+ EXPECT_EQ("RGB_D65_202_Rel_HLG", Description(ppf_in.color_encoding));
+ EXPECT_EQ(16, ppf_in.info.bits_per_sample);
+
+ std::vector<uint8_t> compressed;
+ JpegSettings settings;
+ settings.xyb = false;
+ ASSERT_TRUE(EncodeJpeg(ppf_in, settings, nullptr, &compressed));
+
+ PackedPixelFile ppf_out;
+ JpegDecompressParams dparams;
+ dparams.output_data_type = JXL_TYPE_UINT16;
+ ASSERT_TRUE(DecodeJpeg(compressed, dparams, nullptr, &ppf_out));
+ EXPECT_THAT(BitsPerPixel(ppf_in, compressed), IsSlightlyBelow(2.95f));
+ EXPECT_THAT(ButteraugliDistance(ppf_in, ppf_out), IsSlightlyBelow(1.05f));
+}
+
+TEST(JpegliTest, JpegliSetAppData) {
+ std::string testimage = "jxl/flower/flower_small.rgb.depth8.ppm";
+ PackedPixelFile ppf_in;
+ ASSERT_TRUE(ReadTestImage(testimage, &ppf_in));
+ EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(ppf_in.color_encoding));
+ EXPECT_EQ(8, ppf_in.info.bits_per_sample);
+
+ std::vector<uint8_t> compressed;
+ JpegSettings settings;
+ settings.app_data = {0xff, 0xe3, 0, 4, 0, 1};
+ EXPECT_TRUE(EncodeJpeg(ppf_in, settings, nullptr, &compressed));
+ EXPECT_EQ(settings.app_data, GetAppData(compressed));
+
+ settings.app_data = {0xff, 0xe3, 0, 6, 0, 1, 2, 3, 0xff, 0xef, 0, 4, 0, 1};
+ EXPECT_TRUE(EncodeJpeg(ppf_in, settings, nullptr, &compressed));
+ EXPECT_EQ(settings.app_data, GetAppData(compressed));
+
+ settings.xyb = true;
+ EXPECT_TRUE(EncodeJpeg(ppf_in, settings, nullptr, &compressed));
+ EXPECT_EQ(0, memcmp(settings.app_data.data(), GetAppData(compressed).data(),
+ settings.app_data.size()));
+
+ settings.xyb = false;
+ settings.app_data = {0};
+ EXPECT_FALSE(EncodeJpeg(ppf_in, settings, nullptr, &compressed));
+
+ settings.app_data = {0xff, 0xe0};
+ EXPECT_FALSE(EncodeJpeg(ppf_in, settings, nullptr, &compressed));
+
+ settings.app_data = {0xff, 0xe0, 0, 2};
+ EXPECT_FALSE(EncodeJpeg(ppf_in, settings, nullptr, &compressed));
+
+ settings.app_data = {0xff, 0xeb, 0, 4, 0};
+ EXPECT_FALSE(EncodeJpeg(ppf_in, settings, nullptr, &compressed));
+
+ settings.app_data = {0xff, 0xeb, 0, 4, 0, 1, 2, 3};
+ EXPECT_FALSE(EncodeJpeg(ppf_in, settings, nullptr, &compressed));
+
+ settings.app_data = {0xff, 0xab, 0, 4, 0, 1};
+ EXPECT_FALSE(EncodeJpeg(ppf_in, settings, nullptr, &compressed));
+
+ settings.xyb = false;
+ settings.app_data = {
+ 0xff, 0xeb, 0, 4, 0, 1, //
+ 0xff, 0xe2, 0, 20, 0x49, 0x43, 0x43, 0x5F, 0x50, //
+ 0x52, 0x4F, 0x46, 0x49, 0x4C, 0x45, 0x00, 0, 1, //
+ 0, 0, 0, 0, //
+ };
+ EXPECT_TRUE(EncodeJpeg(ppf_in, settings, nullptr, &compressed));
+ EXPECT_EQ(settings.app_data, GetAppData(compressed));
+
+ settings.xyb = true;
+ EXPECT_FALSE(EncodeJpeg(ppf_in, settings, nullptr, &compressed));
+}
+
+struct TestConfig {
+ int num_colors;
+ int passes;
+ int dither;
+};
+
+class JpegliColorQuantTestParam : public ::testing::TestWithParam<TestConfig> {
+};
+
+TEST_P(JpegliColorQuantTestParam, JpegliColorQuantizeTest) {
+ TestConfig config = GetParam();
+ std::string testimage = "jxl/flower/flower_small.rgb.depth8.ppm";
+ PackedPixelFile ppf0;
+ ASSERT_TRUE(ReadTestImage(testimage, &ppf0));
+ EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(ppf0.color_encoding));
+ EXPECT_EQ(8, ppf0.info.bits_per_sample);
+
+ std::vector<uint8_t> compressed;
+ ASSERT_TRUE(EncodeWithLibjpeg(ppf0, 90, &compressed));
+
+ PackedPixelFile ppf1;
+ JPGDecompressParams dparams1;
+ dparams1.two_pass_quant = (config.passes == 2);
+ dparams1.num_colors = config.num_colors;
+ dparams1.dither_mode = config.dither;
+ ASSERT_TRUE(DecodeWithLibjpeg(compressed, &ppf1, &dparams1));
+
+ PackedPixelFile ppf2;
+ JpegDecompressParams dparams2;
+ dparams2.two_pass_quant = (config.passes == 2);
+ dparams2.num_colors = config.num_colors;
+ dparams2.dither_mode = config.dither;
+ ASSERT_TRUE(DecodeJpeg(compressed, dparams2, nullptr, &ppf2));
+
+ double dist1 = Butteraugli3Norm(ppf0, ppf1);
+ double dist2 = Butteraugli3Norm(ppf0, ppf2);
+ printf("distance: %f vs %f\n", dist2, dist1);
+ if (config.passes == 1) {
+ if (config.num_colors == 16 && config.dither == 2) {
+ // TODO(szabadka) Fix this case.
+ EXPECT_LT(dist2, dist1 * 1.5);
+ } else {
+ EXPECT_LT(dist2, dist1 * 1.05);
+ }
+ } else if (config.num_colors > 64) {
+ // TODO(szabadka) Fix 2pass quantization for <= 64 colors.
+ EXPECT_LT(dist2, dist1 * 1.1);
+ } else if (config.num_colors > 32) {
+ EXPECT_LT(dist2, dist1 * 1.2);
+ } else {
+ EXPECT_LT(dist2, dist1 * 1.7);
+ }
+}
+
+std::vector<TestConfig> GenerateTests() {
+ std::vector<TestConfig> all_tests;
+ for (int num_colors = 8; num_colors <= 256; num_colors *= 2) {
+ for (int passes = 1; passes <= 2; ++passes) {
+ for (int dither = 0; dither < 3; dither += passes) {
+ TestConfig config;
+ config.num_colors = num_colors;
+ config.passes = passes;
+ config.dither = dither;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ return all_tests;
+}
+
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+ static constexpr const char* kDitherModeStr[] = {"No", "Ordered", "FS"};
+ os << c.passes << "pass";
+ os << c.num_colors << "colors";
+ os << kDitherModeStr[c.dither] << "dither";
+ return os;
+}
+
+std::string TestDescription(const testing::TestParamInfo<TestConfig>& info) {
+ std::stringstream name;
+ name << info.param;
+ return name.str();
+}
+
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(JpegliColorQuantTest,
+ JpegliColorQuantTestParam,
+ testing::ValuesIn(GenerateTests()),
+ TestDescription);
+
+} // namespace
+} // namespace extras
+} // namespace jxl
+#endif // JPEGXL_ENABLE_JPEG
diff --git a/third_party/jpeg-xl/lib/extras/packed_image.h b/third_party/jpeg-xl/lib/extras/packed_image.h
new file mode 100644
index 0000000000..3eaf5a0c6d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/packed_image.h
@@ -0,0 +1,170 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_PACKED_IMAGE_H_
+#define LIB_EXTRAS_PACKED_IMAGE_H_
+
+// Helper class for storing external (int or float, interleaved) images. This is
+// the common format used by other libraries and in the libjxl API.
+
+#include <jxl/codestream_header.h>
+#include <jxl/encode.h>
+#include <jxl/types.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/common.h"
+
+namespace jxl {
+namespace extras {
+
+// Class representing an interleaved image with a bunch of channels.
+class PackedImage {
+ public:
+ PackedImage(size_t xsize, size_t ysize, const JxlPixelFormat& format)
+ : PackedImage(xsize, ysize, format, CalcStride(format, xsize)) {}
+
+ PackedImage Copy() const {
+ PackedImage copy(xsize, ysize, format);
+ memcpy(reinterpret_cast<uint8_t*>(copy.pixels()),
+ reinterpret_cast<const uint8_t*>(pixels()), pixels_size);
+ return copy;
+ }
+
+ // The interleaved pixels as defined in the storage format.
+ void* pixels() const { return pixels_.get(); }
+
+ // The image size in pixels.
+ size_t xsize;
+ size_t ysize;
+
+ // The number of bytes per row.
+ size_t stride;
+
+ // Pixel storage format and buffer size of the pixels_ pointer.
+ JxlPixelFormat format;
+ size_t pixels_size;
+
+ size_t pixel_stride() const {
+ return (BitsPerChannel(format.data_type) * format.num_channels /
+ jxl::kBitsPerByte);
+ }
+
+ static size_t BitsPerChannel(JxlDataType data_type) {
+ switch (data_type) {
+ case JXL_TYPE_UINT8:
+ return 8;
+ case JXL_TYPE_UINT16:
+ return 16;
+ case JXL_TYPE_FLOAT:
+ return 32;
+ case JXL_TYPE_FLOAT16:
+ return 16;
+ default:
+ JXL_ABORT("Unhandled JxlDataType");
+ }
+ }
+
+ private:
+ PackedImage(size_t xsize, size_t ysize, const JxlPixelFormat& format,
+ size_t stride)
+ : xsize(xsize),
+ ysize(ysize),
+ stride(stride),
+ format(format),
+ pixels_size(ysize * stride),
+ pixels_(malloc(std::max<size_t>(1, pixels_size)), free) {}
+
+ static size_t CalcStride(const JxlPixelFormat& format, size_t xsize) {
+ size_t stride = xsize * (BitsPerChannel(format.data_type) *
+ format.num_channels / jxl::kBitsPerByte);
+ if (format.align > 1) {
+ stride = jxl::DivCeil(stride, format.align) * format.align;
+ }
+ return stride;
+ }
+
+ std::unique_ptr<void, decltype(free)*> pixels_;
+};
+
+// Helper class representing a frame, as seen from the API. Animations will have
+// multiple frames, but a single frame can have a color/grayscale channel and
+// multiple extra channels. The order of the extra channels should be the same
+// as all other frames in the same image.
+class PackedFrame {
+ public:
+ template <typename... Args>
+ explicit PackedFrame(Args&&... args) : color(std::forward<Args>(args)...) {}
+
+ PackedFrame Copy() const {
+ PackedFrame copy(color.xsize, color.ysize, color.format);
+ copy.frame_info = frame_info;
+ copy.name = name;
+ copy.color = color.Copy();
+ for (size_t i = 0; i < extra_channels.size(); ++i) {
+ PackedImage ec = extra_channels[i].Copy();
+ copy.extra_channels.emplace_back(std::move(ec));
+ }
+ return copy;
+ }
+
+ // The Frame metadata.
+ JxlFrameHeader frame_info = {};
+ std::string name;
+
+ // The pixel data for the color (or grayscale) channels.
+ PackedImage color;
+ // Extra channel image data.
+ std::vector<PackedImage> extra_channels;
+};
+
+// Optional metadata associated with a file
+class PackedMetadata {
+ public:
+ std::vector<uint8_t> exif;
+ std::vector<uint8_t> iptc;
+ std::vector<uint8_t> jumbf;
+ std::vector<uint8_t> xmp;
+};
+
+// The extra channel metadata information.
+struct PackedExtraChannel {
+ JxlExtraChannelInfo ec_info;
+ size_t index;
+ std::string name;
+};
+
+// Helper class representing a JXL image file as decoded to pixels from the API.
+class PackedPixelFile {
+ public:
+ JxlBasicInfo info = {};
+
+ std::vector<PackedExtraChannel> extra_channels_info;
+
+ // Color information of the decoded pixels.
+ // If the icc is empty, the JxlColorEncoding should be used instead.
+ std::vector<uint8_t> icc;
+ JxlColorEncoding color_encoding = {};
+ // The icc profile of the original image.
+ std::vector<uint8_t> orig_icc;
+
+ std::unique_ptr<PackedFrame> preview_frame;
+ std::vector<PackedFrame> frames;
+
+ PackedMetadata metadata;
+ PackedPixelFile() { JxlEncoderInitBasicInfo(&info); };
+};
+
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_PACKED_IMAGE_H_
diff --git a/third_party/jpeg-xl/lib/extras/packed_image_convert.cc b/third_party/jpeg-xl/lib/extras/packed_image_convert.cc
new file mode 100644
index 0000000000..1dd2b45a7f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/packed_image_convert.cc
@@ -0,0 +1,300 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/packed_image_convert.h"
+
+#include <jxl/color_encoding.h>
+#include <jxl/types.h>
+
+#include <cstdint>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/enc_image_bundle.h"
+#include "lib/jxl/luminance.h"
+
+namespace jxl {
+namespace extras {
+
+Status ConvertPackedFrameToImageBundle(const JxlBasicInfo& info,
+ const PackedFrame& frame,
+ const CodecInOut& io, ThreadPool* pool,
+ ImageBundle* bundle) {
+ JXL_ASSERT(frame.color.pixels() != nullptr);
+ const bool float_in = frame.color.format.data_type == JXL_TYPE_FLOAT16 ||
+ frame.color.format.data_type == JXL_TYPE_FLOAT;
+ size_t frame_bits_per_sample =
+ float_in ? PackedImage::BitsPerChannel(frame.color.format.data_type)
+ : info.bits_per_sample;
+ JXL_ASSERT(frame_bits_per_sample != 0);
+ // It is ok for the frame.color.format.num_channels to not match the
+ // number of channels on the image.
+ JXL_ASSERT(1 <= frame.color.format.num_channels &&
+ frame.color.format.num_channels <= 4);
+
+ const Span<const uint8_t> span(
+ static_cast<const uint8_t*>(frame.color.pixels()),
+ frame.color.pixels_size);
+ JXL_ASSERT(Rect(frame.frame_info.layer_info.crop_x0,
+ frame.frame_info.layer_info.crop_y0,
+ frame.frame_info.layer_info.xsize,
+ frame.frame_info.layer_info.ysize)
+ .IsInside(Rect(0, 0, info.xsize, info.ysize)));
+ if (info.have_animation) {
+ bundle->duration = frame.frame_info.duration;
+ bundle->blend = frame.frame_info.layer_info.blend_info.blendmode > 0;
+ bundle->use_for_next_frame =
+ frame.frame_info.layer_info.save_as_reference > 0;
+ bundle->origin.x0 = frame.frame_info.layer_info.crop_x0;
+ bundle->origin.y0 = frame.frame_info.layer_info.crop_y0;
+ }
+ bundle->name = frame.name; // frame.frame_info.name_length is ignored here.
+ JXL_ASSERT(io.metadata.m.color_encoding.IsGray() ==
+ (frame.color.format.num_channels <= 2));
+
+ JXL_RETURN_IF_ERROR(ConvertFromExternal(
+ span, frame.color.xsize, frame.color.ysize, io.metadata.m.color_encoding,
+ frame_bits_per_sample, frame.color.format, pool, bundle));
+
+ bundle->extra_channels().resize(io.metadata.m.extra_channel_info.size());
+ for (size_t i = 0; i < frame.extra_channels.size(); i++) {
+ const auto& ppf_ec = frame.extra_channels[i];
+ bundle->extra_channels()[i] = ImageF(ppf_ec.xsize, ppf_ec.ysize);
+ JXL_CHECK(BufferToImageF(ppf_ec.format, ppf_ec.xsize, ppf_ec.ysize,
+ ppf_ec.pixels(), ppf_ec.pixels_size, pool,
+ &bundle->extra_channels()[i]));
+ }
+ return true;
+}
+
+Status ConvertPackedPixelFileToCodecInOut(const PackedPixelFile& ppf,
+ ThreadPool* pool, CodecInOut* io) {
+ const bool has_alpha = ppf.info.alpha_bits != 0;
+ JXL_ASSERT(!ppf.frames.empty());
+ if (has_alpha) {
+ JXL_ASSERT(ppf.info.alpha_bits == ppf.info.bits_per_sample);
+ JXL_ASSERT(ppf.info.alpha_exponent_bits ==
+ ppf.info.exponent_bits_per_sample);
+ }
+
+ const bool is_gray = ppf.info.num_color_channels == 1;
+ JXL_ASSERT(ppf.info.num_color_channels == 1 ||
+ ppf.info.num_color_channels == 3);
+
+ // Convert the image metadata
+ io->SetSize(ppf.info.xsize, ppf.info.ysize);
+ io->metadata.m.bit_depth.bits_per_sample = ppf.info.bits_per_sample;
+ io->metadata.m.bit_depth.exponent_bits_per_sample =
+ ppf.info.exponent_bits_per_sample;
+ io->metadata.m.bit_depth.floating_point_sample =
+ ppf.info.exponent_bits_per_sample != 0;
+ io->metadata.m.modular_16_bit_buffer_sufficient =
+ ppf.info.exponent_bits_per_sample == 0 && ppf.info.bits_per_sample <= 12;
+
+ io->metadata.m.SetAlphaBits(ppf.info.alpha_bits,
+ ppf.info.alpha_premultiplied);
+
+ io->metadata.m.xyb_encoded = !ppf.info.uses_original_profile;
+ JXL_ASSERT(ppf.info.orientation > 0 && ppf.info.orientation <= 8);
+ io->metadata.m.orientation = ppf.info.orientation;
+
+ // Convert animation metadata
+ JXL_ASSERT(ppf.frames.size() == 1 || ppf.info.have_animation);
+ io->metadata.m.have_animation = ppf.info.have_animation;
+ io->metadata.m.animation.tps_numerator = ppf.info.animation.tps_numerator;
+ io->metadata.m.animation.tps_denominator = ppf.info.animation.tps_denominator;
+ io->metadata.m.animation.num_loops = ppf.info.animation.num_loops;
+
+ // Convert the color encoding.
+ if (!ppf.icc.empty()) {
+ PaddedBytes icc;
+ icc.append(ppf.icc);
+ if (!io->metadata.m.color_encoding.SetICC(std::move(icc))) {
+ fprintf(stderr, "Warning: error setting ICC profile, assuming SRGB\n");
+ io->metadata.m.color_encoding = ColorEncoding::SRGB(is_gray);
+ } else {
+ if (io->metadata.m.color_encoding.IsGray() != is_gray) {
+ // E.g. JPG image has 3 channels, but gray ICC.
+ return JXL_FAILURE("Embedded ICC does not match image color type");
+ }
+ }
+ } else {
+ JXL_RETURN_IF_ERROR(ConvertExternalToInternalColorEncoding(
+ ppf.color_encoding, &io->metadata.m.color_encoding));
+ if (io->metadata.m.color_encoding.ICC().empty()) {
+ return JXL_FAILURE("Failed to serialize ICC");
+ }
+ }
+
+ // Convert the extra blobs
+ io->blobs.exif = ppf.metadata.exif;
+ io->blobs.iptc = ppf.metadata.iptc;
+ io->blobs.jumbf = ppf.metadata.jumbf;
+ io->blobs.xmp = ppf.metadata.xmp;
+
+ // Append all other extra channels.
+ for (const auto& info : ppf.extra_channels_info) {
+ ExtraChannelInfo out;
+ out.type = static_cast<jxl::ExtraChannel>(info.ec_info.type);
+ out.bit_depth.bits_per_sample = info.ec_info.bits_per_sample;
+ out.bit_depth.exponent_bits_per_sample =
+ info.ec_info.exponent_bits_per_sample;
+ out.bit_depth.floating_point_sample =
+ info.ec_info.exponent_bits_per_sample != 0;
+ out.dim_shift = info.ec_info.dim_shift;
+ out.name = info.name;
+ out.alpha_associated = (info.ec_info.alpha_premultiplied != 0);
+ out.spot_color[0] = info.ec_info.spot_color[0];
+ out.spot_color[1] = info.ec_info.spot_color[1];
+ out.spot_color[2] = info.ec_info.spot_color[2];
+ out.spot_color[3] = info.ec_info.spot_color[3];
+ io->metadata.m.extra_channel_info.push_back(std::move(out));
+ }
+
+ // Convert the preview
+ if (ppf.preview_frame) {
+ size_t preview_xsize = ppf.preview_frame->color.xsize;
+ size_t preview_ysize = ppf.preview_frame->color.ysize;
+ io->metadata.m.have_preview = true;
+ JXL_RETURN_IF_ERROR(
+ io->metadata.m.preview_size.Set(preview_xsize, preview_ysize));
+ JXL_RETURN_IF_ERROR(ConvertPackedFrameToImageBundle(
+ ppf.info, *ppf.preview_frame, *io, pool, &io->preview_frame));
+ }
+
+ // Convert the pixels
+ io->frames.clear();
+ for (const auto& frame : ppf.frames) {
+ ImageBundle bundle(&io->metadata.m);
+ JXL_RETURN_IF_ERROR(
+ ConvertPackedFrameToImageBundle(ppf.info, frame, *io, pool, &bundle));
+ io->frames.push_back(std::move(bundle));
+ }
+
+ if (ppf.info.exponent_bits_per_sample == 0) {
+ // uint case.
+ io->metadata.m.bit_depth.bits_per_sample = io->Main().DetectRealBitdepth();
+ }
+ if (ppf.info.intensity_target != 0) {
+ io->metadata.m.SetIntensityTarget(ppf.info.intensity_target);
+ } else {
+ SetIntensityTarget(&io->metadata.m);
+ }
+ io->CheckMetadata();
+ return true;
+}
+
+// Allows converting from internal CodecInOut to external PackedPixelFile
+Status ConvertCodecInOutToPackedPixelFile(const CodecInOut& io,
+ const JxlPixelFormat& pixel_format,
+ const ColorEncoding& c_desired,
+ ThreadPool* pool,
+ PackedPixelFile* ppf) {
+ const bool has_alpha = io.metadata.m.HasAlpha();
+ bool alpha_premultiplied = false;
+ JXL_ASSERT(!io.frames.empty());
+
+ if (has_alpha) {
+ JXL_ASSERT(io.metadata.m.GetAlphaBits() ==
+ io.metadata.m.bit_depth.bits_per_sample);
+ const auto* alpha_channel = io.metadata.m.Find(ExtraChannel::kAlpha);
+ JXL_ASSERT(alpha_channel->bit_depth.exponent_bits_per_sample ==
+ io.metadata.m.bit_depth.exponent_bits_per_sample);
+ alpha_premultiplied = alpha_channel->alpha_associated;
+ }
+
+ // Convert the image metadata
+ ppf->info.xsize = io.metadata.size.xsize();
+ ppf->info.ysize = io.metadata.size.ysize();
+ ppf->info.num_color_channels = io.metadata.m.color_encoding.Channels();
+ ppf->info.bits_per_sample = io.metadata.m.bit_depth.bits_per_sample;
+ ppf->info.exponent_bits_per_sample =
+ io.metadata.m.bit_depth.exponent_bits_per_sample;
+
+ ppf->info.intensity_target = io.metadata.m.tone_mapping.intensity_target;
+ ppf->info.linear_below = io.metadata.m.tone_mapping.linear_below;
+ ppf->info.min_nits = io.metadata.m.tone_mapping.min_nits;
+ ppf->info.relative_to_max_display =
+ io.metadata.m.tone_mapping.relative_to_max_display;
+
+ ppf->info.alpha_bits = io.metadata.m.GetAlphaBits();
+ ppf->info.alpha_premultiplied = alpha_premultiplied;
+
+ ppf->info.uses_original_profile = !io.metadata.m.xyb_encoded;
+ JXL_ASSERT(0 < io.metadata.m.orientation && io.metadata.m.orientation <= 8);
+ ppf->info.orientation =
+ static_cast<JxlOrientation>(io.metadata.m.orientation);
+ ppf->info.num_color_channels = io.metadata.m.color_encoding.Channels();
+
+ // Convert animation metadata
+ JXL_ASSERT(io.frames.size() == 1 || io.metadata.m.have_animation);
+ ppf->info.have_animation = io.metadata.m.have_animation;
+ ppf->info.animation.tps_numerator = io.metadata.m.animation.tps_numerator;
+ ppf->info.animation.tps_denominator = io.metadata.m.animation.tps_denominator;
+ ppf->info.animation.num_loops = io.metadata.m.animation.num_loops;
+
+ // Convert the color encoding
+ ppf->icc.assign(c_desired.ICC().begin(), c_desired.ICC().end());
+ ConvertInternalToExternalColorEncoding(c_desired, &ppf->color_encoding);
+
+ // Convert the extra blobs
+ ppf->metadata.exif = io.blobs.exif;
+ ppf->metadata.iptc = io.blobs.iptc;
+ ppf->metadata.jumbf = io.blobs.jumbf;
+ ppf->metadata.xmp = io.blobs.xmp;
+ const bool float_out = pixel_format.data_type == JXL_TYPE_FLOAT ||
+ pixel_format.data_type == JXL_TYPE_FLOAT16;
+ // Convert the pixels
+ ppf->frames.clear();
+ for (const auto& frame : io.frames) {
+ JXL_ASSERT(frame.metadata()->bit_depth.bits_per_sample != 0);
+ // It is ok for the frame.color().kNumPlanes to not match the
+ // number of channels on the image.
+ const uint32_t num_channels =
+ frame.metadata()->color_encoding.Channels() + has_alpha;
+ JxlPixelFormat format{/*num_channels=*/num_channels,
+ /*data_type=*/pixel_format.data_type,
+ /*endianness=*/pixel_format.endianness,
+ /*align=*/pixel_format.align};
+
+ PackedFrame packed_frame(frame.oriented_xsize(), frame.oriented_ysize(),
+ format);
+ const size_t bits_per_sample =
+ float_out ? packed_frame.color.BitsPerChannel(pixel_format.data_type)
+ : ppf->info.bits_per_sample;
+ packed_frame.name = frame.name;
+ packed_frame.frame_info.name_length = frame.name.size();
+ // Color transform
+ ImageBundle ib = frame.Copy();
+ const ImageBundle* to_color_transform = &ib;
+ ImageMetadata metadata = io.metadata.m;
+ ImageBundle store(&metadata);
+ const ImageBundle* transformed;
+ // TODO(firsching): handle the transform here.
+ JXL_RETURN_IF_ERROR(TransformIfNeeded(*to_color_transform, c_desired,
+ GetJxlCms(), pool, &store,
+ &transformed));
+
+ JXL_RETURN_IF_ERROR(ConvertToExternal(
+ *transformed, bits_per_sample, float_out, format.num_channels,
+ format.endianness,
+ /* stride_out=*/packed_frame.color.stride, pool,
+ packed_frame.color.pixels(), packed_frame.color.pixels_size,
+ /*out_callback=*/{}, frame.metadata()->GetOrientation()));
+
+ // TODO(firsching): Convert the extra channels, beside one potential alpha
+ // channel. FIXME!
+ JXL_CHECK(frame.extra_channels().size() <= has_alpha);
+ ppf->frames.push_back(std::move(packed_frame));
+ }
+
+ return true;
+}
+} // namespace extras
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/packed_image_convert.h b/third_party/jpeg-xl/lib/extras/packed_image_convert.h
new file mode 100644
index 0000000000..100adccc09
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/packed_image_convert.h
@@ -0,0 +1,36 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_PACKED_IMAGE_CONVERT_H_
+#define LIB_EXTRAS_PACKED_IMAGE_CONVERT_H_
+
+// Helper functions to convert from the external image types to the internal
+// CodecInOut to help transitioning to the external types.
+
+#include <jxl/types.h>
+
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+
+namespace jxl {
+namespace extras {
+
+// Converts an external PackedPixelFile to the internal CodecInOut for use with
+// internal functions directly.
+Status ConvertPackedPixelFileToCodecInOut(const PackedPixelFile& ppf,
+ ThreadPool* pool, CodecInOut* io);
+
+// Converts an internal CodecInOut for use with internal function to an external
+// PackedPixelFile.
+Status ConvertCodecInOutToPackedPixelFile(const CodecInOut& io,
+ const JxlPixelFormat& pixel_format,
+ const ColorEncoding& c_desired,
+ ThreadPool* pool,
+ PackedPixelFile* ppf);
+} // namespace extras
+} // namespace jxl
+
+#endif // LIB_EXTRAS_PACKED_IMAGE_CONVERT_H_
diff --git a/third_party/jpeg-xl/lib/extras/size_constraints.h b/third_party/jpeg-xl/lib/extras/size_constraints.h
new file mode 100644
index 0000000000..cf06f8cb22
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/size_constraints.h
@@ -0,0 +1,43 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_SIZE_CONSTRAINTS_H_
+#define LIB_JXL_SIZE_CONSTRAINTS_H_
+
+#include <cstdint>
+#include <type_traits>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+struct SizeConstraints {
+ // Upper limit on pixel dimensions/area, enforced by VerifyDimensions
+ // (called from decoders). Fuzzers set smaller values to limit memory use.
+ uint32_t dec_max_xsize = 0xFFFFFFFFu;
+ uint32_t dec_max_ysize = 0xFFFFFFFFu;
+ uint64_t dec_max_pixels = 0xFFFFFFFFu; // Might be up to ~0ull
+};
+
+template <typename T,
+ class = typename std::enable_if<std::is_unsigned<T>::value>::type>
+Status VerifyDimensions(const SizeConstraints* constraints, T xs, T ys) {
+ if (!constraints) return true;
+
+ if (xs == 0 || ys == 0) return JXL_FAILURE("Empty image.");
+ if (xs > constraints->dec_max_xsize) return JXL_FAILURE("Image too wide.");
+ if (ys > constraints->dec_max_ysize) return JXL_FAILURE("Image too tall.");
+
+ const uint64_t num_pixels = static_cast<uint64_t>(xs) * ys;
+ if (num_pixels > constraints->dec_max_pixels) {
+ return JXL_FAILURE("Image too big.");
+ }
+
+ return true;
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_SIZE_CONSTRAINTS_H_
diff --git a/third_party/jpeg-xl/lib/extras/time.cc b/third_party/jpeg-xl/lib/extras/time.cc
new file mode 100644
index 0000000000..73d1b8f260
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/time.cc
@@ -0,0 +1,60 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/time.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <ctime>
+
+#include "lib/jxl/base/os_macros.h" // for JXL_OS_*
+
+#if JXL_OS_WIN
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif // NOMINMAX
+#include <windows.h>
+#endif // JXL_OS_WIN
+
+#if JXL_OS_MAC
+#include <mach/mach.h>
+#include <mach/mach_time.h>
+#endif // JXL_OS_MAC
+
+#if JXL_OS_HAIKU
+#include <OS.h>
+#endif // JXL_OS_HAIKU
+
+namespace jxl {
+
+double Now() {
+#if JXL_OS_WIN
+ LARGE_INTEGER counter;
+ (void)QueryPerformanceCounter(&counter);
+ LARGE_INTEGER freq;
+ (void)QueryPerformanceFrequency(&freq);
+ return double(counter.QuadPart) / freq.QuadPart;
+#elif JXL_OS_MAC
+ const auto t = mach_absolute_time();
+ // On OSX/iOS platform the elapsed time is cpu time unit
+ // We have to query the time base information to convert it back
+ // See https://developer.apple.com/library/mac/qa/qa1398/_index.html
+ static mach_timebase_info_data_t timebase;
+ if (timebase.denom == 0) {
+ (void)mach_timebase_info(&timebase);
+ }
+ return double(t) * timebase.numer / timebase.denom * 1E-9;
+#elif JXL_OS_HAIKU
+ return double(system_time_nsecs()) * 1E-9;
+#else
+ timespec t;
+ clock_gettime(CLOCK_MONOTONIC, &t);
+ return t.tv_sec + t.tv_nsec * 1E-9;
+#endif
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/extras/time.h b/third_party/jpeg-xl/lib/extras/time.h
new file mode 100644
index 0000000000..c71414b877
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/time.h
@@ -0,0 +1,19 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_TIME_H_
+#define LIB_EXTRAS_TIME_H_
+
+// OS-specific function for timing.
+
+namespace jxl {
+
+// Returns current time [seconds] from a monotonic clock with unspecified
+// starting point - only suitable for computing elapsed time.
+double Now();
+
+} // namespace jxl
+
+#endif // LIB_EXTRAS_TIME_H_
diff --git a/third_party/jpeg-xl/lib/extras/tone_mapping.cc b/third_party/jpeg-xl/lib/extras/tone_mapping.cc
new file mode 100644
index 0000000000..1cdd6ed826
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/tone_mapping.cc
@@ -0,0 +1,132 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/tone_mapping.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/extras/tone_mapping.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/dec_tone_mapping-inl.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/image_bundle.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+static constexpr float rec2020_luminances[3] = {0.2627f, 0.6780f, 0.0593f};
+
+Status ToneMapFrame(const std::pair<float, float> display_nits,
+ ImageBundle* const ib, ThreadPool* const pool) {
+ // Perform tone mapping as described in Report ITU-R BT.2390-8, section 5.4
+ // (pp. 23-25).
+ // https://www.itu.int/pub/R-REP-BT.2390-8-2020
+
+ HWY_FULL(float) df;
+ using V = decltype(Zero(df));
+
+ ColorEncoding linear_rec2020;
+ linear_rec2020.SetColorSpace(ColorSpace::kRGB);
+ linear_rec2020.primaries = Primaries::k2100;
+ linear_rec2020.white_point = WhitePoint::kD65;
+ linear_rec2020.tf.SetTransferFunction(TransferFunction::kLinear);
+ JXL_RETURN_IF_ERROR(linear_rec2020.CreateICC());
+ JXL_RETURN_IF_ERROR(ib->TransformTo(linear_rec2020, GetJxlCms(), pool));
+
+ Rec2408ToneMapper<decltype(df)> tone_mapper(
+ {ib->metadata()->tone_mapping.min_nits,
+ ib->metadata()->IntensityTarget()},
+ display_nits, rec2020_luminances);
+
+ return RunOnPool(
+ pool, 0, ib->ysize(), ThreadPool::NoInit,
+ [&](const uint32_t y, size_t /* thread */) {
+ float* const JXL_RESTRICT row_r = ib->color()->PlaneRow(0, y);
+ float* const JXL_RESTRICT row_g = ib->color()->PlaneRow(1, y);
+ float* const JXL_RESTRICT row_b = ib->color()->PlaneRow(2, y);
+ for (size_t x = 0; x < ib->xsize(); x += Lanes(df)) {
+ V red = Load(df, row_r + x);
+ V green = Load(df, row_g + x);
+ V blue = Load(df, row_b + x);
+ tone_mapper.ToneMap(&red, &green, &blue);
+ Store(red, df, row_r + x);
+ Store(green, df, row_g + x);
+ Store(blue, df, row_b + x);
+ }
+ },
+ "ToneMap");
+}
+
+Status GamutMapFrame(ImageBundle* const ib, float preserve_saturation,
+ ThreadPool* const pool) {
+ HWY_FULL(float) df;
+ using V = decltype(Zero(df));
+
+ ColorEncoding linear_rec2020;
+ linear_rec2020.SetColorSpace(ColorSpace::kRGB);
+ linear_rec2020.primaries = Primaries::k2100;
+ linear_rec2020.white_point = WhitePoint::kD65;
+ linear_rec2020.tf.SetTransferFunction(TransferFunction::kLinear);
+ JXL_RETURN_IF_ERROR(linear_rec2020.CreateICC());
+ JXL_RETURN_IF_ERROR(ib->TransformTo(linear_rec2020, GetJxlCms(), pool));
+
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, ib->ysize(), ThreadPool::NoInit,
+ [&](const uint32_t y, size_t /* thread*/) {
+ float* const JXL_RESTRICT row_r = ib->color()->PlaneRow(0, y);
+ float* const JXL_RESTRICT row_g = ib->color()->PlaneRow(1, y);
+ float* const JXL_RESTRICT row_b = ib->color()->PlaneRow(2, y);
+ for (size_t x = 0; x < ib->xsize(); x += Lanes(df)) {
+ V red = Load(df, row_r + x);
+ V green = Load(df, row_g + x);
+ V blue = Load(df, row_b + x);
+ GamutMap(&red, &green, &blue, rec2020_luminances,
+ preserve_saturation);
+ Store(red, df, row_r + x);
+ Store(green, df, row_g + x);
+ Store(blue, df, row_b + x);
+ }
+ },
+ "GamutMap"));
+
+ return true;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+namespace {
+HWY_EXPORT(ToneMapFrame);
+HWY_EXPORT(GamutMapFrame);
+} // namespace
+
+Status ToneMapTo(const std::pair<float, float> display_nits,
+ CodecInOut* const io, ThreadPool* const pool) {
+ const auto tone_map_frame = HWY_DYNAMIC_DISPATCH(ToneMapFrame);
+ for (ImageBundle& ib : io->frames) {
+ JXL_RETURN_IF_ERROR(tone_map_frame(display_nits, &ib, pool));
+ }
+ io->metadata.m.SetIntensityTarget(display_nits.second);
+ return true;
+}
+
+Status GamutMap(CodecInOut* const io, float preserve_saturation,
+ ThreadPool* const pool) {
+ const auto gamut_map_frame = HWY_DYNAMIC_DISPATCH(GamutMapFrame);
+ for (ImageBundle& ib : io->frames) {
+ JXL_RETURN_IF_ERROR(gamut_map_frame(&ib, preserve_saturation, pool));
+ }
+ return true;
+}
+
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/extras/tone_mapping.h b/third_party/jpeg-xl/lib/extras/tone_mapping.h
new file mode 100644
index 0000000000..1f474101eb
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/tone_mapping.h
@@ -0,0 +1,30 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_TONE_MAPPING_H_
+#define LIB_EXTRAS_TONE_MAPPING_H_
+
+#include "lib/jxl/codec_in_out.h"
+
+namespace jxl {
+
+// Important: after calling this, the result will contain many out-of-gamut
+// colors. It is very strongly recommended to call GamutMap afterwards to
+// rectify this.
+Status ToneMapTo(std::pair<float, float> display_nits, CodecInOut* io,
+ ThreadPool* pool = nullptr);
+
+// `preserve_saturation` indicates to what extent to favor saturation over
+// luminance when mapping out-of-gamut colors to Rec. 2020. 0 preserves
+// luminance at the complete expense of saturation, while 1 gives the most
+// saturated color with the same hue that Rec. 2020 can represent even if it
+// means lowering the luminance. Values in between correspond to linear mixtures
+// of those two extremes.
+Status GamutMap(CodecInOut* io, float preserve_saturation,
+ ThreadPool* pool = nullptr);
+
+} // namespace jxl
+
+#endif // LIB_EXTRAS_TONE_MAPPING_H_
diff --git a/third_party/jpeg-xl/lib/extras/tone_mapping_gbench.cc b/third_party/jpeg-xl/lib/extras/tone_mapping_gbench.cc
new file mode 100644
index 0000000000..f1d5357345
--- /dev/null
+++ b/third_party/jpeg-xl/lib/extras/tone_mapping_gbench.cc
@@ -0,0 +1,40 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "benchmark/benchmark.h"
+#include "lib/extras/codec.h"
+#include "lib/extras/tone_mapping.h"
+#include "lib/jxl/enc_color_management.h"
+
+namespace jxl {
+
+static void BM_ToneMapping(benchmark::State& state) {
+ Image3F color(2268, 1512);
+ FillImage(0.5f, &color);
+
+ // Use linear Rec. 2020 so that `ToneMapTo` doesn't have to convert to it and
+ // we mainly measure the tone mapping itself.
+ ColorEncoding linear_rec2020;
+ linear_rec2020.SetColorSpace(ColorSpace::kRGB);
+ linear_rec2020.primaries = Primaries::k2100;
+ linear_rec2020.white_point = WhitePoint::kD65;
+ linear_rec2020.tf.SetTransferFunction(TransferFunction::kLinear);
+ JXL_CHECK(linear_rec2020.CreateICC());
+
+ for (auto _ : state) {
+ state.PauseTiming();
+ CodecInOut tone_mapping_input;
+ tone_mapping_input.SetFromImage(CopyImage(color), linear_rec2020);
+ tone_mapping_input.metadata.m.SetIntensityTarget(255);
+ state.ResumeTiming();
+
+ JXL_CHECK(ToneMapTo({0.1, 100}, &tone_mapping_input));
+ }
+
+ state.SetItemsProcessed(state.iterations() * color.xsize() * color.ysize());
+}
+BENCHMARK(BM_ToneMapping);
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/gbench_main.cc b/third_party/jpeg-xl/lib/gbench_main.cc
new file mode 100644
index 0000000000..1cc1772017
--- /dev/null
+++ b/third_party/jpeg-xl/lib/gbench_main.cc
@@ -0,0 +1,8 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "benchmark/benchmark.h"
+
+BENCHMARK_MAIN();
diff --git a/third_party/jpeg-xl/lib/include/jxl/butteraugli.h b/third_party/jpeg-xl/lib/include/jxl/butteraugli.h
new file mode 100644
index 0000000000..88b97f6d07
--- /dev/null
+++ b/third_party/jpeg-xl/lib/include/jxl/butteraugli.h
@@ -0,0 +1,160 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_butteraugli
+ * @{
+ * @file butteraugli.h
+ * @brief Butteraugli API for JPEG XL.
+ */
+
+#ifndef JXL_BUTTERAUGLI_H_
+#define JXL_BUTTERAUGLI_H_
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#include <jxl/jxl_export.h>
+#include <jxl/memory_manager.h>
+#include <jxl/parallel_runner.h>
+#include <jxl/types.h>
+
+/**
+ * Opaque structure that holds a butteraugli API.
+ *
+ * Allocated and initialized with JxlButteraugliApiCreate().
+ * Cleaned up and deallocated with JxlButteraugliApiDestroy().
+ */
+typedef struct JxlButteraugliApiStruct JxlButteraugliApi;
+
+/**
+ * Opaque structure that holds intermediary butteraugli results.
+ *
+ * Allocated and initialized with JxlButteraugliCompute().
+ * Cleaned up and deallocated with JxlButteraugliResultDestroy().
+ */
+typedef struct JxlButteraugliResultStruct JxlButteraugliResult;
+
+/**
+ * Deinitializes and frees JxlButteraugliResult instance.
+ *
+ * @param result instance to be cleaned up and deallocated.
+ */
+JXL_EXPORT void JxlButteraugliResultDestroy(JxlButteraugliResult* result);
+
+/**
+ * Creates an instance of JxlButteraugliApi and initializes it.
+ *
+ * @p memory_manager will be used for all the library dynamic allocations made
+ * from this instance. The parameter may be NULL, in which case the default
+ * allocator will be used. See jxl/memory_manager.h for details.
+ *
+ * @param memory_manager custom allocator function. It may be NULL. The memory
+ * manager will be copied internally.
+ * @return @c NULL if the instance can not be allocated or initialized
+ * @return pointer to initialized JxlEncoder otherwise
+ */
+JXL_EXPORT JxlButteraugliApi* JxlButteraugliApiCreate(
+ const JxlMemoryManager* memory_manager);
+
+/**
+ * Set the parallel runner for multithreading.
+ *
+ * @param api api instance.
+ * @param parallel_runner function pointer to runner for multithreading. A
+ * multithreaded runner should be set to reach fast performance.
+ * @param parallel_runner_opaque opaque pointer for parallel_runner.
+ */
+JXL_EXPORT void JxlButteraugliApiSetParallelRunner(
+ JxlButteraugliApi* api, JxlParallelRunner parallel_runner,
+ void* parallel_runner_opaque);
+
+/**
+ * Set the hf_asymmetry option for butteraugli.
+ *
+ * @param api api instance.
+ * @param v new hf_asymmetry value.
+ */
+JXL_EXPORT void JxlButteraugliApiSetHFAsymmetry(JxlButteraugliApi* api,
+ float v);
+
+/**
+ * Set the intensity_target option for butteraugli.
+ *
+ * @param api api instance.
+ * @param v new intensity_target value.
+ */
+JXL_EXPORT void JxlButteraugliApiSetIntensityTarget(JxlButteraugliApi* api,
+ float v);
+
+/**
+ * Deinitializes and frees JxlButteraugliApi instance.
+ *
+ * @param api instance to be cleaned up and deallocated.
+ */
+JXL_EXPORT void JxlButteraugliApiDestroy(JxlButteraugliApi* api);
+
+/**
+ * Computes intermediary butteraugli result between an original image and a
+ * distortion.
+ *
+ * @param api api instance for this computation.
+ * @param xsize width of the compared images.
+ * @param ysize height of the compared images.
+ * @param pixel_format_orig pixel format for original image.
+ * @param buffer_orig pixel data for original image.
+ * @param size_orig size of buffer_orig in bytes.
+ * @param pixel_format_dist pixel format for distortion.
+ * @param buffer_dist pixel data for distortion.
+ * @param size_dist size of buffer_dist in bytes.
+ * @return @c NULL if the results can not be computed or initialized.
+ * @return pointer to initialized and computed intermediary result.
+ */
+JXL_EXPORT JxlButteraugliResult* JxlButteraugliCompute(
+ const JxlButteraugliApi* api, uint32_t xsize, uint32_t ysize,
+ const JxlPixelFormat* pixel_format_orig, const void* buffer_orig,
+ size_t size_orig, const JxlPixelFormat* pixel_format_dist,
+ const void* buffer_dist, size_t size_dist);
+
+/**
+ * Computes butteraugli max distance based on an intermediary butteraugli
+ * result.
+ *
+ * @param result intermediary result instance.
+ * @return max distance.
+ */
+JXL_EXPORT float JxlButteraugliResultGetMaxDistance(
+ const JxlButteraugliResult* result);
+
+/**
+ * Computes a butteraugli distance based on an intermediary butteraugli result.
+ *
+ * @param result intermediary result instance.
+ * @param pnorm pnorm to calculate.
+ * @return distance using the given pnorm.
+ */
+JXL_EXPORT float JxlButteraugliResultGetDistance(
+ const JxlButteraugliResult* result, float pnorm);
+
+/**
+ * Get a pointer to the distmap in the result.
+ *
+ * @param result intermediary result instance.
+ * @param buffer will be set to the distmap. The distance value for (x,y) will
+ * be available at buffer + y * row_stride + x.
+ * @param row_stride will be set to the row stride of the distmap.
+ */
+JXL_EXPORT void JxlButteraugliResultGetDistmap(
+ const JxlButteraugliResult* result, const float** buffer,
+ uint32_t* row_stride);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_BUTTERAUGLI_H_ */
+
+/** @}*/
diff --git a/third_party/jpeg-xl/lib/include/jxl/butteraugli_cxx.h b/third_party/jpeg-xl/lib/include/jxl/butteraugli_cxx.h
new file mode 100644
index 0000000000..34f904c0df
--- /dev/null
+++ b/third_party/jpeg-xl/lib/include/jxl/butteraugli_cxx.h
@@ -0,0 +1,60 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/// @addtogroup libjxl_butteraugli
+/// @{
+///
+/// @file butteraugli_cxx.h
+/// @brief C++ header-only helper for @ref butteraugli.h.
+///
+/// There's no binary library associated with the header since this is a header
+/// only library.
+
+#ifndef JXL_BUTTERAUGLI_CXX_H_
+#define JXL_BUTTERAUGLI_CXX_H_
+
+#include <jxl/butteraugli.h>
+
+#include <memory>
+
+#if !(defined(__cplusplus) || defined(c_plusplus))
+#error "This a C++ only header. Use jxl/butteraugli.h from C sources."
+#endif
+
+/// Struct to call JxlButteraugliApiDestroy from the JxlButteraugliApiPtr
+/// unique_ptr.
+struct JxlButteraugliApiDestroyStruct {
+ /// Calls @ref JxlButteraugliApiDestroy() on the passed api.
+ void operator()(JxlButteraugliApi* api) { JxlButteraugliApiDestroy(api); }
+};
+
+/// std::unique_ptr<> type that calls JxlButteraugliApiDestroy() when releasing
+/// the pointer.
+///
+/// Use this helper type from C++ sources to ensure the api is destroyed and
+/// their internal resources released.
+typedef std::unique_ptr<JxlButteraugliApi, JxlButteraugliApiDestroyStruct>
+ JxlButteraugliApiPtr;
+
+/// Struct to call JxlButteraugliResultDestroy from the JxlButteraugliResultPtr
+/// unique_ptr.
+struct JxlButteraugliResultDestroyStruct {
+ /// Calls @ref JxlButteraugliResultDestroy() on the passed result object.
+ void operator()(JxlButteraugliResult* result) {
+ JxlButteraugliResultDestroy(result);
+ }
+};
+
+/// std::unique_ptr<> type that calls JxlButteraugliResultDestroy() when
+/// releasing the pointer.
+///
+/// Use this helper type from C++ sources to ensure the result object is
+/// destroyed and their internal resources released.
+typedef std::unique_ptr<JxlButteraugliResult, JxlButteraugliResultDestroyStruct>
+ JxlButteraugliResultPtr;
+
+#endif // JXL_BUTTERAUGLI_CXX_H_
+
+/// @}
diff --git a/third_party/jpeg-xl/lib/include/jxl/cms_interface.h b/third_party/jpeg-xl/lib/include/jxl/cms_interface.h
new file mode 100644
index 0000000000..684281e641
--- /dev/null
+++ b/third_party/jpeg-xl/lib/include/jxl/cms_interface.h
@@ -0,0 +1,232 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_common
+ * @{
+ * @file cms_interface.h
+ * @brief Interface to allow the injection of different color management systems
+ * (CMSes, also called color management modules, or CMMs) in JPEG XL.
+ *
+ * A CMS is needed by the JPEG XL encoder and decoder to perform colorspace
+ * conversions. This defines an interface that can be implemented for different
+ * CMSes and then passed to the library.
+ */
+
+#ifndef JXL_CMS_INTERFACE_H_
+#define JXL_CMS_INTERFACE_H_
+
+#include <jxl/color_encoding.h>
+#include <jxl/types.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Represents an input or output colorspace to a color transform, as a
+ * serialized ICC profile. */
+typedef struct {
+ /** The serialized ICC profile. This is guaranteed to be present and valid. */
+ struct {
+ const uint8_t* data;
+ size_t size;
+ } icc;
+
+ /** Structured representation of the colorspace, if applicable. If all fields
+ * are different from their "unknown" value, then this is equivalent to the
+ * ICC representation of the colorspace. If some are "unknown", those that are
+ * not are still valid and can still be used on their own if they are useful.
+ */
+ JxlColorEncoding color_encoding;
+
+ /** Number of components per pixel. This can be deduced from the other
+ * representations of the colorspace but is provided for convenience and
+ * validation. */
+ size_t num_channels;
+} JxlColorProfile;
+
+/** Allocates and returns the data needed for @p num_threads parallel transforms
+ * from the @p input colorspace to @p output, with up to @p pixels_per_thread
+ * pixels to transform per call to JxlCmsInterface::run. @p init_data comes
+ * directly from the JxlCmsInterface instance. Since @c run only receives the
+ * data returned by @c init, a reference to @p init_data should be kept there
+ * if access to it is desired in @c run. Likewise for JxlCmsInterface::destroy.
+ *
+ * The ICC data in @p input and @p output is guaranteed to outlive the @c init /
+ * @c run / @c destroy cycle.
+ *
+ * @param init_data JxlCmsInterface::init_data passed as-is.
+ * @param num_threads the maximum number of threads from which
+ * JxlCmsInterface::run will be called.
+ * @param pixels_per_thread the maximum number of pixels that each call to
+ * JxlCmsInterface::run will have to transform.
+ * @param input_profile the input colorspace for the transform.
+ * @param output_profile the colorspace to which JxlCmsInterface::run should
+ * convert the input data.
+ * @param intensity_target for colorspaces where luminance is relative
+ * (essentially: not PQ), indicates the luminance at which (1, 1, 1) will
+ * be displayed. This is useful for conversions between PQ and a relative
+ * luminance colorspace, in either direction: @p intensity_target cd/m²
+ * in PQ should map to and from (1, 1, 1) in the relative one.\n
+ * It is also used for conversions to and from HLG, as it is
+ * scene-referred while other colorspaces are assumed to be
+ * display-referred. That is, conversions from HLG should apply the OOTF
+ * for a peak display luminance of @p intensity_target, and conversions
+ * to HLG should undo it. The OOTF is a gamma function applied to the
+ * luminance channel (https://www.itu.int/rec/R-REC-BT.2100-2-201807-I
+ * page 7), with the gamma value computed as
+ * <tt>1.2 * 1.111^log2(intensity_target / 1000)</tt> (footnote 2 page 8
+ * of the same document).
+ * @return The data needed for the transform, or @c NULL in case of failure.
+ * This will be passed to the other functions as @c user_data.
+ */
+typedef void* (*jpegxl_cms_init_func)(void* init_data, size_t num_threads,
+ size_t pixels_per_thread,
+ const JxlColorProfile* input_profile,
+ const JxlColorProfile* output_profile,
+ float intensity_target);
+
+/** Returns a buffer that can be used by callers of the interface to store the
+ * input of the conversion or read its result, if they pass it as the input or
+ * output of the @c run function.
+ * @param user_data the data returned by @c init.
+ * @param thread the index of the thread for which to return a buffer.
+ * @return A buffer that can be used by the caller for passing to @c run.
+ */
+typedef float* (*jpegxl_cms_get_buffer_func)(void* user_data, size_t thread);
+
+/** Executes one transform and returns true on success or false on error. It
+ * must be possible to call this from different threads with different values
+ * for @p thread, all between 0 (inclusive) and the value of @p num_threads
+ * passed to @c init (exclusive). It is allowed to implement this by locking
+ * such that the transforms are essentially performed sequentially, if such a
+ * performance profile is acceptable. @p user_data is the data returned by
+ * @c init.
+ * The buffers each contain @p num_pixels × @c num_channels interleaved floating
+ * point (0..1) samples where @c num_channels is the number of color channels of
+ * their respective color profiles. It is guaranteed that the only case in which
+ * they might overlap is if the output has fewer channels than the input, in
+ * which case the pointers may be identical.
+ * For CMYK data, 0 represents the maximum amount of ink while 1 represents no
+ * ink.
+ * @param user_data the data returned by @c init.
+ * @param thread the index of the thread from which the function is being
+ * called.
+ * @param input_buffer the buffer containing the pixel data to be transformed.
+ * @param output_buffer the buffer receiving the transformed pixel data.
+ * @param num_pixels the number of pixels to transform from @p input to
+ * @p output.
+ * @return JXL_TRUE on success, JXL_FALSE on failure.
+ */
+typedef JXL_BOOL (*jpegxl_cms_run_func)(void* user_data, size_t thread,
+ const float* input_buffer,
+ float* output_buffer,
+ size_t num_pixels);
+
+/** Performs the necessary clean-up and frees the memory allocated for user
+ * data.
+ */
+typedef void (*jpegxl_cms_destroy_func)(void*);
+
+/**
+ * Interface for performing colorspace transforms. The @c init function can be
+ * called several times to instantiate several transforms, including before
+ * other transforms have been destroyed.
+ *
+ * The call sequence for a given colorspace transform could look like the
+ * following:
+ * @dot
+ * digraph calls {
+ * newrank = true
+ * node [shape = box, fontname = monospace]
+ * init [label = "user_data <- init(\l\
+ * init_data = data,\l\
+ * num_threads = 3,\l\
+ * pixels_per_thread = 20,\l\
+ * input = (sRGB, 3 channels),\l\
+ * output = (Display-P3, 3 channels),\l\
+ * intensity_target = 255\l\
+ * )\l"]
+ * subgraph cluster_0 {
+ * color = lightgrey
+ * label = "thread 1"
+ * labeljust = "c"
+ * run_1_1 [label = "run(\l\
+ * user_data,\l\
+ * thread = 1,\l\
+ * input = in[0],\l\
+ * output = out[0],\l\
+ * num_pixels = 20\l\
+ * )\l"]
+ * run_1_2 [label = "run(\l\
+ * user_data,\l\
+ * thread = 1,\l\
+ * input = in[3],\l\
+ * output = out[3],\l\
+ * num_pixels = 20\l\
+ * )\l"]
+ * }
+ * subgraph cluster_1 {
+ * color = lightgrey
+ * label = "thread 2"
+ * labeljust = "l"
+ * run_2_1 [label = "run(\l\
+ * user_data,\l\
+ * thread = 2,\l\
+ * input = in[1],\l\
+ * output = out[1],\l\
+ * num_pixels = 20\l\
+ * )\l"]
+ * run_2_2 [label = "run(\l\
+ * user_data,\l\
+ * thread = 2,\l\
+ * input = in[4],\l\
+ * output = out[4],\l\
+ * num_pixels = 13\l\
+ * )\l"]
+ * }
+ * subgraph cluster_3 {
+ * color = lightgrey
+ * label = "thread 3"
+ * labeljust = "c"
+ * run_3_1 [label = "run(\l\
+ * user_data,\l\
+ * thread = 3,\l\
+ * input = in[2],\l\
+ * output = out[2],\l\
+ * num_pixels = 20\l\
+ * )\l"]
+ * }
+ * init -> {run_1_1; run_2_1; run_3_1; rank = same}
+ * run_1_1 -> run_1_2
+ * run_2_1 -> run_2_2
+ * {run_1_2; run_2_2, run_3_1} -> "destroy(user_data)"
+ * }
+ * @enddot
+ */
+typedef struct {
+ /** CMS-specific data that will be passed to @ref init. */
+ void* init_data;
+ /** Prepares a colorspace transform as described in the documentation of @ref
+ * jpegxl_cms_init_func. */
+ jpegxl_cms_init_func init;
+ /** Returns a buffer that can be used as input to @c run. */
+ jpegxl_cms_get_buffer_func get_src_buf;
+ /** Returns a buffer that can be used as output from @c run. */
+ jpegxl_cms_get_buffer_func get_dst_buf;
+ /** Executes the transform on a batch of pixels, per @ref jpegxl_cms_run_func.
+ */
+ jpegxl_cms_run_func run;
+ /** Cleans up the transform. */
+ jpegxl_cms_destroy_func destroy;
+} JxlCmsInterface;
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_CMS_INTERFACE_H_ */
+
+/** @} */
diff --git a/third_party/jpeg-xl/lib/include/jxl/codestream_header.h b/third_party/jpeg-xl/lib/include/jxl/codestream_header.h
new file mode 100644
index 0000000000..66dd7df4ce
--- /dev/null
+++ b/third_party/jpeg-xl/lib/include/jxl/codestream_header.h
@@ -0,0 +1,430 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_common
+ * @{
+ * @file codestream_header.h
+ * @brief Definitions of structs and enums for the metadata from the JPEG XL
+ * codestream headers (signature, metadata, preview dimensions, ...), excluding
+ * color encoding which is in color_encoding.h.
+ */
+
+#ifndef JXL_CODESTREAM_HEADER_H_
+#define JXL_CODESTREAM_HEADER_H_
+
+#include <jxl/types.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Image orientation metadata.
+ * Values 1..8 match the EXIF definitions.
+ * The name indicates the operation to perform to transform from the encoded
+ * image to the display image.
+ */
+typedef enum {
+ JXL_ORIENT_IDENTITY = 1,
+ JXL_ORIENT_FLIP_HORIZONTAL = 2,
+ JXL_ORIENT_ROTATE_180 = 3,
+ JXL_ORIENT_FLIP_VERTICAL = 4,
+ JXL_ORIENT_TRANSPOSE = 5,
+ JXL_ORIENT_ROTATE_90_CW = 6,
+ JXL_ORIENT_ANTI_TRANSPOSE = 7,
+ JXL_ORIENT_ROTATE_90_CCW = 8,
+} JxlOrientation;
+
+/** Given type of an extra channel.
+ */
+typedef enum {
+ JXL_CHANNEL_ALPHA,
+ JXL_CHANNEL_DEPTH,
+ JXL_CHANNEL_SPOT_COLOR,
+ JXL_CHANNEL_SELECTION_MASK,
+ JXL_CHANNEL_BLACK,
+ JXL_CHANNEL_CFA,
+ JXL_CHANNEL_THERMAL,
+ JXL_CHANNEL_RESERVED0,
+ JXL_CHANNEL_RESERVED1,
+ JXL_CHANNEL_RESERVED2,
+ JXL_CHANNEL_RESERVED3,
+ JXL_CHANNEL_RESERVED4,
+ JXL_CHANNEL_RESERVED5,
+ JXL_CHANNEL_RESERVED6,
+ JXL_CHANNEL_RESERVED7,
+ JXL_CHANNEL_UNKNOWN,
+ JXL_CHANNEL_OPTIONAL
+} JxlExtraChannelType;
+
+/** The codestream preview header */
+typedef struct {
+ /** Preview width in pixels */
+ uint32_t xsize;
+
+ /** Preview height in pixels */
+ uint32_t ysize;
+} JxlPreviewHeader;
+
+/** The codestream animation header, optionally present in the beginning of
+ * the codestream, and if it is it applies to all animation frames, unlike
+ * JxlFrameHeader which applies to an individual frame.
+ */
+typedef struct {
+ /** Numerator of ticks per second of a single animation frame time unit */
+ uint32_t tps_numerator;
+
+ /** Denominator of ticks per second of a single animation frame time unit */
+ uint32_t tps_denominator;
+
+ /** Amount of animation loops, or 0 to repeat infinitely */
+ uint32_t num_loops;
+
+ /** Whether animation time codes are present at animation frames in the
+ * codestream */
+ JXL_BOOL have_timecodes;
+} JxlAnimationHeader;
+
+/** Basic image information. This information is available from the file
+ * signature and first part of the codestream header.
+ */
+typedef struct {
+ /* TODO(lode): need additional fields for (transcoded) JPEG? For reusable
+ * fields orientation must be read from Exif APP1. For has_icc_profile: must
+ * look up where ICC profile is guaranteed to be in a JPEG file to be able to
+ * indicate this. */
+
+ /* TODO(lode): make struct packed, and/or make this opaque struct with getter
+ * functions (still separate struct from opaque decoder) */
+
+ /** Whether the codestream is embedded in the container format. If true,
+ * metadata information and extensions may be available in addition to the
+ * codestream.
+ */
+ JXL_BOOL have_container;
+
+ /** Width of the image in pixels, before applying orientation.
+ */
+ uint32_t xsize;
+
+ /** Height of the image in pixels, before applying orientation.
+ */
+ uint32_t ysize;
+
+ /** Original image color channel bit depth.
+ */
+ uint32_t bits_per_sample;
+
+ /** Original image color channel floating point exponent bits, or 0 if they
+ * are unsigned integer. For example, if the original data is half-precision
+ * (binary16) floating point, bits_per_sample is 16 and
+ * exponent_bits_per_sample is 5, and so on for other floating point
+ * precisions.
+ */
+ uint32_t exponent_bits_per_sample;
+
+ /** Upper bound on the intensity level present in the image in nits. For
+ * unsigned integer pixel encodings, this is the brightness of the largest
+ * representable value. The image does not necessarily contain a pixel
+ * actually this bright. An encoder is allowed to set 255 for SDR images
+ * without computing a histogram.
+ * Leaving this set to its default of 0 lets libjxl choose a sensible default
+ * value based on the color encoding.
+ */
+ float intensity_target;
+
+ /** Lower bound on the intensity level present in the image. This may be
+ * loose, i.e. lower than the actual darkest pixel. When tone mapping, a
+ * decoder will map [min_nits, intensity_target] to the display range.
+ */
+ float min_nits;
+
+ /** See the description of @see linear_below.
+ */
+ JXL_BOOL relative_to_max_display;
+
+ /** The tone mapping will leave unchanged (linear mapping) any pixels whose
+ * brightness is strictly below this. The interpretation depends on
+ * relative_to_max_display. If true, this is a ratio [0, 1] of the maximum
+ * display brightness [nits], otherwise an absolute brightness [nits].
+ */
+ float linear_below;
+
+ /** Whether the data in the codestream is encoded in the original color
+ * profile that is attached to the codestream metadata header, or is
+ * encoded in an internally supported absolute color space (which the decoder
+ * can always convert to linear or non-linear sRGB or to XYB). If the original
+ * profile is used, the decoder outputs pixel data in the color space matching
+ * that profile, but doesn't convert it to any other color space. If the
+ * original profile is not used, the decoder only outputs the data as sRGB
+ * (linear if outputting to floating point, nonlinear with standard sRGB
+ * transfer function if outputting to unsigned integers) but will not convert
+ * it to to the original color profile. The decoder also does not convert to
+ * the target display color profile. To convert the pixel data produced by
+ * the decoder to the original color profile, one of the JxlDecoderGetColor*
+ * functions needs to be called with @ref JXL_COLOR_PROFILE_TARGET_DATA to get
+ * the color profile of the decoder output, and then an external CMS can be
+ * used for conversion.
+ * Note that for lossy compression, this should be set to false for most use
+ * cases, and if needed, the image should be converted to the original color
+ * profile after decoding, as described above.
+ */
+ JXL_BOOL uses_original_profile;
+
+ /** Indicates a preview image exists near the beginning of the codestream.
+ * The preview itself or its dimensions are not included in the basic info.
+ */
+ JXL_BOOL have_preview;
+
+ /** Indicates animation frames exist in the codestream. The animation
+ * information is not included in the basic info.
+ */
+ JXL_BOOL have_animation;
+
+ /** Image orientation, value 1-8 matching the values used by JEITA CP-3451C
+ * (Exif version 2.3).
+ */
+ JxlOrientation orientation;
+
+ /** Number of color channels encoded in the image, this is either 1 for
+ * grayscale data, or 3 for colored data. This count does not include
+ * the alpha channel or other extra channels. To check presence of an alpha
+ * channel, such as in the case of RGBA color, check alpha_bits != 0.
+ * If and only if this is 1, the JxlColorSpace in the JxlColorEncoding is
+ * JXL_COLOR_SPACE_GRAY.
+ */
+ uint32_t num_color_channels;
+
+ /** Number of additional image channels. This includes the main alpha channel,
+ * but can also include additional channels such as depth, additional alpha
+ * channels, spot colors, and so on. Information about the extra channels
+ * can be queried with JxlDecoderGetExtraChannelInfo. The main alpha channel,
+ * if it exists, also has its information available in the alpha_bits,
+ * alpha_exponent_bits and alpha_premultiplied fields in this JxlBasicInfo.
+ */
+ uint32_t num_extra_channels;
+
+ /** Bit depth of the encoded alpha channel, or 0 if there is no alpha channel.
+ * If present, matches the alpha_bits value of the JxlExtraChannelInfo
+ * associated with this alpha channel.
+ */
+ uint32_t alpha_bits;
+
+ /** Alpha channel floating point exponent bits, or 0 if they are unsigned. If
+ * present, matches the alpha_bits value of the JxlExtraChannelInfo associated
+ * with this alpha channel. integer.
+ */
+ uint32_t alpha_exponent_bits;
+
+ /** Whether the alpha channel is premultiplied. Only used if there is a main
+ * alpha channel. Matches the alpha_premultiplied value of the
+ * JxlExtraChannelInfo associated with this alpha channel.
+ */
+ JXL_BOOL alpha_premultiplied;
+
+ /** Dimensions of encoded preview image, only used if have_preview is
+ * JXL_TRUE.
+ */
+ JxlPreviewHeader preview;
+
+ /** Animation header with global animation properties for all frames, only
+ * used if have_animation is JXL_TRUE.
+ */
+ JxlAnimationHeader animation;
+
+ /** Intrinsic width of the image.
+ * The intrinsic size can be different from the actual size in pixels
+ * (as given by xsize and ysize) and it denotes the recommended dimensions
+ * for displaying the image, i.e. applications are advised to resample the
+ * decoded image to the intrinsic dimensions.
+ */
+ uint32_t intrinsic_xsize;
+
+ /** Intrinsic height of the image.
+ * The intrinsic size can be different from the actual size in pixels
+ * (as given by xsize and ysize) and it denotes the recommended dimensions
+ * for displaying the image, i.e. applications are advised to resample the
+ * decoded image to the intrinsic dimensions.
+ */
+ uint32_t intrinsic_ysize;
+
+ /** Padding for forwards-compatibility, in case more fields are exposed
+ * in a future version of the library.
+ */
+ uint8_t padding[100];
+} JxlBasicInfo;
+
+/** Information for a single extra channel.
+ */
+typedef struct {
+ /** Given type of an extra channel.
+ */
+ JxlExtraChannelType type;
+
+ /** Total bits per sample for this channel.
+ */
+ uint32_t bits_per_sample;
+
+ /** Floating point exponent bits per channel, or 0 if they are unsigned
+ * integer.
+ */
+ uint32_t exponent_bits_per_sample;
+
+ /** The exponent the channel is downsampled by on each axis.
+ * TODO(lode): expand this comment to match the JPEG XL specification,
+ * specify how to upscale, how to round the size computation, and to which
+ * extra channels this field applies.
+ */
+ uint32_t dim_shift;
+
+ /** Length of the extra channel name in bytes, or 0 if no name.
+ * Excludes null termination character.
+ */
+ uint32_t name_length;
+
+ /** Whether alpha channel uses premultiplied alpha. Only applicable if
+ * type is JXL_CHANNEL_ALPHA.
+ */
+ JXL_BOOL alpha_premultiplied;
+
+ /** Spot color of the current spot channel in linear RGBA. Only applicable if
+ * type is JXL_CHANNEL_SPOT_COLOR.
+ */
+ float spot_color[4];
+
+ /** Only applicable if type is JXL_CHANNEL_CFA.
+ * TODO(lode): add comment about the meaning of this field.
+ */
+ uint32_t cfa_channel;
+} JxlExtraChannelInfo;
+
+/* TODO(lode): add API to get the codestream header extensions. */
+/** Extensions in the codestream header. */
+typedef struct {
+ /** Extension bits. */
+ uint64_t extensions;
+} JxlHeaderExtensions;
+
+/** Frame blend modes.
+ * When decoding, if coalescing is enabled (default), this can be ignored.
+ */
+typedef enum {
+ JXL_BLEND_REPLACE = 0,
+ JXL_BLEND_ADD = 1,
+ JXL_BLEND_BLEND = 2,
+ JXL_BLEND_MULADD = 3,
+ JXL_BLEND_MUL = 4,
+} JxlBlendMode;
+
+/** The information about blending the color channels or a single extra channel.
+ * When decoding, if coalescing is enabled (default), this can be ignored and
+ * the blend mode is considered to be JXL_BLEND_REPLACE.
+ * When encoding, these settings apply to the pixel data given to the encoder.
+ */
+typedef struct {
+ /** Blend mode.
+ */
+ JxlBlendMode blendmode;
+ /** Reference frame ID to use as the 'bottom' layer (0-3).
+ */
+ uint32_t source;
+ /** Which extra channel to use as the 'alpha' channel for blend modes
+ * JXL_BLEND_BLEND and JXL_BLEND_MULADD.
+ */
+ uint32_t alpha;
+ /** Clamp values to [0,1] for the purpose of blending.
+ */
+ JXL_BOOL clamp;
+} JxlBlendInfo;
+
+/** The information about layers.
+ * When decoding, if coalescing is enabled (default), this can be ignored.
+ * When encoding, these settings apply to the pixel data given to the encoder,
+ * the encoder could choose an internal representation that differs.
+ */
+typedef struct {
+ /** Whether cropping is applied for this frame. When decoding, if false,
+ * crop_x0 and crop_y0 are set to zero, and xsize and ysize to the main
+ * image dimensions. When encoding and this is false, those fields are
+ * ignored. When decoding, if coalescing is enabled (default), this is always
+ * false, regardless of the internal encoding in the JPEG XL codestream.
+ */
+ JXL_BOOL have_crop;
+
+ /** Horizontal offset of the frame (can be negative).
+ */
+ int32_t crop_x0;
+
+ /** Vertical offset of the frame (can be negative).
+ */
+ int32_t crop_y0;
+
+ /** Width of the frame (number of columns).
+ */
+ uint32_t xsize;
+
+ /** Height of the frame (number of rows).
+ */
+ uint32_t ysize;
+
+ /** The blending info for the color channels. Blending info for extra channels
+ * has to be retrieved separately using JxlDecoderGetExtraChannelBlendInfo.
+ */
+ JxlBlendInfo blend_info;
+
+ /** After blending, save the frame as reference frame with this ID (0-3).
+ * Special case: if the frame duration is nonzero, ID 0 means "will not be
+ * referenced in the future". This value is not used for the last frame.
+ * When encoding, ID 3 is reserved to frames that are generated internally by
+ * the encoder, and should not be used by applications.
+ */
+ uint32_t save_as_reference;
+} JxlLayerInfo;
+
+/** The header of one displayed frame or non-coalesced layer. */
+typedef struct {
+ /** How long to wait after rendering in ticks. The duration in seconds of a
+ * tick is given by tps_numerator and tps_denominator in JxlAnimationHeader.
+ */
+ uint32_t duration;
+
+ /** SMPTE timecode of the current frame in form 0xHHMMSSFF, or 0. The bits are
+ * interpreted from most-significant to least-significant as hour, minute,
+ * second, and frame. If timecode is nonzero, it is strictly larger than that
+ * of a previous frame with nonzero duration. These values are only available
+ * if have_timecodes in JxlAnimationHeader is JXL_TRUE.
+ * This value is only used if have_timecodes in JxlAnimationHeader is
+ * JXL_TRUE.
+ */
+ uint32_t timecode;
+
+ /** Length of the frame name in bytes, or 0 if no name.
+ * Excludes null termination character. This value is set by the decoder.
+ * For the encoder, this value is ignored and @ref JxlEncoderSetFrameName is
+ * used instead to set the name and the length.
+ */
+ uint32_t name_length;
+
+ /** Indicates this is the last animation frame. This value is set by the
+ * decoder to indicate no further frames follow. For the encoder, it is not
+ * required to set this value and it is ignored, @ref JxlEncoderCloseFrames is
+ * used to indicate the last frame to the encoder instead.
+ */
+ JXL_BOOL is_last;
+
+ /** Information about the layer in case of no coalescing.
+ */
+ JxlLayerInfo layer_info;
+} JxlFrameHeader;
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_CODESTREAM_HEADER_H_ */
+
+/** @}*/
diff --git a/third_party/jpeg-xl/lib/include/jxl/color_encoding.h b/third_party/jpeg-xl/lib/include/jxl/color_encoding.h
new file mode 100644
index 0000000000..b16f6a01ee
--- /dev/null
+++ b/third_party/jpeg-xl/lib/include/jxl/color_encoding.h
@@ -0,0 +1,162 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_common
+ * @{
+ * @file color_encoding.h
+ * @brief Color Encoding definitions used by JPEG XL.
+ * All CIE units are for the standard 1931 2 degree observer.
+ */
+
+#ifndef JXL_COLOR_ENCODING_H_
+#define JXL_COLOR_ENCODING_H_
+
+#include <stdint.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Color space of the image data. */
+typedef enum {
+ /** Tristimulus RGB */
+ JXL_COLOR_SPACE_RGB,
+ /** Luminance based, the primaries in JxlColorEncoding must be ignored. This
+ * value implies that num_color_channels in JxlBasicInfo is 1, any other value
+ * implies num_color_channels is 3. */
+ JXL_COLOR_SPACE_GRAY,
+ /** XYB (opsin) color space */
+ JXL_COLOR_SPACE_XYB,
+ /** None of the other table entries describe the color space appropriately */
+ JXL_COLOR_SPACE_UNKNOWN,
+} JxlColorSpace;
+
+/** Built-in whitepoints for color encoding. When decoding, the numerical xy
+ * whitepoint value can be read from the JxlColorEncoding white_point field
+ * regardless of the enum value. When encoding, enum values except
+ * JXL_WHITE_POINT_CUSTOM override the numerical fields. Some enum values match
+ * a subset of CICP (Rec. ITU-T H.273 | ISO/IEC 23091-2:2019(E)), however the
+ * white point and RGB primaries are separate enums here.
+ */
+typedef enum {
+ /** CIE Standard Illuminant D65: 0.3127, 0.3290 */
+ JXL_WHITE_POINT_D65 = 1,
+ /** White point must be read from the JxlColorEncoding white_point field, or
+ * as ICC profile. This enum value is not an exact match of the corresponding
+ * CICP value. */
+ JXL_WHITE_POINT_CUSTOM = 2,
+ /** CIE Standard Illuminant E (equal-energy): 1/3, 1/3 */
+ JXL_WHITE_POINT_E = 10,
+ /** DCI-P3 from SMPTE RP 431-2: 0.314, 0.351 */
+ JXL_WHITE_POINT_DCI = 11,
+} JxlWhitePoint;
+
+/** Built-in primaries for color encoding. When decoding, the primaries can be
+ * read from the JxlColorEncoding primaries_red_xy, primaries_green_xy and
+ * primaries_blue_xy fields regardless of the enum value. When encoding, the
+ * enum values except JXL_PRIMARIES_CUSTOM override the numerical fields. Some
+ * enum values match a subset of CICP (Rec. ITU-T H.273 | ISO/IEC
+ * 23091-2:2019(E)), however the white point and RGB primaries are separate
+ * enums here.
+ */
+typedef enum {
+ /** The CIE xy values of the red, green and blue primaries are: 0.639998686,
+ 0.330010138; 0.300003784, 0.600003357; 0.150002046, 0.059997204 */
+ JXL_PRIMARIES_SRGB = 1,
+ /** Primaries must be read from the JxlColorEncoding primaries_red_xy,
+ * primaries_green_xy and primaries_blue_xy fields, or as ICC profile. This
+ * enum value is not an exact match of the corresponding CICP value. */
+ JXL_PRIMARIES_CUSTOM = 2,
+ /** As specified in Rec. ITU-R BT.2100-1 */
+ JXL_PRIMARIES_2100 = 9,
+ /** As specified in SMPTE RP 431-2 */
+ JXL_PRIMARIES_P3 = 11,
+} JxlPrimaries;
+
+/** Built-in transfer functions for color encoding. Enum values match a subset
+ * of CICP (Rec. ITU-T H.273 | ISO/IEC 23091-2:2019(E)) unless specified
+ * otherwise. */
+typedef enum {
+ /** As specified in SMPTE RP 431-2 */
+ JXL_TRANSFER_FUNCTION_709 = 1,
+ /** None of the other table entries describe the transfer function. */
+ JXL_TRANSFER_FUNCTION_UNKNOWN = 2,
+ /** The gamma exponent is 1 */
+ JXL_TRANSFER_FUNCTION_LINEAR = 8,
+ /** As specified in IEC 61966-2-1 sRGB */
+ JXL_TRANSFER_FUNCTION_SRGB = 13,
+ /** As specified in SMPTE ST 2084 */
+ JXL_TRANSFER_FUNCTION_PQ = 16,
+ /** As specified in SMPTE ST 428-1 */
+ JXL_TRANSFER_FUNCTION_DCI = 17,
+ /** As specified in Rec. ITU-R BT.2100-1 (HLG) */
+ JXL_TRANSFER_FUNCTION_HLG = 18,
+ /** Transfer function follows power law given by the gamma value in
+ JxlColorEncoding. Not a CICP value. */
+ JXL_TRANSFER_FUNCTION_GAMMA = 65535,
+} JxlTransferFunction;
+
+/** Renderig intent for color encoding, as specified in ISO 15076-1:2010 */
+typedef enum {
+ /** vendor-specific */
+ JXL_RENDERING_INTENT_PERCEPTUAL = 0,
+ /** media-relative */
+ JXL_RENDERING_INTENT_RELATIVE,
+ /** vendor-specific */
+ JXL_RENDERING_INTENT_SATURATION,
+ /** ICC-absolute */
+ JXL_RENDERING_INTENT_ABSOLUTE,
+} JxlRenderingIntent;
+
+/** Color encoding of the image as structured information.
+ */
+typedef struct {
+ /** Color space of the image data.
+ */
+ JxlColorSpace color_space;
+
+ /** Built-in white point. If this value is JXL_WHITE_POINT_CUSTOM, must
+ * use the numerical whitepoint values from white_point_xy.
+ */
+ JxlWhitePoint white_point;
+
+ /** Numerical whitepoint values in CIE xy space. */
+ double white_point_xy[2];
+
+ /** Built-in RGB primaries. If this value is JXL_PRIMARIES_CUSTOM, must
+ * use the numerical primaries values below. This field and the custom values
+ * below are unused and must be ignored if the color space is
+ * JXL_COLOR_SPACE_GRAY or JXL_COLOR_SPACE_XYB.
+ */
+ JxlPrimaries primaries;
+
+ /** Numerical red primary values in CIE xy space. */
+ double primaries_red_xy[2];
+
+ /** Numerical green primary values in CIE xy space. */
+ double primaries_green_xy[2];
+
+ /** Numerical blue primary values in CIE xy space. */
+ double primaries_blue_xy[2];
+
+ /** Transfer function if have_gamma is 0 */
+ JxlTransferFunction transfer_function;
+
+ /** Gamma value used when transfer_function is JXL_TRANSFER_FUNCTION_GAMMA
+ */
+ double gamma;
+
+ /** Rendering intent defined for the color profile. */
+ JxlRenderingIntent rendering_intent;
+} JxlColorEncoding;
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_COLOR_ENCODING_H_ */
+
+/** @}*/
diff --git a/third_party/jpeg-xl/lib/include/jxl/decode.h b/third_party/jpeg-xl/lib/include/jxl/decode.h
new file mode 100644
index 0000000000..156499ce48
--- /dev/null
+++ b/third_party/jpeg-xl/lib/include/jxl/decode.h
@@ -0,0 +1,1446 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_decoder
+ * @{
+ * @file decode.h
+ * @brief Decoding API for JPEG XL.
+ */
+
+#ifndef JXL_DECODE_H_
+#define JXL_DECODE_H_
+
+#include <jxl/cms_interface.h>
+#include <jxl/codestream_header.h>
+#include <jxl/color_encoding.h>
+#include <jxl/jxl_export.h>
+#include <jxl/memory_manager.h>
+#include <jxl/parallel_runner.h>
+#include <jxl/types.h>
+#include <jxl/version.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/**
+ * Decoder library version.
+ *
+ * @return the decoder library version as an integer:
+ * MAJOR_VERSION * 1000000 + MINOR_VERSION * 1000 + PATCH_VERSION. For example,
+ * version 1.2.3 would return 1002003.
+ */
+JXL_EXPORT uint32_t JxlDecoderVersion(void);
+
+/** The result of @ref JxlSignatureCheck.
+ */
+typedef enum {
+ /** Not enough bytes were passed to determine if a valid signature was found.
+ */
+ JXL_SIG_NOT_ENOUGH_BYTES = 0,
+
+ /** No valid JPEG XL header was found. */
+ JXL_SIG_INVALID = 1,
+
+ /** A valid JPEG XL codestream signature was found, that is a JPEG XL image
+ * without container.
+ */
+ JXL_SIG_CODESTREAM = 2,
+
+ /** A valid container signature was found, that is a JPEG XL image embedded
+ * in a box format container.
+ */
+ JXL_SIG_CONTAINER = 3,
+} JxlSignature;
+
+/**
+ * JPEG XL signature identification.
+ *
+ * Checks if the passed buffer contains a valid JPEG XL signature. The passed @p
+ * buf of size
+ * @p size doesn't need to be a full image, only the beginning of the file.
+ *
+ * @return a flag indicating if a JPEG XL signature was found and what type.
+ * - @ref JXL_SIG_NOT_ENOUGH_BYTES if not enough bytes were passed to
+ * determine if a valid signature is there.
+ * - @ref JXL_SIG_INVALID if no valid signature found for JPEG XL decoding.
+ * - @ref JXL_SIG_CODESTREAM if a valid JPEG XL codestream signature was
+ * found.
+ * - @ref JXL_SIG_CONTAINER if a valid JPEG XL container signature was found.
+ */
+JXL_EXPORT JxlSignature JxlSignatureCheck(const uint8_t* buf, size_t len);
+
+/**
+ * Opaque structure that holds the JPEG XL decoder.
+ *
+ * Allocated and initialized with @ref JxlDecoderCreate().
+ * Cleaned up and deallocated with @ref JxlDecoderDestroy().
+ */
+typedef struct JxlDecoderStruct JxlDecoder;
+
+/**
+ * Creates an instance of @ref JxlDecoder and initializes it.
+ *
+ * @p memory_manager will be used for all the library dynamic allocations made
+ * from this instance. The parameter may be NULL, in which case the default
+ * allocator will be used. See jxl/memory_manager.h for details.
+ *
+ * @param memory_manager custom allocator function. It may be NULL. The memory
+ * manager will be copied internally.
+ * @return @c NULL if the instance can not be allocated or initialized
+ * @return pointer to initialized @ref JxlDecoder otherwise
+ */
+JXL_EXPORT JxlDecoder* JxlDecoderCreate(const JxlMemoryManager* memory_manager);
+
+/**
+ * Re-initializes a @ref JxlDecoder instance, so it can be re-used for decoding
+ * another image. All state and settings are reset as if the object was
+ * newly created with @ref JxlDecoderCreate, but the memory manager is kept.
+ *
+ * @param dec instance to be re-initialized.
+ */
+JXL_EXPORT void JxlDecoderReset(JxlDecoder* dec);
+
+/**
+ * Deinitializes and frees @ref JxlDecoder instance.
+ *
+ * @param dec instance to be cleaned up and deallocated.
+ */
+JXL_EXPORT void JxlDecoderDestroy(JxlDecoder* dec);
+
+/**
+ * Return value for @ref JxlDecoderProcessInput.
+ * The values from @ref JXL_DEC_BASIC_INFO onwards are optional informative
+ * events that can be subscribed to, they are never returned if they
+ * have not been registered with @ref JxlDecoderSubscribeEvents.
+ */
+typedef enum {
+ /** Function call finished successfully, or decoding is finished and there is
+ * nothing more to be done.
+ *
+ * Note that @ref JxlDecoderProcessInput will return JXL_DEC_SUCCESS if all
+ * events that were registered with @ref JxlDecoderSubscribeEvents were
+ * processed, even before the end of the JPEG XL codestream.
+ *
+ * In this case, the return value @ref JxlDecoderReleaseInput will be the same
+ * as it was at the last signaled event. E.g. if JXL_DEC_FULL_IMAGE was
+ * subscribed to, then all bytes from the end of the JPEG XL codestream
+ * (including possible boxes needed for jpeg reconstruction) will be returned
+ * as unprocessed.
+ */
+ JXL_DEC_SUCCESS = 0,
+
+ /** An error occurred, for example invalid input file or out of memory.
+ * TODO(lode): add function to get error information from decoder.
+ */
+ JXL_DEC_ERROR = 1,
+
+ /** The decoder needs more input bytes to continue. Before the next @ref
+ * JxlDecoderProcessInput call, more input data must be set, by calling @ref
+ * JxlDecoderReleaseInput (if input was set previously) and then calling @ref
+ * JxlDecoderSetInput. @ref JxlDecoderReleaseInput returns how many bytes
+ * are not yet processed, before a next call to @ref JxlDecoderProcessInput
+ * all unprocessed bytes must be provided again (the address need not match,
+ * but the contents must), and more bytes must be concatenated after the
+ * unprocessed bytes.
+ * In most cases, @ref JxlDecoderReleaseInput will return no unprocessed bytes
+ * at this event, the only exceptions are if the previously set input ended
+ * within (a) the raw codestream signature, (b) the signature box, (c) a box
+ * header, or (d) the first 4 bytes of a brob, ftyp, or jxlp box. In any of
+ * these cases the number of unprocessed bytes is less than 20.
+ */
+ JXL_DEC_NEED_MORE_INPUT = 2,
+
+ /** The decoder is able to decode a preview image and requests setting a
+ * preview output buffer using @ref JxlDecoderSetPreviewOutBuffer. This occurs
+ * if @ref JXL_DEC_PREVIEW_IMAGE is requested and it is possible to decode a
+ * preview image from the codestream and the preview out buffer was not yet
+ * set. There is maximum one preview image in a codestream.
+ * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+ * end of the frame header (including ToC) of the preview frame as
+ * unprocessed.
+ */
+ JXL_DEC_NEED_PREVIEW_OUT_BUFFER = 3,
+
+ /** The decoder requests an output buffer to store the full resolution image,
+ * which can be set with @ref JxlDecoderSetImageOutBuffer or with @ref
+ * JxlDecoderSetImageOutCallback. This event re-occurs for new frames if
+ * there are multiple animation frames and requires setting an output again.
+ * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+ * end of the frame header (including ToC) as unprocessed.
+ */
+ JXL_DEC_NEED_IMAGE_OUT_BUFFER = 5,
+
+ /** The JPEG reconstruction buffer is too small for reconstructed JPEG
+ * codestream to fit. @ref JxlDecoderSetJPEGBuffer must be called again to
+ * make room for remaining bytes. This event may occur multiple times
+ * after @ref JXL_DEC_JPEG_RECONSTRUCTION.
+ */
+ JXL_DEC_JPEG_NEED_MORE_OUTPUT = 6,
+
+ /** The box contents output buffer is too small. @ref JxlDecoderSetBoxBuffer
+ * must be called again to make room for remaining bytes. This event may occur
+ * multiple times after @ref JXL_DEC_BOX.
+ */
+ JXL_DEC_BOX_NEED_MORE_OUTPUT = 7,
+
+ /** Informative event by @ref JxlDecoderProcessInput
+ * "JxlDecoderProcessInput": Basic information such as image dimensions and
+ * extra channels. This event occurs max once per image.
+ * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+ * end of the basic info as unprocessed (including the last byte of basic info
+ * if it did not end on a byte boundary).
+ */
+ JXL_DEC_BASIC_INFO = 0x40,
+
+ /** Informative event by @ref JxlDecoderProcessInput
+ * "JxlDecoderProcessInput": User extensions of the codestream header. This
+ * event occurs max once per image and always later than @ref
+ * JXL_DEC_BASIC_INFO and earlier than any pixel data.
+ *
+ * @deprecated The decoder no longer returns this, the header extensions,
+ * if any, are available at the JXL_DEC_BASIC_INFO event.
+ */
+ JXL_DEC_EXTENSIONS = 0x80,
+
+ /** Informative event by @ref JxlDecoderProcessInput
+ * "JxlDecoderProcessInput": Color encoding or ICC profile from the
+ * codestream header. This event occurs max once per image and always later
+ * than @ref JXL_DEC_BASIC_INFO and earlier than any pixel data.
+ * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+ * end of the image header (which is the start of the first frame) as
+ * unprocessed.
+ */
+ JXL_DEC_COLOR_ENCODING = 0x100,
+
+ /** Informative event by @ref JxlDecoderProcessInput
+ * "JxlDecoderProcessInput": Preview image, a small frame, decoded. This
+ * event can only happen if the image has a preview frame encoded. This event
+ * occurs max once for the codestream and always later than @ref
+ * JXL_DEC_COLOR_ENCODING and before @ref JXL_DEC_FRAME.
+ * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+ * end of the preview frame as unprocessed.
+ */
+ JXL_DEC_PREVIEW_IMAGE = 0x200,
+
+ /** Informative event by @ref JxlDecoderProcessInput
+ * "JxlDecoderProcessInput": Beginning of a frame. @ref
+ * JxlDecoderGetFrameHeader can be used at this point. A note on frames:
+ * a JPEG XL image can have internal frames that are not intended to be
+ * displayed (e.g. used for compositing a final frame), but this only returns
+ * displayed frames, unless @ref JxlDecoderSetCoalescing was set to JXL_FALSE:
+ * in that case, the individual layers are returned, without blending. Note
+ * that even when coalescing is disabled, only frames of type kRegularFrame
+ * are returned; frames of type kReferenceOnly and kLfFrame are always for
+ * internal purposes only and cannot be accessed. A displayed frame either has
+ * an animation duration or is the only or last frame in the image. This event
+ * occurs max once per displayed frame, always later than @ref
+ * JXL_DEC_COLOR_ENCODING, and always earlier than any pixel data. While
+ * JPEG XL supports encoding a single frame as the composition of multiple
+ * internal sub-frames also called frames, this event is not indicated for the
+ * internal frames.
+ * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+ * end of the frame header (including ToC) as unprocessed.
+ */
+ JXL_DEC_FRAME = 0x400,
+
+ /** Informative event by @ref JxlDecoderProcessInput
+ * "JxlDecoderProcessInput": full frame (or layer, in case coalescing is
+ * disabled) is decoded. @ref JxlDecoderSetImageOutBuffer must be used after
+ * getting the basic image information to be able to get the image pixels, if
+ * not this return status only indicates we're past this point in the
+ * codestream. This event occurs max once per frame.
+ * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+ * end of the frame (or if @ref JXL_DEC_JPEG_RECONSTRUCTION is subscribed to,
+ * from the end of the last box that is needed for jpeg reconstruction) as
+ * unprocessed.
+ */
+ JXL_DEC_FULL_IMAGE = 0x1000,
+
+ /** Informative event by @ref JxlDecoderProcessInput
+ * "JxlDecoderProcessInput": JPEG reconstruction data decoded. @ref
+ * JxlDecoderSetJPEGBuffer may be used to set a JPEG reconstruction buffer
+ * after getting the JPEG reconstruction data. If a JPEG reconstruction buffer
+ * is set a byte stream identical to the JPEG codestream used to encode the
+ * image will be written to the JPEG reconstruction buffer instead of pixels
+ * to the image out buffer. This event occurs max once per image and always
+ * before @ref JXL_DEC_FULL_IMAGE.
+ * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+ * end of the 'jbrd' box as unprocessed.
+ */
+ JXL_DEC_JPEG_RECONSTRUCTION = 0x2000,
+
+ /** Informative event by @ref JxlDecoderProcessInput
+ * "JxlDecoderProcessInput": The header of a box of the container format
+ * (BMFF) is decoded. The following API functions related to boxes can be used
+ * after this event:
+ * - @ref JxlDecoderSetBoxBuffer and @ref JxlDecoderReleaseBoxBuffer
+ * "JxlDecoderReleaseBoxBuffer": set and release a buffer to get the box
+ * data.
+ * - @ref JxlDecoderGetBoxType get the 4-character box typename.
+ * - @ref JxlDecoderGetBoxSizeRaw get the size of the box as it appears in
+ * the container file, not decompressed.
+ * - @ref JxlDecoderSetDecompressBoxes to configure whether to get the box
+ * data decompressed, or possibly compressed.
+ *
+ * Boxes can be compressed. This is so when their box type is
+ * "brob". In that case, they have an underlying decompressed box
+ * type and decompressed data. @ref JxlDecoderSetDecompressBoxes allows
+ * configuring which data to get. Decompressing requires
+ * Brotli. @ref JxlDecoderGetBoxType has a flag to get the compressed box
+ * type, which can be "brob", or the decompressed box type. If a box
+ * is not compressed (its compressed type is not "brob"), then
+ * the output decompressed box type and data is independent of what
+ * setting is configured.
+ *
+ * The buffer set with @ref JxlDecoderSetBoxBuffer must be set again for each
+ * next box to be obtained, or can be left unset to skip outputting this box.
+ * The output buffer contains the full box data when the next @ref JXL_DEC_BOX
+ * event or @ref JXL_DEC_SUCCESS occurs. @ref JXL_DEC_BOX occurs for all
+ * boxes, including non-metadata boxes such as the signature box or codestream
+ * boxes. To check whether the box is a metadata type for respectively EXIF,
+ * XMP or JUMBF, use @ref JxlDecoderGetBoxType and check for types "Exif",
+ * "xml " and "jumb" respectively.
+ *
+ * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+ * start of the box header as unprocessed.
+ */
+ JXL_DEC_BOX = 0x4000,
+
+ /** Informative event by @ref JxlDecoderProcessInput
+ * "JxlDecoderProcessInput": a progressive step in decoding the frame is
+ * reached. When calling @ref JxlDecoderFlushImage at this point, the flushed
+ * image will correspond exactly to this point in decoding, and not yet
+ * contain partial results (such as partially more fine detail) of a next
+ * step. By default, this event will trigger maximum once per frame, when a
+ * 8x8th resolution (DC) image is ready (the image data is still returned at
+ * full resolution, giving upscaled DC). Use @ref
+ * JxlDecoderSetProgressiveDetail to configure more fine-grainedness. The
+ * event is not guaranteed to trigger, not all images have progressive steps
+ * or DC encoded.
+ * In this case, @ref JxlDecoderReleaseInput will return all bytes from the
+ * end of the section that was needed to produce this progressive event as
+ * unprocessed.
+ */
+ JXL_DEC_FRAME_PROGRESSION = 0x8000,
+} JxlDecoderStatus;
+
+/** Rewinds decoder to the beginning. The same input must be given again from
+ * the beginning of the file and the decoder will emit events from the beginning
+ * again. When rewinding (as opposed to @ref JxlDecoderReset), the decoder can
+ * keep state about the image, which it can use to skip to a requested frame
+ * more efficiently with @ref JxlDecoderSkipFrames. Settings such as parallel
+ * runner or subscribed events are kept. After rewind, @ref
+ * JxlDecoderSubscribeEvents can be used again, and it is feasible to leave out
+ * events that were already handled before, such as @ref JXL_DEC_BASIC_INFO
+ * and @ref JXL_DEC_COLOR_ENCODING, since they will provide the same information
+ * as before.
+ * The difference to @ref JxlDecoderReset is that some state is kept, namely
+ * settings set by a call to
+ * - @ref JxlDecoderSetCoalescing,
+ * - @ref JxlDecoderSetDesiredIntensityTarget,
+ * - @ref JxlDecoderSetDecompressBoxes,
+ * - @ref JxlDecoderSetKeepOrientation,
+ * - @ref JxlDecoderSetUnpremultiplyAlpha,
+ * - @ref JxlDecoderSetParallelRunner,
+ * - @ref JxlDecoderSetRenderSpotcolors, and
+ * - @ref JxlDecoderSubscribeEvents.
+ *
+ * @param dec decoder object
+ */
+JXL_EXPORT void JxlDecoderRewind(JxlDecoder* dec);
+
+/** Makes the decoder skip the next `amount` frames. It still needs to process
+ * the input, but will not output the frame events. It can be more efficient
+ * when skipping frames, and even more so when using this after @ref
+ * JxlDecoderRewind. If the decoder is already processing a frame (could
+ * have emitted @ref JXL_DEC_FRAME but not yet @ref JXL_DEC_FULL_IMAGE), it
+ * starts skipping from the next frame. If the amount is larger than the amount
+ * of frames remaining in the image, all remaining frames are skipped. Calling
+ * this function multiple times adds the amount to skip to the already existing
+ * amount.
+ *
+ * A frame here is defined as a frame that without skipping emits events such
+ * as @ref JXL_DEC_FRAME and @ref JXL_DEC_FULL_IMAGE, frames that are internal
+ * to the file format but are not rendered as part of an animation, or are not
+ * the final still frame of a still image, are not counted.
+ *
+ * @param dec decoder object
+ * @param amount the amount of frames to skip
+ */
+JXL_EXPORT void JxlDecoderSkipFrames(JxlDecoder* dec, size_t amount);
+
+/**
+ * Skips processing the current frame. Can be called after frame processing
+ * already started, signaled by a @ref JXL_DEC_NEED_IMAGE_OUT_BUFFER event,
+ * but before the corresponding @ref JXL_DEC_FULL_IMAGE event. The next signaled
+ * event will be another @ref JXL_DEC_FRAME, or @ref JXL_DEC_SUCCESS if there
+ * are no more frames. If pixel data is required from the already processed part
+ * of the frame, @ref JxlDecoderFlushImage must be called before this.
+ *
+ * @param dec decoder object
+ * @return @ref JXL_DEC_SUCCESS if there is a frame to skip, and @ref
+ * JXL_DEC_ERROR if the function was not called during frame processing.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSkipCurrentFrame(JxlDecoder* dec);
+
+/**
+ * Get the default pixel format for this decoder.
+ *
+ * Requires that the decoder can produce JxlBasicInfo.
+ *
+ * @param dec @ref JxlDecoder to query when creating the recommended pixel
+ * format.
+ * @param format JxlPixelFormat to populate with the recommended settings for
+ * the data loaded into this decoder.
+ * @return @ref JXL_DEC_SUCCESS if no error, @ref JXL_DEC_NEED_MORE_INPUT if the
+ * basic info isn't yet available, and @ref JXL_DEC_ERROR otherwise.
+ *
+ * DEPRECATED: this function will be removed in the future.
+ */
+JXL_DEPRECATED JXL_EXPORT JxlDecoderStatus
+JxlDecoderDefaultPixelFormat(const JxlDecoder* dec, JxlPixelFormat* format);
+
+/**
+ * Set the parallel runner for multithreading. May only be set before starting
+ * decoding.
+ *
+ * @param dec decoder object
+ * @param parallel_runner function pointer to runner for multithreading. It may
+ * be NULL to use the default, single-threaded, runner. A multithreaded
+ * runner should be set to reach fast performance.
+ * @param parallel_runner_opaque opaque pointer for parallel_runner.
+ * @return @ref JXL_DEC_SUCCESS if the runner was set, @ref JXL_DEC_ERROR
+ * otherwise (the previous runner remains set).
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetParallelRunner(JxlDecoder* dec, JxlParallelRunner parallel_runner,
+ void* parallel_runner_opaque);
+
+/**
+ * Returns a hint indicating how many more bytes the decoder is expected to
+ * need to make @ref JxlDecoderGetBasicInfo available after the next @ref
+ * JxlDecoderProcessInput call. This is a suggested large enough value for
+ * the amount of bytes to provide in the next @ref JxlDecoderSetInput call, but
+ * it is not guaranteed to be an upper bound nor a lower bound. This number does
+ * not include bytes that have already been released from the input. Can be used
+ * before the first @ref JxlDecoderProcessInput call, and is correct the first
+ * time in most cases. If not, @ref JxlDecoderSizeHintBasicInfo can be called
+ * again to get an updated hint.
+ *
+ * @param dec decoder object
+ * @return the size hint in bytes if the basic info is not yet fully decoded.
+ * @return 0 when the basic info is already available.
+ */
+JXL_EXPORT size_t JxlDecoderSizeHintBasicInfo(const JxlDecoder* dec);
+
+/** Select for which informative events, i.e. @ref JXL_DEC_BASIC_INFO, etc., the
+ * decoder should return with a status. It is not required to subscribe to any
+ * events, data can still be requested from the decoder as soon as it available.
+ * By default, the decoder is subscribed to no events (events_wanted == 0), and
+ * the decoder will then only return when it cannot continue because it needs
+ * more input data or more output buffer. This function may only be be called
+ * before using @ref JxlDecoderProcessInput.
+ *
+ * @param dec decoder object
+ * @param events_wanted bitfield of desired events.
+ * @return @ref JXL_DEC_SUCCESS if no error, @ref JXL_DEC_ERROR otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSubscribeEvents(JxlDecoder* dec,
+ int events_wanted);
+
+/** Enables or disables preserving of as-in-bitstream pixeldata
+ * orientation. Some images are encoded with an Orientation tag
+ * indicating that the decoder must perform a rotation and/or
+ * mirroring to the encoded image data.
+ *
+ * - If skip_reorientation is JXL_FALSE (the default): the decoder
+ * will apply the transformation from the orientation setting, hence
+ * rendering the image according to its specified intent. When
+ * producing a JxlBasicInfo, the decoder will always set the
+ * orientation field to JXL_ORIENT_IDENTITY (matching the returned
+ * pixel data) and also align xsize and ysize so that they correspond
+ * to the width and the height of the returned pixel data.
+ * - If skip_reorientation is JXL_TRUE: the decoder will skip
+ * applying the transformation from the orientation setting, returning
+ * the image in the as-in-bitstream pixeldata orientation.
+ * This may be faster to decode since the decoder doesn't have to apply the
+ * transformation, but can cause wrong display of the image if the
+ * orientation tag is not correctly taken into account by the user.
+ *
+ * By default, this option is disabled, and the returned pixel data is
+ * re-oriented according to the image's Orientation setting.
+ *
+ * This function must be called at the beginning, before decoding is performed.
+ *
+ * @see JxlBasicInfo for the orientation field, and @ref JxlOrientation for the
+ * possible values.
+ *
+ * @param dec decoder object
+ * @param skip_reorientation JXL_TRUE to enable, JXL_FALSE to disable.
+ * @return @ref JXL_DEC_SUCCESS if no error, @ref JXL_DEC_ERROR otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetKeepOrientation(JxlDecoder* dec, JXL_BOOL skip_reorientation);
+
+/**
+ * Enables or disables preserving of associated alpha channels. If
+ * unpremul_alpha is set to JXL_FALSE then for associated alpha channel, the
+ * pixel data is returned with premultiplied colors. If it is set to JXL_TRUE,
+ * The colors will be unpremultiplied based on the alpha channel. This function
+ * has no effect if the image does not have an associated alpha channel.
+ *
+ * By default, this option is disabled, and the returned pixel data "as is".
+ *
+ * This function must be called at the beginning, before decoding is performed.
+ *
+ * @param dec decoder object
+ * @param unpremul_alpha JXL_TRUE to enable, JXL_FALSE to disable.
+ * @return @ref JXL_DEC_SUCCESS if no error, @ref JXL_DEC_ERROR otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetUnpremultiplyAlpha(JxlDecoder* dec, JXL_BOOL unpremul_alpha);
+
+/** Enables or disables rendering spot colors. By default, spot colors
+ * are rendered, which is OK for viewing the decoded image. If render_spotcolors
+ * is JXL_FALSE, then spot colors are not rendered, and have to be retrieved
+ * separately using @ref JxlDecoderSetExtraChannelBuffer. This is useful for
+ * e.g. printing applications.
+ *
+ * @param dec decoder object
+ * @param render_spotcolors JXL_TRUE to enable (default), JXL_FALSE to disable.
+ * @return @ref JXL_DEC_SUCCESS if no error, @ref JXL_DEC_ERROR otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetRenderSpotcolors(JxlDecoder* dec, JXL_BOOL render_spotcolors);
+
+/** Enables or disables coalescing of zero-duration frames. By default, frames
+ * are returned with coalescing enabled, i.e. all frames have the image
+ * dimensions, and are blended if needed. When coalescing is disabled, frames
+ * can have arbitrary dimensions, a non-zero crop offset, and blending is not
+ * performed. For display, coalescing is recommended. For loading a multi-layer
+ * still image as separate layers (as opposed to the merged image), coalescing
+ * has to be disabled.
+ *
+ * @param dec decoder object
+ * @param coalescing JXL_TRUE to enable coalescing (default), JXL_FALSE to
+ * disable it.
+ * @return @ref JXL_DEC_SUCCESS if no error, @ref JXL_DEC_ERROR otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetCoalescing(JxlDecoder* dec,
+ JXL_BOOL coalescing);
+
+/**
+ * Decodes JPEG XL file using the available bytes. Requires input has been
+ * set with @ref JxlDecoderSetInput. After @ref JxlDecoderProcessInput, input
+ * can optionally be released with @ref JxlDecoderReleaseInput and then set
+ * again to next bytes in the stream. @ref JxlDecoderReleaseInput returns how
+ * many bytes are not yet processed, before a next call to @ref
+ * JxlDecoderProcessInput all unprocessed bytes must be provided again (the
+ * address need not match, but the contents must), and more bytes may be
+ * concatenated after the unprocessed bytes.
+ *
+ * The returned status indicates whether the decoder needs more input bytes, or
+ * more output buffer for a certain type of output data. No matter what the
+ * returned status is (other than @ref JXL_DEC_ERROR), new information, such
+ * as @ref JxlDecoderGetBasicInfo, may have become available after this call.
+ * When the return value is not @ref JXL_DEC_ERROR or @ref JXL_DEC_SUCCESS, the
+ * decoding requires more @ref JxlDecoderProcessInput calls to continue.
+ *
+ * @param dec decoder object
+ * @return @ref JXL_DEC_SUCCESS when decoding finished and all events handled.
+ * If you still have more unprocessed input data anyway, then you can still
+ * continue by using @ref JxlDecoderSetInput and calling @ref
+ * JxlDecoderProcessInput again, similar to handling @ref
+ * JXL_DEC_NEED_MORE_INPUT. @ref JXL_DEC_SUCCESS can occur instead of @ref
+ * JXL_DEC_NEED_MORE_INPUT when, for example, the input data ended right at
+ * the boundary of a box of the container format, all essential codestream
+ * boxes were already decoded, but extra metadata boxes are still present in
+ * the next data. @ref JxlDecoderProcessInput cannot return success if all
+ * codestream boxes have not been seen yet.
+ * @return @ref JXL_DEC_ERROR when decoding failed, e.g. invalid codestream.
+ * TODO(lode): document the input data mechanism
+ * @return @ref JXL_DEC_NEED_MORE_INPUT when more input data is necessary.
+ * @return @ref JXL_DEC_BASIC_INFO when basic info such as image dimensions is
+ * available and this informative event is subscribed to.
+ * @return @ref JXL_DEC_COLOR_ENCODING when color profile information is
+ * available and this informative event is subscribed to.
+ * @return @ref JXL_DEC_PREVIEW_IMAGE when preview pixel information is
+ * available and output in the preview buffer.
+ * @return @ref JXL_DEC_FULL_IMAGE when all pixel information at highest detail
+ * is available and has been output in the pixel buffer.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderProcessInput(JxlDecoder* dec);
+
+/**
+ * Sets input data for @ref JxlDecoderProcessInput. The data is owned by the
+ * caller and may be used by the decoder until @ref JxlDecoderReleaseInput is
+ * called or the decoder is destroyed or reset so must be kept alive until then.
+ * Cannot be called if @ref JxlDecoderSetInput was already called and @ref
+ * JxlDecoderReleaseInput was not yet called, and cannot be called after @ref
+ * JxlDecoderCloseInput indicating the end of input was called.
+ *
+ * @param dec decoder object
+ * @param data pointer to next bytes to read from
+ * @param size amount of bytes available starting from data
+ * @return @ref JXL_DEC_ERROR if input was already set without releasing or @ref
+ * JxlDecoderCloseInput was already called, @ref JXL_DEC_SUCCESS otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetInput(JxlDecoder* dec,
+ const uint8_t* data,
+ size_t size);
+
+/**
+ * Releases input which was provided with @ref JxlDecoderSetInput. Between @ref
+ * JxlDecoderProcessInput and @ref JxlDecoderReleaseInput, the user may not
+ * alter the data in the buffer. Calling @ref JxlDecoderReleaseInput is required
+ * whenever any input is already set and new input needs to be added with @ref
+ * JxlDecoderSetInput, but is not required before @ref JxlDecoderDestroy or @ref
+ * JxlDecoderReset. Calling @ref JxlDecoderReleaseInput when no input is set is
+ * not an error and returns 0.
+ *
+ * @param dec decoder object
+ * @return The amount of bytes the decoder has not yet processed that are still
+ * remaining in the data set by @ref JxlDecoderSetInput, or 0 if no input is
+ * set or @ref JxlDecoderReleaseInput was already called. For a next call
+ * to @ref JxlDecoderProcessInput, the buffer must start with these
+ * unprocessed bytes. From this value it is possible to infer the position
+ * of certain JPEG XL codestream elements (e.g. end of headers, frame
+ * start/end). See the documentation of individual values of @ref
+ * JxlDecoderStatus for more information.
+ */
+JXL_EXPORT size_t JxlDecoderReleaseInput(JxlDecoder* dec);
+
+/**
+ * Marks the input as finished, indicates that no more @ref JxlDecoderSetInput
+ * will be called. This function allows the decoder to determine correctly if it
+ * should return success, need more input or error in certain cases. For
+ * backwards compatibility with a previous version of the API, using this
+ * function is optional when not using the @ref JXL_DEC_BOX event (the decoder
+ * is able to determine the end of the image frames without marking the end),
+ * but using this function is required when using @ref JXL_DEC_BOX for getting
+ * metadata box contents. This function does not replace @ref
+ * JxlDecoderReleaseInput, that function should still be called if its return
+ * value is needed.
+ *
+ * @ref JxlDecoderCloseInput should be called as soon as all known input bytes
+ * are set (e.g. at the beginning when not streaming but setting all input
+ * at once), before the final @ref JxlDecoderProcessInput calls.
+ *
+ * @param dec decoder object
+ */
+JXL_EXPORT void JxlDecoderCloseInput(JxlDecoder* dec);
+
+/**
+ * Outputs the basic image information, such as image dimensions, bit depth and
+ * all other JxlBasicInfo fields, if available.
+ *
+ * @param dec decoder object
+ * @param info struct to copy the information into, or NULL to only check
+ * whether the information is available through the return value.
+ * @return @ref JXL_DEC_SUCCESS if the value is available, @ref
+ * JXL_DEC_NEED_MORE_INPUT if not yet available, @ref JXL_DEC_ERROR
+ * in case of other error conditions.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetBasicInfo(const JxlDecoder* dec,
+ JxlBasicInfo* info);
+
+/**
+ * Outputs information for extra channel at the given index. The index must be
+ * smaller than num_extra_channels in the associated JxlBasicInfo.
+ *
+ * @param dec decoder object
+ * @param index index of the extra channel to query.
+ * @param info struct to copy the information into, or NULL to only check
+ * whether the information is available through the return value.
+ * @return @ref JXL_DEC_SUCCESS if the value is available, @ref
+ * JXL_DEC_NEED_MORE_INPUT if not yet available, @ref JXL_DEC_ERROR
+ * in case of other error conditions.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetExtraChannelInfo(
+ const JxlDecoder* dec, size_t index, JxlExtraChannelInfo* info);
+
+/**
+ * Outputs name for extra channel at the given index in UTF-8. The index must be
+ * smaller than num_extra_channels in the associated JxlBasicInfo. The buffer
+ * for name must have at least name_length + 1 bytes allocated, gotten from
+ * the associated JxlExtraChannelInfo.
+ *
+ * @param dec decoder object
+ * @param index index of the extra channel to query.
+ * @param name buffer to copy the name into
+ * @param size size of the name buffer in bytes
+ * @return @ref JXL_DEC_SUCCESS if the value is available, @ref
+ * JXL_DEC_NEED_MORE_INPUT if not yet available, @ref JXL_DEC_ERROR
+ * in case of other error conditions.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetExtraChannelName(const JxlDecoder* dec,
+ size_t index,
+ char* name,
+ size_t size);
+
+/** Defines which color profile to get: the profile from the codestream
+ * metadata header, which represents the color profile of the original image,
+ * or the color profile from the pixel data produced by the decoder. Both are
+ * the same if the JxlBasicInfo has uses_original_profile set.
+ */
+typedef enum {
+ /** Get the color profile of the original image from the metadata.
+ */
+ JXL_COLOR_PROFILE_TARGET_ORIGINAL = 0,
+
+ /** Get the color profile of the pixel data the decoder outputs. */
+ JXL_COLOR_PROFILE_TARGET_DATA = 1,
+} JxlColorProfileTarget;
+
+/**
+ * Outputs the color profile as JPEG XL encoded structured data, if available.
+ * This is an alternative to an ICC Profile, which can represent a more limited
+ * amount of color spaces, but represents them exactly through enum values.
+ *
+ * It is often possible to use @ref JxlDecoderGetColorAsICCProfile as an
+ * alternative anyway. The following scenarios are possible:
+ * - The JPEG XL image has an attached ICC Profile, in that case, the encoded
+ * structured data is not available, this function will return an error
+ * status. @ref JxlDecoderGetColorAsICCProfile should be called instead.
+ * - The JPEG XL image has an encoded structured color profile, and it
+ * represents an RGB or grayscale color space. This function will return it.
+ * You can still use @ref JxlDecoderGetColorAsICCProfile as well as an
+ * alternative if desired, though depending on which RGB color space is
+ * represented, the ICC profile may be a close approximation. It is also not
+ * always feasible to deduce from an ICC profile which named color space it
+ * exactly represents, if any, as it can represent any arbitrary space.
+ * HDR color spaces such as those using PQ and HLG are also potentially
+ * problematic, in that: while ICC profiles can encode a transfer function
+ * that happens to approximate those of PQ and HLG (HLG for only one given
+ * system gamma at a time, and necessitating a 3D LUT if gamma is to be
+ * different from 1), they cannot (before ICCv4.4) semantically signal that
+ * this is the color space that they represent. Therefore, they will
+ * typically not actually be interpreted as representing an HDR color space.
+ * This is especially detrimental to PQ which will then be interpreted as if
+ * the maximum signal value represented SDR white instead of 10000 cd/m^2,
+ * meaning that the image will be displayed two orders of magnitude (5-7 EV)
+ * too dim.
+ * - The JPEG XL image has an encoded structured color profile, and it
+ * indicates an unknown or xyb color space. In that case, @ref
+ * JxlDecoderGetColorAsICCProfile is not available.
+ *
+ * When rendering an image on a system where ICC-based color management is used,
+ * @ref JxlDecoderGetColorAsICCProfile should generally be used first as it will
+ * return a ready-to-use profile (with the aforementioned caveat about HDR).
+ * When knowledge about the nominal color space is desired if available, @ref
+ * JxlDecoderGetColorAsEncodedProfile should be used first.
+ *
+ * @param dec decoder object
+ * @param unused_format deprecated, can be NULL
+ * @param target whether to get the original color profile from the metadata
+ * or the color profile of the decoded pixels.
+ * @param color_encoding struct to copy the information into, or NULL to only
+ * check whether the information is available through the return value.
+ * @return @ref JXL_DEC_SUCCESS if the data is available and returned, @ref
+ * JXL_DEC_NEED_MORE_INPUT if not yet available, @ref JXL_DEC_ERROR in
+ * case the encoded structured color profile does not exist in the
+ * codestream.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetColorAsEncodedProfile(
+ const JxlDecoder* dec, const JxlPixelFormat* unused_format,
+ JxlColorProfileTarget target, JxlColorEncoding* color_encoding);
+
+/**
+ * Outputs the size in bytes of the ICC profile returned by @ref
+ * JxlDecoderGetColorAsICCProfile, if available, or indicates there is none
+ * available. In most cases, the image will have an ICC profile available, but
+ * if it does not, @ref JxlDecoderGetColorAsEncodedProfile must be used instead.
+ *
+ * @see JxlDecoderGetColorAsEncodedProfile for more information. The ICC
+ * profile is either the exact ICC profile attached to the codestream metadata,
+ * or a close approximation generated from JPEG XL encoded structured data,
+ * depending of what is encoded in the codestream.
+ *
+ * @param dec decoder object
+ * @param unused_format deprecated, can be NULL
+ * @param target whether to get the original color profile from the metadata
+ * or the color profile of the decoded pixels.
+ * @param size variable to output the size into, or NULL to only check the
+ * return status.
+ * @return @ref JXL_DEC_SUCCESS if the ICC profile is available, @ref
+ * JXL_DEC_NEED_MORE_INPUT if the decoder has not yet received enough
+ * input data to determine whether an ICC profile is available or what its
+ * size is, @ref JXL_DEC_ERROR in case the ICC profile is not available and
+ * cannot be generated.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetICCProfileSize(
+ const JxlDecoder* dec, const JxlPixelFormat* unused_format,
+ JxlColorProfileTarget target, size_t* size);
+
+/**
+ * Outputs ICC profile if available. The profile is only available if @ref
+ * JxlDecoderGetICCProfileSize returns success. The output buffer must have
+ * at least as many bytes as given by @ref JxlDecoderGetICCProfileSize.
+ *
+ * @param dec decoder object
+ * @param unused_format deprecated, can be NULL
+ * @param target whether to get the original color profile from the metadata
+ * or the color profile of the decoded pixels.
+ * @param icc_profile buffer to copy the ICC profile into
+ * @param size size of the icc_profile buffer in bytes
+ * @return @ref JXL_DEC_SUCCESS if the profile was successfully returned is
+ * available, @ref JXL_DEC_NEED_MORE_INPUT if not yet available, @ref
+ * JXL_DEC_ERROR if the profile doesn't exist or the output size is not
+ * large enough.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetColorAsICCProfile(
+ const JxlDecoder* dec, const JxlPixelFormat* unused_format,
+ JxlColorProfileTarget target, uint8_t* icc_profile, size_t size);
+
+/** Sets the desired output color profile of the decoded image by calling
+ * @ref JxlDecoderSetOutputColorProfile, passing on @c color_encoding and
+ * setting @c icc_data to NULL. See @ref JxlDecoderSetOutputColorProfile for
+ * details.
+ *
+ * @param dec decoder object
+ * @param color_encoding the default color encoding to set
+ * @return @ref JXL_DEC_SUCCESS if the preference was set successfully, @ref
+ * JXL_DEC_ERROR otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetPreferredColorProfile(
+ JxlDecoder* dec, const JxlColorEncoding* color_encoding);
+
+/** Requests that the decoder perform tone mapping to the peak display luminance
+ * passed as @c desired_intensity_target, if appropriate.
+ * @note This is provided for convenience and the exact tone mapping that is
+ * performed is not meant to be considered authoritative in any way. It may
+ * change from version to version.
+ * @param dec decoder object
+ * @param desired_intensity_target the intended target peak luminance
+ * @return @ref JXL_DEC_SUCCESS if the preference was set successfully, @ref
+ * JXL_DEC_ERROR otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetDesiredIntensityTarget(
+ JxlDecoder* dec, float desired_intensity_target);
+
+/**
+ * Sets the desired output color profile of the decoded image either from a
+ * color encoding or an ICC profile. Valid calls of this function have either @c
+ * color_encoding or @c icc_data set to NULL and @c icc_size must be 0 if and
+ * only if @c icc_data is NULL.
+ *
+ * Depending on whether a color management system (CMS) has been set the
+ * behavior is as follows:
+ *
+ * If a color management system (CMS) has been set with @ref JxlDecoderSetCms,
+ * and the CMS supports output to the desired color encoding or ICC profile,
+ * then it will provide the output in that color encoding or ICC profile. If the
+ * desired color encoding or the ICC is not supported, then an error will be
+ * returned.
+ *
+ * If no CMS has been set with @ref JxlDecoderSetCms, there are two cases:
+ *
+ * (1) Calling this function with a color encoding will convert XYB images to
+ * the desired color encoding. In this case, if the requested color encoding has
+ * a narrower gamut, or the white points differ, then the resulting image can
+ * have significant color distortion. Non-XYB images will not be converted to
+ * the desired color space.
+ *
+ * (2) Calling this function with an ICC profile will result in an error.
+ *
+ * If called with an ICC profile (after a call to @ref JxlDecoderSetCms), the
+ * ICC profile has to be a valid RGB or grayscale color profile.
+ *
+ * Can only be set after the @ref JXL_DEC_COLOR_ENCODING event occurred and
+ * before any other event occurred, and should be used before getting
+ * JXL_COLOR_PROFILE_TARGET_DATA.
+ *
+ * This function must not be called before JxlDecoderSetCms.
+ *
+ * @param dec decoder orbject
+ * @param color_encoding the output color encoding
+ * @param icc_data bytes of the icc profile
+ * @param icc_size size of the icc profile in bytes
+ * @return @ref JXL_DEC_SUCCESS if the color profile was set successfully, @ref
+ * JXL_DEC_ERROR otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetOutputColorProfile(
+ JxlDecoder* dec, const JxlColorEncoding* color_encoding,
+ const uint8_t* icc_data, size_t icc_size);
+
+/**
+ * Sets the color management system (CMS) that will be used for color
+ * conversion (if applicable) during decoding. May only be set before starting
+ * decoding and must not be called after @ref JxlDecoderSetOutputColorProfile.
+ *
+ * See @ref JxlDecoderSetOutputColorProfile for how color conversions are done
+ * depending on whether or not a CMS has been set with @ref JxlDecoderSetCms.
+ *
+ * @param dec decoder object.
+ * @param cms structure representing a CMS implementation. See @ref
+ * JxlCmsInterface for more details.
+ */
+JXL_EXPORT void JxlDecoderSetCms(JxlDecoder* dec, JxlCmsInterface cms);
+// TODO(firsching): add a function JxlDecoderSetDefaultCms() for setting a
+// default in case libjxl is build with a CMS.
+
+/**
+ * Returns the minimum size in bytes of the preview image output pixel buffer
+ * for the given format. This is the buffer for @ref
+ * JxlDecoderSetPreviewOutBuffer. Requires the preview header information is
+ * available in the decoder.
+ *
+ * @param dec decoder object
+ * @param format format of pixels
+ * @param size output value, buffer size in bytes
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as
+ * information not available yet.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderPreviewOutBufferSize(
+ const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size);
+
+/**
+ * Sets the buffer to write the small resolution preview image
+ * to. The size of the buffer must be at least as large as given by @ref
+ * JxlDecoderPreviewOutBufferSize. The buffer follows the format described
+ * by JxlPixelFormat. The preview image dimensions are given by the
+ * JxlPreviewHeader. The buffer is owned by the caller.
+ *
+ * @param dec decoder object
+ * @param format format of pixels. Object owned by user and its contents are
+ * copied internally.
+ * @param buffer buffer type to output the pixel data to
+ * @param size size of buffer in bytes
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as
+ * size too small.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetPreviewOutBuffer(
+ JxlDecoder* dec, const JxlPixelFormat* format, void* buffer, size_t size);
+
+/**
+ * Outputs the information from the frame, such as duration when have_animation.
+ * This function can be called when @ref JXL_DEC_FRAME occurred for the current
+ * frame, even when have_animation in the JxlBasicInfo is JXL_FALSE.
+ *
+ * @param dec decoder object
+ * @param header struct to copy the information into, or NULL to only check
+ * whether the information is available through the return value.
+ * @return @ref JXL_DEC_SUCCESS if the value is available, @ref
+ * JXL_DEC_NEED_MORE_INPUT if not yet available, @ref JXL_DEC_ERROR in
+ * case of other error conditions.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetFrameHeader(const JxlDecoder* dec,
+ JxlFrameHeader* header);
+
+/**
+ * Outputs name for the current frame. The buffer for name must have at least
+ * name_length + 1 bytes allocated, gotten from the associated JxlFrameHeader.
+ *
+ * @param dec decoder object
+ * @param name buffer to copy the name into
+ * @param size size of the name buffer in bytes, including zero termination
+ * character, so this must be at least JxlFrameHeader.name_length + 1.
+ * @return @ref JXL_DEC_SUCCESS if the value is available, @ref
+ * JXL_DEC_NEED_MORE_INPUT if not yet available, @ref JXL_DEC_ERROR in
+ * case of other error conditions.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetFrameName(const JxlDecoder* dec,
+ char* name, size_t size);
+
+/**
+ * Outputs the blend information for the current frame for a specific extra
+ * channel. This function can be called when @ref JXL_DEC_FRAME occurred for the
+ * current frame, even when have_animation in the JxlBasicInfo is JXL_FALSE.
+ * This information is only useful if coalescing is disabled; otherwise the
+ * decoder will have performed blending already.
+ *
+ * @param dec decoder object
+ * @param index the index of the extra channel
+ * @param blend_info struct to copy the information into
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetExtraChannelBlendInfo(
+ const JxlDecoder* dec, size_t index, JxlBlendInfo* blend_info);
+
+/**
+ * Returns the minimum size in bytes of the image output pixel buffer for the
+ * given format. This is the buffer for @ref JxlDecoderSetImageOutBuffer.
+ * Requires that the basic image information is available in the decoder in the
+ * case of coalescing enabled (default). In case coalescing is disabled, this
+ * can only be called after the @ref JXL_DEC_FRAME event occurs. In that case,
+ * it will return the size required to store the possibly cropped frame (which
+ * can be larger or smaller than the image dimensions).
+ *
+ * @param dec decoder object
+ * @param format format of the pixels.
+ * @param size output value, buffer size in bytes
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as
+ * information not available yet.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderImageOutBufferSize(
+ const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size);
+
+/**
+ * Sets the buffer to write the full resolution image to. This can be set when
+ * the @ref JXL_DEC_FRAME event occurs, must be set when the @ref
+ * JXL_DEC_NEED_IMAGE_OUT_BUFFER event occurs, and applies only for the
+ * current frame. The size of the buffer must be at least as large as given
+ * by @ref JxlDecoderImageOutBufferSize. The buffer follows the format described
+ * by JxlPixelFormat. The buffer is owned by the caller.
+ *
+ * @param dec decoder object
+ * @param format format of the pixels. Object owned by user and its contents
+ * are copied internally.
+ * @param buffer buffer type to output the pixel data to
+ * @param size size of buffer in bytes
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as
+ * size too small.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetImageOutBuffer(
+ JxlDecoder* dec, const JxlPixelFormat* format, void* buffer, size_t size);
+
+/**
+ * Function type for @ref JxlDecoderSetImageOutCallback.
+ *
+ * The callback may be called simultaneously by different threads when using a
+ * threaded parallel runner, on different pixels.
+ *
+ * @param opaque optional user data, as given to @ref
+ * JxlDecoderSetImageOutCallback.
+ * @param x horizontal position of leftmost pixel of the pixel data.
+ * @param y vertical position of the pixel data.
+ * @param num_pixels amount of pixels included in the pixel data, horizontally.
+ * This is not the same as xsize of the full image, it may be smaller.
+ * @param pixels pixel data as a horizontal stripe, in the format passed to @ref
+ * JxlDecoderSetImageOutCallback. The memory is not owned by the user, and
+ * is only valid during the time the callback is running.
+ */
+typedef void (*JxlImageOutCallback)(void* opaque, size_t x, size_t y,
+ size_t num_pixels, const void* pixels);
+
+/**
+ * Initialization callback for @ref JxlDecoderSetMultithreadedImageOutCallback.
+ *
+ * @param init_opaque optional user data, as given to @ref
+ * JxlDecoderSetMultithreadedImageOutCallback.
+ * @param num_threads maximum number of threads that will call the @c run
+ * callback concurrently.
+ * @param num_pixels_per_thread maximum number of pixels that will be passed in
+ * one call to @c run.
+ * @return a pointer to data that will be passed to the @c run callback, or
+ * @c NULL if initialization failed.
+ */
+typedef void* (*JxlImageOutInitCallback)(void* init_opaque, size_t num_threads,
+ size_t num_pixels_per_thread);
+
+/**
+ * Worker callback for @ref JxlDecoderSetMultithreadedImageOutCallback.
+ *
+ * @param run_opaque user data returned by the @c init callback.
+ * @param thread_id number in `[0, num_threads)` identifying the thread of the
+ * current invocation of the callback.
+ * @param x horizontal position of the first (leftmost) pixel of the pixel data.
+ * @param y vertical position of the pixel data.
+ * @param num_pixels number of pixels in the pixel data. May be less than the
+ * full @c xsize of the image, and will be at most equal to the @c
+ * num_pixels_per_thread that was passed to @c init.
+ * @param pixels pixel data as a horizontal stripe, in the format passed to @ref
+ * JxlDecoderSetMultithreadedImageOutCallback. The data pointed to
+ * remains owned by the caller and is only guaranteed to outlive the current
+ * callback invocation.
+ */
+typedef void (*JxlImageOutRunCallback)(void* run_opaque, size_t thread_id,
+ size_t x, size_t y, size_t num_pixels,
+ const void* pixels);
+
+/**
+ * Destruction callback for @ref JxlDecoderSetMultithreadedImageOutCallback,
+ * called after all invocations of the @c run callback to perform any
+ * appropriate clean-up of the @c run_opaque data returned by @c init.
+ *
+ * @param run_opaque user data returned by the @c init callback.
+ */
+typedef void (*JxlImageOutDestroyCallback)(void* run_opaque);
+
+/**
+ * Sets pixel output callback. This is an alternative to @ref
+ * JxlDecoderSetImageOutBuffer. This can be set when the @ref JXL_DEC_FRAME
+ * event occurs, must be set when the @ref JXL_DEC_NEED_IMAGE_OUT_BUFFER event
+ * occurs, and applies only for the current frame. Only one of @ref
+ * JxlDecoderSetImageOutBuffer or @ref JxlDecoderSetImageOutCallback may be used
+ * for the same frame, not both at the same time.
+ *
+ * The callback will be called multiple times, to receive the image
+ * data in small chunks. The callback receives a horizontal stripe of pixel
+ * data, 1 pixel high, xsize pixels wide, called a scanline. The xsize here is
+ * not the same as the full image width, the scanline may be a partial section,
+ * and xsize may differ between calls. The user can then process and/or copy the
+ * partial scanline to an image buffer. The callback may be called
+ * simultaneously by different threads when using a threaded parallel runner, on
+ * different pixels.
+ *
+ * If @ref JxlDecoderFlushImage is not used, then each pixel will be visited
+ * exactly once by the different callback calls, during processing with one or
+ * more @ref JxlDecoderProcessInput calls. These pixels are decoded to full
+ * detail, they are not part of a lower resolution or lower quality progressive
+ * pass, but the final pass.
+ *
+ * If @ref JxlDecoderFlushImage is used, then in addition each pixel will be
+ * visited zero or one times during the blocking @ref JxlDecoderFlushImage call.
+ * Pixels visited as a result of @ref JxlDecoderFlushImage may represent a lower
+ * resolution or lower quality intermediate progressive pass of the image. Any
+ * visited pixel will be of a quality at least as good or better than previous
+ * visits of this pixel. A pixel may be visited zero times if it cannot be
+ * decoded yet or if it was already decoded to full precision (this behavior is
+ * not guaranteed).
+ *
+ * @param dec decoder object
+ * @param format format of the pixels. Object owned by user; its contents are
+ * copied internally.
+ * @param callback the callback function receiving partial scanlines of pixel
+ * data.
+ * @param opaque optional user data, which will be passed on to the callback,
+ * may be NULL.
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such
+ * as @ref JxlDecoderSetImageOutBuffer already set.
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetImageOutCallback(JxlDecoder* dec, const JxlPixelFormat* format,
+ JxlImageOutCallback callback, void* opaque);
+
+/** Similar to @ref JxlDecoderSetImageOutCallback except that the callback is
+ * allowed an initialization phase during which it is informed of how many
+ * threads will call it concurrently, and those calls are further informed of
+ * which thread they are occurring in.
+ *
+ * @param dec decoder object
+ * @param format format of the pixels. Object owned by user; its contents are
+ * copied internally.
+ * @param init_callback initialization callback.
+ * @param run_callback the callback function receiving partial scanlines of
+ * pixel data.
+ * @param destroy_callback clean-up callback invoked after all calls to @c
+ * run_callback. May be NULL if no clean-up is necessary.
+ * @param init_opaque optional user data passed to @c init_callback, may be NULL
+ * (unlike the return value from @c init_callback which may only be NULL if
+ * initialization failed).
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such
+ * as @ref JxlDecoderSetImageOutBuffer having already been called.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetMultithreadedImageOutCallback(
+ JxlDecoder* dec, const JxlPixelFormat* format,
+ JxlImageOutInitCallback init_callback, JxlImageOutRunCallback run_callback,
+ JxlImageOutDestroyCallback destroy_callback, void* init_opaque);
+
+/**
+ * Returns the minimum size in bytes of an extra channel pixel buffer for the
+ * given format. This is the buffer for @ref JxlDecoderSetExtraChannelBuffer.
+ * Requires the basic image information is available in the decoder.
+ *
+ * @param dec decoder object
+ * @param format format of the pixels. The num_channels value is ignored and is
+ * always treated to be 1.
+ * @param size output value, buffer size in bytes
+ * @param index which extra channel to get, matching the index used in @ref
+ * JxlDecoderGetExtraChannelInfo. Must be smaller than num_extra_channels in
+ * the associated JxlBasicInfo.
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as
+ * information not available yet or invalid index.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderExtraChannelBufferSize(
+ const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size,
+ uint32_t index);
+
+/**
+ * Sets the buffer to write an extra channel to. This can be set when
+ * the @ref JXL_DEC_FRAME or @ref JXL_DEC_NEED_IMAGE_OUT_BUFFER event occurs,
+ * and applies only for the current frame. The size of the buffer must be at
+ * least as large as given by @ref JxlDecoderExtraChannelBufferSize. The buffer
+ * follows the format described by JxlPixelFormat, but where num_channels is 1.
+ * The buffer is owned by the caller. The amount of extra channels is given by
+ * the num_extra_channels field in the associated JxlBasicInfo, and the
+ * information of individual extra channels can be queried with @ref
+ * JxlDecoderGetExtraChannelInfo. To get multiple extra channels, this function
+ * must be called multiple times, once for each wanted index. Not all images
+ * have extra channels. The alpha channel is an extra channel and can be gotten
+ * as part of the color channels when using an RGBA pixel buffer with @ref
+ * JxlDecoderSetImageOutBuffer, but additionally also can be gotten
+ * separately as extra channel. The color channels themselves cannot be gotten
+ * this way.
+ *
+ *
+ * @param dec decoder object
+ * @param format format of the pixels. Object owned by user and its contents
+ * are copied internally. The num_channels value is ignored and is always
+ * treated to be 1.
+ * @param buffer buffer type to output the pixel data to
+ * @param size size of buffer in bytes
+ * @param index which extra channel to get, matching the index used in @ref
+ * JxlDecoderGetExtraChannelInfo. Must be smaller than num_extra_channels in
+ * the associated JxlBasicInfo.
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as
+ * size too small or invalid index.
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetExtraChannelBuffer(JxlDecoder* dec, const JxlPixelFormat* format,
+ void* buffer, size_t size, uint32_t index);
+
+/**
+ * Sets output buffer for reconstructed JPEG codestream.
+ *
+ * The data is owned by the caller and may be used by the decoder until @ref
+ * JxlDecoderReleaseJPEGBuffer is called or the decoder is destroyed or
+ * reset so must be kept alive until then.
+ *
+ * If a JPEG buffer was set before and released with @ref
+ * JxlDecoderReleaseJPEGBuffer, bytes that the decoder has already output
+ * should not be included, only the remaining bytes output must be set.
+ *
+ * @param dec decoder object
+ * @param data pointer to next bytes to write to
+ * @param size amount of bytes available starting from data
+ * @return @ref JXL_DEC_ERROR if output buffer was already set and @ref
+ * JxlDecoderReleaseJPEGBuffer was not called on it, @ref JXL_DEC_SUCCESS
+ * otherwise
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetJPEGBuffer(JxlDecoder* dec,
+ uint8_t* data, size_t size);
+
+/**
+ * Releases buffer which was provided with @ref JxlDecoderSetJPEGBuffer.
+ *
+ * Calling @ref JxlDecoderReleaseJPEGBuffer is required whenever
+ * a buffer is already set and a new buffer needs to be added with @ref
+ * JxlDecoderSetJPEGBuffer, but is not required before @ref
+ * JxlDecoderDestroy or @ref JxlDecoderReset.
+ *
+ * Calling @ref JxlDecoderReleaseJPEGBuffer when no buffer is set is
+ * not an error and returns 0.
+ *
+ * @param dec decoder object
+ * @return the amount of bytes the decoder has not yet written to of the data
+ * set by @ref JxlDecoderSetJPEGBuffer, or 0 if no buffer is set or @ref
+ * JxlDecoderReleaseJPEGBuffer was already called.
+ */
+JXL_EXPORT size_t JxlDecoderReleaseJPEGBuffer(JxlDecoder* dec);
+
+/**
+ * Sets output buffer for box output codestream.
+ *
+ * The data is owned by the caller and may be used by the decoder until @ref
+ * JxlDecoderReleaseBoxBuffer is called or the decoder is destroyed or
+ * reset so must be kept alive until then.
+ *
+ * If for the current box a box buffer was set before and released with @ref
+ * JxlDecoderReleaseBoxBuffer, bytes that the decoder has already output
+ * should not be included, only the remaining bytes output must be set.
+ *
+ * The @ref JxlDecoderReleaseBoxBuffer must be used at the next @ref JXL_DEC_BOX
+ * event or final @ref JXL_DEC_SUCCESS event to compute the size of the output
+ * box bytes.
+ *
+ * @param dec decoder object
+ * @param data pointer to next bytes to write to
+ * @param size amount of bytes available starting from data
+ * @return @ref JXL_DEC_ERROR if output buffer was already set and @ref
+ * JxlDecoderReleaseBoxBuffer was not called on it, @ref JXL_DEC_SUCCESS
+ * otherwise
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetBoxBuffer(JxlDecoder* dec,
+ uint8_t* data, size_t size);
+
+/**
+ * Releases buffer which was provided with @ref JxlDecoderSetBoxBuffer.
+ *
+ * Calling @ref JxlDecoderReleaseBoxBuffer is required whenever
+ * a buffer is already set and a new buffer needs to be added with @ref
+ * JxlDecoderSetBoxBuffer, but is not required before @ref
+ * JxlDecoderDestroy or @ref JxlDecoderReset.
+ *
+ * Calling @ref JxlDecoderReleaseBoxBuffer when no buffer is set is
+ * not an error and returns 0.
+ *
+ * @param dec decoder object
+ * @return the amount of bytes the decoder has not yet written to of the data
+ * set by @ref JxlDecoderSetBoxBuffer, or 0 if no buffer is set or @ref
+ * JxlDecoderReleaseBoxBuffer was already called.
+ */
+JXL_EXPORT size_t JxlDecoderReleaseBoxBuffer(JxlDecoder* dec);
+
+/**
+ * Configures whether to get boxes in raw mode or in decompressed mode. In raw
+ * mode, boxes are output as their bytes appear in the container file, which may
+ * be decompressed, or compressed if their type is "brob". In decompressed mode,
+ * "brob" boxes are decompressed with Brotli before outputting them. The size of
+ * the decompressed stream is not known before the decompression has already
+ * finished.
+ *
+ * The default mode is raw. This setting can only be changed before decoding, or
+ * directly after a @ref JXL_DEC_BOX event, and is remembered until the decoder
+ * is reset or destroyed.
+ *
+ * Enabling decompressed mode requires Brotli support from the library.
+ *
+ * @param dec decoder object
+ * @param decompress JXL_TRUE to transparently decompress, JXL_FALSE to get
+ * boxes in raw mode.
+ * @return @ref JXL_DEC_ERROR if decompressed mode is set and Brotli is not
+ * available, @ref JXL_DEC_SUCCESS otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetDecompressBoxes(JxlDecoder* dec,
+ JXL_BOOL decompress);
+
+/**
+ * Outputs the type of the current box, after a @ref JXL_DEC_BOX event occurred,
+ * as 4 characters without null termination character. In case of a compressed
+ * "brob" box, this will return "brob" if the decompressed argument is
+ * JXL_FALSE, or the underlying box type if the decompressed argument is
+ * JXL_TRUE.
+ *
+ * The following box types are currently described in ISO/IEC 18181-2:
+ * - "Exif": a box with EXIF metadata. Starts with a 4-byte tiff header offset
+ * (big-endian uint32) that indicates the start of the actual EXIF data
+ * (which starts with a tiff header). Usually the offset will be zero and the
+ * EXIF data starts immediately after the offset field. The Exif orientation
+ * should be ignored by applications; the JPEG XL codestream orientation
+ * takes precedence and libjxl will by default apply the correct orientation
+ * automatically (see @ref JxlDecoderSetKeepOrientation).
+ * - "xml ": a box with XML data, in particular XMP metadata.
+ * - "jumb": a JUMBF superbox (JPEG Universal Metadata Box Format, ISO/IEC
+ * 19566-5).
+ * - "JXL ": mandatory signature box, must come first, 12 bytes long including
+ * the box header
+ * - "ftyp": a second mandatory signature box, must come second, 20 bytes long
+ * including the box header
+ * - "jxll": a JXL level box. This indicates if the codestream is level 5 or
+ * level 10 compatible. If not present, it is level 5. Level 10 allows more
+ * features such as very high image resolution and bit-depths above 16 bits
+ * per channel. Added automatically by the encoder when
+ * JxlEncoderSetCodestreamLevel is used
+ * - "jxlc": a box with the image codestream, in case the codestream is not
+ * split across multiple boxes. The codestream contains the JPEG XL image
+ * itself, including the basic info such as image dimensions, ICC color
+ * profile, and all the pixel data of all the image frames.
+ * - "jxlp": a codestream box in case it is split across multiple boxes.
+ * The contents are the same as in case of a jxlc box, when concatenated.
+ * - "brob": a Brotli-compressed box, which otherwise represents an existing
+ * type of box such as Exif or "xml ". When @ref JxlDecoderSetDecompressBoxes
+ * is set to JXL_TRUE, these boxes will be transparently decompressed by the
+ * decoder.
+ * - "jxli": frame index box, can list the keyframes in case of a JPEG XL
+ * animation allowing the decoder to jump to individual frames more
+ * efficiently.
+ * - "jbrd": JPEG reconstruction box, contains the information required to
+ * byte-for-byte losslessly recontruct a JPEG-1 image. The JPEG DCT
+ * coefficients (pixel content) themselves as well as the ICC profile are
+ * encoded in the JXL codestream (jxlc or jxlp) itself. EXIF, XMP and JUMBF
+ * metadata is encoded in the corresponding boxes. The jbrd box itself
+ * contains information such as the remaining app markers of the JPEG-1 file
+ * and everything else required to fit the information together into the
+ * exact original JPEG file.
+ *
+ * Other application-specific boxes can exist. Their typename should not begin
+ * with "jxl" or "JXL" or conflict with other existing typenames.
+ *
+ * The signature, jxl* and jbrd boxes are processed by the decoder and would
+ * typically be ignored by applications. The typical way to use this function is
+ * to check if an encountered box contains metadata that the application is
+ * interested in (e.g. EXIF or XMP metadata), in order to conditionally set a
+ * box buffer.
+ *
+ * @param dec decoder object
+ * @param type buffer to copy the type into
+ * @param decompressed which box type to get: JXL_FALSE to get the raw box type,
+ * which can be "brob", JXL_TRUE, get the underlying box type.
+ * @return @ref JXL_DEC_SUCCESS if the value is available, @ref JXL_DEC_ERROR if
+ * not, for example the JXL file does not use the container format.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetBoxType(JxlDecoder* dec,
+ JxlBoxType type,
+ JXL_BOOL decompressed);
+
+/**
+ * Returns the size of a box as it appears in the container file, after the @ref
+ * JXL_DEC_BOX event. For a non-compressed box, this is the size of the
+ * contents, excluding the 4 bytes indicating the box type. For a compressed
+ * "brob" box, this is the size of the compressed box contents plus the
+ * additional 4 byte indicating the underlying box type, but excluding the 4
+ * bytes indicating "brob". This function gives the size of the data that will
+ * be written in the output buffer when getting boxes in the default raw
+ * compressed mode. When @ref JxlDecoderSetDecompressBoxes is enabled, the
+ * return value of function does not change, and the decompressed size is not
+ * known before it has already been decompressed and output.
+ *
+ * @param dec decoder object
+ * @param size raw size of the box in bytes
+ * @return @ref JXL_DEC_ERROR if no box size is available, @ref JXL_DEC_SUCCESS
+ * otherwise.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderGetBoxSizeRaw(const JxlDecoder* dec,
+ uint64_t* size);
+
+/**
+ * Configures at which progressive steps in frame decoding these @ref
+ * JXL_DEC_FRAME_PROGRESSION event occurs. The default value for the level
+ * of detail if this function is never called is `kDC`.
+ *
+ * @param dec decoder object
+ * @param detail at which level of detail to trigger @ref
+ * JXL_DEC_FRAME_PROGRESSION
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as
+ * an invalid value for the progressive detail.
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetProgressiveDetail(JxlDecoder* dec, JxlProgressiveDetail detail);
+
+/**
+ * Returns the intended downsampling ratio for the progressive frame produced
+ * by @ref JxlDecoderFlushImage after the latest @ref JXL_DEC_FRAME_PROGRESSION
+ * event.
+ *
+ * @param dec decoder object
+ * @return The intended downsampling ratio, can be 1, 2, 4 or 8.
+ */
+JXL_EXPORT size_t JxlDecoderGetIntendedDownsamplingRatio(JxlDecoder* dec);
+
+/**
+ * Outputs progressive step towards the decoded image so far when only partial
+ * input was received. If the flush was successful, the buffer set with @ref
+ * JxlDecoderSetImageOutBuffer will contain partial image data.
+ *
+ * Can be called when @ref JxlDecoderProcessInput returns @ref
+ * JXL_DEC_NEED_MORE_INPUT, after the @ref JXL_DEC_FRAME event already occurred
+ * and before the @ref JXL_DEC_FULL_IMAGE event occurred for a frame.
+ *
+ * @param dec decoder object
+ * @return @ref JXL_DEC_SUCCESS if image data was flushed to the output buffer,
+ * or @ref JXL_DEC_ERROR when no flush was done, e.g. if not enough image
+ * data was available yet even for flush, or no output buffer was set yet.
+ * This error is not fatal, it only indicates no flushed image is available
+ * right now. Regular decoding can still be performed.
+ */
+JXL_EXPORT JxlDecoderStatus JxlDecoderFlushImage(JxlDecoder* dec);
+
+/**
+ * Sets the bit depth of the output buffer or callback.
+ *
+ * Can be called after @ref JxlDecoderSetImageOutBuffer or @ref
+ * JxlDecoderSetImageOutCallback. For float pixel data types, only the default
+ * @ref JXL_BIT_DEPTH_FROM_PIXEL_FORMAT setting is supported.
+ *
+ * @param dec decoder object
+ * @param bit_depth the bit depth setting of the pixel output
+ * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as
+ * incompatible custom bit depth and pixel data type.
+ */
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetImageOutBitDepth(JxlDecoder* dec, const JxlBitDepth* bit_depth);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_DECODE_H_ */
+
+/** @}*/
diff --git a/third_party/jpeg-xl/lib/include/jxl/decode_cxx.h b/third_party/jpeg-xl/lib/include/jxl/decode_cxx.h
new file mode 100644
index 0000000000..bc6e8a3789
--- /dev/null
+++ b/third_party/jpeg-xl/lib/include/jxl/decode_cxx.h
@@ -0,0 +1,57 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/// @addtogroup libjxl_decoder
+/// @{
+///
+/// @file decode_cxx.h
+/// @brief C++ header-only helper for @ref decode.h.
+///
+/// There's no binary library associated with the header since this is a header
+/// only library.
+
+#ifndef JXL_DECODE_CXX_H_
+#define JXL_DECODE_CXX_H_
+
+#include <jxl/decode.h>
+
+#include <memory>
+
+#if !(defined(__cplusplus) || defined(c_plusplus))
+#error "This a C++ only header. Use jxl/decode.h from C sources."
+#endif
+
+/// Struct to call JxlDecoderDestroy from the JxlDecoderPtr unique_ptr.
+struct JxlDecoderDestroyStruct {
+ /// Calls @ref JxlDecoderDestroy() on the passed decoder.
+ void operator()(JxlDecoder* decoder) { JxlDecoderDestroy(decoder); }
+};
+
+/// std::unique_ptr<> type that calls JxlDecoderDestroy() when releasing the
+/// decoder.
+///
+/// Use this helper type from C++ sources to ensure the decoder is destroyed and
+/// their internal resources released.
+typedef std::unique_ptr<JxlDecoder, JxlDecoderDestroyStruct> JxlDecoderPtr;
+
+/// Creates an instance of JxlDecoder into a JxlDecoderPtr and initializes it.
+///
+/// This function returns a unique_ptr that will call JxlDecoderDestroy() when
+/// releasing the pointer. See @ref JxlDecoderCreate for details on the
+/// instance creation.
+///
+/// @param memory_manager custom allocator function. It may be NULL. The memory
+/// manager will be copied internally.
+/// @return a @c NULL JxlDecoderPtr if the instance can not be allocated or
+/// initialized
+/// @return initialized JxlDecoderPtr instance otherwise.
+static inline JxlDecoderPtr JxlDecoderMake(
+ const JxlMemoryManager* memory_manager) {
+ return JxlDecoderPtr(JxlDecoderCreate(memory_manager));
+}
+
+#endif // JXL_DECODE_CXX_H_
+
+/// @}
diff --git a/third_party/jpeg-xl/lib/include/jxl/encode.h b/third_party/jpeg-xl/lib/include/jxl/encode.h
new file mode 100644
index 0000000000..bd2906e20a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/include/jxl/encode.h
@@ -0,0 +1,1213 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_encoder
+ * @{
+ * @file encode.h
+ * @brief Encoding API for JPEG XL.
+ */
+
+#ifndef JXL_ENCODE_H_
+#define JXL_ENCODE_H_
+
+#include <jxl/cms_interface.h>
+#include <jxl/codestream_header.h>
+#include <jxl/jxl_export.h>
+#include <jxl/memory_manager.h>
+#include <jxl/parallel_runner.h>
+#include <jxl/version.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/**
+ * Encoder library version.
+ *
+ * @return the encoder library version as an integer:
+ * MAJOR_VERSION * 1000000 + MINOR_VERSION * 1000 + PATCH_VERSION. For example,
+ * version 1.2.3 would return 1002003.
+ */
+JXL_EXPORT uint32_t JxlEncoderVersion(void);
+
+/**
+ * Opaque structure that holds the JPEG XL encoder.
+ *
+ * Allocated and initialized with JxlEncoderCreate().
+ * Cleaned up and deallocated with JxlEncoderDestroy().
+ */
+typedef struct JxlEncoderStruct JxlEncoder;
+
+/**
+ * Settings and metadata for a single image frame. This includes encoder options
+ * for a frame such as compression quality and speed.
+ *
+ * Allocated and initialized with JxlEncoderFrameSettingsCreate().
+ * Cleaned up and deallocated when the encoder is destroyed with
+ * JxlEncoderDestroy().
+ */
+typedef struct JxlEncoderFrameSettingsStruct JxlEncoderFrameSettings;
+
+/** DEPRECATED: Use JxlEncoderFrameSettings instead.
+ */
+typedef JxlEncoderFrameSettings JxlEncoderOptions;
+
+/**
+ * Return value for multiple encoder functions.
+ */
+typedef enum {
+ /** Function call finished successfully, or encoding is finished and there is
+ * nothing more to be done.
+ */
+ JXL_ENC_SUCCESS = 0,
+
+ /** An error occurred, for example out of memory.
+ */
+ JXL_ENC_ERROR = 1,
+
+ /** The encoder needs more output buffer to continue encoding.
+ */
+ JXL_ENC_NEED_MORE_OUTPUT = 2,
+
+ /** DEPRECATED: the encoder does not return this status and there is no need
+ * to handle or expect it.
+ * Instead, JXL_ENC_ERROR is returned with error condition
+ * JXL_ENC_ERR_NOT_SUPPORTED.
+ */
+ JXL_ENC_NOT_SUPPORTED = 3,
+
+} JxlEncoderStatus;
+
+/**
+ * Error conditions:
+ * API usage errors have the 0x80 bit set to 1
+ * Other errors have the 0x80 bit set to 0
+ */
+typedef enum {
+ /** No error
+ */
+ JXL_ENC_ERR_OK = 0,
+
+ /** Generic encoder error due to unspecified cause
+ */
+ JXL_ENC_ERR_GENERIC = 1,
+
+ /** Out of memory
+ * TODO(jon): actually catch this and return this error
+ */
+ JXL_ENC_ERR_OOM = 2,
+
+ /** JPEG bitstream reconstruction data could not be
+ * represented (e.g. too much tail data)
+ */
+ JXL_ENC_ERR_JBRD = 3,
+
+ /** Input is invalid (e.g. corrupt JPEG file or ICC profile)
+ */
+ JXL_ENC_ERR_BAD_INPUT = 4,
+
+ /** The encoder doesn't (yet) support this. Either no version of libjxl
+ * supports this, and the API is used incorrectly, or the libjxl version
+ * should have been checked before trying to do this.
+ */
+ JXL_ENC_ERR_NOT_SUPPORTED = 0x80,
+
+ /** The encoder API is used in an incorrect way.
+ * In this case, a debug build of libjxl should output a specific error
+ * message. (if not, please open an issue about it)
+ */
+ JXL_ENC_ERR_API_USAGE = 0x81,
+
+} JxlEncoderError;
+
+/**
+ * Id of encoder options for a frame. This includes options such as setting
+ * encoding effort/speed or overriding the use of certain coding tools, for this
+ * frame. This does not include non-frame related encoder options such as for
+ * boxes.
+ */
+typedef enum {
+ /** Sets encoder effort/speed level without affecting decoding speed. Valid
+ * values are, from faster to slower speed: 1:lightning 2:thunder 3:falcon
+ * 4:cheetah 5:hare 6:wombat 7:squirrel 8:kitten 9:tortoise.
+ * Default: squirrel (7).
+ */
+ JXL_ENC_FRAME_SETTING_EFFORT = 0,
+
+ /** Sets the decoding speed tier for the provided options. Minimum is 0
+ * (slowest to decode, best quality/density), and maximum is 4 (fastest to
+ * decode, at the cost of some quality/density). Default is 0.
+ */
+ JXL_ENC_FRAME_SETTING_DECODING_SPEED = 1,
+
+ /** Sets resampling option. If enabled, the image is downsampled before
+ * compression, and upsampled to original size in the decoder. Integer option,
+ * use -1 for the default behavior (resampling only applied for low quality),
+ * 1 for no downsampling (1x1), 2 for 2x2 downsampling, 4 for 4x4
+ * downsampling, 8 for 8x8 downsampling.
+ */
+ JXL_ENC_FRAME_SETTING_RESAMPLING = 2,
+
+ /** Similar to JXL_ENC_FRAME_SETTING_RESAMPLING, but for extra channels.
+ * Integer option, use -1 for the default behavior (depends on encoder
+ * implementation), 1 for no downsampling (1x1), 2 for 2x2 downsampling, 4 for
+ * 4x4 downsampling, 8 for 8x8 downsampling.
+ */
+ JXL_ENC_FRAME_SETTING_EXTRA_CHANNEL_RESAMPLING = 3,
+
+ /** Indicates the frame added with @ref JxlEncoderAddImageFrame is already
+ * downsampled by the downsampling factor set with @ref
+ * JXL_ENC_FRAME_SETTING_RESAMPLING. The input frame must then be given in the
+ * downsampled resolution, not the full image resolution. The downsampled
+ * resolution is given by ceil(xsize / resampling), ceil(ysize / resampling)
+ * with xsize and ysize the dimensions given in the basic info, and resampling
+ * the factor set with @ref JXL_ENC_FRAME_SETTING_RESAMPLING.
+ * Use 0 to disable, 1 to enable. Default value is 0.
+ */
+ JXL_ENC_FRAME_SETTING_ALREADY_DOWNSAMPLED = 4,
+
+ /** Adds noise to the image emulating photographic film noise, the higher the
+ * given number, the grainier the image will be. As an example, a value of 100
+ * gives low noise whereas a value of 3200 gives a lot of noise. The default
+ * value is 0.
+ */
+ JXL_ENC_FRAME_SETTING_PHOTON_NOISE = 5,
+
+ /** Enables adaptive noise generation. This setting is not recommended for
+ * use, please use JXL_ENC_FRAME_SETTING_PHOTON_NOISE instead. Use -1 for the
+ * default (encoder chooses), 0 to disable, 1 to enable.
+ */
+ JXL_ENC_FRAME_SETTING_NOISE = 6,
+
+ /** Enables or disables dots generation. Use -1 for the default (encoder
+ * chooses), 0 to disable, 1 to enable.
+ */
+ JXL_ENC_FRAME_SETTING_DOTS = 7,
+
+ /** Enables or disables patches generation. Use -1 for the default (encoder
+ * chooses), 0 to disable, 1 to enable.
+ */
+ JXL_ENC_FRAME_SETTING_PATCHES = 8,
+
+ /** Edge preserving filter level, -1 to 3. Use -1 for the default (encoder
+ * chooses), 0 to 3 to set a strength.
+ */
+ JXL_ENC_FRAME_SETTING_EPF = 9,
+
+ /** Enables or disables the gaborish filter. Use -1 for the default (encoder
+ * chooses), 0 to disable, 1 to enable.
+ */
+ JXL_ENC_FRAME_SETTING_GABORISH = 10,
+
+ /** Enables modular encoding. Use -1 for default (encoder
+ * chooses), 0 to enforce VarDCT mode (e.g. for photographic images), 1 to
+ * enforce modular mode (e.g. for lossless images).
+ */
+ JXL_ENC_FRAME_SETTING_MODULAR = 11,
+
+ /** Enables or disables preserving color of invisible pixels. Use -1 for the
+ * default (1 if lossless, 0 if lossy), 0 to disable, 1 to enable.
+ */
+ JXL_ENC_FRAME_SETTING_KEEP_INVISIBLE = 12,
+
+ /** Determines the order in which 256x256 regions are stored in the codestream
+ * for progressive rendering. Use -1 for the encoder
+ * default, 0 for scanline order, 1 for center-first order.
+ */
+ JXL_ENC_FRAME_SETTING_GROUP_ORDER = 13,
+
+ /** Determines the horizontal position of center for the center-first group
+ * order. Use -1 to automatically use the middle of the image, 0..xsize to
+ * specifically set it.
+ */
+ JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_X = 14,
+
+ /** Determines the center for the center-first group order. Use -1 to
+ * automatically use the middle of the image, 0..ysize to specifically set it.
+ */
+ JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_Y = 15,
+
+ /** Enables or disables progressive encoding for modular mode. Use -1 for the
+ * encoder default, 0 to disable, 1 to enable.
+ */
+ JXL_ENC_FRAME_SETTING_RESPONSIVE = 16,
+
+ /** Set the progressive mode for the AC coefficients of VarDCT, using spectral
+ * progression from the DCT coefficients. Use -1 for the encoder default, 0 to
+ * disable, 1 to enable.
+ */
+ JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC = 17,
+
+ /** Set the progressive mode for the AC coefficients of VarDCT, using
+ * quantization of the least significant bits. Use -1 for the encoder default,
+ * 0 to disable, 1 to enable.
+ */
+ JXL_ENC_FRAME_SETTING_QPROGRESSIVE_AC = 18,
+
+ /** Set the progressive mode using lower-resolution DC images for VarDCT. Use
+ * -1 for the encoder default, 0 to disable, 1 to have an extra 64x64 lower
+ * resolution pass, 2 to have a 512x512 and 64x64 lower resolution pass.
+ */
+ JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC = 19,
+
+ /** Use Global channel palette if the amount of colors is smaller than this
+ * percentage of range. Use 0-100 to set an explicit percentage, -1 to use the
+ * encoder default. Used for modular encoding.
+ */
+ JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GLOBAL_PERCENT = 20,
+
+ /** Use Local (per-group) channel palette if the amount of colors is smaller
+ * than this percentage of range. Use 0-100 to set an explicit percentage, -1
+ * to use the encoder default. Used for modular encoding.
+ */
+ JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GROUP_PERCENT = 21,
+
+ /** Use color palette if amount of colors is smaller than or equal to this
+ * amount, or -1 to use the encoder default. Used for modular encoding.
+ */
+ JXL_ENC_FRAME_SETTING_PALETTE_COLORS = 22,
+
+ /** Enables or disables delta palette. Use -1 for the default (encoder
+ * chooses), 0 to disable, 1 to enable. Used in modular mode.
+ */
+ JXL_ENC_FRAME_SETTING_LOSSY_PALETTE = 23,
+
+ /** Color transform for internal encoding: -1 = default, 0=XYB, 1=none (RGB),
+ * 2=YCbCr. The XYB setting performs the forward XYB transform. None and
+ * YCbCr both perform no transform, but YCbCr is used to indicate that the
+ * encoded data losslessly represents YCbCr values.
+ */
+ JXL_ENC_FRAME_SETTING_COLOR_TRANSFORM = 24,
+
+ /** Reversible color transform for modular encoding: -1=default, 0-41=RCT
+ * index, e.g. index 0 = none, index 6 = YCoCg.
+ * If this option is set to a non-default value, the RCT will be globally
+ * applied to the whole frame.
+ * The default behavior is to try several RCTs locally per modular group,
+ * depending on the speed and distance setting.
+ */
+ JXL_ENC_FRAME_SETTING_MODULAR_COLOR_SPACE = 25,
+
+ /** Group size for modular encoding: -1=default, 0=128, 1=256, 2=512, 3=1024.
+ */
+ JXL_ENC_FRAME_SETTING_MODULAR_GROUP_SIZE = 26,
+
+ /** Predictor for modular encoding. -1 = default, 0=zero, 1=left, 2=top,
+ * 3=avg0, 4=select, 5=gradient, 6=weighted, 7=topright, 8=topleft,
+ * 9=leftleft, 10=avg1, 11=avg2, 12=avg3, 13=toptop predictive average 14=mix
+ * 5 and 6, 15=mix everything.
+ */
+ JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR = 27,
+
+ /** Fraction of pixels used to learn MA trees as a percentage. -1 = default,
+ * 0 = no MA and fast decode, 50 = default value, 100 = all, values above
+ * 100 are also permitted. Higher values use more encoder memory.
+ */
+ JXL_ENC_FRAME_SETTING_MODULAR_MA_TREE_LEARNING_PERCENT = 28,
+
+ /** Number of extra (previous-channel) MA tree properties to use. -1 =
+ * default, 0-11 = valid values. Recommended values are in the range 0 to 3,
+ * or 0 to amount of channels minus 1 (including all extra channels, and
+ * excluding color channels when using VarDCT mode). Higher value gives slower
+ * encoding and slower decoding.
+ */
+ JXL_ENC_FRAME_SETTING_MODULAR_NB_PREV_CHANNELS = 29,
+
+ /** Enable or disable CFL (chroma-from-luma) for lossless JPEG recompression.
+ * -1 = default, 0 = disable CFL, 1 = enable CFL.
+ */
+ JXL_ENC_FRAME_SETTING_JPEG_RECON_CFL = 30,
+
+ /** Prepare the frame for indexing in the frame index box.
+ * 0 = ignore this frame (same as not setting a value),
+ * 1 = index this frame within the Frame Index Box.
+ * If any frames are indexed, the first frame needs to
+ * be indexed, too. If the first frame is not indexed, and
+ * a later frame is attempted to be indexed, JXL_ENC_ERROR will occur.
+ * If non-keyframes, i.e., frames with cropping, blending or patches are
+ * attempted to be indexed, JXL_ENC_ERROR will occur.
+ */
+ JXL_ENC_FRAME_INDEX_BOX = 31,
+
+ /** Sets brotli encode effort for use in JPEG recompression and compressed
+ * metadata boxes (brob). Can be -1 (default) or 0 (fastest) to 11 (slowest).
+ * Default is based on the general encode effort in case of JPEG
+ * recompression, and 4 for brob boxes.
+ */
+ JXL_ENC_FRAME_SETTING_BROTLI_EFFORT = 32,
+
+ /** Enables or disables brotli compression of metadata boxes derived from
+ * a JPEG frame when using JxlEncoderAddJPEGFrame. This has no effect on boxes
+ * added using JxlEncoderAddBox.
+ * -1 = default, 0 = disable compression, 1 = enable compression.
+ */
+ JXL_ENC_FRAME_SETTING_JPEG_COMPRESS_BOXES = 33,
+
+ /** Enum value not to be used as an option. This value is added to force the
+ * C compiler to have the enum to take a known size.
+ */
+ JXL_ENC_FRAME_SETTING_FILL_ENUM = 65535,
+
+} JxlEncoderFrameSettingId;
+
+/**
+ * Creates an instance of JxlEncoder and initializes it.
+ *
+ * @p memory_manager will be used for all the library dynamic allocations made
+ * from this instance. The parameter may be NULL, in which case the default
+ * allocator will be used. See jpegxl/memory_manager.h for details.
+ *
+ * @param memory_manager custom allocator function. It may be NULL. The memory
+ * manager will be copied internally.
+ * @return @c NULL if the instance can not be allocated or initialized
+ * @return pointer to initialized JxlEncoder otherwise
+ */
+JXL_EXPORT JxlEncoder* JxlEncoderCreate(const JxlMemoryManager* memory_manager);
+
+/**
+ * Re-initializes a JxlEncoder instance, so it can be re-used for encoding
+ * another image. All state and settings are reset as if the object was
+ * newly created with JxlEncoderCreate, but the memory manager is kept.
+ *
+ * @param enc instance to be re-initialized.
+ */
+JXL_EXPORT void JxlEncoderReset(JxlEncoder* enc);
+
+/**
+ * Deinitializes and frees JxlEncoder instance.
+ *
+ * @param enc instance to be cleaned up and deallocated.
+ */
+JXL_EXPORT void JxlEncoderDestroy(JxlEncoder* enc);
+
+/**
+ * Sets the color management system (CMS) that will be used for color conversion
+ * (if applicable) during encoding. May only be set before starting encoding. If
+ * left unset, the default CMS implementation will be used.
+ *
+ * @param enc encoder object.
+ * @param cms structure representing a CMS implementation. See JxlCmsInterface
+ * for more details.
+ */
+JXL_EXPORT void JxlEncoderSetCms(JxlEncoder* enc, JxlCmsInterface cms);
+
+/**
+ * Set the parallel runner for multithreading. May only be set before starting
+ * encoding.
+ *
+ * @param enc encoder object.
+ * @param parallel_runner function pointer to runner for multithreading. It may
+ * be NULL to use the default, single-threaded, runner. A multithreaded
+ * runner should be set to reach fast performance.
+ * @param parallel_runner_opaque opaque pointer for parallel_runner.
+ * @return JXL_ENC_SUCCESS if the runner was set, JXL_ENC_ERROR
+ * otherwise (the previous runner remains set).
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderSetParallelRunner(JxlEncoder* enc, JxlParallelRunner parallel_runner,
+ void* parallel_runner_opaque);
+
+/**
+ * Get the (last) error code in case JXL_ENC_ERROR was returned.
+ *
+ * @param enc encoder object.
+ * @return the JxlEncoderError that caused the (last) JXL_ENC_ERROR to be
+ * returned.
+ */
+JXL_EXPORT JxlEncoderError JxlEncoderGetError(JxlEncoder* enc);
+
+/**
+ * Encodes JPEG XL file using the available bytes. @p *avail_out indicates how
+ * many output bytes are available, and @p *next_out points to the input bytes.
+ * *avail_out will be decremented by the amount of bytes that have been
+ * processed by the encoder and *next_out will be incremented by the same
+ * amount, so *next_out will now point at the amount of *avail_out unprocessed
+ * bytes.
+ *
+ * The returned status indicates whether the encoder needs more output bytes.
+ * When the return value is not JXL_ENC_ERROR or JXL_ENC_SUCCESS, the encoding
+ * requires more JxlEncoderProcessOutput calls to continue.
+ *
+ * The caller must guarantee that *avail_out >= 32 when calling
+ * JxlEncoderProcessOutput; otherwise, JXL_ENC_NEED_MORE_OUTPUT will be
+ * returned. It is guaranteed that, if *avail_out >= 32, at least one byte of
+ * output will be written.
+ *
+ * This encodes the frames and/or boxes added so far. If the last frame or last
+ * box has been added, @ref JxlEncoderCloseInput, @ref JxlEncoderCloseFrames
+ * and/or @ref JxlEncoderCloseBoxes must be called before the next
+ * @ref JxlEncoderProcessOutput call, or the codestream won't be encoded
+ * correctly.
+ *
+ * @param enc encoder object.
+ * @param next_out pointer to next bytes to write to.
+ * @param avail_out amount of bytes available starting from *next_out.
+ * @return JXL_ENC_SUCCESS when encoding finished and all events handled.
+ * @return JXL_ENC_ERROR when encoding failed, e.g. invalid input.
+ * @return JXL_ENC_NEED_MORE_OUTPUT more output buffer is necessary.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderProcessOutput(JxlEncoder* enc,
+ uint8_t** next_out,
+ size_t* avail_out);
+
+/**
+ * Sets the frame information for this frame to the encoder. This includes
+ * animation information such as frame duration to store in the frame header.
+ * The frame header fields represent the frame as passed to the encoder, but not
+ * necessarily the exact values as they will be encoded file format: the encoder
+ * could change crop and blending options of a frame for more efficient encoding
+ * or introduce additional internal frames. Animation duration and time code
+ * information is not altered since those are immutable metadata of the frame.
+ *
+ * It is not required to use this function, however if have_animation is set
+ * to true in the basic info, then this function should be used to set the
+ * time duration of this individual frame. By default individual frames have a
+ * time duration of 0, making them form a composite still. See @ref
+ * JxlFrameHeader for more information.
+ *
+ * This information is stored in the JxlEncoderFrameSettings and so is used for
+ * any frame encoded with these JxlEncoderFrameSettings. It is ok to change
+ * between @ref JxlEncoderAddImageFrame calls, each added image frame will have
+ * the frame header that was set in the options at the time of calling
+ * JxlEncoderAddImageFrame.
+ *
+ * The is_last and name_length fields of the JxlFrameHeader are ignored, use
+ * @ref JxlEncoderCloseFrames to indicate last frame, and @ref
+ * JxlEncoderSetFrameName to indicate the name and its length instead.
+ * Calling this function will clear any name that was previously set with @ref
+ * JxlEncoderSetFrameName.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param frame_header frame header data to set. Object owned by the caller and
+ * does not need to be kept in memory, its information is copied internally.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderSetFrameHeader(JxlEncoderFrameSettings* frame_settings,
+ const JxlFrameHeader* frame_header);
+
+/**
+ * Sets blend info of an extra channel. The blend info of extra channels is set
+ * separately from that of the color channels, the color channels are set with
+ * @ref JxlEncoderSetFrameHeader.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param index index of the extra channel to use.
+ * @param blend_info blend info to set for the extra channel
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelBlendInfo(
+ JxlEncoderFrameSettings* frame_settings, size_t index,
+ const JxlBlendInfo* blend_info);
+
+/**
+ * Sets the name of the animation frame. This function is optional, frames are
+ * not required to have a name. This setting is a part of the frame header, and
+ * the same principles as for @ref JxlEncoderSetFrameHeader apply. The
+ * name_length field of JxlFrameHeader is ignored by the encoder, this function
+ * determines the name length instead as the length in bytes of the C string.
+ *
+ * The maximum possible name length is 1071 bytes (excluding terminating null
+ * character).
+ *
+ * Calling @ref JxlEncoderSetFrameHeader clears any name that was
+ * previously set.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param frame_name name of the next frame to be encoded, as a UTF-8 encoded C
+ * string (zero terminated). Owned by the caller, and copied internally.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetFrameName(
+ JxlEncoderFrameSettings* frame_settings, const char* frame_name);
+
+/**
+ * Sets the bit depth of the input buffer.
+ *
+ * For float pixel formats, only the default JXL_BIT_DEPTH_FROM_PIXEL_FORMAT
+ * setting is allowed, while for unsigned pixel formats,
+ * JXL_BIT_DEPTH_FROM_CODESTREAM setting is also allowed. See the comment on
+ * @ref JxlEncoderAddImageFrame for the effects of the bit depth setting.
+
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param bit_depth the bit depth setting of the pixel input
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetFrameBitDepth(
+ JxlEncoderFrameSettings* frame_settings, const JxlBitDepth* bit_depth);
+
+/**
+ * Sets the buffer to read JPEG encoded bytes from for the next frame to encode.
+ *
+ * If JxlEncoderSetBasicInfo has not yet been called, calling
+ * JxlEncoderAddJPEGFrame will implicitly call it with the parameters of the
+ * added JPEG frame.
+ *
+ * If JxlEncoderSetColorEncoding or JxlEncoderSetICCProfile has not yet been
+ * called, calling JxlEncoderAddJPEGFrame will implicitly call it with the
+ * parameters of the added JPEG frame.
+ *
+ * If the encoder is set to store JPEG reconstruction metadata using @ref
+ * JxlEncoderStoreJPEGMetadata and a single JPEG frame is added, it will be
+ * possible to losslessly reconstruct the JPEG codestream.
+ *
+ * If this is the last frame, @ref JxlEncoderCloseInput or @ref
+ * JxlEncoderCloseFrames must be called before the next
+ * @ref JxlEncoderProcessOutput call.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param buffer bytes to read JPEG from. Owned by the caller and its contents
+ * are copied internally.
+ * @param size size of buffer in bytes.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderAddJPEGFrame(const JxlEncoderFrameSettings* frame_settings,
+ const uint8_t* buffer, size_t size);
+
+/**
+ * Sets the buffer to read pixels from for the next image to encode. Must call
+ * JxlEncoderSetBasicInfo before JxlEncoderAddImageFrame.
+ *
+ * Currently only some data types for pixel formats are supported:
+ * - JXL_TYPE_UINT8, with range 0..255
+ * - JXL_TYPE_UINT16, with range 0..65535
+ * - JXL_TYPE_FLOAT16, with nominal range 0..1
+ * - JXL_TYPE_FLOAT, with nominal range 0..1
+ *
+ * Note: the sample data type in pixel_format is allowed to be different from
+ * what is described in the JxlBasicInfo. The type in pixel_format, together
+ * with an optional @ref JxlBitDepth parameter set by @ref
+ * JxlEncoderSetFrameBitDepth describes the format of the uncompressed pixel
+ * buffer. The bits_per_sample and exponent_bits_per_sample in the JxlBasicInfo
+ * describes what will actually be encoded in the JPEG XL codestream.
+ * For example, to encode a 12-bit image, you would set bits_per_sample to 12,
+ * while the input frame buffer can be in the following formats:
+ * - if pixel format is in JXL_TYPE_UINT16 with default bit depth setting
+ * (i.e. JXL_BIT_DEPTH_FROM_PIXEL_FORMAT), input sample values are rescaled
+ * to 16-bit, i.e. multiplied by 65535/4095;
+ * - if pixel format is in JXL_TYPE_UINT16 with JXL_BIT_DEPTH_FROM_CODESTREAM
+ * bit depth setting, input sample values are provided unscaled;
+ * - if pixel format is in JXL_TYPE_FLOAT, input sample values are rescaled
+ * to 0..1, i.e. multiplied by 1.f/4095.f.
+ * While it is allowed, it is obviously not recommended to use a pixel_format
+ * with lower precision than what is specified in the JxlBasicInfo.
+ *
+ * We support interleaved channels as described by the JxlPixelFormat:
+ * - single-channel data, e.g. grayscale
+ * - single-channel + alpha
+ * - trichromatic, e.g. RGB
+ * - trichromatic + alpha
+ *
+ * Extra channels not handled here need to be set by @ref
+ * JxlEncoderSetExtraChannelBuffer.
+ * If the image has alpha, and alpha is not passed here, it will implicitly be
+ * set to all-opaque (an alpha value of 1.0 everywhere).
+ *
+ * The pixels are assumed to be encoded in the original profile that is set with
+ * JxlEncoderSetColorEncoding or JxlEncoderSetICCProfile. If none of these
+ * functions were used, the pixels are assumed to be nonlinear sRGB for integer
+ * data types (JXL_TYPE_UINT8, JXL_TYPE_UINT16), and linear sRGB for floating
+ * point data types (JXL_TYPE_FLOAT16, JXL_TYPE_FLOAT).
+ *
+ * Sample values in floating-point pixel formats are allowed to be outside the
+ * nominal range, e.g. to represent out-of-sRGB-gamut colors in the
+ * uses_original_profile=false case. They are however not allowed to be NaN or
+ * +-infinity.
+ *
+ * If this is the last frame, @ref JxlEncoderCloseInput or @ref
+ * JxlEncoderCloseFrames must be called before the next
+ * @ref JxlEncoderProcessOutput call.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param pixel_format format for pixels. Object owned by the caller and its
+ * contents are copied internally.
+ * @param buffer buffer type to input the pixel data from. Owned by the caller
+ * and its contents are copied internally.
+ * @param size size of buffer in bytes. This size should match what is implied
+ * by the frame dimensions and the pixel format.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderAddImageFrame(
+ const JxlEncoderFrameSettings* frame_settings,
+ const JxlPixelFormat* pixel_format, const void* buffer, size_t size);
+
+/**
+ * Sets the buffer to read pixels from for an extra channel at a given index.
+ * The index must be smaller than the num_extra_channels in the associated
+ * JxlBasicInfo. Must call @ref JxlEncoderSetExtraChannelInfo before
+ * JxlEncoderSetExtraChannelBuffer.
+ *
+ * TODO(firsching): mention what data types in pixel formats are supported.
+ *
+ * It is required to call this function for every extra channel, except for the
+ * alpha channel if that was already set through @ref JxlEncoderAddImageFrame.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param pixel_format format for pixels. Object owned by the caller and its
+ * contents are copied internally. The num_channels value is ignored, since the
+ * number of channels for an extra channel is always assumed to be one.
+ * @param buffer buffer type to input the pixel data from. Owned by the caller
+ * and its contents are copied internally.
+ * @param size size of buffer in bytes. This size should match what is implied
+ * by the frame dimensions and the pixel format.
+ * @param index index of the extra channel to use.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelBuffer(
+ const JxlEncoderFrameSettings* frame_settings,
+ const JxlPixelFormat* pixel_format, const void* buffer, size_t size,
+ uint32_t index);
+
+/** Adds a metadata box to the file format. JxlEncoderProcessOutput must be used
+ * to effectively write the box to the output. @ref JxlEncoderUseBoxes must
+ * be enabled before using this function.
+ *
+ * Boxes allow inserting application-specific data and metadata (Exif, XML/XMP,
+ * JUMBF and user defined boxes).
+ *
+ * The box format follows ISO BMFF and shares features and box types with other
+ * image and video formats, including the Exif, XML and JUMBF boxes. The box
+ * format for JPEG XL is specified in ISO/IEC 18181-2.
+ *
+ * Boxes in general don't contain other boxes inside, except a JUMBF superbox.
+ * Boxes follow each other sequentially and are byte-aligned. If the container
+ * format is used, the JXL stream consists of concatenated boxes.
+ * It is also possible to use a direct codestream without boxes, but in that
+ * case metadata cannot be added.
+ *
+ * Each box generally has the following byte structure in the file:
+ * - 4 bytes: box size including box header (Big endian. If set to 0, an
+ * 8-byte 64-bit size follows instead).
+ * - 4 bytes: type, e.g. "JXL " for the signature box, "jxlc" for a codestream
+ * box.
+ * - N bytes: box contents.
+ *
+ * Only the box contents are provided to the contents argument of this function,
+ * the encoder encodes the size header itself. Most boxes are written
+ * automatically by the encoder as needed ("JXL ", "ftyp", "jxll", "jxlc",
+ * "jxlp", "jxli", "jbrd"), and this function only needs to be called to add
+ * optional metadata when encoding from pixels (using JxlEncoderAddImageFrame).
+ * When recompressing JPEG files (using JxlEncoderAddJPEGFrame), if the input
+ * JPEG contains EXIF, XMP or JUMBF metadata, the corresponding boxes are
+ * already added automatically.
+ *
+ * Box types are given by 4 characters. The following boxes can be added with
+ * this function:
+ * - "Exif": a box with EXIF metadata, can be added by libjxl users, or is
+ * automatically added when needed for JPEG reconstruction. The contents of
+ * this box must be prepended by a 4-byte tiff header offset, which may
+ * be 4 zero bytes in case the tiff header follows immediately.
+ * The EXIF metadata must be in sync with what is encoded in the JPEG XL
+ * codestream, specifically the image orientation. While this is not
+ * recommended in practice, in case of conflicting metadata, the JPEG XL
+ * codestream takes precedence.
+ * - "xml ": a box with XML data, in particular XMP metadata, can be added by
+ * libjxl users, or is automatically added when needed for JPEG reconstruction
+ * - "jumb": a JUMBF superbox, which can contain boxes with different types of
+ * metadata inside. This box type can be added by the encoder transparently,
+ * and other libraries to create and handle JUMBF content exist.
+ * - Application-specific boxes. Their typename should not begin with "jxl" or
+ * "JXL" or conflict with other existing typenames, and they should be
+ * registered with MP4RA (mp4ra.org).
+ *
+ * These boxes can be stored uncompressed or Brotli-compressed (using a "brob"
+ * box), depending on the compress_box parameter.
+ *
+ * @param enc encoder object.
+ * @param type the box type, e.g. "Exif" for EXIF metadata, "xml " for XMP or
+ * IPTC metadata, "jumb" for JUMBF metadata.
+ * @param contents the full contents of the box, for example EXIF
+ * data. ISO BMFF box header must not be included, only the contents. Owned by
+ * the caller and its contents are copied internally.
+ * @param size size of the box contents.
+ * @param compress_box Whether to compress this box as a "brob" box. Requires
+ * Brotli support.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error, such as when
+ * using this function without JxlEncoderUseContainer, or adding a box type
+ * that would result in an invalid file format.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderAddBox(JxlEncoder* enc,
+ const JxlBoxType type,
+ const uint8_t* contents,
+ size_t size,
+ JXL_BOOL compress_box);
+
+/**
+ * Indicates the intention to add metadata boxes. This allows @ref
+ * JxlEncoderAddBox to be used. When using this function, then it is required
+ * to use @ref JxlEncoderCloseBoxes at the end.
+ *
+ * By default the encoder assumes no metadata boxes will be added.
+ *
+ * This setting can only be set at the beginning, before encoding starts.
+ *
+ * @param enc encoder object.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderUseBoxes(JxlEncoder* enc);
+
+/**
+ * Declares that no further boxes will be added with @ref JxlEncoderAddBox.
+ * This function must be called after the last box is added so the encoder knows
+ * the stream will be finished. It is not necessary to use this function if
+ * @ref JxlEncoderUseBoxes is not used. Further frames may still be added.
+ *
+ * Must be called between JxlEncoderAddBox of the last box
+ * and the next call to JxlEncoderProcessOutput, or @ref JxlEncoderProcessOutput
+ * won't output the last box correctly.
+ *
+ * NOTE: if you don't need to close frames and boxes at separate times, you can
+ * use @ref JxlEncoderCloseInput instead to close both at once.
+ *
+ * @param enc encoder object.
+ */
+JXL_EXPORT void JxlEncoderCloseBoxes(JxlEncoder* enc);
+
+/**
+ * Declares that no frames will be added and @ref JxlEncoderAddImageFrame and
+ * @ref JxlEncoderAddJPEGFrame won't be called anymore. Further metadata boxes
+ * may still be added. This function or @ref JxlEncoderCloseInput must be called
+ * after adding the last frame and the next call to
+ * @ref JxlEncoderProcessOutput, or the frame won't be properly marked as last.
+ *
+ * NOTE: if you don't need to close frames and boxes at separate times, you can
+ * use @ref JxlEncoderCloseInput instead to close both at once.
+ *
+ * @param enc encoder object.
+ */
+JXL_EXPORT void JxlEncoderCloseFrames(JxlEncoder* enc);
+
+/**
+ * Closes any input to the encoder, equivalent to calling JxlEncoderCloseFrames
+ * as well as calling JxlEncoderCloseBoxes if needed. No further input of any
+ * kind may be given to the encoder, but further @ref JxlEncoderProcessOutput
+ * calls should be done to create the final output.
+ *
+ * The requirements of both @ref JxlEncoderCloseFrames and @ref
+ * JxlEncoderCloseBoxes apply to this function. Either this function or the
+ * other two must be called after the final frame and/or box, and the next
+ * @ref JxlEncoderProcessOutput call, or the codestream won't be encoded
+ * correctly.
+ *
+ * @param enc encoder object.
+ */
+JXL_EXPORT void JxlEncoderCloseInput(JxlEncoder* enc);
+
+/**
+ * Sets the original color encoding of the image encoded by this encoder. This
+ * is an alternative to JxlEncoderSetICCProfile and only one of these two must
+ * be used. This one sets the color encoding as a @ref JxlColorEncoding, while
+ * the other sets it as ICC binary data.
+ * Must be called after JxlEncoderSetBasicInfo.
+ *
+ * @param enc encoder object.
+ * @param color color encoding. Object owned by the caller and its contents are
+ * copied internally.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR or
+ * JXL_ENC_NOT_SUPPORTED otherwise
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderSetColorEncoding(JxlEncoder* enc, const JxlColorEncoding* color);
+
+/**
+ * Sets the original color encoding of the image encoded by this encoder as an
+ * ICC color profile. This is an alternative to JxlEncoderSetColorEncoding and
+ * only one of these two must be used. This one sets the color encoding as ICC
+ * binary data, while the other defines it as a @ref JxlColorEncoding.
+ * Must be called after JxlEncoderSetBasicInfo.
+ *
+ * @param enc encoder object.
+ * @param icc_profile bytes of the original ICC profile
+ * @param size size of the icc_profile buffer in bytes
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR or
+ * JXL_ENC_NOT_SUPPORTED otherwise
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetICCProfile(JxlEncoder* enc,
+ const uint8_t* icc_profile,
+ size_t size);
+
+/**
+ * Initializes a JxlBasicInfo struct to default values.
+ * For forwards-compatibility, this function has to be called before values
+ * are assigned to the struct fields.
+ * The default values correspond to an 8-bit RGB image, no alpha or any
+ * other extra channels.
+ *
+ * @param info global image metadata. Object owned by the caller.
+ */
+JXL_EXPORT void JxlEncoderInitBasicInfo(JxlBasicInfo* info);
+
+/**
+ * Initializes a JxlFrameHeader struct to default values.
+ * For forwards-compatibility, this function has to be called before values
+ * are assigned to the struct fields.
+ * The default values correspond to a frame with no animation duration and the
+ * 'replace' blend mode. After using this function, For animation duration must
+ * be set, for composite still blend settings must be set.
+ *
+ * @param frame_header frame metadata. Object owned by the caller.
+ */
+JXL_EXPORT void JxlEncoderInitFrameHeader(JxlFrameHeader* frame_header);
+
+/**
+ * Initializes a JxlBlendInfo struct to default values.
+ * For forwards-compatibility, this function has to be called before values
+ * are assigned to the struct fields.
+ *
+ * @param blend_info blending info. Object owned by the caller.
+ */
+JXL_EXPORT void JxlEncoderInitBlendInfo(JxlBlendInfo* blend_info);
+
+/**
+ * Sets the global metadata of the image encoded by this encoder.
+ *
+ * If the JxlBasicInfo contains information of extra channels beyond an alpha
+ * channel, then @ref JxlEncoderSetExtraChannelInfo must be called between
+ * JxlEncoderSetBasicInfo and @ref JxlEncoderAddImageFrame. In order to indicate
+ * extra channels, the value of `info.num_extra_channels` should be set to the
+ * number of extra channels, also counting the alpha channel if present.
+ *
+ * @param enc encoder object.
+ * @param info global image metadata. Object owned by the caller and its
+ * contents are copied internally.
+ * @return JXL_ENC_SUCCESS if the operation was successful,
+ * JXL_ENC_ERROR or JXL_ENC_NOT_SUPPORTED otherwise
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetBasicInfo(JxlEncoder* enc,
+ const JxlBasicInfo* info);
+
+/**
+ * Initializes a JxlExtraChannelInfo struct to default values.
+ * For forwards-compatibility, this function has to be called before values
+ * are assigned to the struct fields.
+ * The default values correspond to an 8-bit channel of the provided type.
+ *
+ * @param type type of the extra channel.
+ * @param info global extra channel metadata. Object owned by the caller and its
+ * contents are copied internally.
+ */
+JXL_EXPORT void JxlEncoderInitExtraChannelInfo(JxlExtraChannelType type,
+ JxlExtraChannelInfo* info);
+
+/**
+ * Sets information for the extra channel at the given index. The index
+ * must be smaller than num_extra_channels in the associated JxlBasicInfo.
+ *
+ * @param enc encoder object
+ * @param index index of the extra channel to set.
+ * @param info global extra channel metadata. Object owned by the caller and its
+ * contents are copied internally.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelInfo(
+ JxlEncoder* enc, size_t index, const JxlExtraChannelInfo* info);
+
+/**
+ * Sets the name for the extra channel at the given index in UTF-8. The index
+ * must be smaller than the num_extra_channels in the associated JxlBasicInfo.
+ *
+ * TODO(lode): remove size parameter for consistency with
+ * JxlEncoderSetFrameName
+ *
+ * @param enc encoder object
+ * @param index index of the extra channel to set.
+ * @param name buffer with the name of the extra channel.
+ * @param size size of the name buffer in bytes, not counting the terminating
+ * character.
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelName(JxlEncoder* enc,
+ size_t index,
+ const char* name,
+ size_t size);
+
+/**
+ * Sets a frame-specific option of integer type to the encoder options.
+ * The JxlEncoderFrameSettingId argument determines which option is set.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param option ID of the option to set.
+ * @param value Integer value to set for this option.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR in
+ * case of an error, such as invalid or unknown option id, or invalid integer
+ * value for the given option. If an error is returned, the state of the
+ * JxlEncoderFrameSettings object is still valid and is the same as before this
+ * function was called.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderFrameSettingsSetOption(
+ JxlEncoderFrameSettings* frame_settings, JxlEncoderFrameSettingId option,
+ int64_t value);
+
+/**
+ * Sets a frame-specific option of float type to the encoder options.
+ * The JxlEncoderFrameSettingId argument determines which option is set.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param option ID of the option to set.
+ * @param value Float value to set for this option.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR in
+ * case of an error, such as invalid or unknown option id, or invalid integer
+ * value for the given option. If an error is returned, the state of the
+ * JxlEncoderFrameSettings object is still valid and is the same as before this
+ * function was called.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderFrameSettingsSetFloatOption(
+ JxlEncoderFrameSettings* frame_settings, JxlEncoderFrameSettingId option,
+ float value);
+
+/** Forces the encoder to use the box-based container format (BMFF) even
+ * when not necessary.
+ *
+ * When using @ref JxlEncoderUseBoxes, @ref JxlEncoderStoreJPEGMetadata or @ref
+ * JxlEncoderSetCodestreamLevel with level 10, the encoder will automatically
+ * also use the container format, it is not necessary to use
+ * JxlEncoderUseContainer for those use cases.
+ *
+ * By default this setting is disabled.
+ *
+ * This setting can only be set at the beginning, before encoding starts.
+ *
+ * @param enc encoder object.
+ * @param use_container true if the encoder should always output the JPEG XL
+ * container format, false to only output it when necessary.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderUseContainer(JxlEncoder* enc,
+ JXL_BOOL use_container);
+
+/**
+ * Configure the encoder to store JPEG reconstruction metadata in the JPEG XL
+ * container.
+ *
+ * If this is set to true and a single JPEG frame is added, it will be
+ * possible to losslessly reconstruct the JPEG codestream.
+ *
+ * This setting can only be set at the beginning, before encoding starts.
+ *
+ * @param enc encoder object.
+ * @param store_jpeg_metadata true if the encoder should store JPEG metadata.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderStoreJPEGMetadata(JxlEncoder* enc, JXL_BOOL store_jpeg_metadata);
+
+/** Sets the feature level of the JPEG XL codestream. Valid values are 5 and
+ * 10, or -1 (to choose automatically). Using the minimum required level, or
+ * level 5 in most cases, is recommended for compatibility with all decoders.
+ *
+ * Level 5: for end-user image delivery, this level is the most widely
+ * supported level by image decoders and the recommended level to use unless a
+ * level 10 feature is absolutely necessary. Supports a maximum resolution
+ * 268435456 pixels total with a maximum width or height of 262144 pixels,
+ * maximum 16-bit color channel depth, maximum 120 frames per second for
+ * animation, maximum ICC color profile size of 4 MiB, it allows all color
+ * models and extra channel types except CMYK and the JXL_CHANNEL_BLACK extra
+ * channel, and a maximum of 4 extra channels in addition to the 3 color
+ * channels. It also sets boundaries to certain internally used coding tools.
+ *
+ * Level 10: this level removes or increases the bounds of most of the level
+ * 5 limitations, allows CMYK color and up to 32 bits per color channel, but
+ * may be less widely supported.
+ *
+ * The default value is -1. This means the encoder will automatically choose
+ * between level 5 and level 10 based on what information is inside the @ref
+ * JxlBasicInfo structure. Do note that some level 10 features, particularly
+ * those used by animated JPEG XL codestreams, might require level 10, even
+ * though the @ref JxlBasicInfo only suggests level 5. In this case, the level
+ * must be explicitly set to 10, otherwise the encoder will return an error.
+ * The encoder will restrict internal encoding choices to those compatible with
+ * the level setting.
+ *
+ * This setting can only be set at the beginning, before encoding starts.
+ *
+ * @param enc encoder object.
+ * @param level the level value to set, must be -1, 5, or 10.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetCodestreamLevel(JxlEncoder* enc,
+ int level);
+
+/** Returns the codestream level required to support the currently configured
+ * settings and basic info. This function can only be used at the beginning,
+ * before encoding starts, but after setting basic info.
+ *
+ * This does not support per-frame settings, only global configuration, such as
+ * the image dimensions, that are known at the time of writing the header of
+ * the JPEG XL file.
+ *
+ * If this returns 5, nothing needs to be done and the codestream can be
+ * compatible with any decoder. If this returns 10, JxlEncoderSetCodestreamLevel
+ * has to be used to set the codestream level to 10, or the encoder can be
+ * configured differently to allow using the more compatible level 5.
+ *
+ * @param enc encoder object.
+ * @return -1 if no level can support the configuration (e.g. image dimensions
+ * larger than even level 10 supports), 5 if level 5 is supported, 10 if setting
+ * the codestream level to 10 is required.
+ *
+ */
+JXL_EXPORT int JxlEncoderGetRequiredCodestreamLevel(const JxlEncoder* enc);
+
+/**
+ * Enables lossless encoding.
+ *
+ * This is not an option like the others on itself, but rather while enabled it
+ * overrides a set of existing options (such as distance, modular mode and
+ * color transform) that enables bit-for-bit lossless encoding.
+ *
+ * When disabled, those options are not overridden, but since those options
+ * could still have been manually set to a combination that operates losslessly,
+ * using this function with lossless set to JXL_DEC_FALSE does not guarantee
+ * lossy encoding, though the default set of options is lossy.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param lossless whether to override options for lossless mode
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetFrameLossless(
+ JxlEncoderFrameSettings* frame_settings, JXL_BOOL lossless);
+
+/** DEPRECATED: use JxlEncoderSetFrameLossless instead.
+ */
+JXL_EXPORT JxlEncoderStatus
+JxlEncoderOptionsSetLossless(JxlEncoderFrameSettings*, JXL_BOOL);
+
+/**
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param effort the effort value to set.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ *
+ * DEPRECATED: use JxlEncoderFrameSettingsSetOption(frame_settings,
+ * JXL_ENC_FRAME_SETTING_EFFORT, effort) instead.
+ */
+JXL_DEPRECATED JXL_EXPORT JxlEncoderStatus
+JxlEncoderOptionsSetEffort(JxlEncoderFrameSettings* frame_settings, int effort);
+
+/**
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param tier the decoding speed tier to set.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ *
+ * DEPRECATED: use JxlEncoderFrameSettingsSetOption(frame_settings,
+ * JXL_ENC_FRAME_SETTING_DECODING_SPEED, tier) instead.
+ */
+JXL_DEPRECATED JXL_EXPORT JxlEncoderStatus JxlEncoderOptionsSetDecodingSpeed(
+ JxlEncoderFrameSettings* frame_settings, int tier);
+
+/**
+ * Sets the distance level for lossy compression: target max butteraugli
+ * distance, lower = higher quality. Range: 0 .. 15.
+ * 0.0 = mathematically lossless (however, use JxlEncoderSetFrameLossless
+ * instead to use true lossless, as setting distance to 0 alone is not the only
+ * requirement). 1.0 = visually lossless. Recommended range: 0.5 .. 3.0. Default
+ * value: 1.0.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param distance the distance value to set.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetFrameDistance(
+ JxlEncoderFrameSettings* frame_settings, float distance);
+
+/** DEPRECATED: use JxlEncoderSetFrameDistance instead.
+ */
+JXL_DEPRECATED JXL_EXPORT JxlEncoderStatus
+JxlEncoderOptionsSetDistance(JxlEncoderFrameSettings*, float);
+
+/**
+ * Sets the distance level for lossy compression of extra channels.
+ * The distance is as in JxlEncoderSetFrameDistance (lower = higher quality).
+ * If not set, or if set to the special value -1, the distance that was set with
+ * JxlEncoderSetFrameDistance will be used.
+ *
+ * @param frame_settings set of options and metadata for this frame. Also
+ * includes reference to the encoder object.
+ * @param index index of the extra channel to set a distance value for.
+ * @param distance the distance value to set.
+ * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR
+ * otherwise.
+ */
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelDistance(
+ JxlEncoderFrameSettings* frame_settings, size_t index, float distance);
+
+/**
+ * Create a new set of encoder options, with all values initially copied from
+ * the @p source options, or set to default if @p source is NULL.
+ *
+ * The returned pointer is an opaque struct tied to the encoder and it will be
+ * deallocated by the encoder when JxlEncoderDestroy() is called. For functions
+ * taking both a @ref JxlEncoder and a @ref JxlEncoderFrameSettings, only
+ * JxlEncoderFrameSettings created with this function for the same encoder
+ * instance can be used.
+ *
+ * @param enc encoder object.
+ * @param source source options to copy initial values from, or NULL to get
+ * defaults initialized to defaults.
+ * @return the opaque struct pointer identifying a new set of encoder options.
+ */
+JXL_EXPORT JxlEncoderFrameSettings* JxlEncoderFrameSettingsCreate(
+ JxlEncoder* enc, const JxlEncoderFrameSettings* source);
+
+/** DEPRECATED: use JxlEncoderFrameSettingsCreate instead.
+ */
+JXL_DEPRECATED JXL_EXPORT JxlEncoderFrameSettings* JxlEncoderOptionsCreate(
+ JxlEncoder*, const JxlEncoderFrameSettings*);
+
+/**
+ * Sets a color encoding to be sRGB.
+ *
+ * @param color_encoding color encoding instance.
+ * @param is_gray whether the color encoding should be gray scale or color.
+ */
+JXL_EXPORT void JxlColorEncodingSetToSRGB(JxlColorEncoding* color_encoding,
+ JXL_BOOL is_gray);
+
+/**
+ * Sets a color encoding to be linear sRGB.
+ *
+ * @param color_encoding color encoding instance.
+ * @param is_gray whether the color encoding should be gray scale or color.
+ */
+JXL_EXPORT void JxlColorEncodingSetToLinearSRGB(
+ JxlColorEncoding* color_encoding, JXL_BOOL is_gray);
+
+/**
+ * Enables usage of expert options.
+ *
+ * At the moment, the only expert option is setting an effort value of 10,
+ * which gives the best compression for pixel-lossless modes but is very slow.
+ *
+ * @param enc encoder object.
+ */
+JXL_EXPORT void JxlEncoderAllowExpertOptions(JxlEncoder* enc);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_ENCODE_H_ */
+
+/** @}*/
diff --git a/third_party/jpeg-xl/lib/include/jxl/encode_cxx.h b/third_party/jpeg-xl/lib/include/jxl/encode_cxx.h
new file mode 100644
index 0000000000..3889e12c14
--- /dev/null
+++ b/third_party/jpeg-xl/lib/include/jxl/encode_cxx.h
@@ -0,0 +1,57 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/// @addtogroup libjxl_encoder
+///@{
+///
+/// @file encode_cxx.h
+/// @brief C++ header-only helper for @ref encode.h.
+///
+/// There's no binary library associated with the header since this is a header
+/// only library.
+
+#ifndef JXL_ENCODE_CXX_H_
+#define JXL_ENCODE_CXX_H_
+
+#include <jxl/encode.h>
+
+#include <memory>
+
+#if !(defined(__cplusplus) || defined(c_plusplus))
+#error "This a C++ only header. Use jxl/encode.h from C sources."
+#endif
+
+/// Struct to call JxlEncoderDestroy from the JxlEncoderPtr unique_ptr.
+struct JxlEncoderDestroyStruct {
+ /// Calls @ref JxlEncoderDestroy() on the passed encoder.
+ void operator()(JxlEncoder* encoder) { JxlEncoderDestroy(encoder); }
+};
+
+/// std::unique_ptr<> type that calls JxlEncoderDestroy() when releasing the
+/// encoder.
+///
+/// Use this helper type from C++ sources to ensure the encoder is destroyed and
+/// their internal resources released.
+typedef std::unique_ptr<JxlEncoder, JxlEncoderDestroyStruct> JxlEncoderPtr;
+
+/// Creates an instance of JxlEncoder into a JxlEncoderPtr and initializes it.
+///
+/// This function returns a unique_ptr that will call JxlEncoderDestroy() when
+/// releasing the pointer. See @ref JxlEncoderCreate for details on the
+/// instance creation.
+///
+/// @param memory_manager custom allocator function. It may be NULL. The memory
+/// manager will be copied internally.
+/// @return a @c NULL JxlEncoderPtr if the instance can not be allocated or
+/// initialized
+/// @return initialized JxlEncoderPtr instance otherwise.
+static inline JxlEncoderPtr JxlEncoderMake(
+ const JxlMemoryManager* memory_manager) {
+ return JxlEncoderPtr(JxlEncoderCreate(memory_manager));
+}
+
+#endif // JXL_ENCODE_CXX_H_
+
+/// @}
diff --git a/third_party/jpeg-xl/lib/include/jxl/memory_manager.h b/third_party/jpeg-xl/lib/include/jxl/memory_manager.h
new file mode 100644
index 0000000000..52640a8beb
--- /dev/null
+++ b/third_party/jpeg-xl/lib/include/jxl/memory_manager.h
@@ -0,0 +1,72 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_common
+ * @{
+ * @file memory_manager.h
+ * @brief Abstraction functions used by JPEG XL to allocate memory.
+ */
+
+#ifndef JXL_MEMORY_MANAGER_H_
+#define JXL_MEMORY_MANAGER_H_
+
+#include <stddef.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/**
+ * Allocating function for a memory region of a given size.
+ *
+ * Allocates a contiguous memory region of size @p size bytes. The returned
+ * memory may not be aligned to a specific size or initialized at all.
+ *
+ * @param opaque custom memory manager handle provided by the caller.
+ * @param size in bytes of the requested memory region.
+ * @return @c NULL if the memory can not be allocated,
+ * @return pointer to the memory otherwise.
+ */
+typedef void* (*jpegxl_alloc_func)(void* opaque, size_t size);
+
+/**
+ * Deallocating function pointer type.
+ *
+ * This function @b MUST do nothing if @p address is @c NULL.
+ *
+ * @param opaque custom memory manager handle provided by the caller.
+ * @param address memory region pointer returned by ::jpegxl_alloc_func, or @c
+ * NULL.
+ */
+typedef void (*jpegxl_free_func)(void* opaque, void* address);
+
+/**
+ * Memory Manager struct.
+ * These functions, when provided by the caller, will be used to handle memory
+ * allocations.
+ */
+typedef struct JxlMemoryManagerStruct {
+ /** The opaque pointer that will be passed as the first parameter to all the
+ * functions in this struct. */
+ void* opaque;
+
+ /** Memory allocation function. This can be NULL if and only if also the
+ * free() member in this class is NULL. All dynamic memory will be allocated
+ * and freed with these functions if they are not NULL. */
+ jpegxl_alloc_func alloc;
+ /** Free function matching the alloc() member. */
+ jpegxl_free_func free;
+
+ /* TODO(deymo): Add cache-aligned alloc/free functions here. */
+} JxlMemoryManager;
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_MEMORY_MANAGER_H_ */
+
+/** @}*/
diff --git a/third_party/jpeg-xl/lib/include/jxl/parallel_runner.h b/third_party/jpeg-xl/lib/include/jxl/parallel_runner.h
new file mode 100644
index 0000000000..45394e972c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/include/jxl/parallel_runner.h
@@ -0,0 +1,156 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_common
+ * @{
+ */
+/**
+ * @file parallel_runner.h
+ */
+
+/** API for running data operations in parallel in a multi-threaded environment.
+ * This module allows the JPEG XL caller to define their own way of creating and
+ * assigning threads.
+ *
+ * The JxlParallelRunner function type defines a parallel data processing
+ * runner that may be implemented by the caller to allow the library to process
+ * in multiple threads. The multi-threaded processing in this library only
+ * requires to run the same function over each number of a range, possibly
+ * running each call in a different thread. The JPEG XL caller is responsible
+ * for implementing this logic using the thread APIs available in their system.
+ * For convenience, a C++ implementation based on std::thread is provided in
+ * jpegxl/parallel_runner_thread.h (part of the jpegxl_threads library).
+ *
+ * Thread pools usually store small numbers of heterogeneous tasks in a queue.
+ * When tasks are identical or differ only by an integer input parameter, it is
+ * much faster to store just one function of an integer parameter and call it
+ * for each value. Conventional vector-of-tasks can be run in parallel using a
+ * lambda function adapter that simply calls task_funcs[task].
+ *
+ * If no multi-threading is desired, a @c NULL value of JxlParallelRunner
+ * will use an internal implementation without multi-threading.
+ */
+
+#ifndef JXL_PARALLEL_RUNNER_H_
+#define JXL_PARALLEL_RUNNER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Return code used in the JxlParallel* functions as return value. A value
+ * of 0 means success and any other value means error. The special value
+ * JXL_PARALLEL_RET_RUNNER_ERROR can be used by the runner to indicate any
+ * other error.
+ */
+typedef int JxlParallelRetCode;
+
+/**
+ * General error returned by the JxlParallelRunInit function to indicate
+ * an error.
+ */
+#define JXL_PARALLEL_RET_RUNNER_ERROR (-1)
+
+/**
+ * Parallel run initialization callback. See JxlParallelRunner for details.
+ *
+ * This function MUST be called by the JxlParallelRunner only once, on the
+ * same thread that called JxlParallelRunner, before any parallel execution.
+ * The purpose of this call is to provide the maximum number of threads that the
+ * JxlParallelRunner will use, which can be used by JPEG XL to allocate
+ * per-thread storage if needed.
+ *
+ * @param jpegxl_opaque the @p jpegxl_opaque handle provided to
+ * JxlParallelRunner() must be passed here.
+ * @param num_threads the maximum number of threads. This value must be
+ * positive.
+ * @return 0 if the initialization process was successful.
+ * @return an error code if there was an error, which should be returned by
+ * JxlParallelRunner().
+ */
+typedef JxlParallelRetCode (*JxlParallelRunInit)(void* jpegxl_opaque,
+ size_t num_threads);
+
+/**
+ * Parallel run data processing callback. See JxlParallelRunner for details.
+ *
+ * This function MUST be called once for every number in the range [start_range,
+ * end_range) (including start_range but not including end_range) passing this
+ * number as the @p value. Calls for different value may be executed from
+ * different threads in parallel.
+ *
+ * @param jpegxl_opaque the @p jpegxl_opaque handle provided to
+ * JxlParallelRunner() must be passed here.
+ * @param value the number in the range [start_range, end_range) of the call.
+ * @param thread_id the thread number where this function is being called from.
+ * This must be lower than the @p num_threads value passed to
+ * JxlParallelRunInit.
+ */
+typedef void (*JxlParallelRunFunction)(void* jpegxl_opaque, uint32_t value,
+ size_t thread_id);
+
+/**
+ * JxlParallelRunner function type. A parallel runner implementation can be
+ * provided by a JPEG XL caller to allow running computations in multiple
+ * threads. This function must call the initialization function @p init in the
+ * same thread that called it and then call the passed @p func once for every
+ * number in the range [start_range, end_range) (including start_range but not
+ * including end_range) possibly from different multiple threads in parallel.
+ *
+ * The JxlParallelRunner function does not need to be re-entrant. This means
+ * that the same JxlParallelRunner function with the same runner_opaque
+ * provided parameter will not be called from the library from either @p init or
+ * @p func in the same decoder or encoder instance. However, a single decoding
+ * or encoding instance may call the provided JxlParallelRunner multiple
+ * times for different parts of the decoding or encoding process.
+ *
+ * @return 0 if the @p init call succeeded (returned 0) and no other error
+ * occurred in the runner code.
+ * @return JXL_PARALLEL_RET_RUNNER_ERROR if an error occurred in the runner
+ * code, for example, setting up the threads.
+ * @return the return value of @p init() if non-zero.
+ */
+typedef JxlParallelRetCode (*JxlParallelRunner)(
+ void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+ JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range);
+
+/* The following is an example of a JxlParallelRunner that doesn't use any
+ * multi-threading. Note that this implementation doesn't store any state
+ * between multiple calls of the ExampleSequentialRunner function, so the
+ * runner_opaque value is not used.
+
+ JxlParallelRetCode ExampleSequentialRunner(void* runner_opaque,
+ void* jpegxl_opaque,
+ JxlParallelRunInit init,
+ JxlParallelRunFunction func,
+ uint32_t start_range,
+ uint32_t end_range) {
+ // We only use one thread (the currently running thread).
+ JxlParallelRetCode init_ret = (*init)(jpegxl_opaque, 1);
+ if (init_ret != 0) return init_ret;
+
+ // In case of other initialization error (for example when initializing the
+ // threads) one can return JXL_PARALLEL_RET_RUNNER_ERROR.
+
+ for (uint32_t i = start_range; i < end_range; i++) {
+ // Every call is in the thread number 0. These don't need to be in any
+ // order.
+ (*func)(jpegxl_opaque, i, 0);
+ }
+ return 0;
+ }
+ */
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_PARALLEL_RUNNER_H_ */
+
+/** @}*/
diff --git a/third_party/jpeg-xl/lib/include/jxl/resizable_parallel_runner.h b/third_party/jpeg-xl/lib/include/jxl/resizable_parallel_runner.h
new file mode 100644
index 0000000000..196e66d30a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/include/jxl/resizable_parallel_runner.h
@@ -0,0 +1,78 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_threads
+ * @{
+ * @file resizable_parallel_runner.h
+ * @brief implementation using std::thread of a resizeable ::JxlParallelRunner.
+ */
+
+/** Implementation of JxlParallelRunner than can be used to enable
+ * multithreading when using the JPEG XL library. This uses std::thread
+ * internally and related synchronization functions. The number of threads
+ * created can be changed after creation of the thread pool; the threads
+ * (including the main thread) are re-used for every
+ * ResizableParallelRunner::Runner call. Only one concurrent
+ * JxlResizableParallelRunner call per instance is allowed at a time.
+ *
+ * This is a scalable, lower-overhead thread pool runner, especially suitable
+ * for data-parallel computations in the fork-join model, where clients need to
+ * know when all tasks have completed.
+ *
+ * Compared to the implementation in @ref thread_parallel_runner.h, this
+ * implementation is tuned for execution on lower-powered systems, including
+ * for example ARM CPUs with big.LITTLE computation models.
+ */
+
+#ifndef JXL_RESIZABLE_PARALLEL_RUNNER_H_
+#define JXL_RESIZABLE_PARALLEL_RUNNER_H_
+
+#include <jxl/jxl_threads_export.h>
+#include <jxl/memory_manager.h>
+#include <jxl/parallel_runner.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Parallel runner internally using std::thread. Use as JxlParallelRunner.
+ */
+JXL_THREADS_EXPORT JxlParallelRetCode JxlResizableParallelRunner(
+ void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+ JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range);
+
+/** Creates the runner for JxlResizableParallelRunner. Use as the opaque
+ * runner. The runner will execute tasks on the calling thread until
+ * @ref JxlResizableParallelRunnerSetThreads is called.
+ */
+JXL_THREADS_EXPORT void* JxlResizableParallelRunnerCreate(
+ const JxlMemoryManager* memory_manager);
+
+/** Changes the number of threads for JxlResizableParallelRunner.
+ */
+JXL_THREADS_EXPORT void JxlResizableParallelRunnerSetThreads(
+ void* runner_opaque, size_t num_threads);
+
+/** Suggests a number of threads to use for an image of given size.
+ */
+JXL_THREADS_EXPORT uint32_t
+JxlResizableParallelRunnerSuggestThreads(uint64_t xsize, uint64_t ysize);
+
+/** Destroys the runner created by JxlResizableParallelRunnerCreate.
+ */
+JXL_THREADS_EXPORT void JxlResizableParallelRunnerDestroy(void* runner_opaque);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_RESIZABLE_PARALLEL_RUNNER_H_ */
+
+/** @}*/
diff --git a/third_party/jpeg-xl/lib/include/jxl/resizable_parallel_runner_cxx.h b/third_party/jpeg-xl/lib/include/jxl/resizable_parallel_runner_cxx.h
new file mode 100644
index 0000000000..39bbbd283a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/include/jxl/resizable_parallel_runner_cxx.h
@@ -0,0 +1,64 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/// @addtogroup libjxl_threads
+/// @{
+///
+/// @file resizable_parallel_runner_cxx.h
+/// @ingroup libjxl_threads
+/// @brief C++ header-only helper for @ref resizable_parallel_runner.h.
+///
+/// There's no binary library associated with the header since this is a header
+/// only library.
+
+#ifndef JXL_RESIZABLE_PARALLEL_RUNNER_CXX_H_
+#define JXL_RESIZABLE_PARALLEL_RUNNER_CXX_H_
+
+#include <jxl/resizable_parallel_runner.h>
+
+#include <memory>
+
+#if !(defined(__cplusplus) || defined(c_plusplus))
+#error \
+ "This a C++ only header. Use jxl/jxl_resizable_parallel_runner.h from C" \
+ "sources."
+#endif
+
+/// Struct to call JxlResizableParallelRunnerDestroy from the
+/// JxlResizableParallelRunnerPtr unique_ptr.
+struct JxlResizableParallelRunnerDestroyStruct {
+ /// Calls @ref JxlResizableParallelRunnerDestroy() on the passed runner.
+ void operator()(void* runner) { JxlResizableParallelRunnerDestroy(runner); }
+};
+
+/// std::unique_ptr<> type that calls JxlResizableParallelRunnerDestroy() when
+/// releasing the runner.
+///
+/// Use this helper type from C++ sources to ensure the runner is destroyed and
+/// their internal resources released.
+typedef std::unique_ptr<void, JxlResizableParallelRunnerDestroyStruct>
+ JxlResizableParallelRunnerPtr;
+
+/// Creates an instance of JxlResizableParallelRunner into a
+/// JxlResizableParallelRunnerPtr and initializes it.
+///
+/// This function returns a unique_ptr that will call
+/// JxlResizableParallelRunnerDestroy() when releasing the pointer. See @ref
+/// JxlResizableParallelRunnerCreate for details on the instance creation.
+///
+/// @param memory_manager custom allocator function. It may be NULL. The memory
+/// manager will be copied internally.
+/// @return a @c NULL JxlResizableParallelRunnerPtr if the instance can not be
+/// allocated or initialized
+/// @return initialized JxlResizableParallelRunnerPtr instance otherwise.
+static inline JxlResizableParallelRunnerPtr JxlResizableParallelRunnerMake(
+ const JxlMemoryManager* memory_manager) {
+ return JxlResizableParallelRunnerPtr(
+ JxlResizableParallelRunnerCreate(memory_manager));
+}
+
+#endif // JXL_RESIZABLE_PARALLEL_RUNNER_CXX_H_
+
+/// @}
diff --git a/third_party/jpeg-xl/lib/include/jxl/thread_parallel_runner.h b/third_party/jpeg-xl/lib/include/jxl/thread_parallel_runner.h
new file mode 100644
index 0000000000..715648b256
--- /dev/null
+++ b/third_party/jpeg-xl/lib/include/jxl/thread_parallel_runner.h
@@ -0,0 +1,72 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_threads
+ * @{
+ * @file thread_parallel_runner.h
+ * @brief implementation using std::thread of a ::JxlParallelRunner.
+ */
+
+/** Implementation of JxlParallelRunner than can be used to enable
+ * multithreading when using the JPEG XL library. This uses std::thread
+ * internally and related synchronization functions. The number of threads
+ * created is fixed at construction time and the threads are re-used for every
+ * ThreadParallelRunner::Runner call. Only one concurrent
+ * JxlThreadParallelRunner call per instance is allowed at a time.
+ *
+ * This is a scalable, lower-overhead thread pool runner, especially suitable
+ * for data-parallel computations in the fork-join model, where clients need to
+ * know when all tasks have completed.
+ *
+ * This thread pool can efficiently load-balance millions of tasks using an
+ * atomic counter, thus avoiding per-task virtual or system calls. With 48
+ * hyperthreads and 1M tasks that add to an atomic counter, overall runtime is
+ * 10-20x higher when using std::async, and ~200x for a queue-based thread
+ */
+
+#ifndef JXL_THREAD_PARALLEL_RUNNER_H_
+#define JXL_THREAD_PARALLEL_RUNNER_H_
+
+#include <jxl/jxl_threads_export.h>
+#include <jxl/memory_manager.h>
+#include <jxl/parallel_runner.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/** Parallel runner internally using std::thread. Use as JxlParallelRunner.
+ */
+JXL_THREADS_EXPORT JxlParallelRetCode JxlThreadParallelRunner(
+ void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+ JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range);
+
+/** Creates the runner for JxlThreadParallelRunner. Use as the opaque
+ * runner.
+ */
+JXL_THREADS_EXPORT void* JxlThreadParallelRunnerCreate(
+ const JxlMemoryManager* memory_manager, size_t num_worker_threads);
+
+/** Destroys the runner created by JxlThreadParallelRunnerCreate.
+ */
+JXL_THREADS_EXPORT void JxlThreadParallelRunnerDestroy(void* runner_opaque);
+
+/** Returns a default num_worker_threads value for
+ * JxlThreadParallelRunnerCreate.
+ */
+JXL_THREADS_EXPORT size_t JxlThreadParallelRunnerDefaultNumWorkerThreads();
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_THREAD_PARALLEL_RUNNER_H_ */
+
+/** @}*/
diff --git a/third_party/jpeg-xl/lib/include/jxl/thread_parallel_runner_cxx.h b/third_party/jpeg-xl/lib/include/jxl/thread_parallel_runner_cxx.h
new file mode 100644
index 0000000000..4974ffee87
--- /dev/null
+++ b/third_party/jpeg-xl/lib/include/jxl/thread_parallel_runner_cxx.h
@@ -0,0 +1,64 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/// @addtogroup libjxl_threads
+/// @{
+///
+/// @file thread_parallel_runner_cxx.h
+/// @brief C++ header-only helper for @ref thread_parallel_runner.h.
+///
+/// There's no binary library associated with the header since this is a header
+/// only library.
+
+#ifndef JXL_THREAD_PARALLEL_RUNNER_CXX_H_
+#define JXL_THREAD_PARALLEL_RUNNER_CXX_H_
+
+#include <jxl/thread_parallel_runner.h>
+
+#include <memory>
+
+#if !(defined(__cplusplus) || defined(c_plusplus))
+#error \
+ "This a C++ only header. Use jxl/jxl_thread_parallel_runner.h from C" \
+ "sources."
+#endif
+
+/// Struct to call JxlThreadParallelRunnerDestroy from the
+/// JxlThreadParallelRunnerPtr unique_ptr.
+struct JxlThreadParallelRunnerDestroyStruct {
+ /// Calls @ref JxlThreadParallelRunnerDestroy() on the passed runner.
+ void operator()(void* runner) { JxlThreadParallelRunnerDestroy(runner); }
+};
+
+/// std::unique_ptr<> type that calls JxlThreadParallelRunnerDestroy() when
+/// releasing the runner.
+///
+/// Use this helper type from C++ sources to ensure the runner is destroyed and
+/// their internal resources released.
+typedef std::unique_ptr<void, JxlThreadParallelRunnerDestroyStruct>
+ JxlThreadParallelRunnerPtr;
+
+/// Creates an instance of JxlThreadParallelRunner into a
+/// JxlThreadParallelRunnerPtr and initializes it.
+///
+/// This function returns a unique_ptr that will call
+/// JxlThreadParallelRunnerDestroy() when releasing the pointer. See @ref
+/// JxlThreadParallelRunnerCreate for details on the instance creation.
+///
+/// @param memory_manager custom allocator function. It may be NULL. The memory
+/// manager will be copied internally.
+/// @param num_worker_threads the number of worker threads to create.
+/// @return a @c NULL JxlThreadParallelRunnerPtr if the instance can not be
+/// allocated or initialized
+/// @return initialized JxlThreadParallelRunnerPtr instance otherwise.
+static inline JxlThreadParallelRunnerPtr JxlThreadParallelRunnerMake(
+ const JxlMemoryManager* memory_manager, size_t num_worker_threads) {
+ return JxlThreadParallelRunnerPtr(
+ JxlThreadParallelRunnerCreate(memory_manager, num_worker_threads));
+}
+
+#endif // JXL_THREAD_PARALLEL_RUNNER_CXX_H_
+
+/// @}
diff --git a/third_party/jpeg-xl/lib/include/jxl/types.h b/third_party/jpeg-xl/lib/include/jxl/types.h
new file mode 100644
index 0000000000..9ffb4c6868
--- /dev/null
+++ b/third_party/jpeg-xl/lib/include/jxl/types.h
@@ -0,0 +1,186 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_common
+ * @{
+ * @file types.h
+ * @brief Data types for the JPEG XL API, for both encoding and decoding.
+ */
+
+#ifndef JXL_TYPES_H_
+#define JXL_TYPES_H_
+
+#include <jxl/jxl_export.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/**
+ * A portable @c bool replacement.
+ *
+ * ::JXL_BOOL is a "documentation" type: actually it is @c int, but in API it
+ * denotes a type, whose only values are ::JXL_TRUE and ::JXL_FALSE.
+ */
+#define JXL_BOOL int
+/** Portable @c true replacement. */
+#define JXL_TRUE 1
+/** Portable @c false replacement. */
+#define JXL_FALSE 0
+
+/** Data type for the sample values per channel per pixel.
+ */
+typedef enum {
+ /** Use 32-bit single-precision floating point values, with range 0.0-1.0
+ * (within gamut, may go outside this range for wide color gamut). Floating
+ * point output, either JXL_TYPE_FLOAT or JXL_TYPE_FLOAT16, is recommended
+ * for HDR and wide gamut images when color profile conversion is required. */
+ JXL_TYPE_FLOAT = 0,
+
+ /** Use type uint8_t. May clip wide color gamut data.
+ */
+ JXL_TYPE_UINT8 = 2,
+
+ /** Use type uint16_t. May clip wide color gamut data.
+ */
+ JXL_TYPE_UINT16 = 3,
+
+ /** Use 16-bit IEEE 754 half-precision floating point values */
+ JXL_TYPE_FLOAT16 = 5,
+} JxlDataType;
+
+/* DEPRECATED: bit-packed 1-bit data type. Use JXL_TYPE_UINT8 instead.
+ */
+JXL_DEPRECATED static const int JXL_TYPE_BOOLEAN = 1;
+
+/* DEPRECATED: uint32_t data type. Use JXL_TYPE_FLOAT instead.
+ */
+JXL_DEPRECATED static const int JXL_TYPE_UINT32 = 4;
+
+/** Ordering of multi-byte data.
+ */
+typedef enum {
+ /** Use the endianness of the system, either little endian or big endian,
+ * without forcing either specific endianness. Do not use if pixel data
+ * should be exported to a well defined format.
+ */
+ JXL_NATIVE_ENDIAN = 0,
+ /** Force little endian */
+ JXL_LITTLE_ENDIAN = 1,
+ /** Force big endian */
+ JXL_BIG_ENDIAN = 2,
+} JxlEndianness;
+
+/** Data type for the sample values per channel per pixel for the output buffer
+ * for pixels. This is not necessarily the same as the data type encoded in the
+ * codestream. The channels are interleaved per pixel. The pixels are
+ * organized row by row, left to right, top to bottom.
+ * TODO(lode): support different channel orders if needed (RGB, BGR, ...)
+ */
+typedef struct {
+ /** Amount of channels available in a pixel buffer.
+ * 1: single-channel data, e.g. grayscale or a single extra channel
+ * 2: single-channel + alpha
+ * 3: trichromatic, e.g. RGB
+ * 4: trichromatic + alpha
+ * TODO(lode): this needs finetuning. It is not yet defined how the user
+ * chooses output color space. CMYK+alpha needs 5 channels.
+ */
+ uint32_t num_channels;
+
+ /** Data type of each channel.
+ */
+ JxlDataType data_type;
+
+ /** Whether multi-byte data types are represented in big endian or little
+ * endian format. This applies to JXL_TYPE_UINT16, JXL_TYPE_UINT32
+ * and JXL_TYPE_FLOAT.
+ */
+ JxlEndianness endianness;
+
+ /** Align scanlines to a multiple of align bytes, or 0 to require no
+ * alignment at all (which has the same effect as value 1)
+ */
+ size_t align;
+} JxlPixelFormat;
+
+/** Settings for the interpretation of the input and output buffers.
+ */
+typedef enum {
+ /** This is the default setting, where the encoder expects the input pixels
+ * to use the full range of the pixel format data type (e.g. for UINT16, the
+ * input range is 0 .. 65535 and the value 65535 is mapped to 1.0 when
+ * converting to float), and the decoder uses the full range to output
+ * pixels. If the bit depth in the basic info is different from this, the
+ * encoder expects the values to be rescaled accordingly (e.g. multiplied by
+ * 65535/4095 for a 12-bit image using UINT16 input data type). */
+ JXL_BIT_DEPTH_FROM_PIXEL_FORMAT = 0,
+
+ /** If this setting is selected, the encoder expects the input pixels to be
+ * in the range defined by the bits_per_sample value of the basic info (e.g.
+ * for 12-bit images using UINT16 input data types, the allowed range is
+ * 0 .. 4095 and the value 4095 is mapped to 1.0 when converting to float),
+ * and the decoder outputs pixels in this range. */
+ JXL_BIT_DEPTH_FROM_CODESTREAM = 1,
+
+ /** This setting can only be used in the decoder to select a custom range for
+ * pixel output */
+ JXL_BIT_DEPTH_CUSTOM = 2,
+} JxlBitDepthType;
+
+/** Data type for describing the interpretation of the input and output buffers
+ * in terms of the range of allowed input and output pixel values. */
+typedef struct {
+ /** Bit depth setting, see comment on @ref JxlBitDepthType */
+ JxlBitDepthType type;
+
+ /** Custom bits per sample */
+ uint32_t bits_per_sample;
+
+ /** Custom exponent bits per sample */
+ uint32_t exponent_bits_per_sample;
+} JxlBitDepth;
+
+/** Data type holding the 4-character type name of an ISOBMFF box.
+ */
+typedef char JxlBoxType[4];
+
+/** Types of progressive detail.
+ * Setting a progressive detail with value N implies all progressive details
+ * with smaller or equal value. Currently only the following level of
+ * progressive detail is implemented:
+ * - kDC (which implies kFrames)
+ * - kLastPasses (which implies kDC and kFrames)
+ * - kPasses (which implies kLastPasses, kDC and kFrames)
+ */
+typedef enum {
+ // after completed kRegularFrames
+ kFrames = 0,
+ // after completed DC (1:8)
+ kDC = 1,
+ // after completed AC passes that are the last pass for their resolution
+ // target.
+ kLastPasses = 2,
+ // after completed AC passes that are not the last pass for their resolution
+ // target.
+ kPasses = 3,
+ // during DC frame when lower resolution are completed (1:32, 1:16)
+ kDCProgressive = 4,
+ // after completed groups
+ kDCGroups = 5,
+ // after completed groups
+ kGroups = 6,
+} JxlProgressiveDetail;
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}
+#endif
+
+#endif /* JXL_TYPES_H_ */
+
+/** @}*/
diff --git a/third_party/jpeg-xl/lib/jpegli.cmake b/third_party/jpeg-xl/lib/jpegli.cmake
new file mode 100644
index 0000000000..4b19375a76
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli.cmake
@@ -0,0 +1,106 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include(compatibility.cmake)
+include(jxl_lists.cmake)
+
+set(JPEGLI_INTERNAL_LIBS
+ hwy
+ Threads::Threads
+ ${ATOMICS_LIBRARIES}
+)
+
+add_library(jpegli-static STATIC EXCLUDE_FROM_ALL "${JPEGXL_INTERNAL_JPEGLI_SOURCES}")
+target_compile_options(jpegli-static PRIVATE "${JPEGXL_INTERNAL_FLAGS}")
+target_compile_options(jpegli-static PUBLIC ${JPEGXL_COVERAGE_FLAGS})
+set_property(TARGET jpegli-static PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_include_directories(jpegli-static PUBLIC
+ "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>"
+ "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
+ "$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include>"
+ "${JXL_HWY_INCLUDE_DIRS}"
+)
+target_include_directories(jpegli-static PUBLIC "${JPEG_INCLUDE_DIRS}")
+target_link_libraries(jpegli-static PUBLIC ${JPEGLI_INTERNAL_LIBS})
+
+#
+# Tests for jpegli-static
+#
+
+if(BUILD_TESTING)
+# TODO(eustas): merge into jxl_tests.cmake?
+# Individual test binaries:
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tests)
+foreach (TESTFILE IN LISTS JPEGXL_INTERNAL_JPEGLI_TESTS)
+ # The TESTNAME is the name without the extension or directory.
+ get_filename_component(TESTNAME ${TESTFILE} NAME_WE)
+ add_executable(${TESTNAME} ${TESTFILE} ${JPEGXL_INTERNAL_JPEGLI_TESTLIB_FILES})
+ target_compile_options(${TESTNAME} PRIVATE
+ ${JPEGXL_INTERNAL_FLAGS}
+ # Add coverage flags to the test binary so code in the private headers of
+ # the library is also instrumented when running tests that execute it.
+ ${JPEGXL_COVERAGE_FLAGS}
+ )
+ target_compile_definitions(${TESTNAME} PRIVATE
+ -DTEST_DATA_PATH="${JPEGXL_TEST_DATA_PATH}")
+ target_include_directories(${TESTNAME} PRIVATE "${PROJECT_SOURCE_DIR}")
+ target_link_libraries(${TESTNAME}
+ hwy
+ jpegli-static
+ gmock
+ GTest::GTest
+ GTest::Main
+ ${JPEG_LIBRARIES}
+ )
+ set_target_properties(${TESTNAME} PROPERTIES LINK_FLAGS "${JPEGXL_COVERAGE_LINK_FLAGS}")
+ # Output test targets in the test directory.
+ set_target_properties(${TESTNAME} PROPERTIES PREFIX "tests/")
+ if (WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+ set_target_properties(${TESTNAME} PROPERTIES COMPILE_FLAGS "-Wno-error")
+ endif ()
+ jxl_discover_tests(${TESTNAME})
+endforeach ()
+endif()
+
+#
+# Build libjpeg.so that links to libjpeg-static
+#
+
+if (JPEGXL_ENABLE_JPEGLI_LIBJPEG AND NOT APPLE AND NOT WIN32 AND NOT JPEGXL_EMSCRIPTEN)
+add_library(jpegli-libjpeg-obj OBJECT "${JPEGXL_INTERNAL_JPEGLI_WRAPPER_SOURCES}")
+target_compile_options(jpegli-libjpeg-obj PRIVATE ${JPEGXL_INTERNAL_FLAGS})
+target_compile_options(jpegli-libjpeg-obj PUBLIC ${JPEGXL_COVERAGE_FLAGS})
+set_property(TARGET jpegli-libjpeg-obj PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_include_directories(jpegli-libjpeg-obj PUBLIC "${PROJECT_SOURCE_DIR}")
+target_compile_definitions(jpegli-libjpeg-obj PUBLIC
+ ${JPEGLI_LIBJPEG_OBJ_COMPILE_DEFINITIONS}
+)
+set(JPEGLI_LIBJPEG_INTERNAL_OBJECTS $<TARGET_OBJECTS:jpegli-libjpeg-obj>)
+
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/jpegli)
+add_library(jpeg SHARED ${JPEGLI_LIBJPEG_INTERNAL_OBJECTS})
+target_link_libraries(jpeg PUBLIC ${JPEGXL_COVERAGE_FLAGS})
+target_link_libraries(jpeg PRIVATE jpegli-static)
+set_target_properties(jpeg PROPERTIES
+ VERSION ${JPEGLI_LIBJPEG_LIBRARY_VERSION}
+ SOVERSION ${JPEGLI_LIBJPEG_LIBRARY_SOVERSION}
+ LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/jpegli"
+ RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/jpegli")
+
+# Add a jpeg.version file as a version script to tag symbols with the
+# appropriate version number.
+set_target_properties(jpeg PROPERTIES
+ LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/jpegli/jpeg.version.${JPEGLI_LIBJPEG_LIBRARY_SOVERSION})
+set_property(TARGET jpeg APPEND_STRING PROPERTY
+ LINK_FLAGS " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/jpegli/jpeg.version.${JPEGLI_LIBJPEG_LIBRARY_SOVERSION}")
+
+# This hides the default visibility symbols from static libraries bundled into
+# the shared library. In particular this prevents exposing symbols from hwy
+# in the shared library.
+if(LINKER_SUPPORT_EXCLUDE_LIBS)
+ set_property(TARGET jpeg APPEND_STRING PROPERTY
+ LINK_FLAGS " ${LINKER_EXCLUDE_LIBS_FLAG}")
+endif()
+endif()
diff --git a/third_party/jpeg-xl/lib/jpegli/README.md b/third_party/jpeg-xl/lib/jpegli/README.md
new file mode 100644
index 0000000000..1eef402eef
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/README.md
@@ -0,0 +1,28 @@
+# Improved JPEG decoder implementation
+
+This subdirectory contains a JPEG decoder implementation that is API and ABI
+compatible with libjpeg62.
+
+*NOTE*: This is still a work in progress, currently only API functions called
+from libjxl's benchmark_xl tool are implemented.
+
+To decompress an ```input.jpg``` file with this new library:
+
+```
+(from the libjxl root directory)
+$ ./ci.sh opt
+$ LD_PRELOAD=./build/libjpeg.so.62 ./build/tools/benchmark_xl --input input.jpg --codec=jpeg --decode_only --save_decompressed --output_dir .
+```
+
+The decompressed file will be saved as ```input.jpg.jpeg.png```.
+
+To benchmark the jpeg encoding-decoding round-trip on an ```input.png``` with
+the new library, first build a statically linked ```cjpeg-static``` binary,
+which is found in ```$PATH```, and then run:
+
+```
+(from the libjxl root directory)
+$ ./ci.sh opt
+$ LD_PRELOAD=./build/libjpeg.so.62 ./build/tools/benchmark_xl --input input.png --codec=jpeg:cjpeg-static:q90
+```
+
diff --git a/third_party/jpeg-xl/lib/jpegli/adaptive_quantization.cc b/third_party/jpeg-xl/lib/jpegli/adaptive_quantization.cc
new file mode 100644
index 0000000000..a1c0b89ad3
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/adaptive_quantization.cc
@@ -0,0 +1,563 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/adaptive_quantization.h"
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <string>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/adaptive_quantization.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::AbsDiff;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::And;
+using hwy::HWY_NAMESPACE::Div;
+using hwy::HWY_NAMESPACE::Floor;
+using hwy::HWY_NAMESPACE::GetLane;
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::Min;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::NegMulAdd;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::ShiftLeft;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Sqrt;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::ZeroIfNegative;
+
+static constexpr float kInputScaling = 1.0f / 255.0f;
+
+// Primary template: default to actual division.
+template <typename T, class V>
+struct FastDivision {
+ HWY_INLINE V operator()(const V n, const V d) const { return n / d; }
+};
+// Partial specialization for float vectors.
+template <class V>
+struct FastDivision<float, V> {
+ // One Newton-Raphson iteration.
+ static HWY_INLINE V ReciprocalNR(const V x) {
+ const auto rcp = ApproximateReciprocal(x);
+ const auto sum = Add(rcp, rcp);
+ const auto x_rcp = Mul(x, rcp);
+ return NegMulAdd(x_rcp, rcp, sum);
+ }
+
+ V operator()(const V n, const V d) const {
+#if 1 // Faster on SKX
+ return Div(n, d);
+#else
+ return n * ReciprocalNR(d);
+#endif
+ }
+};
+
+// Approximates smooth functions via rational polynomials (i.e. dividing two
+// polynomials). Evaluates polynomials via Horner's scheme, which is faster than
+// Clenshaw recurrence for Chebyshev polynomials. LoadDup128 allows us to
+// specify constants (replicated 4x) independently of the lane count.
+template <size_t NP, size_t NQ, class D, class V, typename T>
+HWY_INLINE HWY_MAYBE_UNUSED V EvalRationalPolynomial(const D d, const V x,
+ const T (&p)[NP],
+ const T (&q)[NQ]) {
+ constexpr size_t kDegP = NP / 4 - 1;
+ constexpr size_t kDegQ = NQ / 4 - 1;
+ auto yp = LoadDup128(d, &p[kDegP * 4]);
+ auto yq = LoadDup128(d, &q[kDegQ * 4]);
+ // We use pointer arithmetic to refer to &p[(kDegP - n) * 4] to avoid a
+ // compiler warning that the index is out of bounds since we are already
+ // checking that it is not out of bounds with (kDegP >= n) and the access
+ // will be optimized away. Similarly with q and kDegQ.
+ HWY_FENCE;
+ if (kDegP >= 1) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 1) * 4)));
+ if (kDegQ >= 1) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 1) * 4)));
+ HWY_FENCE;
+ if (kDegP >= 2) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 2) * 4)));
+ if (kDegQ >= 2) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 2) * 4)));
+ HWY_FENCE;
+ if (kDegP >= 3) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 3) * 4)));
+ if (kDegQ >= 3) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 3) * 4)));
+ HWY_FENCE;
+ if (kDegP >= 4) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 4) * 4)));
+ if (kDegQ >= 4) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 4) * 4)));
+ HWY_FENCE;
+ if (kDegP >= 5) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 5) * 4)));
+ if (kDegQ >= 5) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 5) * 4)));
+ HWY_FENCE;
+ if (kDegP >= 6) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 6) * 4)));
+ if (kDegQ >= 6) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 6) * 4)));
+ HWY_FENCE;
+ if (kDegP >= 7) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 7) * 4)));
+ if (kDegQ >= 7) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 7) * 4)));
+
+ return FastDivision<T, V>()(yp, yq);
+}
+
+// Computes base-2 logarithm like std::log2. Undefined if negative / NaN.
+// L1 error ~3.9E-6
+template <class DF, class V>
+V FastLog2f(const DF df, V x) {
+ // 2,2 rational polynomial approximation of std::log1p(x) / std::log(2).
+ HWY_ALIGN const float p[4 * (2 + 1)] = {HWY_REP4(-1.8503833400518310E-06f),
+ HWY_REP4(1.4287160470083755E+00f),
+ HWY_REP4(7.4245873327820566E-01f)};
+ HWY_ALIGN const float q[4 * (2 + 1)] = {HWY_REP4(9.9032814277590719E-01f),
+ HWY_REP4(1.0096718572241148E+00f),
+ HWY_REP4(1.7409343003366853E-01f)};
+
+ const Rebind<int32_t, DF> di;
+ const auto x_bits = BitCast(di, x);
+
+ // Range reduction to [-1/3, 1/3] - 3 integer, 2 float ops
+ const auto exp_bits = Sub(x_bits, Set(di, 0x3f2aaaab)); // = 2/3
+ // Shifted exponent = log2; also used to clear mantissa.
+ const auto exp_shifted = ShiftRight<23>(exp_bits);
+ const auto mantissa = BitCast(df, Sub(x_bits, ShiftLeft<23>(exp_shifted)));
+ const auto exp_val = ConvertTo(df, exp_shifted);
+ return Add(EvalRationalPolynomial(df, Sub(mantissa, Set(df, 1.0f)), p, q),
+ exp_val);
+}
+
+// max relative error ~3e-7
+template <class DF, class V>
+V FastPow2f(const DF df, V x) {
+ const Rebind<int32_t, DF> di;
+ auto floorx = Floor(x);
+ auto exp =
+ BitCast(df, ShiftLeft<23>(Add(ConvertTo(di, floorx), Set(di, 127))));
+ auto frac = Sub(x, floorx);
+ auto num = Add(frac, Set(df, 1.01749063e+01));
+ num = MulAdd(num, frac, Set(df, 4.88687798e+01));
+ num = MulAdd(num, frac, Set(df, 9.85506591e+01));
+ num = Mul(num, exp);
+ auto den = MulAdd(frac, Set(df, 2.10242958e-01), Set(df, -2.22328856e-02));
+ den = MulAdd(den, frac, Set(df, -1.94414990e+01));
+ den = MulAdd(den, frac, Set(df, 9.85506633e+01));
+ return Div(num, den);
+}
+
+inline float FastPow2f(float f) {
+ HWY_CAPPED(float, 1) D;
+ return GetLane(FastPow2f(D, Set(D, f)));
+}
+
+// The following functions modulate an exponent (out_val) and return the updated
+// value. Their descriptor is limited to 8 lanes for 8x8 blocks.
+
+template <class D, class V>
+V ComputeMask(const D d, const V out_val) {
+ const auto kBase = Set(d, -0.74174993f);
+ const auto kMul4 = Set(d, 3.2353257320940401f);
+ const auto kMul2 = Set(d, 12.906028311180409f);
+ const auto kOffset2 = Set(d, 305.04035728311436f);
+ const auto kMul3 = Set(d, 5.0220313103171232f);
+ const auto kOffset3 = Set(d, 2.1925739705298404f);
+ const auto kOffset4 = Mul(Set(d, 0.25f), kOffset3);
+ const auto kMul0 = Set(d, 0.74760422233706747f);
+ const auto k1 = Set(d, 1.0f);
+
+ // Avoid division by zero.
+ const auto v1 = Max(Mul(out_val, kMul0), Set(d, 1e-3f));
+ const auto v2 = Div(k1, Add(v1, kOffset2));
+ const auto v3 = Div(k1, MulAdd(v1, v1, kOffset3));
+ const auto v4 = Div(k1, MulAdd(v1, v1, kOffset4));
+ // TODO(jyrki):
+ // A log or two here could make sense. In butteraugli we have effectively
+ // log(log(x + C)) for this kind of use, as a single log is used in
+ // saturating visual masking and here the modulation values are exponential,
+ // another log would counter that.
+ return Add(kBase, MulAdd(kMul4, v4, MulAdd(kMul2, v2, Mul(kMul3, v3))));
+}
+
+// mul and mul2 represent a scaling difference between jxl and butteraugli.
+static const float kSGmul = 226.0480446705883f;
+static const float kSGmul2 = 1.0f / 73.377132366608819f;
+static const float kLog2 = 0.693147181f;
+// Includes correction factor for std::log -> log2.
+static const float kSGRetMul = kSGmul2 * 18.6580932135f * kLog2;
+static const float kSGVOffset = 7.14672470003f;
+
+template <bool invert, typename D, typename V>
+V RatioOfDerivativesOfCubicRootToSimpleGamma(const D d, V v) {
+ // The opsin space in jxl is the cubic root of photons, i.e., v * v * v
+ // is related to the number of photons.
+ //
+ // SimpleGamma(v * v * v) is the psychovisual space in butteraugli.
+ // This ratio allows quantization to move from jxl's opsin space to
+ // butteraugli's log-gamma space.
+ static const float kEpsilon = 1e-2;
+ static const float kNumOffset = kEpsilon / kInputScaling / kInputScaling;
+ static const float kNumMul = kSGRetMul * 3 * kSGmul;
+ static const float kVOffset = (kSGVOffset * kLog2 + kEpsilon) / kInputScaling;
+ static const float kDenMul = kLog2 * kSGmul * kInputScaling * kInputScaling;
+
+ v = ZeroIfNegative(v);
+ const auto num_mul = Set(d, kNumMul);
+ const auto num_offset = Set(d, kNumOffset);
+ const auto den_offset = Set(d, kVOffset);
+ const auto den_mul = Set(d, kDenMul);
+
+ const auto v2 = Mul(v, v);
+
+ const auto num = MulAdd(num_mul, v2, num_offset);
+ const auto den = MulAdd(Mul(den_mul, v), v2, den_offset);
+ return invert ? Div(num, den) : Div(den, num);
+}
+
+template <bool invert = false>
+static float RatioOfDerivativesOfCubicRootToSimpleGamma(float v) {
+ using DScalar = HWY_CAPPED(float, 1);
+ auto vscalar = Load(DScalar(), &v);
+ return GetLane(
+ RatioOfDerivativesOfCubicRootToSimpleGamma<invert>(DScalar(), vscalar));
+}
+
+// TODO(veluca): this function computes an approximation of the derivative of
+// SimpleGamma with (f(x+eps)-f(x))/eps. Consider two-sided approximation or
+// exact derivatives. For reference, SimpleGamma was:
+/*
+template <typename D, typename V>
+V SimpleGamma(const D d, V v) {
+ // A simple HDR compatible gamma function.
+ const auto mul = Set(d, kSGmul);
+ const auto kRetMul = Set(d, kSGRetMul);
+ const auto kRetAdd = Set(d, kSGmul2 * -20.2789020414f);
+ const auto kVOffset = Set(d, kSGVOffset);
+
+ v *= mul;
+
+ // This should happen rarely, but may lead to a NaN, which is rather
+ // undesirable. Since negative photons don't exist we solve the NaNs by
+ // clamping here.
+ // TODO(veluca): with FastLog2f, this no longer leads to NaNs.
+ v = ZeroIfNegative(v);
+ return kRetMul * FastLog2f(d, v + kVOffset) + kRetAdd;
+}
+*/
+
+template <class D, class V>
+V GammaModulation(const D d, const size_t x, const size_t y,
+ const RowBuffer<float>& input, const V out_val) {
+ static const float kBias = 0.16f / kInputScaling;
+ static const float kScale = kInputScaling / 64.0f;
+ auto overall_ratio = Zero(d);
+ const auto bias = Set(d, kBias);
+ const auto scale = Set(d, kScale);
+ const float* const JXL_RESTRICT block_start = input.Row(y) + x;
+ for (size_t dy = 0; dy < 8; ++dy) {
+ const float* const JXL_RESTRICT row_in = block_start + dy * input.stride();
+ for (size_t dx = 0; dx < 8; dx += Lanes(d)) {
+ const auto iny = Add(Load(d, row_in + dx), bias);
+ const auto ratio_g =
+ RatioOfDerivativesOfCubicRootToSimpleGamma</*invert=*/true>(d, iny);
+ overall_ratio = Add(overall_ratio, ratio_g);
+ }
+ }
+ overall_ratio = Mul(SumOfLanes(d, overall_ratio), scale);
+ // ideally -1.0, but likely optimal correction adds some entropy, so slightly
+ // less than that.
+ // ln(2) constant folded in because we want std::log but have FastLog2f.
+ const auto kGam = Set(d, -0.15526878023684174f * 0.693147180559945f);
+ return MulAdd(kGam, FastLog2f(d, overall_ratio), out_val);
+}
+
+// Change precision in 8x8 blocks that have high frequency content.
+template <class D, class V>
+V HfModulation(const D d, const size_t x, const size_t y,
+ const RowBuffer<float>& input, const V out_val) {
+ // Zero out the invalid differences for the rightmost value per row.
+ const Rebind<uint32_t, D> du;
+ HWY_ALIGN constexpr uint32_t kMaskRight[8] = {~0u, ~0u, ~0u, ~0u,
+ ~0u, ~0u, ~0u, 0};
+
+ auto sum = Zero(d); // sum of absolute differences with right and below
+ static const float kSumCoeff = -2.0052193233688884f * kInputScaling / 112.0;
+ auto sumcoeff = Set(d, kSumCoeff);
+
+ const float* const JXL_RESTRICT block_start = input.Row(y) + x;
+ for (size_t dy = 0; dy < 8; ++dy) {
+ const float* JXL_RESTRICT row_in = block_start + dy * input.stride();
+ const float* JXL_RESTRICT row_in_next =
+ dy == 7 ? row_in : row_in + input.stride();
+
+ for (size_t dx = 0; dx < 8; dx += Lanes(d)) {
+ const auto p = Load(d, row_in + dx);
+ const auto pr = LoadU(d, row_in + dx + 1);
+ const auto mask = BitCast(d, Load(du, kMaskRight + dx));
+ sum = Add(sum, And(mask, AbsDiff(p, pr)));
+ const auto pd = Load(d, row_in_next + dx);
+ sum = Add(sum, AbsDiff(p, pd));
+ }
+ }
+
+ sum = SumOfLanes(d, sum);
+ return MulAdd(sum, sumcoeff, out_val);
+}
+
+void PerBlockModulations(const float y_quant_01, const RowBuffer<float>& input,
+ const size_t yb0, const size_t yblen,
+ RowBuffer<float>* aq_map) {
+ static const float kAcQuant = 0.841f;
+ float base_level = 0.48f * kAcQuant;
+ float kDampenRampStart = 9.0f;
+ float kDampenRampEnd = 65.0f;
+ float dampen = 1.0f;
+ if (y_quant_01 >= kDampenRampStart) {
+ dampen = 1.0f - ((y_quant_01 - kDampenRampStart) /
+ (kDampenRampEnd - kDampenRampStart));
+ if (dampen < 0) {
+ dampen = 0;
+ }
+ }
+ const float mul = kAcQuant * dampen;
+ const float add = (1.0f - dampen) * base_level;
+ for (size_t iy = 0; iy < yblen; iy++) {
+ const size_t yb = yb0 + iy;
+ const size_t y = yb * 8;
+ float* const JXL_RESTRICT row_out = aq_map->Row(yb);
+ const HWY_CAPPED(float, 8) df;
+ for (size_t ix = 0; ix < aq_map->xsize(); ix++) {
+ size_t x = ix * 8;
+ auto out_val = Set(df, row_out[ix]);
+ out_val = ComputeMask(df, out_val);
+ out_val = HfModulation(df, x, y, input, out_val);
+ out_val = GammaModulation(df, x, y, input, out_val);
+ // We want multiplicative quantization field, so everything
+ // until this point has been modulating the exponent.
+ row_out[ix] = FastPow2f(GetLane(out_val) * 1.442695041f) * mul + add;
+ }
+ }
+}
+
+template <typename D, typename V>
+V MaskingSqrt(const D d, V v) {
+ static const float kLogOffset = 28;
+ static const float kMul = 211.50759899638012f;
+ const auto mul_v = Set(d, kMul * 1e8);
+ const auto offset_v = Set(d, kLogOffset);
+ return Mul(Set(d, 0.25f), Sqrt(MulAdd(v, Sqrt(mul_v), offset_v)));
+}
+
+template <typename V>
+void Sort4(V& min0, V& min1, V& min2, V& min3) {
+ const auto tmp0 = Min(min0, min1);
+ const auto tmp1 = Max(min0, min1);
+ const auto tmp2 = Min(min2, min3);
+ const auto tmp3 = Max(min2, min3);
+ const auto tmp4 = Max(tmp0, tmp2);
+ const auto tmp5 = Min(tmp1, tmp3);
+ min0 = Min(tmp0, tmp2);
+ min1 = Min(tmp4, tmp5);
+ min2 = Max(tmp4, tmp5);
+ min3 = Max(tmp1, tmp3);
+}
+
+template <typename V>
+void UpdateMin4(const V v, V& min0, V& min1, V& min2, V& min3) {
+ const auto tmp0 = Max(min0, v);
+ const auto tmp1 = Max(min1, tmp0);
+ const auto tmp2 = Max(min2, tmp1);
+ min0 = Min(min0, v);
+ min1 = Min(min1, tmp0);
+ min2 = Min(min2, tmp1);
+ min3 = Min(min3, tmp2);
+}
+
+// Computes a linear combination of the 4 lowest values of the 3x3 neighborhood
+// of each pixel. Output is downsampled 2x.
+void FuzzyErosion(const RowBuffer<float>& pre_erosion, const size_t yb0,
+ const size_t yblen, RowBuffer<float>* tmp,
+ RowBuffer<float>* aq_map) {
+ int xsize_blocks = aq_map->xsize();
+ int xsize = pre_erosion.xsize();
+ HWY_FULL(float) d;
+ const auto mul0 = Set(d, 0.125f);
+ const auto mul1 = Set(d, 0.075f);
+ const auto mul2 = Set(d, 0.06f);
+ const auto mul3 = Set(d, 0.05f);
+ for (size_t iy = 0; iy < 2 * yblen; ++iy) {
+ size_t y = 2 * yb0 + iy;
+ const float* JXL_RESTRICT rowt = pre_erosion.Row(y - 1);
+ const float* JXL_RESTRICT rowm = pre_erosion.Row(y);
+ const float* JXL_RESTRICT rowb = pre_erosion.Row(y + 1);
+ float* row_out = tmp->Row(y);
+ for (int x = 0; x < xsize; x += Lanes(d)) {
+ int xm1 = x - 1;
+ int xp1 = x + 1;
+ auto min0 = LoadU(d, rowm + x);
+ auto min1 = LoadU(d, rowm + xm1);
+ auto min2 = LoadU(d, rowm + xp1);
+ auto min3 = LoadU(d, rowt + xm1);
+ Sort4(min0, min1, min2, min3);
+ UpdateMin4(LoadU(d, rowt + x), min0, min1, min2, min3);
+ UpdateMin4(LoadU(d, rowt + xp1), min0, min1, min2, min3);
+ UpdateMin4(LoadU(d, rowb + xm1), min0, min1, min2, min3);
+ UpdateMin4(LoadU(d, rowb + x), min0, min1, min2, min3);
+ UpdateMin4(LoadU(d, rowb + xp1), min0, min1, min2, min3);
+ const auto v = Add(Add(Mul(mul0, min0), Mul(mul1, min1)),
+ Add(Mul(mul2, min2), Mul(mul3, min3)));
+ Store(v, d, row_out + x);
+ }
+ if (iy % 2 == 1) {
+ const float* JXL_RESTRICT row_out0 = tmp->Row(y - 1);
+ float* JXL_RESTRICT aq_out = aq_map->Row(yb0 + iy / 2);
+ for (int bx = 0, x = 0; bx < xsize_blocks; ++bx, x += 2) {
+ aq_out[bx] =
+ (row_out[x] + row_out[x + 1] + row_out0[x] + row_out0[x + 1]);
+ }
+ }
+ }
+}
+
+void ComputePreErosion(const RowBuffer<float>& input, const size_t xsize,
+ const size_t y0, const size_t ylen, int border,
+ float* diff_buffer, RowBuffer<float>* pre_erosion) {
+ const size_t xsize_out = xsize / 4;
+ const size_t y0_out = y0 / 4;
+
+ // The XYB gamma is 3.0 to be able to decode faster with two muls.
+ // Butteraugli's gamma is matching the gamma of human eye, around 2.6.
+ // We approximate the gamma difference by adding one cubic root into
+ // the adaptive quantization. This gives us a total gamma of 2.6666
+ // for quantization uses.
+ static const float match_gamma_offset = 0.019 / kInputScaling;
+
+ const HWY_CAPPED(float, 8) df;
+
+ static const float limit = 0.2f;
+ // Computes image (padded to multiple of 8x8) of local pixel differences.
+ // Subsample both directions by 4.
+ for (size_t iy = 0; iy < ylen; ++iy) {
+ size_t y = y0 + iy;
+ const float* row_in = input.Row(y);
+ const float* row_in1 = input.Row(y + 1);
+ const float* row_in2 = input.Row(y - 1);
+ float* JXL_RESTRICT row_out = diff_buffer;
+ const auto match_gamma_offset_v = Set(df, match_gamma_offset);
+ const auto quarter = Set(df, 0.25f);
+ for (size_t x = 0; x < xsize; x += Lanes(df)) {
+ const auto in = LoadU(df, row_in + x);
+ const auto in_r = LoadU(df, row_in + x + 1);
+ const auto in_l = LoadU(df, row_in + x - 1);
+ const auto in_t = LoadU(df, row_in2 + x);
+ const auto in_b = LoadU(df, row_in1 + x);
+ const auto base = Mul(quarter, Add(Add(in_r, in_l), Add(in_t, in_b)));
+ const auto gammacv =
+ RatioOfDerivativesOfCubicRootToSimpleGamma</*invert=*/false>(
+ df, Add(in, match_gamma_offset_v));
+ auto diff = Mul(gammacv, Sub(in, base));
+ diff = Mul(diff, diff);
+ diff = Min(diff, Set(df, limit));
+ diff = MaskingSqrt(df, diff);
+ if ((iy & 3) != 0) {
+ diff = Add(diff, LoadU(df, row_out + x));
+ }
+ StoreU(diff, df, row_out + x);
+ }
+ if (iy % 4 == 3) {
+ size_t y_out = y0_out + iy / 4;
+ float* row_dout = pre_erosion->Row(y_out);
+ for (size_t x = 0; x < xsize_out; x++) {
+ row_dout[x] = (row_out[x * 4] + row_out[x * 4 + 1] +
+ row_out[x * 4 + 2] + row_out[x * 4 + 3]) *
+ 0.25f;
+ }
+ pre_erosion->PadRow(y_out, xsize_out, border);
+ }
+ }
+}
+
+} // namespace
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+HWY_EXPORT(ComputePreErosion);
+HWY_EXPORT(FuzzyErosion);
+HWY_EXPORT(PerBlockModulations);
+
+namespace {
+
+static constexpr int kPreErosionBorder = 1;
+
+} // namespace
+
+void ComputeAdaptiveQuantField(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ if (!m->use_adaptive_quantization) {
+ return;
+ }
+ int y_channel = cinfo->jpeg_color_space == JCS_RGB ? 1 : 0;
+ jpeg_component_info* y_comp = &cinfo->comp_info[y_channel];
+ int y_quant_01 = cinfo->quant_tbl_ptrs[y_comp->quant_tbl_no]->quantval[1];
+ if (m->next_iMCU_row == 0) {
+ m->input_buffer[y_channel].CopyRow(-1, 0, 1);
+ }
+ if (m->next_iMCU_row + 1 == cinfo->total_iMCU_rows) {
+ size_t last_row = m->ysize_blocks * DCTSIZE - 1;
+ m->input_buffer[y_channel].CopyRow(last_row + 1, last_row, 1);
+ }
+ const RowBuffer<float>& input = m->input_buffer[y_channel];
+ const size_t xsize_blocks = y_comp->width_in_blocks;
+ const size_t xsize = xsize_blocks * DCTSIZE;
+ const size_t yb0 = m->next_iMCU_row * cinfo->max_v_samp_factor;
+ const size_t yblen = cinfo->max_v_samp_factor;
+ size_t y0 = yb0 * DCTSIZE;
+ size_t ylen = cinfo->max_v_samp_factor * DCTSIZE;
+ if (y0 == 0) {
+ ylen += 4;
+ } else {
+ y0 += 4;
+ }
+ if (m->next_iMCU_row + 1 == cinfo->total_iMCU_rows) {
+ ylen -= 4;
+ }
+ HWY_DYNAMIC_DISPATCH(ComputePreErosion)
+ (input, xsize, y0, ylen, kPreErosionBorder, m->diff_buffer, &m->pre_erosion);
+ if (y0 == 0) {
+ m->pre_erosion.CopyRow(-1, 0, kPreErosionBorder);
+ }
+ if (m->next_iMCU_row + 1 == cinfo->total_iMCU_rows) {
+ size_t last_row = m->ysize_blocks * 2 - 1;
+ m->pre_erosion.CopyRow(last_row + 1, last_row, kPreErosionBorder);
+ }
+ HWY_DYNAMIC_DISPATCH(FuzzyErosion)
+ (m->pre_erosion, yb0, yblen, &m->fuzzy_erosion_tmp, &m->quant_field);
+ HWY_DYNAMIC_DISPATCH(PerBlockModulations)
+ (y_quant_01, input, yb0, yblen, &m->quant_field);
+ for (int y = 0; y < cinfo->max_v_samp_factor; ++y) {
+ float* row = m->quant_field.Row(yb0 + y);
+ for (size_t x = 0; x < xsize_blocks; ++x) {
+ row[x] = std::max(0.0f, (0.6f / row[x]) - 1.0f);
+ }
+ }
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jpegli/adaptive_quantization.h b/third_party/jpeg-xl/lib/jpegli/adaptive_quantization.h
new file mode 100644
index 0000000000..71f2fcc0af
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/adaptive_quantization.h
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ADAPTIVE_QUANTIZATION_H_
+#define LIB_JPEGLI_ADAPTIVE_QUANTIZATION_H_
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+#include <stddef.h>
+/* clang-format on */
+
+namespace jpegli {
+
+void ComputeAdaptiveQuantField(j_compress_ptr cinfo);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_ADAPTIVE_QUANTIZATION_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/bit_writer.cc b/third_party/jpeg-xl/lib/jpegli/bit_writer.cc
new file mode 100644
index 0000000000..9788f35b8d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/bit_writer.cc
@@ -0,0 +1,60 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/bit_writer.h"
+
+#include "lib/jpegli/encode_internal.h"
+
+namespace jpegli {
+
+void JpegBitWriterInit(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ JpegBitWriter* bw = &m->bw;
+ size_t buffer_size = m->blocks_per_iMCU_row * (DCTSIZE2 * 16 + 8) + (1 << 16);
+ bw->cinfo = cinfo;
+ bw->data = Allocate<uint8_t>(cinfo, buffer_size, JPOOL_IMAGE);
+ bw->len = buffer_size;
+ bw->pos = 0;
+ bw->output_pos = 0;
+ bw->put_buffer = 0;
+ bw->free_bits = 64;
+ bw->healthy = true;
+}
+
+bool EmptyBitWriterBuffer(JpegBitWriter* bw) {
+ while (bw->output_pos < bw->pos) {
+ j_compress_ptr cinfo = bw->cinfo;
+ if (cinfo->dest->free_in_buffer == 0 &&
+ !(*cinfo->dest->empty_output_buffer)(cinfo)) {
+ return false;
+ }
+ size_t buflen = bw->pos - bw->output_pos;
+ size_t copylen = std::min<size_t>(cinfo->dest->free_in_buffer, buflen);
+ memcpy(cinfo->dest->next_output_byte, bw->data + bw->output_pos, copylen);
+ bw->output_pos += copylen;
+ cinfo->dest->free_in_buffer -= copylen;
+ cinfo->dest->next_output_byte += copylen;
+ }
+ bw->output_pos = bw->pos = 0;
+ return true;
+}
+
+void JumpToByteBoundary(JpegBitWriter* bw) {
+ size_t n_bits = bw->free_bits & 7u;
+ if (n_bits > 0) {
+ WriteBits(bw, n_bits, (1u << n_bits) - 1);
+ }
+ bw->put_buffer <<= bw->free_bits;
+ while (bw->free_bits <= 56) {
+ int c = (bw->put_buffer >> 56) & 0xFF;
+ EmitByte(bw, c);
+ bw->put_buffer <<= 8;
+ bw->free_bits += 8;
+ }
+ bw->put_buffer = 0;
+ bw->free_bits = 64;
+}
+
+} // namespace jpegli
diff --git a/third_party/jpeg-xl/lib/jpegli/bit_writer.h b/third_party/jpeg-xl/lib/jpegli/bit_writer.h
new file mode 100644
index 0000000000..0affcdabd3
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/bit_writer.h
@@ -0,0 +1,107 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_BIT_WRITER_H_
+#define LIB_JPEGLI_BIT_WRITER_H_
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+#include <stdint.h>
+#include <string.h>
+/* clang-format on */
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jpegli {
+
+// Handles the packing of bits into output bytes.
+struct JpegBitWriter {
+ j_compress_ptr cinfo;
+ uint8_t* data;
+ size_t len;
+ size_t pos;
+ size_t output_pos;
+ uint64_t put_buffer;
+ int free_bits;
+ bool healthy;
+};
+
+void JpegBitWriterInit(j_compress_ptr cinfo);
+
+bool EmptyBitWriterBuffer(JpegBitWriter* bw);
+
+void JumpToByteBoundary(JpegBitWriter* bw);
+
+// Returns non-zero if and only if x has a zero byte, i.e. one of
+// x & 0xff, x & 0xff00, ..., x & 0xff00000000000000 is zero.
+static JXL_INLINE uint64_t HasZeroByte(uint64_t x) {
+ return (x - 0x0101010101010101ULL) & ~x & 0x8080808080808080ULL;
+}
+
+/**
+ * Writes the given byte to the output, writes an extra zero if byte is 0xFF.
+ *
+ * This method is "careless" - caller must make sure that there is enough
+ * space in the output buffer. Emits up to 2 bytes to buffer.
+ */
+static JXL_INLINE void EmitByte(JpegBitWriter* bw, int byte) {
+ bw->data[bw->pos++] = byte;
+ if (byte == 0xFF) bw->data[bw->pos++] = 0;
+}
+
+static JXL_INLINE void DischargeBitBuffer(JpegBitWriter* bw) {
+ // At this point we are ready to emit the bytes of put_buffer to the output.
+ // The JPEG format requires that after every 0xff byte in the entropy
+ // coded section, there is a zero byte, therefore we first check if any of
+ // the bytes of put_buffer is 0xFF.
+ if (HasZeroByte(~bw->put_buffer)) {
+ // We have a 0xFF byte somewhere, examine each byte and append a zero
+ // byte if necessary.
+ EmitByte(bw, (bw->put_buffer >> 56) & 0xFF);
+ EmitByte(bw, (bw->put_buffer >> 48) & 0xFF);
+ EmitByte(bw, (bw->put_buffer >> 40) & 0xFF);
+ EmitByte(bw, (bw->put_buffer >> 32) & 0xFF);
+ EmitByte(bw, (bw->put_buffer >> 24) & 0xFF);
+ EmitByte(bw, (bw->put_buffer >> 16) & 0xFF);
+ EmitByte(bw, (bw->put_buffer >> 8) & 0xFF);
+ EmitByte(bw, (bw->put_buffer >> 0) & 0xFF);
+ } else {
+ // We don't have any 0xFF bytes, output all 6 bytes without checking.
+ bw->data[bw->pos] = (bw->put_buffer >> 56) & 0xFF;
+ bw->data[bw->pos + 1] = (bw->put_buffer >> 48) & 0xFF;
+ bw->data[bw->pos + 2] = (bw->put_buffer >> 40) & 0xFF;
+ bw->data[bw->pos + 3] = (bw->put_buffer >> 32) & 0xFF;
+ bw->data[bw->pos + 4] = (bw->put_buffer >> 24) & 0xFF;
+ bw->data[bw->pos + 5] = (bw->put_buffer >> 16) & 0xFF;
+ bw->data[bw->pos + 6] = (bw->put_buffer >> 8) & 0xFF;
+ bw->data[bw->pos + 7] = (bw->put_buffer >> 0) & 0xFF;
+ bw->pos += 8;
+ }
+}
+
+static JXL_INLINE void WriteBits(JpegBitWriter* bw, int nbits, uint64_t bits) {
+ // This is an optimization; if everything goes well,
+ // then |nbits| is positive; if non-existing Huffman symbol is going to be
+ // encoded, its length should be zero; later encoder could check the
+ // "health" of JpegBitWriter.
+ if (nbits == 0) {
+ bw->healthy = false;
+ return;
+ }
+ bw->free_bits -= nbits;
+ if (bw->free_bits < 0) {
+ bw->put_buffer <<= (bw->free_bits + nbits);
+ bw->put_buffer |= (bits >> -bw->free_bits);
+ DischargeBitBuffer(bw);
+ bw->free_bits += 64;
+ bw->put_buffer = nbits;
+ }
+ bw->put_buffer <<= nbits;
+ bw->put_buffer |= bits;
+}
+
+} // namespace jpegli
+#endif // LIB_JPEGLI_BIT_WRITER_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/bitstream.cc b/third_party/jpeg-xl/lib/jpegli/bitstream.cc
new file mode 100644
index 0000000000..0313ed3071
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/bitstream.cc
@@ -0,0 +1,1136 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/bitstream.h"
+
+#include <cmath>
+
+#include "lib/jpegli/bit_writer.h"
+#include "lib/jpegli/entropy_coding.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jxl/base/bits.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/bitstream.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/dct-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::And;
+using hwy::HWY_NAMESPACE::AndNot;
+using hwy::HWY_NAMESPACE::Compress;
+using hwy::HWY_NAMESPACE::CountTrue;
+using hwy::HWY_NAMESPACE::Eq;
+using hwy::HWY_NAMESPACE::GetLane;
+using hwy::HWY_NAMESPACE::MaskFromVec;
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::Not;
+using hwy::HWY_NAMESPACE::Or;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Shl;
+using hwy::HWY_NAMESPACE::Sub;
+
+using DI = HWY_FULL(int32_t);
+constexpr DI di;
+
+int NumNonZero8x8ExceptDC(const coeff_t* block) {
+ const HWY_CAPPED(coeff_t, 8) di;
+
+ const auto zero = Zero(di);
+ // Add FFFF for every zero coefficient, negate to get #zeros.
+ auto neg_sum_zero = zero;
+ {
+ // First row has DC, so mask
+ const size_t y = 0;
+ HWY_ALIGN const coeff_t dc_mask_lanes[8] = {-1};
+
+ for (size_t x = 0; x < 8; x += Lanes(di)) {
+ const auto dc_mask = Load(di, dc_mask_lanes + x);
+
+ // DC counts as zero so we don't include it in nzeros.
+ const auto coef = AndNot(dc_mask, Load(di, &block[y * 8 + x]));
+
+ neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
+ }
+ }
+ // Remaining rows: no mask
+ for (size_t y = 1; y < 8; y++) {
+ for (size_t x = 0; x < 8; x += Lanes(di)) {
+ const auto coef = Load(di, &block[y * 8 + x]);
+ neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
+ }
+ }
+
+ // We want 64 - sum_zero, add because neg_sum_zero is already negated.
+ return kDCTBlockSize + GetLane(SumOfLanes(di, neg_sum_zero));
+}
+
+void ZigZagShuffle(int32_t* JXL_RESTRICT block) {
+ // TODO(szabadka) SIMDify this.
+ int32_t tmp[DCTSIZE2];
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ tmp[k] = block[kJPEGNaturalOrder[k]];
+ }
+ memcpy(block, tmp, DCTSIZE2 * sizeof(tmp[0]));
+}
+
+template <typename DI, class V>
+JXL_INLINE V NumBits(DI di, const V x) {
+ // TODO(szabadka) Add faster implementations for some specific architectures.
+ const auto b1 = And(x, Set(di, 1));
+ const auto b2 = And(x, Set(di, 2));
+ const auto b3 = Sub((And(x, Set(di, 4))), Set(di, 1));
+ const auto b4 = Sub((And(x, Set(di, 8))), Set(di, 4));
+ const auto b5 = Sub((And(x, Set(di, 16))), Set(di, 11));
+ const auto b6 = Sub((And(x, Set(di, 32))), Set(di, 26));
+ const auto b7 = Sub((And(x, Set(di, 64))), Set(di, 57));
+ const auto b8 = Sub((And(x, Set(di, 128))), Set(di, 120));
+ const auto b9 = Sub((And(x, Set(di, 256))), Set(di, 247));
+ const auto b10 = Sub((And(x, Set(di, 512))), Set(di, 502));
+ const auto b11 = Sub((And(x, Set(di, 1024))), Set(di, 1013));
+ const auto b12 = Sub((And(x, Set(di, 2048))), Set(di, 2036));
+ return Max(Max(Max(Max(b1, b2), Max(b3, b4)), Max(Max(b5, b6), Max(b7, b8))),
+ Max(Max(b9, b10), Max(b11, b12)));
+}
+
+// Coefficient indexes pre-multiplied by 16 for the symbol calculation.
+HWY_ALIGN constexpr int32_t kIndexes[64] = {
+ 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192,
+ 208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400,
+ 416, 432, 448, 464, 480, 496, 512, 528, 544, 560, 576, 592, 608,
+ 624, 640, 656, 672, 688, 704, 720, 736, 752, 768, 784, 800, 816,
+ 832, 848, 864, 880, 896, 912, 928, 944, 960, 976, 992, 1008,
+};
+
+JXL_INLINE int CompactBlock(int32_t* JXL_RESTRICT block,
+ int32_t* JXL_RESTRICT nonzero_idx) {
+ const auto zero = Zero(di);
+ HWY_ALIGN constexpr int32_t dc_mask_lanes[HWY_LANES(DI)] = {-1};
+ const auto dc_mask = MaskFromVec(Load(di, dc_mask_lanes));
+ int num_nonzeros = 0;
+ int k = 0;
+ {
+ const auto coef = Load(di, block);
+ const auto idx = Load(di, kIndexes);
+ const auto nonzero_mask = Or(dc_mask, Not(Eq(coef, zero)));
+ const auto nzero_coef = Compress(coef, nonzero_mask);
+ const auto nzero_idx = Compress(idx, nonzero_mask);
+ StoreU(nzero_coef, di, &block[num_nonzeros]);
+ StoreU(nzero_idx, di, &nonzero_idx[num_nonzeros]);
+ num_nonzeros += CountTrue(di, nonzero_mask);
+ k += Lanes(di);
+ }
+ for (; k < DCTSIZE2; k += Lanes(di)) {
+ const auto coef = Load(di, &block[k]);
+ const auto idx = Load(di, &kIndexes[k]);
+ const auto nonzero_mask = Not(Eq(coef, zero));
+ const auto nzero_coef = Compress(coef, nonzero_mask);
+ const auto nzero_idx = Compress(idx, nonzero_mask);
+ StoreU(nzero_coef, di, &block[num_nonzeros]);
+ StoreU(nzero_idx, di, &nonzero_idx[num_nonzeros]);
+ num_nonzeros += CountTrue(di, nonzero_mask);
+ }
+ return num_nonzeros;
+}
+
+JXL_INLINE void ComputeSymbols(const int num_nonzeros,
+ int32_t* JXL_RESTRICT nonzero_idx,
+ int32_t* JXL_RESTRICT block,
+ int32_t* JXL_RESTRICT symbols) {
+ nonzero_idx[-1] = -16;
+ const auto one = Set(di, 1);
+ const auto offset = Set(di, 16);
+ for (int i = 0; i < num_nonzeros; i += Lanes(di)) {
+ const auto idx = Load(di, &nonzero_idx[i]);
+ const auto prev_idx = LoadU(di, &nonzero_idx[i - 1]);
+ const auto coeff = Load(di, &block[i]);
+ const auto nbits = NumBits(di, Abs(coeff));
+ const auto mask = ShiftRight<8 * sizeof(int32_t) - 1>(coeff);
+ const auto bits = And(Add(coeff, mask), Sub(Shl(one, nbits), one));
+ const auto symbol = Sub(Add(nbits, idx), Add(prev_idx, offset));
+ Store(symbol, di, symbols + i);
+ Store(bits, di, block + i);
+ }
+}
+
+void WriteBlock(int32_t* JXL_RESTRICT block, int32_t* JXL_RESTRICT symbols,
+ int32_t* JXL_RESTRICT nonzero_idx, HuffmanCodeTable* dc_huff,
+ HuffmanCodeTable* ac_huff, JpegBitWriter* bw) {
+ ZigZagShuffle(block);
+ int num_nonzeros = CompactBlock(block, nonzero_idx);
+ ComputeSymbols(num_nonzeros, nonzero_idx, block, symbols);
+ int symbol = symbols[0];
+ WriteBits(bw, dc_huff->depth[symbol], dc_huff->code[symbol] | block[0]);
+ for (int i = 1; i < num_nonzeros; ++i) {
+ symbol = symbols[i];
+ while (symbol > 255) {
+ WriteBits(bw, ac_huff->depth[0xf0], ac_huff->code[0xf0]);
+ symbol -= 256;
+ }
+ WriteBits(bw, ac_huff->depth[symbol], ac_huff->code[symbol] | block[i]);
+ }
+ if (nonzero_idx[num_nonzeros - 1] < 1008) {
+ WriteBits(bw, ac_huff->depth[0], ac_huff->code[0]);
+ }
+}
+
+void WriteiMCURow(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ JpegBitWriter* bw = &m->bw;
+ int xsize_mcus = DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor);
+ int mcu_y = m->next_iMCU_row;
+ int32_t* block = m->block_tmp;
+ int32_t* symbols = m->block_tmp + DCTSIZE2;
+ int32_t* nonzero_idx = m->block_tmp + 3 * DCTSIZE2;
+ coeff_t* JXL_RESTRICT last_dc_coeff = m->last_dc_coeff;
+ const float* imcu_start[kMaxComponents];
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ imcu_start[c] = m->raw_data[c]->Row(mcu_y * comp->v_samp_factor * DCTSIZE);
+ }
+ const float* qf = nullptr;
+ if (m->use_adaptive_quantization) {
+ qf = m->quant_field.Row(0);
+ }
+ const size_t qf_stride = m->quant_field.stride();
+ for (int mcu_x = 0; mcu_x < xsize_mcus; ++mcu_x) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ HuffmanCodeTable* dc_huff = &m->huff_tables[comp->dc_tbl_no];
+ HuffmanCodeTable* ac_huff = &m->huff_tables[comp->ac_tbl_no + 4];
+ float* JXL_RESTRICT qmc = m->quant_mul[c];
+ const size_t stride = m->raw_data[c]->stride();
+ const int h_factor = m->h_factor[c];
+ const float* zero_bias_offset = m->zero_bias_offset[c];
+ const float* zero_bias_mul = m->zero_bias_mul[c];
+ float aq_strength = 0.0f;
+ for (int iy = 0; iy < comp->v_samp_factor; ++iy) {
+ for (int ix = 0; ix < comp->h_samp_factor; ++ix) {
+ size_t by = mcu_y * comp->v_samp_factor + iy;
+ size_t bx = mcu_x * comp->h_samp_factor + ix;
+ if (bx >= comp->width_in_blocks || by >= comp->height_in_blocks) {
+ WriteBits(bw, dc_huff->depth[0], dc_huff->code[0]);
+ WriteBits(bw, ac_huff->depth[0], ac_huff->code[0]);
+ continue;
+ }
+ if (m->use_adaptive_quantization) {
+ aq_strength = qf[iy * qf_stride + bx * h_factor];
+ }
+ const float* pixels = imcu_start[c] + (iy * stride + bx) * DCTSIZE;
+ ComputeCoefficientBlock(pixels, stride, qmc, aq_strength,
+ zero_bias_offset, zero_bias_mul,
+ m->dct_buffer, block);
+ block[0] -= last_dc_coeff[c];
+ last_dc_coeff[c] += block[0];
+ WriteBlock(block, symbols, nonzero_idx, dc_huff, ac_huff, bw);
+ }
+ }
+ }
+ }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+namespace {
+HWY_EXPORT(NumNonZero8x8ExceptDC);
+
+// Holds data that is buffered between 8x8 blocks in progressive mode.
+struct DCTCodingState {
+ // The run length of end-of-band symbols in a progressive scan.
+ int eob_run_;
+ // The huffman table to be used when flushing the state.
+ HuffmanCodeTable* cur_ac_huff_;
+ // The sequence of currently buffered refinement bits for a successive
+ // approximation scan (one where Ah > 0).
+ std::vector<int> refinement_bits_;
+};
+
+void DCTCodingStateInit(DCTCodingState* s) {
+ s->eob_run_ = 0;
+ s->cur_ac_huff_ = nullptr;
+ s->refinement_bits_.clear();
+ s->refinement_bits_.reserve(kJPEGMaxCorrectionBits);
+}
+
+static JXL_INLINE void WriteSymbol(int symbol, const HuffmanCodeTable* table,
+ JpegBitWriter* bw) {
+ WriteBits(bw, table->depth[symbol], table->code[symbol]);
+}
+
+// Emit all buffered data to the bit stream using the given Huffman code and
+// bit writer.
+static JXL_INLINE void Flush(DCTCodingState* s, JpegBitWriter* bw) {
+ if (s->eob_run_ > 0) {
+ int nbits = jxl::FloorLog2Nonzero<uint32_t>(s->eob_run_);
+ int symbol = nbits << 4u;
+ WriteSymbol(symbol, s->cur_ac_huff_, bw);
+ if (nbits > 0) {
+ WriteBits(bw, nbits, s->eob_run_ & ((1 << nbits) - 1));
+ }
+ s->eob_run_ = 0;
+ }
+ for (size_t i = 0; i < s->refinement_bits_.size(); ++i) {
+ WriteBits(bw, 1, s->refinement_bits_[i]);
+ }
+ s->refinement_bits_.clear();
+}
+
+// Buffer some more data at the end-of-band (the last non-zero or newly
+// non-zero coefficient within the [Ss, Se] spectral band).
+static JXL_INLINE void BufferEndOfBand(DCTCodingState* s,
+ HuffmanCodeTable* ac_huff,
+ const std::vector<int>* new_bits,
+ JpegBitWriter* bw) {
+ if (s->eob_run_ == 0) {
+ s->cur_ac_huff_ = ac_huff;
+ }
+ ++s->eob_run_;
+ if (new_bits) {
+ s->refinement_bits_.insert(s->refinement_bits_.end(), new_bits->begin(),
+ new_bits->end());
+ }
+ if (s->eob_run_ == 0x7FFF ||
+ s->refinement_bits_.size() > kJPEGMaxCorrectionBits - kDCTBlockSize + 1) {
+ Flush(s, bw);
+ }
+}
+
+bool BuildHuffmanCodeTable(const JPEGHuffmanCode& huff, HuffmanCodeTable* table,
+ bool pre_shifted = false) {
+ int huff_code[kJpegHuffmanAlphabetSize];
+ // +1 for a sentinel element.
+ uint32_t huff_size[kJpegHuffmanAlphabetSize + 1];
+ int p = 0;
+ for (size_t l = 1; l <= kJpegHuffmanMaxBitLength; ++l) {
+ int i = huff.counts[l];
+ if (p + i > kJpegHuffmanAlphabetSize + 1) {
+ return false;
+ }
+ while (i--) huff_size[p++] = l;
+ }
+
+ if (p == 0) {
+ return true;
+ }
+
+ // Reuse sentinel element.
+ int last_p = p - 1;
+ huff_size[last_p] = 0;
+
+ int code = 0;
+ uint32_t si = huff_size[0];
+ p = 0;
+ while (huff_size[p]) {
+ while ((huff_size[p]) == si) {
+ huff_code[p++] = code;
+ code++;
+ }
+ code <<= 1;
+ si++;
+ }
+ for (p = 0; p < last_p; p++) {
+ int i = huff.values[p];
+ table->depth[i] = huff_size[p];
+ table->code[i] = huff_code[p];
+ if (pre_shifted) {
+ int nbits = i & 0xf;
+ table->depth[i] += nbits;
+ table->code[i] <<= nbits;
+ }
+ }
+ return true;
+}
+
+bool EncodeDCTBlockSequential(const coeff_t* block, HuffmanCodeTable* dc_huff,
+ HuffmanCodeTable* ac_huff, coeff_t* last_dc_coeff,
+ JpegBitWriter* bw) {
+ coeff_t temp2;
+ coeff_t temp;
+ temp2 = block[0];
+ temp = temp2 - *last_dc_coeff;
+ if (temp == 0) {
+ WriteSymbol(0, dc_huff, bw);
+ } else {
+ *last_dc_coeff = temp2;
+ temp2 = temp;
+ if (temp < 0) {
+ temp = -temp;
+ temp2--;
+ }
+ int dc_nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1;
+ int dc_mask = (1 << dc_nbits) - 1;
+ WriteSymbol(dc_nbits, dc_huff, bw);
+ WriteBits(bw, dc_nbits, temp2 & dc_mask);
+ }
+ int num_nonzeros = HWY_DYNAMIC_DISPATCH(NumNonZero8x8ExceptDC)(block);
+ for (int k = 1; k < 64; ++k) {
+ if (num_nonzeros == 0) {
+ WriteSymbol(0, ac_huff, bw);
+ break;
+ }
+ int r = 0;
+ while ((temp = block[kJPEGNaturalOrder[k]]) == 0) {
+ r++;
+ k++;
+ }
+ --num_nonzeros;
+ if (temp < 0) {
+ temp = -temp;
+ temp2 = ~temp;
+ } else {
+ temp2 = temp;
+ }
+ while (r > 15) {
+ WriteSymbol(0xf0, ac_huff, bw);
+ r -= 16;
+ }
+ int ac_nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1;
+ int ac_mask = (1 << ac_nbits) - 1;
+ int symbol = (r << 4u) + ac_nbits;
+ WriteSymbol(symbol, ac_huff, bw);
+ WriteBits(bw, ac_nbits, temp2 & ac_mask);
+ }
+ return true;
+}
+
+bool EncodeDCTBlockProgressive(const coeff_t* coeffs, HuffmanCodeTable* dc_huff,
+ HuffmanCodeTable* ac_huff, int Ss, int Se,
+ int Al, DCTCodingState* coding_state,
+ coeff_t* last_dc_coeff, JpegBitWriter* bw) {
+ bool eob_run_allowed = Ss > 0;
+ coeff_t temp2;
+ coeff_t temp;
+ if (Ss == 0) {
+ temp2 = coeffs[0] >> Al;
+ temp = temp2 - *last_dc_coeff;
+ *last_dc_coeff = temp2;
+ temp2 = temp;
+ if (temp < 0) {
+ temp = -temp;
+ if (temp < 0) return false;
+ temp2--;
+ }
+ int nbits = (temp == 0) ? 0 : (jxl::FloorLog2Nonzero<uint32_t>(temp) + 1);
+ WriteSymbol(nbits, dc_huff, bw);
+ if (nbits > 0) {
+ WriteBits(bw, nbits, temp2 & ((1 << nbits) - 1));
+ }
+ ++Ss;
+ }
+ if (Ss > Se) {
+ return true;
+ }
+ int r = 0;
+ for (int k = Ss; k <= Se; ++k) {
+ if ((temp = coeffs[kJPEGNaturalOrder[k]]) == 0) {
+ r++;
+ continue;
+ }
+ if (temp < 0) {
+ temp = -temp;
+ if (temp < 0) return false;
+ temp >>= Al;
+ temp2 = ~temp;
+ } else {
+ temp >>= Al;
+ temp2 = temp;
+ }
+ if (temp == 0) {
+ r++;
+ continue;
+ }
+ Flush(coding_state, bw);
+ while (r > 15) {
+ WriteSymbol(0xf0, ac_huff, bw);
+ r -= 16;
+ }
+ int nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1;
+ int symbol = (r << 4u) + nbits;
+ WriteSymbol(symbol, ac_huff, bw);
+ WriteBits(bw, nbits, temp2 & ((1 << nbits) - 1));
+ r = 0;
+ }
+ if (r > 0) {
+ BufferEndOfBand(coding_state, ac_huff, nullptr, bw);
+ if (!eob_run_allowed) {
+ Flush(coding_state, bw);
+ }
+ }
+ return true;
+}
+
+bool EncodeRefinementBits(const coeff_t* coeffs, HuffmanCodeTable* ac_huff,
+ int Ss, int Se, int Al, DCTCodingState* coding_state,
+ JpegBitWriter* bw) {
+ bool eob_run_allowed = Ss > 0;
+ if (Ss == 0) {
+ // Emit next bit of DC component.
+ WriteBits(bw, 1, (coeffs[0] >> Al) & 1);
+ ++Ss;
+ }
+ if (Ss > Se) {
+ return true;
+ }
+ int abs_values[kDCTBlockSize];
+ int eob = 0;
+ for (int k = Ss; k <= Se; k++) {
+ const coeff_t abs_val = std::abs(coeffs[kJPEGNaturalOrder[k]]);
+ abs_values[k] = abs_val >> Al;
+ if (abs_values[k] == 1) {
+ eob = k;
+ }
+ }
+ int r = 0;
+ std::vector<int> refinement_bits;
+ refinement_bits.reserve(kDCTBlockSize);
+ for (int k = Ss; k <= Se; k++) {
+ if (abs_values[k] == 0) {
+ r++;
+ continue;
+ }
+ while (r > 15 && k <= eob) {
+ Flush(coding_state, bw);
+ WriteSymbol(0xf0, ac_huff, bw);
+ r -= 16;
+ for (int bit : refinement_bits) {
+ WriteBits(bw, 1, bit);
+ }
+ refinement_bits.clear();
+ }
+ if (abs_values[k] > 1) {
+ refinement_bits.push_back(abs_values[k] & 1u);
+ continue;
+ }
+ Flush(coding_state, bw);
+ int symbol = (r << 4u) + 1;
+ int new_non_zero_bit = (coeffs[kJPEGNaturalOrder[k]] < 0) ? 0 : 1;
+ WriteSymbol(symbol, ac_huff, bw);
+ WriteBits(bw, 1, new_non_zero_bit);
+ for (int bit : refinement_bits) {
+ WriteBits(bw, 1, bit);
+ }
+ refinement_bits.clear();
+ r = 0;
+ }
+ if (r > 0 || !refinement_bits.empty()) {
+ BufferEndOfBand(coding_state, ac_huff, &refinement_bits, bw);
+ if (!eob_run_allowed) {
+ Flush(coding_state, bw);
+ }
+ }
+ return true;
+}
+
+} // namespace
+
+void WriteOutput(j_compress_ptr cinfo, const uint8_t* buf, size_t bufsize) {
+ size_t pos = 0;
+ while (pos < bufsize) {
+ if (cinfo->dest->free_in_buffer == 0 &&
+ !(*cinfo->dest->empty_output_buffer)(cinfo)) {
+ JPEGLI_ERROR("Destination suspension is not supported in markers.");
+ }
+ size_t len = std::min<size_t>(cinfo->dest->free_in_buffer, bufsize - pos);
+ memcpy(cinfo->dest->next_output_byte, buf + pos, len);
+ pos += len;
+ cinfo->dest->free_in_buffer -= len;
+ cinfo->dest->next_output_byte += len;
+ }
+}
+
+void WriteOutput(j_compress_ptr cinfo, const std::vector<uint8_t>& bytes) {
+ WriteOutput(cinfo, bytes.data(), bytes.size());
+}
+
+void WriteOutput(j_compress_ptr cinfo, std::initializer_list<uint8_t> bytes) {
+ WriteOutput(cinfo, bytes.begin(), bytes.size());
+}
+
+void EncodeAPP0(j_compress_ptr cinfo) {
+ WriteOutput(cinfo,
+ {0xff, 0xe0, 0, 16, 'J', 'F', 'I', 'F', '\0',
+ cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
+ cinfo->density_unit, static_cast<uint8_t>(cinfo->X_density >> 8),
+ static_cast<uint8_t>(cinfo->X_density & 0xff),
+ static_cast<uint8_t>(cinfo->Y_density >> 8),
+ static_cast<uint8_t>(cinfo->Y_density & 0xff), 0, 0});
+}
+
+void EncodeAPP14(j_compress_ptr cinfo) {
+ uint8_t color_transform = cinfo->jpeg_color_space == JCS_YCbCr ? 1
+ : cinfo->jpeg_color_space == JCS_YCCK ? 2
+ : 0;
+ WriteOutput(cinfo, {0xff, 0xee, 0, 14, 'A', 'd', 'o', 'b', 'e', 0, 100, 0, 0,
+ 0, 0, color_transform});
+}
+
+void EncodeSOF(j_compress_ptr cinfo, bool is_baseline) {
+ if (cinfo->data_precision != kJpegPrecision) {
+ is_baseline = false;
+ JPEGLI_ERROR("Unsupported data precision %d", cinfo->data_precision);
+ }
+ const uint8_t marker = cinfo->progressive_mode ? 0xc2
+ : is_baseline ? 0xc0
+ : 0xc1;
+ const size_t n_comps = cinfo->num_components;
+ const size_t marker_len = 8 + 3 * n_comps;
+ std::vector<uint8_t> data(marker_len + 2);
+ size_t pos = 0;
+ data[pos++] = 0xFF;
+ data[pos++] = marker;
+ data[pos++] = marker_len >> 8u;
+ data[pos++] = marker_len & 0xFFu;
+ data[pos++] = kJpegPrecision;
+ data[pos++] = cinfo->image_height >> 8u;
+ data[pos++] = cinfo->image_height & 0xFFu;
+ data[pos++] = cinfo->image_width >> 8u;
+ data[pos++] = cinfo->image_width & 0xFFu;
+ data[pos++] = n_comps;
+ for (size_t i = 0; i < n_comps; ++i) {
+ jpeg_component_info* comp = &cinfo->comp_info[i];
+ data[pos++] = comp->component_id;
+ data[pos++] = ((comp->h_samp_factor << 4u) | (comp->v_samp_factor));
+ const uint32_t quant_idx = comp->quant_tbl_no;
+ if (cinfo->quant_tbl_ptrs[quant_idx] == nullptr) {
+ JPEGLI_ERROR("Invalid component quant table index %u.", quant_idx);
+ }
+ data[pos++] = quant_idx;
+ }
+ WriteOutput(cinfo, data);
+}
+
+void EncodeSOS(j_compress_ptr cinfo, int scan_index) {
+ const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
+ const ScanCodingInfo& sci = cinfo->master->scan_coding_info[scan_index];
+ const size_t marker_len = 6 + 2 * scan_info->comps_in_scan;
+ std::vector<uint8_t> data(marker_len + 2);
+ size_t pos = 0;
+ data[pos++] = 0xFF;
+ data[pos++] = 0xDA;
+ data[pos++] = marker_len >> 8u;
+ data[pos++] = marker_len & 0xFFu;
+ data[pos++] = scan_info->comps_in_scan;
+ for (int i = 0; i < scan_info->comps_in_scan; ++i) {
+ int comp_idx = scan_info->component_index[i];
+ data[pos++] = cinfo->comp_info[comp_idx].component_id;
+ data[pos++] = (sci.dc_tbl_idx[i] << 4u) + (sci.ac_tbl_idx[i] - 4);
+ }
+ data[pos++] = scan_info->Ss;
+ data[pos++] = scan_info->Se;
+ data[pos++] = ((scan_info->Ah << 4u) | (scan_info->Al));
+ WriteOutput(cinfo, data);
+}
+
+void EncodeDHT(j_compress_ptr cinfo, const JPEGHuffmanCode* huffman_codes,
+ size_t num_huffman_codes, bool pre_shifted) {
+ if (num_huffman_codes == 0) {
+ return;
+ }
+
+ size_t marker_len = 2;
+ for (size_t i = 0; i < num_huffman_codes; ++i) {
+ const JPEGHuffmanCode& huff = huffman_codes[i];
+ if (huff.sent_table) continue;
+ marker_len += kJpegHuffmanMaxBitLength;
+ for (size_t j = 0; j <= kJpegHuffmanMaxBitLength; ++j) {
+ marker_len += huff.counts[j];
+ }
+ }
+ std::vector<uint8_t> data(marker_len + 2);
+ size_t pos = 0;
+ data[pos++] = 0xFF;
+ data[pos++] = 0xC4;
+ data[pos++] = marker_len >> 8u;
+ data[pos++] = marker_len & 0xFFu;
+ for (size_t i = 0; i < num_huffman_codes; ++i) {
+ const JPEGHuffmanCode& huff = huffman_codes[i];
+ size_t index = huff.slot_id;
+ HuffmanCodeTable* huff_table;
+ if (index & 0x10) {
+ huff_table = &cinfo->master->huff_tables[index - 12];
+ } else {
+ huff_table = &cinfo->master->huff_tables[index];
+ }
+ // TODO(eustas): cache
+ // TODO(eustas): set up non-existing symbols
+ if (!BuildHuffmanCodeTable(huff, huff_table, pre_shifted)) {
+ JPEGLI_ERROR("Failed to build Huffman code table.");
+ }
+ if (huff.sent_table) continue;
+ size_t total_count = 0;
+ size_t max_length = 0;
+ for (size_t i = 0; i <= kJpegHuffmanMaxBitLength; ++i) {
+ if (huff.counts[i] != 0) {
+ max_length = i;
+ }
+ total_count += huff.counts[i];
+ }
+ --total_count;
+ data[pos++] = huff.slot_id;
+ for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) {
+ data[pos++] = (i == max_length ? huff.counts[i] - 1 : huff.counts[i]);
+ }
+ for (size_t i = 0; i < total_count; ++i) {
+ data[pos++] = huff.values[i];
+ }
+ }
+ if (marker_len > 2) {
+ WriteOutput(cinfo, data);
+ }
+}
+
+void EncodeDQT(j_compress_ptr cinfo, bool write_all_tables, bool* is_baseline) {
+ uint8_t data[4 + NUM_QUANT_TBLS * (1 + 2 * DCTSIZE2)]; // 520 bytes
+ size_t pos = 0;
+ data[pos++] = 0xFF;
+ data[pos++] = 0xDB;
+ pos += 2; // Length will be filled in later.
+
+ int send_table[NUM_QUANT_TBLS] = {};
+ if (write_all_tables) {
+ for (int i = 0; i < NUM_QUANT_TBLS; ++i) {
+ if (cinfo->quant_tbl_ptrs[i]) send_table[i] = 1;
+ }
+ } else {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ send_table[cinfo->comp_info[c].quant_tbl_no] = 1;
+ }
+ }
+
+ for (int i = 0; i < NUM_QUANT_TBLS; ++i) {
+ if (!send_table[i]) continue;
+ JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[i];
+ if (quant_table == nullptr) {
+ JPEGLI_ERROR("Missing quant table %d", i);
+ }
+ int precision = 0;
+ for (size_t k = 0; k < DCTSIZE2; ++k) {
+ if (quant_table->quantval[k] > 255) {
+ precision = 1;
+ *is_baseline = false;
+ }
+ }
+ if (quant_table->sent_table) {
+ continue;
+ }
+ data[pos++] = (precision << 4) + i;
+ for (size_t j = 0; j < DCTSIZE2; ++j) {
+ int val_idx = kJPEGNaturalOrder[j];
+ int val = quant_table->quantval[val_idx];
+ if (val == 0) {
+ JPEGLI_ERROR("Invalid quantval 0.");
+ }
+ if (precision) {
+ data[pos++] = val >> 8;
+ }
+ data[pos++] = val & 0xFFu;
+ }
+ quant_table->sent_table = TRUE;
+ }
+ if (pos > 4) {
+ data[2] = (pos - 2) >> 8u;
+ data[3] = (pos - 2) & 0xFFu;
+ WriteOutput(cinfo, data, pos);
+ }
+}
+
+bool EncodeDRI(j_compress_ptr cinfo) {
+ WriteOutput(cinfo, {0xFF, 0xDD, 0, 4,
+ static_cast<uint8_t>(cinfo->restart_interval >> 8),
+ static_cast<uint8_t>(cinfo->restart_interval & 0xFF)});
+ return true;
+}
+
+static JXL_INLINE void EmitMarker(JpegBitWriter* bw, int marker) {
+ bw->data[bw->pos++] = 0xFF;
+ bw->data[bw->pos++] = marker;
+}
+
+void ProgressMonitorEncodePass(j_compress_ptr cinfo, size_t scan_index,
+ size_t mcu_y) {
+ if (cinfo->progress == nullptr) {
+ return;
+ }
+ cinfo->progress->completed_passes = 1 + scan_index;
+ cinfo->progress->pass_counter = mcu_y;
+ cinfo->progress->pass_limit = cinfo->total_iMCU_rows;
+ (*cinfo->progress->progress_monitor)(reinterpret_cast<j_common_ptr>(cinfo));
+}
+
+bool EncodeScan(j_compress_ptr cinfo, int scan_index) {
+ jpeg_comp_master* m = cinfo->master;
+ const int restart_interval = cinfo->restart_interval;
+ int restarts_to_go = restart_interval;
+ int next_restart_marker = 0;
+
+ JpegBitWriter* bw = &m->bw;
+ coeff_t last_dc_coeff[MAX_COMPS_IN_SCAN] = {0};
+ DCTCodingState coding_state;
+ DCTCodingStateInit(&coding_state);
+
+ const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
+ const ScanCodingInfo& sci = m->scan_coding_info[scan_index];
+ // "Non-interleaved" means color data comes in separate scans, in other words
+ // each scan can contain only one color component.
+ const bool is_interleaved = (scan_info->comps_in_scan > 1);
+ jpeg_component_info* base_comp =
+ &cinfo->comp_info[scan_info->component_index[0]];
+ // h_group / v_group act as numerators for converting number of blocks to
+ // number of MCU. In interleaved mode it is 1, so MCU is represented with
+ // max_*_samp_factor blocks. In non-interleaved mode we choose numerator to
+ // be the samping factor, consequently MCU is always represented with single
+ // block.
+ const int h_group = is_interleaved ? 1 : base_comp->h_samp_factor;
+ const int v_group = is_interleaved ? 1 : base_comp->v_samp_factor;
+ int MCUs_per_row =
+ DivCeil(cinfo->image_width * h_group, 8 * cinfo->max_h_samp_factor);
+ int MCU_rows =
+ DivCeil(cinfo->image_height * v_group, 8 * cinfo->max_v_samp_factor);
+ const bool is_progressive = cinfo->progressive_mode;
+ const int Al = scan_info->Al;
+ const int Ah = scan_info->Ah;
+ const int Ss = scan_info->Ss;
+ const int Se = scan_info->Se;
+ HWY_ALIGN constexpr coeff_t kDummyBlock[DCTSIZE2] = {0};
+
+ JBLOCKARRAY ba[MAX_COMPS_IN_SCAN];
+ for (int mcu_y = 0; mcu_y < MCU_rows; ++mcu_y) {
+ ProgressMonitorEncodePass(cinfo, scan_index, mcu_y);
+ for (int i = 0; i < scan_info->comps_in_scan; ++i) {
+ int comp_idx = scan_info->component_index[i];
+ jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+ int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1;
+ int by0 = mcu_y * n_blocks_y;
+ int block_rows_left = comp->height_in_blocks - by0;
+ int max_block_rows = std::min(n_blocks_y, block_rows_left);
+ ba[i] = (*cinfo->mem->access_virt_barray)(
+ reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx],
+ by0, max_block_rows, false);
+ }
+ for (int mcu_x = 0; mcu_x < MCUs_per_row; ++mcu_x) {
+ // Possibly emit a restart marker.
+ if (restart_interval > 0 && restarts_to_go == 0) {
+ Flush(&coding_state, bw);
+ JumpToByteBoundary(bw);
+ EmitMarker(bw, 0xD0 + next_restart_marker);
+ next_restart_marker += 1;
+ next_restart_marker &= 0x7;
+ restarts_to_go = restart_interval;
+ memset(last_dc_coeff, 0, sizeof(last_dc_coeff));
+ }
+ // Encode one MCU
+ for (int i = 0; i < scan_info->comps_in_scan; ++i) {
+ int comp_idx = scan_info->component_index[i];
+ jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+ HuffmanCodeTable* dc_huff = &m->huff_tables[sci.dc_tbl_idx[i]];
+ HuffmanCodeTable* ac_huff = &m->huff_tables[sci.ac_tbl_idx[i]];
+ int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1;
+ int n_blocks_x = is_interleaved ? comp->h_samp_factor : 1;
+ for (int iy = 0; iy < n_blocks_y; ++iy) {
+ for (int ix = 0; ix < n_blocks_x; ++ix) {
+ size_t block_y = mcu_y * n_blocks_y + iy;
+ size_t block_x = mcu_x * n_blocks_x + ix;
+ const coeff_t* block;
+ if (block_x >= comp->width_in_blocks ||
+ block_y >= comp->height_in_blocks) {
+ block = kDummyBlock;
+ } else {
+ block = &ba[i][iy][block_x][0];
+ }
+ bool ok;
+ if (!is_progressive) {
+ ok = EncodeDCTBlockSequential(block, dc_huff, ac_huff,
+ last_dc_coeff + i, bw);
+ } else if (Ah == 0) {
+ ok = EncodeDCTBlockProgressive(block, dc_huff, ac_huff, Ss, Se,
+ Al, &coding_state,
+ last_dc_coeff + i, bw);
+ } else {
+ ok = EncodeRefinementBits(block, ac_huff, Ss, Se, Al,
+ &coding_state, bw);
+ }
+ if (!ok) return false;
+ }
+ }
+ }
+ --restarts_to_go;
+ }
+ if (!EmptyBitWriterBuffer(bw)) {
+ JPEGLI_ERROR("Output suspension is not supported in finish_compress");
+ }
+ }
+ Flush(&coding_state, bw);
+ JumpToByteBoundary(bw);
+ if (!EmptyBitWriterBuffer(bw)) {
+ JPEGLI_ERROR("Output suspension is not supported in finish_compress");
+ }
+ if (!bw->healthy) return false;
+
+ return true;
+}
+
+struct Token {
+ uint8_t histo_idx;
+ uint8_t symbol;
+ uint16_t bits;
+ Token(int i, int s, int b) : histo_idx(i), symbol(s), bits(b) {}
+};
+
+void ComputeTokensForBlock(const coeff_t* block, int histo_dc, int histo_ac,
+ coeff_t* last_dc_coeff, Token** tokens_ptr) {
+ Token* next_token = *tokens_ptr;
+ coeff_t temp2;
+ coeff_t temp;
+ temp2 = block[0];
+ temp = temp2 - *last_dc_coeff;
+ if (temp == 0) {
+ *next_token++ = Token(histo_dc, 0, 0);
+ } else {
+ *last_dc_coeff = temp2;
+ temp2 = temp;
+ if (temp < 0) {
+ temp = -temp;
+ temp2--;
+ }
+ int dc_nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1;
+ int dc_mask = (1 << dc_nbits) - 1;
+ *next_token++ = Token(histo_dc, dc_nbits, temp2 & dc_mask);
+ }
+ int num_nonzeros = HWY_DYNAMIC_DISPATCH(NumNonZero8x8ExceptDC)(block);
+ for (int k = 1; k < 64; ++k) {
+ if (num_nonzeros == 0) {
+ *next_token++ = Token(histo_ac, 0, 0);
+ break;
+ }
+ int r = 0;
+ while ((temp = block[kJPEGNaturalOrder[k]]) == 0) {
+ r++;
+ k++;
+ }
+ --num_nonzeros;
+ if (temp < 0) {
+ temp = -temp;
+ temp2 = ~temp;
+ } else {
+ temp2 = temp;
+ }
+ while (r > 15) {
+ *next_token++ = Token(histo_ac, 0xf0, 0);
+ r -= 16;
+ }
+ int ac_nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1;
+ int ac_mask = (1 << ac_nbits) - 1;
+ int symbol = (r << 4u) + ac_nbits;
+ *next_token++ = Token(histo_ac, symbol, temp2 & ac_mask);
+ }
+ *tokens_ptr = next_token;
+}
+
+struct TokenArray {
+ Token* tokens = nullptr;
+ size_t num_tokens = 0;
+};
+
+size_t MaxNumTokensPerMCURow(j_compress_ptr cinfo) {
+ int MCUs_per_row = DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor);
+ size_t blocks_per_mcu = 0;
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ blocks_per_mcu += comp->h_samp_factor * comp->v_samp_factor;
+ }
+ return kDCTBlockSize * blocks_per_mcu * MCUs_per_row;
+}
+
+size_t EstimateNumTokens(j_compress_ptr cinfo, size_t mcu_y, size_t ysize_mcus,
+ size_t num_tokens, size_t max_per_row) {
+ size_t estimate;
+ if (mcu_y == 0) {
+ estimate = 16 * max_per_row;
+ } else {
+ estimate = (4 * ysize_mcus * num_tokens) / (3 * mcu_y);
+ }
+ size_t mcus_left = ysize_mcus - mcu_y;
+ return std::min(mcus_left * max_per_row,
+ std::max(max_per_row, estimate - num_tokens));
+}
+
+void ComputeTokens(j_compress_ptr cinfo,
+ std::vector<TokenArray>* token_arrays) {
+ jpeg_comp_master* m = cinfo->master;
+ TokenArray ta;
+ Token* next_token = ta.tokens;
+ size_t num_tokens = 0;
+ size_t total_num_tokens = 0;
+ size_t max_tokens_per_mcu_row = MaxNumTokensPerMCURow(cinfo);
+ int xsize_mcus = DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor);
+ int ysize_mcus = DivCeil(cinfo->image_height, 8 * cinfo->max_v_samp_factor);
+ coeff_t last_dc_coeff[MAX_COMPS_IN_SCAN] = {0};
+ JBLOCKARRAY ba[MAX_COMPS_IN_SCAN];
+ for (int mcu_y = 0; mcu_y < ysize_mcus; ++mcu_y) {
+ ProgressMonitorEncodePass(cinfo, 0, mcu_y);
+ ta.num_tokens = next_token - ta.tokens;
+ if (ta.num_tokens + max_tokens_per_mcu_row > num_tokens) {
+ if (ta.tokens) {
+ token_arrays->push_back(ta);
+ total_num_tokens += ta.num_tokens;
+ }
+ num_tokens = EstimateNumTokens(cinfo, mcu_y, ysize_mcus, total_num_tokens,
+ max_tokens_per_mcu_row);
+ ta.tokens = Allocate<Token>(cinfo, num_tokens, JPOOL_IMAGE);
+ next_token = ta.tokens;
+ }
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ int by0 = mcu_y * comp->v_samp_factor;
+ int block_rows_left = comp->height_in_blocks - by0;
+ int max_block_rows = std::min(comp->v_samp_factor, block_rows_left);
+ ba[c] = (*cinfo->mem->access_virt_barray)(
+ reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[c], by0,
+ max_block_rows, false);
+ }
+ if (cinfo->max_h_samp_factor == 1 && cinfo->max_v_samp_factor == 1) {
+ for (int mcu_x = 0; mcu_x < xsize_mcus; ++mcu_x) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ ComputeTokensForBlock(&ba[c][0][mcu_x][0], c, c + 4,
+ &last_dc_coeff[c], &next_token);
+ }
+ }
+ continue;
+ }
+ for (int mcu_x = 0; mcu_x < xsize_mcus; ++mcu_x) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ for (int iy = 0; iy < comp->v_samp_factor; ++iy) {
+ for (int ix = 0; ix < comp->h_samp_factor; ++ix) {
+ size_t block_y = mcu_y * comp->v_samp_factor + iy;
+ size_t block_x = mcu_x * comp->h_samp_factor + ix;
+ if (block_x >= comp->width_in_blocks ||
+ block_y >= comp->height_in_blocks) {
+ *next_token++ = Token(c, 0, 0);
+ *next_token++ = Token(c + 4, 0, 0);
+ continue;
+ }
+ ComputeTokensForBlock(&ba[c][iy][block_x][0], c, c + 4,
+ &last_dc_coeff[c], &next_token);
+ }
+ }
+ }
+ }
+ }
+ ta.num_tokens = next_token - ta.tokens;
+ token_arrays->push_back(ta);
+}
+
+void WriteTokens(j_compress_ptr cinfo, const Token* tokens, size_t num_tokens,
+ const HuffmanCodeTable* huff_tables, const int* context_map,
+ JpegBitWriter* bw) {
+ size_t cycle_len = bw->len / 8;
+ size_t next_cycle = cycle_len;
+ for (size_t i = 0; i < num_tokens; ++i) {
+ Token t = tokens[i];
+ int nbits = t.symbol & 0xf;
+ WriteSymbol(t.symbol, &huff_tables[context_map[t.histo_idx]], bw);
+ if (nbits > 0) {
+ WriteBits(bw, nbits, t.bits);
+ }
+ if (--next_cycle == 0) {
+ if (!EmptyBitWriterBuffer(bw)) {
+ JPEGLI_ERROR("Output suspension is not supported in finish_compress");
+ }
+ next_cycle = cycle_len;
+ }
+ }
+}
+
+void BuildHistograms(const Token* tokens, size_t num_tokens,
+ Histogram* histograms) {
+ for (size_t j = 0; j < num_tokens; ++j) {
+ Token t = tokens[j];
+ ++histograms[t.histo_idx].count[t.symbol];
+ }
+}
+
+void EncodeSingleScan(j_compress_ptr cinfo) {
+ std::vector<TokenArray> token_arrays;
+ ComputeTokens(cinfo, &token_arrays);
+ Histogram histograms[8] = {};
+ for (size_t i = 0; i < token_arrays.size(); ++i) {
+ Token* tokens = token_arrays[i].tokens;
+ size_t num_tokens = token_arrays[i].num_tokens;
+ BuildHistograms(tokens, num_tokens, histograms);
+ }
+ JpegClusteredHistograms dc_clusters;
+ ClusterJpegHistograms(histograms, 4, &dc_clusters);
+ JpegClusteredHistograms ac_clusters;
+ ClusterJpegHistograms(histograms + 4, 4, &ac_clusters);
+
+ JPEGHuffmanCode* huffman_codes =
+ Allocate<JPEGHuffmanCode>(cinfo, 8, JPOOL_IMAGE);
+ size_t num_huffman_codes = 0;
+ for (size_t i = 0; i < dc_clusters.histograms.size(); ++i) {
+ AddJpegHuffmanCode(dc_clusters.histograms[i], i, huffman_codes,
+ &num_huffman_codes);
+ }
+ for (size_t i = 0; i < ac_clusters.histograms.size(); ++i) {
+ AddJpegHuffmanCode(ac_clusters.histograms[i], 0x10 + i, huffman_codes,
+ &num_huffman_codes);
+ }
+
+ bool is_baseline = true;
+ int context_map[8];
+ ScanCodingInfo sci = {};
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ if (dc_clusters.histogram_indexes[c] > 1 ||
+ ac_clusters.histogram_indexes[c] > 1) {
+ is_baseline = false;
+ }
+ sci.dc_tbl_idx[c] = dc_clusters.histogram_indexes[c];
+ sci.ac_tbl_idx[c] = ac_clusters.histogram_indexes[c] + 4;
+ context_map[c] = sci.dc_tbl_idx[c];
+ context_map[c + 4] = sci.ac_tbl_idx[c];
+ }
+ sci.num_huffman_codes = num_huffman_codes;
+ memcpy(cinfo->master->scan_coding_info, &sci, sizeof(sci));
+ EncodeDQT(cinfo, /*write_all_tables=*/false, &is_baseline);
+ EncodeSOF(cinfo, is_baseline);
+ EncodeDHT(cinfo, huffman_codes, num_huffman_codes);
+ EncodeSOS(cinfo, 0);
+
+ JpegBitWriter* bw = &cinfo->master->bw;
+ HuffmanCodeTable* huff_tables = cinfo->master->huff_tables;
+ for (size_t i = 0; i < token_arrays.size(); ++i) {
+ Token* tokens = token_arrays[i].tokens;
+ size_t num_tokens = token_arrays[i].num_tokens;
+ WriteTokens(cinfo, tokens, num_tokens, huff_tables, context_map, bw);
+ }
+ JumpToByteBoundary(bw);
+ if (!EmptyBitWriterBuffer(bw)) {
+ JPEGLI_ERROR("Output suspension is not supported in finish_compress");
+ }
+ if (!bw->healthy) {
+ JPEGLI_ERROR("Failed to encode scan.");
+ }
+}
+
+HWY_EXPORT(WriteiMCURow);
+void WriteiMCURow(j_compress_ptr cinfo) {
+ HWY_DYNAMIC_DISPATCH(WriteiMCURow)(cinfo);
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jpegli/bitstream.h b/third_party/jpeg-xl/lib/jpegli/bitstream.h
new file mode 100644
index 0000000000..18b6a09c29
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/bitstream.h
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_BITSTREAM_H_
+#define LIB_JPEGLI_BITSTREAM_H_
+
+#include <initializer_list>
+#include <vector>
+
+#include "lib/jpegli/encode_internal.h"
+
+namespace jpegli {
+
+void WriteOutput(j_compress_ptr cinfo, const uint8_t* buf, size_t bufsize);
+void WriteOutput(j_compress_ptr cinfo, const std::vector<uint8_t>& bytes);
+void WriteOutput(j_compress_ptr cinfo, std::initializer_list<uint8_t> bytes);
+
+void EncodeAPP0(j_compress_ptr cinfo);
+void EncodeAPP14(j_compress_ptr cinfo);
+void EncodeSOF(j_compress_ptr cinfo, bool is_baseline);
+void EncodeSOS(j_compress_ptr cinfo, int scan_index);
+void EncodeDHT(j_compress_ptr cinfo, const JPEGHuffmanCode* huffman_codes,
+ size_t num_huffman_codes, bool pre_shifted = false);
+void EncodeDQT(j_compress_ptr cinfo, bool write_all_tables, bool* is_baseline);
+bool EncodeDRI(j_compress_ptr cinfo);
+
+bool EncodeScan(j_compress_ptr cinfo, int scan_index);
+
+void EncodeSingleScan(j_compress_ptr cinfo);
+
+void WriteiMCURow(j_compress_ptr cinfo);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_BITSTREAM_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/color_quantize.cc b/third_party/jpeg-xl/lib/jpegli/color_quantize.cc
new file mode 100644
index 0000000000..1079c45c9f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/color_quantize.cc
@@ -0,0 +1,533 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/color_quantize.h"
+
+#include <cmath>
+#include <limits>
+#include <unordered_map>
+
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/error.h"
+
+namespace jpegli {
+
+namespace {
+
+static constexpr int kNumColorCellBits[kMaxComponents] = {3, 4, 3, 3};
+static constexpr int kCompW[kMaxComponents] = {2, 3, 1, 1};
+
+int Pow(int a, int b) {
+ int r = 1;
+ for (int i = 0; i < b; ++i) {
+ r *= a;
+ }
+ return r;
+}
+
+int ComponentOrder(j_decompress_ptr cinfo, int i) {
+ if (cinfo->out_color_components == 3) {
+ return i < 2 ? 1 - i : i;
+ }
+ return i;
+}
+
+int GetColorComponent(int i, int N) {
+ return (i * 255 + (N - 1) / 2) / (N - 1);
+}
+
+} // namespace
+
+void ChooseColorMap1Pass(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ int components = cinfo->out_color_components;
+ int desired = std::min(cinfo->desired_number_of_colors, 256);
+ int num = 1;
+ while (Pow(num + 1, components) <= desired) {
+ ++num;
+ }
+ if (num == 1) {
+ JPEGLI_ERROR("Too few colors (%d) in requested colormap", desired);
+ }
+ int actual = Pow(num, components);
+ for (int i = 0; i < components; ++i) {
+ m->num_colors_[i] = num;
+ }
+ while (actual < desired) {
+ int total = actual;
+ for (int i = 0; i < components; ++i) {
+ int c = ComponentOrder(cinfo, i);
+ int new_total = (actual / m->num_colors_[c]) * (m->num_colors_[c] + 1);
+ if (new_total <= desired) {
+ ++m->num_colors_[c];
+ actual = new_total;
+ }
+ }
+ if (actual == total) {
+ break;
+ }
+ }
+ cinfo->actual_number_of_colors = actual;
+ cinfo->colormap = (*cinfo->mem->alloc_sarray)(
+ reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, actual, components);
+ int next_color[kMaxComponents] = {0};
+ for (int i = 0; i < actual; ++i) {
+ for (int c = 0; c < components; ++c) {
+ cinfo->colormap[c][i] =
+ GetColorComponent(next_color[c], m->num_colors_[c]);
+ }
+ int c = components - 1;
+ while (c > 0 && next_color[c] + 1 == m->num_colors_[c]) {
+ next_color[c--] = 0;
+ }
+ ++next_color[c];
+ }
+ if (!m->colormap_lut_) {
+ m->colormap_lut_ = Allocate<uint8_t>(cinfo, components * 256, JPOOL_IMAGE);
+ }
+ int stride = actual;
+ for (int c = 0; c < components; ++c) {
+ int N = m->num_colors_[c];
+ stride /= N;
+ for (int i = 0; i < 256; ++i) {
+ int index = ((2 * i - 1) * (N - 1) + 254) / 510;
+ m->colormap_lut_[c * 256 + i] = index * stride;
+ }
+ }
+}
+
+namespace {
+
+// 2^13 priority levels for the PQ seems to be a good compromise between
+// accuracy, running time and stack space usage.
+static const int kMaxPriority = 1 << 13;
+static const int kMaxLevel = 3;
+
+// This function is used in the multi-resolution grid to be able to compute
+// the keys for the different resolutions by just shifting the first key.
+inline int InterlaceBitsRGB(uint8_t r, uint8_t g, uint8_t b) {
+ int z = 0;
+ for (int i = 0; i < 7; ++i) {
+ z += (r >> 5) & 4;
+ z += (g >> 6) & 2;
+ z += (b >> 7);
+ z <<= 3;
+ r <<= 1;
+ g <<= 1;
+ b <<= 1;
+ }
+ z += (r >> 5) & 4;
+ z += (g >> 6) & 2;
+ z += (b >> 7);
+ return z;
+}
+
+// This function will compute the actual priorities of the colors based on
+// the current distance from the palette, the population count and the signals
+// from the multi-resolution grid.
+inline int Priority(int d, int n, const int* density, const int* radius) {
+ int p = d * n;
+ for (int level = 0; level < kMaxLevel; ++level) {
+ if (d > radius[level]) {
+ p += density[level] * (d - radius[level]);
+ }
+ }
+ return std::min(kMaxPriority - 1, p >> 4);
+}
+
+inline int ColorIntQuadDistanceRGB(uint8_t r1, uint8_t g1, uint8_t b1,
+ uint8_t r2, uint8_t g2, uint8_t b2) {
+ // weights for the intensity calculation
+ static constexpr int ired = 2;
+ static constexpr int igreen = 5;
+ static constexpr int iblue = 1;
+ // normalization factor for the intensity calculation (2^ishift)
+ static constexpr int ishift = 3;
+ const int rd = r1 - r2;
+ const int gd = g1 - g2;
+ const int bd = b1 - b2;
+ const int id = ired * rd + igreen * gd + iblue * bd;
+ return rd * rd + gd * gd + bd * bd + ((id * id) >> (2 * ishift));
+}
+
+inline int ScaleQuadDistanceRGB(int d) {
+ return static_cast<int>(sqrt(d * 0.25) + 0.5);
+}
+
+// The function updates the minimal distances, the clustering and the
+// quantization error after the insertion of the new color into the palette.
+void AddToRGBPalette(const uint8_t* red, const uint8_t* green,
+ const uint8_t* blue,
+ const int* count, // histogram of colors
+ const int index, // index of color to be added
+ const int k, // size of current palette
+ const int n, // number of colors
+ int* dist, // array of distances from palette
+ int* cluster, // mapping of color indices to palette
+ int* center, // the inverse mapping
+ int64_t* error) { // measure of the quantization error
+ center[k] = index;
+ cluster[index] = k;
+ *error -=
+ static_cast<int64_t>(dist[index]) * static_cast<int64_t>(count[index]);
+ dist[index] = 0;
+ for (int j = 0; j < n; ++j) {
+ if (dist[j] > 0) {
+ const int d = ColorIntQuadDistanceRGB(
+ red[index], green[index], blue[index], red[j], green[j], blue[j]);
+ if (d < dist[j]) {
+ *error += static_cast<int64_t>((d - dist[j])) *
+ static_cast<int64_t>(count[j]);
+ dist[j] = d;
+ cluster[j] = k;
+ }
+ }
+ }
+}
+
+struct RGBPixelHasher {
+ // A quick but good-enough hash to get 24 bits of RGB into the lower 12 bits.
+ size_t operator()(uint32_t a) const { return (a ^ (a >> 12)) * 0x9e3779b9; }
+};
+
+struct WangHasher {
+ // Thomas Wang's Hash. Nearly perfect and still quite fast. Above (for
+ // pixels) we use a simpler hash because the number of hash calls is
+ // proportional to the number of pixels and that hash dominates; we want the
+ // cost to be minimal and we start with a large table. We can use a better
+ // hash for the histogram since the number of hash calls is proportional to
+ // the number of unique colors in the image, which is hopefully much smaller.
+ // Note that the difference is slight; e.g. replacing RGBPixelHasher with
+ // WangHasher only slows things down by 5% on an Opteron.
+ size_t operator()(uint32_t a) const {
+ a = (a ^ 61) ^ (a >> 16);
+ a = a + (a << 3);
+ a = a ^ (a >> 4);
+ a = a * 0x27d4eb2d;
+ a = a ^ (a >> 15);
+ return a;
+ }
+};
+
+// Build an index of all the different colors in the input
+// image. To do this we map the 24 bit RGB representation of the colors
+// to a unique integer index assigned to the different colors in order of
+// appearence in the image. Return the number of unique colors found.
+// The colors are pre-quantized to 3 * 6 bits precision.
+static int BuildRGBColorIndex(const uint8_t* const image, int const num_pixels,
+ int* const count, uint8_t* const red,
+ uint8_t* const green, uint8_t* const blue) {
+ // Impossible because rgb are in the low 24 bits, and the upper 8 bits is 0.
+ const uint32_t impossible_pixel_value = 0x10000000;
+ std::unordered_map<uint32_t, int, RGBPixelHasher> index_map(1 << 12);
+ std::unordered_map<uint32_t, int, RGBPixelHasher>::iterator index_map_lookup;
+ const uint8_t* imagep = &image[0];
+ uint32_t prev_pixel = impossible_pixel_value;
+ int index = 0;
+ int n = 0;
+ for (int i = 0; i < num_pixels; ++i) {
+ uint8_t r = ((*imagep++) & 0xfc) + 2;
+ uint8_t g = ((*imagep++) & 0xfc) + 2;
+ uint8_t b = ((*imagep++) & 0xfc) + 2;
+ uint32_t pixel = (b << 16) | (g << 8) | r;
+ if (pixel != prev_pixel) {
+ prev_pixel = pixel;
+ index_map_lookup = index_map.find(pixel);
+ if (index_map_lookup != index_map.end()) {
+ index = index_map_lookup->second;
+ } else {
+ index_map[pixel] = index = n++;
+ red[index] = r;
+ green[index] = g;
+ blue[index] = b;
+ }
+ }
+ ++count[index];
+ }
+ return n;
+}
+
+} // namespace
+
+void ChooseColorMap2Pass(j_decompress_ptr cinfo) {
+ if (cinfo->out_color_space != JCS_RGB) {
+ JPEGLI_ERROR("Two-pass quantizer must use RGB output color space.");
+ }
+ jpeg_decomp_master* m = cinfo->master;
+ const size_t num_pixels = cinfo->output_width * cinfo->output_height;
+ const int max_color_count = std::max<size_t>(num_pixels, 1u << 18);
+ const int max_palette_size = cinfo->desired_number_of_colors;
+ std::unique_ptr<uint8_t[]> red(new uint8_t[max_color_count]);
+ std::unique_ptr<uint8_t[]> green(new uint8_t[max_color_count]);
+ std::unique_ptr<uint8_t[]> blue(new uint8_t[max_color_count]);
+ std::vector<int> count(max_color_count, 0);
+ // number of colors
+ int n = BuildRGBColorIndex(m->pixels_, num_pixels, &count[0], &red[0],
+ &green[0], &blue[0]);
+
+ std::vector<int> dist(n, std::numeric_limits<int>::max());
+ std::vector<int> cluster(n);
+ std::vector<bool> in_palette(n, false);
+ int center[256];
+ int k = 0; // palette size
+ const int count_threshold = (num_pixels * 4) / max_palette_size;
+ static constexpr int kAveragePixelErrorThreshold = 1;
+ const int64_t error_threshold = num_pixels * kAveragePixelErrorThreshold;
+ int64_t error = 0; // quantization error
+
+ int max_count = 0;
+ int winner = 0;
+ for (int i = 0; i < n; ++i) {
+ if (count[i] > max_count) {
+ max_count = count[i];
+ winner = i;
+ }
+ if (!in_palette[i] && count[i] > count_threshold) {
+ AddToRGBPalette(&red[0], &green[0], &blue[0], &count[0], i, k++, n,
+ &dist[0], &cluster[0], &center[0], &error);
+ in_palette[i] = true;
+ }
+ }
+ if (k == 0) {
+ AddToRGBPalette(&red[0], &green[0], &blue[0], &count[0], winner, k++, n,
+ &dist[0], &cluster[0], &center[0], &error);
+ in_palette[winner] = true;
+ }
+
+ // Calculation of the multi-resolution density grid.
+ std::vector<int> density(n * kMaxLevel);
+ std::vector<int> radius(n * kMaxLevel);
+ std::unordered_map<uint32_t, int, WangHasher> histogram[kMaxLevel];
+ for (int level = 0; level < kMaxLevel; ++level) {
+ // This value is never used because key = InterlaceBitsRGB(...) >> 6
+ }
+
+ for (int i = 0; i < n; ++i) {
+ if (!in_palette[i]) {
+ const int key = InterlaceBitsRGB(red[i], green[i], blue[i]) >> 6;
+ for (int level = 0; level < kMaxLevel; ++level) {
+ histogram[level][key >> (3 * level)] += count[i];
+ }
+ }
+ }
+ for (int i = 0; i < n; ++i) {
+ if (!in_palette[i]) {
+ for (int level = 0; level < kMaxLevel; ++level) {
+ const int mask = (4 << level) - 1;
+ const int rd = std::max(red[i] & mask, mask - (red[i] & mask));
+ const int gd = std::max(green[i] & mask, mask - (green[i] & mask));
+ const int bd = std::max(blue[i] & mask, mask - (blue[i] & mask));
+ radius[i * kMaxLevel + level] =
+ ScaleQuadDistanceRGB(ColorIntQuadDistanceRGB(0, 0, 0, rd, gd, bd));
+ }
+ const int key = InterlaceBitsRGB(red[i], green[i], blue[i]) >> 6;
+ if (kMaxLevel > 0) {
+ density[i * kMaxLevel] = histogram[0][key] - count[i];
+ }
+ for (int level = 1; level < kMaxLevel; ++level) {
+ density[i * kMaxLevel + level] =
+ (histogram[level][key >> (3 * level)] -
+ histogram[level - 1][key >> (3 * level - 3)]);
+ }
+ }
+ }
+
+ // Calculate the initial error now that the palette has been initialized.
+ error = 0;
+ for (int i = 0; i < n; ++i) {
+ error += static_cast<int64_t>(dist[i]) * static_cast<int64_t>(count[i]);
+ }
+
+ std::unique_ptr<std::vector<int>[]> bucket_array(
+ new std::vector<int>[kMaxPriority]);
+ int top_priority = -1;
+ for (int i = 0; i < n; ++i) {
+ if (!in_palette[i]) {
+ int priority = Priority(ScaleQuadDistanceRGB(dist[i]), count[i],
+ &density[i * kMaxLevel], &radius[i * kMaxLevel]);
+ bucket_array[priority].push_back(i);
+ top_priority = std::max(priority, top_priority);
+ }
+ }
+ double error_accum = 0;
+ while (top_priority >= 0 && k < max_palette_size) {
+ if (error < error_threshold) {
+ error_accum += std::min(error_threshold, error_threshold - error);
+ if (error_accum >= 10 * error_threshold) {
+ break;
+ }
+ }
+ int i = bucket_array[top_priority].back();
+ int priority = Priority(ScaleQuadDistanceRGB(dist[i]), count[i],
+ &density[i * kMaxLevel], &radius[i * kMaxLevel]);
+ if (priority < top_priority) {
+ bucket_array[priority].push_back(i);
+ } else {
+ AddToRGBPalette(&red[0], &green[0], &blue[0], &count[0], i, k++, n,
+ &dist[0], &cluster[0], &center[0], &error);
+ }
+ bucket_array[top_priority].pop_back();
+ while (top_priority >= 0 && bucket_array[top_priority].empty()) {
+ --top_priority;
+ }
+ }
+
+ cinfo->actual_number_of_colors = k;
+ cinfo->colormap = (*cinfo->mem->alloc_sarray)(
+ reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, k, 3);
+ for (int i = 0; i < k; ++i) {
+ int index = center[i];
+ cinfo->colormap[0][i] = red[index];
+ cinfo->colormap[1][i] = green[index];
+ cinfo->colormap[2][i] = blue[index];
+ }
+}
+
+namespace {
+
+void FindCandidatesForCell(j_decompress_ptr cinfo, int ncomp, int cell[],
+ std::vector<uint8_t>* candidates) {
+ int cell_min[kMaxComponents];
+ int cell_max[kMaxComponents];
+ int cell_center[kMaxComponents];
+ for (int c = 0; c < ncomp; ++c) {
+ cell_min[c] = cell[c] << (8 - kNumColorCellBits[c]);
+ cell_max[c] = cell_min[c] + (1 << (8 - kNumColorCellBits[c])) - 1;
+ cell_center[c] = (cell_min[c] + cell_max[c]) >> 1;
+ }
+ int min_maxdist = std::numeric_limits<int>::max();
+ int mindist[256];
+ for (int i = 0; i < cinfo->actual_number_of_colors; ++i) {
+ int dmin = 0;
+ int dmax = 0;
+ for (int c = 0; c < ncomp; ++c) {
+ int palette_c = cinfo->colormap[c][i];
+ int dminc = 0, dmaxc;
+ if (palette_c < cell_min[c]) {
+ dminc = cell_min[c] - palette_c;
+ dmaxc = cell_max[c] - palette_c;
+ } else if (palette_c > cell_max[c]) {
+ dminc = palette_c - cell_max[c];
+ dmaxc = palette_c - cell_min[c];
+ } else if (palette_c > cell_center[c]) {
+ dmaxc = palette_c - cell_min[c];
+ } else {
+ dmaxc = cell_max[c] - palette_c;
+ }
+ dminc *= kCompW[c];
+ dmaxc *= kCompW[c];
+ dmin += dminc * dminc;
+ dmax += dmaxc * dmaxc;
+ }
+ mindist[i] = dmin;
+ min_maxdist = std::min(dmax, min_maxdist);
+ }
+ for (int i = 0; i < cinfo->actual_number_of_colors; ++i) {
+ if (mindist[i] < min_maxdist) {
+ candidates->push_back(i);
+ }
+ }
+}
+
+} // namespace
+
+void CreateInverseColorMap(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ int ncomp = cinfo->out_color_components;
+ int num_cells = 1;
+ for (int c = 0; c < ncomp; ++c) {
+ num_cells *= (1 << kNumColorCellBits[c]);
+ }
+ m->candidate_lists_.resize(num_cells);
+
+ int next_cell[kMaxComponents] = {0};
+ for (int i = 0; i < num_cells; ++i) {
+ m->candidate_lists_[i].clear();
+ FindCandidatesForCell(cinfo, ncomp, next_cell, &m->candidate_lists_[i]);
+ int c = ncomp - 1;
+ while (c > 0 && next_cell[c] + 1 == (1 << kNumColorCellBits[c])) {
+ next_cell[c--] = 0;
+ }
+ ++next_cell[c];
+ }
+ m->regenerate_inverse_colormap_ = false;
+}
+
+int LookupColorIndex(j_decompress_ptr cinfo, JSAMPLE* pixel) {
+ jpeg_decomp_master* m = cinfo->master;
+ int num_channels = cinfo->out_color_components;
+ int index = 0;
+ if (m->quant_mode_ == 1) {
+ for (int c = 0; c < num_channels; ++c) {
+ index += m->colormap_lut_[c * 256 + pixel[c]];
+ }
+ } else {
+ size_t cell_idx = 0;
+ size_t stride = 1;
+ for (int c = num_channels - 1; c >= 0; --c) {
+ cell_idx += (pixel[c] >> (8 - kNumColorCellBits[c])) * stride;
+ stride <<= kNumColorCellBits[c];
+ }
+ JXL_ASSERT(cell_idx < m->candidate_lists_.size());
+ int mindist = std::numeric_limits<int>::max();
+ const auto& candidates = m->candidate_lists_[cell_idx];
+ for (uint8_t i : candidates) {
+ int dist = 0;
+ for (int c = 0; c < num_channels; ++c) {
+ int d = (cinfo->colormap[c][i] - pixel[c]) * kCompW[c];
+ dist += d * d;
+ }
+ if (dist < mindist) {
+ mindist = dist;
+ index = i;
+ }
+ }
+ }
+ JXL_ASSERT(index < cinfo->actual_number_of_colors);
+ return index;
+}
+
+void CreateOrderedDitherTables(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ static constexpr size_t kDitherSize = 4;
+ static constexpr size_t kDitherMask = kDitherSize - 1;
+ static constexpr float kBaseDitherMatrix[] = {
+ 0, 8, 2, 10, //
+ 12, 4, 14, 6, //
+ 3, 11, 1, 9, //
+ 15, 7, 13, 5, //
+ };
+ m->dither_size_ = kDitherSize;
+ m->dither_mask_ = kDitherMask;
+ size_t ncells = m->dither_size_ * m->dither_size_;
+ for (int c = 0; c < cinfo->out_color_components; ++c) {
+ float spread = 1.0f / (m->num_colors_[c] - 1);
+ float mul = spread / ncells;
+ float offset = 0.5f * spread;
+ if (m->dither_[c] == nullptr) {
+ m->dither_[c] = Allocate<float>(cinfo, ncells, JPOOL_IMAGE_ALIGNED);
+ }
+ for (size_t idx = 0; idx < ncells; ++idx) {
+ m->dither_[c][idx] = kBaseDitherMatrix[idx] * mul - offset;
+ }
+ }
+}
+
+void InitFSDitherState(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ for (int c = 0; c < cinfo->out_color_components; ++c) {
+ if (m->error_row_[c] == nullptr) {
+ m->error_row_[c] =
+ Allocate<float>(cinfo, cinfo->output_width, JPOOL_IMAGE_ALIGNED);
+ m->error_row_[c + kMaxComponents] =
+ Allocate<float>(cinfo, cinfo->output_width, JPOOL_IMAGE_ALIGNED);
+ }
+ memset(m->error_row_[c], 0.0, cinfo->output_width * sizeof(float));
+ memset(m->error_row_[c + kMaxComponents], 0.0,
+ cinfo->output_width * sizeof(float));
+ }
+}
+
+} // namespace jpegli
diff --git a/third_party/jpeg-xl/lib/jpegli/color_quantize.h b/third_party/jpeg-xl/lib/jpegli/color_quantize.h
new file mode 100644
index 0000000000..36a92d2f77
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/color_quantize.h
@@ -0,0 +1,30 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_COLOR_QUANTIZE_H_
+#define LIB_JPEGLI_COLOR_QUANTIZE_H_
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+/* clang-format on */
+
+namespace jpegli {
+
+void ChooseColorMap1Pass(j_decompress_ptr cinfo);
+
+void ChooseColorMap2Pass(j_decompress_ptr cinfo);
+
+void CreateInverseColorMap(j_decompress_ptr cinfo);
+
+void CreateOrderedDitherTables(j_decompress_ptr cinfo);
+
+void InitFSDitherState(j_decompress_ptr cinfo);
+
+int LookupColorIndex(j_decompress_ptr cinfo, JSAMPLE* pixel);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_COLOR_QUANTIZE_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/color_transform.cc b/third_party/jpeg-xl/lib/jpegli/color_transform.cc
new file mode 100644
index 0000000000..020a6fd80c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/color_transform.cc
@@ -0,0 +1,281 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/color_transform.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/color_transform.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Div;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Sub;
+
+void YCbCrToRGB(float* row[kMaxComponents], size_t xsize) {
+ const HWY_CAPPED(float, 8) df;
+ float* JXL_RESTRICT row0 = row[0];
+ float* JXL_RESTRICT row1 = row[1];
+ float* JXL_RESTRICT row2 = row[2];
+
+ // Full-range BT.601 as defined by JFIF Clause 7:
+ // https://www.itu.int/rec/T-REC-T.871-201105-I/en
+ const auto crcr = Set(df, 1.402f);
+ const auto cgcb = Set(df, -0.114f * 1.772f / 0.587f);
+ const auto cgcr = Set(df, -0.299f * 1.402f / 0.587f);
+ const auto cbcb = Set(df, 1.772f);
+
+ for (size_t x = 0; x < xsize; x += Lanes(df)) {
+ const auto y_vec = Load(df, row0 + x);
+ const auto cb_vec = Load(df, row1 + x);
+ const auto cr_vec = Load(df, row2 + x);
+ const auto r_vec = MulAdd(crcr, cr_vec, y_vec);
+ const auto g_vec = MulAdd(cgcr, cr_vec, MulAdd(cgcb, cb_vec, y_vec));
+ const auto b_vec = MulAdd(cbcb, cb_vec, y_vec);
+ Store(r_vec, df, row0 + x);
+ Store(g_vec, df, row1 + x);
+ Store(b_vec, df, row2 + x);
+ }
+}
+
+void YCCKToCMYK(float* row[kMaxComponents], size_t xsize) {
+ const HWY_CAPPED(float, 8) df;
+ float* JXL_RESTRICT row0 = row[0];
+ float* JXL_RESTRICT row1 = row[1];
+ float* JXL_RESTRICT row2 = row[2];
+ YCbCrToRGB(row, xsize);
+ const auto offset = Set(df, -1.0f / 255.0f);
+ for (size_t x = 0; x < xsize; x += Lanes(df)) {
+ Store(Sub(offset, Load(df, row0 + x)), df, row0 + x);
+ Store(Sub(offset, Load(df, row1 + x)), df, row1 + x);
+ Store(Sub(offset, Load(df, row2 + x)), df, row2 + x);
+ }
+}
+
+void RGBToYCbCr(float* row[kMaxComponents], size_t xsize) {
+ const HWY_CAPPED(float, 8) df;
+ float* JXL_RESTRICT row0 = row[0];
+ float* JXL_RESTRICT row1 = row[1];
+ float* JXL_RESTRICT row2 = row[2];
+ // Full-range BT.601 as defined by JFIF Clause 7:
+ // https://www.itu.int/rec/T-REC-T.871-201105-I/en
+ const auto c128 = Set(df, 128.0f);
+ const auto kR = Set(df, 0.299f); // NTSC luma
+ const auto kG = Set(df, 0.587f);
+ const auto kB = Set(df, 0.114f);
+ const auto kAmpR = Set(df, 0.701f);
+ const auto kAmpB = Set(df, 0.886f);
+ const auto kDiffR = Add(kAmpR, kR);
+ const auto kDiffB = Add(kAmpB, kB);
+ const auto kNormR = Div(Set(df, 1.0f), (Add(kAmpR, Add(kG, kB))));
+ const auto kNormB = Div(Set(df, 1.0f), (Add(kR, Add(kG, kAmpB))));
+
+ for (size_t x = 0; x < xsize; x += Lanes(df)) {
+ const auto r = Load(df, row0 + x);
+ const auto g = Load(df, row1 + x);
+ const auto b = Load(df, row2 + x);
+ const auto r_base = Mul(r, kR);
+ const auto r_diff = Mul(r, kDiffR);
+ const auto g_base = Mul(g, kG);
+ const auto b_base = Mul(b, kB);
+ const auto b_diff = Mul(b, kDiffB);
+ const auto y_base = Add(r_base, Add(g_base, b_base));
+ const auto cb_vec = MulAdd(Sub(b_diff, y_base), kNormB, c128);
+ const auto cr_vec = MulAdd(Sub(r_diff, y_base), kNormR, c128);
+ Store(y_base, df, row0 + x);
+ Store(cb_vec, df, row1 + x);
+ Store(cr_vec, df, row2 + x);
+ }
+}
+
+void CMYKToYCCK(float* row[kMaxComponents], size_t xsize) {
+ const HWY_CAPPED(float, 8) df;
+ float* JXL_RESTRICT row0 = row[0];
+ float* JXL_RESTRICT row1 = row[1];
+ float* JXL_RESTRICT row2 = row[2];
+ const auto unity = Set(df, 255.0f);
+ for (size_t x = 0; x < xsize; x += Lanes(df)) {
+ Store(Sub(unity, Load(df, row0 + x)), df, row0 + x);
+ Store(Sub(unity, Load(df, row1 + x)), df, row1 + x);
+ Store(Sub(unity, Load(df, row2 + x)), df, row2 + x);
+ }
+ RGBToYCbCr(row, xsize);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+
+HWY_EXPORT(CMYKToYCCK);
+HWY_EXPORT(YCCKToCMYK);
+HWY_EXPORT(YCbCrToRGB);
+HWY_EXPORT(RGBToYCbCr);
+
+bool CheckColorSpaceComponents(int num_components, J_COLOR_SPACE colorspace) {
+ switch (colorspace) {
+ case JCS_GRAYSCALE:
+ return num_components == 1;
+ case JCS_RGB:
+ case JCS_YCbCr:
+ case JCS_EXT_RGB:
+ case JCS_EXT_BGR:
+ return num_components == 3;
+ case JCS_CMYK:
+ case JCS_YCCK:
+ case JCS_EXT_RGBX:
+ case JCS_EXT_BGRX:
+ case JCS_EXT_XBGR:
+ case JCS_EXT_XRGB:
+ case JCS_EXT_RGBA:
+ case JCS_EXT_BGRA:
+ case JCS_EXT_ABGR:
+ case JCS_EXT_ARGB:
+ return num_components == 4;
+ default:
+ // Unrecognized colorspaces can have any number of channels, since no
+ // color transform will be performed on them.
+ return true;
+ }
+}
+
+void NullTransform(float* row[kMaxComponents], size_t len) {}
+
+void GrayscaleToRGB(float* row[kMaxComponents], size_t len) {
+ memcpy(row[1], row[0], len * sizeof(row[1][0]));
+ memcpy(row[2], row[0], len * sizeof(row[2][0]));
+}
+
+void GrayscaleToYCbCr(float* row[kMaxComponents], size_t len) {
+ memset(row[1], 0, len * sizeof(row[1][0]));
+ memset(row[2], 0, len * sizeof(row[2][0]));
+}
+
+void ChooseColorTransform(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ if (!CheckColorSpaceComponents(cinfo->input_components,
+ cinfo->in_color_space)) {
+ JPEGLI_ERROR("Invalid number of input components %d for colorspace %d",
+ cinfo->input_components, cinfo->in_color_space);
+ }
+ if (!CheckColorSpaceComponents(cinfo->num_components,
+ cinfo->jpeg_color_space)) {
+ JPEGLI_ERROR("Invalid number of components %d for colorspace %d",
+ cinfo->num_components, cinfo->jpeg_color_space);
+ }
+ if (cinfo->jpeg_color_space == cinfo->in_color_space) {
+ if (cinfo->num_components != cinfo->input_components) {
+ JPEGLI_ERROR("Input/output components mismatch: %d vs %d",
+ cinfo->input_components, cinfo->num_components);
+ }
+ // No color transform requested.
+ m->color_transform = NullTransform;
+ return;
+ }
+
+ if (cinfo->in_color_space == JCS_RGB && m->xyb_mode) {
+ JPEGLI_ERROR("Color transform on XYB colorspace is not supported.");
+ }
+
+ m->color_transform = nullptr;
+ if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
+ if (cinfo->in_color_space == JCS_RGB) {
+ m->color_transform = HWY_DYNAMIC_DISPATCH(RGBToYCbCr);
+ } else if (cinfo->in_color_space == JCS_YCbCr ||
+ cinfo->in_color_space == JCS_YCCK) {
+ // Since the first luminance channel is the grayscale version of the
+ // image, nothing to do here
+ m->color_transform = NullTransform;
+ }
+ } else if (cinfo->jpeg_color_space == JCS_RGB) {
+ if (cinfo->in_color_space == JCS_GRAYSCALE) {
+ m->color_transform = GrayscaleToRGB;
+ }
+ } else if (cinfo->jpeg_color_space == JCS_YCbCr) {
+ if (cinfo->in_color_space == JCS_RGB) {
+ m->color_transform = HWY_DYNAMIC_DISPATCH(RGBToYCbCr);
+ } else if (cinfo->in_color_space == JCS_GRAYSCALE) {
+ m->color_transform = GrayscaleToYCbCr;
+ }
+ } else if (cinfo->jpeg_color_space == JCS_YCCK) {
+ if (cinfo->in_color_space == JCS_CMYK) {
+ m->color_transform = HWY_DYNAMIC_DISPATCH(CMYKToYCCK);
+ }
+ }
+
+ if (m->color_transform == nullptr) {
+ // TODO(szabadka) Support more color transforms.
+ JPEGLI_ERROR("Unsupported color transform %d -> %d", cinfo->in_color_space,
+ cinfo->jpeg_color_space);
+ }
+}
+
+void ChooseColorTransform(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ if (!CheckColorSpaceComponents(cinfo->out_color_components,
+ cinfo->out_color_space)) {
+ JPEGLI_ERROR("Invalid number of output components %d for colorspace %d",
+ cinfo->out_color_components, cinfo->out_color_space);
+ }
+ if (!CheckColorSpaceComponents(cinfo->num_components,
+ cinfo->jpeg_color_space)) {
+ JPEGLI_ERROR("Invalid number of components %d for colorspace %d",
+ cinfo->num_components, cinfo->jpeg_color_space);
+ }
+ if (cinfo->jpeg_color_space == cinfo->out_color_space) {
+ if (cinfo->num_components != cinfo->out_color_components) {
+ JPEGLI_ERROR("Input/output components mismatch: %d vs %d",
+ cinfo->num_components, cinfo->out_color_components);
+ }
+ // No color transform requested.
+ m->color_transform = NullTransform;
+ return;
+ }
+
+ m->color_transform = nullptr;
+ if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
+ if (cinfo->out_color_space == JCS_RGB) {
+ m->color_transform = GrayscaleToRGB;
+ }
+ } else if (cinfo->jpeg_color_space == JCS_RGB) {
+ if (cinfo->out_color_space == JCS_GRAYSCALE) {
+ m->color_transform = HWY_DYNAMIC_DISPATCH(RGBToYCbCr);
+ }
+ } else if (cinfo->jpeg_color_space == JCS_YCbCr) {
+ if (cinfo->out_color_space == JCS_RGB) {
+ m->color_transform = HWY_DYNAMIC_DISPATCH(YCbCrToRGB);
+ } else if (cinfo->out_color_space == JCS_GRAYSCALE) {
+ m->color_transform = NullTransform;
+ }
+ } else if (cinfo->jpeg_color_space == JCS_YCCK) {
+ if (cinfo->out_color_space == JCS_CMYK) {
+ m->color_transform = HWY_DYNAMIC_DISPATCH(YCCKToCMYK);
+ }
+ }
+
+ if (m->color_transform == nullptr) {
+ // TODO(szabadka) Support more color transforms.
+ JPEGLI_ERROR("Unsupported color transform %d -> %d",
+ cinfo->jpeg_color_space, cinfo->out_color_space);
+ }
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jpegli/color_transform.h b/third_party/jpeg-xl/lib/jpegli/color_transform.h
new file mode 100644
index 0000000000..27570858f7
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/color_transform.h
@@ -0,0 +1,24 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_COLOR_TRANSFORM_H_
+#define LIB_JPEGLI_COLOR_TRANSFORM_H_
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+/* clang-format on */
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jpegli {
+
+void ChooseColorTransform(j_compress_ptr cinfo);
+
+void ChooseColorTransform(j_decompress_ptr cinfo);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_COLOR_TRANSFORM_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/common.cc b/third_party/jpeg-xl/lib/jpegli/common.cc
new file mode 100644
index 0000000000..5f34372f3e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/common.cc
@@ -0,0 +1,59 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/common.h"
+
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/memory_manager.h"
+
+void jpegli_abort(j_common_ptr cinfo) {
+ if (cinfo->mem == nullptr) return;
+ for (int pool_id = 0; pool_id < JPOOL_NUMPOOLS; ++pool_id) {
+ if (pool_id == JPOOL_PERMANENT) continue;
+ (*cinfo->mem->free_pool)(cinfo, pool_id);
+ }
+ if (cinfo->is_decompressor) {
+ cinfo->global_state = jpegli::kDecStart;
+ } else {
+ cinfo->global_state = jpegli::kEncStart;
+ }
+}
+
+void jpegli_destroy(j_common_ptr cinfo) {
+ if (cinfo->mem == nullptr) return;
+ (*cinfo->mem->self_destruct)(cinfo);
+ if (cinfo->is_decompressor) {
+ cinfo->global_state = jpegli::kDecNull;
+ delete reinterpret_cast<j_decompress_ptr>(cinfo)->master;
+ } else {
+ cinfo->global_state = jpegli::kEncNull;
+ }
+}
+
+JQUANT_TBL* jpegli_alloc_quant_table(j_common_ptr cinfo) {
+ JQUANT_TBL* table = jpegli::Allocate<JQUANT_TBL>(cinfo, 1);
+ table->sent_table = FALSE;
+ return table;
+}
+
+JHUFF_TBL* jpegli_alloc_huff_table(j_common_ptr cinfo) {
+ JHUFF_TBL* table = jpegli::Allocate<JHUFF_TBL>(cinfo, 1);
+ table->sent_table = FALSE;
+ return table;
+}
+
+int jpegli_bytes_per_sample(JpegliDataType data_type) {
+ switch (data_type) {
+ case JPEGLI_TYPE_UINT8:
+ return 1;
+ case JPEGLI_TYPE_UINT16:
+ return 2;
+ case JPEGLI_TYPE_FLOAT:
+ return 4;
+ default:
+ return 0;
+ }
+}
diff --git a/third_party/jpeg-xl/lib/jpegli/common.h b/third_party/jpeg-xl/lib/jpegli/common.h
new file mode 100644
index 0000000000..f46b751018
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/common.h
@@ -0,0 +1,67 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// This file conatins the C API of the common encoder/decoder part of libjpegli
+// library, which is based on the C API of libjpeg, with the function names
+// changed from jpeg_* to jpegli_*, while compressor and dempressor object
+// definitions are included directly from jpeglib.h
+//
+// Applications can use the libjpegli library in one of the following ways:
+//
+// (1) Include jpegli/encode.h and/or jpegli/decode.h, update the function
+// names of the API and link against libjpegli.
+//
+// (2) Leave the application code unchanged, but replace the libjpeg.so library
+// with the one built by this project that is API- and ABI-compatible with
+// libjpeg-turbo's version of libjpeg.so.
+
+#ifndef LIB_JPEGLI_COMMON_H_
+#define LIB_JPEGLI_COMMON_H_
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+/* clang-format on */
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+struct jpeg_error_mgr* jpegli_std_error(struct jpeg_error_mgr* err);
+
+void jpegli_abort(j_common_ptr cinfo);
+
+void jpegli_destroy(j_common_ptr cinfo);
+
+JQUANT_TBL* jpegli_alloc_quant_table(j_common_ptr cinfo);
+
+JHUFF_TBL* jpegli_alloc_huff_table(j_common_ptr cinfo);
+
+//
+// New API structs and functions that are not available in libjpeg
+//
+// NOTE: This part of the API is still experimental and will probably change in
+// the future.
+//
+
+typedef enum {
+ JPEGLI_TYPE_FLOAT = 0,
+ JPEGLI_TYPE_UINT8 = 2,
+ JPEGLI_TYPE_UINT16 = 3,
+} JpegliDataType;
+
+typedef enum {
+ JPEGLI_NATIVE_ENDIAN = 0,
+ JPEGLI_LITTLE_ENDIAN = 1,
+ JPEGLI_BIG_ENDIAN = 2,
+} JpegliEndianness;
+
+int jpegli_bytes_per_sample(JpegliDataType data_type);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif // LIB_JPEGLI_COMMON_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/common_internal.h b/third_party/jpeg-xl/lib/jpegli/common_internal.h
new file mode 100644
index 0000000000..248d3154e1
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/common_internal.h
@@ -0,0 +1,150 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_COMMON_INTERNAL_H_
+#define LIB_JPEGLI_COMMON_INTERNAL_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <hwy/aligned_allocator.h>
+
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jpegli/simd.h"
+#include "lib/jxl/base/compiler_specific.h" // for ssize_t
+#include "lib/jxl/base/status.h" // for JXL_CHECK
+
+namespace jpegli {
+
+enum State {
+ kDecNull,
+ kDecStart,
+ kDecInHeader,
+ kDecHeaderDone,
+ kDecProcessMarkers,
+ kDecProcessScan,
+ kEncNull,
+ kEncStart,
+ kEncHeader,
+ kEncReadImage,
+ kEncWriteCoeffs,
+};
+
+template <typename T1, typename T2>
+constexpr inline T1 DivCeil(T1 a, T2 b) {
+ return (a + b - 1) / b;
+}
+
+template <typename T1, typename T2>
+constexpr inline T1 RoundUpTo(T1 a, T2 b) {
+ return DivCeil(a, b) * b;
+}
+
+constexpr size_t kDCTBlockSize = 64;
+// This is set to the same value as MAX_COMPS_IN_SCAN, because that is the
+// maximum number of channels the libjpeg-turbo decoder can decode.
+constexpr int kMaxComponents = 4;
+constexpr int kMaxQuantTables = 4;
+constexpr int kJpegPrecision = 8;
+constexpr int kMaxHuffmanTables = 4;
+constexpr size_t kJpegHuffmanMaxBitLength = 16;
+constexpr int kJpegHuffmanAlphabetSize = 256;
+constexpr int kJpegDCAlphabetSize = 12;
+constexpr int kMaxDHTMarkers = 512;
+constexpr int kMaxDimPixels = 65535;
+constexpr uint8_t kApp1 = 0xE1;
+constexpr uint8_t kApp2 = 0xE2;
+const uint8_t kIccProfileTag[12] = "ICC_PROFILE";
+const uint8_t kExifTag[6] = "Exif\0";
+const uint8_t kXMPTag[29] = "http://ns.adobe.com/xap/1.0/";
+
+/* clang-format off */
+constexpr uint32_t kJPEGNaturalOrder[80] = {
+ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63,
+ // extra entries for safety in decoder
+ 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63
+};
+
+constexpr uint32_t kJPEGZigZagOrder[64] = {
+ 0, 1, 5, 6, 14, 15, 27, 28,
+ 2, 4, 7, 13, 16, 26, 29, 42,
+ 3, 8, 12, 17, 25, 30, 41, 43,
+ 9, 11, 18, 24, 31, 40, 44, 53,
+ 10, 19, 23, 32, 39, 45, 52, 54,
+ 20, 22, 33, 38, 46, 51, 55, 60,
+ 21, 34, 37, 47, 50, 56, 59, 61,
+ 35, 36, 48, 49, 57, 58, 62, 63
+};
+/* clang-format on */
+
+template <typename T>
+class RowBuffer {
+ public:
+ template <typename CInfoType>
+ void Allocate(CInfoType cinfo, size_t num_rows, size_t rowsize) {
+ size_t vec_size = std::max(VectorSize(), sizeof(T));
+ JXL_CHECK(vec_size % sizeof(T) == 0);
+ size_t alignment = std::max<size_t>(HWY_ALIGNMENT, vec_size);
+ size_t min_memstride = alignment + rowsize * sizeof(T) + vec_size;
+ size_t memstride = RoundUpTo(min_memstride, alignment);
+ xsize_ = rowsize;
+ ysize_ = num_rows;
+ stride_ = memstride / sizeof(T);
+ offset_ = alignment / sizeof(T);
+ data_ = ::jpegli::Allocate<T>(cinfo, ysize_ * stride_, JPOOL_IMAGE_ALIGNED);
+ }
+
+ T* Row(ssize_t y) const {
+ return &data_[((ysize_ + y) % ysize_) * stride_ + offset_];
+ }
+
+ size_t xsize() const { return xsize_; };
+ size_t ysize() const { return ysize_; };
+ size_t stride() const { return stride_; }
+
+ void PadRow(size_t y, size_t from, int border) {
+ float* row = Row(y);
+ for (int offset = -border; offset < 0; ++offset) {
+ row[offset] = row[0];
+ }
+ float last_val = row[from - 1];
+ for (size_t x = from; x < xsize_ + border; ++x) {
+ row[x] = last_val;
+ }
+ }
+
+ void CopyRow(ssize_t dst_row, ssize_t src_row, int border) {
+ memcpy(Row(dst_row) - border, Row(src_row) - border,
+ (xsize_ + 2 * border) * sizeof(T));
+ }
+
+ void FillRow(ssize_t y, T val, size_t len) {
+ T* row = Row(y);
+ for (size_t x = 0; x < len; ++x) {
+ row[x] = val;
+ }
+ }
+
+ private:
+ size_t xsize_;
+ size_t ysize_;
+ size_t stride_;
+ size_t offset_;
+ T* data_;
+};
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_COMMON_INTERNAL_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/dct-inl.h b/third_party/jpeg-xl/lib/jpegli/dct-inl.h
new file mode 100644
index 0000000000..0524e220d6
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/dct-inl.h
@@ -0,0 +1,266 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JPEGLI_DCT_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JPEGLI_DCT_INL_H_
+#undef LIB_JPEGLI_DCT_INL_H_
+#else
+#define LIB_JPEGLI_DCT_INL_H_
+#endif
+
+#include "lib/jpegli/transpose-inl.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::DemoteTo;
+using hwy::HWY_NAMESPACE::Ge;
+using hwy::HWY_NAMESPACE::IfThenElseZero;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::Round;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::Vec;
+
+using D = HWY_FULL(float);
+using DI = HWY_FULL(int32_t);
+
+template <size_t N>
+void AddReverse(const float* JXL_RESTRICT ain1, const float* JXL_RESTRICT ain2,
+ float* JXL_RESTRICT aout) {
+ HWY_CAPPED(float, 8) d8;
+ for (size_t i = 0; i < N; i++) {
+ auto in1 = Load(d8, ain1 + i * 8);
+ auto in2 = Load(d8, ain2 + (N - i - 1) * 8);
+ Store(Add(in1, in2), d8, aout + i * 8);
+ }
+}
+
+template <size_t N>
+void SubReverse(const float* JXL_RESTRICT ain1, const float* JXL_RESTRICT ain2,
+ float* JXL_RESTRICT aout) {
+ HWY_CAPPED(float, 8) d8;
+ for (size_t i = 0; i < N; i++) {
+ auto in1 = Load(d8, ain1 + i * 8);
+ auto in2 = Load(d8, ain2 + (N - i - 1) * 8);
+ Store(Sub(in1, in2), d8, aout + i * 8);
+ }
+}
+
+template <size_t N>
+void B(float* JXL_RESTRICT coeff) {
+ HWY_CAPPED(float, 8) d8;
+ constexpr float kSqrt2 = 1.41421356237f;
+ auto sqrt2 = Set(d8, kSqrt2);
+ auto in1 = Load(d8, coeff);
+ auto in2 = Load(d8, coeff + 8);
+ Store(MulAdd(in1, sqrt2, in2), d8, coeff);
+ for (size_t i = 1; i + 1 < N; i++) {
+ auto in1 = Load(d8, coeff + i * 8);
+ auto in2 = Load(d8, coeff + (i + 1) * 8);
+ Store(Add(in1, in2), d8, coeff + i * 8);
+ }
+}
+
+// Ideally optimized away by compiler (except the multiply).
+template <size_t N>
+void InverseEvenOdd(const float* JXL_RESTRICT ain, float* JXL_RESTRICT aout) {
+ HWY_CAPPED(float, 8) d8;
+ for (size_t i = 0; i < N / 2; i++) {
+ auto in1 = Load(d8, ain + i * 8);
+ Store(in1, d8, aout + 2 * i * 8);
+ }
+ for (size_t i = N / 2; i < N; i++) {
+ auto in1 = Load(d8, ain + i * 8);
+ Store(in1, d8, aout + (2 * (i - N / 2) + 1) * 8);
+ }
+}
+
+// Constants for DCT implementation. Generated by the following snippet:
+// for i in range(N // 2):
+// print(1.0 / (2 * math.cos((i + 0.5) * math.pi / N)), end=", ")
+template <size_t N>
+struct WcMultipliers;
+
+template <>
+struct WcMultipliers<4> {
+ static constexpr float kMultipliers[] = {
+ 0.541196100146197,
+ 1.3065629648763764,
+ };
+};
+
+template <>
+struct WcMultipliers<8> {
+ static constexpr float kMultipliers[] = {
+ 0.5097955791041592,
+ 0.6013448869350453,
+ 0.8999762231364156,
+ 2.5629154477415055,
+ };
+};
+
+constexpr float WcMultipliers<4>::kMultipliers[];
+constexpr float WcMultipliers<8>::kMultipliers[];
+
+// Invoked on full vector.
+template <size_t N>
+void Multiply(float* JXL_RESTRICT coeff) {
+ HWY_CAPPED(float, 8) d8;
+ for (size_t i = 0; i < N / 2; i++) {
+ auto in1 = Load(d8, coeff + (N / 2 + i) * 8);
+ auto mul = Set(d8, WcMultipliers<N>::kMultipliers[i]);
+ Store(Mul(in1, mul), d8, coeff + (N / 2 + i) * 8);
+ }
+}
+
+void LoadFromBlock(const float* JXL_RESTRICT pixels, size_t pixels_stride,
+ size_t off, float* JXL_RESTRICT coeff) {
+ HWY_CAPPED(float, 8) d8;
+ for (size_t i = 0; i < 8; i++) {
+ Store(LoadU(d8, pixels + i * pixels_stride + off), d8, coeff + i * 8);
+ }
+}
+
+void StoreToBlockAndScale(const float* JXL_RESTRICT coeff, float* output,
+ size_t off) {
+ HWY_CAPPED(float, 8) d8;
+ auto mul = Set(d8, 1.0f / 8);
+ for (size_t i = 0; i < 8; i++) {
+ StoreU(Mul(mul, Load(d8, coeff + i * 8)), d8, output + i * 8 + off);
+ }
+}
+
+template <size_t N>
+struct DCT1DImpl;
+
+template <>
+struct DCT1DImpl<1> {
+ JXL_INLINE void operator()(float* JXL_RESTRICT mem) {}
+};
+
+template <>
+struct DCT1DImpl<2> {
+ JXL_INLINE void operator()(float* JXL_RESTRICT mem) {
+ HWY_CAPPED(float, 8) d8;
+ auto in1 = Load(d8, mem);
+ auto in2 = Load(d8, mem + 8);
+ Store(Add(in1, in2), d8, mem);
+ Store(Sub(in1, in2), d8, mem + 8);
+ }
+};
+
+template <size_t N>
+struct DCT1DImpl {
+ void operator()(float* JXL_RESTRICT mem) {
+ HWY_ALIGN float tmp[N * 8];
+ AddReverse<N / 2>(mem, mem + N * 4, tmp);
+ DCT1DImpl<N / 2>()(tmp);
+ SubReverse<N / 2>(mem, mem + N * 4, tmp + N * 4);
+ Multiply<N>(tmp);
+ DCT1DImpl<N / 2>()(tmp + N * 4);
+ B<N / 2>(tmp + N * 4);
+ InverseEvenOdd<N>(tmp, mem);
+ }
+};
+
+void DCT1D(const float* JXL_RESTRICT pixels, size_t pixels_stride,
+ float* JXL_RESTRICT output) {
+ HWY_CAPPED(float, 8) d8;
+ HWY_ALIGN float tmp[64];
+ for (size_t i = 0; i < 8; i += Lanes(d8)) {
+ // TODO(veluca): consider removing the temporary memory here (as is done in
+ // IDCT), if it turns out that some compilers don't optimize away the loads
+ // and this is performance-critical.
+ LoadFromBlock(pixels, pixels_stride, i, tmp);
+ DCT1DImpl<8>()(tmp);
+ StoreToBlockAndScale(tmp, output, i);
+ }
+}
+
+void TransformFromPixels(const float* JXL_RESTRICT pixels, size_t pixels_stride,
+ float* JXL_RESTRICT coefficients,
+ float* JXL_RESTRICT scratch_space) {
+ DCT1D(pixels, pixels_stride, scratch_space);
+ Transpose8x8Block(scratch_space, coefficients);
+ DCT1D(coefficients, 8, scratch_space);
+ Transpose8x8Block(scratch_space, coefficients);
+}
+
+void StoreQuantizedValue(const Vec<DI>& ival, int16_t* out) {
+ Rebind<int16_t, DI> di16;
+ Store(DemoteTo(di16, ival), di16, out);
+}
+
+void StoreQuantizedValue(const Vec<DI>& ival, int32_t* out) {
+ DI di;
+ Store(ival, di, out);
+}
+
+template <typename T>
+void QuantizeBlock(const float* dct, const float* qmc, float aq_strength,
+ const float* zero_bias_offset, const float* zero_bias_mul,
+ T* block) {
+ D d;
+ DI di;
+ const auto aq_mul = Set(d, aq_strength);
+ for (size_t k = 0; k < DCTSIZE2; k += Lanes(d)) {
+ const auto val = Load(d, dct + k);
+ const auto q = Load(d, qmc + k);
+ const auto qval = Mul(val, q);
+ const auto zb_offset = Load(d, zero_bias_offset + k);
+ const auto zb_mul = Load(d, zero_bias_mul + k);
+ const auto threshold = Add(zb_offset, Mul(zb_mul, aq_mul));
+ const auto nzero_mask = Ge(Abs(qval), threshold);
+ const auto ival = ConvertTo(di, IfThenElseZero(nzero_mask, Round(qval)));
+ StoreQuantizedValue(ival, block + k);
+ }
+}
+
+template <typename T>
+void QuantizeBlockNoAQ(const float* dct, const float* qmc, T* block) {
+ D d;
+ DI di;
+ for (size_t k = 0; k < DCTSIZE2; k += Lanes(d)) {
+ const auto val = Load(d, dct + k);
+ const auto q = Load(d, qmc + k);
+ const auto ival = ConvertTo(di, Round(Mul(val, q)));
+ StoreQuantizedValue(ival, block + k);
+ }
+}
+
+template <typename T>
+void ComputeCoefficientBlock(const float* JXL_RESTRICT pixels, size_t stride,
+ const float* JXL_RESTRICT qmc, float aq_strength,
+ const float* zero_bias_offset,
+ const float* zero_bias_mul,
+ float* JXL_RESTRICT tmp, T* block) {
+ float* JXL_RESTRICT dct = tmp;
+ float* JXL_RESTRICT scratch_space = tmp + DCTSIZE2;
+ TransformFromPixels(pixels, stride, dct, scratch_space);
+ if (aq_strength > 0.0f) {
+ QuantizeBlock(dct, qmc, aq_strength, zero_bias_offset, zero_bias_mul,
+ block);
+ } else {
+ QuantizeBlockNoAQ(dct, qmc, block);
+ }
+ // Center DC values around zero.
+ static constexpr float kDCBias = 128.0f;
+ block[0] = std::round((dct[0] - kDCBias) * qmc[0]);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+#endif // LIB_JPEGLI_DCT_INL_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/dct.cc b/third_party/jpeg-xl/lib/jpegli/dct.cc
new file mode 100644
index 0000000000..4320abe4c6
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/dct.cc
@@ -0,0 +1,75 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/dct.h"
+
+#include <cmath>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/dct.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/dct-inl.h"
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/memory_manager.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+namespace {
+
+void ComputeDCTCoefficients(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ float* tmp = m->dct_buffer;
+ for (int c = 0; c < cinfo->num_components; c++) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ int by0 = m->next_iMCU_row * comp->v_samp_factor;
+ int block_rows_left = comp->height_in_blocks - by0;
+ int max_block_rows = std::min(comp->v_samp_factor, block_rows_left);
+ JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)(
+ reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[c], by0,
+ max_block_rows, true);
+ float* qmc = m->quant_mul[c];
+ RowBuffer<float>* plane = m->raw_data[c];
+ const int h_factor = m->h_factor[c];
+ const int v_factor = m->v_factor[c];
+ const float* zero_bias_offset = m->zero_bias_offset[c];
+ const float* zero_bias_mul = m->zero_bias_mul[c];
+ float aq_strength = 0.0f;
+ for (int iy = 0; iy < comp->v_samp_factor; iy++) {
+ size_t by = by0 + iy;
+ if (by >= comp->height_in_blocks) continue;
+ JBLOCKROW brow = ba[iy];
+ const float* row = plane->Row(8 * by);
+ for (size_t bx = 0; bx < comp->width_in_blocks; bx++) {
+ JCOEF* block = &brow[bx][0];
+ if (m->use_adaptive_quantization) {
+ aq_strength = m->quant_field.Row(by * v_factor)[bx * h_factor];
+ }
+ ComputeCoefficientBlock(row + 8 * bx, plane->stride(), qmc, aq_strength,
+ zero_bias_offset, zero_bias_mul, tmp, block);
+ }
+ }
+ }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+
+HWY_EXPORT(ComputeDCTCoefficients);
+
+void ComputeDCTCoefficients(j_compress_ptr cinfo) {
+ HWY_DYNAMIC_DISPATCH(ComputeDCTCoefficients)(cinfo);
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jpegli/dct.h b/third_party/jpeg-xl/lib/jpegli/dct.h
new file mode 100644
index 0000000000..9ae5f9f7c2
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/dct.h
@@ -0,0 +1,20 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_DCT_H_
+#define LIB_JPEGLI_DCT_H_
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+/* clang-format on */
+
+namespace jpegli {
+
+void ComputeDCTCoefficients(j_compress_ptr cinfo);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_DCT_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/decode.cc b/third_party/jpeg-xl/lib/jpegli/decode.cc
new file mode 100644
index 0000000000..cf87673705
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/decode.cc
@@ -0,0 +1,981 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode.h"
+
+#include <string.h>
+
+#include <vector>
+
+#include "lib/jpegli/color_quantize.h"
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/decode_marker.h"
+#include "lib/jpegli/decode_scan.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jpegli/render.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/status.h"
+
+namespace jpegli {
+
+void InitializeImage(j_decompress_ptr cinfo) {
+ cinfo->restart_interval = 0;
+ cinfo->saw_JFIF_marker = FALSE;
+ cinfo->JFIF_major_version = 1;
+ cinfo->JFIF_minor_version = 1;
+ cinfo->density_unit = 0;
+ cinfo->X_density = 1;
+ cinfo->Y_density = 1;
+ cinfo->saw_Adobe_marker = FALSE;
+ cinfo->Adobe_transform = 0;
+ cinfo->CCIR601_sampling = FALSE; // not used
+ cinfo->marker_list = nullptr;
+ cinfo->comp_info = nullptr;
+ cinfo->input_scan_number = 0;
+ cinfo->input_iMCU_row = 0;
+ cinfo->output_scan_number = 0;
+ cinfo->output_iMCU_row = 0;
+ cinfo->output_scanline = 0;
+ cinfo->unread_marker = 0;
+ cinfo->coef_bits = nullptr;
+ // We set all these to zero since we don't yet support arithmetic coding.
+ memset(cinfo->arith_dc_L, 0, sizeof(cinfo->arith_dc_L));
+ memset(cinfo->arith_dc_U, 0, sizeof(cinfo->arith_dc_U));
+ memset(cinfo->arith_ac_K, 0, sizeof(cinfo->arith_ac_K));
+ // Initialize the private fields.
+ jpeg_decomp_master* m = cinfo->master;
+ m->input_buffer_.clear();
+ m->input_buffer_pos_ = 0;
+ m->codestream_bits_ahead_ = 0;
+ m->is_multiscan_ = false;
+ m->found_soi_ = false;
+ m->found_dri_ = false;
+ m->found_sof_ = false;
+ m->found_eoi_ = false;
+ m->icc_index_ = 0;
+ m->icc_total_ = 0;
+ m->icc_profile_.clear();
+ memset(m->dc_huff_lut_, 0, sizeof(m->dc_huff_lut_));
+ memset(m->ac_huff_lut_, 0, sizeof(m->ac_huff_lut_));
+ // Initialize the values to an invalid symbol so that we can recognize it
+ // when reading the bit stream using a Huffman code with space > 0.
+ for (size_t i = 0; i < kAllHuffLutSize; ++i) {
+ m->dc_huff_lut_[i].bits = 0;
+ m->dc_huff_lut_[i].value = 0xffff;
+ m->ac_huff_lut_[i].bits = 0;
+ m->ac_huff_lut_[i].value = 0xffff;
+ }
+ m->colormap_lut_ = nullptr;
+ m->pixels_ = nullptr;
+ m->scanlines_ = nullptr;
+ m->regenerate_inverse_colormap_ = true;
+ for (int i = 0; i < kMaxComponents; ++i) {
+ m->dither_[i] = nullptr;
+ m->error_row_[i] = nullptr;
+ }
+ m->output_passes_done_ = 0;
+ m->xoffset_ = 0;
+ m->dequant_ = nullptr;
+}
+
+void InitializeDecompressParams(j_decompress_ptr cinfo) {
+ cinfo->jpeg_color_space = JCS_UNKNOWN;
+ cinfo->out_color_space = JCS_UNKNOWN;
+ cinfo->scale_num = 1;
+ cinfo->scale_denom = 1;
+ cinfo->output_gamma = 0.0f;
+ cinfo->buffered_image = FALSE;
+ cinfo->raw_data_out = FALSE;
+ cinfo->dct_method = JDCT_DEFAULT;
+ cinfo->do_fancy_upsampling = TRUE;
+ cinfo->do_block_smoothing = TRUE;
+ cinfo->quantize_colors = FALSE;
+ cinfo->dither_mode = JDITHER_FS;
+ cinfo->two_pass_quantize = TRUE;
+ cinfo->desired_number_of_colors = 256;
+ cinfo->enable_1pass_quant = FALSE;
+ cinfo->enable_external_quant = FALSE;
+ cinfo->enable_2pass_quant = FALSE;
+ cinfo->actual_number_of_colors = 0;
+ cinfo->colormap = nullptr;
+}
+
+void InitProgressMonitor(j_decompress_ptr cinfo, bool coef_only) {
+ if (!cinfo->progress) return;
+ jpeg_decomp_master* m = cinfo->master;
+ int nc = cinfo->num_components;
+ int estimated_num_scans =
+ cinfo->progressive_mode ? 2 + 3 * nc : (m->is_multiscan_ ? nc : 1);
+ cinfo->progress->pass_limit = cinfo->total_iMCU_rows * estimated_num_scans;
+ cinfo->progress->pass_counter = 0;
+ if (coef_only) {
+ cinfo->progress->total_passes = 1;
+ } else {
+ int input_passes = !cinfo->buffered_image && m->is_multiscan_ ? 1 : 0;
+ bool two_pass_quant = cinfo->quantize_colors && !cinfo->colormap &&
+ cinfo->two_pass_quantize && cinfo->enable_2pass_quant;
+ cinfo->progress->total_passes = input_passes + (two_pass_quant ? 2 : 1);
+ }
+ cinfo->progress->completed_passes = 0;
+}
+
+void InitProgressMonitorForOutput(j_decompress_ptr cinfo) {
+ if (!cinfo->progress) return;
+ jpeg_decomp_master* m = cinfo->master;
+ int passes_per_output = cinfo->enable_2pass_quant ? 2 : 1;
+ int output_passes_left = cinfo->buffered_image && !m->found_eoi_ ? 2 : 1;
+ cinfo->progress->total_passes =
+ m->output_passes_done_ + passes_per_output * output_passes_left;
+ cinfo->progress->completed_passes = m->output_passes_done_;
+}
+
+void ProgressMonitorInputPass(j_decompress_ptr cinfo) {
+ if (!cinfo->progress) return;
+ cinfo->progress->pass_counter =
+ ((cinfo->input_scan_number - 1) * cinfo->total_iMCU_rows +
+ cinfo->input_iMCU_row);
+ if (cinfo->progress->pass_counter > cinfo->progress->pass_limit) {
+ cinfo->progress->pass_limit =
+ cinfo->input_scan_number * cinfo->total_iMCU_rows;
+ }
+ (*cinfo->progress->progress_monitor)(reinterpret_cast<j_common_ptr>(cinfo));
+}
+
+void ProgressMonitorOutputPass(j_decompress_ptr cinfo) {
+ if (!cinfo->progress) return;
+ jpeg_decomp_master* m = cinfo->master;
+ int input_passes = !cinfo->buffered_image && m->is_multiscan_ ? 1 : 0;
+ cinfo->progress->pass_counter = cinfo->output_scanline;
+ cinfo->progress->pass_limit = cinfo->output_height;
+ cinfo->progress->completed_passes = input_passes + m->output_passes_done_;
+ (*cinfo->progress->progress_monitor)(reinterpret_cast<j_common_ptr>(cinfo));
+}
+
+void BuildHuffmanLookupTable(j_decompress_ptr cinfo, JHUFF_TBL* table,
+ HuffmanTableEntry* huff_lut) {
+ uint32_t counts[kJpegHuffmanMaxBitLength + 1] = {};
+ counts[0] = 0;
+ int total_count = 0;
+ int space = 1 << kJpegHuffmanMaxBitLength;
+ int max_depth = 1;
+ for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) {
+ int count = table->bits[i];
+ if (count != 0) {
+ max_depth = i;
+ }
+ counts[i] = count;
+ total_count += count;
+ space -= count * (1 << (kJpegHuffmanMaxBitLength - i));
+ }
+ uint32_t values[kJpegHuffmanAlphabetSize + 1] = {};
+ uint8_t values_seen[256] = {0};
+ for (int i = 0; i < total_count; ++i) {
+ int value = table->huffval[i];
+ if (values_seen[value]) {
+ return JPEGLI_ERROR("Duplicate Huffman code value %d", value);
+ }
+ values_seen[value] = 1;
+ values[i] = value;
+ }
+ // Add an invalid symbol that will have the all 1 code.
+ ++counts[max_depth];
+ values[total_count] = kJpegHuffmanAlphabetSize;
+ space -= (1 << (kJpegHuffmanMaxBitLength - max_depth));
+ if (space < 0) {
+ JPEGLI_ERROR("Invalid Huffman code lengths.");
+ } else if (space > 0 && huff_lut[0].value != 0xffff) {
+ // Re-initialize the values to an invalid symbol so that we can recognize
+ // it when reading the bit stream using a Huffman code with space > 0.
+ for (int i = 0; i < kJpegHuffmanLutSize; ++i) {
+ huff_lut[i].bits = 0;
+ huff_lut[i].value = 0xffff;
+ }
+ }
+ BuildJpegHuffmanTable(&counts[0], &values[0], huff_lut);
+}
+
+void PrepareForScan(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+ int comp_idx = cinfo->cur_comp_info[i]->component_index;
+ int* prev_coef_bits = cinfo->coef_bits[comp_idx + cinfo->num_components];
+ for (int k = std::min(cinfo->Ss, 1); k <= std::max(cinfo->Se, 9); k++) {
+ prev_coef_bits[k] =
+ (cinfo->input_scan_number > 0) ? cinfo->coef_bits[comp_idx][k] : 0;
+ }
+ for (int k = cinfo->Ss; k <= cinfo->Se; ++k) {
+ cinfo->coef_bits[comp_idx][k] = cinfo->Al;
+ }
+ }
+ AddStandardHuffmanTables(reinterpret_cast<j_common_ptr>(cinfo),
+ /*is_dc=*/false);
+ AddStandardHuffmanTables(reinterpret_cast<j_common_ptr>(cinfo),
+ /*is_dc=*/true);
+ // Check that all the Huffman tables needed for this scan are defined and
+ // build derived lookup tables.
+ for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+ if (cinfo->Ss == 0) {
+ int dc_tbl_idx = cinfo->cur_comp_info[i]->dc_tbl_no;
+ JHUFF_TBL* table = cinfo->dc_huff_tbl_ptrs[dc_tbl_idx];
+ HuffmanTableEntry* huff_lut =
+ &m->dc_huff_lut_[dc_tbl_idx * kJpegHuffmanLutSize];
+ if (!table) {
+ return JPEGLI_ERROR("DC Huffman table %d not found", dc_tbl_idx);
+ }
+ BuildHuffmanLookupTable(cinfo, table, huff_lut);
+ }
+ if (cinfo->Se > 0) {
+ int ac_tbl_idx = cinfo->cur_comp_info[i]->ac_tbl_no;
+ JHUFF_TBL* table = cinfo->ac_huff_tbl_ptrs[ac_tbl_idx];
+ HuffmanTableEntry* huff_lut =
+ &m->ac_huff_lut_[ac_tbl_idx * kJpegHuffmanLutSize];
+ if (!table) {
+ return JPEGLI_ERROR("AC Huffman table %d not found", ac_tbl_idx);
+ }
+ BuildHuffmanLookupTable(cinfo, table, huff_lut);
+ }
+ }
+ // Copy quantization tables into comp_info.
+ for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+ jpeg_component_info* comp = cinfo->cur_comp_info[i];
+ if (comp->quant_table == nullptr) {
+ comp->quant_table = Allocate<JQUANT_TBL>(cinfo, 1, JPOOL_IMAGE);
+ memcpy(comp->quant_table, cinfo->quant_tbl_ptrs[comp->quant_tbl_no],
+ sizeof(JQUANT_TBL));
+ }
+ }
+ if (cinfo->comps_in_scan == 1) {
+ const auto& comp = *cinfo->cur_comp_info[0];
+ cinfo->MCUs_per_row = DivCeil(cinfo->image_width * comp.h_samp_factor,
+ cinfo->max_h_samp_factor * DCTSIZE);
+ cinfo->MCU_rows_in_scan = DivCeil(cinfo->image_height * comp.v_samp_factor,
+ cinfo->max_v_samp_factor * DCTSIZE);
+ m->mcu_rows_per_iMCU_row_ = cinfo->cur_comp_info[0]->v_samp_factor;
+ } else {
+ cinfo->MCU_rows_in_scan = cinfo->total_iMCU_rows;
+ cinfo->MCUs_per_row = m->iMCU_cols_;
+ m->mcu_rows_per_iMCU_row_ = 1;
+ size_t mcu_size = 0;
+ for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+ jpeg_component_info* comp = cinfo->cur_comp_info[i];
+ mcu_size += comp->h_samp_factor * comp->v_samp_factor;
+ }
+ if (mcu_size > D_MAX_BLOCKS_IN_MCU) {
+ JPEGLI_ERROR("MCU size too big");
+ }
+ }
+ memset(m->last_dc_coeff_, 0, sizeof(m->last_dc_coeff_));
+ m->restarts_to_go_ = cinfo->restart_interval;
+ m->next_restart_marker_ = 0;
+ m->eobrun_ = -1;
+ m->scan_mcu_row_ = 0;
+ m->scan_mcu_col_ = 0;
+ m->codestream_bits_ahead_ = 0;
+ ++cinfo->input_scan_number;
+ cinfo->input_iMCU_row = 0;
+ PrepareForiMCURow(cinfo);
+ cinfo->global_state = kDecProcessScan;
+}
+
+int ConsumeInput(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ if (cinfo->global_state == kDecProcessScan && m->streaming_mode_ &&
+ cinfo->input_iMCU_row > cinfo->output_iMCU_row) {
+ // Prevent input from getting ahead of output in streaming mode.
+ return JPEG_SUSPENDED;
+ }
+ jpeg_source_mgr* src = cinfo->src;
+ int status;
+ for (;;) {
+ const uint8_t* data;
+ size_t len;
+ if (m->input_buffer_.empty()) {
+ data = cinfo->src->next_input_byte;
+ len = cinfo->src->bytes_in_buffer;
+ } else {
+ data = &m->input_buffer_[m->input_buffer_pos_];
+ len = m->input_buffer_.size() - m->input_buffer_pos_;
+ }
+ size_t pos = 0;
+ if (cinfo->global_state == kDecProcessScan) {
+ status = ProcessScan(cinfo, data, len, &pos, &m->codestream_bits_ahead_);
+ } else {
+ status = ProcessMarkers(cinfo, data, len, &pos);
+ }
+ if (m->input_buffer_.empty()) {
+ cinfo->src->next_input_byte += pos;
+ cinfo->src->bytes_in_buffer -= pos;
+ } else {
+ m->input_buffer_pos_ += pos;
+ size_t bytes_left = m->input_buffer_.size() - m->input_buffer_pos_;
+ if (bytes_left <= src->bytes_in_buffer) {
+ src->next_input_byte += (src->bytes_in_buffer - bytes_left);
+ src->bytes_in_buffer = bytes_left;
+ m->input_buffer_.clear();
+ m->input_buffer_pos_ = 0;
+ }
+ }
+ if (status == kHandleRestart) {
+ JXL_DASSERT(m->input_buffer_.size() <=
+ m->input_buffer_pos_ + src->bytes_in_buffer);
+ m->input_buffer_.clear();
+ m->input_buffer_pos_ = 0;
+ if (cinfo->unread_marker == 0xd0 + m->next_restart_marker_) {
+ cinfo->unread_marker = 0;
+ } else {
+ if (!(*cinfo->src->resync_to_restart)(cinfo, m->next_restart_marker_)) {
+ return JPEG_SUSPENDED;
+ }
+ }
+ m->next_restart_marker_ += 1;
+ m->next_restart_marker_ &= 0x7;
+ m->restarts_to_go_ = cinfo->restart_interval;
+ if (cinfo->unread_marker != 0) {
+ JPEGLI_WARN("Failed to resync to next restart marker, skipping scan.");
+ return JPEG_SCAN_COMPLETED;
+ }
+ continue;
+ }
+ if (status == kHandleMarkerProcessor) {
+ JXL_DASSERT(m->input_buffer_.size() <=
+ m->input_buffer_pos_ + src->bytes_in_buffer);
+ m->input_buffer_.clear();
+ m->input_buffer_pos_ = 0;
+ if (!(*GetMarkerProcessor(cinfo))(cinfo)) {
+ return JPEG_SUSPENDED;
+ }
+ cinfo->unread_marker = 0;
+ continue;
+ }
+ if (status != kNeedMoreInput) {
+ break;
+ }
+ if (m->input_buffer_.empty()) {
+ JXL_DASSERT(m->input_buffer_pos_ == 0);
+ m->input_buffer_.assign(src->next_input_byte,
+ src->next_input_byte + src->bytes_in_buffer);
+ }
+ if (!(*cinfo->src->fill_input_buffer)(cinfo)) {
+ m->input_buffer_.clear();
+ m->input_buffer_pos_ = 0;
+ return JPEG_SUSPENDED;
+ }
+ if (src->bytes_in_buffer == 0) {
+ JPEGLI_ERROR("Empty input.");
+ }
+ m->input_buffer_.insert(m->input_buffer_.end(), src->next_input_byte,
+ src->next_input_byte + src->bytes_in_buffer);
+ }
+ if (status == JPEG_SCAN_COMPLETED) {
+ cinfo->global_state = kDecProcessMarkers;
+ } else if (status == JPEG_REACHED_SOS) {
+ if (cinfo->global_state == kDecInHeader) {
+ cinfo->global_state = kDecHeaderDone;
+ } else {
+ PrepareForScan(cinfo);
+ }
+ }
+ return status;
+}
+
+bool IsInputReady(j_decompress_ptr cinfo) {
+ if (cinfo->master->found_eoi_) {
+ return true;
+ }
+ if (cinfo->input_scan_number > cinfo->output_scan_number) {
+ return true;
+ }
+ if (cinfo->input_scan_number < cinfo->output_scan_number) {
+ return false;
+ }
+ if (cinfo->input_iMCU_row == cinfo->total_iMCU_rows) {
+ return true;
+ }
+ return cinfo->input_iMCU_row >
+ cinfo->output_iMCU_row + (cinfo->master->streaming_mode_ ? 0 : 2);
+}
+
+bool ReadOutputPass(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ if (!m->pixels_) {
+ size_t stride = cinfo->out_color_components * cinfo->output_width;
+ size_t num_samples = cinfo->output_height * stride;
+ m->pixels_ = Allocate<uint8_t>(cinfo, num_samples, JPOOL_IMAGE);
+ m->scanlines_ =
+ Allocate<JSAMPROW>(cinfo, cinfo->output_height, JPOOL_IMAGE);
+ for (size_t i = 0; i < cinfo->output_height; ++i) {
+ m->scanlines_[i] = &m->pixels_[i * stride];
+ }
+ }
+ size_t num_output_rows = 0;
+ while (num_output_rows < cinfo->output_height) {
+ if (IsInputReady(cinfo)) {
+ ProgressMonitorOutputPass(cinfo);
+ ProcessOutput(cinfo, &num_output_rows, m->scanlines_,
+ cinfo->output_height);
+ } else if (ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+ return false;
+ }
+ }
+ cinfo->output_scanline = 0;
+ cinfo->output_iMCU_row = 0;
+ return true;
+}
+
+boolean PrepareQuantizedOutput(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ if (cinfo->raw_data_out) {
+ JPEGLI_ERROR("Color quantization is not supported in raw data mode.");
+ }
+ if (m->output_data_type_ != JPEGLI_TYPE_UINT8) {
+ JPEGLI_ERROR("Color quantization must use 8-bit mode.");
+ }
+ if (cinfo->colormap) {
+ m->quant_mode_ = 3;
+ } else if (cinfo->two_pass_quantize && cinfo->enable_2pass_quant) {
+ m->quant_mode_ = 2;
+ } else if (cinfo->enable_1pass_quant) {
+ m->quant_mode_ = 1;
+ } else {
+ JPEGLI_ERROR("Invalid quantization mode change");
+ }
+ if (m->quant_mode_ > 1 && cinfo->dither_mode == JDITHER_ORDERED) {
+ cinfo->dither_mode = JDITHER_FS;
+ }
+ if (m->quant_mode_ == 1) {
+ ChooseColorMap1Pass(cinfo);
+ } else if (m->quant_mode_ == 2) {
+ m->quant_pass_ = 0;
+ if (!ReadOutputPass(cinfo)) {
+ return FALSE;
+ }
+ ChooseColorMap2Pass(cinfo);
+ }
+ if (m->quant_mode_ == 2 ||
+ (m->quant_mode_ == 3 && m->regenerate_inverse_colormap_)) {
+ CreateInverseColorMap(cinfo);
+ }
+ if (cinfo->dither_mode == JDITHER_ORDERED) {
+ CreateOrderedDitherTables(cinfo);
+ } else if (cinfo->dither_mode == JDITHER_FS) {
+ InitFSDitherState(cinfo);
+ }
+ m->quant_pass_ = 1;
+ return TRUE;
+}
+
+void AllocateCoefficientBuffer(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ j_common_ptr comptr = reinterpret_cast<j_common_ptr>(cinfo);
+ jvirt_barray_ptr* coef_arrays = jpegli::Allocate<jvirt_barray_ptr>(
+ cinfo, cinfo->num_components, JPOOL_IMAGE);
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ size_t height_in_blocks =
+ m->streaming_mode_ ? comp->v_samp_factor : comp->height_in_blocks;
+ coef_arrays[c] = (*cinfo->mem->request_virt_barray)(
+ comptr, JPOOL_IMAGE, TRUE, comp->width_in_blocks, height_in_blocks,
+ comp->v_samp_factor);
+ }
+ cinfo->master->coef_arrays = coef_arrays;
+ (*cinfo->mem->realize_virt_arrays)(comptr);
+}
+
+} // namespace jpegli
+
+void jpegli_CreateDecompress(j_decompress_ptr cinfo, int version,
+ size_t structsize) {
+ cinfo->mem = nullptr;
+ if (structsize != sizeof(*cinfo)) {
+ JPEGLI_ERROR("jpeg_decompress_struct has wrong size.");
+ }
+ jpegli::InitMemoryManager(reinterpret_cast<j_common_ptr>(cinfo));
+ cinfo->is_decompressor = TRUE;
+ cinfo->progress = nullptr;
+ cinfo->src = nullptr;
+ for (int i = 0; i < NUM_QUANT_TBLS; i++) {
+ cinfo->quant_tbl_ptrs[i] = nullptr;
+ }
+ for (int i = 0; i < NUM_HUFF_TBLS; i++) {
+ cinfo->dc_huff_tbl_ptrs[i] = nullptr;
+ cinfo->ac_huff_tbl_ptrs[i] = nullptr;
+ }
+ cinfo->global_state = jpegli::kDecStart;
+ cinfo->sample_range_limit = nullptr; // not used
+ cinfo->rec_outbuf_height = 1; // output works with any buffer height
+ cinfo->master = new jpeg_decomp_master;
+ jpeg_decomp_master* m = cinfo->master;
+ for (int i = 0; i < 16; ++i) {
+ m->app_marker_parsers[i] = nullptr;
+ }
+ m->com_marker_parser = nullptr;
+ memset(m->markers_to_save_, 0, sizeof(m->markers_to_save_));
+ jpegli::InitializeDecompressParams(cinfo);
+ jpegli::InitializeImage(cinfo);
+}
+
+void jpegli_destroy_decompress(j_decompress_ptr cinfo) {
+ jpegli_destroy(reinterpret_cast<j_common_ptr>(cinfo));
+}
+
+void jpegli_abort_decompress(j_decompress_ptr cinfo) {
+ jpegli_abort(reinterpret_cast<j_common_ptr>(cinfo));
+}
+
+void jpegli_save_markers(j_decompress_ptr cinfo, int marker_code,
+ unsigned int length_limit) {
+ // TODO(szabadka) Limit our memory usage by taking into account length_limit.
+ jpeg_decomp_master* m = cinfo->master;
+ if (marker_code < 0xe0) {
+ JPEGLI_ERROR("jpegli_save_markers: invalid marker code %d", marker_code);
+ }
+ m->markers_to_save_[marker_code - 0xe0] = 1;
+}
+
+void jpegli_set_marker_processor(j_decompress_ptr cinfo, int marker_code,
+ jpeg_marker_parser_method routine) {
+ jpeg_decomp_master* m = cinfo->master;
+ if (marker_code == 0xfe) {
+ m->com_marker_parser = routine;
+ } else if (marker_code >= 0xe0 && marker_code <= 0xef) {
+ m->app_marker_parsers[marker_code - 0xe0] = routine;
+ } else {
+ JPEGLI_ERROR("jpegli_set_marker_processor: invalid marker code %d",
+ marker_code);
+ }
+}
+
+int jpegli_consume_input(j_decompress_ptr cinfo) {
+ if (cinfo->global_state == jpegli::kDecStart) {
+ (*cinfo->err->reset_error_mgr)(reinterpret_cast<j_common_ptr>(cinfo));
+ (*cinfo->src->init_source)(cinfo);
+ jpegli::InitializeDecompressParams(cinfo);
+ jpegli::InitializeImage(cinfo);
+ cinfo->global_state = jpegli::kDecInHeader;
+ }
+ if (cinfo->global_state == jpegli::kDecHeaderDone) {
+ return JPEG_REACHED_SOS;
+ }
+ if (cinfo->master->found_eoi_) {
+ return JPEG_REACHED_EOI;
+ }
+ if (cinfo->global_state == jpegli::kDecInHeader ||
+ cinfo->global_state == jpegli::kDecProcessMarkers ||
+ cinfo->global_state == jpegli::kDecProcessScan) {
+ return jpegli::ConsumeInput(cinfo);
+ }
+ JPEGLI_ERROR("Unexpected state %d", cinfo->global_state);
+ return JPEG_REACHED_EOI; // return value does not matter
+}
+
+int jpegli_read_header(j_decompress_ptr cinfo, boolean require_image) {
+ if (cinfo->global_state != jpegli::kDecStart &&
+ cinfo->global_state != jpegli::kDecInHeader) {
+ JPEGLI_ERROR("jpegli_read_header: unexpected state %d",
+ cinfo->global_state);
+ }
+ if (cinfo->src == nullptr) {
+ JPEGLI_ERROR("Missing source.");
+ }
+ for (;;) {
+ int retcode = jpegli_consume_input(cinfo);
+ if (retcode == JPEG_SUSPENDED) {
+ return retcode;
+ } else if (retcode == JPEG_REACHED_SOS) {
+ break;
+ } else if (retcode == JPEG_REACHED_EOI) {
+ if (require_image) {
+ JPEGLI_ERROR("jpegli_read_header: unexpected EOI marker.");
+ }
+ jpegli_abort_decompress(cinfo);
+ return JPEG_HEADER_TABLES_ONLY;
+ }
+ };
+ return JPEG_HEADER_OK;
+}
+
+boolean jpegli_read_icc_profile(j_decompress_ptr cinfo, JOCTET** icc_data_ptr,
+ unsigned int* icc_data_len) {
+ if (cinfo->global_state == jpegli::kDecStart ||
+ cinfo->global_state == jpegli::kDecInHeader) {
+ JPEGLI_ERROR("jpegli_read_icc_profile: unexpected state %d",
+ cinfo->global_state);
+ }
+ if (icc_data_ptr == nullptr || icc_data_len == nullptr) {
+ JPEGLI_ERROR("jpegli_read_icc_profile: invalid output buffer");
+ }
+ jpeg_decomp_master* m = cinfo->master;
+ if (m->icc_profile_.empty()) {
+ *icc_data_ptr = nullptr;
+ *icc_data_len = 0;
+ return FALSE;
+ }
+ *icc_data_len = m->icc_profile_.size();
+ *icc_data_ptr = (JOCTET*)malloc(*icc_data_len);
+ if (*icc_data_ptr == nullptr) {
+ JPEGLI_ERROR("jpegli_read_icc_profile: Out of memory");
+ }
+ memcpy(*icc_data_ptr, m->icc_profile_.data(), *icc_data_len);
+ return TRUE;
+}
+
+void jpegli_core_output_dimensions(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ if (!m->found_sof_) {
+ JPEGLI_ERROR("No SOF marker found.");
+ }
+ if (cinfo->raw_data_out) {
+ if (cinfo->scale_num != 1 || cinfo->scale_denom != 1) {
+ JPEGLI_ERROR("Output scaling is not supported in raw output mode");
+ }
+ }
+ if (cinfo->scale_num != 1 || cinfo->scale_denom != 1) {
+ int dctsize = 16;
+ while (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * (dctsize - 1)) {
+ --dctsize;
+ }
+ m->min_scaled_dct_size = dctsize;
+ cinfo->output_width =
+ jpegli::DivCeil(cinfo->image_width * dctsize, DCTSIZE);
+ cinfo->output_height =
+ jpegli::DivCeil(cinfo->image_height * dctsize, DCTSIZE);
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ m->scaled_dct_size[c] = m->min_scaled_dct_size;
+ }
+ } else {
+ cinfo->output_width = cinfo->image_width;
+ cinfo->output_height = cinfo->image_height;
+ m->min_scaled_dct_size = DCTSIZE;
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ m->scaled_dct_size[c] = DCTSIZE;
+ }
+ }
+}
+
+void jpegli_calc_output_dimensions(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ jpegli_core_output_dimensions(cinfo);
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ m->h_factor[c] = cinfo->max_h_samp_factor / comp->h_samp_factor;
+ m->v_factor[c] = cinfo->max_v_samp_factor / comp->v_samp_factor;
+ }
+ if (cinfo->scale_num != 1 || cinfo->scale_denom != 1) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ // Prefer IDCT scaling over 2x upsampling.
+ while (m->scaled_dct_size[c] < DCTSIZE && (m->v_factor[c] % 2) == 0 &&
+ (m->h_factor[c] % 2) == 0) {
+ m->scaled_dct_size[c] *= 2;
+ m->v_factor[c] /= 2;
+ m->h_factor[c] /= 2;
+ }
+ }
+ }
+ if (cinfo->out_color_space == JCS_GRAYSCALE) {
+ cinfo->out_color_components = 1;
+ } else if (cinfo->out_color_space == JCS_RGB ||
+ cinfo->out_color_space == JCS_YCbCr) {
+ cinfo->out_color_components = 3;
+ } else if (cinfo->out_color_space == JCS_CMYK ||
+ cinfo->out_color_space == JCS_YCCK) {
+ cinfo->out_color_components = 4;
+ } else {
+ cinfo->out_color_components = cinfo->num_components;
+ }
+ cinfo->output_components =
+ cinfo->quantize_colors ? 1 : cinfo->out_color_components;
+ cinfo->rec_outbuf_height = 1;
+}
+
+boolean jpegli_has_multiple_scans(j_decompress_ptr cinfo) {
+ if (cinfo->input_scan_number == 0) {
+ JPEGLI_ERROR("No SOS marker found.");
+ }
+ return cinfo->master->is_multiscan_;
+}
+
+boolean jpegli_input_complete(j_decompress_ptr cinfo) {
+ return cinfo->master->found_eoi_;
+}
+
+boolean jpegli_start_decompress(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ if (cinfo->global_state == jpegli::kDecHeaderDone) {
+ m->streaming_mode_ = !m->is_multiscan_ && !cinfo->buffered_image &&
+ (!cinfo->quantize_colors || !cinfo->two_pass_quantize);
+ jpegli::AllocateCoefficientBuffer(cinfo);
+ jpegli_calc_output_dimensions(cinfo);
+ jpegli::PrepareForScan(cinfo);
+ if (cinfo->quantize_colors) {
+ if (cinfo->colormap != nullptr) {
+ cinfo->enable_external_quant = TRUE;
+ } else if (cinfo->two_pass_quantize &&
+ cinfo->out_color_space == JCS_RGB) {
+ cinfo->enable_2pass_quant = TRUE;
+ } else {
+ cinfo->enable_1pass_quant = TRUE;
+ }
+ }
+ jpegli::InitProgressMonitor(cinfo, /*coef_only=*/false);
+ if (cinfo->buffered_image == TRUE) {
+ cinfo->output_scan_number = 0;
+ return TRUE;
+ }
+ } else if (!m->is_multiscan_) {
+ JPEGLI_ERROR("jpegli_start_decompress: unexpected state %d",
+ cinfo->global_state);
+ }
+ if (m->is_multiscan_) {
+ if (cinfo->global_state != jpegli::kDecProcessScan &&
+ cinfo->global_state != jpegli::kDecProcessMarkers) {
+ JPEGLI_ERROR("jpegli_start_decompress: unexpected state %d",
+ cinfo->global_state);
+ }
+ while (!m->found_eoi_) {
+ jpegli::ProgressMonitorInputPass(cinfo);
+ if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+ return FALSE;
+ }
+ }
+ }
+ cinfo->output_scan_number = cinfo->input_scan_number;
+ jpegli::PrepareForOutput(cinfo);
+ if (cinfo->quantize_colors) {
+ return jpegli::PrepareQuantizedOutput(cinfo);
+ } else {
+ return TRUE;
+ }
+}
+
+boolean jpegli_start_output(j_decompress_ptr cinfo, int scan_number) {
+ jpeg_decomp_master* m = cinfo->master;
+ if (!cinfo->buffered_image) {
+ JPEGLI_ERROR("jpegli_start_output: buffered image mode was not set");
+ }
+ if (cinfo->global_state != jpegli::kDecProcessScan &&
+ cinfo->global_state != jpegli::kDecProcessMarkers) {
+ JPEGLI_ERROR("jpegli_start_output: unexpected state %d",
+ cinfo->global_state);
+ }
+ cinfo->output_scan_number = std::max(1, scan_number);
+ if (m->found_eoi_) {
+ cinfo->output_scan_number =
+ std::min(cinfo->output_scan_number, cinfo->input_scan_number);
+ }
+ jpegli::InitProgressMonitorForOutput(cinfo);
+ // TODO(szabadka): Figure out how much we can reuse.
+ jpegli::PrepareForOutput(cinfo);
+ if (cinfo->quantize_colors) {
+ return jpegli::PrepareQuantizedOutput(cinfo);
+ } else {
+ return TRUE;
+ }
+}
+
+boolean jpegli_finish_output(j_decompress_ptr cinfo) {
+ if (!cinfo->buffered_image) {
+ JPEGLI_ERROR("jpegli_finish_output: buffered image mode was not set");
+ }
+ if (cinfo->global_state != jpegli::kDecProcessScan &&
+ cinfo->global_state != jpegli::kDecProcessMarkers) {
+ JPEGLI_ERROR("jpegli_finish_output: unexpected state %d",
+ cinfo->global_state);
+ }
+ // Advance input to the start of the next scan, or to the end of input.
+ while (cinfo->input_scan_number <= cinfo->output_scan_number &&
+ !cinfo->master->found_eoi_) {
+ if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+ return FALSE;
+ }
+ }
+ return TRUE;
+}
+
+JDIMENSION jpegli_read_scanlines(j_decompress_ptr cinfo, JSAMPARRAY scanlines,
+ JDIMENSION max_lines) {
+ jpeg_decomp_master* m = cinfo->master;
+ if (cinfo->global_state != jpegli::kDecProcessScan &&
+ cinfo->global_state != jpegli::kDecProcessMarkers) {
+ JPEGLI_ERROR("jpegli_read_scanlines: unexpected state %d",
+ cinfo->global_state);
+ }
+ if (cinfo->buffered_image) {
+ if (cinfo->output_scan_number == 0) {
+ JPEGLI_ERROR(
+ "jpegli_read_scanlines: "
+ "jpegli_start_output() was not called");
+ }
+ } else if (m->is_multiscan_ && !m->found_eoi_) {
+ JPEGLI_ERROR(
+ "jpegli_read_scanlines: "
+ "jpegli_start_decompress() did not finish");
+ }
+ if (cinfo->output_scanline + max_lines > cinfo->output_height) {
+ max_lines = cinfo->output_height - cinfo->output_scanline;
+ }
+ jpegli::ProgressMonitorOutputPass(cinfo);
+ size_t num_output_rows = 0;
+ while (num_output_rows < max_lines) {
+ if (jpegli::IsInputReady(cinfo)) {
+ jpegli::ProcessOutput(cinfo, &num_output_rows, scanlines, max_lines);
+ } else if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+ break;
+ }
+ }
+ return num_output_rows;
+}
+
+JDIMENSION jpegli_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) {
+ // TODO(szabadka) Skip the IDCT for skipped over blocks.
+ return jpegli_read_scanlines(cinfo, nullptr, num_lines);
+}
+
+void jpegli_crop_scanline(j_decompress_ptr cinfo, JDIMENSION* xoffset,
+ JDIMENSION* width) {
+ jpeg_decomp_master* m = cinfo->master;
+ if ((cinfo->global_state != jpegli::kDecProcessScan &&
+ cinfo->global_state != jpegli::kDecProcessMarkers) ||
+ cinfo->output_scanline != 0) {
+ JPEGLI_ERROR("jpegli_crop_decompress: unexpected state %d",
+ cinfo->global_state);
+ }
+ if (cinfo->raw_data_out) {
+ JPEGLI_ERROR("Output cropping is not supported in raw data mode");
+ }
+ if (xoffset == nullptr || width == nullptr || *width == 0 ||
+ *xoffset + *width > cinfo->output_width) {
+ JPEGLI_ERROR("jpegli_crop_scanline: Invalid arguments");
+ }
+ // TODO(szabadka) Skip the IDCT for skipped over blocks.
+ size_t xend = *xoffset + *width;
+ size_t iMCU_width = m->min_scaled_dct_size * cinfo->max_h_samp_factor;
+ *xoffset = (*xoffset / iMCU_width) * iMCU_width;
+ *width = xend - *xoffset;
+ cinfo->master->xoffset_ = *xoffset;
+ cinfo->output_width = *width;
+}
+
+JDIMENSION jpegli_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data,
+ JDIMENSION max_lines) {
+ if ((cinfo->global_state != jpegli::kDecProcessScan &&
+ cinfo->global_state != jpegli::kDecProcessMarkers) ||
+ !cinfo->raw_data_out) {
+ JPEGLI_ERROR("jpegli_read_raw_data: unexpected state %d",
+ cinfo->global_state);
+ }
+ size_t iMCU_height = cinfo->max_v_samp_factor * DCTSIZE;
+ if (max_lines < iMCU_height) {
+ JPEGLI_ERROR("jpegli_read_raw_data: output buffer too small");
+ }
+ jpegli::ProgressMonitorOutputPass(cinfo);
+ while (!jpegli::IsInputReady(cinfo)) {
+ if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+ return 0;
+ }
+ }
+ if (cinfo->output_iMCU_row < cinfo->total_iMCU_rows) {
+ jpegli::ProcessRawOutput(cinfo, data);
+ return iMCU_height;
+ }
+ return 0;
+}
+
+jvirt_barray_ptr* jpegli_read_coefficients(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ m->streaming_mode_ = false;
+ if (!cinfo->buffered_image && cinfo->global_state == jpegli::kDecHeaderDone) {
+ jpegli::AllocateCoefficientBuffer(cinfo);
+ jpegli_calc_output_dimensions(cinfo);
+ jpegli::InitProgressMonitor(cinfo, /*coef_only=*/true);
+ jpegli::PrepareForScan(cinfo);
+ }
+ if (cinfo->global_state != jpegli::kDecProcessScan &&
+ cinfo->global_state != jpegli::kDecProcessMarkers) {
+ JPEGLI_ERROR("jpegli_read_coefficients: unexpected state %d",
+ cinfo->global_state);
+ }
+ if (!cinfo->buffered_image) {
+ while (!m->found_eoi_) {
+ jpegli::ProgressMonitorInputPass(cinfo);
+ if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+ return nullptr;
+ }
+ }
+ cinfo->output_scanline = cinfo->output_height;
+ }
+ return m->coef_arrays;
+}
+
+boolean jpegli_finish_decompress(j_decompress_ptr cinfo) {
+ if (cinfo->global_state != jpegli::kDecProcessScan &&
+ cinfo->global_state != jpegli::kDecProcessMarkers) {
+ JPEGLI_ERROR("jpegli_finish_decompress: unexpected state %d",
+ cinfo->global_state);
+ }
+ if (!cinfo->buffered_image && cinfo->output_scanline < cinfo->output_height) {
+ JPEGLI_ERROR("Incomplete output");
+ }
+ while (!cinfo->master->found_eoi_) {
+ if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) {
+ return FALSE;
+ }
+ }
+ (*cinfo->src->term_source)(cinfo);
+ jpegli_abort_decompress(cinfo);
+ return TRUE;
+}
+
+boolean jpegli_resync_to_restart(j_decompress_ptr cinfo, int desired) {
+ JPEGLI_WARN("Invalid restart marker found: 0x%02x vs 0x%02x.",
+ cinfo->unread_marker, 0xd0 + desired);
+ // This is a trivial implementation, we just let the decoder skip the entire
+ // scan and attempt to render the partial input.
+ return TRUE;
+}
+
+void jpegli_new_colormap(j_decompress_ptr cinfo) {
+ if (cinfo->global_state != jpegli::kDecProcessScan &&
+ cinfo->global_state != jpegli::kDecProcessMarkers) {
+ JPEGLI_ERROR("jpegli_new_colormap: unexpected state %d",
+ cinfo->global_state);
+ }
+ if (!cinfo->buffered_image) {
+ JPEGLI_ERROR("jpegli_new_colormap: not in buffered image mode");
+ }
+ if (!cinfo->enable_external_quant) {
+ JPEGLI_ERROR("external colormap quantizer was not enabled");
+ }
+ if (!cinfo->quantize_colors || cinfo->colormap == nullptr) {
+ JPEGLI_ERROR("jpegli_new_colormap: not in external colormap mode");
+ }
+ cinfo->master->regenerate_inverse_colormap_ = true;
+}
+
+void jpegli_set_output_format(j_decompress_ptr cinfo, JpegliDataType data_type,
+ JpegliEndianness endianness) {
+ switch (data_type) {
+ case JPEGLI_TYPE_UINT8:
+ case JPEGLI_TYPE_UINT16:
+ case JPEGLI_TYPE_FLOAT:
+ cinfo->master->output_data_type_ = data_type;
+ break;
+ default:
+ JPEGLI_ERROR("Unsupported data type %d", data_type);
+ }
+ switch (endianness) {
+ case JPEGLI_NATIVE_ENDIAN:
+ cinfo->master->swap_endianness_ = false;
+ break;
+ case JPEGLI_LITTLE_ENDIAN:
+ cinfo->master->swap_endianness_ = !IsLittleEndian();
+ break;
+ case JPEGLI_BIG_ENDIAN:
+ cinfo->master->swap_endianness_ = IsLittleEndian();
+ break;
+ default:
+ JPEGLI_ERROR("Unsupported endianness %d", endianness);
+ }
+}
diff --git a/third_party/jpeg-xl/lib/jpegli/decode.h b/third_party/jpeg-xl/lib/jpegli/decode.h
new file mode 100644
index 0000000000..7b7a2034ad
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/decode.h
@@ -0,0 +1,111 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// This file conatins the C API of the decoder part of the libjpegli library,
+// which is based on the C API of libjpeg, with the function names changed from
+// jpeg_* to jpegli_*, while dempressor object definitions are included directly
+// from jpeglib.h
+//
+// Applications can use the libjpegli library in one of the following ways:
+//
+// (1) Include jpegli/encode.h and/or jpegli/decode.h, update the function
+// names of the API and link against libjpegli.
+//
+// (2) Leave the application code unchanged, but replace the libjpeg.so library
+// with the one built by this project that is API- and ABI-compatible with
+// libjpeg-turbo's version of libjpeg.so.
+
+#ifndef LIB_JPEGLI_DECODE_H_
+#define LIB_JPEGLI_DECODE_H_
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+/* clang-format on */
+
+#include "lib/jpegli/common.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define jpegli_create_decompress(cinfo) \
+ jpegli_CreateDecompress((cinfo), JPEG_LIB_VERSION, \
+ (size_t)sizeof(struct jpeg_decompress_struct))
+
+void jpegli_CreateDecompress(j_decompress_ptr cinfo, int version,
+ size_t structsize);
+
+void jpegli_stdio_src(j_decompress_ptr cinfo, FILE *infile);
+
+void jpegli_mem_src(j_decompress_ptr cinfo, const unsigned char *inbuffer,
+ unsigned long insize);
+
+int jpegli_read_header(j_decompress_ptr cinfo, boolean require_image);
+
+boolean jpegli_start_decompress(j_decompress_ptr cinfo);
+
+JDIMENSION jpegli_read_scanlines(j_decompress_ptr cinfo, JSAMPARRAY scanlines,
+ JDIMENSION max_lines);
+
+JDIMENSION jpegli_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines);
+
+void jpegli_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
+ JDIMENSION *width);
+
+boolean jpegli_finish_decompress(j_decompress_ptr cinfo);
+
+JDIMENSION jpegli_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data,
+ JDIMENSION max_lines);
+
+jvirt_barray_ptr *jpegli_read_coefficients(j_decompress_ptr cinfo);
+
+boolean jpegli_has_multiple_scans(j_decompress_ptr cinfo);
+
+boolean jpegli_start_output(j_decompress_ptr cinfo, int scan_number);
+
+boolean jpegli_finish_output(j_decompress_ptr cinfo);
+
+boolean jpegli_input_complete(j_decompress_ptr cinfo);
+
+int jpegli_consume_input(j_decompress_ptr cinfo);
+
+#if JPEG_LIB_VERSION >= 80
+void jpegli_core_output_dimensions(j_decompress_ptr cinfo);
+#endif
+void jpegli_calc_output_dimensions(j_decompress_ptr cinfo);
+
+void jpegli_save_markers(j_decompress_ptr cinfo, int marker_code,
+ unsigned int length_limit);
+
+void jpegli_set_marker_processor(j_decompress_ptr cinfo, int marker_code,
+ jpeg_marker_parser_method routine);
+
+boolean jpegli_resync_to_restart(j_decompress_ptr cinfo, int desired);
+
+boolean jpegli_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr,
+ unsigned int *icc_data_len);
+
+void jpegli_abort_decompress(j_decompress_ptr cinfo);
+
+void jpegli_destroy_decompress(j_decompress_ptr cinfo);
+
+void jpegli_new_colormap(j_decompress_ptr cinfo);
+
+//
+// New API functions that are not available in libjpeg
+//
+// NOTE: This part of the API is still experimental and will probably change in
+// the future.
+//
+
+void jpegli_set_output_format(j_decompress_ptr cinfo, JpegliDataType data_type,
+ JpegliEndianness endianness);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif // LIB_JPEGLI_DECODE_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/decode_api_test.cc b/third_party/jpeg-xl/lib/jpegli/decode_api_test.cc
new file mode 100644
index 0000000000..0bb7321db4
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/decode_api_test.cc
@@ -0,0 +1,1305 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include <cmath>
+#include <vector>
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/file_io.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jpegli {
+namespace {
+
+static constexpr uint8_t kFakeEoiMarker[2] = {0xff, 0xd9};
+static constexpr size_t kNumSourceBuffers = 4;
+
+// Custom source manager that refills the input buffer in chunks, simulating
+// a file reader with a fixed buffer size.
+class SourceManager {
+ public:
+ SourceManager(const uint8_t* data, size_t len, size_t max_chunk_size)
+ : data_(data), len_(len), max_chunk_size_(max_chunk_size) {
+ pub_.skip_input_data = skip_input_data;
+ pub_.resync_to_restart = jpegli_resync_to_restart;
+ pub_.term_source = term_source;
+ pub_.init_source = init_source;
+ pub_.fill_input_buffer = fill_input_buffer;
+ if (max_chunk_size_ == 0) max_chunk_size_ = len;
+ buffers_.resize(kNumSourceBuffers, std::vector<uint8_t>(max_chunk_size_));
+ Reset();
+ }
+
+ void Reset() {
+ pub_.next_input_byte = nullptr;
+ pub_.bytes_in_buffer = 0;
+ pos_ = 0;
+ chunk_idx_ = 0;
+ }
+
+ ~SourceManager() {
+ EXPECT_EQ(0, pub_.bytes_in_buffer);
+ EXPECT_EQ(len_, pos_);
+ }
+
+ private:
+ jpeg_source_mgr pub_;
+ const uint8_t* data_;
+ size_t len_;
+ size_t chunk_idx_;
+ size_t pos_;
+ size_t max_chunk_size_;
+ std::vector<std::vector<uint8_t>> buffers_;
+
+ static void init_source(j_decompress_ptr cinfo) {}
+
+ static boolean fill_input_buffer(j_decompress_ptr cinfo) {
+ auto src = reinterpret_cast<SourceManager*>(cinfo->src);
+ if (src->pos_ < src->len_) {
+ size_t chunk_size = std::min(src->len_ - src->pos_, src->max_chunk_size_);
+ size_t next_idx = ++src->chunk_idx_ % kNumSourceBuffers;
+ uint8_t* next_buffer = src->buffers_[next_idx].data();
+ memcpy(next_buffer, src->data_ + src->pos_, chunk_size);
+ src->pub_.next_input_byte = next_buffer;
+ src->pub_.bytes_in_buffer = chunk_size;
+ } else {
+ src->pub_.next_input_byte = kFakeEoiMarker;
+ src->pub_.bytes_in_buffer = 2;
+ src->len_ += 2;
+ }
+ src->pos_ += src->pub_.bytes_in_buffer;
+ return TRUE;
+ }
+
+ static void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {
+ auto src = reinterpret_cast<SourceManager*>(cinfo->src);
+ if (num_bytes <= 0) {
+ return;
+ }
+ if (src->pub_.bytes_in_buffer >= static_cast<size_t>(num_bytes)) {
+ src->pub_.bytes_in_buffer -= num_bytes;
+ src->pub_.next_input_byte += num_bytes;
+ } else {
+ src->pos_ += num_bytes - src->pub_.bytes_in_buffer;
+ src->pub_.bytes_in_buffer = 0;
+ }
+ }
+
+ static void term_source(j_decompress_ptr cinfo) {}
+};
+
+uint8_t markers_seen[kMarkerSequenceLen];
+size_t num_markers_seen = 0;
+
+uint8_t get_next_byte(j_decompress_ptr cinfo) {
+ if (cinfo->src->bytes_in_buffer == 0) {
+ (*cinfo->src->fill_input_buffer)(cinfo);
+ }
+ cinfo->src->bytes_in_buffer--;
+ return *cinfo->src->next_input_byte++;
+}
+
+boolean test_marker_processor(j_decompress_ptr cinfo) {
+ markers_seen[num_markers_seen] = cinfo->unread_marker;
+ size_t marker_len = (get_next_byte(cinfo) << 8) + get_next_byte(cinfo);
+ EXPECT_EQ(2 + ((num_markers_seen + 2) % sizeof(kMarkerData)), marker_len);
+ if (marker_len > 2) {
+ (*cinfo->src->skip_input_data)(cinfo, marker_len - 2);
+ }
+ ++num_markers_seen;
+ return TRUE;
+}
+
+void ReadOutputImage(const DecompressParams& dparams, j_decompress_ptr cinfo,
+ TestImage* output) {
+ JDIMENSION xoffset = 0;
+ JDIMENSION yoffset = 0;
+ JDIMENSION xsize_cropped = cinfo->output_width;
+ JDIMENSION ysize_cropped = cinfo->output_height;
+ if (dparams.crop_output) {
+ xoffset = xsize_cropped = cinfo->output_width / 3;
+ yoffset = ysize_cropped = cinfo->output_height / 3;
+ jpegli_crop_scanline(cinfo, &xoffset, &xsize_cropped);
+ }
+ output->ysize = ysize_cropped;
+ output->xsize = cinfo->output_width;
+ output->components = cinfo->out_color_components;
+ output->data_type = dparams.data_type;
+ output->endianness = dparams.endianness;
+ size_t bytes_per_sample = jpegli_bytes_per_sample(dparams.data_type);
+ if (cinfo->raw_data_out) {
+ output->color_space = cinfo->jpeg_color_space;
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+ size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+ std::vector<uint8_t> plane(ysize * xsize * bytes_per_sample);
+ output->raw_data.emplace_back(std::move(plane));
+ }
+ } else {
+ output->color_space = cinfo->out_color_space;
+ output->AllocatePixels();
+ }
+ size_t total_output_lines = 0;
+ while (cinfo->output_scanline < cinfo->output_height) {
+ size_t max_lines;
+ size_t num_output_lines;
+ if (cinfo->raw_data_out) {
+ size_t iMCU_height = cinfo->max_v_samp_factor * DCTSIZE;
+ EXPECT_EQ(cinfo->output_scanline, cinfo->output_iMCU_row * iMCU_height);
+ max_lines = iMCU_height;
+ std::vector<std::vector<JSAMPROW>> rowdata(cinfo->num_components);
+ std::vector<JSAMPARRAY> data(cinfo->num_components);
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+ size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+ size_t num_lines = cinfo->comp_info[c].v_samp_factor * DCTSIZE;
+ rowdata[c].resize(num_lines);
+ size_t y0 = cinfo->output_iMCU_row * num_lines;
+ for (size_t i = 0; i < num_lines; ++i) {
+ rowdata[c][i] =
+ y0 + i < ysize ? &output->raw_data[c][(y0 + i) * xsize] : nullptr;
+ }
+ data[c] = &rowdata[c][0];
+ }
+ num_output_lines = jpegli_read_raw_data(cinfo, &data[0], max_lines);
+ } else {
+ size_t max_output_lines = dparams.max_output_lines;
+ if (max_output_lines == 0) max_output_lines = cinfo->output_height;
+ if (cinfo->output_scanline < yoffset) {
+ max_lines = yoffset - cinfo->output_scanline;
+ num_output_lines = jpegli_skip_scanlines(cinfo, max_lines);
+ } else if (cinfo->output_scanline >= yoffset + ysize_cropped) {
+ max_lines = cinfo->output_height - cinfo->output_scanline;
+ num_output_lines = jpegli_skip_scanlines(cinfo, max_lines);
+ } else {
+ size_t lines_left = yoffset + ysize_cropped - cinfo->output_scanline;
+ max_lines = std::min<size_t>(max_output_lines, lines_left);
+ size_t stride = cinfo->output_width * cinfo->out_color_components *
+ bytes_per_sample;
+ std::vector<JSAMPROW> scanlines(max_lines);
+ for (size_t i = 0; i < max_lines; ++i) {
+ size_t yidx = cinfo->output_scanline - yoffset + i;
+ scanlines[i] = &output->pixels[yidx * stride];
+ }
+ num_output_lines =
+ jpegli_read_scanlines(cinfo, &scanlines[0], max_lines);
+ if (cinfo->quantize_colors) {
+ for (size_t i = 0; i < num_output_lines; ++i) {
+ UnmapColors(scanlines[i], cinfo->output_width,
+ cinfo->out_color_components, cinfo->colormap,
+ cinfo->actual_number_of_colors);
+ }
+ }
+ }
+ }
+ total_output_lines += num_output_lines;
+ EXPECT_EQ(total_output_lines, cinfo->output_scanline);
+ EXPECT_EQ(num_output_lines, max_lines);
+ }
+ EXPECT_EQ(cinfo->total_iMCU_rows,
+ DivCeil(cinfo->image_height, cinfo->max_v_samp_factor * DCTSIZE));
+}
+
+struct TestConfig {
+ std::string fn;
+ std::string fn_desc;
+ TestImage input;
+ CompressParams jparams;
+ DecompressParams dparams;
+ bool compare_to_orig = false;
+ float max_tolerance_factor = 1.01f;
+ float max_rms_dist = 1.0f;
+ float max_diff = 35.0f;
+};
+
+std::vector<uint8_t> GetTestJpegData(TestConfig& config) {
+ std::vector<uint8_t> compressed;
+ if (!config.fn.empty()) {
+ compressed = ReadTestData(config.fn.c_str());
+ } else {
+ GeneratePixels(&config.input);
+ JXL_CHECK(EncodeWithJpegli(config.input, config.jparams, &compressed));
+ }
+ if (config.dparams.size_factor < 1.0f) {
+ compressed.resize(compressed.size() * config.dparams.size_factor);
+ }
+ return compressed;
+}
+
+void TestAPINonBuffered(const CompressParams& jparams,
+ const DecompressParams& dparams,
+ const TestImage& expected_output,
+ j_decompress_ptr cinfo, TestImage* output) {
+ if (jparams.add_marker) {
+ jpegli_save_markers(cinfo, kSpecialMarker0, 0xffff);
+ jpegli_save_markers(cinfo, kSpecialMarker1, 0xffff);
+ num_markers_seen = 0;
+ jpegli_set_marker_processor(cinfo, 0xe6, test_marker_processor);
+ jpegli_set_marker_processor(cinfo, 0xe7, test_marker_processor);
+ jpegli_set_marker_processor(cinfo, 0xe8, test_marker_processor);
+ }
+ if (!jparams.icc.empty()) {
+ jpegli_save_markers(cinfo, JPEG_APP0 + 2, 0xffff);
+ }
+ jpegli_read_header(cinfo, /*require_image=*/TRUE);
+ if (jparams.add_marker) {
+ EXPECT_EQ(num_markers_seen, kMarkerSequenceLen);
+ EXPECT_EQ(0, memcmp(markers_seen, kMarkerSequence, num_markers_seen));
+ }
+ if (!jparams.icc.empty()) {
+ uint8_t* icc_data = nullptr;
+ unsigned int icc_len;
+ JXL_CHECK(jpegli_read_icc_profile(cinfo, &icc_data, &icc_len));
+ JXL_CHECK(icc_data);
+ EXPECT_EQ(0, memcmp(jparams.icc.data(), icc_data, icc_len));
+ free(icc_data);
+ }
+ // Check that jpegli_calc_output_dimensions can be called multiple times
+ // even with different parameters.
+ if (!cinfo->raw_data_out) {
+ cinfo->scale_num = 1;
+ cinfo->scale_denom = 2;
+ }
+ jpegli_calc_output_dimensions(cinfo);
+ SetDecompressParams(dparams, cinfo, /*is_jpegli=*/true);
+ VerifyHeader(jparams, cinfo);
+ jpegli_calc_output_dimensions(cinfo);
+ EXPECT_LE(expected_output.xsize, cinfo->output_width);
+ if (!dparams.crop_output) {
+ EXPECT_EQ(expected_output.xsize, cinfo->output_width);
+ }
+ if (dparams.output_mode == COEFFICIENTS) {
+ jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(cinfo);
+ JXL_CHECK(coef_arrays != nullptr);
+ CopyCoefficients(cinfo, coef_arrays, output);
+ } else {
+ jpegli_start_decompress(cinfo);
+ VerifyScanHeader(jparams, cinfo);
+ ReadOutputImage(dparams, cinfo, output);
+ }
+ jpegli_finish_decompress(cinfo);
+}
+
+void TestAPIBuffered(const CompressParams& jparams,
+ const DecompressParams& dparams, j_decompress_ptr cinfo,
+ std::vector<TestImage>* output_progression) {
+ EXPECT_EQ(JPEG_REACHED_SOS,
+ jpegli_read_header(cinfo, /*require_image=*/TRUE));
+ cinfo->buffered_image = TRUE;
+ SetDecompressParams(dparams, cinfo, /*is_jpegli=*/true);
+ VerifyHeader(jparams, cinfo);
+ EXPECT_TRUE(jpegli_start_decompress(cinfo));
+ // start decompress should not read the whole input in buffered image mode
+ EXPECT_FALSE(jpegli_input_complete(cinfo));
+ bool has_multiple_scans = jpegli_has_multiple_scans(cinfo);
+ EXPECT_EQ(0, cinfo->output_scan_number);
+ int sos_marker_cnt = 1; // read_header reads the first SOS marker
+ while (!jpegli_input_complete(cinfo)) {
+ EXPECT_EQ(cinfo->input_scan_number, sos_marker_cnt);
+ if (dparams.skip_scans && (cinfo->input_scan_number % 2) != 1) {
+ int result = JPEG_SUSPENDED;
+ while (result != JPEG_REACHED_SOS && result != JPEG_REACHED_EOI) {
+ result = jpegli_consume_input(cinfo);
+ }
+ if (result == JPEG_REACHED_SOS) ++sos_marker_cnt;
+ continue;
+ }
+ SetScanDecompressParams(dparams, cinfo, cinfo->input_scan_number,
+ /*is_jpegli=*/true);
+ EXPECT_TRUE(jpegli_start_output(cinfo, cinfo->input_scan_number));
+ // start output sets output_scan_number, but does not change
+ // input_scan_number
+ EXPECT_EQ(cinfo->output_scan_number, cinfo->input_scan_number);
+ EXPECT_EQ(cinfo->input_scan_number, sos_marker_cnt);
+ VerifyScanHeader(jparams, cinfo);
+ TestImage output;
+ ReadOutputImage(dparams, cinfo, &output);
+ output_progression->emplace_back(std::move(output));
+ // read scanlines/read raw data does not change input/output scan number
+ EXPECT_EQ(cinfo->input_scan_number, sos_marker_cnt);
+ EXPECT_EQ(cinfo->output_scan_number, cinfo->input_scan_number);
+ EXPECT_TRUE(jpegli_finish_output(cinfo));
+ ++sos_marker_cnt; // finish output reads the next SOS marker or EOI
+ if (dparams.output_mode == COEFFICIENTS) {
+ jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(cinfo);
+ JXL_CHECK(coef_arrays != nullptr);
+ CopyCoefficients(cinfo, coef_arrays, &output_progression->back());
+ }
+ }
+ jpegli_finish_decompress(cinfo);
+ if (dparams.size_factor == 1.0f) {
+ EXPECT_EQ(has_multiple_scans, cinfo->input_scan_number > 1);
+ }
+}
+
+TEST(DecodeAPITest, ReuseCinfo) {
+ TestImage input, output, expected;
+ std::vector<TestImage> output_progression, expected_output_progression;
+ CompressParams jparams;
+ DecompressParams dparams;
+ std::vector<uint8_t> compressed;
+ jpeg_decompress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ input.xsize = 129;
+ input.ysize = 73;
+ GeneratePixels(&input);
+ for (int h_samp : {2, 1}) {
+ for (int v_samp : {2, 1}) {
+ for (int progr : {0, 2}) {
+ jparams.h_sampling = {h_samp, 1, 1};
+ jparams.v_sampling = {v_samp, 1, 1};
+ jparams.progressive_mode = progr;
+ printf(
+ "Generating input with %dx%d chroma subsampling "
+ "progressive level %d\n",
+ h_samp, v_samp, progr);
+ JXL_CHECK(EncodeWithJpegli(input, jparams, &compressed));
+ for (JpegIOMode output_mode : {PIXELS, RAW_DATA, COEFFICIENTS}) {
+ for (bool crop : {true, false}) {
+ if (crop && output_mode != PIXELS) continue;
+ for (int scale_num : {1, 2, 3, 4, 7, 8, 13, 16}) {
+ if (scale_num != 8 && output_mode != PIXELS) continue;
+ int scale_denom = 8;
+ while (scale_num % 2 == 0 && scale_denom % 2 == 0) {
+ scale_num /= 2;
+ scale_denom /= 2;
+ }
+ printf("Decoding with output mode %d output scaling %d/%d %s\n",
+ output_mode, scale_num, scale_denom,
+ crop ? "with cropped output" : "");
+ dparams.output_mode = output_mode;
+ dparams.scale_num = scale_num;
+ dparams.scale_denom = scale_denom;
+ expected.Clear();
+ DecodeWithLibjpeg(jparams, dparams, compressed, &expected);
+ output.Clear();
+ cinfo.buffered_image = false;
+ cinfo.raw_data_out = false;
+ cinfo.scale_num = cinfo.scale_denom = 1;
+ SourceManager src(compressed.data(), compressed.size(),
+ 1u << 12);
+ cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+ jpegli_read_header(&cinfo, /*require_image=*/TRUE);
+ jpegli_abort_decompress(&cinfo);
+ src.Reset();
+ TestAPINonBuffered(jparams, dparams, expected, &cinfo, &output);
+ float max_rms = output_mode == COEFFICIENTS ? 0.0f : 1.0f;
+ if (scale_num == 1 && scale_denom == 8 && h_samp != v_samp) {
+ max_rms = 5.0f; // libjpeg does not do fancy upsampling
+ }
+ VerifyOutputImage(expected, output, max_rms);
+ printf("Decoding in buffered image mode\n");
+ expected_output_progression.clear();
+ DecodeAllScansWithLibjpeg(jparams, dparams, compressed,
+ &expected_output_progression);
+ output_progression.clear();
+ src.Reset();
+ TestAPIBuffered(jparams, dparams, &cinfo, &output_progression);
+ JXL_CHECK(output_progression.size() ==
+ expected_output_progression.size());
+ for (size_t i = 0; i < output_progression.size(); ++i) {
+ const TestImage& output = output_progression[i];
+ const TestImage& expected = expected_output_progression[i];
+ VerifyOutputImage(expected, output, max_rms);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ return true;
+ };
+ ASSERT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+}
+
+std::vector<TestConfig> GenerateBasicConfigs() {
+ std::vector<TestConfig> all_configs;
+ for (int samp : {1, 2}) {
+ for (int progr : {0, 2}) {
+ TestConfig config;
+ config.input.xsize = 257 + samp * 37;
+ config.input.ysize = 265 + (progr / 2) * 17;
+ config.jparams.h_sampling = {samp, 1, 1};
+ config.jparams.v_sampling = {samp, 1, 1};
+ config.jparams.progressive_mode = progr;
+ GeneratePixels(&config.input);
+ all_configs.push_back(config);
+ }
+ }
+ return all_configs;
+}
+
+TEST(DecodeAPITest, ReuseCinfoSameMemSource) {
+ std::vector<TestConfig> all_configs = GenerateBasicConfigs();
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ {
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ for (const TestConfig& config : all_configs) {
+ EncodeWithJpegli(config.input, config.jparams, &cinfo);
+ }
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ }
+ std::vector<TestImage> all_outputs(all_configs.size());
+ {
+ jpeg_decompress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ jpegli_mem_src(&cinfo, buffer, buffer_size);
+ for (size_t i = 0; i < all_configs.size(); ++i) {
+ TestAPINonBuffered(all_configs[i].jparams, DecompressParams(),
+ all_configs[i].input, &cinfo, &all_outputs[i]);
+ }
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+ }
+ for (size_t i = 0; i < all_configs.size(); ++i) {
+ VerifyOutputImage(all_configs[i].input, all_outputs[i], 2.35f);
+ }
+ if (buffer) free(buffer);
+}
+
+TEST(DecodeAPITest, ReuseCinfoSameStdSource) {
+ std::vector<TestConfig> all_configs = GenerateBasicConfigs();
+ FILE* tmpf = tmpfile();
+ JXL_CHECK(tmpf);
+ {
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_stdio_dest(&cinfo, tmpf);
+ for (const TestConfig& config : all_configs) {
+ EncodeWithJpegli(config.input, config.jparams, &cinfo);
+ }
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ }
+ rewind(tmpf);
+ std::vector<TestImage> all_outputs(all_configs.size());
+ {
+ jpeg_decompress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ jpegli_stdio_src(&cinfo, tmpf);
+ for (size_t i = 0; i < all_configs.size(); ++i) {
+ TestAPINonBuffered(all_configs[i].jparams, DecompressParams(),
+ all_configs[i].input, &cinfo, &all_outputs[i]);
+ }
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+ }
+ for (size_t i = 0; i < all_configs.size(); ++i) {
+ VerifyOutputImage(all_configs[i].input, all_outputs[i], 2.35f);
+ }
+ fclose(tmpf);
+}
+
+TEST(DecodeAPITest, AbbreviatedStreams) {
+ uint8_t* table_stream = nullptr;
+ unsigned long table_stream_size = 0;
+ uint8_t* data_stream = nullptr;
+ unsigned long data_stream_size = 0;
+ {
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &table_stream, &table_stream_size);
+ cinfo.input_components = 3;
+ cinfo.in_color_space = JCS_RGB;
+ jpegli_set_defaults(&cinfo);
+ jpegli_write_tables(&cinfo);
+ jpegli_mem_dest(&cinfo, &data_stream, &data_stream_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.optimize_coding = FALSE;
+ jpegli_set_progressive_level(&cinfo, 0);
+ jpegli_start_compress(&cinfo, FALSE);
+ JSAMPLE image[3] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ EXPECT_LT(data_stream_size, 50);
+ jpegli_destroy_compress(&cinfo);
+ }
+ {
+ jpeg_decompress_struct cinfo = {};
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ jpegli_mem_src(&cinfo, table_stream, table_stream_size);
+ jpegli_read_header(&cinfo, FALSE);
+ jpegli_mem_src(&cinfo, data_stream, data_stream_size);
+ jpegli_read_header(&cinfo, TRUE);
+ EXPECT_EQ(1, cinfo.image_width);
+ EXPECT_EQ(1, cinfo.image_height);
+ EXPECT_EQ(3, cinfo.num_components);
+ jpegli_start_decompress(&cinfo);
+ JSAMPLE image[3] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_read_scanlines(&cinfo, row, 1);
+ EXPECT_EQ(0, image[0]);
+ EXPECT_EQ(0, image[1]);
+ EXPECT_EQ(0, image[2]);
+ jpegli_finish_decompress(&cinfo);
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+ }
+ if (table_stream) free(table_stream);
+ if (data_stream) free(data_stream);
+}
+
+class DecodeAPITestParam : public ::testing::TestWithParam<TestConfig> {};
+
+TEST_P(DecodeAPITestParam, TestAPI) {
+ TestConfig config = GetParam();
+ const DecompressParams& dparams = config.dparams;
+ if (dparams.skip_scans) return;
+ const std::vector<uint8_t> compressed = GetTestJpegData(config);
+ SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size);
+
+ TestImage output1;
+ DecodeWithLibjpeg(config.jparams, dparams, compressed, &output1);
+
+ TestImage output0;
+ jpeg_decompress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+ TestAPINonBuffered(config.jparams, dparams, output1, &cinfo, &output0);
+ return true;
+ };
+ ASSERT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+
+ if (config.compare_to_orig) {
+ double rms0 = DistanceRms(config.input, output0);
+ double rms1 = DistanceRms(config.input, output1);
+ printf("rms: %f vs %f\n", rms0, rms1);
+ EXPECT_LE(rms0, rms1 * config.max_tolerance_factor);
+ } else {
+ VerifyOutputImage(output0, output1, config.max_rms_dist, config.max_diff);
+ }
+}
+
+class DecodeAPITestParamBuffered : public ::testing::TestWithParam<TestConfig> {
+};
+
+TEST_P(DecodeAPITestParamBuffered, TestAPI) {
+ TestConfig config = GetParam();
+ const DecompressParams& dparams = config.dparams;
+ const std::vector<uint8_t> compressed = GetTestJpegData(config);
+ SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size);
+
+ std::vector<TestImage> output_progression1;
+ DecodeAllScansWithLibjpeg(config.jparams, dparams, compressed,
+ &output_progression1);
+
+ std::vector<TestImage> output_progression0;
+ jpeg_decompress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+ TestAPIBuffered(config.jparams, dparams, &cinfo, &output_progression0);
+ return true;
+ };
+ ASSERT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+
+ ASSERT_EQ(output_progression0.size(), output_progression1.size());
+ for (size_t i = 0; i < output_progression0.size(); ++i) {
+ const TestImage& output = output_progression0[i];
+ const TestImage& expected = output_progression1[i];
+ if (config.compare_to_orig) {
+ double rms0 = DistanceRms(config.input, output);
+ double rms1 = DistanceRms(config.input, expected);
+ printf("rms: %f vs %f\n", rms0, rms1);
+ EXPECT_LE(rms0, rms1 * config.max_tolerance_factor);
+ } else {
+ VerifyOutputImage(expected, output, config.max_rms_dist, config.max_diff);
+ }
+ }
+}
+
+std::vector<TestConfig> GenerateTests(bool buffered) {
+ std::vector<TestConfig> all_tests;
+ {
+ std::vector<std::pair<std::string, std::string>> testfiles({
+ {"jxl/flower/flower.png.im_q85_420_progr.jpg", "Q85YUV420PROGR"},
+ {"jxl/flower/flower.png.im_q85_420_R13B.jpg", "Q85YUV420R13B"},
+ {"jxl/flower/flower.png.im_q85_444.jpg", "Q85YUV444"},
+ });
+ for (size_t i = 0; i < (buffered ? 1u : testfiles.size()); ++i) {
+ TestConfig config;
+ config.fn = testfiles[i].first;
+ config.fn_desc = testfiles[i].second;
+ for (size_t chunk_size : {0, 1, 64, 65536}) {
+ config.dparams.chunk_size = chunk_size;
+ for (size_t max_output_lines : {0, 1, 8, 16}) {
+ config.dparams.max_output_lines = max_output_lines;
+ config.dparams.output_mode = PIXELS;
+ all_tests.push_back(config);
+ }
+ {
+ config.dparams.max_output_lines = 16;
+ config.dparams.output_mode = RAW_DATA;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+
+ {
+ std::vector<std::pair<std::string, std::string>> testfiles({
+ {"jxl/flower/flower_small.q85_444_non_interleaved.jpg",
+ "Q85YUV444NonInterleaved"},
+ {"jxl/flower/flower_small.q85_420_non_interleaved.jpg",
+ "Q85YUV420NonInterleaved"},
+ {"jxl/flower/flower_small.q85_444_partially_interleaved.jpg",
+ "Q85YUV444PartiallyInterleaved"},
+ {"jxl/flower/flower_small.q85_420_partially_interleaved.jpg",
+ "Q85YUV420PartiallyInterleaved"},
+ {"jxl/flower/flower.png.im_q85_422.jpg", "Q85YUV422"},
+ {"jxl/flower/flower.png.im_q85_440.jpg", "Q85YUV440"},
+ {"jxl/flower/flower.png.im_q85_444_1x2.jpg", "Q85YUV444_1x2"},
+ {"jxl/flower/flower.png.im_q85_asymmetric.jpg", "Q85Asymmetric"},
+ {"jxl/flower/flower.png.im_q85_gray.jpg", "Q85Gray"},
+ {"jxl/flower/flower.png.im_q85_luma_subsample.jpg", "Q85LumaSubsample"},
+ {"jxl/flower/flower.png.im_q85_rgb.jpg", "Q85RGB"},
+ {"jxl/flower/flower.png.im_q85_rgb_subsample_blue.jpg",
+ "Q85RGBSubsampleBlue"},
+ {"jxl/flower/flower_small.cmyk.jpg", "CMYK"},
+ });
+ for (size_t i = 0; i < (buffered ? 4u : testfiles.size()); ++i) {
+ for (JpegIOMode output_mode : {PIXELS, RAW_DATA}) {
+ TestConfig config;
+ config.fn = testfiles[i].first;
+ config.fn_desc = testfiles[i].second;
+ config.dparams.output_mode = output_mode;
+ all_tests.push_back(config);
+ }
+ }
+ }
+
+ // Tests for common chroma subsampling and output modes.
+ for (JpegIOMode output_mode : {PIXELS, RAW_DATA, COEFFICIENTS}) {
+ for (int h_samp : {1, 2}) {
+ for (int v_samp : {1, 2}) {
+ for (bool fancy : {true, false}) {
+ if (!fancy && (output_mode != PIXELS || h_samp * v_samp == 1)) {
+ continue;
+ }
+ TestConfig config;
+ config.dparams.output_mode = output_mode;
+ config.dparams.do_fancy_upsampling = fancy;
+ config.jparams.progressive_mode = 2;
+ config.jparams.h_sampling = {h_samp, 1, 1};
+ config.jparams.v_sampling = {v_samp, 1, 1};
+ if (output_mode == COEFFICIENTS) {
+ config.max_rms_dist = 0.0f;
+ }
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+
+ // Tests for partial input.
+ for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f}) {
+ for (int progr : {0, 1, 3}) {
+ for (int samp : {1, 2}) {
+ for (bool skip_scans : {false, true}) {
+ if (skip_scans && (progr != 1 || size_factor < 0.5f)) continue;
+ for (JpegIOMode output_mode : {PIXELS, RAW_DATA}) {
+ TestConfig config;
+ config.input.xsize = 517;
+ config.input.ysize = 523;
+ config.jparams.h_sampling = {samp, 1, 1};
+ config.jparams.v_sampling = {samp, 1, 1};
+ config.jparams.progressive_mode = progr;
+ config.dparams.size_factor = size_factor;
+ config.dparams.output_mode = output_mode;
+ config.dparams.skip_scans = skip_scans;
+ // The last partially available block can behave differently.
+ // TODO(szabadka) Figure out if we can make the behaviour more
+ // similar.
+ config.max_rms_dist = samp == 1 ? 1.75f : 3.0f;
+ config.max_diff = 255.0f;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ }
+
+ // Tests for block smoothing.
+ for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f, 1.0f}) {
+ for (int samp : {1, 2}) {
+ for (bool skip_scans : {false, true}) {
+ if (skip_scans && size_factor < 0.3f) continue;
+ TestConfig config;
+ config.input.xsize = 517;
+ config.input.ysize = 523;
+ config.jparams.h_sampling = {samp, 1, 1};
+ config.jparams.v_sampling = {samp, 1, 1};
+ config.jparams.progressive_mode = 2;
+ config.dparams.size_factor = size_factor;
+ config.dparams.do_block_smoothing = true;
+ config.dparams.skip_scans = skip_scans;
+ // libjpeg does smoothing for incomplete scans differently at
+ // the border between current and previous scans.
+ config.max_rms_dist = 8.0f;
+ config.max_diff = 255.0f;
+ all_tests.push_back(config);
+ }
+ }
+ }
+
+ // Test for switching output color quantization modes between scans.
+ if (buffered) {
+ TestConfig config;
+ config.jparams.progressive_mode = 2;
+ config.dparams.quantize_colors = true;
+ config.dparams.scan_params = {
+ {3, JDITHER_NONE, CQUANT_1PASS}, {4, JDITHER_ORDERED, CQUANT_1PASS},
+ {5, JDITHER_FS, CQUANT_1PASS}, {6, JDITHER_NONE, CQUANT_EXTERNAL},
+ {8, JDITHER_NONE, CQUANT_REUSE}, {9, JDITHER_NONE, CQUANT_EXTERNAL},
+ {10, JDITHER_NONE, CQUANT_2PASS}, {11, JDITHER_NONE, CQUANT_REUSE},
+ {12, JDITHER_NONE, CQUANT_2PASS}, {13, JDITHER_FS, CQUANT_2PASS},
+ };
+ config.compare_to_orig = true;
+ config.max_tolerance_factor = 1.04f;
+ all_tests.push_back(config);
+ }
+
+ if (buffered) {
+ return all_tests;
+ }
+
+ // Tests for output color quantization.
+ for (int num_colors : {8, 64, 256}) {
+ for (ColorQuantMode mode : {CQUANT_1PASS, CQUANT_EXTERNAL, CQUANT_2PASS}) {
+ if (mode == CQUANT_EXTERNAL && num_colors != 256) continue;
+ for (J_DITHER_MODE dither : {JDITHER_NONE, JDITHER_ORDERED, JDITHER_FS}) {
+ if (mode == CQUANT_EXTERNAL && dither != JDITHER_NONE) continue;
+ if (mode != CQUANT_1PASS && dither == JDITHER_ORDERED) continue;
+ for (bool crop : {false, true}) {
+ for (bool scale : {false, true}) {
+ for (bool samp : {false, true}) {
+ if ((num_colors != 256) && (crop || scale || samp)) {
+ continue;
+ }
+ if (mode == CQUANT_2PASS && crop) continue;
+ TestConfig config;
+ config.input.xsize = 1024;
+ config.input.ysize = 768;
+ config.dparams.quantize_colors = true;
+ config.dparams.desired_number_of_colors = num_colors;
+ config.dparams.scan_params = {{kLastScan, dither, mode}};
+ config.dparams.crop_output = crop;
+ if (scale) {
+ config.dparams.scale_num = 7;
+ config.dparams.scale_denom = 8;
+ }
+ if (samp) {
+ config.jparams.h_sampling = {2, 1, 1};
+ config.jparams.v_sampling = {2, 1, 1};
+ }
+ if (!scale && !crop) {
+ config.compare_to_orig = true;
+ if (dither != JDITHER_NONE) {
+ config.max_tolerance_factor = 1.05f;
+ }
+ if (mode == CQUANT_2PASS &&
+ (num_colors == 8 || dither == JDITHER_FS)) {
+ // TODO(szabadka) Lower this bound.
+ config.max_tolerance_factor = 1.5f;
+ }
+ } else {
+ // We only test for buffer overflows, etc.
+ config.max_rms_dist = 100.0f;
+ config.max_diff = 255.0f;
+ }
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Tests for output formats.
+ for (JpegliDataType type :
+ {JPEGLI_TYPE_UINT8, JPEGLI_TYPE_UINT16, JPEGLI_TYPE_FLOAT}) {
+ for (JpegliEndianness endianness :
+ {JPEGLI_NATIVE_ENDIAN, JPEGLI_LITTLE_ENDIAN, JPEGLI_BIG_ENDIAN}) {
+ if (type == JPEGLI_TYPE_UINT8 && endianness != JPEGLI_NATIVE_ENDIAN) {
+ continue;
+ }
+ for (int channels = 1; channels <= 4; ++channels) {
+ TestConfig config;
+ config.dparams.data_type = type;
+ config.dparams.endianness = endianness;
+ config.input.color_space = JCS_UNKNOWN;
+ config.input.components = channels;
+ config.dparams.set_out_color_space = true;
+ config.dparams.out_color_space = JCS_UNKNOWN;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ // Test for output cropping.
+ {
+ TestConfig config;
+ config.dparams.crop_output = true;
+ all_tests.push_back(config);
+ }
+ // Tests for color transforms.
+ for (J_COLOR_SPACE out_color_space : {JCS_RGB, JCS_GRAYSCALE}) {
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.input.color_space = JCS_GRAYSCALE;
+ config.dparams.set_out_color_space = true;
+ config.dparams.out_color_space = out_color_space;
+ all_tests.push_back(config);
+ }
+ for (J_COLOR_SPACE jpeg_color_space : {JCS_RGB, JCS_YCbCr}) {
+ for (J_COLOR_SPACE out_color_space : {JCS_RGB, JCS_YCbCr, JCS_GRAYSCALE}) {
+ if (jpeg_color_space == JCS_RGB && out_color_space == JCS_YCbCr) continue;
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.jparams.set_jpeg_colorspace = true;
+ config.jparams.jpeg_color_space = jpeg_color_space;
+ config.dparams.set_out_color_space = true;
+ config.dparams.out_color_space = out_color_space;
+ all_tests.push_back(config);
+ }
+ }
+ for (J_COLOR_SPACE jpeg_color_space : {JCS_CMYK, JCS_YCCK}) {
+ for (J_COLOR_SPACE out_color_space : {JCS_CMYK, JCS_YCCK}) {
+ if (jpeg_color_space == JCS_CMYK && out_color_space == JCS_YCCK) continue;
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.input.color_space = JCS_CMYK;
+ config.jparams.set_jpeg_colorspace = true;
+ config.jparams.jpeg_color_space = jpeg_color_space;
+ config.dparams.set_out_color_space = true;
+ config.dparams.out_color_space = out_color_space;
+ all_tests.push_back(config);
+ }
+ }
+ // Tests for progressive levels.
+ for (int p = 0; p < 3 + kNumTestScripts; ++p) {
+ TestConfig config;
+ config.jparams.progressive_mode = p;
+ all_tests.push_back(config);
+ }
+ // Tests for RST markers.
+ for (size_t r : {1, 17, 1024}) {
+ for (size_t chunk_size : {1, 65536}) {
+ for (int progr : {0, 2}) {
+ TestConfig config;
+ config.dparams.chunk_size = chunk_size;
+ config.jparams.progressive_mode = progr;
+ config.jparams.restart_interval = r;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ for (size_t rr : {1, 3, 8, 100}) {
+ TestConfig config;
+ config.jparams.restart_in_rows = rr;
+ all_tests.push_back(config);
+ }
+ // Tests for custom quantization tables.
+ for (int type : {0, 1, 10, 100, 10000}) {
+ for (int scale : {1, 50, 100, 200, 500}) {
+ for (bool add_raw : {false, true}) {
+ for (bool baseline : {true, false}) {
+ if (!baseline && (add_raw || type * scale < 25500)) continue;
+ TestConfig config;
+ config.input.xsize = 64;
+ config.input.ysize = 64;
+ CustomQuantTable table;
+ table.table_type = type;
+ table.scale_factor = scale;
+ table.force_baseline = baseline;
+ table.add_raw = add_raw;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ config.jparams.quant_indexes = {0, 0, 0};
+ config.compare_to_orig = true;
+ config.max_tolerance_factor = 1.02;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ for (int qidx = 0; qidx < 8; ++qidx) {
+ if (qidx == 3) continue;
+ TestConfig config;
+ config.input.xsize = 256;
+ config.input.ysize = 256;
+ config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+ (qidx >> 0) & 1};
+ all_tests.push_back(config);
+ }
+ for (int qidx = 0; qidx < 8; ++qidx) {
+ for (int slot_idx = 0; slot_idx < 2; ++slot_idx) {
+ if (qidx == 0 && slot_idx == 0) continue;
+ TestConfig config;
+ config.input.xsize = 256;
+ config.input.ysize = 256;
+ config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+ (qidx >> 0) & 1};
+ CustomQuantTable table;
+ table.slot_idx = slot_idx;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ all_tests.push_back(config);
+ }
+ }
+ for (int qidx = 0; qidx < 8; ++qidx) {
+ for (bool xyb : {false, true}) {
+ TestConfig config;
+ config.input.xsize = 256;
+ config.input.ysize = 256;
+ config.jparams.xyb_mode = xyb;
+ config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+ (qidx >> 0) & 1};
+ {
+ CustomQuantTable table;
+ table.slot_idx = 0;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ {
+ CustomQuantTable table;
+ table.slot_idx = 1;
+ table.table_type = 20;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ config.compare_to_orig = true;
+ all_tests.push_back(config);
+ }
+ }
+ for (bool xyb : {false, true}) {
+ TestConfig config;
+ config.input.xsize = 256;
+ config.input.ysize = 256;
+ config.jparams.xyb_mode = xyb;
+ config.jparams.quant_indexes = {0, 1, 2};
+ {
+ CustomQuantTable table;
+ table.slot_idx = 0;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ {
+ CustomQuantTable table;
+ table.slot_idx = 1;
+ table.table_type = 20;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ {
+ CustomQuantTable table;
+ table.slot_idx = 2;
+ table.table_type = 30;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ config.compare_to_orig = true;
+ all_tests.push_back(config);
+ }
+ // Tests for fixed (and custom) prefix codes.
+ for (J_COLOR_SPACE jpeg_color_space : {JCS_RGB, JCS_YCbCr}) {
+ for (bool flat_dc_luma : {false, true}) {
+ TestConfig config;
+ config.jparams.set_jpeg_colorspace = true;
+ config.jparams.jpeg_color_space = jpeg_color_space;
+ config.jparams.progressive_mode = 0;
+ config.jparams.optimize_coding = 0;
+ config.jparams.use_flat_dc_luma_code = flat_dc_luma;
+ all_tests.push_back(config);
+ }
+ }
+ for (J_COLOR_SPACE jpeg_color_space : {JCS_CMYK, JCS_YCCK}) {
+ for (bool flat_dc_luma : {false, true}) {
+ TestConfig config;
+ config.input.color_space = JCS_CMYK;
+ config.jparams.set_jpeg_colorspace = true;
+ config.jparams.jpeg_color_space = jpeg_color_space;
+ config.jparams.progressive_mode = 0;
+ config.jparams.optimize_coding = 0;
+ config.jparams.use_flat_dc_luma_code = flat_dc_luma;
+ all_tests.push_back(config);
+ }
+ }
+ // Test for jpeg without DHT marker.
+ {
+ TestConfig config;
+ config.jparams.progressive_mode = 0;
+ config.jparams.optimize_coding = 0;
+ config.jparams.omit_standard_tables = true;
+ all_tests.push_back(config);
+ }
+ // Test for custom component ids.
+ {
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 128;
+ config.jparams.comp_ids = {7, 17, 177};
+ all_tests.push_back(config);
+ }
+ // Tests for JFIF/Adobe markers.
+ for (int override_JFIF : {-1, 0, 1}) {
+ for (int override_Adobe : {-1, 0, 1}) {
+ if (override_JFIF == -1 && override_Adobe == -1) continue;
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 128;
+ config.jparams.override_JFIF = override_JFIF;
+ config.jparams.override_Adobe = override_Adobe;
+ all_tests.push_back(config);
+ }
+ }
+ // Tests for small images.
+ for (int xsize : {1, 7, 8, 9, 15, 16, 17}) {
+ for (int ysize : {1, 7, 8, 9, 15, 16, 17}) {
+ TestConfig config;
+ config.input.xsize = xsize;
+ config.input.ysize = ysize;
+ all_tests.push_back(config);
+ }
+ }
+ // Tests for custom marker processor.
+ for (size_t chunk_size : {0, 1, 64, 65536}) {
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.dparams.chunk_size = chunk_size;
+ config.jparams.add_marker = true;
+ all_tests.push_back(config);
+ }
+ // Tests for icc profile decoding.
+ for (size_t icc_size : {728, 70000, 1000000}) {
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.jparams.icc.resize(icc_size);
+ for (size_t i = 0; i < icc_size; ++i) {
+ config.jparams.icc[i] = (i * 17) & 0xff;
+ }
+ all_tests.push_back(config);
+ }
+ // Tests for unusual sampling factors.
+ for (int h0_samp : {1, 2, 3, 4}) {
+ for (int v0_samp : {1, 2, 3, 4}) {
+ for (int dxb = 0; dxb < h0_samp; ++dxb) {
+ for (int dyb = 0; dyb < v0_samp; ++dyb) {
+ for (int dx = 0; dx < 2; ++dx) {
+ for (int dy = 0; dy < 2; ++dy) {
+ TestConfig config;
+ config.input.xsize = 128 + dyb * 8 + dy;
+ config.input.ysize = 256 + dxb * 8 + dx;
+ config.jparams.progressive_mode = 2;
+ config.jparams.h_sampling = {h0_samp, 1, 1};
+ config.jparams.v_sampling = {v0_samp, 1, 1};
+ config.compare_to_orig = true;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ }
+ }
+ for (int h0_samp : {1, 2, 4}) {
+ for (int v0_samp : {1, 2, 4}) {
+ for (int h2_samp : {1, 2, 4}) {
+ for (int v2_samp : {1, 2, 4}) {
+ TestConfig config;
+ config.input.xsize = 137;
+ config.input.ysize = 75;
+ config.jparams.progressive_mode = 2;
+ config.jparams.h_sampling = {h0_samp, 1, h2_samp};
+ config.jparams.v_sampling = {v0_samp, 1, v2_samp};
+ config.compare_to_orig = true;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ for (int h0_samp : {1, 3}) {
+ for (int v0_samp : {1, 3}) {
+ for (int h2_samp : {1, 3}) {
+ for (int v2_samp : {1, 3}) {
+ TestConfig config;
+ config.input.xsize = 205;
+ config.input.ysize = 99;
+ config.jparams.progressive_mode = 2;
+ config.jparams.h_sampling = {h0_samp, 1, h2_samp};
+ config.jparams.v_sampling = {v0_samp, 1, v2_samp};
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ // Tests for output scaling.
+ for (int scale_num = 1; scale_num <= 16; ++scale_num) {
+ if (scale_num == 8) continue;
+ for (bool crop : {false, true}) {
+ for (int samp : {1, 2}) {
+ for (int progr : {0, 2}) {
+ TestConfig config;
+ config.jparams.h_sampling = {samp, 1, 1};
+ config.jparams.v_sampling = {samp, 1, 1};
+ config.jparams.progressive_mode = progr;
+ config.dparams.scale_num = scale_num;
+ config.dparams.scale_denom = 8;
+ config.dparams.crop_output = crop;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ return all_tests;
+}
+
+std::string QuantMode(ColorQuantMode mode) {
+ switch (mode) {
+ case CQUANT_1PASS:
+ return "1pass";
+ case CQUANT_EXTERNAL:
+ return "External";
+ case CQUANT_2PASS:
+ return "2pass";
+ case CQUANT_REUSE:
+ return "Reuse";
+ }
+ return "";
+}
+
+std::string DitherMode(J_DITHER_MODE mode) {
+ switch (mode) {
+ case JDITHER_NONE:
+ return "No";
+ case JDITHER_ORDERED:
+ return "Ordered";
+ case JDITHER_FS:
+ return "FS";
+ }
+ return "";
+}
+
+std::ostream& operator<<(std::ostream& os, const DecompressParams& dparams) {
+ if (dparams.chunk_size == 0) {
+ os << "CompleteInput";
+ } else {
+ os << "InputChunks" << dparams.chunk_size;
+ }
+ if (dparams.size_factor < 1.0f) {
+ os << "Partial" << static_cast<int>(dparams.size_factor * 100) << "p";
+ }
+ if (dparams.max_output_lines == 0) {
+ os << "CompleteOutput";
+ } else {
+ os << "OutputLines" << dparams.max_output_lines;
+ }
+ if (dparams.output_mode == RAW_DATA) {
+ os << "RawDataOut";
+ } else if (dparams.output_mode == COEFFICIENTS) {
+ os << "CoeffsOut";
+ }
+ os << IOMethodName(dparams.data_type, dparams.endianness);
+ if (dparams.set_out_color_space) {
+ os << "OutColor" << ColorSpaceName(dparams.out_color_space);
+ }
+ if (dparams.crop_output) {
+ os << "Crop";
+ }
+ if (dparams.do_block_smoothing) {
+ os << "BlockSmoothing";
+ }
+ if (!dparams.do_fancy_upsampling) {
+ os << "NoFancyUpsampling";
+ }
+ if (dparams.scale_num != 1 || dparams.scale_denom != 1) {
+ os << "Scale" << dparams.scale_num << "_" << dparams.scale_denom;
+ }
+ if (dparams.quantize_colors) {
+ os << "Quant" << dparams.desired_number_of_colors << "colors";
+ for (size_t i = 0; i < dparams.scan_params.size(); ++i) {
+ if (i > 0) os << "_";
+ const auto& sparam = dparams.scan_params[i];
+ os << QuantMode(sparam.color_quant_mode);
+ os << DitherMode(sparam.dither_mode) << "Dither";
+ }
+ }
+ if (dparams.skip_scans) {
+ os << "SkipScans";
+ }
+ return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+ if (!c.fn.empty()) {
+ os << c.fn_desc;
+ } else {
+ os << c.input;
+ }
+ os << c.jparams;
+ os << c.dparams;
+ return os;
+}
+
+std::string TestDescription(const testing::TestParamInfo<TestConfig>& info) {
+ std::stringstream name;
+ name << info.param;
+ return name.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(DecodeAPITest, DecodeAPITestParam,
+ testing::ValuesIn(GenerateTests(false)),
+ TestDescription);
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(DecodeAPITestBuffered,
+ DecodeAPITestParamBuffered,
+ testing::ValuesIn(GenerateTests(true)),
+ TestDescription);
+
+} // namespace
+} // namespace jpegli
diff --git a/third_party/jpeg-xl/lib/jpegli/decode_internal.h b/third_party/jpeg-xl/lib/jpegli/decode_internal.h
new file mode 100644
index 0000000000..1c4f248d40
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/decode_internal.h
@@ -0,0 +1,150 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_DECODE_INTERNAL_H_
+#define LIB_JPEGLI_DECODE_INTERNAL_H_
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <vector>
+
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/common_internal.h"
+#include "lib/jpegli/huffman.h"
+
+namespace jpegli {
+
+static constexpr int kNeedMoreInput = 100;
+static constexpr int kHandleRestart = 101;
+static constexpr int kHandleMarkerProcessor = 102;
+static constexpr int kProcessNextMarker = 103;
+static constexpr size_t kAllHuffLutSize = NUM_HUFF_TBLS * kJpegHuffmanLutSize;
+
+typedef int16_t coeff_t;
+
+// State of the decoder that has to be saved before decoding one MCU in case
+// we run out of the bitstream.
+struct MCUCodingState {
+ coeff_t last_dc_coeff[kMaxComponents];
+ int eobrun;
+ coeff_t coeffs[D_MAX_BLOCKS_IN_MCU * DCTSIZE2];
+};
+
+} // namespace jpegli
+
+// Use this forward-declared libjpeg struct to hold all our private variables.
+// TODO(szabadka) Remove variables that have a corresponding version in cinfo.
+struct jpeg_decomp_master {
+ //
+ // Input handling state.
+ //
+ std::vector<uint8_t> input_buffer_;
+ size_t input_buffer_pos_;
+ // Number of bits after codestream_pos_ that were already processed.
+ size_t codestream_bits_ahead_;
+ bool streaming_mode_;
+
+ // Coefficient buffers
+ jvirt_barray_ptr* coef_arrays;
+ JBLOCKARRAY coeff_rows[jpegli::kMaxComponents];
+
+ //
+ // Marker data processing state.
+ //
+ bool found_soi_;
+ bool found_dri_;
+ bool found_sof_;
+ bool found_eoi_;
+ size_t icc_index_;
+ size_t icc_total_;
+ std::vector<uint8_t> icc_profile_;
+ jpegli::HuffmanTableEntry dc_huff_lut_[jpegli::kAllHuffLutSize];
+ jpegli::HuffmanTableEntry ac_huff_lut_[jpegli::kAllHuffLutSize];
+ uint8_t markers_to_save_[32];
+ jpeg_marker_parser_method app_marker_parsers[16];
+ jpeg_marker_parser_method com_marker_parser;
+ // Whether this jpeg has multiple scans (progressive or non-interleaved
+ // sequential).
+ bool is_multiscan_;
+
+ // Fields defined by SOF marker.
+ size_t iMCU_cols_;
+ int h_factor[jpegli::kMaxComponents];
+ int v_factor[jpegli::kMaxComponents];
+
+ // Initialized at strat of frame.
+ uint16_t scan_progression_[jpegli::kMaxComponents][DCTSIZE2];
+
+ //
+ // Per scan state.
+ //
+ size_t scan_mcu_row_;
+ size_t scan_mcu_col_;
+ size_t mcu_rows_per_iMCU_row_;
+ jpegli::coeff_t last_dc_coeff_[jpegli::kMaxComponents];
+ int eobrun_;
+ int restarts_to_go_;
+ int next_restart_marker_;
+
+ jpegli::MCUCodingState mcu_;
+
+ //
+ // Rendering state.
+ //
+ int output_passes_done_;
+ JpegliDataType output_data_type_ = JPEGLI_TYPE_UINT8;
+ bool swap_endianness_ = false;
+ size_t xoffset_;
+
+ int min_scaled_dct_size;
+ int scaled_dct_size[jpegli::kMaxComponents];
+
+ size_t raw_height_[jpegli::kMaxComponents];
+ jpegli::RowBuffer<float> raw_output_[jpegli::kMaxComponents];
+ jpegli::RowBuffer<float> render_output_[jpegli::kMaxComponents];
+
+ void (*inverse_transform[jpegli::kMaxComponents])(
+ const int16_t* JXL_RESTRICT qblock, const float* JXL_RESTRICT dequant,
+ const float* JXL_RESTRICT biases, float* JXL_RESTRICT scratch_space,
+ float* JXL_RESTRICT output, size_t output_stride, size_t dctsize);
+
+ void (*color_transform)(float* row[jpegli::kMaxComponents], size_t len);
+
+ float* idct_scratch_;
+ float* upsample_scratch_;
+ uint8_t* output_scratch_;
+ int16_t* smoothing_scratch_;
+ float* dequant_;
+ // 1 = 1pass, 2 = 2pass, 3 = external
+ int quant_mode_;
+ int quant_pass_;
+ int num_colors_[jpegli::kMaxComponents];
+ uint8_t* colormap_lut_;
+ uint8_t* pixels_;
+ JSAMPARRAY scanlines_;
+ std::vector<std::vector<uint8_t>> candidate_lists_;
+ bool regenerate_inverse_colormap_;
+ float* dither_[jpegli::kMaxComponents];
+ float* error_row_[2 * jpegli::kMaxComponents];
+ size_t dither_size_;
+ size_t dither_mask_;
+
+ // Per channel and per frequency statistics about the number of nonzeros and
+ // the sum of coefficient absolute values, used in dequantization bias
+ // computation.
+ int* nonzeros_;
+ int* sumabs_;
+ size_t num_processed_blocks_[jpegli::kMaxComponents];
+ float* biases_;
+#define SAVED_COEFS 10
+ // This holds the coef_bits of the scan before the current scan,
+ // i.e. the bottom half when rendering incomplete scans.
+ int (*coef_bits_latch)[SAVED_COEFS];
+ int (*prev_coef_bits_latch)[SAVED_COEFS];
+ bool apply_smoothing;
+};
+
+#endif // LIB_JPEGLI_DECODE_INTERNAL_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/decode_marker.cc b/third_party/jpeg-xl/lib/jpegli/decode_marker.cc
new file mode 100644
index 0000000000..c5c5790cdf
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/decode_marker.cc
@@ -0,0 +1,588 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode_marker.h"
+
+#include <string.h>
+
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/huffman.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jxl/base/printf_macros.h"
+
+namespace jpegli {
+namespace {
+
+constexpr int kMaxDimPixels = 65535;
+constexpr uint8_t kIccProfileTag[12] = "ICC_PROFILE";
+
+// Macros for commonly used error conditions.
+
+#define JPEG_VERIFY_LEN(n) \
+ if (pos + (n) > len) { \
+ return JPEGLI_ERROR("Unexpected end of marker: pos=%" PRIuS \
+ " need=%d len=%" PRIuS, \
+ pos, static_cast<int>(n), len); \
+ }
+
+#define JPEG_VERIFY_INPUT(var, low, high) \
+ if ((var) < (low) || (var) > (high)) { \
+ return JPEGLI_ERROR("Invalid " #var ": %d", static_cast<int>(var)); \
+ }
+
+#define JPEG_VERIFY_MARKER_END() \
+ if (pos != len) { \
+ return JPEGLI_ERROR("Invalid marker length: declared=%" PRIuS \
+ " actual=%" PRIuS, \
+ len, pos); \
+ }
+
+inline int ReadUint8(const uint8_t* data, size_t* pos) {
+ return data[(*pos)++];
+}
+
+inline int ReadUint16(const uint8_t* data, size_t* pos) {
+ int v = (data[*pos] << 8) + data[*pos + 1];
+ *pos += 2;
+ return v;
+}
+
+void ProcessSOF(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+ jpeg_decomp_master* m = cinfo->master;
+ if (!m->found_soi_) {
+ JPEGLI_ERROR("Unexpected SOF marker.");
+ }
+ if (m->found_sof_) {
+ JPEGLI_ERROR("Duplicate SOF marker.");
+ }
+ m->found_sof_ = true;
+ cinfo->progressive_mode = (cinfo->unread_marker == 0xc2);
+ cinfo->arith_code = 0;
+ size_t pos = 2;
+ JPEG_VERIFY_LEN(6);
+ cinfo->data_precision = ReadUint8(data, &pos);
+ cinfo->image_height = ReadUint16(data, &pos);
+ cinfo->image_width = ReadUint16(data, &pos);
+ cinfo->num_components = ReadUint8(data, &pos);
+ JPEG_VERIFY_INPUT(cinfo->data_precision, kJpegPrecision, kJpegPrecision);
+ JPEG_VERIFY_INPUT(cinfo->image_height, 1, kMaxDimPixels);
+ JPEG_VERIFY_INPUT(cinfo->image_width, 1, kMaxDimPixels);
+ JPEG_VERIFY_INPUT(cinfo->num_components, 1, kMaxComponents);
+ JPEG_VERIFY_LEN(3 * cinfo->num_components);
+ cinfo->comp_info = jpegli::Allocate<jpeg_component_info>(
+ cinfo, cinfo->num_components, JPOOL_IMAGE);
+
+ // Read sampling factors and quant table index for each component.
+ uint8_t ids_seen[256] = {0};
+ cinfo->max_h_samp_factor = 1;
+ cinfo->max_v_samp_factor = 1;
+ for (int i = 0; i < cinfo->num_components; ++i) {
+ jpeg_component_info* comp = &cinfo->comp_info[i];
+ comp->component_index = i;
+ const int id = ReadUint8(data, &pos);
+ if (ids_seen[id]) { // (cf. section B.2.2, syntax of Ci)
+ JPEGLI_ERROR("Duplicate ID %d in SOF.", id);
+ }
+ ids_seen[id] = 1;
+ comp->component_id = id;
+ int factor = ReadUint8(data, &pos);
+ int h_samp_factor = factor >> 4;
+ int v_samp_factor = factor & 0xf;
+ JPEG_VERIFY_INPUT(h_samp_factor, 1, MAX_SAMP_FACTOR);
+ JPEG_VERIFY_INPUT(v_samp_factor, 1, MAX_SAMP_FACTOR);
+ comp->h_samp_factor = h_samp_factor;
+ comp->v_samp_factor = v_samp_factor;
+ cinfo->max_h_samp_factor =
+ std::max(cinfo->max_h_samp_factor, h_samp_factor);
+ cinfo->max_v_samp_factor =
+ std::max(cinfo->max_v_samp_factor, v_samp_factor);
+ int quant_tbl_idx = ReadUint8(data, &pos);
+ JPEG_VERIFY_INPUT(quant_tbl_idx, 0, NUM_QUANT_TBLS - 1);
+ comp->quant_tbl_no = quant_tbl_idx;
+ if (cinfo->quant_tbl_ptrs[quant_tbl_idx] == nullptr) {
+ JPEGLI_ERROR("Quantization table with index %u not found", quant_tbl_idx);
+ }
+ comp->quant_table = nullptr; // will be allocated after SOS marker
+ }
+ JPEG_VERIFY_MARKER_END();
+
+ // Set the input colorspace based on the markers we have seen and set
+ // default output colorspace.
+ if (cinfo->num_components == 1) {
+ cinfo->jpeg_color_space = JCS_GRAYSCALE;
+ cinfo->out_color_space = JCS_GRAYSCALE;
+ } else if (cinfo->num_components == 3) {
+ if (cinfo->saw_JFIF_marker) {
+ cinfo->jpeg_color_space = JCS_YCbCr;
+ } else if (cinfo->saw_Adobe_marker) {
+ cinfo->jpeg_color_space =
+ cinfo->Adobe_transform == 0 ? JCS_RGB : JCS_YCbCr;
+ } else {
+ cinfo->jpeg_color_space = JCS_YCbCr;
+ if (cinfo->comp_info[0].component_id == 'R' && //
+ cinfo->comp_info[1].component_id == 'G' && //
+ cinfo->comp_info[2].component_id == 'B') {
+ cinfo->jpeg_color_space = JCS_RGB;
+ }
+ }
+ cinfo->out_color_space = JCS_RGB;
+ } else if (cinfo->num_components == 4) {
+ if (cinfo->saw_Adobe_marker) {
+ cinfo->jpeg_color_space =
+ cinfo->Adobe_transform == 0 ? JCS_CMYK : JCS_YCCK;
+ } else {
+ cinfo->jpeg_color_space = JCS_CMYK;
+ }
+ cinfo->out_color_space = JCS_CMYK;
+ }
+
+ // We have checked above that none of the sampling factors are 0, so the max
+ // sampling factors can not be 0.
+ cinfo->total_iMCU_rows =
+ DivCeil(cinfo->image_height, cinfo->max_v_samp_factor * DCTSIZE);
+ m->iMCU_cols_ =
+ DivCeil(cinfo->image_width, cinfo->max_h_samp_factor * DCTSIZE);
+ // Compute the block dimensions for each component.
+ for (int i = 0; i < cinfo->num_components; ++i) {
+ jpeg_component_info* comp = &cinfo->comp_info[i];
+ if (cinfo->max_h_samp_factor % comp->h_samp_factor != 0 ||
+ cinfo->max_v_samp_factor % comp->v_samp_factor != 0) {
+ JPEGLI_ERROR("Non-integral subsampling ratios.");
+ }
+ m->h_factor[i] = cinfo->max_h_samp_factor / comp->h_samp_factor;
+ m->v_factor[i] = cinfo->max_v_samp_factor / comp->v_samp_factor;
+ comp->downsampled_width = DivCeil(cinfo->image_width, m->h_factor[i]);
+ comp->downsampled_height = DivCeil(cinfo->image_height, m->v_factor[i]);
+ comp->width_in_blocks = DivCeil(comp->downsampled_width, DCTSIZE);
+ comp->height_in_blocks = DivCeil(comp->downsampled_height, DCTSIZE);
+ }
+ memset(m->scan_progression_, 0, sizeof(m->scan_progression_));
+}
+
+void ProcessSOS(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+ jpeg_decomp_master* m = cinfo->master;
+ if (!m->found_sof_) {
+ JPEGLI_ERROR("Unexpected SOS marker.");
+ }
+ size_t pos = 2;
+ JPEG_VERIFY_LEN(1);
+ cinfo->comps_in_scan = ReadUint8(data, &pos);
+ JPEG_VERIFY_INPUT(cinfo->comps_in_scan, 1, cinfo->num_components);
+ JPEG_VERIFY_INPUT(cinfo->comps_in_scan, 1, MAX_COMPS_IN_SCAN);
+
+ JPEG_VERIFY_LEN(2 * cinfo->comps_in_scan);
+ bool is_interleaved = (cinfo->comps_in_scan > 1);
+ uint8_t ids_seen[256] = {0};
+ cinfo->blocks_in_MCU = 0;
+ for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+ int id = ReadUint8(data, &pos);
+ if (ids_seen[id]) { // (cf. section B.2.3, regarding CSj)
+ return JPEGLI_ERROR("Duplicate ID %d in SOS.", id);
+ }
+ ids_seen[id] = 1;
+ jpeg_component_info* comp = nullptr;
+ for (int j = 0; j < cinfo->num_components; ++j) {
+ if (cinfo->comp_info[j].component_id == id) {
+ comp = &cinfo->comp_info[j];
+ cinfo->cur_comp_info[i] = comp;
+ }
+ }
+ if (!comp) {
+ return JPEGLI_ERROR("SOS marker: Could not find component with id %d",
+ id);
+ }
+ int c = ReadUint8(data, &pos);
+ comp->dc_tbl_no = c >> 4;
+ comp->ac_tbl_no = c & 0xf;
+ JPEG_VERIFY_INPUT(comp->dc_tbl_no, 0, 3);
+ JPEG_VERIFY_INPUT(comp->ac_tbl_no, 0, 3);
+ comp->MCU_width = is_interleaved ? comp->h_samp_factor : 1;
+ comp->MCU_height = is_interleaved ? comp->v_samp_factor : 1;
+ comp->MCU_blocks = comp->MCU_width * comp->MCU_height;
+ if (cinfo->blocks_in_MCU + comp->MCU_blocks > D_MAX_BLOCKS_IN_MCU) {
+ JPEGLI_ERROR("Too many blocks in MCU.");
+ }
+ for (int j = 0; j < comp->MCU_blocks; ++j) {
+ cinfo->MCU_membership[cinfo->blocks_in_MCU++] = i;
+ }
+ }
+ JPEG_VERIFY_LEN(3);
+ cinfo->Ss = ReadUint8(data, &pos);
+ cinfo->Se = ReadUint8(data, &pos);
+ JPEG_VERIFY_INPUT(cinfo->Ss, 0, 63);
+ JPEG_VERIFY_INPUT(cinfo->Se, cinfo->Ss, 63);
+ int c = ReadUint8(data, &pos);
+ cinfo->Ah = c >> 4;
+ cinfo->Al = c & 0xf;
+ JPEG_VERIFY_MARKER_END();
+
+ if (cinfo->input_scan_number == 0) {
+ m->is_multiscan_ = (cinfo->comps_in_scan < cinfo->num_components ||
+ cinfo->progressive_mode);
+ }
+ if (cinfo->Ah != 0 && cinfo->Al != cinfo->Ah - 1) {
+ // section G.1.1.1.2 : Successive approximation control only improves
+ // by one bit at a time.
+ JPEGLI_ERROR("Invalid progressive parameters: Al=%d Ah=%d", cinfo->Al,
+ cinfo->Ah);
+ }
+ if (!cinfo->progressive_mode) {
+ cinfo->Ss = 0;
+ cinfo->Se = 63;
+ cinfo->Ah = 0;
+ cinfo->Al = 0;
+ }
+ const uint16_t scan_bitmask =
+ cinfo->Ah == 0 ? (0xffff << cinfo->Al) : (1u << cinfo->Al);
+ const uint16_t refinement_bitmask = (1 << cinfo->Al) - 1;
+ if (!cinfo->coef_bits) {
+ cinfo->coef_bits =
+ Allocate<int[DCTSIZE2]>(cinfo, cinfo->num_components * 2, JPOOL_IMAGE);
+ m->coef_bits_latch =
+ Allocate<int[SAVED_COEFS]>(cinfo, cinfo->num_components, JPOOL_IMAGE);
+ m->prev_coef_bits_latch =
+ Allocate<int[SAVED_COEFS]>(cinfo, cinfo->num_components, JPOOL_IMAGE);
+
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ for (int i = 0; i < DCTSIZE2; ++i) {
+ cinfo->coef_bits[c][i] = -1;
+ if (i < SAVED_COEFS) {
+ m->coef_bits_latch[c][i] = -1;
+ }
+ }
+ }
+ }
+
+ for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+ int comp_idx = cinfo->cur_comp_info[i]->component_index;
+ for (int k = cinfo->Ss; k <= cinfo->Se; ++k) {
+ if (m->scan_progression_[comp_idx][k] & scan_bitmask) {
+ return JPEGLI_ERROR(
+ "Overlapping scans: component=%d k=%d prev_mask: %u cur_mask %u",
+ comp_idx, k, m->scan_progression_[i][k], scan_bitmask);
+ }
+ if (m->scan_progression_[comp_idx][k] & refinement_bitmask) {
+ return JPEGLI_ERROR(
+ "Invalid scan order, a more refined scan was already done: "
+ "component=%d k=%d prev_mask=%u cur_mask=%u",
+ comp_idx, k, m->scan_progression_[i][k], scan_bitmask);
+ }
+ m->scan_progression_[comp_idx][k] |= scan_bitmask;
+ }
+ }
+ if (cinfo->Al > 10) {
+ return JPEGLI_ERROR("Scan parameter Al=%d is not supported.", cinfo->Al);
+ }
+}
+
+// Reads the Define Huffman Table (DHT) marker segment and builds the Huffman
+// decoding table in either dc_huff_lut_ or ac_huff_lut_, depending on the type
+// and solt_id of Huffman code being read.
+void ProcessDHT(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+ size_t pos = 2;
+ if (pos == len) {
+ return JPEGLI_ERROR("DHT marker: no Huffman table found");
+ }
+ while (pos < len) {
+ JPEG_VERIFY_LEN(1 + kJpegHuffmanMaxBitLength);
+ // The index of the Huffman code in the current set of Huffman codes. For AC
+ // component Huffman codes, 0x10 is added to the index.
+ int slot_id = ReadUint8(data, &pos);
+ int huffman_index = slot_id;
+ int is_ac_table = (slot_id & 0x10) != 0;
+ JHUFF_TBL** table;
+ if (is_ac_table) {
+ huffman_index -= 0x10;
+ JPEG_VERIFY_INPUT(huffman_index, 0, NUM_HUFF_TBLS - 1);
+ table = &cinfo->ac_huff_tbl_ptrs[huffman_index];
+ } else {
+ JPEG_VERIFY_INPUT(huffman_index, 0, NUM_HUFF_TBLS - 1);
+ table = &cinfo->dc_huff_tbl_ptrs[huffman_index];
+ }
+ if (*table == nullptr) {
+ *table = jpegli_alloc_huff_table(reinterpret_cast<j_common_ptr>(cinfo));
+ }
+ int total_count = 0;
+ for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) {
+ int count = ReadUint8(data, &pos);
+ (*table)->bits[i] = count;
+ total_count += count;
+ }
+ if (is_ac_table) {
+ JPEG_VERIFY_INPUT(total_count, 0, kJpegHuffmanAlphabetSize);
+ } else {
+ // Allow symbols up to 15 here, we check later whether any invalid symbols
+ // are actually decoded.
+ // TODO(szabadka) Make sure decoder works (does not crash) with up to
+ // 15-nbits DC symbols and then increase kJpegDCAlphabetSize.
+ JPEG_VERIFY_INPUT(total_count, 0, 16);
+ }
+ JPEG_VERIFY_LEN(total_count);
+ for (int i = 0; i < total_count; ++i) {
+ int value = ReadUint8(data, &pos);
+ if (!is_ac_table) {
+ JPEG_VERIFY_INPUT(value, 0, 15);
+ }
+ (*table)->huffval[i] = value;
+ }
+ for (int i = total_count; i < kJpegHuffmanAlphabetSize; ++i) {
+ (*table)->huffval[i] = 0;
+ }
+ }
+ JPEG_VERIFY_MARKER_END();
+}
+
+void ProcessDQT(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+ jpeg_decomp_master* m = cinfo->master;
+ if (m->found_sof_) {
+ JPEGLI_ERROR("Updating quant tables between scans is not supported.");
+ }
+ size_t pos = 2;
+ if (pos == len) {
+ return JPEGLI_ERROR("DQT marker: no quantization table found");
+ }
+ while (pos < len) {
+ JPEG_VERIFY_LEN(1);
+ int quant_table_index = ReadUint8(data, &pos);
+ int precision = quant_table_index >> 4;
+ JPEG_VERIFY_INPUT(precision, 0, 1);
+ quant_table_index &= 0xf;
+ JPEG_VERIFY_INPUT(quant_table_index, 0, NUM_QUANT_TBLS - 1);
+ JPEG_VERIFY_LEN((precision + 1) * DCTSIZE2);
+
+ if (cinfo->quant_tbl_ptrs[quant_table_index] == nullptr) {
+ cinfo->quant_tbl_ptrs[quant_table_index] =
+ jpegli_alloc_quant_table(reinterpret_cast<j_common_ptr>(cinfo));
+ }
+ JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[quant_table_index];
+
+ for (size_t i = 0; i < DCTSIZE2; ++i) {
+ int quant_val =
+ precision ? ReadUint16(data, &pos) : ReadUint8(data, &pos);
+ JPEG_VERIFY_INPUT(quant_val, 1, 65535);
+ quant_table->quantval[kJPEGNaturalOrder[i]] = quant_val;
+ }
+ }
+ JPEG_VERIFY_MARKER_END();
+}
+
+void ProcessDNL(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+ // Ignore marker.
+}
+
+void ProcessDRI(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+ jpeg_decomp_master* m = cinfo->master;
+ if (m->found_dri_) {
+ return JPEGLI_ERROR("Duplicate DRI marker.");
+ }
+ m->found_dri_ = true;
+ size_t pos = 2;
+ JPEG_VERIFY_LEN(2);
+ cinfo->restart_interval = ReadUint16(data, &pos);
+ JPEG_VERIFY_MARKER_END();
+}
+
+void ProcessAPP(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+ jpeg_decomp_master* m = cinfo->master;
+ const uint8_t marker = cinfo->unread_marker;
+ const uint8_t* payload = data + 2;
+ size_t payload_size = len - 2;
+ if (marker == 0xE0) {
+ if (payload_size >= 14 && memcmp(payload, "JFIF", 4) == 0) {
+ cinfo->saw_JFIF_marker = TRUE;
+ cinfo->JFIF_major_version = payload[5];
+ cinfo->JFIF_minor_version = payload[6];
+ cinfo->density_unit = payload[7];
+ cinfo->X_density = (payload[8] << 8) + payload[9];
+ cinfo->Y_density = (payload[10] << 8) + payload[11];
+ }
+ } else if (marker == 0xEE) {
+ if (payload_size >= 12 && memcmp(payload, "Adobe", 5) == 0) {
+ cinfo->saw_Adobe_marker = TRUE;
+ cinfo->Adobe_transform = payload[11];
+ }
+ } else if (marker == 0xE2) {
+ if (payload_size >= sizeof(kIccProfileTag) &&
+ memcmp(payload, kIccProfileTag, sizeof(kIccProfileTag)) == 0) {
+ payload += sizeof(kIccProfileTag);
+ payload_size -= sizeof(kIccProfileTag);
+ if (payload_size < 2) {
+ return JPEGLI_ERROR("ICC chunk is too small.");
+ }
+ uint8_t index = payload[0];
+ uint8_t total = payload[1];
+ ++m->icc_index_;
+ if (m->icc_index_ != index) {
+ return JPEGLI_ERROR("Invalid ICC chunk order.");
+ }
+ if (total == 0) {
+ return JPEGLI_ERROR("Invalid ICC chunk total.");
+ }
+ if (m->icc_total_ == 0) {
+ m->icc_total_ = total;
+ } else if (m->icc_total_ != total) {
+ return JPEGLI_ERROR("Invalid ICC chunk total.");
+ }
+ if (m->icc_index_ > m->icc_total_) {
+ return JPEGLI_ERROR("Invalid ICC chunk index.");
+ }
+ m->icc_profile_.insert(m->icc_profile_.end(), payload + 2,
+ payload + payload_size);
+ }
+ }
+}
+
+void ProcessCOM(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+ // Ignore marker.
+}
+
+void ProcessSOI(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+ jpeg_decomp_master* m = cinfo->master;
+ if (m->found_soi_) {
+ JPEGLI_ERROR("Duplicate SOI marker");
+ }
+ m->found_soi_ = true;
+}
+
+void ProcessEOI(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+ cinfo->master->found_eoi_ = true;
+}
+
+void SaveMarker(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+ const uint8_t marker = cinfo->unread_marker;
+ const uint8_t* payload = data + 2;
+ size_t payload_size = len - 2;
+
+ // Insert new saved marker to the head of the list.
+ jpeg_saved_marker_ptr next = cinfo->marker_list;
+ cinfo->marker_list =
+ jpegli::Allocate<jpeg_marker_struct>(cinfo, 1, JPOOL_IMAGE);
+ cinfo->marker_list->next = next;
+ cinfo->marker_list->marker = marker;
+ cinfo->marker_list->original_length = payload_size;
+ cinfo->marker_list->data_length = payload_size;
+ cinfo->marker_list->data =
+ jpegli::Allocate<uint8_t>(cinfo, payload_size, JPOOL_IMAGE);
+ memcpy(cinfo->marker_list->data, payload, payload_size);
+}
+
+uint8_t ProcessNextMarker(j_decompress_ptr cinfo, const uint8_t* const data,
+ const size_t len, size_t* pos) {
+ jpeg_decomp_master* m = cinfo->master;
+ size_t num_skipped = 0;
+ uint8_t marker = cinfo->unread_marker;
+ if (marker == 0) {
+ // kIsValidMarker[i] == 1 means (0xc0 + i) is a valid marker.
+ static const uint8_t kIsValidMarker[] = {
+ 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
+ };
+ // Skip bytes between markers.
+ while (*pos + 1 < len && (data[*pos] != 0xff || data[*pos + 1] < 0xc0 ||
+ !kIsValidMarker[data[*pos + 1] - 0xc0])) {
+ ++(*pos);
+ ++num_skipped;
+ }
+ if (*pos + 2 > len) {
+ return kNeedMoreInput;
+ }
+ marker = data[*pos + 1];
+ if (num_skipped > 0) {
+ if (m->found_soi_) {
+ JPEGLI_WARN("Skipped %d bytes before marker 0x%02x", (int)num_skipped,
+ marker);
+ } else {
+ JPEGLI_ERROR("Did not find SOI marker.");
+ }
+ }
+ *pos += 2;
+ cinfo->unread_marker = marker;
+ }
+ if (!m->found_soi_ && marker != 0xd8) {
+ JPEGLI_ERROR("Did not find SOI marker.");
+ }
+ if (GetMarkerProcessor(cinfo)) {
+ return kHandleMarkerProcessor;
+ }
+ const uint8_t* marker_data = &data[*pos];
+ size_t marker_len = 0;
+ if (marker != 0xd8 && marker != 0xd9) {
+ if (*pos + 2 > len) {
+ return kNeedMoreInput;
+ }
+ marker_len += (data[*pos] << 8) + data[*pos + 1];
+ if (marker_len < 2) {
+ JPEGLI_ERROR("Invalid marker length");
+ }
+ if (*pos + marker_len > len) {
+ // TODO(szabadka) Limit our memory usage by using the skip_input_data
+ // source manager callback on APP markers that are not saved.
+ return kNeedMoreInput;
+ }
+ if (marker >= 0xe0 && m->markers_to_save_[marker - 0xe0]) {
+ SaveMarker(cinfo, marker_data, marker_len);
+ }
+ }
+ if (marker == 0xc0 || marker == 0xc1 || marker == 0xc2) {
+ ProcessSOF(cinfo, marker_data, marker_len);
+ } else if (marker == 0xc4) {
+ ProcessDHT(cinfo, marker_data, marker_len);
+ } else if (marker == 0xda) {
+ ProcessSOS(cinfo, marker_data, marker_len);
+ } else if (marker == 0xdb) {
+ ProcessDQT(cinfo, marker_data, marker_len);
+ } else if (marker == 0xdc) {
+ ProcessDNL(cinfo, marker_data, marker_len);
+ } else if (marker == 0xdd) {
+ ProcessDRI(cinfo, marker_data, marker_len);
+ } else if (marker >= 0xe0 && marker <= 0xef) {
+ ProcessAPP(cinfo, marker_data, marker_len);
+ } else if (marker == 0xfe) {
+ ProcessCOM(cinfo, marker_data, marker_len);
+ } else if (marker == 0xd8) {
+ ProcessSOI(cinfo, marker_data, marker_len);
+ } else if (marker == 0xd9) {
+ ProcessEOI(cinfo, marker_data, marker_len);
+ } else {
+ JPEGLI_ERROR("Unexpected marker 0x%x", marker);
+ }
+ *pos += marker_len;
+ cinfo->unread_marker = 0;
+ if (marker == 0xda) {
+ return JPEG_REACHED_SOS;
+ } else if (marker == 0xd9) {
+ return JPEG_REACHED_EOI;
+ }
+ return kProcessNextMarker;
+}
+
+} // namespace
+
+jpeg_marker_parser_method GetMarkerProcessor(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ uint8_t marker = cinfo->unread_marker;
+ jpeg_marker_parser_method callback = nullptr;
+ if (marker >= 0xe0 && marker <= 0xef) {
+ callback = m->app_marker_parsers[marker - 0xe0];
+ } else if (marker == 0xfe) {
+ callback = m->com_marker_parser;
+ }
+ return callback;
+}
+
+int ProcessMarkers(j_decompress_ptr cinfo, const uint8_t* const data,
+ const size_t len, size_t* pos) {
+ for (;;) {
+ int status = ProcessNextMarker(cinfo, data, len, pos);
+ if (status != kProcessNextMarker) {
+ return status;
+ }
+ }
+}
+
+} // namespace jpegli
diff --git a/third_party/jpeg-xl/lib/jpegli/decode_marker.h b/third_party/jpeg-xl/lib/jpegli/decode_marker.h
new file mode 100644
index 0000000000..d52c335341
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/decode_marker.h
@@ -0,0 +1,34 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_DECODE_MARKER_H_
+#define LIB_JPEGLI_DECODE_MARKER_H_
+
+/* clang-format off */
+#include <stdint.h>
+#include <stdio.h>
+#include <jpeglib.h>
+/* clang-format on */
+
+namespace jpegli {
+
+// Reads the available input in the source manager's input buffer until either
+// the end of the next SOS marker or the end of the input.
+// The corresponding fields of cinfo are updated with the processed input data.
+// Upon return, the input buffer will be at the start or at the end of a marker
+// data segment (inter-marker data is allowed).
+// Return value is one of:
+// * JPEG_SUSPENDED, if the current input buffer ends before the next SOS or
+// EOI marker. Input buffer refill is handled by the caller;
+// * JPEG_REACHED_SOS, if the the next SOS marker is found;
+// * JPEG_REACHED_EOR, if the end of the input is found.
+int ProcessMarkers(j_decompress_ptr cinfo, const uint8_t* const data,
+ const size_t len, size_t* pos);
+
+jpeg_marker_parser_method GetMarkerProcessor(j_decompress_ptr cinfo);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_DECODE_MARKER_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/decode_scan.cc b/third_party/jpeg-xl/lib/jpegli/decode_scan.cc
new file mode 100644
index 0000000000..29c0172950
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/decode_scan.cc
@@ -0,0 +1,566 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode_scan.h"
+
+#include <string.h>
+
+#include <hwy/base.h>
+
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jxl/base/status.h"
+
+namespace jpegli {
+namespace {
+
+// Max 14 block per MCU (when 1 channel is subsampled)
+// Max 64 nonzero coefficients per block
+// Max 16 symbol bits plus 11 extra bits per nonzero symbol
+// Max 2 bytes per 8 bits (worst case is all bytes are escaped 0xff)
+constexpr int kMaxMCUByteSize = 6048;
+
+// Helper structure to read bits from the entropy coded data segment.
+struct BitReaderState {
+ BitReaderState(const uint8_t* data, const size_t len, size_t pos)
+ : data_(data), len_(len), start_pos_(pos) {
+ Reset(pos);
+ }
+
+ void Reset(size_t pos) {
+ pos_ = pos;
+ val_ = 0;
+ bits_left_ = 0;
+ next_marker_pos_ = len_;
+ FillBitWindow();
+ }
+
+ // Returns the next byte and skips the 0xff/0x00 escape sequences.
+ uint8_t GetNextByte() {
+ if (pos_ >= next_marker_pos_) {
+ ++pos_;
+ return 0;
+ }
+ uint8_t c = data_[pos_++];
+ if (c == 0xff) {
+ uint8_t escape = pos_ < len_ ? data_[pos_] : 0;
+ if (escape == 0) {
+ ++pos_;
+ } else {
+ // 0xff was followed by a non-zero byte, which means that we found the
+ // start of the next marker segment.
+ next_marker_pos_ = pos_ - 1;
+ }
+ }
+ return c;
+ }
+
+ void FillBitWindow() {
+ if (bits_left_ <= 16) {
+ while (bits_left_ <= 56) {
+ val_ <<= 8;
+ val_ |= (uint64_t)GetNextByte();
+ bits_left_ += 8;
+ }
+ }
+ }
+
+ int ReadBits(int nbits) {
+ FillBitWindow();
+ uint64_t val = (val_ >> (bits_left_ - nbits)) & ((1ULL << nbits) - 1);
+ bits_left_ -= nbits;
+ return val;
+ }
+
+ // Sets *pos to the next stream position, and *bit_pos to the bit position
+ // within the next byte where parsing should continue.
+ // Returns false if the stream ended too early.
+ bool FinishStream(size_t* pos, size_t* bit_pos) {
+ *bit_pos = (8 - (bits_left_ & 7)) & 7;
+ // Give back some bytes that we did not use.
+ int unused_bytes_left = DivCeil(bits_left_, 8);
+ while (unused_bytes_left-- > 0) {
+ --pos_;
+ // If we give back a 0 byte, we need to check if it was a 0xff/0x00 escape
+ // sequence, and if yes, we need to give back one more byte.
+ if (((pos_ == len_ && pos_ == next_marker_pos_) ||
+ (pos_ > 0 && pos_ < next_marker_pos_ && data_[pos_] == 0)) &&
+ (data_[pos_ - 1] == 0xff)) {
+ --pos_;
+ }
+ }
+ if (pos_ >= next_marker_pos_) {
+ *pos = next_marker_pos_;
+ if (pos_ > next_marker_pos_ || *bit_pos > 0) {
+ // Data ran out before the scan was complete.
+ return false;
+ }
+ }
+ *pos = pos_;
+ return true;
+ }
+
+ const uint8_t* data_;
+ const size_t len_;
+ size_t pos_;
+ uint64_t val_;
+ int bits_left_;
+ size_t next_marker_pos_;
+ size_t start_pos_;
+};
+
+// Returns the next Huffman-coded symbol.
+int ReadSymbol(const HuffmanTableEntry* table, BitReaderState* br) {
+ int nbits;
+ br->FillBitWindow();
+ int val = (br->val_ >> (br->bits_left_ - 8)) & 0xff;
+ table += val;
+ nbits = table->bits - 8;
+ if (nbits > 0) {
+ br->bits_left_ -= 8;
+ table += table->value;
+ val = (br->val_ >> (br->bits_left_ - nbits)) & ((1 << nbits) - 1);
+ table += val;
+ }
+ br->bits_left_ -= table->bits;
+ return table->value;
+}
+
+/**
+ * Returns the DC diff or AC value for extra bits value x and prefix code s.
+ *
+ * CCITT Rec. T.81 (1992 E)
+ * Table F.1 – Difference magnitude categories for DC coding
+ * SSSS | DIFF values
+ * ------+--------------------------
+ * 0 | 0
+ * 1 | –1, 1
+ * 2 | –3, –2, 2, 3
+ * 3 | –7..–4, 4..7
+ * ......|..........................
+ * 11 | –2047..–1024, 1024..2047
+ *
+ * CCITT Rec. T.81 (1992 E)
+ * Table F.2 – Categories assigned to coefficient values
+ * [ Same as Table F.1, but does not include SSSS equal to 0 and 11]
+ *
+ *
+ * CCITT Rec. T.81 (1992 E)
+ * F.1.2.1.1 Structure of DC code table
+ * For each category,... additional bits... appended... to uniquely identify
+ * which difference... occurred... When DIFF is positive... SSSS... bits of DIFF
+ * are appended. When DIFF is negative... SSSS... bits of (DIFF – 1) are
+ * appended... Most significant bit... is 0 for negative differences and 1 for
+ * positive differences.
+ *
+ * In other words the upper half of extra bits range represents DIFF as is.
+ * The lower half represents the negative DIFFs with an offset.
+ */
+int HuffExtend(int x, int s) {
+ JXL_DASSERT(s >= 1);
+ int half = 1 << (s - 1);
+ if (x >= half) {
+ JXL_DASSERT(x < (1 << s));
+ return x;
+ } else {
+ return x - (1 << s) + 1;
+ }
+}
+
+// Decodes one 8x8 block of DCT coefficients from the bit stream.
+bool DecodeDCTBlock(const HuffmanTableEntry* dc_huff,
+ const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al,
+ int* eobrun, BitReaderState* br, coeff_t* last_dc_coeff,
+ coeff_t* coeffs) {
+ // Nowadays multiplication is even faster than variable shift.
+ int Am = 1 << Al;
+ bool eobrun_allowed = Ss > 0;
+ if (Ss == 0) {
+ int s = ReadSymbol(dc_huff, br);
+ if (s >= kJpegDCAlphabetSize) {
+ return false;
+ }
+ int diff = 0;
+ if (s > 0) {
+ int bits = br->ReadBits(s);
+ diff = HuffExtend(bits, s);
+ }
+ int coeff = diff + *last_dc_coeff;
+ const int dc_coeff = coeff * Am;
+ coeffs[0] = dc_coeff;
+ // TODO(eustas): is there a more elegant / explicit way to check this?
+ if (dc_coeff != coeffs[0]) {
+ return false;
+ }
+ *last_dc_coeff = coeff;
+ ++Ss;
+ }
+ if (Ss > Se) {
+ return true;
+ }
+ if (*eobrun > 0) {
+ --(*eobrun);
+ return true;
+ }
+ for (int k = Ss; k <= Se; k++) {
+ int sr = ReadSymbol(ac_huff, br);
+ if (sr >= kJpegHuffmanAlphabetSize) {
+ return false;
+ }
+ int r = sr >> 4;
+ int s = sr & 15;
+ if (s > 0) {
+ k += r;
+ if (k > Se) {
+ return false;
+ }
+ if (s + Al >= kJpegDCAlphabetSize) {
+ return false;
+ }
+ int bits = br->ReadBits(s);
+ int coeff = HuffExtend(bits, s);
+ coeffs[kJPEGNaturalOrder[k]] = coeff * Am;
+ } else if (r == 15) {
+ k += 15;
+ } else {
+ *eobrun = 1 << r;
+ if (r > 0) {
+ if (!eobrun_allowed) {
+ return false;
+ }
+ *eobrun += br->ReadBits(r);
+ }
+ break;
+ }
+ }
+ --(*eobrun);
+ return true;
+}
+
+bool RefineDCTBlock(const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al,
+ int* eobrun, BitReaderState* br, coeff_t* coeffs) {
+ // Nowadays multiplication is even faster than variable shift.
+ int Am = 1 << Al;
+ bool eobrun_allowed = Ss > 0;
+ if (Ss == 0) {
+ int s = br->ReadBits(1);
+ coeff_t dc_coeff = coeffs[0];
+ dc_coeff |= s * Am;
+ coeffs[0] = dc_coeff;
+ ++Ss;
+ }
+ if (Ss > Se) {
+ return true;
+ }
+ int p1 = Am;
+ int m1 = -Am;
+ int k = Ss;
+ int r;
+ int s;
+ bool in_zero_run = false;
+ if (*eobrun <= 0) {
+ for (; k <= Se; k++) {
+ s = ReadSymbol(ac_huff, br);
+ if (s >= kJpegHuffmanAlphabetSize) {
+ return false;
+ }
+ r = s >> 4;
+ s &= 15;
+ if (s) {
+ if (s != 1) {
+ return false;
+ }
+ s = br->ReadBits(1) ? p1 : m1;
+ in_zero_run = false;
+ } else {
+ if (r != 15) {
+ *eobrun = 1 << r;
+ if (r > 0) {
+ if (!eobrun_allowed) {
+ return false;
+ }
+ *eobrun += br->ReadBits(r);
+ }
+ break;
+ }
+ in_zero_run = true;
+ }
+ do {
+ coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]];
+ if (thiscoef != 0) {
+ if (br->ReadBits(1)) {
+ if ((thiscoef & p1) == 0) {
+ if (thiscoef >= 0) {
+ thiscoef += p1;
+ } else {
+ thiscoef += m1;
+ }
+ }
+ }
+ coeffs[kJPEGNaturalOrder[k]] = thiscoef;
+ } else {
+ if (--r < 0) {
+ break;
+ }
+ }
+ k++;
+ } while (k <= Se);
+ if (s) {
+ if (k > Se) {
+ return false;
+ }
+ coeffs[kJPEGNaturalOrder[k]] = s;
+ }
+ }
+ }
+ if (in_zero_run) {
+ return false;
+ }
+ if (*eobrun > 0) {
+ for (; k <= Se; k++) {
+ coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]];
+ if (thiscoef != 0) {
+ if (br->ReadBits(1)) {
+ if ((thiscoef & p1) == 0) {
+ if (thiscoef >= 0) {
+ thiscoef += p1;
+ } else {
+ thiscoef += m1;
+ }
+ }
+ }
+ coeffs[kJPEGNaturalOrder[k]] = thiscoef;
+ }
+ }
+ }
+ --(*eobrun);
+ return true;
+}
+
+void SaveMCUCodingState(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ memcpy(m->mcu_.last_dc_coeff, m->last_dc_coeff_, sizeof(m->last_dc_coeff_));
+ m->mcu_.eobrun = m->eobrun_;
+ size_t offset = 0;
+ for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+ const jpeg_component_info* comp = cinfo->cur_comp_info[i];
+ int c = comp->component_index;
+ size_t block_x = m->scan_mcu_col_ * comp->MCU_width;
+ for (int iy = 0; iy < comp->MCU_height; ++iy) {
+ size_t block_y = m->scan_mcu_row_ * comp->MCU_height + iy;
+ size_t biy = block_y % comp->v_samp_factor;
+ if (block_y >= comp->height_in_blocks) {
+ continue;
+ }
+ size_t nblocks =
+ std::min<size_t>(comp->MCU_width, comp->width_in_blocks - block_x);
+ size_t ncoeffs = nblocks * DCTSIZE2;
+ coeff_t* coeffs = &m->coeff_rows[c][biy][block_x][0];
+ memcpy(&m->mcu_.coeffs[offset], coeffs, ncoeffs * sizeof(coeffs[0]));
+ offset += ncoeffs;
+ }
+ }
+}
+
+void RestoreMCUCodingState(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ memcpy(m->last_dc_coeff_, m->mcu_.last_dc_coeff, sizeof(m->last_dc_coeff_));
+ m->eobrun_ = m->mcu_.eobrun;
+ size_t offset = 0;
+ for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+ const jpeg_component_info* comp = cinfo->cur_comp_info[i];
+ int c = comp->component_index;
+ size_t block_x = m->scan_mcu_col_ * comp->MCU_width;
+ for (int iy = 0; iy < comp->MCU_height; ++iy) {
+ size_t block_y = m->scan_mcu_row_ * comp->MCU_height + iy;
+ size_t biy = block_y % comp->v_samp_factor;
+ if (block_y >= comp->height_in_blocks) {
+ continue;
+ }
+ size_t nblocks =
+ std::min<size_t>(comp->MCU_width, comp->width_in_blocks - block_x);
+ size_t ncoeffs = nblocks * DCTSIZE2;
+ coeff_t* coeffs = &m->coeff_rows[c][biy][block_x][0];
+ memcpy(coeffs, &m->mcu_.coeffs[offset], ncoeffs * sizeof(coeffs[0]));
+ offset += ncoeffs;
+ }
+ }
+}
+
+bool FinishScan(j_decompress_ptr cinfo, const uint8_t* data, const size_t len,
+ size_t* pos, size_t* bit_pos) {
+ jpeg_decomp_master* m = cinfo->master;
+ if (m->eobrun_ > 0) {
+ JPEGLI_ERROR("End-of-block run too long.");
+ }
+ m->eobrun_ = -1;
+ memset(m->last_dc_coeff_, 0, sizeof(m->last_dc_coeff_));
+ if (*bit_pos == 0) {
+ return true;
+ }
+ if (data[*pos] == 0xff) {
+ // After last br.FinishStream we checked that there is at least 2 bytes
+ // in the buffer.
+ JXL_DASSERT(*pos + 1 < len);
+ // br.FinishStream would have detected an early marker.
+ JXL_DASSERT(data[*pos + 1] == 0);
+ *pos += 2;
+ } else {
+ *pos += 1;
+ }
+ *bit_pos = 0;
+ return true;
+}
+
+} // namespace
+
+void PrepareForiMCURow(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+ const jpeg_component_info* comp = cinfo->cur_comp_info[i];
+ int c = comp->component_index;
+ int by0 = cinfo->input_iMCU_row * comp->v_samp_factor;
+ int block_rows_left = comp->height_in_blocks - by0;
+ int max_block_rows = std::min(comp->v_samp_factor, block_rows_left);
+ int offset = m->streaming_mode_ ? 0 : by0;
+ m->coeff_rows[c] = (*cinfo->mem->access_virt_barray)(
+ reinterpret_cast<j_common_ptr>(cinfo), m->coef_arrays[c], offset,
+ max_block_rows, true);
+ }
+}
+
+int ProcessScan(j_decompress_ptr cinfo, const uint8_t* const data,
+ const size_t len, size_t* pos, size_t* bit_pos) {
+ if (len == 0) {
+ return kNeedMoreInput;
+ }
+ jpeg_decomp_master* m = cinfo->master;
+ for (;;) {
+ // Handle the restart intervals.
+ if (cinfo->restart_interval > 0 && m->restarts_to_go_ == 0) {
+ if (!FinishScan(cinfo, data, len, pos, bit_pos)) {
+ return kNeedMoreInput;
+ }
+ // Go to the next marker, warn if we had to skip any data.
+ size_t num_skipped = 0;
+ while (*pos + 1 < len && (data[*pos] != 0xff || data[*pos + 1] == 0 ||
+ data[*pos + 1] == 0xff)) {
+ ++(*pos);
+ ++num_skipped;
+ }
+ if (num_skipped > 0) {
+ JPEGLI_WARN("Skipped %d bytes before restart marker", (int)num_skipped);
+ }
+ if (*pos + 2 > len) {
+ return kNeedMoreInput;
+ }
+ cinfo->unread_marker = data[*pos + 1];
+ *pos += 2;
+ return kHandleRestart;
+ }
+
+ size_t start_pos = *pos;
+ BitReaderState br(data, len, start_pos);
+ if (*bit_pos > 0) {
+ br.ReadBits(*bit_pos);
+ }
+ if (start_pos + kMaxMCUByteSize > len) {
+ SaveMCUCodingState(cinfo);
+ }
+
+ // Decode one MCU.
+ HWY_ALIGN_MAX coeff_t dummy_block[DCTSIZE2];
+ bool scan_ok = true;
+ for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+ const jpeg_component_info* comp = cinfo->cur_comp_info[i];
+ int c = comp->component_index;
+ const HuffmanTableEntry* dc_lut =
+ &m->dc_huff_lut_[comp->dc_tbl_no * kJpegHuffmanLutSize];
+ const HuffmanTableEntry* ac_lut =
+ &m->ac_huff_lut_[comp->ac_tbl_no * kJpegHuffmanLutSize];
+ for (int iy = 0; iy < comp->MCU_height; ++iy) {
+ size_t block_y = m->scan_mcu_row_ * comp->MCU_height + iy;
+ int biy = block_y % comp->v_samp_factor;
+ for (int ix = 0; ix < comp->MCU_width; ++ix) {
+ size_t block_x = m->scan_mcu_col_ * comp->MCU_width + ix;
+ coeff_t* coeffs;
+ if (block_x >= comp->width_in_blocks ||
+ block_y >= comp->height_in_blocks) {
+ // Note that it is OK that dummy_block is uninitialized because
+ // it will never be used in any branches, even in the RefineDCTBlock
+ // case, because only DC scans can be interleaved and we don't use
+ // the zero-ness of the DC coeff in the DC refinement code-path.
+ coeffs = dummy_block;
+ } else {
+ coeffs = &m->coeff_rows[c][biy][block_x][0];
+ }
+ if (cinfo->Ah == 0) {
+ if (!DecodeDCTBlock(dc_lut, ac_lut, cinfo->Ss, cinfo->Se, cinfo->Al,
+ &m->eobrun_, &br,
+ &m->last_dc_coeff_[comp->component_index],
+ coeffs)) {
+ scan_ok = false;
+ }
+ } else {
+ if (!RefineDCTBlock(ac_lut, cinfo->Ss, cinfo->Se, cinfo->Al,
+ &m->eobrun_, &br, coeffs)) {
+ scan_ok = false;
+ }
+ }
+ }
+ }
+ }
+ size_t new_pos;
+ size_t new_bit_pos;
+ bool stream_ok = br.FinishStream(&new_pos, &new_bit_pos);
+ if (new_pos + 2 > len) {
+ // If reading stopped within the last two bytes, we have to request more
+ // input even if FinishStream() returned true, since the Huffman code
+ // reader could have peaked ahead some bits past the current input chunk
+ // and thus the last prefix code length could have been wrong. We can do
+ // this because a valid JPEG bit stream has two extra bytes at the end.
+ RestoreMCUCodingState(cinfo);
+ return kNeedMoreInput;
+ }
+ *pos = new_pos;
+ *bit_pos = new_bit_pos;
+ if (!stream_ok) {
+ // We hit a marker during parsing.
+ JXL_DASSERT(data[*pos] == 0xff);
+ JXL_DASSERT(data[*pos + 1] != 0);
+ RestoreMCUCodingState(cinfo);
+ JPEGLI_WARN("Incomplete scan detected.");
+ return JPEG_SCAN_COMPLETED;
+ }
+ if (!scan_ok) {
+ JPEGLI_ERROR("Failed to decode DCT block");
+ }
+ if (m->restarts_to_go_ > 0) {
+ --m->restarts_to_go_;
+ }
+ ++m->scan_mcu_col_;
+ if (m->scan_mcu_col_ == cinfo->MCUs_per_row) {
+ ++m->scan_mcu_row_;
+ m->scan_mcu_col_ = 0;
+ if (m->scan_mcu_row_ == cinfo->MCU_rows_in_scan) {
+ if (!FinishScan(cinfo, data, len, pos, bit_pos)) {
+ return kNeedMoreInput;
+ }
+ break;
+ } else if ((m->scan_mcu_row_ % m->mcu_rows_per_iMCU_row_) == 0) {
+ // Current iMCU row is done.
+ break;
+ }
+ }
+ }
+ ++cinfo->input_iMCU_row;
+ if (cinfo->input_iMCU_row < cinfo->total_iMCU_rows) {
+ PrepareForiMCURow(cinfo);
+ return JPEG_ROW_COMPLETED;
+ }
+ return JPEG_SCAN_COMPLETED;
+}
+
+} // namespace jpegli
diff --git a/third_party/jpeg-xl/lib/jpegli/decode_scan.h b/third_party/jpeg-xl/lib/jpegli/decode_scan.h
new file mode 100644
index 0000000000..61d05c67d6
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/decode_scan.h
@@ -0,0 +1,33 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_DECODE_SCAN_H_
+#define LIB_JPEGLI_DECODE_SCAN_H_
+
+/* clang-format off */
+#include <stdint.h>
+#include <stdio.h>
+#include <jpeglib.h>
+/* clang-format on */
+
+namespace jpegli {
+
+// Reads the available input in the source manager's input buffer until the end
+// of the next iMCU row.
+// The corresponding fields of cinfo are updated with the processed input data.
+// Upon return, the input buffer will be at the start of an MCU, or at the end
+// of the scan.
+// Return value is one of:
+// * JPEG_SUSPENDED, if the input buffer ends before the end of an iMCU row;
+// * JPEG_ROW_COMPLETED, if the next iMCU row (but not the scan) is reached;
+// * JPEG_SCAN_COMPLETED, if the end of the scan is reached.
+int ProcessScan(j_decompress_ptr cinfo, const uint8_t* const data,
+ const size_t len, size_t* pos, size_t* bit_pos);
+
+void PrepareForiMCURow(j_decompress_ptr cinfo);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_DECODE_SCAN_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/destination_manager.cc b/third_party/jpeg-xl/lib/jpegli/destination_manager.cc
new file mode 100644
index 0000000000..9bc269f0c9
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/destination_manager.cc
@@ -0,0 +1,148 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <string.h>
+
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+
+namespace jpegli {
+
+constexpr size_t kDestBufferSize = 64 << 10;
+
+struct StdioDestinationManager {
+ jpeg_destination_mgr pub;
+ FILE* f;
+ uint8_t* buffer;
+
+ static void init_destination(j_compress_ptr cinfo) {
+ auto dest = reinterpret_cast<StdioDestinationManager*>(cinfo->dest);
+ dest->pub.next_output_byte = dest->buffer;
+ dest->pub.free_in_buffer = kDestBufferSize;
+ }
+
+ static boolean empty_output_buffer(j_compress_ptr cinfo) {
+ auto dest = reinterpret_cast<StdioDestinationManager*>(cinfo->dest);
+ if (fwrite(dest->buffer, 1, kDestBufferSize, dest->f) != kDestBufferSize) {
+ JPEGLI_ERROR("Failed to write to output stream.");
+ }
+ dest->pub.next_output_byte = dest->buffer;
+ dest->pub.free_in_buffer = kDestBufferSize;
+ return TRUE;
+ }
+
+ static void term_destination(j_compress_ptr cinfo) {
+ auto dest = reinterpret_cast<StdioDestinationManager*>(cinfo->dest);
+ size_t bytes_left = kDestBufferSize - dest->pub.free_in_buffer;
+ if (bytes_left &&
+ fwrite(dest->buffer, 1, bytes_left, dest->f) != bytes_left) {
+ JPEGLI_ERROR("Failed to write to output stream.");
+ }
+ fflush(dest->f);
+ if (ferror(dest->f)) {
+ JPEGLI_ERROR("Failed to write to output stream.");
+ }
+ }
+};
+
+struct MemoryDestinationManager {
+ jpeg_destination_mgr pub;
+ // Output buffer supplied by the application
+ uint8_t** output;
+ unsigned long* output_size;
+ // Output buffer allocated by us.
+ uint8_t* temp_buffer;
+ // Current output buffer (either application supplied or allocated by us).
+ uint8_t* current_buffer;
+ size_t buffer_size;
+
+ static void init_destination(j_compress_ptr cinfo) {}
+
+ static boolean empty_output_buffer(j_compress_ptr cinfo) {
+ auto dest = reinterpret_cast<MemoryDestinationManager*>(cinfo->dest);
+ uint8_t* next_buffer =
+ reinterpret_cast<uint8_t*>(malloc(dest->buffer_size * 2));
+ memcpy(next_buffer, dest->current_buffer, dest->buffer_size);
+ if (dest->temp_buffer != nullptr) {
+ free(dest->temp_buffer);
+ }
+ dest->temp_buffer = next_buffer;
+ dest->current_buffer = next_buffer;
+ *dest->output = next_buffer;
+ *dest->output_size = dest->buffer_size;
+ dest->pub.next_output_byte = next_buffer + dest->buffer_size;
+ dest->pub.free_in_buffer = dest->buffer_size;
+ dest->buffer_size *= 2;
+ return TRUE;
+ }
+
+ static void term_destination(j_compress_ptr cinfo) {
+ auto dest = reinterpret_cast<MemoryDestinationManager*>(cinfo->dest);
+ *dest->output_size = dest->buffer_size - dest->pub.free_in_buffer;
+ }
+};
+
+} // namespace jpegli
+
+void jpegli_stdio_dest(j_compress_ptr cinfo, FILE* outfile) {
+ if (outfile == nullptr) {
+ JPEGLI_ERROR("jpegli_stdio_dest: Invalid destination.");
+ }
+ if (cinfo->dest && cinfo->dest->init_destination !=
+ jpegli::StdioDestinationManager::init_destination) {
+ JPEGLI_ERROR("jpegli_stdio_dest: a different dest manager was already set");
+ }
+ if (!cinfo->dest) {
+ cinfo->dest = reinterpret_cast<jpeg_destination_mgr*>(
+ jpegli::Allocate<jpegli::StdioDestinationManager>(cinfo, 1));
+ }
+ auto dest = reinterpret_cast<jpegli::StdioDestinationManager*>(cinfo->dest);
+ dest->f = outfile;
+ dest->buffer = jpegli::Allocate<uint8_t>(cinfo, jpegli::kDestBufferSize);
+ dest->pub.next_output_byte = dest->buffer;
+ dest->pub.free_in_buffer = jpegli::kDestBufferSize;
+ dest->pub.init_destination =
+ jpegli::StdioDestinationManager::init_destination;
+ dest->pub.empty_output_buffer =
+ jpegli::StdioDestinationManager::empty_output_buffer;
+ dest->pub.term_destination =
+ jpegli::StdioDestinationManager::term_destination;
+}
+
+void jpegli_mem_dest(j_compress_ptr cinfo, unsigned char** outbuffer,
+ unsigned long* outsize) {
+ if (outbuffer == nullptr || outsize == nullptr) {
+ JPEGLI_ERROR("jpegli_mem_dest: Invalid destination.");
+ }
+ if (cinfo->dest && cinfo->dest->init_destination !=
+ jpegli::MemoryDestinationManager::init_destination) {
+ JPEGLI_ERROR("jpegli_mem_dest: a different dest manager was already set");
+ }
+ if (!cinfo->dest) {
+ auto dest = jpegli::Allocate<jpegli::MemoryDestinationManager>(cinfo, 1);
+ dest->temp_buffer = nullptr;
+ cinfo->dest = reinterpret_cast<jpeg_destination_mgr*>(dest);
+ }
+ auto dest = reinterpret_cast<jpegli::MemoryDestinationManager*>(cinfo->dest);
+ dest->pub.init_destination =
+ jpegli::MemoryDestinationManager::init_destination;
+ dest->pub.empty_output_buffer =
+ jpegli::MemoryDestinationManager::empty_output_buffer;
+ dest->pub.term_destination =
+ jpegli::MemoryDestinationManager::term_destination;
+ dest->output = outbuffer;
+ dest->output_size = outsize;
+ if (*outbuffer == nullptr || *outsize == 0) {
+ dest->temp_buffer =
+ reinterpret_cast<uint8_t*>(malloc(jpegli::kDestBufferSize));
+ *outbuffer = dest->temp_buffer;
+ *outsize = jpegli::kDestBufferSize;
+ }
+ dest->current_buffer = *outbuffer;
+ dest->buffer_size = *outsize;
+ dest->pub.next_output_byte = dest->current_buffer;
+ dest->pub.free_in_buffer = dest->buffer_size;
+}
diff --git a/third_party/jpeg-xl/lib/jpegli/downsample.cc b/third_party/jpeg-xl/lib/jpegli/downsample.cc
new file mode 100644
index 0000000000..df2c156972
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/downsample.cc
@@ -0,0 +1,356 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/downsample.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/downsample.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/error.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::Vec;
+
+using D = HWY_CAPPED(float, 8);
+constexpr D d;
+
+void DownsampleRow2x1(const float* row_in, size_t len, float* row_out) {
+ const size_t N = Lanes(d);
+ const size_t len_out = len / 2;
+ const auto mul = Set(d, 0.5f);
+ Vec<D> v0, v1;
+ for (size_t x = 0; x < len_out; x += N) {
+ LoadInterleaved2(d, row_in + 2 * x, v0, v1);
+ Store(Mul(mul, Add(v0, v1)), d, row_out + x);
+ }
+}
+
+void DownsampleRow3x1(const float* row_in, size_t len, float* row_out) {
+ const size_t N = Lanes(d);
+ const size_t len_out = len / 3;
+ const auto mul = Set(d, 1.0f / 3);
+ Vec<D> v0, v1, v2;
+ for (size_t x = 0; x < len_out; x += N) {
+ LoadInterleaved3(d, row_in + 3 * x, v0, v1, v2);
+ Store(Mul(mul, Add(Add(v0, v1), v2)), d, row_out + x);
+ }
+}
+
+void DownsampleRow4x1(const float* row_in, size_t len, float* row_out) {
+ const size_t N = Lanes(d);
+ const size_t len_out = len / 4;
+ const auto mul = Set(d, 0.25f);
+ Vec<D> v0, v1, v2, v3;
+ for (size_t x = 0; x < len_out; x += N) {
+ LoadInterleaved4(d, row_in + 4 * x, v0, v1, v2, v3);
+ Store(Mul(mul, Add(Add(v0, v1), Add(v2, v3))), d, row_out + x);
+ }
+}
+
+void Downsample2x1(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+ float* row_out) {
+ DownsampleRow2x1(rows_in[0], len, row_out);
+}
+
+void Downsample3x1(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+ float* row_out) {
+ DownsampleRow3x1(rows_in[0], len, row_out);
+}
+
+void Downsample4x1(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+ float* row_out) {
+ DownsampleRow4x1(rows_in[0], len, row_out);
+}
+
+void Downsample1x2(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+ float* row_out) {
+ const size_t N = Lanes(d);
+ const auto mul = Set(d, 0.5f);
+ float* row0 = rows_in[0];
+ float* row1 = rows_in[1];
+ for (size_t x = 0; x < len; x += N) {
+ Store(Mul(mul, Add(Load(d, row0 + x), Load(d, row1 + x))), d, row_out + x);
+ }
+}
+
+void Downsample2x2(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+ float* row_out) {
+ const size_t N = Lanes(d);
+ const size_t len_out = len / 2;
+ const auto mul = Set(d, 0.25f);
+ float* row0 = rows_in[0];
+ float* row1 = rows_in[1];
+ Vec<D> v0, v1, v2, v3;
+ for (size_t x = 0; x < len_out; x += N) {
+ LoadInterleaved2(d, row0 + 2 * x, v0, v1);
+ LoadInterleaved2(d, row1 + 2 * x, v2, v3);
+ Store(Mul(mul, Add(Add(v0, v1), Add(v2, v3))), d, row_out + x);
+ }
+}
+
+void Downsample3x2(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+ float* row_out) {
+ DownsampleRow3x1(rows_in[0], len, rows_in[0]);
+ DownsampleRow3x1(rows_in[1], len, rows_in[1]);
+ Downsample1x2(rows_in, len / 3, row_out);
+}
+
+void Downsample4x2(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+ float* row_out) {
+ DownsampleRow4x1(rows_in[0], len, rows_in[0]);
+ DownsampleRow4x1(rows_in[1], len, rows_in[1]);
+ Downsample1x2(rows_in, len / 4, row_out);
+}
+
+void Downsample1x3(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+ float* row_out) {
+ const size_t N = Lanes(d);
+ const auto mul = Set(d, 1.0f / 3);
+ float* row0 = rows_in[0];
+ float* row1 = rows_in[1];
+ float* row2 = rows_in[2];
+ for (size_t x = 0; x < len; x += N) {
+ const auto in0 = Load(d, row0 + x);
+ const auto in1 = Load(d, row1 + x);
+ const auto in2 = Load(d, row2 + x);
+ Store(Mul(mul, Add(Add(in0, in1), in2)), d, row_out + x);
+ }
+}
+
+void Downsample2x3(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+ float* row_out) {
+ DownsampleRow2x1(rows_in[0], len, rows_in[0]);
+ DownsampleRow2x1(rows_in[1], len, rows_in[1]);
+ DownsampleRow2x1(rows_in[2], len, rows_in[2]);
+ Downsample1x3(rows_in, len / 2, row_out);
+}
+
+void Downsample3x3(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+ float* row_out) {
+ DownsampleRow3x1(rows_in[0], len, rows_in[0]);
+ DownsampleRow3x1(rows_in[1], len, rows_in[1]);
+ DownsampleRow3x1(rows_in[2], len, rows_in[2]);
+ Downsample1x3(rows_in, len / 3, row_out);
+}
+
+void Downsample4x3(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+ float* row_out) {
+ DownsampleRow4x1(rows_in[0], len, rows_in[0]);
+ DownsampleRow4x1(rows_in[1], len, rows_in[1]);
+ DownsampleRow4x1(rows_in[2], len, rows_in[2]);
+ Downsample1x3(rows_in, len / 4, row_out);
+}
+
+void Downsample1x4(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+ float* row_out) {
+ const size_t N = Lanes(d);
+ const auto mul = Set(d, 0.25f);
+ float* row0 = rows_in[0];
+ float* row1 = rows_in[1];
+ float* row2 = rows_in[2];
+ float* row3 = rows_in[3];
+ for (size_t x = 0; x < len; x += N) {
+ const auto in0 = Load(d, row0 + x);
+ const auto in1 = Load(d, row1 + x);
+ const auto in2 = Load(d, row2 + x);
+ const auto in3 = Load(d, row3 + x);
+ Store(Mul(mul, Add(Add(in0, in1), Add(in2, in3))), d, row_out + x);
+ }
+}
+
+void Downsample2x4(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+ float* row_out) {
+ DownsampleRow2x1(rows_in[0], len, rows_in[0]);
+ DownsampleRow2x1(rows_in[1], len, rows_in[1]);
+ DownsampleRow2x1(rows_in[2], len, rows_in[2]);
+ DownsampleRow2x1(rows_in[3], len, rows_in[3]);
+ Downsample1x4(rows_in, len / 2, row_out);
+}
+
+void Downsample3x4(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+ float* row_out) {
+ DownsampleRow3x1(rows_in[0], len, rows_in[0]);
+ DownsampleRow3x1(rows_in[1], len, rows_in[1]);
+ DownsampleRow3x1(rows_in[2], len, rows_in[2]);
+ DownsampleRow3x1(rows_in[3], len, rows_in[3]);
+ Downsample1x4(rows_in, len / 3, row_out);
+}
+
+void Downsample4x4(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+ float* row_out) {
+ DownsampleRow4x1(rows_in[0], len, rows_in[0]);
+ DownsampleRow4x1(rows_in[1], len, rows_in[1]);
+ DownsampleRow4x1(rows_in[2], len, rows_in[2]);
+ DownsampleRow4x1(rows_in[3], len, rows_in[3]);
+ Downsample1x4(rows_in, len / 4, row_out);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+
+HWY_EXPORT(Downsample1x2);
+HWY_EXPORT(Downsample1x3);
+HWY_EXPORT(Downsample1x4);
+HWY_EXPORT(Downsample2x1);
+HWY_EXPORT(Downsample2x2);
+HWY_EXPORT(Downsample2x3);
+HWY_EXPORT(Downsample2x4);
+HWY_EXPORT(Downsample3x1);
+HWY_EXPORT(Downsample3x2);
+HWY_EXPORT(Downsample3x3);
+HWY_EXPORT(Downsample3x4);
+HWY_EXPORT(Downsample4x1);
+HWY_EXPORT(Downsample4x2);
+HWY_EXPORT(Downsample4x3);
+HWY_EXPORT(Downsample4x4);
+
+void NullDownsample(float* rows_in[MAX_SAMP_FACTOR], size_t len,
+ float* row_out) {}
+
+void ChooseDownsampleMethods(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ for (int c = 0; c < cinfo->num_components; c++) {
+ m->downsample_method[c] = nullptr;
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ const int h_factor = cinfo->max_h_samp_factor / comp->h_samp_factor;
+ const int v_factor = cinfo->max_v_samp_factor / comp->v_samp_factor;
+ if (v_factor == 1) {
+ if (h_factor == 1) {
+ m->downsample_method[c] = NullDownsample;
+ } else if (h_factor == 2) {
+ m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample2x1);
+ } else if (h_factor == 3) {
+ m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x1);
+ } else if (h_factor == 4) {
+ m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample4x1);
+ }
+ } else if (v_factor == 2) {
+ if (h_factor == 1) {
+ m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample1x2);
+ } else if (h_factor == 2) {
+ m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample2x2);
+ } else if (h_factor == 3) {
+ m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x2);
+ } else if (h_factor == 4) {
+ m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample4x2);
+ }
+ } else if (v_factor == 3) {
+ if (h_factor == 1) {
+ m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample1x2);
+ } else if (h_factor == 2) {
+ m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample2x2);
+ } else if (h_factor == 3) {
+ m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x2);
+ } else if (h_factor == 4) {
+ m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample4x2);
+ }
+ } else if (v_factor == 4) {
+ if (h_factor == 1) {
+ m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample1x4);
+ } else if (h_factor == 2) {
+ m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample2x4);
+ } else if (h_factor == 3) {
+ m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x4);
+ } else if (h_factor == 4) {
+ m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample4x4);
+ }
+ }
+ if (m->downsample_method[c] == nullptr) {
+ JPEGLI_ERROR("Unsupported downsampling ratio %dx%d", h_factor, v_factor);
+ }
+ }
+}
+
+void DownsampleInputBuffer(j_compress_ptr cinfo) {
+ if (cinfo->max_h_samp_factor == 1 && cinfo->max_v_samp_factor == 1) {
+ return;
+ }
+ jpeg_comp_master* m = cinfo->master;
+ const size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor;
+ const size_t y0 = m->next_iMCU_row * iMCU_height;
+ const size_t y1 = y0 + iMCU_height;
+ const size_t xsize_padded = m->xsize_blocks * DCTSIZE;
+ for (int c = 0; c < cinfo->num_components; c++) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ const int h_factor = cinfo->max_h_samp_factor / comp->h_samp_factor;
+ const int v_factor = cinfo->max_v_samp_factor / comp->v_samp_factor;
+ if (h_factor == 1 && v_factor == 1) {
+ continue;
+ }
+ auto& input = *m->smooth_input[c];
+ auto& output = *m->raw_data[c];
+ const size_t yout0 = y0 / v_factor;
+ float* rows_in[MAX_SAMP_FACTOR];
+ for (size_t yin = y0, yout = yout0; yin < y1; yin += v_factor, ++yout) {
+ for (int iy = 0; iy < v_factor; ++iy) {
+ rows_in[iy] = input.Row(yin + iy);
+ }
+ float* row_out = output.Row(yout);
+ (*m->downsample_method[c])(rows_in, xsize_padded, row_out);
+ }
+ }
+}
+
+void ApplyInputSmoothing(j_compress_ptr cinfo) {
+ if (!cinfo->smoothing_factor) {
+ return;
+ }
+ jpeg_comp_master* m = cinfo->master;
+ const float kW1 = cinfo->smoothing_factor / 1024.0;
+ const float kW0 = 1.0f - 8.0f * kW1;
+ const size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor;
+ const ssize_t y0 = m->next_iMCU_row * iMCU_height;
+ const ssize_t y1 = y0 + iMCU_height;
+ const ssize_t xsize_padded = m->xsize_blocks * DCTSIZE;
+ for (int c = 0; c < cinfo->num_components; c++) {
+ auto& input = m->input_buffer[c];
+ auto& output = *m->smooth_input[c];
+ if (m->next_iMCU_row == 0) {
+ input.CopyRow(-1, 0, 1);
+ }
+ if (m->next_iMCU_row + 1 == cinfo->total_iMCU_rows) {
+ size_t last_row = m->ysize_blocks * DCTSIZE - 1;
+ input.CopyRow(last_row + 1, last_row, 1);
+ }
+ // TODO(szabadka) SIMDify this.
+ for (ssize_t y = y0; y < y1; ++y) {
+ const float* row_t = input.Row(y - 1);
+ const float* row_m = input.Row(y);
+ const float* row_b = input.Row(y + 1);
+ float* row_out = output.Row(y);
+ for (ssize_t x = 0; x < xsize_padded; ++x) {
+ float val_tl = row_t[x - 1];
+ float val_tm = row_t[x];
+ float val_tr = row_t[x + 1];
+ float val_ml = row_m[x - 1];
+ float val_mm = row_m[x];
+ float val_mr = row_m[x + 1];
+ float val_bl = row_b[x - 1];
+ float val_bm = row_b[x];
+ float val_br = row_b[x + 1];
+ float val1 = (val_tl + val_tm + val_tr + val_ml + val_mr + val_bl +
+ val_bm + val_br);
+ row_out[x] = val_mm * kW0 + val1 * kW1;
+ }
+ }
+ }
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jpegli/downsample.h b/third_party/jpeg-xl/lib/jpegli/downsample.h
new file mode 100644
index 0000000000..9d87047758
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/downsample.h
@@ -0,0 +1,24 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_DOWNSAMPLE_H_
+#define LIB_JPEGLI_DOWNSAMPLE_H_
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+/* clang-format on */
+
+namespace jpegli {
+
+void ChooseDownsampleMethods(j_compress_ptr cinfo);
+
+void DownsampleInputBuffer(j_compress_ptr cinfo);
+
+void ApplyInputSmoothing(j_compress_ptr cinfo);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_DOWNSAMPLE_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/encode.cc b/third_party/jpeg-xl/lib/jpegli/encode.cc
new file mode 100644
index 0000000000..6015d7d9bb
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/encode.cc
@@ -0,0 +1,1153 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/encode.h"
+
+#include <cmath>
+#include <initializer_list>
+#include <vector>
+
+#include "lib/jpegli/adaptive_quantization.h"
+#include "lib/jpegli/bit_writer.h"
+#include "lib/jpegli/bitstream.h"
+#include "lib/jpegli/color_transform.h"
+#include "lib/jpegli/dct.h"
+#include "lib/jpegli/downsample.h"
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/entropy_coding.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/huffman.h"
+#include "lib/jpegli/input.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jpegli/quant.h"
+
+namespace jpegli {
+
+constexpr size_t kMaxBytesInMarker = 65533;
+
+void CheckState(j_compress_ptr cinfo, int state) {
+ if (cinfo->global_state != state) {
+ JPEGLI_ERROR("Unexpected global state %d [expected %d]",
+ cinfo->global_state, state);
+ }
+}
+
+void CheckState(j_compress_ptr cinfo, int state1, int state2) {
+ if (cinfo->global_state != state1 && cinfo->global_state != state2) {
+ JPEGLI_ERROR("Unexpected global state %d [expected %d or %d]",
+ cinfo->global_state, state1, state2);
+ }
+}
+
+// Initialize cinfo fields that are not dependent on input image. This is shared
+// between jpegli_CreateCompress() and jpegli_set_defaults()
+void InitializeCompressParams(j_compress_ptr cinfo) {
+ cinfo->data_precision = 8;
+ cinfo->num_scans = 0;
+ cinfo->scan_info = nullptr;
+ cinfo->raw_data_in = FALSE;
+ cinfo->arith_code = FALSE;
+ cinfo->optimize_coding = FALSE;
+ cinfo->CCIR601_sampling = FALSE;
+ cinfo->smoothing_factor = 0;
+ cinfo->dct_method = JDCT_FLOAT;
+ cinfo->restart_interval = 0;
+ cinfo->restart_in_rows = 0;
+ cinfo->write_JFIF_header = FALSE;
+ cinfo->JFIF_major_version = 1;
+ cinfo->JFIF_minor_version = 1;
+ cinfo->density_unit = 0;
+ cinfo->X_density = 1;
+ cinfo->Y_density = 1;
+#if JPEG_LIB_VERSION >= 70
+ cinfo->scale_num = 1;
+ cinfo->scale_denom = 1;
+ cinfo->do_fancy_downsampling = FALSE;
+ cinfo->min_DCT_h_scaled_size = DCTSIZE;
+ cinfo->min_DCT_v_scaled_size = DCTSIZE;
+#endif
+}
+
+float LinearQualityToDistance(int scale_factor) {
+ scale_factor = std::min(5000, std::max(0, scale_factor));
+ int quality =
+ scale_factor < 100 ? 100 - scale_factor / 2 : 5000 / scale_factor;
+ return jpegli_quality_to_distance(quality);
+}
+
+template <typename T>
+void SetSentTableFlag(T** table_ptrs, size_t num, boolean val) {
+ for (size_t i = 0; i < num; ++i) {
+ if (table_ptrs[i]) table_ptrs[i]->sent_table = val;
+ }
+}
+
+struct ProgressiveScan {
+ int Ss, Se, Ah, Al;
+ bool interleaved;
+};
+
+void SetDefaultScanScript(j_compress_ptr cinfo) {
+ int level = cinfo->master->progressive_level;
+ std::vector<ProgressiveScan> progressive_mode;
+ bool interleave_dc =
+ (cinfo->max_h_samp_factor == 1 && cinfo->max_v_samp_factor == 1);
+ if (level == 0) {
+ progressive_mode.push_back({0, 63, 0, 0, true});
+ } else if (level == 1) {
+ progressive_mode.push_back({0, 0, 0, 0, interleave_dc});
+ progressive_mode.push_back({1, 63, 0, 1, false});
+ progressive_mode.push_back({1, 63, 1, 0, false});
+ } else {
+ progressive_mode.push_back({0, 0, 0, 0, interleave_dc});
+ progressive_mode.push_back({1, 2, 0, 0, false});
+ progressive_mode.push_back({3, 63, 0, 2, false});
+ progressive_mode.push_back({3, 63, 2, 1, false});
+ progressive_mode.push_back({3, 63, 1, 0, false});
+ }
+
+ cinfo->script_space_size = 0;
+ for (const auto& scan : progressive_mode) {
+ int comps = scan.interleaved ? MAX_COMPS_IN_SCAN : 1;
+ cinfo->script_space_size += DivCeil(cinfo->num_components, comps);
+ }
+ cinfo->script_space =
+ Allocate<jpeg_scan_info>(cinfo, cinfo->script_space_size);
+
+ jpeg_scan_info* next_scan = cinfo->script_space;
+ for (const auto& scan : progressive_mode) {
+ int comps = scan.interleaved ? MAX_COMPS_IN_SCAN : 1;
+ for (int c = 0; c < cinfo->num_components; c += comps) {
+ next_scan->Ss = scan.Ss;
+ next_scan->Se = scan.Se;
+ next_scan->Ah = scan.Ah;
+ next_scan->Al = scan.Al;
+ next_scan->comps_in_scan = std::min(comps, cinfo->num_components - c);
+ for (int j = 0; j < next_scan->comps_in_scan; ++j) {
+ next_scan->component_index[j] = c + j;
+ }
+ ++next_scan;
+ }
+ }
+ JXL_ASSERT(next_scan - cinfo->script_space == cinfo->script_space_size);
+ cinfo->scan_info = cinfo->script_space;
+ cinfo->num_scans = cinfo->script_space_size;
+}
+
+void ValidateScanScript(j_compress_ptr cinfo) {
+ // Mask of coefficient bits defined by the scan script, for each component
+ // and coefficient index.
+ uint16_t comp_mask[kMaxComponents][DCTSIZE2] = {};
+ static constexpr int kMaxRefinementBit = 10;
+
+ for (int i = 0; i < cinfo->num_scans; ++i) {
+ const jpeg_scan_info& si = cinfo->scan_info[i];
+ if (si.comps_in_scan < 1 || si.comps_in_scan > MAX_COMPS_IN_SCAN) {
+ JPEGLI_ERROR("Invalid number of components in scan %d", si.comps_in_scan);
+ }
+ int last_ci = -1;
+ for (int j = 0; j < si.comps_in_scan; ++j) {
+ int ci = si.component_index[j];
+ if (ci < 0 || ci >= cinfo->num_components) {
+ JPEGLI_ERROR("Invalid component index %d in scan", ci);
+ } else if (ci == last_ci) {
+ JPEGLI_ERROR("Duplicate component index %d in scan", ci);
+ } else if (ci < last_ci) {
+ JPEGLI_ERROR("Out of order component index %d in scan", ci);
+ }
+ last_ci = ci;
+ }
+ if (si.Ss < 0 || si.Se < si.Ss || si.Se >= DCTSIZE2) {
+ JPEGLI_ERROR("Invalid spectral range %d .. %d in scan", si.Ss, si.Se);
+ }
+ if (si.Ah < 0 || si.Al < 0 || si.Al > kMaxRefinementBit) {
+ JPEGLI_ERROR("Invalid refinement bits %d/%d", si.Ah, si.Al);
+ }
+ if (!cinfo->progressive_mode) {
+ if (si.Ss != 0 || si.Se != DCTSIZE2 - 1 || si.Ah != 0 || si.Al != 0) {
+ JPEGLI_ERROR("Invalid scan for sequential mode");
+ }
+ } else {
+ if (si.Ss == 0 && si.Se != 0) {
+ JPEGLI_ERROR("DC and AC together in progressive scan");
+ }
+ }
+ if (si.Ss != 0 && si.comps_in_scan != 1) {
+ JPEGLI_ERROR("Interleaved AC only scan.");
+ }
+ for (int j = 0; j < si.comps_in_scan; ++j) {
+ int ci = si.component_index[j];
+ if (si.Ss != 0 && comp_mask[ci][0] == 0) {
+ JPEGLI_ERROR("AC before DC in component %d of scan", ci);
+ }
+ for (int k = si.Ss; k <= si.Se; ++k) {
+ if (comp_mask[ci][k] == 0) {
+ if (si.Ah != 0) {
+ JPEGLI_ERROR("Invalid first scan refinement bit");
+ }
+ comp_mask[ci][k] = ((0xffff << si.Al) & 0xffff);
+ } else {
+ if (comp_mask[ci][k] != ((0xffff << si.Ah) & 0xffff) ||
+ si.Al != si.Ah - 1) {
+ JPEGLI_ERROR("Invalid refinement bit progression.");
+ }
+ comp_mask[ci][k] |= 1 << si.Al;
+ }
+ }
+ }
+ if (si.comps_in_scan > 1) {
+ size_t mcu_size = 0;
+ for (int j = 0; j < si.comps_in_scan; ++j) {
+ int ci = si.component_index[j];
+ jpeg_component_info* comp = &cinfo->comp_info[ci];
+ mcu_size += comp->h_samp_factor * comp->v_samp_factor;
+ }
+ if (mcu_size > C_MAX_BLOCKS_IN_MCU) {
+ JPEGLI_ERROR("MCU size too big");
+ }
+ }
+ }
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ if (comp_mask[c][k] != 0xffff) {
+ JPEGLI_ERROR("Incomplete scan of component %d and frequency %d", c, k);
+ }
+ }
+ }
+}
+
+void ProcessCompressionParams(j_compress_ptr cinfo) {
+ if (cinfo->dest == nullptr) {
+ JPEGLI_ERROR("Missing destination.");
+ }
+ if (cinfo->image_width < 1 || cinfo->image_height < 1 ||
+ cinfo->input_components < 1) {
+ JPEGLI_ERROR("Empty input image.");
+ }
+ if (cinfo->image_width > static_cast<int>(JPEG_MAX_DIMENSION) ||
+ cinfo->image_height > static_cast<int>(JPEG_MAX_DIMENSION) ||
+ cinfo->input_components > static_cast<int>(kMaxComponents)) {
+ JPEGLI_ERROR("Input image too big.");
+ }
+ if (cinfo->num_components < 1 ||
+ cinfo->num_components > static_cast<int>(kMaxComponents)) {
+ JPEGLI_ERROR("Invalid number of components.");
+ }
+ if (cinfo->data_precision != kJpegPrecision) {
+ JPEGLI_ERROR("Invalid data precision");
+ }
+ if (cinfo->arith_code) {
+ JPEGLI_ERROR("Arithmetic coding is not implemented.");
+ }
+ if (cinfo->CCIR601_sampling) {
+ JPEGLI_ERROR("CCIR601 sampling is not implemented.");
+ }
+ if (cinfo->restart_interval > 65535u) {
+ JPEGLI_ERROR("Restart interval too big");
+ }
+ if (cinfo->smoothing_factor < 0 || cinfo->smoothing_factor > 100) {
+ JPEGLI_ERROR("Invalid smoothing factor %d", cinfo->smoothing_factor);
+ }
+ jpeg_comp_master* m = cinfo->master;
+ cinfo->max_h_samp_factor = cinfo->max_v_samp_factor = 1;
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ if (comp->component_index != c) {
+ JPEGLI_ERROR("Invalid component index");
+ }
+ for (int j = 0; j < c; ++j) {
+ if (cinfo->comp_info[j].component_id == comp->component_id) {
+ JPEGLI_ERROR("Duplicate component id %d", comp->component_id);
+ }
+ }
+ if (comp->h_samp_factor <= 0 || comp->v_samp_factor <= 0 ||
+ comp->h_samp_factor > MAX_SAMP_FACTOR ||
+ comp->v_samp_factor > MAX_SAMP_FACTOR) {
+ JPEGLI_ERROR("Invalid sampling factor %d x %d", comp->h_samp_factor,
+ comp->v_samp_factor);
+ }
+ cinfo->max_h_samp_factor =
+ std::max(comp->h_samp_factor, cinfo->max_h_samp_factor);
+ cinfo->max_v_samp_factor =
+ std::max(comp->v_samp_factor, cinfo->max_v_samp_factor);
+ }
+ if (cinfo->num_components == 1 &&
+ (cinfo->max_h_samp_factor != 1 || cinfo->max_v_samp_factor != 1)) {
+ JPEGLI_ERROR("Sampling is not supported for simgle component image.");
+ }
+ size_t iMCU_width = DCTSIZE * cinfo->max_h_samp_factor;
+ size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor;
+ size_t total_iMCU_cols = DivCeil(cinfo->image_width, iMCU_width);
+ cinfo->total_iMCU_rows = DivCeil(cinfo->image_height, iMCU_height);
+ m->xsize_blocks = total_iMCU_cols * cinfo->max_h_samp_factor;
+ m->ysize_blocks = cinfo->total_iMCU_rows * cinfo->max_v_samp_factor;
+
+ size_t blocks_per_iMCU = 0;
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ if (cinfo->max_h_samp_factor % comp->h_samp_factor != 0 ||
+ cinfo->max_v_samp_factor % comp->v_samp_factor != 0) {
+ JPEGLI_ERROR("Non-integral sampling ratios are not supported.");
+ }
+ m->h_factor[c] = cinfo->max_h_samp_factor / comp->h_samp_factor;
+ m->v_factor[c] = cinfo->max_v_samp_factor / comp->v_samp_factor;
+ comp->downsampled_width = DivCeil(cinfo->image_width, m->h_factor[c]);
+ comp->downsampled_height = DivCeil(cinfo->image_height, m->v_factor[c]);
+ comp->width_in_blocks = DivCeil(comp->downsampled_width, DCTSIZE);
+ comp->height_in_blocks = DivCeil(comp->downsampled_height, DCTSIZE);
+ blocks_per_iMCU += comp->h_samp_factor * comp->v_samp_factor;
+ }
+ m->blocks_per_iMCU_row = total_iMCU_cols * blocks_per_iMCU;
+ // Disable adaptive quantization for subsampled luma channel.
+ int y_channel = cinfo->jpeg_color_space == JCS_RGB ? 1 : 0;
+ jpeg_component_info* y_comp = &cinfo->comp_info[y_channel];
+ if (y_comp->h_samp_factor != cinfo->max_h_samp_factor ||
+ y_comp->v_samp_factor != cinfo->max_v_samp_factor) {
+ m->use_adaptive_quantization = false;
+ }
+ if (cinfo->scan_info == nullptr) {
+ SetDefaultScanScript(cinfo);
+ }
+ cinfo->progressive_mode =
+ cinfo->scan_info->Ss != 0 || cinfo->scan_info->Se != DCTSIZE2 - 1;
+ ValidateScanScript(cinfo);
+}
+
+void ResetForImage(j_compress_ptr cinfo) {
+ (*cinfo->err->reset_error_mgr)(reinterpret_cast<j_common_ptr>(cinfo));
+ (*cinfo->dest->init_destination)(cinfo);
+ jpeg_comp_master* m = cinfo->master;
+ m->next_iMCU_row = 0;
+ m->last_restart_interval = 0;
+ m->last_dht_index = 0;
+ m->num_huffman_codes = 0;
+ if (cinfo->num_scans > 0) {
+ m->scan_coding_info =
+ Allocate<ScanCodingInfo>(cinfo, cinfo->num_scans, JPOOL_IMAGE_ALIGNED);
+ }
+}
+
+bool IsStreamingSupported(j_compress_ptr cinfo) {
+ if (cinfo->global_state == kEncWriteCoeffs) {
+ return false;
+ }
+ // TODO(szabadka) Remove this restriction.
+ if (cinfo->restart_interval > 0 || cinfo->restart_in_rows > 0) {
+ return false;
+ }
+ if (cinfo->optimize_coding || cinfo->num_scans > 1) {
+ return false;
+ }
+ return true;
+}
+
+bool IsSinglePassOptimizerSupported(j_compress_ptr cinfo) {
+ return cinfo->num_scans == 1 && cinfo->optimize_coding &&
+ cinfo->restart_interval == 0 && cinfo->restart_in_rows == 0;
+}
+
+void AllocateBuffers(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ size_t iMCU_width = DCTSIZE * cinfo->max_h_samp_factor;
+ size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor;
+ size_t total_iMCU_cols = DivCeil(cinfo->image_width, iMCU_width);
+ size_t xsize_full = total_iMCU_cols * iMCU_width;
+ size_t ysize_full = 3 * iMCU_height;
+ if (!cinfo->raw_data_in) {
+ int num_all_components =
+ std::max(cinfo->input_components, cinfo->num_components);
+ for (int c = 0; c < num_all_components; ++c) {
+ m->input_buffer[c].Allocate(cinfo, ysize_full, xsize_full);
+ }
+ }
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ size_t xsize = total_iMCU_cols * comp->h_samp_factor * DCTSIZE;
+ size_t ysize = 3 * comp->v_samp_factor * DCTSIZE;
+ if (cinfo->raw_data_in) {
+ m->input_buffer[c].Allocate(cinfo, ysize, xsize);
+ }
+ m->smooth_input[c] = &m->input_buffer[c];
+ if (!cinfo->raw_data_in && cinfo->smoothing_factor) {
+ m->smooth_input[c] = Allocate<RowBuffer<float>>(cinfo, 1, JPOOL_IMAGE);
+ m->smooth_input[c]->Allocate(cinfo, ysize_full, xsize_full);
+ }
+ m->raw_data[c] = m->smooth_input[c];
+ if (!cinfo->raw_data_in && (m->h_factor[c] > 1 || m->v_factor[c] > 1)) {
+ m->raw_data[c] = Allocate<RowBuffer<float>>(cinfo, 1, JPOOL_IMAGE);
+ m->raw_data[c]->Allocate(cinfo, ysize, xsize);
+ }
+ m->quant_mul[c] = Allocate<float>(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED);
+ }
+ m->dct_buffer = Allocate<float>(cinfo, 2 * DCTSIZE2, JPOOL_IMAGE_ALIGNED);
+ m->block_tmp = Allocate<int32_t>(cinfo, DCTSIZE2 * 4, JPOOL_IMAGE_ALIGNED);
+ if (!IsStreamingSupported(cinfo)) {
+ m->coeff_buffers =
+ Allocate<jvirt_barray_ptr>(cinfo, cinfo->num_components, JPOOL_IMAGE);
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ const size_t xsize_blocks = comp->width_in_blocks;
+ const size_t ysize_blocks = comp->height_in_blocks;
+ m->coeff_buffers[c] = (*cinfo->mem->request_virt_barray)(
+ reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE,
+ /*pre_zero=*/false, xsize_blocks, ysize_blocks, comp->v_samp_factor);
+ }
+ }
+ if (m->use_adaptive_quantization) {
+ int y_channel = cinfo->jpeg_color_space == JCS_RGB ? 1 : 0;
+ jpeg_component_info* y_comp = &cinfo->comp_info[y_channel];
+ const size_t xsize_blocks = y_comp->width_in_blocks;
+ const size_t vecsize = VectorSize();
+ const size_t xsize_padded = DivCeil(2 * xsize_blocks, vecsize) * vecsize;
+ m->diff_buffer =
+ Allocate<float>(cinfo, xsize_blocks * DCTSIZE + 8, JPOOL_IMAGE_ALIGNED);
+ m->fuzzy_erosion_tmp.Allocate(cinfo, 2, xsize_padded);
+ m->pre_erosion.Allocate(cinfo, 6 * cinfo->max_v_samp_factor, xsize_padded);
+ m->quant_field.Allocate(cinfo, cinfo->max_v_samp_factor, xsize_blocks);
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ m->zero_bias_offset[c] =
+ Allocate<float>(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED);
+ m->zero_bias_mul[c] =
+ Allocate<float>(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED);
+ }
+ }
+}
+
+void ReadInputRow(j_compress_ptr cinfo, const uint8_t* scanline,
+ float* row[kMaxComponents]) {
+ jpeg_comp_master* m = cinfo->master;
+ int num_all_components =
+ std::max(cinfo->input_components, cinfo->num_components);
+ for (int c = 0; c < num_all_components; ++c) {
+ row[c] = m->input_buffer[c].Row(m->next_input_row);
+ }
+ ++m->next_input_row;
+ if (scanline == nullptr) {
+ for (int c = 0; c < cinfo->input_components; ++c) {
+ memset(row[c], 0, cinfo->image_width * sizeof(row[c][0]));
+ }
+ return;
+ }
+ (*m->input_method)(scanline, cinfo->image_width, row);
+}
+
+void PadInputBuffer(j_compress_ptr cinfo, float* row[kMaxComponents]) {
+ jpeg_comp_master* m = cinfo->master;
+ const size_t len0 = cinfo->image_width;
+ const size_t len1 = m->xsize_blocks * DCTSIZE;
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ // Pad row to a multiple of the iMCU width, plus create a border of 1
+ // repeated pixel for adaptive quant field calculation.
+ float last_val = row[c][len0 - 1];
+ for (size_t x = len0; x <= len1; ++x) {
+ row[c][x] = last_val;
+ }
+ row[c][-1] = row[c][0];
+ }
+ if (m->next_input_row == cinfo->image_height) {
+ size_t num_rows = m->ysize_blocks * DCTSIZE - cinfo->image_height;
+ for (size_t i = 0; i < num_rows; ++i) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ float* dest = m->input_buffer[c].Row(m->next_input_row) - 1;
+ memcpy(dest, row[c] - 1, (len1 + 2) * sizeof(dest[0]));
+ }
+ ++m->next_input_row;
+ }
+ }
+}
+
+void ProcessiMCURow(j_compress_ptr cinfo) {
+ JXL_ASSERT(cinfo->master->next_iMCU_row < cinfo->total_iMCU_rows);
+ if (!cinfo->raw_data_in) {
+ ApplyInputSmoothing(cinfo);
+ DownsampleInputBuffer(cinfo);
+ }
+ ComputeAdaptiveQuantField(cinfo);
+ if (IsStreamingSupported(cinfo)) {
+ WriteiMCURow(cinfo);
+ } else {
+ ComputeDCTCoefficients(cinfo);
+ }
+ ++cinfo->master->next_iMCU_row;
+}
+
+void ProcessiMCURows(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor;
+ // To have context rows both above and below the current iMCU row, we delay
+ // processing the first iMCU row and process two iMCU rows after we receive
+ // the last input row.
+ if (m->next_input_row % iMCU_height == 0 && m->next_input_row > iMCU_height) {
+ ProcessiMCURow(cinfo);
+ }
+ if (m->next_input_row >= cinfo->image_height) {
+ ProcessiMCURow(cinfo);
+ }
+}
+
+void InitProgressMonitor(j_compress_ptr cinfo) {
+ if (cinfo->progress == nullptr) {
+ return;
+ }
+ if (IsStreamingSupported(cinfo)) {
+ // We have only one input pass.
+ cinfo->progress->total_passes = 1;
+ } else if (IsSinglePassOptimizerSupported(cinfo)) {
+ // We have one input pass and an encode pass for each scan.
+ cinfo->progress->total_passes = 1 + cinfo->num_scans;
+ } else {
+ // We have one input pass, a histogram pass for each scan, and an encode
+ // pass for each scan.
+ cinfo->progress->total_passes = 1 + 2 * cinfo->num_scans;
+ }
+}
+
+void ProgressMonitorInputPass(j_compress_ptr cinfo) {
+ if (cinfo->progress == nullptr) {
+ return;
+ }
+ cinfo->progress->completed_passes = 0;
+ cinfo->progress->pass_counter = cinfo->next_scanline;
+ cinfo->progress->pass_limit = cinfo->image_height;
+ (*cinfo->progress->progress_monitor)(reinterpret_cast<j_common_ptr>(cinfo));
+}
+
+void WriteFileHeader(j_compress_ptr cinfo) {
+ WriteOutput(cinfo, {0xFF, 0xD8}); // SOI
+ if (cinfo->write_JFIF_header) {
+ EncodeAPP0(cinfo);
+ }
+ if (cinfo->write_Adobe_marker) {
+ EncodeAPP14(cinfo);
+ }
+}
+
+void WriteScanHeader(j_compress_ptr cinfo, size_t scan_idx) {
+ jpeg_comp_master* m = cinfo->master;
+ cinfo->restart_interval = RestartIntervalForScan(cinfo, scan_idx);
+ if (cinfo->restart_interval != m->last_restart_interval) {
+ EncodeDRI(cinfo);
+ m->last_restart_interval = cinfo->restart_interval;
+ }
+ size_t num_dht = cinfo->master->scan_coding_info[scan_idx].num_huffman_codes;
+ if (num_dht > 0) {
+ bool pre_shifted = IsStreamingSupported(cinfo);
+ EncodeDHT(cinfo, m->huffman_codes + m->last_dht_index, num_dht,
+ pre_shifted);
+ m->last_dht_index += num_dht;
+ }
+ EncodeSOS(cinfo, scan_idx);
+}
+
+void WriteHeaderMarkers(j_compress_ptr cinfo) {
+ bool is_baseline = true;
+ CopyHuffmanCodes(cinfo, &is_baseline);
+ EncodeDQT(cinfo, /*write_all_tables=*/false, &is_baseline);
+ EncodeSOF(cinfo, is_baseline);
+ WriteScanHeader(cinfo, 0);
+ memset(cinfo->master->last_dc_coeff, 0, sizeof(cinfo->master->last_dc_coeff));
+}
+
+void EncodeScans(j_compress_ptr cinfo) {
+ if (IsSinglePassOptimizerSupported(cinfo)) {
+ EncodeSingleScan(cinfo);
+ return;
+ }
+ bool is_baseline = false;
+ if (cinfo->optimize_coding || cinfo->progressive_mode) {
+ OptimizeHuffmanCodes(cinfo, &is_baseline);
+ } else {
+ CopyHuffmanCodes(cinfo, &is_baseline);
+ }
+ EncodeDQT(cinfo, /*write_all_tables=*/false, &is_baseline);
+ EncodeSOF(cinfo, is_baseline);
+ for (int i = 0; i < cinfo->num_scans; ++i) {
+ WriteScanHeader(cinfo, i);
+ if (!EncodeScan(cinfo, i)) {
+ JPEGLI_ERROR("Failed to encode scan.");
+ }
+ }
+}
+
+} // namespace jpegli
+
+void jpegli_CreateCompress(j_compress_ptr cinfo, int version,
+ size_t structsize) {
+ cinfo->mem = nullptr;
+ if (structsize != sizeof(*cinfo)) {
+ JPEGLI_ERROR("jpegli_compress_struct has wrong size.");
+ }
+ jpegli::InitMemoryManager(reinterpret_cast<j_common_ptr>(cinfo));
+ cinfo->progress = nullptr;
+ cinfo->is_decompressor = FALSE;
+ cinfo->global_state = jpegli::kEncStart;
+ cinfo->dest = nullptr;
+ cinfo->image_width = 0;
+ cinfo->image_height = 0;
+ cinfo->input_components = 0;
+ cinfo->in_color_space = JCS_UNKNOWN;
+ cinfo->input_gamma = 1.0f;
+ cinfo->num_components = 0;
+ cinfo->jpeg_color_space = JCS_UNKNOWN;
+ cinfo->comp_info = nullptr;
+ for (int i = 0; i < NUM_QUANT_TBLS; ++i) {
+ cinfo->quant_tbl_ptrs[i] = nullptr;
+ }
+ for (int i = 0; i < NUM_HUFF_TBLS; ++i) {
+ cinfo->dc_huff_tbl_ptrs[i] = nullptr;
+ cinfo->ac_huff_tbl_ptrs[i] = nullptr;
+ }
+ memset(cinfo->arith_dc_L, 0, sizeof(cinfo->arith_dc_L));
+ memset(cinfo->arith_dc_U, 0, sizeof(cinfo->arith_dc_U));
+ memset(cinfo->arith_ac_K, 0, sizeof(cinfo->arith_ac_K));
+ cinfo->write_Adobe_marker = false;
+ jpegli::InitializeCompressParams(cinfo);
+ cinfo->master = jpegli::Allocate<jpeg_comp_master>(cinfo, 1);
+ cinfo->master->force_baseline = true;
+ cinfo->master->xyb_mode = false;
+ cinfo->master->cicp_transfer_function = 2; // unknown transfer function code
+ cinfo->master->use_std_tables = false;
+ cinfo->master->use_adaptive_quantization = true;
+ cinfo->master->progressive_level = jpegli::kDefaultProgressiveLevel;
+ cinfo->master->data_type = JPEGLI_TYPE_UINT8;
+ cinfo->master->endianness = JPEGLI_NATIVE_ENDIAN;
+ cinfo->master->coeff_buffers = nullptr;
+}
+
+void jpegli_set_xyb_mode(j_compress_ptr cinfo) {
+ CheckState(cinfo, jpegli::kEncStart);
+ cinfo->master->xyb_mode = true;
+}
+
+void jpegli_set_cicp_transfer_function(j_compress_ptr cinfo, int code) {
+ CheckState(cinfo, jpegli::kEncStart);
+ cinfo->master->cicp_transfer_function = code;
+}
+
+void jpegli_set_defaults(j_compress_ptr cinfo) {
+ CheckState(cinfo, jpegli::kEncStart);
+ jpegli::InitializeCompressParams(cinfo);
+ jpegli_default_colorspace(cinfo);
+ jpegli_set_quality(cinfo, 90, TRUE);
+ jpegli_set_progressive_level(cinfo, jpegli::kDefaultProgressiveLevel);
+ jpegli::AddStandardHuffmanTables(reinterpret_cast<j_common_ptr>(cinfo),
+ /*is_dc=*/false);
+ jpegli::AddStandardHuffmanTables(reinterpret_cast<j_common_ptr>(cinfo),
+ /*is_dc=*/true);
+}
+
+void jpegli_default_colorspace(j_compress_ptr cinfo) {
+ CheckState(cinfo, jpegli::kEncStart);
+ switch (cinfo->in_color_space) {
+ case JCS_GRAYSCALE:
+ jpegli_set_colorspace(cinfo, JCS_GRAYSCALE);
+ break;
+ case JCS_RGB: {
+ if (cinfo->master->xyb_mode) {
+ jpegli_set_colorspace(cinfo, JCS_RGB);
+ } else {
+ jpegli_set_colorspace(cinfo, JCS_YCbCr);
+ }
+ break;
+ }
+ case JCS_YCbCr:
+ jpegli_set_colorspace(cinfo, JCS_YCbCr);
+ break;
+ case JCS_CMYK:
+ jpegli_set_colorspace(cinfo, JCS_CMYK);
+ break;
+ case JCS_YCCK:
+ jpegli_set_colorspace(cinfo, JCS_YCCK);
+ break;
+ case JCS_UNKNOWN:
+ jpegli_set_colorspace(cinfo, JCS_UNKNOWN);
+ break;
+ default:
+ JPEGLI_ERROR("Unsupported input colorspace %d", cinfo->in_color_space);
+ }
+}
+
+void jpegli_set_colorspace(j_compress_ptr cinfo, J_COLOR_SPACE colorspace) {
+ CheckState(cinfo, jpegli::kEncStart);
+ cinfo->jpeg_color_space = colorspace;
+ switch (colorspace) {
+ case JCS_GRAYSCALE:
+ cinfo->num_components = 1;
+ break;
+ case JCS_RGB:
+ case JCS_YCbCr:
+ cinfo->num_components = 3;
+ break;
+ case JCS_CMYK:
+ case JCS_YCCK:
+ cinfo->num_components = 4;
+ break;
+ case JCS_UNKNOWN:
+ cinfo->num_components =
+ std::min<int>(jpegli::kMaxComponents, cinfo->input_components);
+ break;
+ default:
+ JPEGLI_ERROR("Unsupported jpeg colorspace %d", colorspace);
+ }
+ // Adobe marker is only needed to distinguish CMYK and YCCK JPEGs.
+ cinfo->write_Adobe_marker = (cinfo->jpeg_color_space == JCS_YCCK);
+ if (cinfo->comp_info == nullptr) {
+ cinfo->comp_info =
+ jpegli::Allocate<jpeg_component_info>(cinfo, MAX_COMPONENTS);
+ }
+ memset(cinfo->comp_info, 0,
+ jpegli::kMaxComponents * sizeof(jpeg_component_info));
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ comp->component_index = c;
+ comp->component_id = c + 1;
+ comp->h_samp_factor = 1;
+ comp->v_samp_factor = 1;
+ comp->quant_tbl_no = 0;
+ comp->dc_tbl_no = 0;
+ comp->ac_tbl_no = 0;
+ }
+ if (colorspace == JCS_RGB) {
+ cinfo->comp_info[0].component_id = 'R';
+ cinfo->comp_info[1].component_id = 'G';
+ cinfo->comp_info[2].component_id = 'B';
+ if (cinfo->master->xyb_mode) {
+ // Subsample blue channel.
+ cinfo->comp_info[0].h_samp_factor = cinfo->comp_info[0].v_samp_factor = 2;
+ cinfo->comp_info[1].h_samp_factor = cinfo->comp_info[1].v_samp_factor = 2;
+ cinfo->comp_info[2].h_samp_factor = cinfo->comp_info[2].v_samp_factor = 1;
+ // Use separate quantization tables for each component
+ cinfo->comp_info[1].quant_tbl_no = 1;
+ cinfo->comp_info[2].quant_tbl_no = 2;
+ }
+ } else if (colorspace == JCS_CMYK) {
+ cinfo->comp_info[0].component_id = 'C';
+ cinfo->comp_info[1].component_id = 'M';
+ cinfo->comp_info[2].component_id = 'Y';
+ cinfo->comp_info[3].component_id = 'K';
+ } else if (colorspace == JCS_YCbCr || colorspace == JCS_YCCK) {
+ // Use separate quantization and Huffman tables for luma and chroma
+ cinfo->comp_info[1].quant_tbl_no = 1;
+ cinfo->comp_info[2].quant_tbl_no = 1;
+ cinfo->comp_info[1].dc_tbl_no = cinfo->comp_info[1].ac_tbl_no = 1;
+ cinfo->comp_info[2].dc_tbl_no = cinfo->comp_info[2].ac_tbl_no = 1;
+ }
+}
+
+void jpegli_set_distance(j_compress_ptr cinfo, float distance,
+ boolean force_baseline) {
+ CheckState(cinfo, jpegli::kEncStart);
+ cinfo->master->force_baseline = force_baseline;
+ float distances[NUM_QUANT_TBLS] = {distance, distance, distance};
+ jpegli::SetQuantMatrices(cinfo, distances, /*add_two_chroma_tables=*/true);
+}
+
+float jpegli_quality_to_distance(int quality) {
+ return (quality >= 100 ? 0.01f
+ : quality >= 30 ? 0.1f + (100 - quality) * 0.09f
+ : 53.0f / 3000.0f * quality * quality -
+ 23.0f / 20.0f * quality + 25.0f);
+}
+
+void jpegli_set_quality(j_compress_ptr cinfo, int quality,
+ boolean force_baseline) {
+ CheckState(cinfo, jpegli::kEncStart);
+ cinfo->master->force_baseline = force_baseline;
+ float distance = jpegli_quality_to_distance(quality);
+ float distances[NUM_QUANT_TBLS] = {distance, distance, distance};
+ jpegli::SetQuantMatrices(cinfo, distances, /*add_two_chroma_tables=*/false);
+}
+
+void jpegli_set_linear_quality(j_compress_ptr cinfo, int scale_factor,
+ boolean force_baseline) {
+ CheckState(cinfo, jpegli::kEncStart);
+ cinfo->master->force_baseline = force_baseline;
+ float distance = jpegli::LinearQualityToDistance(scale_factor);
+ float distances[NUM_QUANT_TBLS] = {distance, distance, distance};
+ jpegli::SetQuantMatrices(cinfo, distances, /*add_two_chroma_tables=*/false);
+}
+
+#if JPEG_LIB_VERSION >= 70
+void jpegli_default_qtables(j_compress_ptr cinfo, boolean force_baseline) {
+ CheckState(cinfo, jpegli::kEncStart);
+ cinfo->master->force_baseline = force_baseline;
+ float distances[NUM_QUANT_TBLS];
+ for (int i = 0; i < NUM_QUANT_TBLS; ++i) {
+ distances[i] = jpegli::LinearQualityToDistance(cinfo->q_scale_factor[i]);
+ }
+ jpegli::SetQuantMatrices(cinfo, distances, /*add_two_chroma_tables=*/false);
+}
+#endif
+
+int jpegli_quality_scaling(int quality) {
+ quality = std::min(100, std::max(1, quality));
+ return quality < 50 ? 5000 / quality : 200 - 2 * quality;
+}
+
+void jpegli_use_standard_quant_tables(j_compress_ptr cinfo) {
+ CheckState(cinfo, jpegli::kEncStart);
+ cinfo->master->use_std_tables = true;
+}
+
+void jpegli_add_quant_table(j_compress_ptr cinfo, int which_tbl,
+ const unsigned int* basic_table, int scale_factor,
+ boolean force_baseline) {
+ CheckState(cinfo, jpegli::kEncStart);
+ if (which_tbl < 0 || which_tbl > NUM_QUANT_TBLS) {
+ JPEGLI_ERROR("Invalid quant table index %d", which_tbl);
+ }
+ if (cinfo->quant_tbl_ptrs[which_tbl] == nullptr) {
+ cinfo->quant_tbl_ptrs[which_tbl] =
+ jpegli_alloc_quant_table(reinterpret_cast<j_common_ptr>(cinfo));
+ }
+ int max_qval = force_baseline ? 255 : 32767U;
+ JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[which_tbl];
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ int qval = (basic_table[k] * scale_factor + 50) / 100;
+ qval = std::max(1, std::min(qval, max_qval));
+ quant_table->quantval[k] = qval;
+ }
+ quant_table->sent_table = FALSE;
+}
+
+void jpegli_enable_adaptive_quantization(j_compress_ptr cinfo, boolean value) {
+ CheckState(cinfo, jpegli::kEncStart);
+ cinfo->master->use_adaptive_quantization = value;
+}
+
+void jpegli_simple_progression(j_compress_ptr cinfo) {
+ CheckState(cinfo, jpegli::kEncStart);
+ jpegli_set_progressive_level(cinfo, 2);
+}
+
+void jpegli_set_progressive_level(j_compress_ptr cinfo, int level) {
+ CheckState(cinfo, jpegli::kEncStart);
+ if (level < 0) {
+ JPEGLI_ERROR("Invalid progressive level %d", level);
+ }
+ cinfo->master->progressive_level = level;
+}
+
+void jpegli_set_input_format(j_compress_ptr cinfo, JpegliDataType data_type,
+ JpegliEndianness endianness) {
+ CheckState(cinfo, jpegli::kEncStart);
+ switch (data_type) {
+ case JPEGLI_TYPE_UINT8:
+ case JPEGLI_TYPE_UINT16:
+ case JPEGLI_TYPE_FLOAT:
+ cinfo->master->data_type = data_type;
+ break;
+ default:
+ JPEGLI_ERROR("Unsupported data type %d", data_type);
+ }
+ switch (endianness) {
+ case JPEGLI_NATIVE_ENDIAN:
+ case JPEGLI_LITTLE_ENDIAN:
+ case JPEGLI_BIG_ENDIAN:
+ cinfo->master->endianness = endianness;
+ break;
+ default:
+ JPEGLI_ERROR("Unsupported endianness %d", endianness);
+ }
+}
+
+#if JPEG_LIB_VERSION >= 70
+void jpegli_calc_jpeg_dimensions(j_compress_ptr cinfo) {
+ // Since input scaling is not supported, we just copy the image dimensions.
+ cinfo->jpeg_width = cinfo->image_width;
+ cinfo->jpeg_height = cinfo->image_height;
+}
+#endif
+
+void jpegli_copy_critical_parameters(j_decompress_ptr srcinfo,
+ j_compress_ptr dstinfo) {
+ CheckState(dstinfo, jpegli::kEncStart);
+ // Image parameters.
+ dstinfo->image_width = srcinfo->image_width;
+ dstinfo->image_height = srcinfo->image_height;
+ dstinfo->input_components = srcinfo->num_components;
+ dstinfo->in_color_space = srcinfo->jpeg_color_space;
+ dstinfo->input_gamma = srcinfo->output_gamma;
+ // Compression parameters.
+ jpegli_set_defaults(dstinfo);
+ jpegli_set_colorspace(dstinfo, srcinfo->jpeg_color_space);
+ if (dstinfo->num_components != srcinfo->num_components) {
+ const auto& cinfo = dstinfo;
+ return JPEGLI_ERROR("Mismatch between src colorspace and components");
+ }
+ dstinfo->data_precision = srcinfo->data_precision;
+ dstinfo->CCIR601_sampling = srcinfo->CCIR601_sampling;
+ dstinfo->JFIF_major_version = srcinfo->JFIF_major_version;
+ dstinfo->JFIF_minor_version = srcinfo->JFIF_minor_version;
+ dstinfo->density_unit = srcinfo->density_unit;
+ dstinfo->X_density = srcinfo->X_density;
+ dstinfo->Y_density = srcinfo->Y_density;
+ for (int c = 0; c < dstinfo->num_components; ++c) {
+ jpeg_component_info* srccomp = &srcinfo->comp_info[c];
+ jpeg_component_info* dstcomp = &dstinfo->comp_info[c];
+ dstcomp->component_id = srccomp->component_id;
+ dstcomp->h_samp_factor = srccomp->h_samp_factor;
+ dstcomp->v_samp_factor = srccomp->v_samp_factor;
+ dstcomp->quant_tbl_no = srccomp->quant_tbl_no;
+ }
+ for (int i = 0; i < NUM_QUANT_TBLS; ++i) {
+ if (!srcinfo->quant_tbl_ptrs[i]) continue;
+ if (dstinfo->quant_tbl_ptrs[i] == nullptr) {
+ dstinfo->quant_tbl_ptrs[i] = jpegli::Allocate<JQUANT_TBL>(dstinfo, 1);
+ }
+ memcpy(dstinfo->quant_tbl_ptrs[i], srcinfo->quant_tbl_ptrs[i],
+ sizeof(JQUANT_TBL));
+ dstinfo->quant_tbl_ptrs[i]->sent_table = FALSE;
+ }
+}
+
+void jpegli_suppress_tables(j_compress_ptr cinfo, boolean suppress) {
+ jpegli::SetSentTableFlag(cinfo->quant_tbl_ptrs, NUM_QUANT_TBLS, suppress);
+ jpegli::SetSentTableFlag(cinfo->dc_huff_tbl_ptrs, NUM_HUFF_TBLS, suppress);
+ jpegli::SetSentTableFlag(cinfo->ac_huff_tbl_ptrs, NUM_HUFF_TBLS, suppress);
+}
+
+void jpegli_start_compress(j_compress_ptr cinfo, boolean write_all_tables) {
+ CheckState(cinfo, jpegli::kEncStart);
+ jpegli::ProcessCompressionParams(cinfo);
+ jpegli::InitProgressMonitor(cinfo);
+ jpegli::AllocateBuffers(cinfo);
+ jpegli::ChooseInputMethod(cinfo);
+ if (!cinfo->raw_data_in) {
+ jpegli::ChooseColorTransform(cinfo);
+ jpegli::ChooseDownsampleMethods(cinfo);
+ }
+ jpegli::InitQuantizer(cinfo);
+ if (write_all_tables) {
+ jpegli_suppress_tables(cinfo, FALSE);
+ }
+ (*cinfo->mem->realize_virt_arrays)(reinterpret_cast<j_common_ptr>(cinfo));
+ jpegli::ResetForImage(cinfo);
+ jpegli::WriteFileHeader(cinfo);
+ jpegli::JpegBitWriterInit(cinfo);
+ cinfo->next_scanline = 0;
+ cinfo->master->next_input_row = 0;
+ cinfo->global_state = jpegli::kEncHeader;
+}
+
+void jpegli_write_coefficients(j_compress_ptr cinfo,
+ jvirt_barray_ptr* coef_arrays) {
+ CheckState(cinfo, jpegli::kEncStart);
+ jpegli::ProcessCompressionParams(cinfo);
+ jpegli::InitProgressMonitor(cinfo);
+ (*cinfo->mem->realize_virt_arrays)(reinterpret_cast<j_common_ptr>(cinfo));
+ cinfo->master->coeff_buffers = coef_arrays;
+ jpegli_suppress_tables(cinfo, FALSE);
+ jpegli::ResetForImage(cinfo);
+ jpegli::WriteFileHeader(cinfo);
+ jpegli::JpegBitWriterInit(cinfo);
+ cinfo->next_scanline = cinfo->image_height;
+ cinfo->master->next_input_row = cinfo->image_height;
+ cinfo->global_state = jpegli::kEncWriteCoeffs;
+}
+
+void jpegli_write_tables(j_compress_ptr cinfo) {
+ CheckState(cinfo, jpegli::kEncStart);
+ if (cinfo->dest == nullptr) {
+ JPEGLI_ERROR("Missing destination.");
+ }
+ jpegli::ResetForImage(cinfo);
+ bool is_baseline = true;
+ jpeg_comp_master* m = cinfo->master;
+ jpegli::WriteOutput(cinfo, {0xFF, 0xD8}); // SOI
+ jpegli::EncodeDQT(cinfo, /*write_all_tables=*/true, &is_baseline);
+ jpegli::CopyHuffmanCodes(cinfo, &is_baseline);
+ jpegli::EncodeDHT(cinfo, m->huffman_codes, m->num_huffman_codes);
+ jpegli::WriteOutput(cinfo, {0xFF, 0xD9}); // EOI
+ (*cinfo->dest->term_destination)(cinfo);
+ jpegli_suppress_tables(cinfo, TRUE);
+}
+
+void jpegli_write_m_header(j_compress_ptr cinfo, int marker,
+ unsigned int datalen) {
+ CheckState(cinfo, jpegli::kEncHeader, jpegli::kEncWriteCoeffs);
+ if (datalen > jpegli::kMaxBytesInMarker) {
+ JPEGLI_ERROR("Invalid marker length %u", datalen);
+ }
+ if (marker != 0xfe && (marker < 0xe0 || marker > 0xef)) {
+ JPEGLI_ERROR(
+ "jpegli_write_m_header: Only APP and COM markers are supported.");
+ }
+ std::vector<uint8_t> marker_data(4 + datalen);
+ marker_data[0] = 0xff;
+ marker_data[1] = marker;
+ marker_data[2] = (datalen + 2) >> 8;
+ marker_data[3] = (datalen + 2) & 0xff;
+ jpegli::WriteOutput(cinfo, &marker_data[0], 4);
+}
+
+void jpegli_write_m_byte(j_compress_ptr cinfo, int val) {
+ uint8_t data = val;
+ jpegli::WriteOutput(cinfo, &data, 1);
+}
+
+void jpegli_write_marker(j_compress_ptr cinfo, int marker,
+ const JOCTET* dataptr, unsigned int datalen) {
+ jpegli_write_m_header(cinfo, marker, datalen);
+ jpegli::WriteOutput(cinfo, dataptr, datalen);
+}
+
+void jpegli_write_icc_profile(j_compress_ptr cinfo, const JOCTET* icc_data_ptr,
+ unsigned int icc_data_len) {
+ constexpr size_t kMaxIccBytesInMarker =
+ jpegli::kMaxBytesInMarker - sizeof jpegli::kICCSignature - 2;
+ const int num_markers =
+ static_cast<int>(jpegli::DivCeil(icc_data_len, kMaxIccBytesInMarker));
+ size_t begin = 0;
+ for (int current_marker = 0; current_marker < num_markers; ++current_marker) {
+ const size_t length = std::min(kMaxIccBytesInMarker, icc_data_len - begin);
+ jpegli_write_m_header(
+ cinfo, jpegli::kICCMarker,
+ static_cast<unsigned int>(length + sizeof jpegli::kICCSignature + 2));
+ for (const unsigned char c : jpegli::kICCSignature) {
+ jpegli_write_m_byte(cinfo, c);
+ }
+ jpegli_write_m_byte(cinfo, current_marker + 1);
+ jpegli_write_m_byte(cinfo, num_markers);
+ for (size_t i = 0; i < length; ++i) {
+ jpegli_write_m_byte(cinfo, icc_data_ptr[begin]);
+ ++begin;
+ }
+ }
+}
+
+JDIMENSION jpegli_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines,
+ JDIMENSION num_lines) {
+ CheckState(cinfo, jpegli::kEncHeader, jpegli::kEncReadImage);
+ if (cinfo->raw_data_in) {
+ JPEGLI_ERROR("jpegli_write_raw_data() must be called for raw data mode.");
+ }
+ jpegli::ProgressMonitorInputPass(cinfo);
+ if (cinfo->global_state == jpegli::kEncHeader &&
+ jpegli::IsStreamingSupported(cinfo)) {
+ jpegli::WriteHeaderMarkers(cinfo);
+ }
+ cinfo->global_state = jpegli::kEncReadImage;
+ jpeg_comp_master* m = cinfo->master;
+ if (num_lines + cinfo->next_scanline > cinfo->image_height) {
+ num_lines = cinfo->image_height - cinfo->next_scanline;
+ }
+ JDIMENSION prev_scanline = cinfo->next_scanline;
+ size_t input_lag = (std::min<size_t>(cinfo->image_height, m->next_input_row) -
+ cinfo->next_scanline);
+ if (input_lag > num_lines) {
+ JPEGLI_ERROR("Need at least %u lines to continue", input_lag);
+ }
+ if (input_lag > 0) {
+ if (!jpegli::EmptyBitWriterBuffer(&m->bw)) {
+ return 0;
+ }
+ cinfo->next_scanline += input_lag;
+ }
+ float* rows[jpegli::kMaxComponents];
+ for (size_t i = input_lag; i < num_lines; ++i) {
+ jpegli::ReadInputRow(cinfo, scanlines[i], rows);
+ (*m->color_transform)(rows, cinfo->image_width);
+ jpegli::PadInputBuffer(cinfo, rows);
+ jpegli::ProcessiMCURows(cinfo);
+ if (!jpegli::EmptyBitWriterBuffer(&m->bw)) {
+ break;
+ }
+ ++cinfo->next_scanline;
+ }
+ return cinfo->next_scanline - prev_scanline;
+}
+
+JDIMENSION jpegli_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data,
+ JDIMENSION num_lines) {
+ CheckState(cinfo, jpegli::kEncHeader, jpegli::kEncReadImage);
+ if (!cinfo->raw_data_in) {
+ JPEGLI_ERROR("jpegli_write_raw_data(): raw data mode was not set");
+ }
+ jpegli::ProgressMonitorInputPass(cinfo);
+ if (cinfo->global_state == jpegli::kEncHeader &&
+ jpegli::IsStreamingSupported(cinfo)) {
+ jpegli::WriteHeaderMarkers(cinfo);
+ }
+ cinfo->global_state = jpegli::kEncReadImage;
+ jpeg_comp_master* m = cinfo->master;
+ if (cinfo->next_scanline >= cinfo->image_height) {
+ return 0;
+ }
+ size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor;
+ if (num_lines < iMCU_height) {
+ JPEGLI_ERROR("Missing input lines, minimum is %u", iMCU_height);
+ }
+ if (cinfo->next_scanline < m->next_input_row) {
+ JXL_ASSERT(m->next_input_row - cinfo->next_scanline == iMCU_height);
+ if (!jpegli::EmptyBitWriterBuffer(&m->bw)) {
+ return 0;
+ }
+ cinfo->next_scanline = m->next_input_row;
+ return iMCU_height;
+ }
+ size_t iMCU_y = m->next_input_row / iMCU_height;
+ float* rows[jpegli::kMaxComponents];
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ JSAMPARRAY plane = data[c];
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ size_t xsize = comp->width_in_blocks * DCTSIZE;
+ size_t ysize = comp->v_samp_factor * DCTSIZE;
+ size_t y0 = iMCU_y * ysize;
+ auto& buffer = m->input_buffer[c];
+ for (size_t i = 0; i < ysize; ++i) {
+ rows[0] = buffer.Row(y0 + i);
+ if (plane[i] == nullptr) {
+ memset(rows[0], 0, xsize * sizeof(rows[0][0]));
+ } else {
+ (*m->input_method)(plane[i], xsize, rows);
+ }
+ // We need a border of 1 repeated pixel for adaptive quant field.
+ buffer.PadRow(y0 + i, xsize, /*border=*/1);
+ }
+ }
+ m->next_input_row += iMCU_height;
+ jpegli::ProcessiMCURows(cinfo);
+ if (!jpegli::EmptyBitWriterBuffer(&m->bw)) {
+ return 0;
+ }
+ cinfo->next_scanline += iMCU_height;
+ return iMCU_height;
+}
+
+void jpegli_finish_compress(j_compress_ptr cinfo) {
+ CheckState(cinfo, jpegli::kEncReadImage, jpegli::kEncWriteCoeffs);
+ jpeg_comp_master* m = cinfo->master;
+ if (cinfo->next_scanline < cinfo->image_height) {
+ JPEGLI_ERROR("Incomplete image, expected %d rows, got %d",
+ cinfo->image_height, cinfo->next_scanline);
+ }
+
+ if (jpegli::IsStreamingSupported(cinfo)) {
+ jpegli::JumpToByteBoundary(&m->bw);
+ if (!jpegli::EmptyBitWriterBuffer(&m->bw)) {
+ JPEGLI_ERROR("Output suspension is not supported in finish_compress");
+ }
+ if (!m->bw.healthy) {
+ JPEGLI_ERROR("Failed to encode scan.");
+ }
+ } else {
+ jpegli::EncodeScans(cinfo);
+ }
+
+ jpegli::WriteOutput(cinfo, {0xFF, 0xD9}); // EOI
+ (*cinfo->dest->term_destination)(cinfo);
+
+ // Release memory and reset global state.
+ jpegli_abort_compress(cinfo);
+}
+
+void jpegli_abort_compress(j_compress_ptr cinfo) {
+ jpegli_abort(reinterpret_cast<j_common_ptr>(cinfo));
+}
+
+void jpegli_destroy_compress(j_compress_ptr cinfo) {
+ jpegli_destroy(reinterpret_cast<j_common_ptr>(cinfo));
+}
diff --git a/third_party/jpeg-xl/lib/jpegli/encode.h b/third_party/jpeg-xl/lib/jpegli/encode.h
new file mode 100644
index 0000000000..075b6b855f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/encode.h
@@ -0,0 +1,159 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// This file conatins the C API of the encoder part of the libjpegli library,
+// which is based on the C API of libjpeg, with the function names changed from
+// jpeg_* to jpegli_*, while compressor object definitions are included directly
+// from jpeglib.h
+//
+// Applications can use the libjpegli library in one of the following ways:
+//
+// (1) Include jpegli/encode.h and/or jpegli/decode.h, update the function
+// names of the API and link against libjpegli.
+//
+// (2) Leave the application code unchanged, but replace the libjpeg.so library
+// with the one built by this project that is API- and ABI-compatible with
+// libjpeg-turbo's version of libjpeg.so.
+
+#ifndef LIB_JPEGLI_ENCODE_H_
+#define LIB_JPEGLI_ENCODE_H_
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+/* clang-format on */
+
+#include "lib/jpegli/common.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+#define jpegli_create_compress(cinfo) \
+ jpegli_CreateCompress((cinfo), JPEG_LIB_VERSION, \
+ (size_t)sizeof(struct jpeg_compress_struct))
+void jpegli_CreateCompress(j_compress_ptr cinfo, int version,
+ size_t structsize);
+
+void jpegli_stdio_dest(j_compress_ptr cinfo, FILE* outfile);
+
+void jpegli_mem_dest(j_compress_ptr cinfo, unsigned char** outbuffer,
+ unsigned long* outsize);
+
+void jpegli_set_defaults(j_compress_ptr cinfo);
+
+void jpegli_default_colorspace(j_compress_ptr cinfo);
+
+void jpegli_set_colorspace(j_compress_ptr cinfo, J_COLOR_SPACE colorspace);
+
+void jpegli_set_quality(j_compress_ptr cinfo, int quality,
+ boolean force_baseline);
+
+void jpegli_set_linear_quality(j_compress_ptr cinfo, int scale_factor,
+ boolean force_baseline);
+
+#if JPEG_LIB_VERSION >= 70
+void jpegli_default_qtables(j_compress_ptr cinfo, boolean force_baseline);
+#endif
+
+int jpegli_quality_scaling(int quality);
+
+void jpegli_add_quant_table(j_compress_ptr cinfo, int which_tbl,
+ const unsigned int* basic_table, int scale_factor,
+ boolean force_baseline);
+
+void jpegli_simple_progression(j_compress_ptr cinfo);
+
+void jpegli_suppress_tables(j_compress_ptr cinfo, boolean suppress);
+
+#if JPEG_LIB_VERSION >= 70
+void jpegli_calc_jpeg_dimensions(j_compress_ptr cinfo);
+#endif
+
+void jpegli_copy_critical_parameters(j_decompress_ptr srcinfo,
+ j_compress_ptr dstinfo);
+
+void jpegli_write_m_header(j_compress_ptr cinfo, int marker,
+ unsigned int datalen);
+
+void jpegli_write_m_byte(j_compress_ptr cinfo, int val);
+
+void jpegli_write_marker(j_compress_ptr cinfo, int marker,
+ const JOCTET* dataptr, unsigned int datalen);
+
+void jpegli_write_icc_profile(j_compress_ptr cinfo, const JOCTET* icc_data_ptr,
+ unsigned int icc_data_len);
+
+void jpegli_start_compress(j_compress_ptr cinfo, boolean write_all_tables);
+
+void jpegli_write_tables(j_compress_ptr cinfo);
+
+JDIMENSION jpegli_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines,
+ JDIMENSION num_lines);
+
+JDIMENSION jpegli_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data,
+ JDIMENSION num_lines);
+
+void jpegli_write_coefficients(j_compress_ptr cinfo,
+ jvirt_barray_ptr* coef_arrays);
+
+void jpegli_finish_compress(j_compress_ptr cinfo);
+
+void jpegli_abort_compress(j_compress_ptr cinfo);
+
+void jpegli_destroy_compress(j_compress_ptr cinfo);
+
+//
+// New API functions that are not available in libjpeg
+//
+// NOTE: This part of the API is still experimental and will probably change in
+// the future.
+//
+
+// Sets the butteraugli target distance for the compressor. This may override
+// the default quantization table indexes based on jpeg colorspace, therefore
+// it must be called after jpegli_set_defaults() or after the last
+// jpegli_set_colorspace() or jpegli_default_colorspace() calls.
+void jpegli_set_distance(j_compress_ptr cinfo, float distance,
+ boolean force_baseline);
+
+// Returns the butteraugli target distance for the given quality parameter.
+float jpegli_quality_to_distance(int quality);
+
+// Changes the default behaviour of the encoder in the selection of quantization
+// matrices and chroma subsampling. Must be called before jpegli_set_defaults()
+// because some default setting depend on the XYB mode.
+void jpegli_set_xyb_mode(j_compress_ptr cinfo);
+
+// Signals to the encoder that the pixel data that will be provided later
+// through jpegli_write_scanlines() has this transfer function. This must be
+// called before jpegli_set_defaults() because it changes the default
+// quantization tables.
+void jpegli_set_cicp_transfer_function(j_compress_ptr cinfo, int code);
+
+void jpegli_set_input_format(j_compress_ptr cinfo, JpegliDataType data_type,
+ JpegliEndianness endianness);
+
+// Sets whether or not the encoder uses adaptive quantization for createing more
+// zero coefficients based on the local properties of the image.
+// Enabled by default.
+void jpegli_enable_adaptive_quantization(j_compress_ptr cinfo, boolean value);
+
+// Sets the default progression parameters, where level 0 is sequential, and
+// greater level value means more progression steps. Default is 2.
+void jpegli_set_progressive_level(j_compress_ptr cinfo, int level);
+
+// If this function is called before starting compression, the quality and
+// linear quality parameters will be used to scale the standard quantization
+// tables from Annex K of the JPEG standard. By default jpegli uses a different
+// set of quantization tables and used different scaling parameters for DC and
+// AC coefficients. Must be called before jpegli_set_defaults().
+void jpegli_use_standard_quant_tables(j_compress_ptr cinfo);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} // extern "C"
+#endif
+
+#endif // LIB_JPEGLI_ENCODE_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/encode_api_test.cc b/third_party/jpeg-xl/lib/jpegli/encode_api_test.cc
new file mode 100644
index 0000000000..4358b2b6e0
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/encode_api_test.cc
@@ -0,0 +1,856 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jpegli {
+namespace {
+
+struct TestConfig {
+ TestImage input;
+ CompressParams jparams;
+ JpegIOMode input_mode = PIXELS;
+ double max_bpp;
+ double max_dist;
+};
+
+class EncodeAPITestParam : public ::testing::TestWithParam<TestConfig> {};
+
+void GenerateInput(JpegIOMode input_mode, const CompressParams& jparams,
+ TestImage* input) {
+ GeneratePixels(input);
+ if (input_mode == RAW_DATA) {
+ GenerateRawData(jparams, input);
+ } else if (input_mode == COEFFICIENTS) {
+ GenerateCoeffs(jparams, input);
+ }
+}
+
+TEST_P(EncodeAPITestParam, TestAPI) {
+ TestConfig config = GetParam();
+ GenerateInput(config.input_mode, config.jparams, &config.input);
+ std::vector<uint8_t> compressed;
+ ASSERT_TRUE(EncodeWithJpegli(config.input, config.jparams, &compressed));
+ if (config.jparams.icc.empty()) {
+ double bpp =
+ compressed.size() * 8.0 / (config.input.xsize * config.input.ysize);
+ printf("bpp: %f\n", bpp);
+ EXPECT_LT(bpp, config.max_bpp);
+ }
+ DecompressParams dparams;
+ dparams.output_mode =
+ config.input_mode == COEFFICIENTS ? COEFFICIENTS : PIXELS;
+ if (config.jparams.set_jpeg_colorspace &&
+ config.jparams.jpeg_color_space == JCS_GRAYSCALE) {
+ ConvertToGrayscale(&config.input);
+ } else {
+ dparams.set_out_color_space = true;
+ dparams.out_color_space = config.input.color_space;
+ }
+ TestImage output;
+ DecodeWithLibjpeg(config.jparams, dparams, compressed, &output);
+ VerifyOutputImage(config.input, output, config.max_dist);
+}
+
+TEST(EncodeAPITest, ReuseCinfoSameImageTwice) {
+ TestImage input;
+ input.xsize = 129;
+ input.ysize = 73;
+ CompressParams jparams;
+ GenerateInput(PIXELS, jparams, &input);
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ std::vector<uint8_t> compressed0;
+ std::vector<uint8_t> compressed1;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ EncodeWithJpegli(input, jparams, &cinfo);
+ compressed0.assign(buffer, buffer + buffer_size);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ EncodeWithJpegli(input, jparams, &cinfo);
+ compressed1.assign(buffer, buffer + buffer_size);
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+ ASSERT_EQ(compressed0.size(), compressed1.size());
+ EXPECT_EQ(0,
+ memcmp(compressed0.data(), compressed1.data(), compressed0.size()));
+}
+
+std::vector<TestConfig> GenerateBasicConfigs() {
+ std::vector<TestConfig> all_configs;
+ for (int samp : {1, 2}) {
+ for (int progr : {0, 2}) {
+ for (int optimize : {0, 1}) {
+ if (progr && optimize) continue;
+ TestConfig config;
+ config.input.xsize = 257 + samp * 37;
+ config.input.ysize = 265 + optimize * 17;
+ config.jparams.h_sampling = {samp, 1, 1};
+ config.jparams.v_sampling = {samp, 1, 1};
+ config.jparams.progressive_mode = progr;
+ config.jparams.optimize_coding = optimize;
+ config.max_dist = 2.4f;
+ GeneratePixels(&config.input);
+ all_configs.push_back(config);
+ }
+ }
+ }
+ return all_configs;
+}
+
+TEST(EncodeAPITest, ReuseCinfoSameMemOutput) {
+ std::vector<TestConfig> all_configs = GenerateBasicConfigs();
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ {
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ for (const TestConfig& config : all_configs) {
+ EncodeWithJpegli(config.input, config.jparams, &cinfo);
+ }
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ }
+ std::vector<TestImage> all_outputs(all_configs.size());
+ {
+ jpeg_decompress_struct cinfo = {};
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpeg_create_decompress(&cinfo);
+ jpeg_mem_src(&cinfo, buffer, buffer_size);
+ for (size_t i = 0; i < all_configs.size(); ++i) {
+ DecodeWithLibjpeg(all_configs[i].jparams, DecompressParams(), &cinfo,
+ &all_outputs[i]);
+ }
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpeg_destroy_decompress(&cinfo);
+ }
+ for (size_t i = 0; i < all_configs.size(); ++i) {
+ VerifyOutputImage(all_configs[i].input, all_outputs[i],
+ all_configs[i].max_dist);
+ }
+ if (buffer) free(buffer);
+}
+
+TEST(EncodeAPITest, ReuseCinfoSameStdOutput) {
+ std::vector<TestConfig> all_configs = GenerateBasicConfigs();
+ FILE* tmpf = tmpfile();
+ JXL_CHECK(tmpf);
+ {
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_stdio_dest(&cinfo, tmpf);
+ for (const TestConfig& config : all_configs) {
+ EncodeWithJpegli(config.input, config.jparams, &cinfo);
+ }
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ }
+ rewind(tmpf);
+ std::vector<TestImage> all_outputs(all_configs.size());
+ {
+ jpeg_decompress_struct cinfo = {};
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpeg_create_decompress(&cinfo);
+ jpeg_stdio_src(&cinfo, tmpf);
+ for (size_t i = 0; i < all_configs.size(); ++i) {
+ DecodeWithLibjpeg(all_configs[i].jparams, DecompressParams(), &cinfo,
+ &all_outputs[i]);
+ }
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpeg_destroy_decompress(&cinfo);
+ }
+ for (size_t i = 0; i < all_configs.size(); ++i) {
+ VerifyOutputImage(all_configs[i].input, all_outputs[i],
+ all_configs[i].max_dist);
+ }
+ fclose(tmpf);
+}
+
+TEST(EncodeAPITest, ReuseCinfoChangeParams) {
+ TestImage input, output;
+ CompressParams jparams;
+ DecompressParams dparams;
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ std::vector<uint8_t> compressed;
+ jpeg_compress_struct cinfo;
+ const auto max_rms = [](int q, int hs, int vs) {
+ if (hs == 1 && vs == 1) return q == 90 ? 2.2 : 0.6;
+ if (hs == 2 && vs == 2) return q == 90 ? 2.8 : 1.2;
+ return q == 90 ? 2.4 : 1.0;
+ };
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ input.xsize = 129;
+ input.ysize = 73;
+ dparams.set_out_color_space = true;
+ for (JpegIOMode input_mode : {PIXELS, RAW_DATA, PIXELS, COEFFICIENTS}) {
+ for (int h_samp : {2, 1}) {
+ for (int v_samp : {2, 1}) {
+ for (int progr : {0, 2}) {
+ for (int quality : {90, 100}) {
+ input.Clear();
+ input.color_space =
+ (input_mode == RAW_DATA ? JCS_YCbCr : JCS_RGB);
+ jparams.quality = quality;
+ jparams.h_sampling = {h_samp, 1, 1};
+ jparams.v_sampling = {v_samp, 1, 1};
+ jparams.progressive_mode = progr;
+ printf(
+ "Generating input with quality %d chroma subsampling %dx%d "
+ "input mode %d progressive_mode %d\n",
+ quality, h_samp, v_samp, input_mode, progr);
+ GenerateInput(input_mode, jparams, &input);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ if (input_mode != COEFFICIENTS) {
+ cinfo.image_width = input.xsize;
+ cinfo.image_height = input.ysize;
+ cinfo.input_components = input.components;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ jpegli_abort_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ }
+ EncodeWithJpegli(input, jparams, &cinfo);
+ compressed.resize(buffer_size);
+ std::copy_n(buffer, buffer_size, compressed.data());
+ dparams.output_mode =
+ input_mode == COEFFICIENTS ? COEFFICIENTS : PIXELS;
+ dparams.out_color_space = input.color_space;
+ output.Clear();
+ DecodeWithLibjpeg(jparams, dparams, compressed, &output);
+ VerifyOutputImage(input, output,
+ max_rms(quality, h_samp, v_samp));
+ }
+ }
+ }
+ }
+ }
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncodeAPITest, AbbreviatedStreams) {
+ uint8_t* table_stream = nullptr;
+ unsigned long table_stream_size = 0;
+ uint8_t* data_stream = nullptr;
+ unsigned long data_stream_size = 0;
+ {
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &table_stream, &table_stream_size);
+ cinfo.input_components = 3;
+ cinfo.in_color_space = JCS_RGB;
+ jpegli_set_defaults(&cinfo);
+ jpegli_write_tables(&cinfo);
+ jpegli_mem_dest(&cinfo, &data_stream, &data_stream_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.optimize_coding = FALSE;
+ jpegli_set_progressive_level(&cinfo, 0);
+ jpegli_start_compress(&cinfo, FALSE);
+ JSAMPLE image[3] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ EXPECT_LT(data_stream_size, 50);
+ jpegli_destroy_compress(&cinfo);
+ }
+ {
+ jpeg_decompress_struct cinfo = {};
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpeg);
+ jpeg_create_decompress(&cinfo);
+ jpeg_mem_src(&cinfo, table_stream, table_stream_size);
+ jpeg_read_header(&cinfo, FALSE);
+ jpeg_mem_src(&cinfo, data_stream, data_stream_size);
+ jpeg_read_header(&cinfo, TRUE);
+ EXPECT_EQ(1, cinfo.image_width);
+ EXPECT_EQ(1, cinfo.image_height);
+ EXPECT_EQ(3, cinfo.num_components);
+ jpeg_start_decompress(&cinfo);
+ JSAMPLE image[3] = {0};
+ JSAMPROW row[] = {image};
+ jpeg_read_scanlines(&cinfo, row, 1);
+ jxl::msan::UnpoisonMemory(image, 3);
+ EXPECT_EQ(0, image[0]);
+ EXPECT_EQ(0, image[1]);
+ EXPECT_EQ(0, image[2]);
+ jpeg_finish_decompress(&cinfo);
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpeg_destroy_decompress(&cinfo);
+ }
+ if (table_stream) free(table_stream);
+ if (data_stream) free(data_stream);
+}
+
+void CopyQuantTables(j_compress_ptr cinfo, uint16_t* quant_tables) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ int quant_idx = cinfo->comp_info[c].quant_tbl_no;
+ JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[quant_idx];
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ quant_tables[c * DCTSIZE2 + k] = quant_table->quantval[k];
+ }
+ }
+}
+
+TEST(EncodeAPITest, QualitySettings) {
+ // Test that jpegli_set_quality, jpegli_set_linear_quality and
+ // jpegli_quality_scaling are consistent with each other.
+ uint16_t quant_tables0[3 * DCTSIZE2];
+ uint16_t quant_tables1[3 * DCTSIZE2];
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ cinfo.input_components = 3;
+ cinfo.in_color_space = JCS_RGB;
+ jpegli_set_defaults(&cinfo);
+ for (boolean baseline : {FALSE, TRUE}) {
+ for (int q = 1; q <= 100; ++q) {
+ jpegli_set_quality(&cinfo, q, baseline);
+ CopyQuantTables(&cinfo, quant_tables0);
+ jpegli_set_linear_quality(&cinfo, jpegli_quality_scaling(q), baseline);
+ CopyQuantTables(&cinfo, quant_tables1);
+ EXPECT_EQ(0,
+ memcmp(quant_tables0, quant_tables1, sizeof(quant_tables0)));
+#if JPEG_LIB_VERSION >= 70
+ for (int i = 0; i < NUM_QUANT_TBLS; ++i) {
+ cinfo.q_scale_factor[i] = jpegli_quality_scaling(q);
+ }
+ jpegli_default_qtables(&cinfo, baseline);
+ CopyQuantTables(&cinfo, quant_tables1);
+ EXPECT_EQ(0,
+ memcmp(quant_tables0, quant_tables1, sizeof(quant_tables0)));
+#endif
+ }
+ }
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ // Test jpegli_quality_scaling for some specific values .
+ EXPECT_EQ(5000, jpegli_quality_scaling(-1));
+ EXPECT_EQ(5000, jpegli_quality_scaling(0));
+ EXPECT_EQ(5000, jpegli_quality_scaling(1));
+ EXPECT_EQ(100, jpegli_quality_scaling(50));
+ EXPECT_EQ(50, jpegli_quality_scaling(75));
+ EXPECT_EQ(20, jpegli_quality_scaling(90));
+ EXPECT_EQ(0, jpegli_quality_scaling(100));
+ EXPECT_EQ(0, jpegli_quality_scaling(101));
+}
+
+std::vector<TestConfig> GenerateTests() {
+ std::vector<TestConfig> all_tests;
+ for (int h_samp : {1, 2}) {
+ for (int v_samp : {1, 2}) {
+ for (int progr : {0, 2}) {
+ for (int optimize : {0, 1}) {
+ if (progr && optimize) continue;
+ TestConfig config;
+ config.jparams.h_sampling = {h_samp, 1, 1};
+ config.jparams.v_sampling = {v_samp, 1, 1};
+ config.jparams.progressive_mode = progr;
+ if (!progr) {
+ config.jparams.optimize_coding = optimize;
+ }
+ const float kMaxBpp[4] = {1.55, 1.45, 1.45, 1.32};
+ const float kMaxDist[4] = {1.95, 2.1, 2.1, 2.0};
+ const int idx = v_samp * 2 + h_samp - 3;
+ config.max_bpp =
+ kMaxBpp[idx] * (optimize ? 0.97 : 1.0) * (progr ? 0.97 : 1.0);
+ config.max_dist = kMaxDist[idx];
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ {
+ TestConfig config;
+ config.jparams.quality = 100;
+ config.max_bpp = 6.6;
+ config.max_dist = 0.6;
+ all_tests.push_back(config);
+ }
+ {
+ TestConfig config;
+ config.jparams.quality = 80;
+ config.max_bpp = 1.05;
+ config.max_dist = 2.7;
+ all_tests.push_back(config);
+ }
+ for (int samp : {1, 2}) {
+ for (int progr : {0, 2}) {
+ for (int optimize : {0, 1}) {
+ if (progr && optimize) continue;
+ TestConfig config;
+ config.input.xsize = 257;
+ config.input.ysize = 265;
+ config.jparams.h_sampling = {samp, 1, 1};
+ config.jparams.v_sampling = {samp, 1, 1};
+ config.jparams.progressive_mode = progr;
+ if (!progr) {
+ config.jparams.optimize_coding = optimize;
+ }
+ config.jparams.use_adaptive_quantization = false;
+ config.max_bpp = 2.05f;
+ config.max_dist = 2.3f;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ for (int h0_samp : {1, 2, 4}) {
+ for (int v0_samp : {1, 2, 4}) {
+ for (int h2_samp : {1, 2, 4}) {
+ for (int v2_samp : {1, 2, 4}) {
+ TestConfig config;
+ config.input.xsize = 137;
+ config.input.ysize = 75;
+ config.jparams.progressive_mode = 2;
+ config.jparams.h_sampling = {h0_samp, 1, h2_samp};
+ config.jparams.v_sampling = {v0_samp, 1, v2_samp};
+ config.max_bpp = 2.5;
+ config.max_dist = 12.0;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ for (int h0_samp : {1, 3}) {
+ for (int v0_samp : {1, 3}) {
+ for (int h2_samp : {1, 3}) {
+ for (int v2_samp : {1, 3}) {
+ TestConfig config;
+ config.input.xsize = 205;
+ config.input.ysize = 99;
+ config.jparams.progressive_mode = 2;
+ config.jparams.h_sampling = {h0_samp, 1, h2_samp};
+ config.jparams.v_sampling = {v0_samp, 1, v2_samp};
+ config.max_bpp = 2.5;
+ config.max_dist = 10.0;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ for (int h0_samp : {1, 2, 3, 4}) {
+ for (int v0_samp : {1, 2, 3, 4}) {
+ TestConfig config;
+ config.input.xsize = 217;
+ config.input.ysize = 129;
+ config.jparams.progressive_mode = 2;
+ config.jparams.h_sampling = {h0_samp, 1, 1};
+ config.jparams.v_sampling = {v0_samp, 1, 1};
+ config.max_bpp = 2.0;
+ config.max_dist = 5.5;
+ all_tests.push_back(config);
+ }
+ }
+ for (int p = 0; p < 3 + kNumTestScripts; ++p) {
+ TestConfig config;
+ config.jparams.progressive_mode = p;
+ const float kMaxBpp[] = {1.59, 1.51, 1.48, 1.59, 1.55, 1.55, 1.51};
+ config.max_bpp = kMaxBpp[p];
+ config.max_dist = 2.0;
+ all_tests.push_back(config);
+ }
+ {
+ TestConfig config;
+ config.jparams.simple_progression = true;
+ config.max_bpp = 1.48;
+ config.max_dist = 2.0;
+ all_tests.push_back(config);
+ }
+ {
+ TestConfig config;
+ config.input_mode = COEFFICIENTS;
+ config.jparams.h_sampling = {2, 1, 1};
+ config.jparams.v_sampling = {2, 1, 1};
+ config.jparams.progressive_mode = 0;
+ config.jparams.optimize_coding = 0;
+ config.max_bpp = 16;
+ config.max_dist = 0.0;
+ all_tests.push_back(config);
+ }
+ {
+ TestConfig config;
+ config.jparams.xyb_mode = true;
+ config.jparams.progressive_mode = 2;
+ config.max_bpp = 1.5;
+ config.max_dist = 3.5;
+ all_tests.push_back(config);
+ }
+ {
+ TestConfig config;
+ config.jparams.libjpeg_mode = true;
+ config.max_bpp = 2.1;
+ config.max_dist = 1.7;
+ all_tests.push_back(config);
+ }
+
+ for (J_COLOR_SPACE in_color_space : {JCS_RGB, JCS_YCbCr, JCS_GRAYSCALE}) {
+ for (J_COLOR_SPACE jpeg_color_space : {JCS_RGB, JCS_YCbCr, JCS_GRAYSCALE}) {
+ if (jpeg_color_space == JCS_RGB && in_color_space == JCS_YCbCr) continue;
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.input.color_space = in_color_space;
+ config.jparams.set_jpeg_colorspace = true;
+ config.jparams.jpeg_color_space = jpeg_color_space;
+ config.max_bpp = jpeg_color_space == JCS_RGB ? 4.5 : 1.85;
+ config.max_dist = jpeg_color_space == JCS_RGB ? 1.4 : 2.05;
+ all_tests.push_back(config);
+ }
+ }
+ for (J_COLOR_SPACE in_color_space : {JCS_CMYK, JCS_YCCK}) {
+ for (J_COLOR_SPACE jpeg_color_space : {JCS_CMYK, JCS_YCCK}) {
+ if (jpeg_color_space == JCS_CMYK && in_color_space == JCS_YCCK) continue;
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.input.color_space = in_color_space;
+ if (in_color_space != jpeg_color_space) {
+ config.jparams.set_jpeg_colorspace = true;
+ config.jparams.jpeg_color_space = jpeg_color_space;
+ }
+ config.max_bpp = jpeg_color_space == JCS_CMYK ? 4.0 : 3.6;
+ config.max_dist = jpeg_color_space == JCS_CMYK ? 1.2 : 1.5;
+ all_tests.push_back(config);
+ }
+ }
+ {
+ TestConfig config;
+ config.input.color_space = JCS_YCbCr;
+ config.max_bpp = 1.6;
+ config.max_dist = 1.35;
+ all_tests.push_back(config);
+ }
+ for (bool xyb : {false, true}) {
+ TestConfig config;
+ config.input.color_space = JCS_GRAYSCALE;
+ config.jparams.xyb_mode = xyb;
+ config.max_bpp = 1.35;
+ config.max_dist = 1.4;
+ all_tests.push_back(config);
+ }
+ for (int channels = 1; channels <= 4; ++channels) {
+ TestConfig config;
+ config.input.color_space = JCS_UNKNOWN;
+ config.input.components = channels;
+ config.max_bpp = 1.35 * channels;
+ config.max_dist = 1.4;
+ all_tests.push_back(config);
+ }
+ for (size_t r : {1, 3, 17, 1024}) {
+ for (int progr : {0, 2}) {
+ TestConfig config;
+ config.jparams.restart_interval = r;
+ config.jparams.progressive_mode = progr;
+ config.max_bpp = 1.58 + 5.5 / r;
+ config.max_dist = 2.2;
+ all_tests.push_back(config);
+ }
+ }
+ for (size_t rr : {1, 3, 8, 100}) {
+ TestConfig config;
+ config.jparams.restart_in_rows = rr;
+ config.max_bpp = 1.6;
+ config.max_dist = 2.2;
+ all_tests.push_back(config);
+ }
+ for (int type : {0, 1, 10, 100, 10000}) {
+ for (int scale : {1, 50, 100, 200, 500}) {
+ for (bool add_raw : {false, true}) {
+ for (bool baseline : {true, false}) {
+ if (!baseline && (add_raw || type * scale < 25500)) continue;
+ TestConfig config;
+ config.input.xsize = 64;
+ config.input.ysize = 64;
+ CustomQuantTable table;
+ table.table_type = type;
+ table.scale_factor = scale;
+ table.force_baseline = baseline;
+ table.add_raw = add_raw;
+ table.Generate();
+ config.jparams.optimize_coding = 1;
+ config.jparams.quant_tables.push_back(table);
+ config.jparams.quant_indexes = {0, 0, 0};
+ float q = (type == 0 ? 16 : type) * scale * 0.01f;
+ if (baseline && !add_raw) q = std::max(1.0f, std::min(255.0f, q));
+ config.max_bpp = 1.5f + 25.0f / q;
+ config.max_dist = 0.6f + 0.25f * q;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ for (int qidx = 0; qidx < 8; ++qidx) {
+ if (qidx == 3) continue;
+ TestConfig config;
+ config.input.xsize = 256;
+ config.input.ysize = 256;
+ config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+ (qidx >> 0) & 1};
+ config.max_bpp = 2.25;
+ config.max_dist = 2.8;
+ all_tests.push_back(config);
+ }
+ for (int qidx = 0; qidx < 8; ++qidx) {
+ for (int slot_idx = 0; slot_idx < 2; ++slot_idx) {
+ if (qidx == 0 && slot_idx == 0) continue;
+ TestConfig config;
+ config.input.xsize = 256;
+ config.input.ysize = 256;
+ config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+ (qidx >> 0) & 1};
+ CustomQuantTable table;
+ table.slot_idx = slot_idx;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ config.max_bpp = 2.3;
+ config.max_dist = 2.9;
+ all_tests.push_back(config);
+ }
+ }
+ for (int qidx = 0; qidx < 8; ++qidx) {
+ for (bool xyb : {false, true}) {
+ TestConfig config;
+ config.input.xsize = 256;
+ config.input.ysize = 256;
+ config.jparams.xyb_mode = xyb;
+ config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+ (qidx >> 0) & 1};
+ {
+ CustomQuantTable table;
+ table.slot_idx = 0;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ {
+ CustomQuantTable table;
+ table.slot_idx = 1;
+ table.table_type = 20;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ config.max_bpp = 2.0;
+ config.max_dist = 3.85;
+ all_tests.push_back(config);
+ }
+ }
+ for (bool xyb : {false, true}) {
+ TestConfig config;
+ config.input.xsize = 256;
+ config.input.ysize = 256;
+ config.jparams.xyb_mode = xyb;
+ config.jparams.quant_indexes = {0, 1, 2};
+ {
+ CustomQuantTable table;
+ table.slot_idx = 0;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ {
+ CustomQuantTable table;
+ table.slot_idx = 1;
+ table.table_type = 20;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ {
+ CustomQuantTable table;
+ table.slot_idx = 2;
+ table.table_type = 30;
+ table.Generate();
+ config.jparams.quant_tables.push_back(table);
+ }
+ config.max_bpp = 1.5;
+ config.max_dist = 3.75;
+ all_tests.push_back(config);
+ }
+ {
+ TestConfig config;
+ config.jparams.comp_ids = {7, 17, 177};
+ config.input.xsize = config.input.ysize = 128;
+ config.max_bpp = 2.25;
+ config.max_dist = 2.4;
+ all_tests.push_back(config);
+ }
+ for (int override_JFIF : {-1, 0, 1}) {
+ for (int override_Adobe : {-1, 0, 1}) {
+ if (override_JFIF == -1 && override_Adobe == -1) continue;
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 128;
+ config.jparams.override_JFIF = override_JFIF;
+ config.jparams.override_Adobe = override_Adobe;
+ config.max_bpp = 2.25;
+ config.max_dist = 2.4;
+ all_tests.push_back(config);
+ }
+ }
+ {
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.max_bpp = 1.85;
+ config.max_dist = 2.05;
+ config.jparams.add_marker = true;
+ all_tests.push_back(config);
+ }
+ for (size_t icc_size : {728, 70000, 1000000}) {
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.max_dist = 2.05;
+ config.jparams.icc.resize(icc_size);
+ for (size_t i = 0; i < icc_size; ++i) {
+ config.jparams.icc[i] = (i * 17) & 0xff;
+ }
+ all_tests.push_back(config);
+ }
+ for (JpegIOMode input_mode : {PIXELS, RAW_DATA, COEFFICIENTS}) {
+ TestConfig config;
+ config.input.xsize = config.input.ysize = 256;
+ config.input_mode = input_mode;
+ if (input_mode == RAW_DATA) {
+ config.input.color_space = JCS_YCbCr;
+ }
+ config.jparams.progressive_mode = 0;
+ config.jparams.optimize_coding = 0;
+ config.max_bpp = 1.85;
+ config.max_dist = 2.05;
+ if (input_mode == COEFFICIENTS) {
+ config.max_bpp = 3.5;
+ config.max_dist = 0.0;
+ }
+ all_tests.push_back(config);
+ config.jparams.use_flat_dc_luma_code = true;
+ all_tests.push_back(config);
+ }
+ for (int xsize : {640, 641, 648, 649}) {
+ for (int ysize : {640, 641, 648, 649}) {
+ for (int h_sampling : {1, 2}) {
+ for (int v_sampling : {1, 2}) {
+ if (h_sampling == 1 && v_sampling == 1) continue;
+ for (int progr : {0, 2}) {
+ TestConfig config;
+ config.input.xsize = xsize;
+ config.input.ysize = ysize;
+ config.input.color_space = JCS_YCbCr;
+ config.jparams.h_sampling = {h_sampling, 1, 1};
+ config.jparams.v_sampling = {v_sampling, 1, 1};
+ config.jparams.progressive_mode = progr;
+ config.input_mode = RAW_DATA;
+ config.max_bpp = 1.75;
+ config.max_dist = 2.0;
+ all_tests.push_back(config);
+ config.input_mode = COEFFICIENTS;
+ if (xsize & 1) {
+ config.jparams.add_marker = true;
+ }
+ config.max_bpp = 24.0;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ }
+ for (JpegliDataType data_type : {JPEGLI_TYPE_UINT16, JPEGLI_TYPE_FLOAT}) {
+ for (JpegliEndianness endianness :
+ {JPEGLI_LITTLE_ENDIAN, JPEGLI_BIG_ENDIAN, JPEGLI_NATIVE_ENDIAN}) {
+ J_COLOR_SPACE colorspace[4] = {JCS_GRAYSCALE, JCS_UNKNOWN, JCS_RGB,
+ JCS_CMYK};
+ float max_bpp[4] = {1.32, 2.7, 1.6, 4.0};
+ for (int channels = 1; channels <= 4; ++channels) {
+ TestConfig config;
+ config.input.data_type = data_type;
+ config.input.endianness = endianness;
+ config.input.components = channels;
+ config.input.color_space = colorspace[channels - 1];
+ config.max_bpp = max_bpp[channels - 1];
+ config.max_dist = 2.2;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ for (int smoothing : {1, 5, 50, 100}) {
+ for (int h_sampling : {1, 2}) {
+ for (int v_sampling : {1, 2}) {
+ TestConfig config;
+ config.input.xsize = 257;
+ config.input.ysize = 265;
+ config.jparams.smoothing_factor = smoothing;
+ config.jparams.h_sampling = {h_sampling, 1, 1};
+ config.jparams.v_sampling = {v_sampling, 1, 1};
+ config.max_bpp = 1.85;
+ config.max_dist = 3.05f;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ return all_tests;
+};
+
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+ os << c.input;
+ os << c.jparams;
+ if (c.input_mode == RAW_DATA) {
+ os << "RawDataIn";
+ } else if (c.input_mode == COEFFICIENTS) {
+ os << "WriteCoeffs";
+ }
+ return os;
+}
+
+std::string TestDescription(
+ const testing::TestParamInfo<EncodeAPITestParam::ParamType>& info) {
+ std::stringstream name;
+ name << info.param;
+ return name.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(EncodeAPITest, EncodeAPITestParam,
+ testing::ValuesIn(GenerateTests()),
+ TestDescription);
+} // namespace
+} // namespace jpegli
diff --git a/third_party/jpeg-xl/lib/jpegli/encode_internal.h b/third_party/jpeg-xl/lib/jpegli/encode_internal.h
new file mode 100644
index 0000000000..8f08272fd2
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/encode_internal.h
@@ -0,0 +1,101 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ENCODE_INTERNAL_H_
+#define LIB_JPEGLI_ENCODE_INTERNAL_H_
+
+/* clang-format off */
+#include <stdint.h>
+#include <stdio.h>
+#include <jpeglib.h>
+/* clang-format on */
+
+#include "lib/jpegli/bit_writer.h"
+#include "lib/jpegli/common_internal.h"
+#include "lib/jpegli/encode.h"
+
+namespace jpegli {
+
+constexpr unsigned char kICCSignature[12] = {
+ 0x49, 0x43, 0x43, 0x5F, 0x50, 0x52, 0x4F, 0x46, 0x49, 0x4C, 0x45, 0x00};
+constexpr int kICCMarker = JPEG_APP0 + 2;
+
+struct JPEGHuffmanCode {
+ // Bit length histogram.
+ uint32_t counts[kJpegHuffmanMaxBitLength + 1];
+ // Symbol values sorted by increasing bit lengths.
+ uint32_t values[kJpegHuffmanAlphabetSize + 1];
+ // The index of the Huffman code in the current set of Huffman codes. For AC
+ // component Huffman codes, 0x10 is added to the index.
+ int slot_id;
+ boolean sent_table;
+};
+
+// DCTCodingState: maximum number of correction bits to buffer
+const int kJPEGMaxCorrectionBits = 1u << 16;
+
+constexpr int kDefaultProgressiveLevel = 0;
+
+struct HuffmanCodeTable {
+ int depth[256];
+ int code[256];
+};
+
+struct ScanCodingInfo {
+ uint32_t dc_tbl_idx[MAX_COMPS_IN_SCAN];
+ uint32_t ac_tbl_idx[MAX_COMPS_IN_SCAN];
+ // Number of Huffman codes defined in the DHT segment preceding this scan.
+ size_t num_huffman_codes;
+};
+
+typedef int16_t coeff_t;
+
+} // namespace jpegli
+
+struct jpeg_comp_master {
+ jpegli::RowBuffer<float> input_buffer[jpegli::kMaxComponents];
+ jpegli::RowBuffer<float>* smooth_input[jpegli::kMaxComponents];
+ jpegli::RowBuffer<float>* raw_data[jpegli::kMaxComponents];
+ bool force_baseline;
+ bool xyb_mode;
+ uint8_t cicp_transfer_function;
+ bool use_std_tables;
+ bool use_adaptive_quantization;
+ int progressive_level;
+ size_t xsize_blocks;
+ size_t ysize_blocks;
+ size_t blocks_per_iMCU_row;
+ jpegli::ScanCodingInfo* scan_coding_info;
+ JpegliDataType data_type;
+ JpegliEndianness endianness;
+ void (*input_method)(const uint8_t* row_in, size_t len,
+ float* row_out[jpegli::kMaxComponents]);
+ void (*color_transform)(float* row[jpegli::kMaxComponents], size_t len);
+ void (*downsample_method[jpegli::kMaxComponents])(
+ float* rows_in[MAX_SAMP_FACTOR], size_t len, float* row_out);
+ float* quant_mul[jpegli::kMaxComponents];
+ float* zero_bias_offset[jpegli::kMaxComponents];
+ float* zero_bias_mul[jpegli::kMaxComponents];
+ int h_factor[jpegli::kMaxComponents];
+ int v_factor[jpegli::kMaxComponents];
+ jpegli::JPEGHuffmanCode* huffman_codes;
+ size_t num_huffman_codes;
+ jpegli::HuffmanCodeTable huff_tables[8];
+ float* diff_buffer;
+ jpegli::RowBuffer<float> fuzzy_erosion_tmp;
+ jpegli::RowBuffer<float> pre_erosion;
+ jpegli::RowBuffer<float> quant_field;
+ jvirt_barray_ptr* coeff_buffers;
+ size_t next_input_row;
+ size_t next_iMCU_row;
+ size_t last_dht_index;
+ size_t last_restart_interval;
+ JCOEF last_dc_coeff[MAX_COMPS_IN_SCAN];
+ jpegli::JpegBitWriter bw;
+ float* dct_buffer;
+ int32_t* block_tmp;
+};
+
+#endif // LIB_JPEGLI_ENCODE_INTERNAL_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/entropy_coding.cc b/third_party/jpeg-xl/lib/jpegli/entropy_coding.cc
new file mode 100644
index 0000000000..110a36a3e9
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/entropy_coding.cc
@@ -0,0 +1,605 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/entropy_coding.h"
+
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/huffman.h"
+#include "lib/jxl/base/bits.h"
+
+namespace jpegli {
+namespace {
+
+float HistogramCost(const Histogram& histo) {
+ std::vector<uint32_t> counts(kJpegHuffmanAlphabetSize + 1);
+ std::vector<uint8_t> depths(kJpegHuffmanAlphabetSize + 1);
+ for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
+ counts[i] = histo.count[i];
+ }
+ counts[kJpegHuffmanAlphabetSize] = 1;
+ CreateHuffmanTree(counts.data(), counts.size(), kJpegHuffmanMaxBitLength,
+ &depths[0]);
+ size_t header_bits = (1 + kJpegHuffmanMaxBitLength) * 8;
+ size_t data_bits = 0;
+ for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
+ if (depths[i] > 0) {
+ header_bits += 8;
+ data_bits += counts[i] * depths[i];
+ }
+ }
+ return header_bits + data_bits;
+}
+
+void AddHistograms(const Histogram& a, const Histogram& b, Histogram* c) {
+ for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
+ c->count[i] = a.count[i] + b.count[i];
+ }
+}
+
+bool IsEmptyHistogram(const Histogram& histo) {
+ for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
+ if (histo.count[i]) return false;
+ }
+ return true;
+}
+
+} // namespace
+
+void ClusterJpegHistograms(const Histogram* histograms, size_t num,
+ JpegClusteredHistograms* clusters) {
+ clusters->histogram_indexes.resize(num);
+ std::vector<uint32_t> slot_histograms;
+ std::vector<float> slot_costs;
+ for (size_t i = 0; i < num; ++i) {
+ const Histogram& cur = histograms[i];
+ if (IsEmptyHistogram(cur)) {
+ continue;
+ }
+ float best_cost = HistogramCost(cur);
+ size_t best_slot = slot_histograms.size();
+ for (size_t j = 0; j < slot_histograms.size(); ++j) {
+ size_t prev_idx = slot_histograms[j];
+ const Histogram& prev = clusters->histograms[prev_idx];
+ Histogram combined;
+ AddHistograms(prev, cur, &combined);
+ float combined_cost = HistogramCost(combined);
+ float cost = combined_cost - slot_costs[j];
+ if (cost < best_cost) {
+ best_cost = cost;
+ best_slot = j;
+ }
+ }
+ if (best_slot == slot_histograms.size()) {
+ // Create new histogram.
+ size_t histogram_index = clusters->histograms.size();
+ clusters->histograms.push_back(cur);
+ clusters->histogram_indexes[i] = histogram_index;
+ if (best_slot < 4) {
+ // We have a free slot, so we put the new histogram there.
+ slot_histograms.push_back(histogram_index);
+ slot_costs.push_back(best_cost);
+ } else {
+ // TODO(szabadka) Find the best histogram to replce.
+ best_slot = (clusters->slot_ids.back() + 1) % 4;
+ }
+ slot_histograms[best_slot] = histogram_index;
+ slot_costs[best_slot] = best_cost;
+ clusters->slot_ids.push_back(best_slot);
+ } else {
+ // Merge this histogram with a previous one.
+ size_t histogram_index = slot_histograms[best_slot];
+ const Histogram& prev = clusters->histograms[histogram_index];
+ AddHistograms(prev, cur, &clusters->histograms[histogram_index]);
+ clusters->histogram_indexes[i] = histogram_index;
+ JXL_ASSERT(clusters->slot_ids[histogram_index] == best_slot);
+ slot_costs[best_slot] += best_cost;
+ }
+ }
+}
+
+void BuildJpegHuffmanCode(const Histogram& histo, JPEGHuffmanCode* huff) {
+ std::vector<uint32_t> counts(kJpegHuffmanAlphabetSize + 1);
+ std::vector<uint8_t> depths(kJpegHuffmanAlphabetSize + 1);
+ for (size_t j = 0; j < kJpegHuffmanAlphabetSize; ++j) {
+ counts[j] = histo.count[j];
+ }
+ counts[kJpegHuffmanAlphabetSize] = 1;
+ CreateHuffmanTree(counts.data(), counts.size(), kJpegHuffmanMaxBitLength,
+ &depths[0]);
+ std::fill(std::begin(huff->counts), std::end(huff->counts), 0);
+ std::fill(std::begin(huff->values), std::end(huff->values), 0);
+ for (size_t i = 0; i <= kJpegHuffmanAlphabetSize; ++i) {
+ if (depths[i] > 0) {
+ ++huff->counts[depths[i]];
+ }
+ }
+ int offset[kJpegHuffmanMaxBitLength + 1] = {0};
+ for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) {
+ offset[i] = offset[i - 1] + huff->counts[i - 1];
+ }
+ for (size_t i = 0; i <= kJpegHuffmanAlphabetSize; ++i) {
+ if (depths[i] > 0) {
+ huff->values[offset[depths[i]]++] = i;
+ }
+ }
+}
+
+void AddJpegHuffmanCode(const Histogram& histogram, size_t slot_id,
+ JPEGHuffmanCode* huff_codes, size_t* num_huff_codes) {
+ JPEGHuffmanCode huff_code = {};
+ huff_code.slot_id = slot_id;
+ BuildJpegHuffmanCode(histogram, &huff_code);
+ memcpy(&huff_codes[*num_huff_codes], &huff_code, sizeof(huff_code));
+ ++(*num_huff_codes);
+}
+
+namespace {
+void SetJpegHuffmanCode(const JpegClusteredHistograms& clusters,
+ size_t histogram_id, size_t slot_id_offset,
+ std::vector<uint32_t>& slot_histograms,
+ uint32_t* slot_id, bool* is_baseline,
+ JPEGHuffmanCode* huff_codes, size_t* num_huff_codes) {
+ JXL_ASSERT(histogram_id < clusters.histogram_indexes.size());
+ uint32_t histogram_index = clusters.histogram_indexes[histogram_id];
+ uint32_t id = clusters.slot_ids[histogram_index];
+ if (id > 1) {
+ *is_baseline = false;
+ }
+ *slot_id = id + (slot_id_offset / 4);
+ if (slot_histograms[id] != histogram_index) {
+ AddJpegHuffmanCode(clusters.histograms[histogram_index],
+ slot_id_offset + id, huff_codes, num_huff_codes);
+ slot_histograms[id] = histogram_index;
+ }
+}
+
+struct DCTState {
+ int eob_run = 0;
+ size_t num_refinement_bits = 0;
+ Histogram* ac_histo = nullptr;
+};
+
+static JXL_INLINE void ProcessFlush(DCTState* s) {
+ if (s->eob_run > 0) {
+ int nbits = jxl::FloorLog2Nonzero<uint32_t>(s->eob_run);
+ int symbol = nbits << 4u;
+ ++s->ac_histo->count[symbol];
+ s->eob_run = 0;
+ }
+ s->num_refinement_bits = 0;
+}
+
+static JXL_INLINE void ProcessEndOfBand(DCTState* s, size_t new_refinement_bits,
+ Histogram* new_ac_histo) {
+ if (s->eob_run == 0) {
+ s->ac_histo = new_ac_histo;
+ }
+ ++s->eob_run;
+ s->num_refinement_bits += new_refinement_bits;
+ if (s->eob_run == 0x7FFF ||
+ s->num_refinement_bits > kJPEGMaxCorrectionBits - kDCTBlockSize + 1) {
+ ProcessFlush(s);
+ }
+}
+
+bool ProcessDCTBlockSequential(const coeff_t* coeffs, Histogram* dc_histo,
+ Histogram* ac_histo, coeff_t* last_dc_coeff) {
+ coeff_t temp2;
+ coeff_t temp;
+ temp2 = coeffs[0];
+ temp = temp2 - *last_dc_coeff;
+ *last_dc_coeff = temp2;
+ temp2 = temp;
+ if (temp < 0) {
+ temp = -temp;
+ if (temp < 0) return false;
+ temp2--;
+ }
+ int dc_nbits = (temp == 0) ? 0 : (jxl::FloorLog2Nonzero<uint32_t>(temp) + 1);
+ ++dc_histo->count[dc_nbits];
+ if (dc_nbits >= 12) return false;
+ int r = 0;
+ for (int k = 1; k < 64; ++k) {
+ if ((temp = coeffs[kJPEGNaturalOrder[k]]) == 0) {
+ r++;
+ continue;
+ }
+ if (temp < 0) {
+ temp = -temp;
+ if (temp < 0) return false;
+ temp2 = ~temp;
+ } else {
+ temp2 = temp;
+ }
+ while (r > 15) {
+ ++ac_histo->count[0xf0];
+ r -= 16;
+ }
+ int ac_nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1;
+ if (ac_nbits >= 16) return false;
+ int symbol = (r << 4u) + ac_nbits;
+ ++ac_histo->count[symbol];
+ r = 0;
+ }
+ if (r > 0) {
+ ++ac_histo->count[0];
+ }
+ return true;
+}
+
+bool ProcessDCTBlockProgressive(const coeff_t* coeffs, Histogram* dc_histo,
+ Histogram* ac_histo, int Ss, int Se, int Al,
+ DCTState* s, coeff_t* last_dc_coeff) {
+ bool eob_run_allowed = Ss > 0;
+ coeff_t temp2;
+ coeff_t temp;
+ if (Ss == 0) {
+ temp2 = coeffs[0] >> Al;
+ temp = temp2 - *last_dc_coeff;
+ *last_dc_coeff = temp2;
+ temp2 = temp;
+ if (temp < 0) {
+ temp = -temp;
+ if (temp < 0) return false;
+ temp2--;
+ }
+ int nbits = (temp == 0) ? 0 : (jxl::FloorLog2Nonzero<uint32_t>(temp) + 1);
+ ++dc_histo->count[nbits];
+ ++Ss;
+ }
+ if (Ss > Se) {
+ return true;
+ }
+ int r = 0;
+ for (int k = Ss; k <= Se; ++k) {
+ if ((temp = coeffs[kJPEGNaturalOrder[k]]) == 0) {
+ r++;
+ continue;
+ }
+ if (temp < 0) {
+ temp = -temp;
+ if (temp < 0) return false;
+ temp >>= Al;
+ temp2 = ~temp;
+ } else {
+ temp >>= Al;
+ temp2 = temp;
+ }
+ if (temp == 0) {
+ r++;
+ continue;
+ }
+ ProcessFlush(s);
+ while (r > 15) {
+ ++ac_histo->count[0xf0];
+ r -= 16;
+ }
+ int nbits = jxl::FloorLog2Nonzero<uint32_t>(temp) + 1;
+ int symbol = (r << 4u) + nbits;
+ ++ac_histo->count[symbol];
+ r = 0;
+ }
+ if (r > 0) {
+ ProcessEndOfBand(s, 0, ac_histo);
+ if (!eob_run_allowed) {
+ ProcessFlush(s);
+ }
+ }
+ return true;
+}
+
+bool ProcessRefinementBits(const coeff_t* coeffs, Histogram* ac_histo, int Ss,
+ int Se, int Al, DCTState* s) {
+ bool eob_run_allowed = Ss > 0;
+ if (Ss == 0) {
+ ++Ss;
+ }
+ if (Ss > Se) {
+ return true;
+ }
+ int abs_values[kDCTBlockSize];
+ int eob = 0;
+ for (int k = Ss; k <= Se; k++) {
+ const coeff_t abs_val = std::abs(coeffs[kJPEGNaturalOrder[k]]);
+ abs_values[k] = abs_val >> Al;
+ if (abs_values[k] == 1) {
+ eob = k;
+ }
+ }
+ int r = 0;
+ size_t num_refinement_bits = 0;
+ for (int k = Ss; k <= Se; k++) {
+ if (abs_values[k] == 0) {
+ r++;
+ continue;
+ }
+ while (r > 15 && k <= eob) {
+ ProcessFlush(s);
+ ++ac_histo->count[0xf0];
+ r -= 16;
+ num_refinement_bits = 0;
+ }
+ if (abs_values[k] > 1) {
+ ++num_refinement_bits;
+ continue;
+ }
+ ProcessFlush(s);
+ int symbol = (r << 4u) + 1;
+ ++ac_histo->count[symbol];
+ num_refinement_bits = 0;
+ r = 0;
+ }
+ if (r > 0 || num_refinement_bits > 0) {
+ ProcessEndOfBand(s, num_refinement_bits, ac_histo);
+ if (!eob_run_allowed) {
+ ProcessFlush(s);
+ }
+ }
+ return true;
+}
+
+void ProgressMonitorHistogramPass(j_compress_ptr cinfo, size_t scan_index,
+ size_t mcu_y) {
+ if (cinfo->progress == nullptr) {
+ return;
+ }
+ cinfo->progress->completed_passes = 1 + scan_index;
+ cinfo->progress->pass_counter = mcu_y;
+ cinfo->progress->pass_limit = cinfo->total_iMCU_rows;
+ (*cinfo->progress->progress_monitor)(reinterpret_cast<j_common_ptr>(cinfo));
+}
+
+bool ProcessScan(j_compress_ptr cinfo,
+ size_t scan_index, int* histo_index, Histogram* dc_histograms,
+ Histogram* ac_histograms) {
+ jpeg_comp_master* m = cinfo->master;
+ size_t restart_interval = RestartIntervalForScan(cinfo, scan_index);
+ int restarts_to_go = restart_interval;
+ coeff_t last_dc_coeff[MAX_COMPS_IN_SCAN] = {0};
+ DCTState s;
+
+ const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
+ // "Non-interleaved" means color data comes in separate scans, in other words
+ // each scan can contain only one color component.
+ const bool is_interleaved = (scan_info->comps_in_scan > 1);
+ jpeg_component_info* base_comp =
+ &cinfo->comp_info[scan_info->component_index[0]];
+ // h_group / v_group act as numerators for converting number of blocks to
+ // number of MCU. In interleaved mode it is 1, so MCU is represented with
+ // max_*_samp_factor blocks. In non-interleaved mode we choose numerator to
+ // be the samping factor, consequently MCU is always represented with single
+ // block.
+ const int h_group = is_interleaved ? 1 : base_comp->h_samp_factor;
+ const int v_group = is_interleaved ? 1 : base_comp->v_samp_factor;
+ int MCUs_per_row =
+ DivCeil(cinfo->image_width * h_group, 8 * cinfo->max_h_samp_factor);
+ int MCU_rows =
+ DivCeil(cinfo->image_height * v_group, 8 * cinfo->max_v_samp_factor);
+ const bool is_progressive = cinfo->progressive_mode;
+ const int Al = scan_info->Al;
+ const int Ah = scan_info->Ah;
+ const int Ss = scan_info->Ss;
+ const int Se = scan_info->Se;
+ constexpr coeff_t kDummyBlock[DCTSIZE2] = {0};
+
+ JBLOCKARRAY ba[MAX_COMPS_IN_SCAN];
+ for (int mcu_y = 0; mcu_y < MCU_rows; ++mcu_y) {
+ ProgressMonitorHistogramPass(cinfo, scan_index, mcu_y);
+ for (int i = 0; i < scan_info->comps_in_scan; ++i) {
+ int comp_idx = scan_info->component_index[i];
+ jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+ int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1;
+ int by0 = mcu_y * n_blocks_y;
+ int block_rows_left = comp->height_in_blocks - by0;
+ int max_block_rows = std::min(n_blocks_y, block_rows_left);
+ ba[i] = (*cinfo->mem->access_virt_barray)(
+ reinterpret_cast<j_common_ptr>(cinfo), m->coeff_buffers[comp_idx],
+ by0, max_block_rows, false);
+ }
+ for (int mcu_x = 0; mcu_x < MCUs_per_row; ++mcu_x) {
+ // Possibly emit a restart marker.
+ if (restart_interval > 0 && restarts_to_go == 0) {
+ ProcessFlush(&s);
+ restarts_to_go = restart_interval;
+ memset(last_dc_coeff, 0, sizeof(last_dc_coeff));
+ }
+ // Encode one MCU
+ for (int i = 0; i < scan_info->comps_in_scan; ++i) {
+ int comp_idx = scan_info->component_index[i];
+ jpeg_component_info* comp = &cinfo->comp_info[comp_idx];
+ int histo_idx = *histo_index + i;
+ Histogram* dc_histo = &dc_histograms[histo_idx];
+ Histogram* ac_histo = &ac_histograms[histo_idx];
+ int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1;
+ int n_blocks_x = is_interleaved ? comp->h_samp_factor : 1;
+ for (int iy = 0; iy < n_blocks_y; ++iy) {
+ for (int ix = 0; ix < n_blocks_x; ++ix) {
+ size_t block_y = mcu_y * n_blocks_y + iy;
+ size_t block_x = mcu_x * n_blocks_x + ix;
+ const coeff_t* block;
+ if (block_x >= comp->width_in_blocks ||
+ block_y >= comp->height_in_blocks) {
+ block = kDummyBlock;
+ } else {
+ block = &ba[i][iy][block_x][0];
+ }
+ bool ok;
+ if (!is_progressive) {
+ ok = ProcessDCTBlockSequential(block, dc_histo, ac_histo,
+ last_dc_coeff + i);
+ } else if (Ah == 0) {
+ ok = ProcessDCTBlockProgressive(block, dc_histo, ac_histo, Ss, Se,
+ Al, &s, last_dc_coeff + i);
+ } else {
+ ok = ProcessRefinementBits(block, ac_histo, Ss, Se, Al, &s);
+ }
+ if (!ok) return false;
+ }
+ }
+ }
+ --restarts_to_go;
+ }
+ }
+ ProcessFlush(&s);
+ *histo_index += scan_info->comps_in_scan;
+ return true;
+}
+
+void ProcessJpeg(j_compress_ptr cinfo,
+ std::vector<Histogram>* dc_histograms,
+ std::vector<Histogram>* ac_histograms) {
+ int histo_index = 0;
+ for (int i = 0; i < cinfo->num_scans; ++i) {
+ if (!ProcessScan(cinfo, i, &histo_index, &(*dc_histograms)[0],
+ &(*ac_histograms)[0])) {
+ JPEGLI_ERROR("Invalid scan.");
+ }
+ }
+}
+
+void CopyHuffmanTable(j_compress_ptr cinfo, int index, bool is_dc,
+ JPEGHuffmanCode* huffman_codes,
+ size_t* num_huffman_codes) {
+ const char* type = is_dc ? "DC" : "AC";
+ if (index < 0 || index >= NUM_HUFF_TBLS) {
+ JPEGLI_ERROR("Invalid %s Huffman table index %d", type, index);
+ }
+ JHUFF_TBL* table =
+ is_dc ? cinfo->dc_huff_tbl_ptrs[index] : cinfo->ac_huff_tbl_ptrs[index];
+ if (table == nullptr) {
+ JPEGLI_ERROR("Missing %s Huffman table %d", type, index);
+ }
+ ValidateHuffmanTable(reinterpret_cast<j_common_ptr>(cinfo), table, is_dc);
+ JPEGHuffmanCode huff = {};
+ size_t max_depth = 0;
+ for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) {
+ if (table->bits[i] != 0) max_depth = i;
+ huff.counts[i] = table->bits[i];
+ }
+ ++huff.counts[max_depth];
+ for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) {
+ huff.values[i] = table->huffval[i];
+ }
+ huff.slot_id = index + (is_dc ? 0 : 0x10);
+ huff.sent_table = table->sent_table;
+ bool have_slot = false;
+ for (size_t i = 0; i < *num_huffman_codes; ++i) {
+ if (huffman_codes[i].slot_id == huff.slot_id) have_slot = true;
+ }
+ if (!have_slot) {
+ memcpy(&huffman_codes[*num_huffman_codes], &huff, sizeof(huff));
+ ++(*num_huffman_codes);
+ }
+}
+
+} // namespace
+
+void CopyHuffmanCodes(j_compress_ptr cinfo, bool* is_baseline) {
+ jpeg_comp_master* m = cinfo->master;
+ m->huffman_codes =
+ Allocate<JPEGHuffmanCode>(cinfo, 2 * cinfo->num_components, JPOOL_IMAGE);
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ if (comp->dc_tbl_no > 1 || comp->ac_tbl_no > 1) {
+ *is_baseline = false;
+ }
+ CopyHuffmanTable(cinfo, comp->dc_tbl_no, /*is_dc=*/true, m->huffman_codes,
+ &m->num_huffman_codes);
+ CopyHuffmanTable(cinfo, comp->ac_tbl_no, /*is_dc=*/false, m->huffman_codes,
+ &m->num_huffman_codes);
+ }
+ for (int i = 0; i < cinfo->num_scans; ++i) {
+ const jpeg_scan_info* si = &cinfo->scan_info[i];
+ ScanCodingInfo sci = {};
+ for (int j = 0; j < si->comps_in_scan; ++j) {
+ int ci = si->component_index[j];
+ sci.dc_tbl_idx[j] = cinfo->comp_info[ci].dc_tbl_no;
+ sci.ac_tbl_idx[j] = cinfo->comp_info[ci].ac_tbl_no + 4;
+ }
+ if (i == 0) {
+ sci.num_huffman_codes = m->num_huffman_codes;
+ }
+ memcpy(&m->scan_coding_info[i], &sci, sizeof(sci));
+ }
+}
+
+size_t RestartIntervalForScan(j_compress_ptr cinfo, size_t scan_index) {
+ if (cinfo->restart_in_rows <= 0) {
+ return cinfo->restart_interval;
+ } else {
+ const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index];
+ const bool is_interleaved = (scan_info->comps_in_scan > 1);
+ jpeg_component_info* base_comp =
+ &cinfo->comp_info[scan_info->component_index[0]];
+ const int h_group = is_interleaved ? 1 : base_comp->h_samp_factor;
+ int MCUs_per_row =
+ DivCeil(cinfo->image_width * h_group, 8 * cinfo->max_h_samp_factor);
+ return std::min<size_t>(MCUs_per_row * cinfo->restart_in_rows, 65535u);
+ }
+}
+
+void OptimizeHuffmanCodes(j_compress_ptr cinfo, bool* is_baseline) {
+ jpeg_comp_master* m = cinfo->master;
+ // Gather histograms.
+ size_t num_histo = 0;
+ for (int i = 0; i < cinfo->num_scans; ++i) {
+ num_histo += cinfo->scan_info[i].comps_in_scan;
+ }
+ std::vector<Histogram> dc_histograms(num_histo);
+ std::vector<Histogram> ac_histograms(num_histo);
+ ProcessJpeg(cinfo, &dc_histograms, &ac_histograms);
+
+ // Cluster DC histograms.
+ JpegClusteredHistograms dc_clusters;
+ ClusterJpegHistograms(dc_histograms.data(), dc_histograms.size(),
+ &dc_clusters);
+
+ // Cluster AC histograms.
+ JpegClusteredHistograms ac_clusters;
+ ClusterJpegHistograms(ac_histograms.data(), ac_histograms.size(),
+ &ac_clusters);
+
+ // Add the first 4 DC and AC histograms in the first DHT segment.
+ std::vector<uint32_t> dc_slot_histograms;
+ std::vector<uint32_t> ac_slot_histograms;
+ m->huffman_codes = Allocate<JPEGHuffmanCode>(cinfo, num_histo, JPOOL_IMAGE);
+ for (size_t i = 0; i < dc_clusters.histograms.size(); ++i) {
+ if (i >= 4) break;
+ JXL_ASSERT(dc_clusters.slot_ids[i] == i);
+ AddJpegHuffmanCode(dc_clusters.histograms[i], i, m->huffman_codes,
+ &m->num_huffman_codes);
+ dc_slot_histograms.push_back(i);
+ }
+ for (size_t i = 0; i < ac_clusters.histograms.size(); ++i) {
+ if (i >= 4) break;
+ JXL_ASSERT(ac_clusters.slot_ids[i] == i);
+ AddJpegHuffmanCode(ac_clusters.histograms[i], 0x10 + i, m->huffman_codes,
+ &m->num_huffman_codes);
+ ac_slot_histograms.push_back(i);
+ }
+
+ // Set the Huffman table indexes in the scan_infos and emit additional DHT
+ // segments if necessary.
+ size_t histogram_id = 0;
+ size_t num_huffman_codes_sent = 0;
+ for (int i = 0; i < cinfo->num_scans; ++i) {
+ ScanCodingInfo sci = {};
+ for (int c = 0; c < cinfo->scan_info[i].comps_in_scan; ++c) {
+ SetJpegHuffmanCode(dc_clusters, histogram_id, 0, dc_slot_histograms,
+ &sci.dc_tbl_idx[c], is_baseline, m->huffman_codes,
+ &m->num_huffman_codes);
+ SetJpegHuffmanCode(ac_clusters, histogram_id, 0x10, ac_slot_histograms,
+ &sci.ac_tbl_idx[c], is_baseline, m->huffman_codes,
+ &m->num_huffman_codes);
+ ++histogram_id;
+ }
+ sci.num_huffman_codes = m->num_huffman_codes - num_huffman_codes_sent;
+ num_huffman_codes_sent = m->num_huffman_codes;
+ memcpy(&m->scan_coding_info[i], &sci, sizeof(sci));
+ }
+}
+
+} // namespace jpegli
diff --git a/third_party/jpeg-xl/lib/jpegli/entropy_coding.h b/third_party/jpeg-xl/lib/jpegli/entropy_coding.h
new file mode 100644
index 0000000000..6d9dd2303b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/entropy_coding.h
@@ -0,0 +1,45 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ENTROPY_CODING_H_
+#define LIB_JPEGLI_ENTROPY_CODING_H_
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+/* clang-format on */
+
+#include <vector>
+
+#include "lib/jpegli/encode_internal.h"
+
+namespace jpegli {
+
+void CopyHuffmanCodes(j_compress_ptr cinfo, bool* is_baseline);
+
+size_t RestartIntervalForScan(j_compress_ptr cinfo, size_t scan_index);
+
+struct Histogram {
+ int count[kJpegHuffmanAlphabetSize];
+ Histogram() { memset(count, 0, sizeof(count)); }
+};
+
+struct JpegClusteredHistograms {
+ std::vector<Histogram> histograms;
+ std::vector<uint32_t> histogram_indexes;
+ std::vector<uint32_t> slot_ids;
+};
+
+void ClusterJpegHistograms(const Histogram* histograms, size_t num,
+ JpegClusteredHistograms* clusters);
+
+void AddJpegHuffmanCode(const Histogram& histogram, size_t slot_id,
+ JPEGHuffmanCode* huff_codes, size_t* num_huff_codes);
+
+void OptimizeHuffmanCodes(j_compress_ptr cinfo, bool* is_baseline);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_ENTROPY_CODING_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/error.cc b/third_party/jpeg-xl/lib/jpegli/error.cc
new file mode 100644
index 0000000000..289261672d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/error.cc
@@ -0,0 +1,102 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/error.h"
+
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <string>
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+const char* const kErrorMessageTable[] = {
+ "Message codes are not supported, error message is in msg_parm.s string",
+};
+
+bool FormatString(char* buffer, const char* format, ...) {
+ va_list args;
+ va_start(args, format);
+ vsnprintf(buffer, JMSG_STR_PARM_MAX, format, args);
+ va_end(args);
+ return false;
+}
+
+void ExitWithAbort(j_common_ptr cinfo) {
+ (*cinfo->err->output_message)(cinfo);
+ jpegli_destroy(cinfo);
+ exit(EXIT_FAILURE);
+}
+
+void EmitMessage(j_common_ptr cinfo, int msg_level) {
+ if (msg_level < 0) {
+ if (cinfo->err->num_warnings <= 5 || cinfo->err->trace_level >= 3) {
+ (*cinfo->err->output_message)(cinfo);
+ }
+ ++cinfo->err->num_warnings;
+ } else if (cinfo->err->trace_level >= msg_level) {
+ (*cinfo->err->output_message)(cinfo);
+ }
+}
+
+void OutputMessage(j_common_ptr cinfo) {
+ char buffer[JMSG_LENGTH_MAX];
+ (*cinfo->err->format_message)(cinfo, buffer);
+ fprintf(stderr, "%s\n", buffer);
+}
+
+void FormatMessage(j_common_ptr cinfo, char* buffer) {
+ jpeg_error_mgr* err = cinfo->err;
+ int code = err->msg_code;
+ if (code == 0) {
+ memcpy(buffer, cinfo->err->msg_parm.s, JMSG_STR_PARM_MAX);
+ } else if (err->addon_message_table != nullptr &&
+ code >= err->first_addon_message &&
+ code <= err->last_addon_message) {
+ std::string msg(err->addon_message_table[code - err->first_addon_message]);
+ if (msg.find("%s") != std::string::npos) {
+ snprintf(buffer, JMSG_LENGTH_MAX, msg.data(), err->msg_parm.s);
+ } else {
+ snprintf(buffer, JMSG_LENGTH_MAX, msg.data(), err->msg_parm.i[0],
+ err->msg_parm.i[1], err->msg_parm.i[2], err->msg_parm.i[3],
+ err->msg_parm.i[4], err->msg_parm.i[5], err->msg_parm.i[6],
+ err->msg_parm.i[7]);
+ }
+ } else {
+ snprintf(buffer, JMSG_LENGTH_MAX, "%s", kErrorMessageTable[0]);
+ }
+}
+
+void ResetErrorManager(j_common_ptr cinfo) {
+ memset(cinfo->err->msg_parm.s, 0, JMSG_STR_PARM_MAX);
+ cinfo->err->msg_code = 0;
+ cinfo->err->num_warnings = 0;
+}
+
+} // namespace jpegli
+
+struct jpeg_error_mgr* jpegli_std_error(struct jpeg_error_mgr* err) {
+ err->error_exit = jpegli::ExitWithAbort;
+ err->emit_message = jpegli::EmitMessage;
+ err->output_message = jpegli::OutputMessage;
+ err->format_message = jpegli::FormatMessage;
+ err->reset_error_mgr = jpegli::ResetErrorManager;
+ memset(err->msg_parm.s, 0, JMSG_STR_PARM_MAX);
+ err->trace_level = 0;
+ err->num_warnings = 0;
+ // We don't support message codes and message table, but we define one here
+ // in case the application has a custom format_message and tries to access
+ // these fields there.
+ err->msg_code = 0;
+ err->jpeg_message_table = jpegli::kErrorMessageTable;
+ err->last_jpeg_message = 0;
+ err->addon_message_table = nullptr;
+ err->first_addon_message = 0;
+ err->last_addon_message = 0;
+ return err;
+}
diff --git a/third_party/jpeg-xl/lib/jpegli/error.h b/third_party/jpeg-xl/lib/jpegli/error.h
new file mode 100644
index 0000000000..9de030ab3b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/error.h
@@ -0,0 +1,39 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ERROR_H_
+#define LIB_JPEGLI_ERROR_H_
+
+/* clang-format off */
+#include <stdint.h>
+#include <stdio.h>
+#include <jpeglib.h>
+#include <stdarg.h>
+/* clang-format on */
+
+namespace jpegli {
+
+bool FormatString(char* buffer, const char* format, ...);
+
+} // namespace jpegli
+
+#define JPEGLI_ERROR(format, ...) \
+ jpegli::FormatString(cinfo->err->msg_parm.s, ("%s:%d: " format), __FILE__, \
+ __LINE__, ##__VA_ARGS__), \
+ (*cinfo->err->error_exit)(reinterpret_cast<j_common_ptr>(cinfo))
+
+#define JPEGLI_WARN(format, ...) \
+ jpegli::FormatString(cinfo->err->msg_parm.s, ("%s:%d: " format), __FILE__, \
+ __LINE__, ##__VA_ARGS__), \
+ (*cinfo->err->emit_message)(reinterpret_cast<j_common_ptr>(cinfo), -1)
+
+#define JPEGLI_TRACE(level, format, ...) \
+ if (cinfo->err->trace_level >= (level)) \
+ jpegli::FormatString(cinfo->err->msg_parm.s, ("%s:%d: " format), __FILE__, \
+ __LINE__, ##__VA_ARGS__), \
+ (*cinfo->err->emit_message)(reinterpret_cast<j_common_ptr>(cinfo), \
+ (level))
+
+#endif // LIB_JPEGLI_ERROR_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/error_handling_test.cc b/third_party/jpeg-xl/lib/jpegli/error_handling_test.cc
new file mode 100644
index 0000000000..f652993827
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/error_handling_test.cc
@@ -0,0 +1,1290 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jpegli {
+namespace {
+
+TEST(EncoderErrorHandlingTest, MinimalSuccess) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ {
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[1] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ }
+ {
+ jpeg_decompress_struct cinfo = {};
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpeg);
+ jpeg_create_decompress(&cinfo);
+ jpeg_mem_src(&cinfo, buffer, buffer_size);
+ jpeg_read_header(&cinfo, TRUE);
+ EXPECT_EQ(1, cinfo.image_width);
+ EXPECT_EQ(1, cinfo.image_height);
+ jpeg_start_decompress(&cinfo);
+ JSAMPLE image[1];
+ JSAMPROW row[] = {image};
+ jpeg_read_scanlines(&cinfo, row, 1);
+ jxl::msan::UnpoisonMemory(image, 1);
+ EXPECT_EQ(0, image[0]);
+ jpeg_finish_decompress(&cinfo);
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpeg_destroy_decompress(&cinfo);
+ }
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NoDestination) {
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+}
+
+TEST(EncoderErrorHandlingTest, NoImageDimensions) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, ImageTooBig) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 100000;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NoInputComponents) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, TooManyInputComponents) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1000;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NoSetDefaults) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[1] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NoStartCompress) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ JSAMPLE image[1] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NoWriteScanlines) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NoWriteAllScanlines) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 2;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[1] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidQuantValue) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ cinfo.quant_tbl_ptrs[0] = jpegli_alloc_quant_table((j_common_ptr)&cinfo);
+ for (size_t k = 0; k < DCTSIZE2; ++k) {
+ cinfo.quant_tbl_ptrs[0]->quantval[k] = 0;
+ }
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[1] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidQuantTableIndex) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ cinfo.comp_info[0].quant_tbl_no = 3;
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[1] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch1) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ cinfo.num_components = 100;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch2) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ cinfo.num_components = 2;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch3) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ cinfo.num_components = 2;
+ cinfo.comp_info[1].h_samp_factor = cinfo.comp_info[1].v_samp_factor = 1;
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[1] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch4) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ cinfo.in_color_space = JCS_RGB;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[1] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch5) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ cinfo.in_color_space = JCS_GRAYSCALE;
+ jpegli_set_defaults(&cinfo);
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[3] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch6) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ cinfo.in_color_space = JCS_RGB;
+ jpegli_set_defaults(&cinfo);
+ cinfo.num_components = 2;
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[3] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidColorTransform) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ cinfo.in_color_space = JCS_YCbCr;
+ jpegli_set_defaults(&cinfo);
+ cinfo.jpeg_color_space = JCS_RGB;
+ jpegli_start_compress(&cinfo, TRUE);
+ JSAMPLE image[3] = {0};
+ JSAMPROW row[] = {image};
+ jpegli_write_scanlines(&cinfo, row, 1);
+ jpegli_finish_compress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, DuplicateComponentIds) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ jpegli_set_defaults(&cinfo);
+ cinfo.comp_info[0].component_id = 0;
+ cinfo.comp_info[1].component_id = 0;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidComponentIndex) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ jpegli_set_defaults(&cinfo);
+ cinfo.comp_info[0].component_index = 17;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, ArithmeticCoding) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ jpegli_set_defaults(&cinfo);
+ cinfo.arith_code = TRUE;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, CCIR601Sampling) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ jpegli_set_defaults(&cinfo);
+ cinfo.CCIR601_sampling = TRUE;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript1) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {{1, {0}, 0, 63, 0, 0}}; //
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = 0;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript2) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {{2, {0, 1}, 0, 63, 0, 0}}; //
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript3) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {{5, {0}, 0, 63, 0, 0}}; //
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript4) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 2;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {{2, {0, 0}, 0, 63, 0, 0}}; //
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript5) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 2;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {{2, {1, 0}, 0, 63, 0, 0}}; //
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript6) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {{1, {0}, 0, 64, 0, 0}}; //
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript7) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {{1, {0}, 2, 1, 0, 0}}; //
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript8) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 2;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {
+ {1, {0}, 0, 63, 0, 0}, {1, {1}, 0, 0, 0, 0}, {1, {1}, 1, 63, 0, 0} //
+ };
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript9) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {
+ {1, {0}, 0, 1, 0, 0}, {1, {0}, 2, 63, 0, 0}, //
+ };
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript10) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 2;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {
+ {2, {0, 1}, 0, 0, 0, 0}, {2, {0, 1}, 1, 63, 0, 0} //
+ };
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript11) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {
+ {1, {0}, 1, 63, 0, 0}, {1, {0}, 0, 0, 0, 0} //
+ };
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript12) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {
+ {1, {0}, 0, 0, 10, 1}, {1, {0}, 0, 0, 1, 0}, {1, {0}, 1, 63, 0, 0} //
+ };
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, InvalidScanScript13) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ static constexpr jpeg_scan_info kScript[] = {
+ {1, {0}, 0, 0, 0, 2},
+ {1, {0}, 0, 0, 1, 0},
+ {1, {0}, 0, 0, 2, 1}, //
+ {1, {0}, 1, 63, 0, 0} //
+ };
+ cinfo.scan_info = kScript;
+ cinfo.num_scans = ARRAY_SIZE(kScript);
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, MCUSizeTooBig) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ jpegli_set_defaults(&cinfo);
+ jpegli_set_progressive_level(&cinfo, 0);
+ cinfo.comp_info[0].h_samp_factor = 3;
+ cinfo.comp_info[0].v_samp_factor = 3;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, RestartIntervalTooBig) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 1;
+ jpegli_set_defaults(&cinfo);
+ cinfo.restart_interval = 1000000;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, SamplingFactorTooBig) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ jpegli_set_defaults(&cinfo);
+ cinfo.comp_info[0].h_samp_factor = 5;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+TEST(EncoderErrorHandlingTest, NonIntegralSamplingRatio) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ cinfo.image_width = 1;
+ cinfo.image_height = 1;
+ cinfo.input_components = 3;
+ jpegli_set_defaults(&cinfo);
+ cinfo.comp_info[0].h_samp_factor = 3;
+ cinfo.comp_info[1].h_samp_factor = 2;
+ jpegli_start_compress(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ if (buffer) free(buffer);
+}
+
+constexpr const char* kAddOnTable[] = {"First message",
+ "Second message with int param %d",
+ "Third message with string param %s"};
+
+TEST(EncoderErrorHandlingTest, AddOnTableNoParam) {
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ cinfo.err->addon_message_table = kAddOnTable;
+ cinfo.err->first_addon_message = 10000;
+ cinfo.err->last_addon_message = 10002;
+ cinfo.err->msg_code = 10000;
+ (*cinfo.err->error_exit)(reinterpret_cast<j_common_ptr>(&cinfo));
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+}
+
+TEST(EncoderErrorHandlingTest, AddOnTableIntParam) {
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ cinfo.err->addon_message_table = kAddOnTable;
+ cinfo.err->first_addon_message = 10000;
+ cinfo.err->last_addon_message = 10002;
+ cinfo.err->msg_code = 10001;
+ cinfo.err->msg_parm.i[0] = 17;
+ (*cinfo.err->error_exit)(reinterpret_cast<j_common_ptr>(&cinfo));
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+}
+
+TEST(EncoderErrorHandlingTest, AddOnTableNoStringParam) {
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ cinfo.err->addon_message_table = kAddOnTable;
+ cinfo.err->first_addon_message = 10000;
+ cinfo.err->last_addon_message = 10002;
+ cinfo.err->msg_code = 10002;
+ memcpy(cinfo.err->msg_parm.s, "MESSAGE PARAM", 14);
+ (*cinfo.err->error_exit)(reinterpret_cast<j_common_ptr>(&cinfo));
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+}
+
+static const uint8_t kCompressed0[] = {
+ // SOI
+ 0xff, 0xd8, //
+ // DQT
+ 0xff, 0xdb, 0x00, 0x43, 0x00, 0x03, 0x02, 0x02, 0x03, 0x02, //
+ 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x03, 0x03, 0x04, 0x05, //
+ 0x08, 0x05, 0x05, 0x04, 0x04, 0x05, 0x0a, 0x07, 0x07, 0x06, //
+ 0x08, 0x0c, 0x0a, 0x0c, 0x0c, 0x0b, 0x0a, 0x0b, 0x0b, 0x0d, //
+ 0x0e, 0x12, 0x10, 0x0d, 0x0e, 0x11, 0x0e, 0x0b, 0x0b, 0x10, //
+ 0x16, 0x10, 0x11, 0x13, 0x14, 0x15, 0x15, 0x15, 0x0c, 0x0f, //
+ 0x17, 0x18, 0x16, 0x14, 0x18, 0x12, 0x14, 0x15, 0x14, //
+ // SOF
+ 0xff, 0xc0, 0x00, 0x0b, 0x08, 0x00, 0x01, 0x00, 0x01, 0x01, //
+ 0x01, 0x11, 0x00, //
+ // DHT
+ 0xff, 0xc4, 0x00, 0xd2, 0x00, 0x00, 0x01, 0x05, 0x01, 0x01, //
+ 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
+ 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, //
+ 0x09, 0x0a, 0x0b, 0x10, 0x00, 0x02, 0x01, 0x03, 0x03, 0x02, //
+ 0x04, 0x03, 0x05, 0x05, 0x04, 0x04, 0x00, 0x00, 0x01, 0x7d, //
+ 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, //
+ 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, //
+ 0x81, 0x91, 0xa1, 0x08, 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, //
+ 0xd1, 0xf0, 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, //
+ 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, //
+ 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45, //
+ 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, //
+ 0x58, 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, //
+ 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x83, //
+ 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, //
+ 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, //
+ 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, //
+ 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, //
+ 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, //
+ 0xd9, 0xda, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, //
+ 0xe9, 0xea, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, //
+ 0xf9, 0xfa, //
+ // SOS
+ 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x00, 0x3f, 0x00, //
+ // entropy coded data
+ 0xfc, 0xaa, 0xaf, //
+ // EOI
+ 0xff, 0xd9, //
+};
+static const size_t kLen0 = sizeof(kCompressed0);
+
+static const size_t kDQTOffset = 2;
+static const size_t kSOFOffset = 71;
+static const size_t kDHTOffset = 84;
+static const size_t kSOSOffset = 296;
+
+TEST(DecoderErrorHandlingTest, MinimalSuccess) {
+ JXL_CHECK(kCompressed0[kDQTOffset] == 0xff);
+ JXL_CHECK(kCompressed0[kSOFOffset] == 0xff);
+ JXL_CHECK(kCompressed0[kDHTOffset] == 0xff);
+ JXL_CHECK(kCompressed0[kSOSOffset] == 0xff);
+ jpeg_decompress_struct cinfo = {};
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ jpegli_mem_src(&cinfo, kCompressed0, kLen0);
+ jpegli_read_header(&cinfo, TRUE);
+ EXPECT_EQ(1, cinfo.image_width);
+ EXPECT_EQ(1, cinfo.image_height);
+ jpegli_start_decompress(&cinfo);
+ JSAMPLE image[1];
+ JSAMPROW row[] = {image};
+ jpegli_read_scanlines(&cinfo, row, 1);
+ EXPECT_EQ(0, image[0]);
+ jpegli_finish_decompress(&cinfo);
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+}
+
+TEST(DecoderErrorHandlingTest, NoSource) {
+ jpeg_decompress_struct cinfo = {};
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ jpegli_read_header(&cinfo, TRUE);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+}
+
+TEST(DecoderErrorHandlingTest, NoReadHeader) {
+ jpeg_decompress_struct cinfo = {};
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ jpegli_mem_src(&cinfo, kCompressed0, kLen0);
+ jpegli_start_decompress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+}
+
+TEST(DecoderErrorHandlingTest, NoStartDecompress) {
+ jpeg_decompress_struct cinfo = {};
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ jpegli_mem_src(&cinfo, kCompressed0, kLen0);
+ jpegli_read_header(&cinfo, TRUE);
+ EXPECT_EQ(1, cinfo.image_width);
+ EXPECT_EQ(1, cinfo.image_height);
+ JSAMPLE image[1];
+ JSAMPROW row[] = {image};
+ jpegli_read_scanlines(&cinfo, row, 1);
+ EXPECT_EQ(0, image[0]);
+ jpegli_finish_decompress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+}
+
+TEST(DecoderErrorHandlingTest, NoReadScanlines) {
+ jpeg_decompress_struct cinfo = {};
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ jpegli_mem_src(&cinfo, kCompressed0, kLen0);
+ jpegli_read_header(&cinfo, TRUE);
+ EXPECT_EQ(1, cinfo.image_width);
+ EXPECT_EQ(1, cinfo.image_height);
+ jpegli_start_decompress(&cinfo);
+ jpegli_finish_decompress(&cinfo);
+ return true;
+ };
+ EXPECT_FALSE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+}
+
+static const size_t kMaxImageWidth = 0xffff;
+JSAMPLE kOutputBuffer[MAX_COMPONENTS * kMaxImageWidth];
+
+bool ParseCompressed(const std::vector<uint8_t>& compressed) {
+ jpeg_decompress_struct cinfo = {};
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ jpegli_mem_src(&cinfo, compressed.data(), compressed.size());
+ jpegli_read_header(&cinfo, TRUE);
+ jpegli_start_decompress(&cinfo);
+ for (JDIMENSION i = 0; i < cinfo.output_height; ++i) {
+ JSAMPROW row[] = {kOutputBuffer};
+ jpegli_read_scanlines(&cinfo, row, 1);
+ }
+ jpegli_finish_decompress(&cinfo);
+ return true;
+ };
+ bool retval = try_catch_block();
+ jpegli_destroy_decompress(&cinfo);
+ return retval;
+}
+
+TEST(DecoderErrorHandlingTest, NoSOI) {
+ for (int pos : {0, 1}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[pos] = 0;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+}
+
+TEST(DecoderErrorHandlingTest, InvalidDQT) {
+ // Bad marker length
+ for (int diff : {-2, -1, 1, 2}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kDQTOffset + 3] += diff;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // inavlid table index / precision
+ for (int val : {0x20, 0x05}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kDQTOffset + 4] = val;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // zero quant value
+ for (int k : {0, 1, 17, 63}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kDQTOffset + 5 + k] = 0;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+}
+
+TEST(DecoderErrorHandlingTest, InvalidSOF) {
+ // Bad marker length
+ for (int diff : {-2, -1, 1, 2}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kSOFOffset + 3] += diff;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // zero width, height or num_components
+ for (int pos : {6, 8, 9}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kSOFOffset + pos] = 0;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // invalid data precision
+ for (int val : {0, 1, 127}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kSOFOffset + 4] = val;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // too many num_components
+ for (int val : {5, 255}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kSOFOffset + 9] = val;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // invalid sampling factors
+ for (int val : {0x00, 0x01, 0x10, 0x15, 0x51}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kSOFOffset + 11] = val;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // invalid quant table index
+ for (int val : {5, 17}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kSOFOffset + 12] = val;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+}
+
+TEST(DecoderErrorHandlingTest, InvalidDHT) {
+ // Bad marker length
+ for (int diff : {-2, -1, 1, 2}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kDHTOffset + 3] += diff;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kDHTOffset + 2] += 17;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // inavlid table slot_id
+ for (int val : {0x05, 0x15, 0x20}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kDHTOffset + 4] = val;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+}
+
+TEST(DecoderErrorHandlingTest, InvalidSOS) {
+ // Invalid comps_in_scan
+ for (int val : {2, 5, 17}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kSOSOffset + 4] = val;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // invalid Huffman table indexes
+ for (int val : {0x05, 0x50, 0x15, 0x51}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kSOSOffset + 6] = val;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+ // invalid Ss/Se
+ for (int pos : {7, 8}) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ compressed[kSOSOffset + pos] = 64;
+ EXPECT_FALSE(ParseCompressed(compressed));
+ }
+}
+
+TEST(DecoderErrorHandlingTest, MutateSingleBytes) {
+ for (size_t pos = 0; pos < kLen0; ++pos) {
+ std::vector<uint8_t> compressed(kCompressed0, kCompressed0 + kLen0);
+ for (int val : {0x00, 0x0f, 0xf0, 0xff}) {
+ compressed[pos] = val;
+ ParseCompressed(compressed);
+ }
+ }
+}
+
+} // namespace
+} // namespace jpegli
diff --git a/third_party/jpeg-xl/lib/jpegli/huffman.cc b/third_party/jpeg-xl/lib/jpegli/huffman.cc
new file mode 100644
index 0000000000..1cf88a5536
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/huffman.cc
@@ -0,0 +1,321 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/huffman.h"
+
+#include <limits>
+#include <vector>
+
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/error.h"
+
+namespace jpegli {
+
+// Returns the table width of the next 2nd level table, count is the histogram
+// of bit lengths for the remaining symbols, len is the code length of the next
+// processed symbol.
+static inline int NextTableBitSize(const int* count, int len) {
+ int left = 1 << (len - kJpegHuffmanRootTableBits);
+ while (len < static_cast<int>(kJpegHuffmanMaxBitLength)) {
+ left -= count[len];
+ if (left <= 0) break;
+ ++len;
+ left <<= 1;
+ }
+ return len - kJpegHuffmanRootTableBits;
+}
+
+void BuildJpegHuffmanTable(const uint32_t* count, const uint32_t* symbols,
+ HuffmanTableEntry* lut) {
+ HuffmanTableEntry code; // current table entry
+ HuffmanTableEntry* table; // next available space in table
+ int len; // current code length
+ int idx; // symbol index
+ int key; // prefix code
+ int reps; // number of replicate key values in current table
+ int low; // low bits for current root entry
+ int table_bits; // key length of current table
+ int table_size; // size of current table
+
+ // Make a local copy of the input bit length histogram.
+ int tmp_count[kJpegHuffmanMaxBitLength + 1] = {0};
+ int total_count = 0;
+ for (len = 1; len <= static_cast<int>(kJpegHuffmanMaxBitLength); ++len) {
+ tmp_count[len] = count[len];
+ total_count += tmp_count[len];
+ }
+
+ table = lut;
+ table_bits = kJpegHuffmanRootTableBits;
+ table_size = 1 << table_bits;
+
+ // Special case code with only one value.
+ if (total_count == 1) {
+ code.bits = 0;
+ code.value = symbols[0];
+ for (key = 0; key < table_size; ++key) {
+ table[key] = code;
+ }
+ return;
+ }
+
+ // Fill in root table.
+ key = 0;
+ idx = 0;
+ for (len = 1; len <= kJpegHuffmanRootTableBits; ++len) {
+ for (; tmp_count[len] > 0; --tmp_count[len]) {
+ code.bits = len;
+ code.value = symbols[idx++];
+ reps = 1 << (kJpegHuffmanRootTableBits - len);
+ while (reps--) {
+ table[key++] = code;
+ }
+ }
+ }
+
+ // Fill in 2nd level tables and add pointers to root table.
+ table += table_size;
+ table_size = 0;
+ low = 0;
+ for (len = kJpegHuffmanRootTableBits + 1;
+ len <= static_cast<int>(kJpegHuffmanMaxBitLength); ++len) {
+ for (; tmp_count[len] > 0; --tmp_count[len]) {
+ // Start a new sub-table if the previous one is full.
+ if (low >= table_size) {
+ table += table_size;
+ table_bits = NextTableBitSize(tmp_count, len);
+ table_size = 1 << table_bits;
+ low = 0;
+ lut[key].bits = table_bits + kJpegHuffmanRootTableBits;
+ lut[key].value = (table - lut) - key;
+ ++key;
+ }
+ code.bits = len - kJpegHuffmanRootTableBits;
+ code.value = symbols[idx++];
+ reps = 1 << (table_bits - code.bits);
+ while (reps--) {
+ table[low++] = code;
+ }
+ }
+ }
+}
+
+// A node of a Huffman tree.
+struct HuffmanTree {
+ HuffmanTree(uint32_t count, int16_t left, int16_t right)
+ : total_count(count), index_left(left), index_right_or_value(right) {}
+ uint32_t total_count;
+ int16_t index_left;
+ int16_t index_right_or_value;
+};
+
+void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth,
+ uint8_t level) {
+ if (p.index_left >= 0) {
+ ++level;
+ SetDepth(pool[p.index_left], pool, depth, level);
+ SetDepth(pool[p.index_right_or_value], pool, depth, level);
+ } else {
+ depth[p.index_right_or_value] = level;
+ }
+}
+
+// Sort the root nodes, least popular first.
+static JXL_INLINE bool Compare(const HuffmanTree& v0, const HuffmanTree& v1) {
+ return v0.total_count < v1.total_count;
+}
+
+// This function will create a Huffman tree.
+//
+// The catch here is that the tree cannot be arbitrarily deep.
+// Brotli specifies a maximum depth of 15 bits for "code trees"
+// and 7 bits for "code length code trees."
+//
+// count_limit is the value that is to be faked as the minimum value
+// and this minimum value is raised until the tree matches the
+// maximum length requirement.
+//
+// This algorithm is not of excellent performance for very long data blocks,
+// especially when population counts are longer than 2**tree_limit, but
+// we are not planning to use this with extremely long blocks.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+void CreateHuffmanTree(const uint32_t* data, const size_t length,
+ const int tree_limit, uint8_t* depth) {
+ // For block sizes below 64 kB, we never need to do a second iteration
+ // of this loop. Probably all of our block sizes will be smaller than
+ // that, so this loop is mostly of academic interest. If we actually
+ // would need this, we would be better off with the Katajainen algorithm.
+ for (uint32_t count_limit = 1;; count_limit *= 2) {
+ std::vector<HuffmanTree> tree;
+ tree.reserve(2 * length + 1);
+
+ for (size_t i = length; i != 0;) {
+ --i;
+ if (data[i]) {
+ const uint32_t count = std::max(data[i], count_limit - 1);
+ tree.emplace_back(count, -1, static_cast<int16_t>(i));
+ }
+ }
+
+ const size_t n = tree.size();
+ if (n == 1) {
+ // Fake value; will be fixed on upper level.
+ depth[tree[0].index_right_or_value] = 1;
+ break;
+ }
+
+ std::stable_sort(tree.begin(), tree.end(), Compare);
+
+ // The nodes are:
+ // [0, n): the sorted leaf nodes that we start with.
+ // [n]: we add a sentinel here.
+ // [n + 1, 2n): new parent nodes are added here, starting from
+ // (n+1). These are naturally in ascending order.
+ // [2n]: we add a sentinel at the end as well.
+ // There will be (2n+1) elements at the end.
+ const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
+ tree.push_back(sentinel);
+ tree.push_back(sentinel);
+
+ size_t i = 0; // Points to the next leaf node.
+ size_t j = n + 1; // Points to the next non-leaf node.
+ for (size_t k = n - 1; k != 0; --k) {
+ size_t left, right;
+ if (tree[i].total_count <= tree[j].total_count) {
+ left = i;
+ ++i;
+ } else {
+ left = j;
+ ++j;
+ }
+ if (tree[i].total_count <= tree[j].total_count) {
+ right = i;
+ ++i;
+ } else {
+ right = j;
+ ++j;
+ }
+
+ // The sentinel node becomes the parent node.
+ size_t j_end = tree.size() - 1;
+ tree[j_end].total_count =
+ tree[left].total_count + tree[right].total_count;
+ tree[j_end].index_left = static_cast<int16_t>(left);
+ tree[j_end].index_right_or_value = static_cast<int16_t>(right);
+
+ // Add back the last sentinel node.
+ tree.push_back(sentinel);
+ }
+ JXL_DASSERT(tree.size() == 2 * n + 1);
+ SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
+
+ // We need to pack the Huffman tree in tree_limit bits.
+ // If this was not successful, add fake entities to the lowest values
+ // and retry.
+ if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
+ break;
+ }
+ }
+}
+
+void ValidateHuffmanTable(j_common_ptr cinfo, const JHUFF_TBL* table,
+ bool is_dc) {
+ size_t total_symbols = 0;
+ size_t total_p = 0;
+ size_t max_depth = 0;
+ for (size_t d = 1; d <= kJpegHuffmanMaxBitLength; ++d) {
+ uint8_t count = table->bits[d];
+ if (count) {
+ total_symbols += count;
+ total_p += (1u << (kJpegHuffmanMaxBitLength - d)) * count;
+ max_depth = d;
+ }
+ }
+ total_p += 1u << (kJpegHuffmanMaxBitLength - max_depth); // sentinel symbol
+ if (total_symbols == 0) {
+ JPEGLI_ERROR("Empty Huffman table");
+ }
+ if (total_symbols > kJpegHuffmanAlphabetSize) {
+ JPEGLI_ERROR("Too many symbols in Huffman table");
+ }
+ if (total_p != (1u << kJpegHuffmanMaxBitLength)) {
+ JPEGLI_ERROR("Invalid bit length distribution");
+ }
+ uint8_t symbol_seen[kJpegHuffmanAlphabetSize] = {};
+ for (size_t i = 0; i < total_symbols; ++i) {
+ uint8_t symbol = table->huffval[i];
+ if (symbol_seen[symbol]) {
+ JPEGLI_ERROR("Duplicate symbol %d in Huffman table", symbol);
+ }
+ symbol_seen[symbol] = 1;
+ }
+}
+
+void AddStandardHuffmanTables(j_common_ptr cinfo, bool is_dc) {
+ // Huffman tables from the JPEG standard.
+ static constexpr JHUFF_TBL kStandardDCTables[2] = {
+ // DC luma
+ {{0, 0, 1, 5, 1, 1, 1, 1, 1, 1},
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
+ FALSE},
+ // DC chroma
+ {{0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
+ FALSE}};
+ static constexpr JHUFF_TBL kStandardACTables[2] = {
+ // AC luma
+ {{0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125},
+ {0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06,
+ 0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+ 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, 0x24, 0x33, 0x62, 0x72,
+ 0x82, 0x09, 0x0a, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+ 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45,
+ 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+ 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, 0x75,
+ 0x76, 0x77, 0x78, 0x79, 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+ 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3,
+ 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+ 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9,
+ 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+ 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf1, 0xf2, 0xf3, 0xf4,
+ 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa},
+ FALSE},
+ // AC chroma
+ {{0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119},
+ {0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41,
+ 0x51, 0x07, 0x61, 0x71, 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+ 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, 0x15, 0x62, 0x72, 0xd1,
+ 0x0a, 0x16, 0x24, 0x34, 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+ 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44,
+ 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+ 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74,
+ 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a,
+ 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+ 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+ 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+ 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf2, 0xf3, 0xf4,
+ 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa},
+ FALSE}};
+ const JHUFF_TBL* std_tables = is_dc ? kStandardDCTables : kStandardACTables;
+ JHUFF_TBL** tables;
+ if (cinfo->is_decompressor) {
+ j_decompress_ptr cinfo_d = reinterpret_cast<j_decompress_ptr>(cinfo);
+ tables = is_dc ? cinfo_d->dc_huff_tbl_ptrs : cinfo_d->ac_huff_tbl_ptrs;
+ } else {
+ j_compress_ptr cinfo_c = reinterpret_cast<j_compress_ptr>(cinfo);
+ tables = is_dc ? cinfo_c->dc_huff_tbl_ptrs : cinfo_c->ac_huff_tbl_ptrs;
+ }
+ for (int i = 0; i < 2; ++i) {
+ if (tables[i] == nullptr) {
+ tables[i] = jpegli_alloc_huff_table(cinfo);
+ memcpy(tables[i], &std_tables[i], sizeof(JHUFF_TBL));
+ ValidateHuffmanTable(cinfo, tables[i], is_dc);
+ }
+ }
+}
+
+} // namespace jpegli
diff --git a/third_party/jpeg-xl/lib/jpegli/huffman.h b/third_party/jpeg-xl/lib/jpegli/huffman.h
new file mode 100644
index 0000000000..f0e5e1de40
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/huffman.h
@@ -0,0 +1,50 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_HUFFMAN_H_
+#define LIB_JPEGLI_HUFFMAN_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "lib/jpegli/common_internal.h"
+
+namespace jpegli {
+
+constexpr int kJpegHuffmanRootTableBits = 8;
+// Maximum huffman lookup table size.
+// According to zlib/examples/enough.c, 758 entries are always enough for
+// an alphabet of 257 symbols (256 + 1 special symbol for the all 1s code) and
+// max bit length 16 if the root table has 8 bits.
+constexpr int kJpegHuffmanLutSize = 758;
+
+struct HuffmanTableEntry {
+ uint8_t bits; // number of bits used for this symbol
+ uint16_t value; // symbol value or table offset
+};
+
+void BuildJpegHuffmanTable(const uint32_t* count, const uint32_t* symbols,
+ HuffmanTableEntry* lut);
+
+// This function will create a Huffman tree.
+//
+// The (data,length) contains the population counts.
+// The tree_limit is the maximum bit depth of the Huffman codes.
+//
+// The depth contains the tree, i.e., how many bits are used for
+// the symbol.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+void CreateHuffmanTree(const uint32_t* data, size_t length, int tree_limit,
+ uint8_t* depth);
+
+void ValidateHuffmanTable(j_common_ptr cinfo, const JHUFF_TBL* table,
+ bool is_dc);
+
+void AddStandardHuffmanTables(j_common_ptr cinfo, bool is_dc);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_HUFFMAN_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/idct.cc b/third_party/jpeg-xl/lib/jpegli/idct.cc
new file mode 100644
index 0000000000..4d10563583
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/idct.cc
@@ -0,0 +1,692 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/idct.h"
+
+#include <cmath>
+
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jxl/base/status.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/idct.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/transpose-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Gt;
+using hwy::HWY_NAMESPACE::IfThenElseZero;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::NegMulAdd;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::Vec;
+using hwy::HWY_NAMESPACE::Xor;
+
+using D = HWY_FULL(float);
+using DI = HWY_FULL(int32_t);
+constexpr D d;
+constexpr DI di;
+
+using D8 = HWY_CAPPED(float, 8);
+constexpr D8 d8;
+
+void DequantBlock(const int16_t* JXL_RESTRICT qblock,
+ const float* JXL_RESTRICT dequant,
+ const float* JXL_RESTRICT biases, float* JXL_RESTRICT block) {
+ for (size_t k = 0; k < 64; k += Lanes(d)) {
+ const auto mul = Load(d, dequant + k);
+ const auto bias = Load(d, biases + k);
+ const Rebind<int16_t, DI> di16;
+ const Vec<DI> quant_i = PromoteTo(di, Load(di16, qblock + k));
+ const Rebind<float, DI> df;
+ const auto quant = ConvertTo(df, quant_i);
+ const auto abs_quant = Abs(quant);
+ const auto not_0 = Gt(abs_quant, Zero(df));
+ const auto sign_quant = Xor(quant, abs_quant);
+ const auto biased_quant = Sub(quant, Xor(bias, sign_quant));
+ const auto dequant = IfThenElseZero(not_0, Mul(biased_quant, mul));
+ Store(dequant, d, block + k);
+ }
+}
+
+template <size_t N>
+void ForwardEvenOdd(const float* JXL_RESTRICT ain, size_t ain_stride,
+ float* JXL_RESTRICT aout) {
+ for (size_t i = 0; i < N / 2; i++) {
+ auto in1 = LoadU(d8, ain + 2 * i * ain_stride);
+ Store(in1, d8, aout + i * 8);
+ }
+ for (size_t i = N / 2; i < N; i++) {
+ auto in1 = LoadU(d8, ain + (2 * (i - N / 2) + 1) * ain_stride);
+ Store(in1, d8, aout + i * 8);
+ }
+}
+
+template <size_t N>
+void BTranspose(float* JXL_RESTRICT coeff) {
+ for (size_t i = N - 1; i > 0; i--) {
+ auto in1 = Load(d8, coeff + i * 8);
+ auto in2 = Load(d8, coeff + (i - 1) * 8);
+ Store(Add(in1, in2), d8, coeff + i * 8);
+ }
+ constexpr float kSqrt2 = 1.41421356237f;
+ auto sqrt2 = Set(d8, kSqrt2);
+ auto in1 = Load(d8, coeff);
+ Store(Mul(in1, sqrt2), d8, coeff);
+}
+
+// Constants for DCT implementation. Generated by the following snippet:
+// for i in range(N // 2):
+// print(1.0 / (2 * math.cos((i + 0.5) * math.pi / N)), end=", ")
+template <size_t N>
+struct WcMultipliers;
+
+template <>
+struct WcMultipliers<4> {
+ static constexpr float kMultipliers[] = {
+ 0.541196100146197,
+ 1.3065629648763764,
+ };
+};
+
+template <>
+struct WcMultipliers<8> {
+ static constexpr float kMultipliers[] = {
+ 0.5097955791041592,
+ 0.6013448869350453,
+ 0.8999762231364156,
+ 2.5629154477415055,
+ };
+};
+
+constexpr float WcMultipliers<4>::kMultipliers[];
+constexpr float WcMultipliers<8>::kMultipliers[];
+
+template <size_t N>
+void MultiplyAndAdd(const float* JXL_RESTRICT coeff, float* JXL_RESTRICT out,
+ size_t out_stride) {
+ for (size_t i = 0; i < N / 2; i++) {
+ auto mul = Set(d8, WcMultipliers<N>::kMultipliers[i]);
+ auto in1 = Load(d8, coeff + i * 8);
+ auto in2 = Load(d8, coeff + (N / 2 + i) * 8);
+ auto out1 = MulAdd(mul, in2, in1);
+ auto out2 = NegMulAdd(mul, in2, in1);
+ StoreU(out1, d8, out + i * out_stride);
+ StoreU(out2, d8, out + (N - i - 1) * out_stride);
+ }
+}
+
+template <size_t N>
+struct IDCT1DImpl;
+
+template <>
+struct IDCT1DImpl<1> {
+ JXL_INLINE void operator()(const float* from, size_t from_stride, float* to,
+ size_t to_stride) {
+ StoreU(LoadU(d8, from), d8, to);
+ }
+};
+
+template <>
+struct IDCT1DImpl<2> {
+ JXL_INLINE void operator()(const float* from, size_t from_stride, float* to,
+ size_t to_stride) {
+ JXL_DASSERT(from_stride >= 8);
+ JXL_DASSERT(to_stride >= 8);
+ auto in1 = LoadU(d8, from);
+ auto in2 = LoadU(d8, from + from_stride);
+ StoreU(Add(in1, in2), d8, to);
+ StoreU(Sub(in1, in2), d8, to + to_stride);
+ }
+};
+
+template <size_t N>
+struct IDCT1DImpl {
+ void operator()(const float* from, size_t from_stride, float* to,
+ size_t to_stride) {
+ JXL_DASSERT(from_stride >= 8);
+ JXL_DASSERT(to_stride >= 8);
+ HWY_ALIGN float tmp[64];
+ ForwardEvenOdd<N>(from, from_stride, tmp);
+ IDCT1DImpl<N / 2>()(tmp, 8, tmp, 8);
+ BTranspose<N / 2>(tmp + N * 4);
+ IDCT1DImpl<N / 2>()(tmp + N * 4, 8, tmp + N * 4, 8);
+ MultiplyAndAdd<N>(tmp, to, to_stride);
+ }
+};
+
+template <size_t N>
+void IDCT1D(float* JXL_RESTRICT from, float* JXL_RESTRICT output,
+ size_t output_stride) {
+ for (size_t i = 0; i < 8; i += Lanes(d8)) {
+ IDCT1DImpl<N>()(from + i, 8, output + i, output_stride);
+ }
+}
+
+void ComputeScaledIDCT(float* JXL_RESTRICT block0, float* JXL_RESTRICT block1,
+ float* JXL_RESTRICT output, size_t output_stride) {
+ Transpose8x8Block(block0, block1);
+ IDCT1D<8>(block1, block0, 8);
+ Transpose8x8Block(block0, block1);
+ IDCT1D<8>(block1, output, output_stride);
+}
+
+void InverseTransformBlock8x8(const int16_t* JXL_RESTRICT qblock,
+ const float* JXL_RESTRICT dequant,
+ const float* JXL_RESTRICT biases,
+ float* JXL_RESTRICT scratch_space,
+ float* JXL_RESTRICT output, size_t output_stride,
+ size_t dctsize) {
+ float* JXL_RESTRICT block0 = scratch_space;
+ float* JXL_RESTRICT block1 = scratch_space + DCTSIZE2;
+ DequantBlock(qblock, dequant, biases, block0);
+ ComputeScaledIDCT(block0, block1, output, output_stride);
+}
+
+// Computes the N-point IDCT of in[], and stores the result in out[]. The in[]
+// array is at most 8 values long, values in[8:N-1] are assumed to be 0.
+void Compute1dIDCT(float* in, float* out, size_t N) {
+ switch (N) {
+ case 3: {
+ static constexpr float kC3[3] = {
+ 1.414213562373,
+ 1.224744871392,
+ 0.707106781187,
+ };
+ float even0 = in[0] + kC3[2] * in[2];
+ float even1 = in[0] - kC3[0] * in[2];
+ float odd0 = kC3[1] * in[1];
+ out[0] = even0 + odd0;
+ out[2] = even0 - odd0;
+ out[1] = even1;
+ break;
+ }
+ case 5: {
+ static constexpr float kC5[5] = {
+ 1.414213562373, 1.344997023928, 1.144122805635,
+ 0.831253875555, 0.437016024449,
+ };
+ float even0 = in[0] + kC5[2] * in[2] + kC5[4] * in[4];
+ float even1 = in[0] - kC5[4] * in[2] - kC5[2] * in[4];
+ float even2 = in[0] - kC5[0] * in[2] + kC5[0] * in[4];
+ float odd0 = kC5[1] * in[1] + kC5[3] * in[3];
+ float odd1 = kC5[3] * in[1] - kC5[1] * in[3];
+ out[0] = even0 + odd0;
+ out[4] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[3] = even1 - odd1;
+ out[2] = even2;
+ break;
+ }
+ case 6: {
+ static constexpr float kC6[6] = {
+ 1.414213562373, 1.366025403784, 1.224744871392,
+ 1.000000000000, 0.707106781187, 0.366025403784,
+ };
+ float even0 = in[0] + kC6[2] * in[2] + kC6[4] * in[4];
+ float even1 = in[0] - kC6[0] * in[4];
+ float even2 = in[0] - kC6[2] * in[2] + kC6[4] * in[4];
+ float odd0 = kC6[1] * in[1] + kC6[3] * in[3] + kC6[5] * in[5];
+ float odd1 = kC6[3] * in[1] - kC6[3] * in[3] - kC6[3] * in[5];
+ float odd2 = kC6[5] * in[1] - kC6[3] * in[3] + kC6[1] * in[5];
+ out[0] = even0 + odd0;
+ out[5] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[4] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[3] = even2 - odd2;
+ break;
+ }
+ case 7: {
+ static constexpr float kC7[7] = {
+ 1.414213562373, 1.378756275744, 1.274162392264, 1.105676685997,
+ 0.881747733790, 0.613604268353, 0.314692122713,
+ };
+ float even0 = in[0] + kC7[2] * in[2] + kC7[4] * in[4] + kC7[6] * in[6];
+ float even1 = in[0] + kC7[6] * in[2] - kC7[2] * in[4] - kC7[4] * in[6];
+ float even2 = in[0] - kC7[4] * in[2] - kC7[6] * in[4] + kC7[2] * in[6];
+ float even3 = in[0] - kC7[0] * in[2] + kC7[0] * in[4] - kC7[0] * in[6];
+ float odd0 = kC7[1] * in[1] + kC7[3] * in[3] + kC7[5] * in[5];
+ float odd1 = kC7[3] * in[1] - kC7[5] * in[3] - kC7[1] * in[5];
+ float odd2 = kC7[5] * in[1] - kC7[1] * in[3] + kC7[3] * in[5];
+ out[0] = even0 + odd0;
+ out[6] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[5] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[4] = even2 - odd2;
+ out[3] = even3;
+ break;
+ }
+ case 9: {
+ static constexpr float kC9[9] = {
+ 1.414213562373, 1.392728480640, 1.328926048777,
+ 1.224744871392, 1.083350440839, 0.909038955344,
+ 0.707106781187, 0.483689525296, 0.245575607938,
+ };
+ float even0 = in[0] + kC9[2] * in[2] + kC9[4] * in[4] + kC9[6] * in[6];
+ float even1 = in[0] + kC9[6] * in[2] - kC9[6] * in[4] - kC9[0] * in[6];
+ float even2 = in[0] - kC9[8] * in[2] - kC9[2] * in[4] + kC9[6] * in[6];
+ float even3 = in[0] - kC9[4] * in[2] + kC9[8] * in[4] + kC9[6] * in[6];
+ float even4 = in[0] - kC9[0] * in[2] + kC9[0] * in[4] - kC9[0] * in[6];
+ float odd0 =
+ kC9[1] * in[1] + kC9[3] * in[3] + kC9[5] * in[5] + kC9[7] * in[7];
+ float odd1 = kC9[3] * in[1] - kC9[3] * in[5] - kC9[3] * in[7];
+ float odd2 =
+ kC9[5] * in[1] - kC9[3] * in[3] - kC9[7] * in[5] + kC9[1] * in[7];
+ float odd3 =
+ kC9[7] * in[1] - kC9[3] * in[3] + kC9[1] * in[5] - kC9[5] * in[7];
+ out[0] = even0 + odd0;
+ out[8] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[7] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[6] = even2 - odd2;
+ out[3] = even3 + odd3;
+ out[5] = even3 - odd3;
+ out[4] = even4;
+ break;
+ }
+ case 10: {
+ static constexpr float kC10[10] = {
+ 1.414213562373, 1.396802246667, 1.344997023928, 1.260073510670,
+ 1.144122805635, 1.000000000000, 0.831253875555, 0.642039521920,
+ 0.437016024449, 0.221231742082,
+ };
+ float even0 = in[0] + kC10[2] * in[2] + kC10[4] * in[4] + kC10[6] * in[6];
+ float even1 = in[0] + kC10[6] * in[2] - kC10[8] * in[4] - kC10[2] * in[6];
+ float even2 = in[0] - kC10[0] * in[4];
+ float even3 = in[0] - kC10[6] * in[2] - kC10[8] * in[4] + kC10[2] * in[6];
+ float even4 = in[0] - kC10[2] * in[2] + kC10[4] * in[4] - kC10[6] * in[6];
+ float odd0 =
+ kC10[1] * in[1] + kC10[3] * in[3] + kC10[5] * in[5] + kC10[7] * in[7];
+ float odd1 =
+ kC10[3] * in[1] + kC10[9] * in[3] - kC10[5] * in[5] - kC10[1] * in[7];
+ float odd2 =
+ kC10[5] * in[1] - kC10[5] * in[3] - kC10[5] * in[5] + kC10[5] * in[7];
+ float odd3 =
+ kC10[7] * in[1] - kC10[1] * in[3] + kC10[5] * in[5] + kC10[9] * in[7];
+ float odd4 =
+ kC10[9] * in[1] - kC10[7] * in[3] + kC10[5] * in[5] - kC10[3] * in[7];
+ out[0] = even0 + odd0;
+ out[9] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[8] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[7] = even2 - odd2;
+ out[3] = even3 + odd3;
+ out[6] = even3 - odd3;
+ out[4] = even4 + odd4;
+ out[5] = even4 - odd4;
+ break;
+ }
+ case 11: {
+ static constexpr float kC11[11] = {
+ 1.414213562373, 1.399818907436, 1.356927976287, 1.286413904599,
+ 1.189712155524, 1.068791297809, 0.926112931411, 0.764581576418,
+ 0.587485545401, 0.398430002847, 0.201263574413,
+ };
+ float even0 = in[0] + kC11[2] * in[2] + kC11[4] * in[4] + kC11[6] * in[6];
+ float even1 =
+ in[0] + kC11[6] * in[2] - kC11[10] * in[4] - kC11[4] * in[6];
+ float even2 =
+ in[0] + kC11[10] * in[2] - kC11[2] * in[4] - kC11[8] * in[6];
+ float even3 = in[0] - kC11[8] * in[2] - kC11[6] * in[4] + kC11[2] * in[6];
+ float even4 =
+ in[0] - kC11[4] * in[2] + kC11[8] * in[4] + kC11[10] * in[6];
+ float even5 = in[0] - kC11[0] * in[2] + kC11[0] * in[4] - kC11[0] * in[6];
+ float odd0 =
+ kC11[1] * in[1] + kC11[3] * in[3] + kC11[5] * in[5] + kC11[7] * in[7];
+ float odd1 =
+ kC11[3] * in[1] + kC11[9] * in[3] - kC11[7] * in[5] - kC11[1] * in[7];
+ float odd2 =
+ kC11[5] * in[1] - kC11[7] * in[3] - kC11[3] * in[5] + kC11[9] * in[7];
+ float odd3 =
+ kC11[7] * in[1] - kC11[1] * in[3] + kC11[9] * in[5] + kC11[5] * in[7];
+ float odd4 =
+ kC11[9] * in[1] - kC11[5] * in[3] + kC11[1] * in[5] - kC11[3] * in[7];
+ out[0] = even0 + odd0;
+ out[10] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[9] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[8] = even2 - odd2;
+ out[3] = even3 + odd3;
+ out[7] = even3 - odd3;
+ out[4] = even4 + odd4;
+ out[6] = even4 - odd4;
+ out[5] = even5;
+ break;
+ }
+ case 12: {
+ static constexpr float kC12[12] = {
+ 1.414213562373, 1.402114769300, 1.366025403784, 1.306562964876,
+ 1.224744871392, 1.121971053594, 1.000000000000, 0.860918669154,
+ 0.707106781187, 0.541196100146, 0.366025403784, 0.184591911283,
+ };
+ float even0 = in[0] + kC12[2] * in[2] + kC12[4] * in[4] + kC12[6] * in[6];
+ float even1 = in[0] + kC12[6] * in[2] - kC12[6] * in[6];
+ float even2 =
+ in[0] + kC12[10] * in[2] - kC12[4] * in[4] - kC12[6] * in[6];
+ float even3 =
+ in[0] - kC12[10] * in[2] - kC12[4] * in[4] + kC12[6] * in[6];
+ float even4 = in[0] - kC12[6] * in[2] + kC12[6] * in[6];
+ float even5 = in[0] - kC12[2] * in[2] + kC12[4] * in[4] - kC12[6] * in[6];
+ float odd0 =
+ kC12[1] * in[1] + kC12[3] * in[3] + kC12[5] * in[5] + kC12[7] * in[7];
+ float odd1 =
+ kC12[3] * in[1] + kC12[9] * in[3] - kC12[9] * in[5] - kC12[3] * in[7];
+ float odd2 = kC12[5] * in[1] - kC12[9] * in[3] - kC12[1] * in[5] -
+ kC12[11] * in[7];
+ float odd3 = kC12[7] * in[1] - kC12[3] * in[3] - kC12[11] * in[5] +
+ kC12[1] * in[7];
+ float odd4 =
+ kC12[9] * in[1] - kC12[3] * in[3] + kC12[3] * in[5] - kC12[9] * in[7];
+ float odd5 = kC12[11] * in[1] - kC12[9] * in[3] + kC12[7] * in[5] -
+ kC12[5] * in[7];
+ out[0] = even0 + odd0;
+ out[11] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[10] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[9] = even2 - odd2;
+ out[3] = even3 + odd3;
+ out[8] = even3 - odd3;
+ out[4] = even4 + odd4;
+ out[7] = even4 - odd4;
+ out[5] = even5 + odd5;
+ out[6] = even5 - odd5;
+ break;
+ }
+ case 13: {
+ static constexpr float kC13[13] = {
+ 1.414213562373, 1.403902353238, 1.373119086479, 1.322312651445,
+ 1.252223920364, 1.163874944761, 1.058554051646, 0.937797056801,
+ 0.803364869133, 0.657217812653, 0.501487040539, 0.338443458124,
+ 0.170464607981,
+ };
+ float even0 = in[0] + kC13[2] * in[2] + kC13[4] * in[4] + kC13[6] * in[6];
+ float even1 =
+ in[0] + kC13[6] * in[2] + kC13[12] * in[4] - kC13[8] * in[6];
+ float even2 =
+ in[0] + kC13[10] * in[2] - kC13[6] * in[4] - kC13[4] * in[6];
+ float even3 =
+ in[0] - kC13[12] * in[2] - kC13[2] * in[4] + kC13[10] * in[6];
+ float even4 =
+ in[0] - kC13[8] * in[2] - kC13[10] * in[4] + kC13[2] * in[6];
+ float even5 =
+ in[0] - kC13[4] * in[2] + kC13[8] * in[4] - kC13[12] * in[6];
+ float even6 = in[0] - kC13[0] * in[2] + kC13[0] * in[4] - kC13[0] * in[6];
+ float odd0 =
+ kC13[1] * in[1] + kC13[3] * in[3] + kC13[5] * in[5] + kC13[7] * in[7];
+ float odd1 = kC13[3] * in[1] + kC13[9] * in[3] - kC13[11] * in[5] -
+ kC13[5] * in[7];
+ float odd2 = kC13[5] * in[1] - kC13[11] * in[3] - kC13[1] * in[5] -
+ kC13[9] * in[7];
+ float odd3 =
+ kC13[7] * in[1] - kC13[5] * in[3] - kC13[9] * in[5] + kC13[3] * in[7];
+ float odd4 = kC13[9] * in[1] - kC13[1] * in[3] + kC13[7] * in[5] +
+ kC13[11] * in[7];
+ float odd5 = kC13[11] * in[1] - kC13[7] * in[3] + kC13[3] * in[5] -
+ kC13[1] * in[7];
+ out[0] = even0 + odd0;
+ out[12] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[11] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[10] = even2 - odd2;
+ out[3] = even3 + odd3;
+ out[9] = even3 - odd3;
+ out[4] = even4 + odd4;
+ out[8] = even4 - odd4;
+ out[5] = even5 + odd5;
+ out[7] = even5 - odd5;
+ out[6] = even6;
+ break;
+ }
+ case 14: {
+ static constexpr float kC14[14] = {
+ 1.414213562373, 1.405321284327, 1.378756275744, 1.334852607020,
+ 1.274162392264, 1.197448846138, 1.105676685997, 1.000000000000,
+ 0.881747733790, 0.752406978226, 0.613604268353, 0.467085128785,
+ 0.314692122713, 0.158341680609,
+ };
+ float even0 = in[0] + kC14[2] * in[2] + kC14[4] * in[4] + kC14[6] * in[6];
+ float even1 =
+ in[0] + kC14[6] * in[2] + kC14[12] * in[4] - kC14[10] * in[6];
+ float even2 =
+ in[0] + kC14[10] * in[2] - kC14[8] * in[4] - kC14[2] * in[6];
+ float even3 = in[0] - kC14[0] * in[4];
+ float even4 =
+ in[0] - kC14[10] * in[2] - kC14[8] * in[4] + kC14[2] * in[6];
+ float even5 =
+ in[0] - kC14[6] * in[2] + kC14[12] * in[4] + kC14[10] * in[6];
+ float even6 = in[0] - kC14[2] * in[2] + kC14[4] * in[4] - kC14[6] * in[6];
+ float odd0 =
+ kC14[1] * in[1] + kC14[3] * in[3] + kC14[5] * in[5] + kC14[7] * in[7];
+ float odd1 = kC14[3] * in[1] + kC14[9] * in[3] - kC14[13] * in[5] -
+ kC14[7] * in[7];
+ float odd2 = kC14[5] * in[1] - kC14[13] * in[3] - kC14[3] * in[5] -
+ kC14[7] * in[7];
+ float odd3 =
+ kC14[7] * in[1] - kC14[7] * in[3] - kC14[7] * in[5] + kC14[7] * in[7];
+ float odd4 = kC14[9] * in[1] - kC14[1] * in[3] + kC14[11] * in[5] +
+ kC14[7] * in[7];
+ float odd5 = kC14[11] * in[1] - kC14[5] * in[3] + kC14[1] * in[5] -
+ kC14[7] * in[7];
+ float odd6 = kC14[13] * in[1] - kC14[11] * in[3] + kC14[9] * in[5] -
+ kC14[7] * in[7];
+ out[0] = even0 + odd0;
+ out[13] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[12] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[11] = even2 - odd2;
+ out[3] = even3 + odd3;
+ out[10] = even3 - odd3;
+ out[4] = even4 + odd4;
+ out[9] = even4 - odd4;
+ out[5] = even5 + odd5;
+ out[8] = even5 - odd5;
+ out[6] = even6 + odd6;
+ out[7] = even6 - odd6;
+ break;
+ }
+ case 15: {
+ static constexpr float kC15[15] = {
+ 1.414213562373, 1.406466352507, 1.383309602960, 1.344997023928,
+ 1.291948376043, 1.224744871392, 1.144122805635, 1.050965490998,
+ 0.946293578512, 0.831253875555, 0.707106781187, 0.575212476952,
+ 0.437016024449, 0.294031532930, 0.147825570407,
+ };
+ float even0 = in[0] + kC15[2] * in[2] + kC15[4] * in[4] + kC15[6] * in[6];
+ float even1 =
+ in[0] + kC15[6] * in[2] + kC15[12] * in[4] - kC15[12] * in[6];
+ float even2 =
+ in[0] + kC15[10] * in[2] - kC15[10] * in[4] - kC15[0] * in[6];
+ float even3 =
+ in[0] + kC15[14] * in[2] - kC15[2] * in[4] - kC15[12] * in[6];
+ float even4 =
+ in[0] - kC15[12] * in[2] - kC15[6] * in[4] + kC15[6] * in[6];
+ float even5 =
+ in[0] - kC15[8] * in[2] - kC15[14] * in[4] + kC15[6] * in[6];
+ float even6 =
+ in[0] - kC15[4] * in[2] + kC15[8] * in[4] - kC15[12] * in[6];
+ float even7 = in[0] - kC15[0] * in[2] + kC15[0] * in[4] - kC15[0] * in[6];
+ float odd0 =
+ kC15[1] * in[1] + kC15[3] * in[3] + kC15[5] * in[5] + kC15[7] * in[7];
+ float odd1 = kC15[3] * in[1] + kC15[9] * in[3] - kC15[9] * in[7];
+ float odd2 = kC15[5] * in[1] - kC15[5] * in[5] - kC15[5] * in[7];
+ float odd3 = kC15[7] * in[1] - kC15[9] * in[3] - kC15[5] * in[5] +
+ kC15[11] * in[7];
+ float odd4 = kC15[9] * in[1] - kC15[3] * in[3] + kC15[3] * in[7];
+ float odd5 = kC15[11] * in[1] - kC15[3] * in[3] + kC15[5] * in[5] -
+ kC15[13] * in[7];
+ float odd6 = kC15[13] * in[1] - kC15[9] * in[3] + kC15[5] * in[5] -
+ kC15[1] * in[7];
+ out[0] = even0 + odd0;
+ out[14] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[13] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[12] = even2 - odd2;
+ out[3] = even3 + odd3;
+ out[11] = even3 - odd3;
+ out[4] = even4 + odd4;
+ out[10] = even4 - odd4;
+ out[5] = even5 + odd5;
+ out[9] = even5 - odd5;
+ out[6] = even6 + odd6;
+ out[8] = even6 - odd6;
+ out[7] = even7;
+ break;
+ }
+ case 16: {
+ static constexpr float kC16[16] = {
+ 1.414213562373, 1.407403737526, 1.387039845322, 1.353318001174,
+ 1.306562964876, 1.247225012987, 1.175875602419, 1.093201867002,
+ 1.000000000000, 0.897167586343, 0.785694958387, 0.666655658478,
+ 0.541196100146, 0.410524527522, 0.275899379283, 0.138617169199,
+ };
+ float even0 = in[0] + kC16[2] * in[2] + kC16[4] * in[4] + kC16[6] * in[6];
+ float even1 =
+ in[0] + kC16[6] * in[2] + kC16[12] * in[4] - kC16[14] * in[6];
+ float even2 =
+ in[0] + kC16[10] * in[2] - kC16[12] * in[4] - kC16[2] * in[6];
+ float even3 =
+ in[0] + kC16[14] * in[2] - kC16[4] * in[4] - kC16[10] * in[6];
+ float even4 =
+ in[0] - kC16[14] * in[2] - kC16[4] * in[4] + kC16[10] * in[6];
+ float even5 =
+ in[0] - kC16[10] * in[2] - kC16[12] * in[4] + kC16[2] * in[6];
+ float even6 =
+ in[0] - kC16[6] * in[2] + kC16[12] * in[4] + kC16[14] * in[6];
+ float even7 = in[0] - kC16[2] * in[2] + kC16[4] * in[4] - kC16[6] * in[6];
+ float odd0 = (kC16[1] * in[1] + kC16[3] * in[3] + kC16[5] * in[5] +
+ kC16[7] * in[7]);
+ float odd1 = (kC16[3] * in[1] + kC16[9] * in[3] + kC16[15] * in[5] -
+ kC16[11] * in[7]);
+ float odd2 = (kC16[5] * in[1] + kC16[15] * in[3] - kC16[7] * in[5] -
+ kC16[3] * in[7]);
+ float odd3 = (kC16[7] * in[1] - kC16[11] * in[3] - kC16[3] * in[5] +
+ kC16[15] * in[7]);
+ float odd4 = (kC16[9] * in[1] - kC16[5] * in[3] - kC16[13] * in[5] +
+ kC16[1] * in[7]);
+ float odd5 = (kC16[11] * in[1] - kC16[1] * in[3] + kC16[9] * in[5] +
+ kC16[13] * in[7]);
+ float odd6 = (kC16[13] * in[1] - kC16[7] * in[3] + kC16[1] * in[5] -
+ kC16[5] * in[7]);
+ float odd7 = (kC16[15] * in[1] - kC16[13] * in[3] + kC16[11] * in[5] -
+ kC16[9] * in[7]);
+ out[0] = even0 + odd0;
+ out[15] = even0 - odd0;
+ out[1] = even1 + odd1;
+ out[14] = even1 - odd1;
+ out[2] = even2 + odd2;
+ out[13] = even2 - odd2;
+ out[3] = even3 + odd3;
+ out[12] = even3 - odd3;
+ out[4] = even4 + odd4;
+ out[11] = even4 - odd4;
+ out[5] = even5 + odd5;
+ out[10] = even5 - odd5;
+ out[6] = even6 + odd6;
+ out[9] = even6 - odd6;
+ out[7] = even7 + odd7;
+ out[8] = even7 - odd7;
+ break;
+ }
+ }
+}
+
+void InverseTransformBlockGeneric(const int16_t* JXL_RESTRICT qblock,
+ const float* JXL_RESTRICT dequant,
+ const float* JXL_RESTRICT biases,
+ float* JXL_RESTRICT scratch_space,
+ float* JXL_RESTRICT output,
+ size_t output_stride, size_t dctsize) {
+ float* JXL_RESTRICT block0 = scratch_space;
+ float* JXL_RESTRICT block1 = scratch_space + DCTSIZE2;
+ DequantBlock(qblock, dequant, biases, block0);
+ if (dctsize == 1) {
+ *output = *block0;
+ } else if (dctsize == 2 || dctsize == 4) {
+ float* JXL_RESTRICT block2 = scratch_space + 2 * DCTSIZE2;
+ ComputeScaledIDCT(block0, block1, block2, 8);
+ if (dctsize == 4) {
+ for (size_t iy = 0; iy < 4; ++iy) {
+ for (size_t ix = 0; ix < 4; ++ix) {
+ float* block = &block2[16 * iy + 2 * ix];
+ output[iy * output_stride + ix] =
+ 0.25f * (block[0] + block[1] + block[8] + block[9]);
+ }
+ }
+ } else {
+ for (size_t iy = 0; iy < 2; ++iy) {
+ for (size_t ix = 0; ix < 2; ++ix) {
+ float* block = &block2[32 * iy + 4 * ix];
+ output[iy * output_stride + ix] =
+ 0.0625f *
+ (block[0] + block[1] + block[2] + block[3] + block[8] + block[9] +
+ block[10] + block[11] + block[16] + block[17] + block[18] +
+ block[19] + block[24] + block[25] + block[26] + block[27]);
+ }
+ }
+ }
+ } else {
+ float dctin[DCTSIZE];
+ float dctout[DCTSIZE * 2];
+ size_t insize = std::min<size_t>(dctsize, DCTSIZE);
+ for (size_t ix = 0; ix < insize; ++ix) {
+ for (size_t iy = 0; iy < insize; ++iy) {
+ dctin[iy] = block0[iy * DCTSIZE + ix];
+ }
+ Compute1dIDCT(dctin, dctout, dctsize);
+ for (size_t iy = 0; iy < dctsize; ++iy) {
+ block1[iy * dctsize + ix] = dctout[iy];
+ }
+ }
+ for (size_t iy = 0; iy < dctsize; ++iy) {
+ Compute1dIDCT(block1 + iy * dctsize, output + iy * output_stride,
+ dctsize);
+ }
+ }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+
+HWY_EXPORT(InverseTransformBlock8x8);
+HWY_EXPORT(InverseTransformBlockGeneric);
+
+void ChooseInverseTransform(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ if (m->scaled_dct_size[c] == DCTSIZE) {
+ m->inverse_transform[c] = HWY_DYNAMIC_DISPATCH(InverseTransformBlock8x8);
+ } else {
+ m->inverse_transform[c] =
+ HWY_DYNAMIC_DISPATCH(InverseTransformBlockGeneric);
+ }
+ }
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jpegli/idct.h b/third_party/jpeg-xl/lib/jpegli/idct.h
new file mode 100644
index 0000000000..21c5c452e6
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/idct.h
@@ -0,0 +1,24 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_IDCT_H_
+#define LIB_JPEGLI_IDCT_H_
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+#include <stddef.h>
+#include <stdint.h>
+/* clang-format on */
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jpegli {
+
+void ChooseInverseTransform(j_decompress_ptr cinfo);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_IDCT_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/input.cc b/third_party/jpeg-xl/lib/jpegli/input.cc
new file mode 100644
index 0000000000..765bf98946
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/input.cc
@@ -0,0 +1,414 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/input.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/input.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::Vec;
+
+using D = HWY_FULL(float);
+using DU = HWY_FULL(uint32_t);
+using DU8 = Rebind<uint8_t, D>;
+using DU16 = Rebind<uint16_t, D>;
+
+constexpr D d;
+constexpr DU du;
+constexpr DU8 du8;
+constexpr DU16 du16;
+
+static constexpr double kMul16 = 1.0 / 257.0;
+static constexpr double kMulFloat = 255.0;
+
+template <size_t C>
+void ReadUint8Row(const uint8_t* row_in, size_t x0, size_t len,
+ float* row_out[kMaxComponents]) {
+ for (size_t x = x0; x < len; ++x) {
+ for (size_t c = 0; c < C; ++c) {
+ row_out[c][x] = row_in[C * x + c];
+ }
+ }
+}
+
+template <size_t C, bool swap_endianness = false>
+void ReadUint16Row(const uint8_t* row_in, size_t x0, size_t len,
+ float* row_out[kMaxComponents]) {
+ const uint16_t* row16 = reinterpret_cast<const uint16_t*>(row_in);
+ for (size_t x = x0; x < len; ++x) {
+ for (size_t c = 0; c < C; ++c) {
+ uint16_t val = row16[C * x + c];
+ if (swap_endianness) val = JXL_BSWAP16(val);
+ row_out[c][x] = val * kMul16;
+ }
+ }
+}
+
+template <size_t C, bool swap_endianness = false>
+void ReadFloatRow(const uint8_t* row_in, size_t x0, size_t len,
+ float* row_out[kMaxComponents]) {
+ const float* rowf = reinterpret_cast<const float*>(row_in);
+ for (size_t x = x0; x < len; ++x) {
+ for (size_t c = 0; c < C; ++c) {
+ float val = rowf[C * x + c];
+ if (swap_endianness) val = BSwapFloat(val);
+ row_out[c][x] = val * kMulFloat;
+ }
+ }
+}
+
+void ReadUint8RowSingle(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ const size_t N = Lanes(d);
+ const size_t simd_len = len & (~(N - 1));
+ float* JXL_RESTRICT const row0 = row_out[0];
+ for (size_t x = 0; x < simd_len; x += N) {
+ Store(ConvertTo(d, PromoteTo(du, LoadU(du8, row_in + x))), d, row0 + x);
+ }
+ ReadUint8Row<1>(row_in, simd_len, len, row_out);
+}
+
+void ReadUint8RowInterleaved2(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ const size_t N = Lanes(d);
+ const size_t simd_len = len & (~(N - 1));
+ float* JXL_RESTRICT const row0 = row_out[0];
+ float* JXL_RESTRICT const row1 = row_out[1];
+ Vec<DU8> out0, out1;
+ for (size_t x = 0; x < simd_len; x += N) {
+ LoadInterleaved2(du8, row_in + 2 * x, out0, out1);
+ Store(ConvertTo(d, PromoteTo(du, out0)), d, row0 + x);
+ Store(ConvertTo(d, PromoteTo(du, out1)), d, row1 + x);
+ }
+ ReadUint8Row<2>(row_in, simd_len, len, row_out);
+}
+
+void ReadUint8RowInterleaved3(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ const size_t N = Lanes(d);
+ const size_t simd_len = len & (~(N - 1));
+ float* JXL_RESTRICT const row0 = row_out[0];
+ float* JXL_RESTRICT const row1 = row_out[1];
+ float* JXL_RESTRICT const row2 = row_out[2];
+ Vec<DU8> out0, out1, out2;
+ for (size_t x = 0; x < simd_len; x += N) {
+ LoadInterleaved3(du8, row_in + 3 * x, out0, out1, out2);
+ Store(ConvertTo(d, PromoteTo(du, out0)), d, row0 + x);
+ Store(ConvertTo(d, PromoteTo(du, out1)), d, row1 + x);
+ Store(ConvertTo(d, PromoteTo(du, out2)), d, row2 + x);
+ }
+ ReadUint8Row<3>(row_in, simd_len, len, row_out);
+}
+
+void ReadUint8RowInterleaved4(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ const size_t N = Lanes(d);
+ const size_t simd_len = len & (~(N - 1));
+ float* JXL_RESTRICT const row0 = row_out[0];
+ float* JXL_RESTRICT const row1 = row_out[1];
+ float* JXL_RESTRICT const row2 = row_out[2];
+ float* JXL_RESTRICT const row3 = row_out[3];
+ Vec<DU8> out0, out1, out2, out3;
+ for (size_t x = 0; x < simd_len; x += N) {
+ LoadInterleaved4(du8, row_in + 4 * x, out0, out1, out2, out3);
+ Store(ConvertTo(d, PromoteTo(du, out0)), d, row0 + x);
+ Store(ConvertTo(d, PromoteTo(du, out1)), d, row1 + x);
+ Store(ConvertTo(d, PromoteTo(du, out2)), d, row2 + x);
+ Store(ConvertTo(d, PromoteTo(du, out3)), d, row3 + x);
+ }
+ ReadUint8Row<4>(row_in, simd_len, len, row_out);
+}
+
+void ReadUint16RowSingle(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ const size_t N = Lanes(d);
+ const size_t simd_len = len & (~(N - 1));
+ const auto mul = Set(d, kMul16);
+ const uint16_t* JXL_RESTRICT const row =
+ reinterpret_cast<const uint16_t*>(row_in);
+ float* JXL_RESTRICT const row0 = row_out[0];
+ for (size_t x = 0; x < simd_len; x += N) {
+ Store(Mul(mul, ConvertTo(d, PromoteTo(du, LoadU(du16, row + x)))), d,
+ row0 + x);
+ }
+ ReadUint16Row<1>(row_in, simd_len, len, row_out);
+}
+
+void ReadUint16RowInterleaved2(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ const size_t N = Lanes(d);
+ const size_t simd_len = len & (~(N - 1));
+ const auto mul = Set(d, kMul16);
+ const uint16_t* JXL_RESTRICT const row =
+ reinterpret_cast<const uint16_t*>(row_in);
+ float* JXL_RESTRICT const row0 = row_out[0];
+ float* JXL_RESTRICT const row1 = row_out[1];
+ Vec<DU16> out0, out1;
+ for (size_t x = 0; x < simd_len; x += N) {
+ LoadInterleaved2(du16, row + 2 * x, out0, out1);
+ Store(Mul(mul, ConvertTo(d, PromoteTo(du, out0))), d, row0 + x);
+ Store(Mul(mul, ConvertTo(d, PromoteTo(du, out1))), d, row1 + x);
+ }
+ ReadUint16Row<2>(row_in, simd_len, len, row_out);
+}
+
+void ReadUint16RowInterleaved3(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ const size_t N = Lanes(d);
+ const size_t simd_len = len & (~(N - 1));
+ const auto mul = Set(d, kMul16);
+ const uint16_t* JXL_RESTRICT const row =
+ reinterpret_cast<const uint16_t*>(row_in);
+ float* JXL_RESTRICT const row0 = row_out[0];
+ float* JXL_RESTRICT const row1 = row_out[1];
+ float* JXL_RESTRICT const row2 = row_out[2];
+ Vec<DU16> out0, out1, out2;
+ for (size_t x = 0; x < simd_len; x += N) {
+ LoadInterleaved3(du16, row + 3 * x, out0, out1, out2);
+ Store(Mul(mul, ConvertTo(d, PromoteTo(du, out0))), d, row0 + x);
+ Store(Mul(mul, ConvertTo(d, PromoteTo(du, out1))), d, row1 + x);
+ Store(Mul(mul, ConvertTo(d, PromoteTo(du, out2))), d, row2 + x);
+ }
+ ReadUint16Row<3>(row_in, simd_len, len, row_out);
+}
+
+void ReadUint16RowInterleaved4(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ const size_t N = Lanes(d);
+ const size_t simd_len = len & (~(N - 1));
+ const auto mul = Set(d, kMul16);
+ const uint16_t* JXL_RESTRICT const row =
+ reinterpret_cast<const uint16_t*>(row_in);
+ float* JXL_RESTRICT const row0 = row_out[0];
+ float* JXL_RESTRICT const row1 = row_out[1];
+ float* JXL_RESTRICT const row2 = row_out[2];
+ float* JXL_RESTRICT const row3 = row_out[3];
+ Vec<DU16> out0, out1, out2, out3;
+ for (size_t x = 0; x < simd_len; x += N) {
+ LoadInterleaved4(du16, row + 4 * x, out0, out1, out2, out3);
+ Store(Mul(mul, ConvertTo(d, PromoteTo(du, out0))), d, row0 + x);
+ Store(Mul(mul, ConvertTo(d, PromoteTo(du, out1))), d, row1 + x);
+ Store(Mul(mul, ConvertTo(d, PromoteTo(du, out2))), d, row2 + x);
+ Store(Mul(mul, ConvertTo(d, PromoteTo(du, out3))), d, row3 + x);
+ }
+ ReadUint16Row<4>(row_in, simd_len, len, row_out);
+}
+
+void ReadUint16RowSingleSwap(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ ReadUint16Row<1, true>(row_in, 0, len, row_out);
+}
+
+void ReadUint16RowInterleaved2Swap(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ ReadUint16Row<2, true>(row_in, 0, len, row_out);
+}
+
+void ReadUint16RowInterleaved3Swap(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ ReadUint16Row<3, true>(row_in, 0, len, row_out);
+}
+
+void ReadUint16RowInterleaved4Swap(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ ReadUint16Row<4, true>(row_in, 0, len, row_out);
+}
+
+void ReadFloatRowSingle(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ const size_t N = Lanes(d);
+ const size_t simd_len = len & (~(N - 1));
+ const auto mul = Set(d, kMulFloat);
+ const float* JXL_RESTRICT const row = reinterpret_cast<const float*>(row_in);
+ float* JXL_RESTRICT const row0 = row_out[0];
+ for (size_t x = 0; x < simd_len; x += N) {
+ Store(Mul(mul, LoadU(d, row + x)), d, row0 + x);
+ }
+ ReadFloatRow<1>(row_in, simd_len, len, row_out);
+}
+
+void ReadFloatRowInterleaved2(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ const size_t N = Lanes(d);
+ const size_t simd_len = len & (~(N - 1));
+ const auto mul = Set(d, kMulFloat);
+ const float* JXL_RESTRICT const row = reinterpret_cast<const float*>(row_in);
+ float* JXL_RESTRICT const row0 = row_out[0];
+ float* JXL_RESTRICT const row1 = row_out[1];
+ Vec<D> out0, out1;
+ for (size_t x = 0; x < simd_len; x += N) {
+ LoadInterleaved2(d, row + 2 * x, out0, out1);
+ Store(Mul(mul, out0), d, row0 + x);
+ Store(Mul(mul, out1), d, row1 + x);
+ }
+ ReadFloatRow<2>(row_in, simd_len, len, row_out);
+}
+
+void ReadFloatRowInterleaved3(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ const size_t N = Lanes(d);
+ const size_t simd_len = len & (~(N - 1));
+ const auto mul = Set(d, kMulFloat);
+ const float* JXL_RESTRICT const row = reinterpret_cast<const float*>(row_in);
+ float* JXL_RESTRICT const row0 = row_out[0];
+ float* JXL_RESTRICT const row1 = row_out[1];
+ float* JXL_RESTRICT const row2 = row_out[2];
+ Vec<D> out0, out1, out2;
+ for (size_t x = 0; x < simd_len; x += N) {
+ LoadInterleaved3(d, row + 3 * x, out0, out1, out2);
+ Store(Mul(mul, out0), d, row0 + x);
+ Store(Mul(mul, out1), d, row1 + x);
+ Store(Mul(mul, out2), d, row2 + x);
+ }
+ ReadFloatRow<3>(row_in, simd_len, len, row_out);
+}
+
+void ReadFloatRowInterleaved4(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ const size_t N = Lanes(d);
+ const size_t simd_len = len & (~(N - 1));
+ const auto mul = Set(d, kMulFloat);
+ const float* JXL_RESTRICT const row = reinterpret_cast<const float*>(row_in);
+ float* JXL_RESTRICT const row0 = row_out[0];
+ float* JXL_RESTRICT const row1 = row_out[1];
+ float* JXL_RESTRICT const row2 = row_out[2];
+ float* JXL_RESTRICT const row3 = row_out[3];
+ Vec<D> out0, out1, out2, out3;
+ for (size_t x = 0; x < simd_len; x += N) {
+ LoadInterleaved4(d, row + 4 * x, out0, out1, out2, out3);
+ Store(Mul(mul, out0), d, row0 + x);
+ Store(Mul(mul, out1), d, row1 + x);
+ Store(Mul(mul, out2), d, row2 + x);
+ Store(Mul(mul, out3), d, row3 + x);
+ }
+ ReadFloatRow<4>(row_in, simd_len, len, row_out);
+}
+
+void ReadFloatRowSingleSwap(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ ReadFloatRow<1, true>(row_in, 0, len, row_out);
+}
+
+void ReadFloatRowInterleaved2Swap(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ ReadFloatRow<2, true>(row_in, 0, len, row_out);
+}
+
+void ReadFloatRowInterleaved3Swap(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ ReadFloatRow<3, true>(row_in, 0, len, row_out);
+}
+
+void ReadFloatRowInterleaved4Swap(const uint8_t* row_in, size_t len,
+ float* row_out[kMaxComponents]) {
+ ReadFloatRow<4, true>(row_in, 0, len, row_out);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+
+HWY_EXPORT(ReadUint8RowSingle);
+HWY_EXPORT(ReadUint8RowInterleaved2);
+HWY_EXPORT(ReadUint8RowInterleaved3);
+HWY_EXPORT(ReadUint8RowInterleaved4);
+HWY_EXPORT(ReadUint16RowSingle);
+HWY_EXPORT(ReadUint16RowInterleaved2);
+HWY_EXPORT(ReadUint16RowInterleaved3);
+HWY_EXPORT(ReadUint16RowInterleaved4);
+HWY_EXPORT(ReadUint16RowSingleSwap);
+HWY_EXPORT(ReadUint16RowInterleaved2Swap);
+HWY_EXPORT(ReadUint16RowInterleaved3Swap);
+HWY_EXPORT(ReadUint16RowInterleaved4Swap);
+HWY_EXPORT(ReadFloatRowSingle);
+HWY_EXPORT(ReadFloatRowInterleaved2);
+HWY_EXPORT(ReadFloatRowInterleaved3);
+HWY_EXPORT(ReadFloatRowInterleaved4);
+HWY_EXPORT(ReadFloatRowSingleSwap);
+HWY_EXPORT(ReadFloatRowInterleaved2Swap);
+HWY_EXPORT(ReadFloatRowInterleaved3Swap);
+HWY_EXPORT(ReadFloatRowInterleaved4Swap);
+
+void ChooseInputMethod(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ bool swap_endianness =
+ (m->endianness == JPEGLI_LITTLE_ENDIAN && !IsLittleEndian()) ||
+ (m->endianness == JPEGLI_BIG_ENDIAN && IsLittleEndian());
+ m->input_method = nullptr;
+ if (m->data_type == JPEGLI_TYPE_UINT8) {
+ if (cinfo->raw_data_in || cinfo->input_components == 1) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowSingle);
+ } else if (cinfo->input_components == 2) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowInterleaved2);
+ } else if (cinfo->input_components == 3) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowInterleaved3);
+ } else if (cinfo->input_components == 4) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowInterleaved4);
+ }
+ } else if (m->data_type == JPEGLI_TYPE_UINT16 && !swap_endianness) {
+ if (cinfo->raw_data_in || cinfo->input_components == 1) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowSingle);
+ } else if (cinfo->input_components == 2) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved2);
+ } else if (cinfo->input_components == 3) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved3);
+ } else if (cinfo->input_components == 4) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved4);
+ }
+ } else if (m->data_type == JPEGLI_TYPE_UINT16 && swap_endianness) {
+ if (cinfo->raw_data_in || cinfo->input_components == 1) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowSingleSwap);
+ } else if (cinfo->input_components == 2) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved2Swap);
+ } else if (cinfo->input_components == 3) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved3Swap);
+ } else if (cinfo->input_components == 4) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved4Swap);
+ }
+ } else if (m->data_type == JPEGLI_TYPE_FLOAT && !swap_endianness) {
+ if (cinfo->raw_data_in || cinfo->input_components == 1) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowSingle);
+ } else if (cinfo->input_components == 2) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved2);
+ } else if (cinfo->input_components == 3) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved3);
+ } else if (cinfo->input_components == 4) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved4);
+ }
+ } else if (m->data_type == JPEGLI_TYPE_FLOAT && swap_endianness) {
+ if (cinfo->raw_data_in || cinfo->input_components == 1) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowSingleSwap);
+ } else if (cinfo->input_components == 2) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved2Swap);
+ } else if (cinfo->input_components == 3) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved3Swap);
+ } else if (cinfo->input_components == 4) {
+ m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved4Swap);
+ }
+ }
+ if (m->input_method == nullptr) {
+ JPEGLI_ERROR("Could not find input method.");
+ }
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jpegli/input.h b/third_party/jpeg-xl/lib/jpegli/input.h
new file mode 100644
index 0000000000..27b0e80fdb
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/input.h
@@ -0,0 +1,20 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_INPUT_H_
+#define LIB_JPEGLI_INPUT_H_
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+/* clang-format on */
+
+namespace jpegli {
+
+void ChooseInputMethod(j_compress_ptr cinfo);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_INPUT_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/input_suspension_test.cc b/third_party/jpeg-xl/lib/jpegli/input_suspension_test.cc
new file mode 100644
index 0000000000..4914e5e34b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/input_suspension_test.cc
@@ -0,0 +1,612 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include <cmath>
+#include <vector>
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/file_io.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jpegli {
+namespace {
+
+static constexpr uint8_t kFakeEoiMarker[2] = {0xff, 0xd9};
+
+struct SourceManager {
+ SourceManager(const uint8_t* data, size_t len, size_t max_chunk_size,
+ bool is_partial_file)
+ : data_(data),
+ len_(len),
+ pos_(0),
+ max_chunk_size_(max_chunk_size),
+ is_partial_file_(is_partial_file) {
+ pub_.init_source = init_source;
+ pub_.fill_input_buffer = fill_input_buffer;
+ pub_.next_input_byte = nullptr;
+ pub_.bytes_in_buffer = 0;
+ pub_.skip_input_data = skip_input_data;
+ pub_.resync_to_restart = jpegli_resync_to_restart;
+ pub_.term_source = term_source;
+ if (max_chunk_size_ == 0) max_chunk_size_ = len;
+ }
+
+ ~SourceManager() {
+ EXPECT_EQ(0, pub_.bytes_in_buffer);
+ if (!is_partial_file_) {
+ EXPECT_EQ(len_, pos_);
+ }
+ }
+
+ bool LoadNextChunk() {
+ if (pos_ >= len_ && !is_partial_file_) {
+ return false;
+ }
+ if (pub_.bytes_in_buffer > 0) {
+ EXPECT_LE(pub_.bytes_in_buffer, buffer_.size());
+ memmove(&buffer_[0], pub_.next_input_byte, pub_.bytes_in_buffer);
+ }
+ size_t chunk_size =
+ pos_ < len_ ? std::min(len_ - pos_, max_chunk_size_) : 2;
+ buffer_.resize(pub_.bytes_in_buffer + chunk_size);
+ memcpy(&buffer_[pub_.bytes_in_buffer],
+ pos_ < len_ ? data_ + pos_ : kFakeEoiMarker, chunk_size);
+ pub_.next_input_byte = &buffer_[0];
+ pub_.bytes_in_buffer += chunk_size;
+ pos_ += chunk_size;
+ return true;
+ }
+
+ private:
+ jpeg_source_mgr pub_;
+ std::vector<uint8_t> buffer_;
+ const uint8_t* data_;
+ size_t len_;
+ size_t pos_;
+ size_t max_chunk_size_;
+ bool is_partial_file_;
+
+ static void init_source(j_decompress_ptr cinfo) {
+ auto src = reinterpret_cast<SourceManager*>(cinfo->src);
+ src->pub_.next_input_byte = nullptr;
+ src->pub_.bytes_in_buffer = 0;
+ }
+
+ static boolean fill_input_buffer(j_decompress_ptr cinfo) { return FALSE; }
+
+ static void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {
+ auto src = reinterpret_cast<SourceManager*>(cinfo->src);
+ if (num_bytes <= 0) {
+ return;
+ }
+ if (src->pub_.bytes_in_buffer >= static_cast<size_t>(num_bytes)) {
+ src->pub_.bytes_in_buffer -= num_bytes;
+ src->pub_.next_input_byte += num_bytes;
+ } else {
+ src->pos_ += num_bytes - src->pub_.bytes_in_buffer;
+ src->pub_.bytes_in_buffer = 0;
+ }
+ }
+
+ static void term_source(j_decompress_ptr cinfo) {}
+};
+
+uint8_t markers_seen[kMarkerSequenceLen];
+size_t num_markers_seen = 0;
+
+uint8_t get_next_byte(j_decompress_ptr cinfo) {
+ cinfo->src->bytes_in_buffer--;
+ return *cinfo->src->next_input_byte++;
+}
+
+boolean test_marker_processor(j_decompress_ptr cinfo) {
+ markers_seen[num_markers_seen] = cinfo->unread_marker;
+ if (cinfo->src->bytes_in_buffer < 2) {
+ return FALSE;
+ }
+ size_t marker_len = (get_next_byte(cinfo) << 8) + get_next_byte(cinfo);
+ EXPECT_EQ(2 + ((num_markers_seen + 2) % sizeof(kMarkerData)), marker_len);
+ if (marker_len > 2) {
+ (*cinfo->src->skip_input_data)(cinfo, marker_len - 2);
+ }
+ ++num_markers_seen;
+ return TRUE;
+}
+
+void ReadOutputImage(const DecompressParams& dparams, j_decompress_ptr cinfo,
+ SourceManager* src, TestImage* output) {
+ output->ysize = cinfo->output_height;
+ output->xsize = cinfo->output_width;
+ output->components = cinfo->num_components;
+ if (cinfo->raw_data_out) {
+ output->color_space = cinfo->jpeg_color_space;
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+ size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+ std::vector<uint8_t> plane(ysize * xsize);
+ output->raw_data.emplace_back(std::move(plane));
+ }
+ } else {
+ output->color_space = cinfo->out_color_space;
+ output->AllocatePixels();
+ }
+ size_t total_output_lines = 0;
+ while (cinfo->output_scanline < cinfo->output_height) {
+ size_t max_lines;
+ size_t num_output_lines;
+ if (cinfo->raw_data_out) {
+ size_t iMCU_height = cinfo->max_v_samp_factor * DCTSIZE;
+ EXPECT_EQ(cinfo->output_scanline, cinfo->output_iMCU_row * iMCU_height);
+ max_lines = iMCU_height;
+ std::vector<std::vector<JSAMPROW>> rowdata(cinfo->num_components);
+ std::vector<JSAMPARRAY> data(cinfo->num_components);
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+ size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+ size_t num_lines = cinfo->comp_info[c].v_samp_factor * DCTSIZE;
+ rowdata[c].resize(num_lines);
+ size_t y0 = cinfo->output_iMCU_row * num_lines;
+ for (size_t i = 0; i < num_lines; ++i) {
+ rowdata[c][i] =
+ y0 + i < ysize ? &output->raw_data[c][(y0 + i) * xsize] : nullptr;
+ }
+ data[c] = &rowdata[c][0];
+ }
+ while ((num_output_lines =
+ jpegli_read_raw_data(cinfo, &data[0], max_lines)) == 0) {
+ JXL_CHECK(src && src->LoadNextChunk());
+ }
+ } else {
+ size_t max_output_lines = dparams.max_output_lines;
+ if (max_output_lines == 0) max_output_lines = cinfo->output_height;
+ size_t lines_left = cinfo->output_height - cinfo->output_scanline;
+ max_lines = std::min<size_t>(max_output_lines, lines_left);
+ size_t stride = cinfo->output_width * cinfo->num_components;
+ std::vector<JSAMPROW> scanlines(max_lines);
+ for (size_t i = 0; i < max_lines; ++i) {
+ size_t yidx = cinfo->output_scanline + i;
+ scanlines[i] = &output->pixels[yidx * stride];
+ }
+ while ((num_output_lines = jpegli_read_scanlines(cinfo, &scanlines[0],
+ max_lines)) == 0) {
+ JXL_CHECK(src && src->LoadNextChunk());
+ }
+ }
+ total_output_lines += num_output_lines;
+ EXPECT_EQ(total_output_lines, cinfo->output_scanline);
+ if (num_output_lines < max_lines) {
+ JXL_CHECK(src && src->LoadNextChunk());
+ }
+ }
+}
+
+struct TestConfig {
+ std::string fn;
+ std::string fn_desc;
+ TestImage input;
+ CompressParams jparams;
+ DecompressParams dparams;
+ float max_rms_dist = 1.0f;
+};
+
+std::vector<uint8_t> GetTestJpegData(TestConfig& config) {
+ if (!config.fn.empty()) {
+ return ReadTestData(config.fn.c_str());
+ }
+ GeneratePixels(&config.input);
+ std::vector<uint8_t> compressed;
+ JXL_CHECK(EncodeWithJpegli(config.input, config.jparams, &compressed));
+ return compressed;
+}
+
+bool IsSequential(const TestConfig& config) {
+ if (!config.fn.empty()) {
+ return config.fn_desc.find("PROGR") == std::string::npos;
+ }
+ return config.jparams.progressive_mode <= 0;
+}
+
+class InputSuspensionTestParam : public ::testing::TestWithParam<TestConfig> {};
+
+TEST_P(InputSuspensionTestParam, InputOutputLockStepNonBuffered) {
+ TestConfig config = GetParam();
+ const DecompressParams& dparams = config.dparams;
+ std::vector<uint8_t> compressed = GetTestJpegData(config);
+ bool is_partial = config.dparams.size_factor < 1.0f;
+ if (is_partial) {
+ compressed.resize(compressed.size() * config.dparams.size_factor);
+ }
+ SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size,
+ is_partial);
+ TestImage output0;
+ jpeg_decompress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+
+ if (config.jparams.add_marker) {
+ jpegli_save_markers(&cinfo, kSpecialMarker0, 0xffff);
+ jpegli_save_markers(&cinfo, kSpecialMarker1, 0xffff);
+ num_markers_seen = 0;
+ jpegli_set_marker_processor(&cinfo, 0xe6, test_marker_processor);
+ jpegli_set_marker_processor(&cinfo, 0xe7, test_marker_processor);
+ jpegli_set_marker_processor(&cinfo, 0xe8, test_marker_processor);
+ }
+ while (jpegli_read_header(&cinfo, TRUE) == JPEG_SUSPENDED) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ SetDecompressParams(dparams, &cinfo, true);
+ if (config.jparams.add_marker) {
+ EXPECT_EQ(num_markers_seen, kMarkerSequenceLen);
+ EXPECT_EQ(0, memcmp(markers_seen, kMarkerSequence, num_markers_seen));
+ }
+ VerifyHeader(config.jparams, &cinfo);
+ cinfo.raw_data_out = dparams.output_mode == RAW_DATA;
+
+ if (dparams.output_mode == COEFFICIENTS) {
+ jvirt_barray_ptr* coef_arrays;
+ while ((coef_arrays = jpegli_read_coefficients(&cinfo)) == nullptr) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ CopyCoefficients(&cinfo, coef_arrays, &output0);
+ } else {
+ while (!jpegli_start_decompress(&cinfo)) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ ReadOutputImage(dparams, &cinfo, &src, &output0);
+ }
+
+ while (!jpegli_finish_decompress(&cinfo)) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ return true;
+ };
+ ASSERT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+
+ TestImage output1;
+ DecodeWithLibjpeg(config.jparams, dparams, compressed, &output1);
+ VerifyOutputImage(output1, output0, config.max_rms_dist);
+}
+
+TEST_P(InputSuspensionTestParam, InputOutputLockStepBuffered) {
+ TestConfig config = GetParam();
+ if (config.jparams.add_marker) return;
+ const DecompressParams& dparams = config.dparams;
+ std::vector<uint8_t> compressed = GetTestJpegData(config);
+ bool is_partial = config.dparams.size_factor < 1.0f;
+ if (is_partial) {
+ compressed.resize(compressed.size() * config.dparams.size_factor);
+ }
+ SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size,
+ is_partial);
+ std::vector<TestImage> output_progression0;
+ jpeg_decompress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+
+ cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+
+ while (jpegli_read_header(&cinfo, TRUE) == JPEG_SUSPENDED) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ SetDecompressParams(dparams, &cinfo, true);
+
+ cinfo.buffered_image = TRUE;
+ cinfo.raw_data_out = dparams.output_mode == RAW_DATA;
+
+ EXPECT_TRUE(jpegli_start_decompress(&cinfo));
+ EXPECT_FALSE(jpegli_input_complete(&cinfo));
+ EXPECT_EQ(0, cinfo.output_scan_number);
+
+ int sos_marker_cnt = 1; // read_header reads the first SOS marker
+ while (!jpegli_input_complete(&cinfo)) {
+ EXPECT_EQ(cinfo.input_scan_number, sos_marker_cnt);
+ EXPECT_TRUE(jpegli_start_output(&cinfo, cinfo.input_scan_number));
+ // start output sets output_scan_number, but does not change
+ // input_scan_number
+ EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number);
+ EXPECT_EQ(cinfo.input_scan_number, sos_marker_cnt);
+ TestImage output;
+ ReadOutputImage(dparams, &cinfo, &src, &output);
+ output_progression0.emplace_back(std::move(output));
+ // read scanlines/read raw data does not change input/output scan number
+ EXPECT_EQ(cinfo.input_scan_number, sos_marker_cnt);
+ EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number);
+ while (!jpegli_finish_output(&cinfo)) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ ++sos_marker_cnt; // finish output reads the next SOS marker or EOI
+ if (dparams.output_mode == COEFFICIENTS) {
+ jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&cinfo);
+ JXL_CHECK(coef_arrays != nullptr);
+ CopyCoefficients(&cinfo, coef_arrays, &output_progression0.back());
+ }
+ }
+
+ EXPECT_TRUE(jpegli_finish_decompress(&cinfo));
+ return true;
+ };
+ ASSERT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+
+ std::vector<TestImage> output_progression1;
+ DecodeAllScansWithLibjpeg(config.jparams, dparams, compressed,
+ &output_progression1);
+ ASSERT_EQ(output_progression0.size(), output_progression1.size());
+ for (size_t i = 0; i < output_progression0.size(); ++i) {
+ const TestImage& output = output_progression0[i];
+ const TestImage& expected = output_progression1[i];
+ VerifyOutputImage(expected, output, config.max_rms_dist);
+ }
+}
+
+TEST_P(InputSuspensionTestParam, PreConsumeInputBuffered) {
+ TestConfig config = GetParam();
+ if (config.jparams.add_marker) return;
+ const DecompressParams& dparams = config.dparams;
+ std::vector<uint8_t> compressed = GetTestJpegData(config);
+ bool is_partial = config.dparams.size_factor < 1.0f;
+ if (is_partial) {
+ compressed.resize(compressed.size() * config.dparams.size_factor);
+ }
+ std::vector<TestImage> output_progression1;
+ DecodeAllScansWithLibjpeg(config.jparams, dparams, compressed,
+ &output_progression1);
+ SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size,
+ is_partial);
+ TestImage output0;
+ jpeg_decompress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+
+ int status;
+ while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_SOS) {
+ if (status == JPEG_SUSPENDED) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ }
+ EXPECT_EQ(JPEG_REACHED_SOS, jpegli_consume_input(&cinfo));
+ cinfo.buffered_image = TRUE;
+ cinfo.raw_data_out = dparams.output_mode == RAW_DATA;
+ cinfo.do_block_smoothing = dparams.do_block_smoothing;
+
+ EXPECT_TRUE(jpegli_start_decompress(&cinfo));
+ EXPECT_FALSE(jpegli_input_complete(&cinfo));
+ EXPECT_EQ(1, cinfo.input_scan_number);
+ EXPECT_EQ(0, cinfo.output_scan_number);
+
+ while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_EOI) {
+ if (status == JPEG_SUSPENDED) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ }
+
+ EXPECT_TRUE(jpegli_input_complete(&cinfo));
+ EXPECT_EQ(output_progression1.size(), cinfo.input_scan_number);
+ EXPECT_EQ(0, cinfo.output_scan_number);
+
+ EXPECT_TRUE(jpegli_start_output(&cinfo, cinfo.input_scan_number));
+ EXPECT_EQ(output_progression1.size(), cinfo.input_scan_number);
+ EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number);
+
+ ReadOutputImage(dparams, &cinfo, nullptr, &output0);
+ EXPECT_EQ(output_progression1.size(), cinfo.input_scan_number);
+ EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number);
+
+ EXPECT_TRUE(jpegli_finish_output(&cinfo));
+ if (dparams.output_mode == COEFFICIENTS) {
+ jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&cinfo);
+ JXL_CHECK(coef_arrays != nullptr);
+ CopyCoefficients(&cinfo, coef_arrays, &output0);
+ }
+ EXPECT_TRUE(jpegli_finish_decompress(&cinfo));
+ return true;
+ };
+ ASSERT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+
+ VerifyOutputImage(output_progression1.back(), output0, config.max_rms_dist);
+}
+
+TEST_P(InputSuspensionTestParam, PreConsumeInputNonBuffered) {
+ TestConfig config = GetParam();
+ if (config.jparams.add_marker || IsSequential(config)) return;
+ const DecompressParams& dparams = config.dparams;
+ std::vector<uint8_t> compressed = GetTestJpegData(config);
+ bool is_partial = config.dparams.size_factor < 1.0f;
+ if (is_partial) {
+ compressed.resize(compressed.size() * config.dparams.size_factor);
+ }
+ SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size,
+ is_partial);
+ TestImage output0;
+ jpeg_decompress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ cinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+
+ int status;
+ while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_SOS) {
+ if (status == JPEG_SUSPENDED) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ }
+ EXPECT_EQ(JPEG_REACHED_SOS, jpegli_consume_input(&cinfo));
+ cinfo.raw_data_out = dparams.output_mode == RAW_DATA;
+ cinfo.do_block_smoothing = dparams.do_block_smoothing;
+
+ if (dparams.output_mode == COEFFICIENTS) {
+ jpegli_read_coefficients(&cinfo);
+ } else {
+ while (!jpegli_start_decompress(&cinfo)) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ }
+
+ while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_EOI) {
+ if (status == JPEG_SUSPENDED) {
+ JXL_CHECK(src.LoadNextChunk());
+ }
+ }
+
+ if (dparams.output_mode == COEFFICIENTS) {
+ jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&cinfo);
+ JXL_CHECK(coef_arrays != nullptr);
+ CopyCoefficients(&cinfo, coef_arrays, &output0);
+ } else {
+ ReadOutputImage(dparams, &cinfo, nullptr, &output0);
+ }
+
+ EXPECT_TRUE(jpegli_finish_decompress(&cinfo));
+ return true;
+ };
+ ASSERT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+
+ TestImage output1;
+ DecodeWithLibjpeg(config.jparams, dparams, compressed, &output1);
+ VerifyOutputImage(output1, output0, config.max_rms_dist);
+}
+
+std::vector<TestConfig> GenerateTests() {
+ std::vector<TestConfig> all_tests;
+ std::vector<std::pair<std::string, std::string>> testfiles({
+ {"jxl/flower/flower.png.im_q85_444.jpg", "Q85YUV444"},
+ {"jxl/flower/flower.png.im_q85_420_R13B.jpg", "Q85YUV420R13B"},
+ {"jxl/flower/flower.png.im_q85_420_progr.jpg", "Q85YUV420PROGR"},
+ });
+ for (const auto& it : testfiles) {
+ for (size_t chunk_size : {1, 64, 65536}) {
+ for (size_t max_output_lines : {0, 1, 8, 16}) {
+ TestConfig config;
+ config.fn = it.first;
+ config.fn_desc = it.second;
+ config.dparams.chunk_size = chunk_size;
+ config.dparams.max_output_lines = max_output_lines;
+ all_tests.push_back(config);
+ if (max_output_lines == 16) {
+ config.dparams.output_mode = RAW_DATA;
+ all_tests.push_back(config);
+ config.dparams.output_mode = COEFFICIENTS;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ for (size_t r : {1, 17, 1024}) {
+ for (size_t chunk_size : {1, 65536}) {
+ TestConfig config;
+ config.dparams.chunk_size = chunk_size;
+ config.jparams.progressive_mode = 2;
+ config.jparams.restart_interval = r;
+ all_tests.push_back(config);
+ }
+ }
+ for (size_t chunk_size : {1, 4, 1024}) {
+ TestConfig config;
+ config.input.xsize = 256;
+ config.input.ysize = 256;
+ config.dparams.chunk_size = chunk_size;
+ config.jparams.add_marker = true;
+ all_tests.push_back(config);
+ }
+ // Tests for partial input.
+ for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f}) {
+ for (int progr : {0, 1, 3}) {
+ for (int samp : {1, 2}) {
+ for (JpegIOMode output_mode : {PIXELS, RAW_DATA}) {
+ TestConfig config;
+ config.input.xsize = 517;
+ config.input.ysize = 523;
+ config.jparams.h_sampling = {samp, 1, 1};
+ config.jparams.v_sampling = {samp, 1, 1};
+ config.jparams.progressive_mode = progr;
+ config.dparams.size_factor = size_factor;
+ config.dparams.output_mode = output_mode;
+ // The last partially available block can behave differently.
+ // TODO(szabadka) Figure out if we can make the behaviour more
+ // similar.
+ config.max_rms_dist = samp == 1 ? 1.75f : 3.0f;
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ // Tests for block smoothing.
+ for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f, 1.0f}) {
+ for (int samp : {1, 2}) {
+ TestConfig config;
+ config.input.xsize = 517;
+ config.input.ysize = 523;
+ config.jparams.h_sampling = {samp, 1, 1};
+ config.jparams.v_sampling = {samp, 1, 1};
+ config.jparams.progressive_mode = 2;
+ config.dparams.size_factor = size_factor;
+ config.dparams.do_block_smoothing = true;
+ // libjpeg does smoothing for incomplete scans differently at
+ // the border between current and previous scans.
+ config.max_rms_dist = 8.0f;
+ all_tests.push_back(config);
+ }
+ }
+ return all_tests;
+}
+
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+ if (!c.fn.empty()) {
+ os << c.fn_desc;
+ } else {
+ os << c.input;
+ }
+ os << c.jparams;
+ if (c.dparams.chunk_size == 0) {
+ os << "CompleteInput";
+ } else {
+ os << "InputChunks" << c.dparams.chunk_size;
+ }
+ if (c.dparams.size_factor < 1.0f) {
+ os << "Partial" << static_cast<int>(c.dparams.size_factor * 100) << "p";
+ }
+ if (c.dparams.max_output_lines == 0) {
+ os << "CompleteOutput";
+ } else {
+ os << "OutputLines" << c.dparams.max_output_lines;
+ }
+ if (c.dparams.output_mode == RAW_DATA) {
+ os << "RawDataOut";
+ } else if (c.dparams.output_mode == COEFFICIENTS) {
+ os << "CoeffsOut";
+ }
+ if (c.dparams.do_block_smoothing) {
+ os << "BlockSmoothing";
+ }
+ return os;
+}
+
+std::string TestDescription(
+ const testing::TestParamInfo<InputSuspensionTestParam::ParamType>& info) {
+ std::stringstream name;
+ name << info.param;
+ return name.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(InputSuspensionTest, InputSuspensionTestParam,
+ testing::ValuesIn(GenerateTests()),
+ TestDescription);
+
+} // namespace
+} // namespace jpegli
diff --git a/third_party/jpeg-xl/lib/jpegli/jpeg.version.62 b/third_party/jpeg-xl/lib/jpegli/jpeg.version.62
new file mode 100644
index 0000000000..3a8d1f5ec5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/jpeg.version.62
@@ -0,0 +1,11 @@
+LIBJPEG_6.2 {
+ global:
+ jpeg*;
+};
+
+LIBJPEGTURBO_6.2 {
+ global:
+ jpeg_mem_src*;
+ jpeg_mem_dest*;
+ tj*;
+}; \ No newline at end of file
diff --git a/third_party/jpeg-xl/lib/jpegli/jpeg.version.8 b/third_party/jpeg-xl/lib/jpegli/jpeg.version.8
new file mode 100644
index 0000000000..aa891f8571
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/jpeg.version.8
@@ -0,0 +1,9 @@
+LIBJPEG_8.0 {
+ global:
+ jpeg*;
+};
+
+LIBJPEGTURBO_8.0 {
+ global:
+ tj*;
+};
diff --git a/third_party/jpeg-xl/lib/jpegli/libjpeg_wrapper.cc b/third_party/jpeg-xl/lib/jpegli/libjpeg_wrapper.cc
new file mode 100644
index 0000000000..ef5ef224d3
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/libjpeg_wrapper.cc
@@ -0,0 +1,260 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// This file contains wrapper-functions that are used to build the libjpeg.so
+// shared library that is API- and ABI-compatible with libjpeg-turbo's version
+// of libjpeg.so.
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+/* clang-format on */
+
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/error.h"
+
+struct jpeg_error_mgr *jpeg_std_error(struct jpeg_error_mgr *err) {
+ return jpegli_std_error(err);
+}
+
+void jpeg_abort(j_common_ptr cinfo) { jpegli_abort(cinfo); }
+
+void jpeg_destroy(j_common_ptr cinfo) { jpegli_destroy(cinfo); }
+
+JQUANT_TBL *jpeg_alloc_quant_table(j_common_ptr cinfo) {
+ return jpegli_alloc_quant_table(cinfo);
+}
+
+JHUFF_TBL *jpeg_alloc_huff_table(j_common_ptr cinfo) {
+ return jpegli_alloc_huff_table(cinfo);
+}
+
+void jpeg_CreateDecompress(j_decompress_ptr cinfo, int version,
+ size_t structsize) {
+ jpegli_CreateDecompress(cinfo, version, structsize);
+}
+
+void jpeg_stdio_src(j_decompress_ptr cinfo, FILE *infile) {
+ jpegli_stdio_src(cinfo, infile);
+}
+
+void jpeg_mem_src(j_decompress_ptr cinfo, const unsigned char *inbuffer,
+ unsigned long insize) {
+ jpegli_mem_src(cinfo, inbuffer, insize);
+}
+
+int jpeg_read_header(j_decompress_ptr cinfo, boolean require_image) {
+ return jpegli_read_header(cinfo, require_image);
+}
+
+boolean jpeg_start_decompress(j_decompress_ptr cinfo) {
+ return jpegli_start_decompress(cinfo);
+}
+
+JDIMENSION jpeg_read_scanlines(j_decompress_ptr cinfo, JSAMPARRAY scanlines,
+ JDIMENSION max_lines) {
+ return jpegli_read_scanlines(cinfo, scanlines, max_lines);
+}
+
+JDIMENSION jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) {
+ return jpegli_skip_scanlines(cinfo, num_lines);
+}
+
+void jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
+ JDIMENSION *width) {
+ jpegli_crop_scanline(cinfo, xoffset, width);
+}
+
+boolean jpeg_finish_decompress(j_decompress_ptr cinfo) {
+ return jpegli_finish_decompress(cinfo);
+}
+
+JDIMENSION jpeg_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data,
+ JDIMENSION max_lines) {
+ return jpegli_read_raw_data(cinfo, data, max_lines);
+}
+
+jvirt_barray_ptr *jpeg_read_coefficients(j_decompress_ptr cinfo) {
+ return jpegli_read_coefficients(cinfo);
+}
+
+boolean jpeg_has_multiple_scans(j_decompress_ptr cinfo) {
+ return jpegli_has_multiple_scans(cinfo);
+}
+
+boolean jpeg_start_output(j_decompress_ptr cinfo, int scan_number) {
+ return jpegli_start_output(cinfo, scan_number);
+}
+
+boolean jpeg_finish_output(j_decompress_ptr cinfo) {
+ return jpegli_finish_output(cinfo);
+}
+
+boolean jpeg_input_complete(j_decompress_ptr cinfo) {
+ return jpegli_input_complete(cinfo);
+}
+
+int jpeg_consume_input(j_decompress_ptr cinfo) {
+ return jpegli_consume_input(cinfo);
+}
+
+#if JPEG_LIB_VERSION >= 80
+void jpeg_core_output_dimensions(j_decompress_ptr cinfo) {
+ jpegli_core_output_dimensions(cinfo);
+}
+#endif
+void jpeg_calc_output_dimensions(j_decompress_ptr cinfo) {
+ jpegli_calc_output_dimensions(cinfo);
+}
+
+void jpeg_save_markers(j_decompress_ptr cinfo, int marker_code,
+ unsigned int length_limit) {
+ jpegli_save_markers(cinfo, marker_code, length_limit);
+}
+
+void jpeg_set_marker_processor(j_decompress_ptr cinfo, int marker_code,
+ jpeg_marker_parser_method routine) {
+ jpegli_set_marker_processor(cinfo, marker_code, routine);
+}
+
+boolean jpeg_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr,
+ unsigned int *icc_data_len) {
+ return jpegli_read_icc_profile(cinfo, icc_data_ptr, icc_data_len);
+}
+
+void jpeg_abort_decompress(j_decompress_ptr cinfo) {
+ return jpegli_abort_decompress(cinfo);
+}
+
+void jpeg_destroy_decompress(j_decompress_ptr cinfo) {
+ return jpegli_destroy_decompress(cinfo);
+}
+
+void jpeg_CreateCompress(j_compress_ptr cinfo, int version, size_t structsize) {
+ jpegli_CreateCompress(cinfo, version, structsize);
+}
+
+void jpeg_stdio_dest(j_compress_ptr cinfo, FILE *outfile) {
+ jpegli_stdio_dest(cinfo, outfile);
+}
+
+void jpeg_mem_dest(j_compress_ptr cinfo, unsigned char **outbuffer,
+ unsigned long *outsize) {
+ jpegli_mem_dest(cinfo, outbuffer, outsize);
+}
+
+void jpeg_set_defaults(j_compress_ptr cinfo) { jpegli_set_defaults(cinfo); }
+
+void jpeg_default_colorspace(j_compress_ptr cinfo) {
+ jpegli_default_colorspace(cinfo);
+}
+
+void jpeg_set_colorspace(j_compress_ptr cinfo, J_COLOR_SPACE colorspace) {
+ jpegli_set_colorspace(cinfo, colorspace);
+}
+
+void jpeg_set_quality(j_compress_ptr cinfo, int quality,
+ boolean force_baseline) {
+ jpegli_set_quality(cinfo, quality, force_baseline);
+}
+
+void jpeg_set_linear_quality(j_compress_ptr cinfo, int scale_factor,
+ boolean force_baseline) {
+ jpegli_set_linear_quality(cinfo, scale_factor, force_baseline);
+}
+
+#if JPEG_LIB_VERSION >= 70
+void jpeg_default_qtables(j_compress_ptr cinfo, boolean force_baseline) {
+ jpegli_default_qtables(cinfo, force_baseline);
+}
+#endif
+
+int jpeg_quality_scaling(int quality) {
+ return jpegli_quality_scaling(quality);
+}
+
+void jpeg_add_quant_table(j_compress_ptr cinfo, int which_tbl,
+ const unsigned int *basic_table, int scale_factor,
+ boolean force_baseline) {
+ jpegli_add_quant_table(cinfo, which_tbl, basic_table, scale_factor,
+ force_baseline);
+}
+
+void jpeg_simple_progression(j_compress_ptr cinfo) {
+ jpegli_simple_progression(cinfo);
+}
+
+void jpeg_suppress_tables(j_compress_ptr cinfo, boolean suppress) {
+ jpegli_suppress_tables(cinfo, suppress);
+}
+
+#if JPEG_LIB_VERSION >= 70
+void jpeg_calc_jpeg_dimensions(j_compress_ptr cinfo) {
+ jpegli_calc_jpeg_dimensions(cinfo);
+}
+#endif
+
+void jpeg_copy_critical_parameters(j_decompress_ptr srcinfo,
+ j_compress_ptr dstinfo) {
+ jpegli_copy_critical_parameters(srcinfo, dstinfo);
+}
+
+void jpeg_write_m_header(j_compress_ptr cinfo, int marker,
+ unsigned int datalen) {
+ jpegli_write_m_header(cinfo, marker, datalen);
+}
+
+void jpeg_write_m_byte(j_compress_ptr cinfo, int val) {
+ jpegli_write_m_byte(cinfo, val);
+}
+
+void jpeg_write_marker(j_compress_ptr cinfo, int marker, const JOCTET *dataptr,
+ unsigned int datalen) {
+ jpegli_write_marker(cinfo, marker, dataptr, datalen);
+}
+
+void jpeg_write_icc_profile(j_compress_ptr cinfo, const JOCTET *icc_data_ptr,
+ unsigned int icc_data_len) {
+ jpegli_write_icc_profile(cinfo, icc_data_ptr, icc_data_len);
+}
+
+void jpeg_start_compress(j_compress_ptr cinfo, boolean write_all_tables) {
+ jpegli_start_compress(cinfo, write_all_tables);
+}
+
+void jpeg_write_tables(j_compress_ptr cinfo) { jpegli_write_tables(cinfo); }
+
+JDIMENSION jpeg_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines,
+ JDIMENSION num_lines) {
+ return jpegli_write_scanlines(cinfo, scanlines, num_lines);
+}
+
+JDIMENSION jpeg_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data,
+ JDIMENSION num_lines) {
+ return jpegli_write_raw_data(cinfo, data, num_lines);
+}
+
+void jpeg_write_coefficients(j_compress_ptr cinfo,
+ jvirt_barray_ptr *coef_arrays) {
+ jpegli_write_coefficients(cinfo, coef_arrays);
+}
+
+void jpeg_finish_compress(j_compress_ptr cinfo) {
+ jpegli_finish_compress(cinfo);
+}
+
+void jpeg_abort_compress(j_compress_ptr cinfo) { jpegli_abort_compress(cinfo); }
+
+void jpeg_destroy_compress(j_compress_ptr cinfo) {
+ jpegli_destroy_compress(cinfo);
+}
+
+boolean jpeg_resync_to_restart(j_decompress_ptr cinfo, int desired) {
+ return jpegli_resync_to_restart(cinfo, desired);
+}
+
+void jpeg_new_colormap(j_decompress_ptr cinfo) { jpegli_new_colormap(cinfo); }
diff --git a/third_party/jpeg-xl/lib/jpegli/memory_manager.cc b/third_party/jpeg-xl/lib/jpegli/memory_manager.cc
new file mode 100644
index 0000000000..f6530d8f02
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/memory_manager.cc
@@ -0,0 +1,181 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/memory_manager.h"
+
+#include <string.h>
+
+#include <hwy/aligned_allocator.h>
+#include <vector>
+
+#include "lib/jpegli/common_internal.h"
+#include "lib/jpegli/error.h"
+
+struct jvirt_sarray_control {
+ JSAMPARRAY full_buffer;
+ size_t numrows;
+ JDIMENSION maxaccess;
+};
+
+struct jvirt_barray_control {
+ JBLOCKARRAY full_buffer;
+ size_t numrows;
+ JDIMENSION maxaccess;
+};
+
+namespace jpegli {
+
+namespace {
+
+struct MemoryManager {
+ struct jpeg_memory_mgr pub;
+ std::vector<void*> owned_ptrs[2 * JPOOL_NUMPOOLS];
+ uint64_t pool_memory_usage[2 * JPOOL_NUMPOOLS];
+ uint64_t total_memory_usage;
+ uint64_t peak_memory_usage;
+};
+
+void* Alloc(j_common_ptr cinfo, int pool_id, size_t sizeofobject) {
+ MemoryManager* mem = reinterpret_cast<MemoryManager*>(cinfo->mem);
+ if (pool_id < 0 || pool_id >= 2 * JPOOL_NUMPOOLS) {
+ JPEGLI_ERROR("Invalid pool id %d", pool_id);
+ }
+ if (mem->pub.max_memory_to_use > 0 &&
+ mem->total_memory_usage + static_cast<uint64_t>(sizeofobject) >
+ static_cast<uint64_t>(mem->pub.max_memory_to_use)) {
+ JPEGLI_ERROR("Total memory usage exceeding %ld",
+ mem->pub.max_memory_to_use);
+ }
+ void* p;
+ if (pool_id < JPOOL_NUMPOOLS) {
+ p = malloc(sizeofobject);
+ } else {
+ p = hwy::AllocateAlignedBytes(sizeofobject, nullptr, nullptr);
+ }
+ if (p == nullptr) {
+ JPEGLI_ERROR("Out of memory");
+ }
+ mem->owned_ptrs[pool_id].push_back(p);
+ mem->pool_memory_usage[pool_id] += sizeofobject;
+ mem->total_memory_usage += sizeofobject;
+ mem->peak_memory_usage =
+ std::max(mem->peak_memory_usage, mem->total_memory_usage);
+ return p;
+}
+
+template <typename T>
+T** Alloc2dArray(j_common_ptr cinfo, int pool_id, JDIMENSION samplesperrow,
+ JDIMENSION numrows) {
+ T** array = Allocate<T*>(cinfo, numrows, pool_id);
+ // Always use aligned allocator for large 2d arrays.
+ if (pool_id < JPOOL_NUMPOOLS) {
+ pool_id += JPOOL_NUMPOOLS;
+ }
+ size_t stride = RoundUpTo(samplesperrow, HWY_ALIGNMENT);
+ T* buffer = Allocate<T>(cinfo, numrows * stride, pool_id);
+ for (size_t i = 0; i < numrows; ++i) {
+ array[i] = &buffer[i * stride];
+ }
+ return array;
+}
+
+template <typename Control, typename T>
+Control* RequestVirtualArray(j_common_ptr cinfo, int pool_id, boolean pre_zero,
+ JDIMENSION samplesperrow, JDIMENSION numrows,
+ JDIMENSION maxaccess) {
+ if (pool_id != JPOOL_IMAGE) {
+ JPEGLI_ERROR("Only image lifetime virtual arrays are supported.");
+ }
+ Control* p = Allocate<Control>(cinfo, 1, pool_id);
+ p->full_buffer = Alloc2dArray<T>(cinfo, pool_id, samplesperrow, numrows);
+ p->numrows = numrows;
+ p->maxaccess = maxaccess;
+ if (pre_zero) {
+ for (size_t i = 0; i < numrows; ++i) {
+ memset(p->full_buffer[i], 0, samplesperrow * sizeof(T));
+ }
+ }
+ return p;
+}
+
+void RealizeVirtualArrays(j_common_ptr cinfo) {
+ // Nothing to do, the full arrays were realized at request time already.
+}
+
+template <typename Control, typename T>
+T** AccessVirtualArray(j_common_ptr cinfo, Control* ptr, JDIMENSION start_row,
+ JDIMENSION num_rows, boolean writable) {
+ if (num_rows > ptr->maxaccess) {
+ JPEGLI_ERROR("Invalid virtual array access, num rows %u vs max rows %u",
+ num_rows, ptr->maxaccess);
+ }
+ if (start_row + num_rows > ptr->numrows) {
+ JPEGLI_ERROR("Invalid virtual array access, %u vs %u total rows",
+ start_row + num_rows, ptr->numrows);
+ }
+ if (ptr->full_buffer == nullptr) {
+ JPEGLI_ERROR("Invalid virtual array access, array not realized.");
+ }
+ return ptr->full_buffer + start_row;
+}
+
+void ClearPool(j_common_ptr cinfo, int pool_id) {
+ MemoryManager* mem = reinterpret_cast<MemoryManager*>(cinfo->mem);
+ mem->owned_ptrs[pool_id].clear();
+ mem->total_memory_usage -= mem->pool_memory_usage[pool_id];
+ mem->pool_memory_usage[pool_id] = 0;
+}
+
+void FreePool(j_common_ptr cinfo, int pool_id) {
+ MemoryManager* mem = reinterpret_cast<MemoryManager*>(cinfo->mem);
+ if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS) {
+ JPEGLI_ERROR("Invalid pool id %d", pool_id);
+ }
+ for (void* ptr : mem->owned_ptrs[pool_id]) {
+ free(ptr);
+ }
+ ClearPool(cinfo, pool_id);
+ for (void* ptr : mem->owned_ptrs[JPOOL_NUMPOOLS + pool_id]) {
+ hwy::FreeAlignedBytes(ptr, nullptr, nullptr);
+ }
+ ClearPool(cinfo, JPOOL_NUMPOOLS + pool_id);
+}
+
+void SelfDestruct(j_common_ptr cinfo) {
+ MemoryManager* mem = reinterpret_cast<MemoryManager*>(cinfo->mem);
+ for (int pool_id = 0; pool_id < JPOOL_NUMPOOLS; ++pool_id) {
+ FreePool(cinfo, pool_id);
+ }
+ delete mem;
+ cinfo->mem = nullptr;
+}
+
+} // namespace
+
+void InitMemoryManager(j_common_ptr cinfo) {
+ MemoryManager* mem = new MemoryManager;
+ mem->pub.alloc_small = jpegli::Alloc;
+ mem->pub.alloc_large = jpegli::Alloc;
+ mem->pub.alloc_sarray = jpegli::Alloc2dArray<JSAMPLE>;
+ mem->pub.alloc_barray = jpegli::Alloc2dArray<JBLOCK>;
+ mem->pub.request_virt_sarray =
+ jpegli::RequestVirtualArray<jvirt_sarray_control, JSAMPLE>;
+ mem->pub.request_virt_barray =
+ jpegli::RequestVirtualArray<jvirt_barray_control, JBLOCK>;
+ mem->pub.realize_virt_arrays = jpegli::RealizeVirtualArrays;
+ mem->pub.access_virt_sarray =
+ jpegli::AccessVirtualArray<jvirt_sarray_control, JSAMPLE>;
+ mem->pub.access_virt_barray =
+ jpegli::AccessVirtualArray<jvirt_barray_control, JBLOCK>;
+ mem->pub.free_pool = jpegli::FreePool;
+ mem->pub.self_destruct = jpegli::SelfDestruct;
+ mem->pub.max_memory_to_use = 0;
+ mem->total_memory_usage = 0;
+ mem->peak_memory_usage = 0;
+ memset(mem->pool_memory_usage, 0, sizeof(mem->pool_memory_usage));
+ cinfo->mem = reinterpret_cast<struct jpeg_memory_mgr*>(mem);
+}
+
+} // namespace jpegli
diff --git a/third_party/jpeg-xl/lib/jpegli/memory_manager.h b/third_party/jpeg-xl/lib/jpegli/memory_manager.h
new file mode 100644
index 0000000000..238f85a308
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/memory_manager.h
@@ -0,0 +1,40 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_MEMORY_MANAGER_H_
+#define LIB_JPEGLI_MEMORY_MANAGER_H_
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+#include <stdlib.h>
+/* clang-format on */
+
+#define JPOOL_PERMANENT_ALIGNED (JPOOL_NUMPOOLS + JPOOL_PERMANENT)
+#define JPOOL_IMAGE_ALIGNED (JPOOL_NUMPOOLS + JPOOL_IMAGE)
+
+namespace jpegli {
+
+void InitMemoryManager(j_common_ptr cinfo);
+
+template <typename T>
+T* Allocate(j_common_ptr cinfo, size_t len, int pool_id = JPOOL_PERMANENT) {
+ void* p = (*cinfo->mem->alloc_small)(cinfo, pool_id, len * sizeof(T));
+ return reinterpret_cast<T*>(p);
+}
+
+template <typename T>
+T* Allocate(j_decompress_ptr cinfo, size_t len, int pool_id = JPOOL_PERMANENT) {
+ return Allocate<T>(reinterpret_cast<j_common_ptr>(cinfo), len, pool_id);
+}
+
+template <typename T>
+T* Allocate(j_compress_ptr cinfo, size_t len, int pool_id = JPOOL_PERMANENT) {
+ return Allocate<T>(reinterpret_cast<j_common_ptr>(cinfo), len, pool_id);
+}
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_MEMORY_MANAGER_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/output_suspension_test.cc b/third_party/jpeg-xl/lib/jpegli/output_suspension_test.cc
new file mode 100644
index 0000000000..73db791727
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/output_suspension_test.cc
@@ -0,0 +1,219 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+
+namespace jpegli {
+namespace {
+
+static constexpr size_t kInitialBufferSize = 1024;
+static constexpr size_t kFinalBufferSize = 18;
+
+struct DestinationManager {
+ jpeg_destination_mgr pub;
+ std::vector<uint8_t> buffer;
+
+ DestinationManager() {
+ pub.init_destination = init_destination;
+ pub.empty_output_buffer = empty_output_buffer;
+ pub.term_destination = term_destination;
+ }
+
+ void Rewind() {
+ pub.next_output_byte = buffer.data();
+ pub.free_in_buffer = buffer.size();
+ }
+
+ void EmptyTo(std::vector<uint8_t>* output, size_t new_size = 0) {
+ output->insert(output->end(), buffer.data(), pub.next_output_byte);
+ if (new_size > 0) {
+ buffer.resize(new_size);
+ }
+ Rewind();
+ }
+
+ static void init_destination(j_compress_ptr cinfo) {
+ auto us = reinterpret_cast<DestinationManager*>(cinfo->dest);
+ us->buffer.resize(kInitialBufferSize);
+ us->Rewind();
+ }
+
+ static boolean empty_output_buffer(j_compress_ptr cinfo) { return FALSE; }
+
+ static void term_destination(j_compress_ptr cinfo) {}
+};
+
+struct TestConfig {
+ TestImage input;
+ CompressParams jparams;
+ size_t buffer_size;
+ size_t lines_batch_size;
+};
+
+class OutputSuspensionTestParam : public ::testing::TestWithParam<TestConfig> {
+};
+
+TEST_P(OutputSuspensionTestParam, PixelData) {
+ jpeg_compress_struct cinfo = {};
+ TestConfig config = GetParam();
+ TestImage& input = config.input;
+ GeneratePixels(&input);
+ DestinationManager dest;
+ std::vector<uint8_t> compressed;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ cinfo.dest = reinterpret_cast<jpeg_destination_mgr*>(&dest);
+
+ cinfo.image_width = input.xsize;
+ cinfo.image_height = input.ysize;
+ cinfo.input_components = input.components;
+ cinfo.in_color_space = JCS_RGB;
+ jpegli_set_defaults(&cinfo);
+ cinfo.comp_info[0].v_samp_factor = config.jparams.v_sampling[0];
+ jpegli_set_progressive_level(&cinfo, 0);
+ cinfo.optimize_coding = FALSE;
+ jpegli_start_compress(&cinfo, TRUE);
+
+ size_t stride = cinfo.image_width * cinfo.input_components;
+ std::vector<uint8_t> row_bytes(config.lines_batch_size * stride);
+ while (cinfo.next_scanline < cinfo.image_height) {
+ size_t lines_left = cinfo.image_height - cinfo.next_scanline;
+ size_t num_lines = std::min(config.lines_batch_size, lines_left);
+ memcpy(&row_bytes[0], &input.pixels[cinfo.next_scanline * stride],
+ num_lines * stride);
+ std::vector<JSAMPROW> rows(num_lines);
+ for (size_t i = 0; i < num_lines; ++i) {
+ rows[i] = &row_bytes[i * stride];
+ }
+ size_t lines_done = 0;
+ while (lines_done < num_lines) {
+ lines_done += jpegli_write_scanlines(&cinfo, &rows[lines_done],
+ num_lines - lines_done);
+ if (lines_done < num_lines) {
+ dest.EmptyTo(&compressed, config.buffer_size);
+ }
+ }
+ }
+ dest.EmptyTo(&compressed, kFinalBufferSize);
+ jpegli_finish_compress(&cinfo);
+ dest.EmptyTo(&compressed);
+ return true;
+ };
+ ASSERT_TRUE(try_catch_block());
+ jpegli_destroy_compress(&cinfo);
+ TestImage output;
+ DecodeWithLibjpeg(CompressParams(), DecompressParams(), compressed, &output);
+ VerifyOutputImage(input, output, 2.5);
+}
+
+TEST_P(OutputSuspensionTestParam, RawData) {
+ jpeg_compress_struct cinfo = {};
+ TestConfig config = GetParam();
+ if (config.lines_batch_size != 1) return;
+ TestImage& input = config.input;
+ input.color_space = JCS_YCbCr;
+ GeneratePixels(&input);
+ GenerateRawData(config.jparams, &input);
+ DestinationManager dest;
+ std::vector<uint8_t> compressed;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ cinfo.dest = reinterpret_cast<jpeg_destination_mgr*>(&dest);
+ cinfo.image_width = input.xsize;
+ cinfo.image_height = input.ysize;
+ cinfo.input_components = input.components;
+ cinfo.in_color_space = JCS_YCbCr;
+ jpegli_set_defaults(&cinfo);
+ cinfo.comp_info[0].v_samp_factor = config.jparams.v_sampling[0];
+ jpegli_set_progressive_level(&cinfo, 0);
+ cinfo.optimize_coding = FALSE;
+ cinfo.raw_data_in = TRUE;
+ jpegli_start_compress(&cinfo, TRUE);
+
+ std::vector<std::vector<uint8_t>> raw_data = input.raw_data;
+ size_t max_lines = config.jparams.max_v_sample() * DCTSIZE;
+ std::vector<std::vector<JSAMPROW>> rowdata(cinfo.num_components);
+ std::vector<JSAMPARRAY> data(cinfo.num_components);
+ for (int c = 0; c < cinfo.num_components; ++c) {
+ rowdata[c].resize(config.jparams.v_samp(c) * DCTSIZE);
+ data[c] = &rowdata[c][0];
+ }
+ while (cinfo.next_scanline < cinfo.image_height) {
+ for (int c = 0; c < cinfo.num_components; ++c) {
+ size_t cwidth = cinfo.comp_info[c].width_in_blocks * DCTSIZE;
+ size_t cheight = cinfo.comp_info[c].height_in_blocks * DCTSIZE;
+ size_t num_lines = config.jparams.v_samp(c) * DCTSIZE;
+ size_t y0 = (cinfo.next_scanline / max_lines) * num_lines;
+ for (size_t i = 0; i < num_lines; ++i) {
+ rowdata[c][i] =
+ (y0 + i < cheight ? &raw_data[c][(y0 + i) * cwidth] : nullptr);
+ }
+ }
+ while (jpegli_write_raw_data(&cinfo, &data[0], max_lines) == 0) {
+ dest.EmptyTo(&compressed, config.buffer_size);
+ }
+ }
+ dest.EmptyTo(&compressed, kFinalBufferSize);
+ jpegli_finish_compress(&cinfo);
+ dest.EmptyTo(&compressed);
+ return true;
+ };
+ try_catch_block();
+ jpegli_destroy_compress(&cinfo);
+ DecompressParams dparams;
+ dparams.output_mode = RAW_DATA;
+ TestImage output;
+ DecodeWithLibjpeg(CompressParams(), dparams, compressed, &output);
+ VerifyOutputImage(input, output, 3.5);
+}
+
+std::vector<TestConfig> GenerateTests() {
+ std::vector<TestConfig> all_tests;
+ const size_t xsize0 = 1920;
+ const size_t ysize0 = 1080;
+ for (int dysize : {0, 1, 8, 9}) {
+ for (int v_sampling : {1, 2}) {
+ for (int nlines : {1, 8, 117}) {
+ for (int bufsize : {1, 16, 16 << 10}) {
+ TestConfig config;
+ config.lines_batch_size = nlines;
+ config.buffer_size = bufsize;
+ config.input.xsize = xsize0;
+ config.input.ysize = ysize0 + dysize;
+ config.jparams.h_sampling = {1, 1, 1};
+ config.jparams.v_sampling = {v_sampling, 1, 1};
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ return all_tests;
+}
+
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+ os << c.input;
+ os << c.jparams;
+ os << "Lines" << c.lines_batch_size;
+ os << "BufSize" << c.buffer_size;
+ return os;
+}
+
+std::string TestDescription(
+ const testing::TestParamInfo<OutputSuspensionTestParam::ParamType>& info) {
+ std::stringstream name;
+ name << info.param;
+ return name.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(OutputSuspensionTest, OutputSuspensionTestParam,
+ testing::ValuesIn(GenerateTests()),
+ TestDescription);
+
+} // namespace
+} // namespace jpegli
diff --git a/third_party/jpeg-xl/lib/jpegli/quant.cc b/third_party/jpeg-xl/lib/jpegli/quant.cc
new file mode 100644
index 0000000000..3ab9bcf856
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/quant.cc
@@ -0,0 +1,748 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/quant.h"
+
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+#include "lib/jpegli/adaptive_quantization.h"
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/encode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/status.h"
+
+namespace jpegli {
+
+namespace {
+
+// Global scale is chosen in a way that butteraugli 3-norm matches libjpeg
+// with the same quality setting. Fitted for quality 90 on jyrki31 corpus.
+constexpr float kGlobalScaleXYB = 1.43951668f;
+constexpr float kGlobalScaleYCbCr = 1.66986909f;
+
+static constexpr float kBaseQuantMatrixXYB[] = {
+ // c = 0
+ 7.5629935265f,
+ 19.8247814178f,
+ 22.5724945068f,
+ 20.6706695557f,
+ 22.6864585876f,
+ 23.5696277618f,
+ 25.8129081726f,
+ 36.3307571411f,
+ 19.8247814178f,
+ 21.5503177643f,
+ 19.9372234344f,
+ 20.5424213409f,
+ 21.8645496368f,
+ 23.9041385651f,
+ 28.2844066620f,
+ 32.6609764099f,
+ 22.5724945068f,
+ 19.9372234344f,
+ 21.9017257690f,
+ 19.1223449707f,
+ 21.7515811920f,
+ 24.6724700928f,
+ 25.4249649048f,
+ 32.6653823853f,
+ 20.6706695557f,
+ 20.5424213409f,
+ 19.1223449707f,
+ 20.1610221863f,
+ 25.3719692230f,
+ 25.9668903351f,
+ 30.9804954529f,
+ 31.3406009674f,
+ 22.6864585876f,
+ 21.8645496368f,
+ 21.7515811920f,
+ 25.3719692230f,
+ 26.2431850433f,
+ 40.5992202759f,
+ 43.2624626160f,
+ 63.3010940552f,
+ 23.5696277618f,
+ 23.9041385651f,
+ 24.6724700928f,
+ 25.9668903351f,
+ 40.5992202759f,
+ 48.3026771545f,
+ 34.0964355469f,
+ 61.9852142334f,
+ 25.8129081726f,
+ 28.2844066620f,
+ 25.4249649048f,
+ 30.9804954529f,
+ 43.2624626160f,
+ 34.0964355469f,
+ 34.4937438965f,
+ 66.9702758789f,
+ 36.3307571411f,
+ 32.6609764099f,
+ 32.6653823853f,
+ 31.3406009674f,
+ 63.3010940552f,
+ 61.9852142334f,
+ 66.9702758789f,
+ 39.9652709961f,
+ // c = 1
+ 1.6262000799f,
+ 3.2199242115f,
+ 3.4903779030f,
+ 3.9148359299f,
+ 4.8337211609f,
+ 4.9108843803f,
+ 5.3137121201f,
+ 6.1676793098f,
+ 3.2199242115f,
+ 3.4547898769f,
+ 3.6036829948f,
+ 4.2652835846f,
+ 4.8368387222f,
+ 4.8226222992f,
+ 5.6120514870f,
+ 6.3431472778f,
+ 3.4903779030f,
+ 3.6036829948f,
+ 3.9044559002f,
+ 4.3374395370f,
+ 4.8435096741f,
+ 5.4057979584f,
+ 5.6066360474f,
+ 6.1075134277f,
+ 3.9148359299f,
+ 4.2652835846f,
+ 4.3374395370f,
+ 4.6064834595f,
+ 5.1751475334f,
+ 5.4013924599f,
+ 6.0399808884f,
+ 6.7825231552f,
+ 4.8337211609f,
+ 4.8368387222f,
+ 4.8435096741f,
+ 5.1751475334f,
+ 5.3748049736f,
+ 6.1410837173f,
+ 7.6529307365f,
+ 7.5235214233f,
+ 4.9108843803f,
+ 4.8226222992f,
+ 5.4057979584f,
+ 5.4013924599f,
+ 6.1410837173f,
+ 6.3431472778f,
+ 7.1083049774f,
+ 7.6008300781f,
+ 5.3137121201f,
+ 5.6120514870f,
+ 5.6066360474f,
+ 6.0399808884f,
+ 7.6529307365f,
+ 7.1083049774f,
+ 7.0943155289f,
+ 7.0478363037f,
+ 6.1676793098f,
+ 6.3431472778f,
+ 6.1075134277f,
+ 6.7825231552f,
+ 7.5235214233f,
+ 7.6008300781f,
+ 7.0478363037f,
+ 6.9186143875f,
+ // c = 2
+ 3.3038473129f,
+ 10.0689258575f,
+ 12.2785224915f,
+ 14.6041173935f,
+ 16.2107315063f,
+ 19.2314529419f,
+ 28.0129547119f,
+ 55.6682891846f,
+ 10.0689258575f,
+ 11.4085016251f,
+ 11.3871345520f,
+ 15.4934167862f,
+ 16.5364933014f,
+ 14.9153423309f,
+ 26.3748722076f,
+ 40.8614425659f,
+ 12.2785224915f,
+ 11.3871345520f,
+ 17.0886878967f,
+ 13.9500350952f,
+ 16.0003223419f,
+ 28.5660629272f,
+ 26.2124195099f,
+ 30.1260128021f,
+ 14.6041173935f,
+ 15.4934167862f,
+ 13.9500350952f,
+ 21.1235027313f,
+ 26.1579780579f,
+ 25.5579223633f,
+ 40.6859359741f,
+ 33.8056335449f,
+ 16.2107315063f,
+ 16.5364933014f,
+ 16.0003223419f,
+ 26.1579780579f,
+ 26.8042831421f,
+ 26.1587715149f,
+ 35.7343978882f,
+ 43.6857032776f,
+ 19.2314529419f,
+ 14.9153423309f,
+ 28.5660629272f,
+ 25.5579223633f,
+ 26.1587715149f,
+ 34.5418128967f,
+ 41.3197937012f,
+ 48.7867660522f,
+ 28.0129547119f,
+ 26.3748722076f,
+ 26.2124195099f,
+ 40.6859359741f,
+ 35.7343978882f,
+ 41.3197937012f,
+ 47.6329460144f,
+ 55.3498458862f,
+ 55.6682891846f,
+ 40.8614425659f,
+ 30.1260128021f,
+ 33.8056335449f,
+ 43.6857032776f,
+ 48.7867660522f,
+ 55.3498458862f,
+ 63.6065597534f,
+};
+
+static const float kBaseQuantMatrixYCbCr[] = {
+ // c = 0
+ 1.4076321125f,
+ 2.6927082539f,
+ 2.6927735806f,
+ 2.9220938683f,
+ 3.0870633125f,
+ 3.4968640804f,
+ 3.5730612278f,
+ 3.5978596210f,
+ 2.6927082539f,
+ 2.6926636696f,
+ 2.7195601463f,
+ 2.9238407612f,
+ 3.1882488728f,
+ 3.0607142448f,
+ 3.1882314682f,
+ 3.8304426670f,
+ 2.6927735806f,
+ 2.7195601463f,
+ 2.9532215595f,
+ 3.5562388897f,
+ 3.7088179588f,
+ 3.0576279163f,
+ 3.7443304062f,
+ 4.2484717369f,
+ 2.9220938683f,
+ 2.9238407612f,
+ 3.5562388897f,
+ 3.0594384670f,
+ 4.1780085564f,
+ 4.9221563339f,
+ 4.7842588425f,
+ 4.6059336662f,
+ 3.0870633125f,
+ 3.1882488728f,
+ 3.7088179588f,
+ 4.1780085564f,
+ 4.3475294113f,
+ 5.5422372818f,
+ 5.5741071701f,
+ 5.4531836510f,
+ 3.4968640804f,
+ 3.0607142448f,
+ 3.0576279163f,
+ 4.9221563339f,
+ 5.5422372818f,
+ 5.4393601418f,
+ 5.1039180756f,
+ 6.0990614891f,
+ 3.5730612278f,
+ 3.1882314682f,
+ 3.7443304062f,
+ 4.7842588425f,
+ 5.5741071701f,
+ 5.1039180756f,
+ 5.4144043922f,
+ 5.4524297714f,
+ 3.5978596210f,
+ 3.8304426670f,
+ 4.2484717369f,
+ 4.6059336662f,
+ 5.4531836510f,
+ 6.0990614891f,
+ 5.4524297714f,
+ 4.3595433235f,
+ // c = 1
+ 2.8152642250f,
+ 10.4298934937f,
+ 16.1451492310f,
+ 15.3725156784f,
+ 17.6543502808f,
+ 19.1104965210f,
+ 17.5021877289f,
+ 29.5177459717f,
+ 10.4298934937f,
+ 15.7448558807f,
+ 16.8441677094f,
+ 15.3214502335f,
+ 17.5918464661f,
+ 16.8787574768f,
+ 27.0867996216f,
+ 21.3443832397f,
+ 16.1451492310f,
+ 16.8441677094f,
+ 14.7525558472f,
+ 18.0765247345f,
+ 18.2206096649f,
+ 23.2126445770f,
+ 98.1291885376f,
+ 23.6039886475f,
+ 15.3725156784f,
+ 15.3214502335f,
+ 18.0765247345f,
+ 17.2925109863f,
+ 16.1435356140f,
+ 24.0464611053f,
+ 27.1577339172f,
+ 35.3269882202f,
+ 17.6543502808f,
+ 17.5918464661f,
+ 18.2206096649f,
+ 16.1435356140f,
+ 19.2819595337f,
+ 16.2939300537f,
+ 19.6862888336f,
+ 51.0941123962f,
+ 19.1104965210f,
+ 16.8787574768f,
+ 23.2126445770f,
+ 24.0464611053f,
+ 16.2939300537f,
+ 32.3153648376f,
+ 45.7272338867f,
+ 64.6245880127f,
+ 17.5021877289f,
+ 27.0867996216f,
+ 98.1291885376f,
+ 27.1577339172f,
+ 19.6862888336f,
+ 45.7272338867f,
+ 61.8331909180f,
+ 85.0626754761f,
+ 29.5177459717f,
+ 21.3443832397f,
+ 23.6039886475f,
+ 35.3269882202f,
+ 51.0941123962f,
+ 64.6245880127f,
+ 85.0626754761f,
+ 112.7605514526f,
+ // c = 2
+ 2.8152642250f,
+ 5.4735932350f,
+ 7.3637795448f,
+ 6.5195322037f,
+ 8.1501169205f,
+ 8.7243938446f,
+ 8.7219915390f,
+ 9.3618907928f,
+ 5.4735932350f,
+ 7.1514792442f,
+ 7.2054982185f,
+ 8.1126995087f,
+ 8.1497650146f,
+ 7.1335659027f,
+ 7.8453893661f,
+ 8.3512821198f,
+ 7.3637795448f,
+ 7.2054982185f,
+ 6.9224662781f,
+ 8.0766754150f,
+ 9.1168527603f,
+ 7.3714752197f,
+ 7.3646650314f,
+ 8.6790895462f,
+ 6.5195322037f,
+ 8.1126995087f,
+ 8.0766754150f,
+ 7.8294739723f,
+ 7.7385902405f,
+ 7.8628563881f,
+ 7.4404106140f,
+ 8.4759435654f,
+ 8.1501169205f,
+ 8.1497650146f,
+ 9.1168527603f,
+ 7.7385902405f,
+ 7.0960793495f,
+ 8.9185447693f,
+ 8.2047510147f,
+ 7.8465061188f,
+ 8.7243938446f,
+ 7.1335659027f,
+ 7.3714752197f,
+ 7.8628563881f,
+ 8.9185447693f,
+ 8.6063842773f,
+ 9.7156696320f,
+ 64.6700744629f,
+ 8.7219915390f,
+ 7.8453893661f,
+ 7.3646650314f,
+ 7.4404106140f,
+ 8.2047510147f,
+ 9.7156696320f,
+ 61.9934043884f,
+ 83.2930450439f,
+ 9.3618907928f,
+ 8.3512821198f,
+ 8.6790895462f,
+ 8.4759435654f,
+ 7.8465061188f,
+ 64.6700744629f,
+ 83.2930450439f,
+ 113.0502548218f,
+};
+
+static const float k420GlobalScale = 1.2;
+static const float k420Rescale[64] = {
+ 0.6386, 0.4213, 0.3994, 0.3333, 0.3143, 0.3367, 0.3612, 0.3794, //
+ 0.4213, 0.4026, 0.3309, 0.3344, 0.3059, 0.3118, 0.4069, 0.3595, //
+ 0.3994, 0.3309, 0.4080, 0.2531, 0.2645, 0.3630, 0.3502, 0.3231, //
+ 0.3333, 0.3344, 0.2531, 0.2960, 0.3153, 0.3476, 0.3430, 0.4004, //
+ 0.3143, 0.3059, 0.2645, 0.3153, 0.2733, 0.3296, 0.3338, 0.3418, //
+ 0.3367, 0.3118, 0.3630, 0.3476, 0.3296, 0.3144, 0.2262, 0.1326, //
+ 0.3612, 0.4069, 0.3502, 0.3430, 0.3338, 0.2262, 0.1000, 0.1000, //
+ 0.3794, 0.3595, 0.3231, 0.4004, 0.3418, 0.1326, 0.1000, 0.3366, //
+};
+
+static const float kBaseQuantMatrixStd[] = {
+ // c = 0
+ 16.0f, 11.0f, 10.0f, 16.0f, 24.0f, 40.0f, 51.0f, 61.0f, //
+ 12.0f, 12.0f, 14.0f, 19.0f, 26.0f, 58.0f, 60.0f, 55.0f, //
+ 14.0f, 13.0f, 16.0f, 24.0f, 40.0f, 57.0f, 69.0f, 56.0f, //
+ 14.0f, 17.0f, 22.0f, 29.0f, 51.0f, 87.0f, 80.0f, 62.0f, //
+ 18.0f, 22.0f, 37.0f, 56.0f, 68.0f, 109.0f, 103.0f, 77.0f, //
+ 24.0f, 35.0f, 55.0f, 64.0f, 81.0f, 104.0f, 113.0f, 92.0f, //
+ 49.0f, 64.0f, 78.0f, 87.0f, 103.0f, 121.0f, 120.0f, 101.0f, //
+ 72.0f, 92.0f, 95.0f, 98.0f, 112.0f, 100.0f, 103.0f, 99.0f, //
+ // c = 1
+ 17.0f, 18.0f, 24.0f, 47.0f, 99.0f, 99.0f, 99.0f, 99.0f, //
+ 18.0f, 21.0f, 26.0f, 66.0f, 99.0f, 99.0f, 99.0f, 99.0f, //
+ 24.0f, 26.0f, 56.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, //
+ 47.0f, 66.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, //
+ 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, //
+ 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, //
+ 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, //
+ 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, //
+};
+
+static const float kZeroBiasMulYCbCrLQ[] = {
+ // c = 0
+ 0.6190f, 0.0568f, 0.3880f, 0.6190f, 0.6190f, 0.4490f, 0.4490f, 0.6187f, //
+ 0.0568f, 0.5829f, 0.6189f, 0.6190f, 0.6190f, 0.7190f, 0.6190f, 0.6189f, //
+ 0.3880f, 0.6189f, 0.6190f, 0.6190f, 0.6190f, 0.6190f, 0.6187f, 0.6100f, //
+ 0.6190f, 0.6190f, 0.6190f, 0.6190f, 0.5890f, 0.3839f, 0.7160f, 0.6190f, //
+ 0.6190f, 0.6190f, 0.6190f, 0.5890f, 0.6190f, 0.3880f, 0.5860f, 0.4790f, //
+ 0.4490f, 0.7190f, 0.6190f, 0.3839f, 0.3880f, 0.6190f, 0.6190f, 0.6190f, //
+ 0.4490f, 0.6190f, 0.6187f, 0.7160f, 0.5860f, 0.6190f, 0.6204f, 0.6190f, //
+ 0.6187f, 0.6189f, 0.6100f, 0.6190f, 0.4790f, 0.6190f, 0.6190f, 0.3480f, //
+ // c = 1
+ 0.9430f, 1.1640f, 0.9373f, 1.1319f, 0.8016f, 0.9136f, 1.1530f, 0.9430f, //
+ 1.1640f, 0.9188f, 0.9160f, 1.1980f, 1.1830f, 0.9758f, 0.9430f, 0.9430f, //
+ 0.9373f, 0.9160f, 0.8430f, 1.1720f, 0.7083f, 0.9430f, 0.9430f, 0.9430f, //
+ 1.1319f, 1.1980f, 1.1720f, 1.1490f, 0.8547f, 0.9430f, 0.9430f, 0.9430f, //
+ 0.8016f, 1.1830f, 0.7083f, 0.8547f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, //
+ 0.9136f, 0.9758f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, //
+ 1.1530f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9480f, //
+ 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9480f, 0.9430f, //
+ // c = 2
+ 0.3060f, 1.3190f, 0.4308f, 0.4460f, 0.0661f, 0.0660f, 0.2660f, 0.2960f, //
+ 1.3190f, 0.3280f, 0.3093f, 0.0750f, 0.0505f, 0.1594f, 0.3060f, 0.2113f, //
+ 0.4308f, 0.3093f, 0.3060f, 0.1182f, 0.0500f, 0.3060f, 0.3915f, 0.2426f, //
+ 0.4460f, 0.0750f, 0.1182f, 0.0512f, 0.0500f, 0.2130f, 0.3930f, 0.1590f, //
+ 0.0661f, 0.0505f, 0.0500f, 0.0500f, 0.3055f, 0.3360f, 0.5148f, 0.5403f, //
+ 0.0660f, 0.1594f, 0.3060f, 0.2130f, 0.3360f, 0.5060f, 0.5874f, 0.3060f, //
+ 0.2660f, 0.3060f, 0.3915f, 0.3930f, 0.5148f, 0.5874f, 0.3060f, 0.3060f, //
+ 0.2960f, 0.2113f, 0.2426f, 0.1590f, 0.5403f, 0.3060f, 0.3060f, 0.3060f, //
+};
+
+static const float kZeroBiasMulYCbCrHQ[] = {
+ // c = 0
+ 0.7830f, 0.0044f, 0.2521f, 0.6547f, 0.8161f, 0.6130f, 0.8841f, 0.8155f, //
+ 0.0044f, 0.6831f, 0.6553f, 0.6295f, 0.7848f, 0.7843f, 0.8474f, 0.7836f, //
+ 0.2521f, 0.6553f, 0.7834f, 0.7829f, 0.8161f, 0.8072f, 0.7743f, 0.9242f, //
+ 0.6547f, 0.6295f, 0.7829f, 0.8654f, 0.7829f, 0.6986f, 0.7818f, 0.7726f, //
+ 0.8161f, 0.7848f, 0.8161f, 0.7829f, 0.7471f, 0.7827f, 0.7843f, 0.7653f, //
+ 0.6130f, 0.7843f, 0.8072f, 0.6986f, 0.7827f, 0.7848f, 0.9508f, 0.7653f, //
+ 0.8841f, 0.8474f, 0.7743f, 0.7818f, 0.7843f, 0.9508f, 0.7839f, 0.8437f, //
+ 0.8155f, 0.7836f, 0.9242f, 0.7726f, 0.7653f, 0.7653f, 0.8437f, 0.7819f, //
+ // c = 1
+ 1.0540f, 1.0816f, 1.0556f, 1.2876f, 1.1554f, 1.1567f, 1.8851f, 0.5488f, //
+ 1.0816f, 1.1537f, 1.1850f, 1.0712f, 1.1671f, 2.0719f, 1.0544f, 1.4764f, //
+ 1.0556f, 1.1850f, 1.2870f, 1.1981f, 1.8181f, 1.2618f, 1.0564f, 1.1191f, //
+ 1.2876f, 1.0712f, 1.1981f, 1.4753f, 2.0609f, 1.0564f, 1.2645f, 1.0564f, //
+ 1.1554f, 1.1671f, 1.8181f, 2.0609f, 0.7324f, 1.1163f, 0.8464f, 1.0564f, //
+ 1.1567f, 2.0719f, 1.2618f, 1.0564f, 1.1163f, 1.0040f, 1.0564f, 1.0564f, //
+ 1.8851f, 1.0544f, 1.0564f, 1.2645f, 0.8464f, 1.0564f, 1.0564f, 1.0564f, //
+ 0.5488f, 1.4764f, 1.1191f, 1.0564f, 1.0564f, 1.0564f, 1.0564f, 1.0564f, //
+ // c = 2
+ 0.6620f, 0.5392f, 0.6659f, 0.8968f, 0.6829f, 0.6328f, 0.5802f, 0.4836f, //
+ 0.5392f, 0.6746f, 0.6760f, 0.6102f, 0.6015f, 0.6958f, 0.7327f, 0.4897f, //
+ 0.6659f, 0.6760f, 0.6957f, 0.6543f, 0.4396f, 0.6330f, 0.7081f, 0.2583f, //
+ 0.8968f, 0.6102f, 0.6543f, 0.5913f, 0.6457f, 0.5828f, 0.5139f, 0.3565f, //
+ 0.6829f, 0.6015f, 0.4396f, 0.6457f, 0.5633f, 0.4263f, 0.6371f, 0.5949f, //
+ 0.6328f, 0.6958f, 0.6330f, 0.5828f, 0.4263f, 0.2847f, 0.2909f, 0.6629f, //
+ 0.5802f, 0.7327f, 0.7081f, 0.5139f, 0.6371f, 0.2909f, 0.6644f, 0.6644f, //
+ 0.4836f, 0.4897f, 0.2583f, 0.3565f, 0.5949f, 0.6629f, 0.6644f, 0.6644f, //
+};
+
+static const float kZeroBiasOffsetYCbCr[] = {
+ 0.59082f,
+ 0.58146f,
+ 0.57988f,
+};
+
+constexpr uint8_t kTransferFunctionPQ = 16;
+constexpr uint8_t kTransferFunctionHLG = 18;
+
+float DistanceToLinearQuality(float distance) {
+ if (distance <= 0.1f) {
+ return 1.0f;
+ } else if (distance <= 4.6f) {
+ return (200.0f / 9.0f) * (distance - 0.1f);
+ } else if (distance <= 6.4f) {
+ return 5000.0f / (100.0f - (distance - 0.1f) / 0.09f);
+ } else if (distance < 25.0f) {
+ return 530000.0f /
+ (3450.0f -
+ 300.0f * std::sqrt((848.0f * distance - 5330.0f) / 120.0f));
+ } else {
+ return 5000.0f;
+ }
+}
+
+constexpr float kExponent[DCTSIZE2] = {
+ 1.00f, 0.51f, 0.67f, 0.74f, 1.00f, 1.00f, 1.00f, 1.00f, //
+ 0.51f, 0.66f, 0.69f, 0.87f, 1.00f, 1.00f, 1.00f, 1.00f, //
+ 0.67f, 0.69f, 0.84f, 0.83f, 0.96f, 1.00f, 1.00f, 1.00f, //
+ 0.74f, 0.87f, 0.83f, 1.00f, 1.00f, 0.91f, 0.91f, 1.00f, //
+ 1.00f, 1.00f, 0.96f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, //
+ 1.00f, 1.00f, 1.00f, 0.91f, 1.00f, 1.00f, 1.00f, 1.00f, //
+ 1.00f, 1.00f, 1.00f, 0.91f, 1.00f, 1.00f, 1.00f, 1.00f, //
+ 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, //
+};
+constexpr float kDist0 = 1.5f; // distance where non-linearity kicks in.
+
+float DistanceToScale(float distance, int k) {
+ if (distance < kDist0) {
+ return distance;
+ }
+ const float exp = kExponent[k];
+ const float mul = std::pow(kDist0, 1.0 - exp);
+ return std::max<float>(0.5f * distance, mul * std::pow(distance, exp));
+}
+
+float ScaleToDistance(float scale, int k) {
+ if (scale < kDist0) {
+ return scale;
+ }
+ const float exp = 1.0 / kExponent[k];
+ const float mul = std::pow(kDist0, 1.0 - exp);
+ return std::min<float>(2.0f * scale, mul * std::pow(scale, exp));
+}
+
+float QuantValsToDistance(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ float global_scale = kGlobalScaleYCbCr;
+ if (m->cicp_transfer_function == kTransferFunctionPQ) {
+ global_scale *= .4f;
+ } else if (m->cicp_transfer_function == kTransferFunctionHLG) {
+ global_scale *= .5f;
+ }
+ int quant_max = m->force_baseline ? 255 : 32767U;
+ static const float kDistMax = 10000.0f;
+ float dist_min = 0.0f;
+ float dist_max = kDistMax;
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ int quant_idx = cinfo->comp_info[c].quant_tbl_no;
+ uint16_t* quantval = cinfo->quant_tbl_ptrs[quant_idx]->quantval;
+ const float* base_qm = &kBaseQuantMatrixYCbCr[quant_idx * DCTSIZE2];
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ float dmin = 0.0;
+ float dmax = kDistMax;
+ float invq = 1.0f / base_qm[k] / global_scale;
+ int qval = quantval[k];
+ if (qval > 1) {
+ float scale_min = (qval - 0.5f) * invq;
+ dmin = ScaleToDistance(scale_min, k);
+ }
+ if (qval < quant_max) {
+ float scale_max = (qval + 0.5f) * invq;
+ dmax = ScaleToDistance(scale_max, k);
+ }
+ if (dmin <= dist_max) {
+ dist_min = std::max(dmin, dist_min);
+ }
+ if (dmax >= dist_min) {
+ dist_max = std::min(dist_max, dmax);
+ }
+ }
+ }
+ float distance;
+ if (dist_min == 0) {
+ distance = dist_max;
+ } else if (dist_max == kDistMax) {
+ distance = dist_min;
+ } else {
+ distance = 0.5f * (dist_min + dist_max);
+ }
+ return distance;
+}
+
+bool IsYUV420(j_compress_ptr cinfo) {
+ return (cinfo->jpeg_color_space == JCS_YCbCr &&
+ cinfo->comp_info[0].h_samp_factor == 2 &&
+ cinfo->comp_info[0].v_samp_factor == 2 &&
+ cinfo->comp_info[1].h_samp_factor == 1 &&
+ cinfo->comp_info[1].v_samp_factor == 1 &&
+ cinfo->comp_info[2].h_samp_factor == 1 &&
+ cinfo->comp_info[2].v_samp_factor == 1);
+}
+
+} // namespace
+
+void SetQuantMatrices(j_compress_ptr cinfo, float distances[NUM_QUANT_TBLS],
+ bool add_two_chroma_tables) {
+ jpeg_comp_master* m = cinfo->master;
+ const bool xyb = m->xyb_mode && cinfo->jpeg_color_space == JCS_RGB;
+ const bool is_yuv420 = IsYUV420(cinfo);
+
+ float global_scale;
+ bool non_linear_scaling = true;
+ const float* base_quant_matrix[NUM_QUANT_TBLS];
+ int num_base_tables;
+
+ if (xyb) {
+ global_scale = kGlobalScaleXYB;
+ num_base_tables = 3;
+ base_quant_matrix[0] = kBaseQuantMatrixXYB;
+ base_quant_matrix[1] = kBaseQuantMatrixXYB + DCTSIZE2;
+ base_quant_matrix[2] = kBaseQuantMatrixXYB + 2 * DCTSIZE2;
+ } else if (cinfo->jpeg_color_space == JCS_YCbCr && !m->use_std_tables) {
+ global_scale = kGlobalScaleYCbCr;
+ if (m->cicp_transfer_function == kTransferFunctionPQ) {
+ global_scale *= .4f;
+ } else if (m->cicp_transfer_function == kTransferFunctionHLG) {
+ global_scale *= .5f;
+ }
+ if (is_yuv420) {
+ global_scale *= k420GlobalScale;
+ }
+ if (add_two_chroma_tables) {
+ cinfo->comp_info[2].quant_tbl_no = 2;
+ num_base_tables = 3;
+ base_quant_matrix[0] = kBaseQuantMatrixYCbCr;
+ base_quant_matrix[1] = kBaseQuantMatrixYCbCr + DCTSIZE2;
+ base_quant_matrix[2] = kBaseQuantMatrixYCbCr + 2 * DCTSIZE2;
+ } else {
+ num_base_tables = 2;
+ base_quant_matrix[0] = kBaseQuantMatrixYCbCr;
+ // Use the Cr table for both Cb and Cr.
+ base_quant_matrix[1] = kBaseQuantMatrixYCbCr + 2 * DCTSIZE2;
+ }
+ } else {
+ global_scale = 0.01f;
+ non_linear_scaling = false;
+ num_base_tables = 2;
+ base_quant_matrix[0] = kBaseQuantMatrixStd;
+ base_quant_matrix[1] = kBaseQuantMatrixStd + DCTSIZE2;
+ }
+
+ int quant_max = m->force_baseline ? 255 : 32767U;
+ for (int quant_idx = 0; quant_idx < num_base_tables; ++quant_idx) {
+ const float* base_qm = base_quant_matrix[quant_idx];
+ JQUANT_TBL** qtable = &cinfo->quant_tbl_ptrs[quant_idx];
+ if (*qtable == nullptr) {
+ *qtable = jpegli_alloc_quant_table(reinterpret_cast<j_common_ptr>(cinfo));
+ }
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ float scale = global_scale;
+ if (non_linear_scaling) {
+ scale *= DistanceToScale(distances[quant_idx], k);
+ if (is_yuv420 && quant_idx > 0) {
+ scale *= k420Rescale[k];
+ }
+ } else {
+ scale *= DistanceToLinearQuality(distances[quant_idx]);
+ }
+ int qval = std::round(scale * base_qm[k]);
+ (*qtable)->quantval[k] = std::max(1, std::min(qval, quant_max));
+ }
+ (*qtable)->sent_table = FALSE;
+ }
+}
+
+void InitQuantizer(j_compress_ptr cinfo) {
+ jpeg_comp_master* m = cinfo->master;
+ // Compute quantization multupliers from the quant table values.
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ int quant_idx = cinfo->comp_info[c].quant_tbl_no;
+ JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[quant_idx];
+ if (!quant_table) {
+ JPEGLI_ERROR("Missing quantization table %d for component %d", quant_idx,
+ c);
+ }
+ for (size_t k = 0; k < DCTSIZE2; k++) {
+ int val = quant_table->quantval[k];
+ if (val == 0) {
+ JPEGLI_ERROR("Invalid quantval 0.");
+ }
+ m->quant_mul[c][k] = 8.0f / val;
+ }
+ }
+ if (m->use_adaptive_quantization) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ m->zero_bias_mul[c][k] = 0.5f;
+ m->zero_bias_offset[c][k] = 0.5f;
+ }
+ }
+ if (cinfo->jpeg_color_space == JCS_YCbCr) {
+ float distance = QuantValsToDistance(cinfo);
+ static const float kDistHQ = 1.0f;
+ static const float kDistLQ = 3.0f;
+ float mix0 = (distance - kDistHQ) / (kDistLQ - kDistHQ);
+ mix0 = std::max(0.0f, std::min(1.0f, mix0));
+ float mix1 = 1.0f - mix0;
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ float mul0 = kZeroBiasMulYCbCrLQ[c * DCTSIZE2 + k];
+ float mul1 = kZeroBiasMulYCbCrHQ[c * DCTSIZE2 + k];
+ m->zero_bias_mul[c][k] = mix0 * mul0 + mix1 * mul1;
+ m->zero_bias_offset[c][k] = kZeroBiasOffsetYCbCr[c];
+ }
+ }
+ }
+ }
+}
+
+} // namespace jpegli
diff --git a/third_party/jpeg-xl/lib/jpegli/quant.h b/third_party/jpeg-xl/lib/jpegli/quant.h
new file mode 100644
index 0000000000..44deb48d45
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/quant.h
@@ -0,0 +1,23 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_QUANT_H_
+#define LIB_JPEGLI_QUANT_H_
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+/* clang-format on */
+
+namespace jpegli {
+
+void SetQuantMatrices(j_compress_ptr cinfo, float distances[NUM_QUANT_TBLS],
+ bool add_two_chroma_tables);
+
+void InitQuantizer(j_compress_ptr cinfo);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_QUANT_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/render.cc b/third_party/jpeg-xl/lib/jpegli/render.cc
new file mode 100644
index 0000000000..026345552a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/render.cc
@@ -0,0 +1,802 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/render.h"
+
+#include <string.h>
+
+#include <array>
+#include <atomic>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <hwy/aligned_allocator.h>
+
+#include "lib/jpegli/color_quantize.h"
+#include "lib/jpegli/color_transform.h"
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/idct.h"
+#include "lib/jpegli/upsample.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+#ifdef MEMORY_SANITIZER
+#define JXL_MEMORY_SANITIZER 1
+#elif defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#define JXL_MEMORY_SANITIZER 1
+#else
+#define JXL_MEMORY_SANITIZER 0
+#endif
+#else
+#define JXL_MEMORY_SANITIZER 0
+#endif
+
+#if JXL_MEMORY_SANITIZER
+#include "sanitizer/msan_interface.h"
+#endif
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/render.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Clamp;
+using hwy::HWY_NAMESPACE::Gt;
+using hwy::HWY_NAMESPACE::IfThenElseZero;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::NearestInt;
+using hwy::HWY_NAMESPACE::Or;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::ShiftLeftSame;
+using hwy::HWY_NAMESPACE::ShiftRightSame;
+using hwy::HWY_NAMESPACE::Vec;
+using D = HWY_FULL(float);
+using DI = HWY_FULL(int32_t);
+constexpr D d;
+constexpr DI di;
+
+void GatherBlockStats(const int16_t* JXL_RESTRICT coeffs,
+ const size_t coeffs_size, int32_t* JXL_RESTRICT nonzeros,
+ int32_t* JXL_RESTRICT sumabs) {
+ for (size_t i = 0; i < coeffs_size; i += Lanes(d)) {
+ size_t k = i % DCTSIZE2;
+ const Rebind<int16_t, DI> di16;
+ const Vec<DI> coeff = PromoteTo(di, Load(di16, coeffs + i));
+ const auto abs_coeff = Abs(coeff);
+ const auto not_0 = Gt(abs_coeff, Zero(di));
+ const auto nzero = IfThenElseZero(not_0, Set(di, 1));
+ Store(Add(nzero, Load(di, nonzeros + k)), di, nonzeros + k);
+ Store(Add(abs_coeff, Load(di, sumabs + k)), di, sumabs + k);
+ }
+}
+
+void DecenterRow(float* row, size_t xsize) {
+ const HWY_CAPPED(float, 8) df;
+ const auto c128 = Set(df, 128.0f / 255);
+ for (size_t x = 0; x < xsize; x += Lanes(df)) {
+ Store(Add(Load(df, row + x), c128), df, row + x);
+ }
+}
+
+void DitherRow(j_decompress_ptr cinfo, float* row, int c, size_t y,
+ size_t xsize) {
+ jpeg_decomp_master* m = cinfo->master;
+ if (!m->dither_[c]) return;
+ const float* dither_row =
+ &m->dither_[c][(y & m->dither_mask_) * m->dither_size_];
+ for (size_t x = 0; x < xsize; ++x) {
+ row[x] += dither_row[x & m->dither_mask_];
+ }
+}
+
+template <typename T>
+void StoreUnsignedRow(float* JXL_RESTRICT input[], size_t x0, size_t len,
+ size_t num_channels, float multiplier, T* output) {
+ const HWY_CAPPED(float, 8) d;
+ auto zero = Zero(d);
+ auto mul = Set(d, multiplier);
+ const Rebind<T, decltype(d)> du;
+#if JXL_MEMORY_SANITIZER
+ const size_t padding = hwy::RoundUpTo(len, Lanes(d)) - len;
+ for (size_t c = 0; c < num_channels; ++c) {
+ __msan_unpoison(input[c] + x0 + len, sizeof(input[c][0]) * padding);
+ }
+#endif
+ if (num_channels == 1) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul);
+ StoreU(DemoteTo(du, NearestInt(v0)), du, &output[i]);
+ }
+ } else if (num_channels == 2) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul);
+ auto v1 = Clamp(zero, Mul(LoadU(d, &input[1][x0 + i]), mul), mul);
+ StoreInterleaved2(DemoteTo(du, NearestInt(v0)),
+ DemoteTo(du, NearestInt(v1)), du, &output[2 * i]);
+ }
+ } else if (num_channels == 3) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul);
+ auto v1 = Clamp(zero, Mul(LoadU(d, &input[1][x0 + i]), mul), mul);
+ auto v2 = Clamp(zero, Mul(LoadU(d, &input[2][x0 + i]), mul), mul);
+ StoreInterleaved3(DemoteTo(du, NearestInt(v0)),
+ DemoteTo(du, NearestInt(v1)),
+ DemoteTo(du, NearestInt(v2)), du, &output[3 * i]);
+ }
+ } else if (num_channels == 4) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul);
+ auto v1 = Clamp(zero, Mul(LoadU(d, &input[1][x0 + i]), mul), mul);
+ auto v2 = Clamp(zero, Mul(LoadU(d, &input[2][x0 + i]), mul), mul);
+ auto v3 = Clamp(zero, Mul(LoadU(d, &input[3][x0 + i]), mul), mul);
+ StoreInterleaved4(DemoteTo(du, NearestInt(v0)),
+ DemoteTo(du, NearestInt(v1)),
+ DemoteTo(du, NearestInt(v2)),
+ DemoteTo(du, NearestInt(v3)), du, &output[4 * i]);
+ }
+ }
+#if JXL_MEMORY_SANITIZER
+ __msan_poison(output + num_channels * len,
+ sizeof(output[0]) * num_channels * padding);
+#endif
+}
+
+void StoreFloatRow(float* JXL_RESTRICT input[3], size_t x0, size_t len,
+ size_t num_channels, float* output) {
+ const HWY_CAPPED(float, 8) d;
+ if (num_channels == 1) {
+ memcpy(output, input[0] + x0, len * sizeof(output[0]));
+ } else if (num_channels == 2) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ StoreInterleaved2(LoadU(d, &input[0][x0 + i]),
+ LoadU(d, &input[1][x0 + i]), d, &output[2 * i]);
+ }
+ } else if (num_channels == 3) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ StoreInterleaved3(LoadU(d, &input[0][x0 + i]),
+ LoadU(d, &input[1][x0 + i]),
+ LoadU(d, &input[2][x0 + i]), d, &output[3 * i]);
+ }
+ } else if (num_channels == 4) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ StoreInterleaved4(LoadU(d, &input[0][x0 + i]),
+ LoadU(d, &input[1][x0 + i]),
+ LoadU(d, &input[2][x0 + i]),
+ LoadU(d, &input[3][x0 + i]), d, &output[4 * i]);
+ }
+ }
+}
+
+static constexpr float kFSWeightMR = 7.0f / 16.0f;
+static constexpr float kFSWeightBL = 3.0f / 16.0f;
+static constexpr float kFSWeightBM = 5.0f / 16.0f;
+static constexpr float kFSWeightBR = 1.0f / 16.0f;
+
+float LimitError(float error) {
+ float abserror = std::abs(error);
+ if (abserror > 48.0f) {
+ abserror = 32.0f;
+ } else if (abserror > 16.0f) {
+ abserror = 0.5f * abserror + 8.0f;
+ }
+ return error > 0.0f ? abserror : -abserror;
+}
+
+void WriteToOutput(j_decompress_ptr cinfo, float* JXL_RESTRICT rows[],
+ size_t xoffset, size_t len, size_t num_channels,
+ uint8_t* JXL_RESTRICT output) {
+ jpeg_decomp_master* m = cinfo->master;
+ uint8_t* JXL_RESTRICT scratch_space = m->output_scratch_;
+ if (cinfo->quantize_colors && m->quant_pass_ == 1) {
+ float* error_row[kMaxComponents];
+ float* next_error_row[kMaxComponents];
+ if (cinfo->dither_mode == JDITHER_ORDERED) {
+ for (size_t c = 0; c < num_channels; ++c) {
+ DitherRow(cinfo, &rows[c][xoffset], c, cinfo->output_scanline,
+ cinfo->output_width);
+ }
+ } else if (cinfo->dither_mode == JDITHER_FS) {
+ for (size_t c = 0; c < num_channels; ++c) {
+ if (cinfo->output_scanline % 2 == 0) {
+ error_row[c] = m->error_row_[c];
+ next_error_row[c] = m->error_row_[c + kMaxComponents];
+ } else {
+ error_row[c] = m->error_row_[c + kMaxComponents];
+ next_error_row[c] = m->error_row_[c];
+ }
+ memset(next_error_row[c], 0.0, cinfo->output_width * sizeof(float));
+ }
+ }
+ const float mul = 255.0f;
+ if (cinfo->dither_mode != JDITHER_FS) {
+ StoreUnsignedRow(rows, xoffset, len, num_channels, mul, scratch_space);
+ }
+ for (size_t i = 0; i < len; ++i) {
+ uint8_t* pixel = &scratch_space[num_channels * i];
+ if (cinfo->dither_mode == JDITHER_FS) {
+ for (size_t c = 0; c < num_channels; ++c) {
+ float val = rows[c][i] * mul + LimitError(error_row[c][i]);
+ pixel[c] = std::round(std::min(255.0f, std::max(0.0f, val)));
+ }
+ }
+ int index = LookupColorIndex(cinfo, pixel);
+ output[i] = index;
+ if (cinfo->dither_mode == JDITHER_FS) {
+ size_t prev_i = i > 0 ? i - 1 : 0;
+ size_t next_i = i + 1 < len ? i + 1 : len - 1;
+ for (size_t c = 0; c < num_channels; ++c) {
+ float error = pixel[c] - cinfo->colormap[c][index];
+ error_row[c][next_i] += kFSWeightMR * error;
+ next_error_row[c][prev_i] += kFSWeightBL * error;
+ next_error_row[c][i] += kFSWeightBM * error;
+ next_error_row[c][next_i] += kFSWeightBR * error;
+ }
+ }
+ }
+ } else if (m->output_data_type_ == JPEGLI_TYPE_UINT8) {
+ const float mul = 255.0;
+ StoreUnsignedRow(rows, xoffset, len, num_channels, mul, scratch_space);
+ memcpy(output, scratch_space, len * num_channels);
+ } else if (m->output_data_type_ == JPEGLI_TYPE_UINT16) {
+ const float mul = 65535.0;
+ uint16_t* tmp = reinterpret_cast<uint16_t*>(scratch_space);
+ StoreUnsignedRow(rows, xoffset, len, num_channels, mul, tmp);
+ if (m->swap_endianness_) {
+ const HWY_CAPPED(uint16_t, 8) du;
+ size_t output_len = len * num_channels;
+ for (size_t j = 0; j < output_len; j += Lanes(du)) {
+ auto v = LoadU(du, tmp + j);
+ auto vswap = Or(ShiftRightSame(v, 8), ShiftLeftSame(v, 8));
+ StoreU(vswap, du, tmp + j);
+ }
+ }
+ memcpy(output, tmp, len * num_channels * 2);
+ } else if (m->output_data_type_ == JPEGLI_TYPE_FLOAT) {
+ float* tmp = reinterpret_cast<float*>(scratch_space);
+ StoreFloatRow(rows, xoffset, len, num_channels, tmp);
+ if (m->swap_endianness_) {
+ size_t output_len = len * num_channels;
+ for (size_t j = 0; j < output_len; ++j) {
+ tmp[j] = BSwapFloat(tmp[j]);
+ }
+ }
+ memcpy(output, tmp, len * num_channels * 4);
+ }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace jpegli {
+
+HWY_EXPORT(GatherBlockStats);
+HWY_EXPORT(WriteToOutput);
+HWY_EXPORT(DecenterRow);
+
+void GatherBlockStats(const int16_t* JXL_RESTRICT coeffs,
+ const size_t coeffs_size, int32_t* JXL_RESTRICT nonzeros,
+ int32_t* JXL_RESTRICT sumabs) {
+ return HWY_DYNAMIC_DISPATCH(GatherBlockStats)(coeffs, coeffs_size, nonzeros,
+ sumabs);
+}
+
+void WriteToOutput(j_decompress_ptr cinfo, float* JXL_RESTRICT rows[],
+ size_t xoffset, size_t len, size_t num_channels,
+ uint8_t* JXL_RESTRICT output) {
+ return HWY_DYNAMIC_DISPATCH(WriteToOutput)(cinfo, rows, xoffset, len,
+ num_channels, output);
+}
+
+void DecenterRow(float* row, size_t xsize) {
+ return HWY_DYNAMIC_DISPATCH(DecenterRow)(row, xsize);
+}
+
+// Padding for horizontal chroma upsampling.
+constexpr size_t kPaddingLeft = 64;
+constexpr size_t kPaddingRight = 64;
+
+bool ShouldApplyDequantBiases(j_decompress_ptr cinfo, int ci) {
+ const auto& compinfo = cinfo->comp_info[ci];
+ return (compinfo.h_samp_factor == cinfo->max_h_samp_factor &&
+ compinfo.v_samp_factor == cinfo->max_v_samp_factor);
+}
+
+// See the following article for the details:
+// J. R. Price and M. Rabbani, "Dequantization bias for JPEG decompression"
+// Proceedings International Conference on Information Technology: Coding and
+// Computing (Cat. No.PR00540), 2000, pp. 30-35, doi: 10.1109/ITCC.2000.844179.
+void ComputeOptimalLaplacianBiases(const int num_blocks, const int* nonzeros,
+ const int* sumabs, float* biases) {
+ for (size_t k = 1; k < DCTSIZE2; ++k) {
+ if (nonzeros[k] == 0) {
+ biases[k] = 0.5f;
+ continue;
+ }
+ // Notation adapted from the article
+ float N = num_blocks;
+ float N1 = nonzeros[k];
+ float N0 = num_blocks - N1;
+ float S = sumabs[k];
+ // Compute gamma from N0, N1, N, S (eq. 11), with A and B being just
+ // temporary grouping of terms.
+ float A = 4.0 * S + 2.0 * N;
+ float B = 4.0 * S - 2.0 * N1;
+ float gamma = (-1.0 * N0 + std::sqrt(N0 * N0 * 1.0 + A * B)) / A;
+ float gamma2 = gamma * gamma;
+ // The bias is computed from gamma with (eq. 5), where the quantization
+ // multiplier Q can be factored out and thus the bias can be applied
+ // directly on the quantized coefficient.
+ biases[k] =
+ 0.5 * (((1.0 + gamma2) / (1.0 - gamma2)) + 1.0 / std::log(gamma));
+ }
+}
+
+constexpr std::array<int, SAVED_COEFS> Q_POS = {0, 1, 8, 16, 9,
+ 2, 3, 10, 17, 24};
+
+bool is_nonzero_quantizers(const JQUANT_TBL* qtable) {
+ return std::all_of(Q_POS.begin(), Q_POS.end(),
+ [&](int pos) { return qtable->quantval[pos] != 0; });
+}
+
+// Determine whether smoothing should be applied during decompression
+bool do_smoothing(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ bool smoothing_useful = false;
+
+ if (!cinfo->progressive_mode || cinfo->coef_bits == nullptr) {
+ return false;
+ }
+ auto coef_bits_latch = m->coef_bits_latch;
+ auto prev_coef_bits_latch = m->prev_coef_bits_latch;
+
+ for (int ci = 0; ci < cinfo->num_components; ci++) {
+ jpeg_component_info* compptr = &cinfo->comp_info[ci];
+ JQUANT_TBL* qtable = compptr->quant_table;
+ int* coef_bits = cinfo->coef_bits[ci];
+ int* prev_coef_bits = cinfo->coef_bits[ci + cinfo->num_components];
+
+ // Return early if conditions for smoothing are not met
+ if (qtable == nullptr || !is_nonzero_quantizers(qtable) ||
+ coef_bits[0] < 0) {
+ return false;
+ }
+
+ coef_bits_latch[ci][0] = coef_bits[0];
+
+ for (int coefi = 1; coefi < SAVED_COEFS; coefi++) {
+ prev_coef_bits_latch[ci][coefi] =
+ cinfo->input_scan_number > 1 ? prev_coef_bits[coefi] : -1;
+ if (coef_bits[coefi] != 0) {
+ smoothing_useful = true;
+ }
+ coef_bits_latch[ci][coefi] = coef_bits[coefi];
+ }
+ }
+
+ return smoothing_useful;
+}
+
+void PredictSmooth(j_decompress_ptr cinfo, JBLOCKARRAY blocks, int component,
+ size_t bx, int iy) {
+ const size_t imcu_row = cinfo->output_iMCU_row;
+ int16_t* scratch = cinfo->master->smoothing_scratch_;
+ std::vector<int> Q_VAL(SAVED_COEFS);
+ int* coef_bits;
+
+ std::array<std::array<int, 5>, 5> dc_values;
+ auto& compinfo = cinfo->comp_info[component];
+ const size_t by0 = imcu_row * compinfo.v_samp_factor;
+ const size_t by = by0 + iy;
+
+ int prev_iy = by > 0 ? iy - 1 : 0;
+ int prev_prev_iy = by > 1 ? iy - 2 : prev_iy;
+ int next_iy = by + 1 < compinfo.height_in_blocks ? iy + 1 : iy;
+ int next_next_iy = by + 2 < compinfo.height_in_blocks ? iy + 2 : next_iy;
+
+ const int16_t* cur_row = blocks[iy][bx];
+ const int16_t* prev_row = blocks[prev_iy][bx];
+ const int16_t* prev_prev_row = blocks[prev_prev_iy][bx];
+ const int16_t* next_row = blocks[next_iy][bx];
+ const int16_t* next_next_row = blocks[next_next_iy][bx];
+
+ int prev_block_ind = bx ? -DCTSIZE2 : 0;
+ int prev_prev_block_ind = bx > 1 ? -2 * DCTSIZE2 : prev_block_ind;
+ int next_block_ind = bx + 1 < compinfo.width_in_blocks ? DCTSIZE2 : 0;
+ int next_next_block_ind =
+ bx + 2 < compinfo.width_in_blocks ? DCTSIZE2 * 2 : next_block_ind;
+
+ std::array<const int16_t*, 5> row_ptrs = {prev_prev_row, prev_row, cur_row,
+ next_row, next_next_row};
+ std::array<int, 5> block_inds = {prev_prev_block_ind, prev_block_ind, 0,
+ next_block_ind, next_next_block_ind};
+
+ memcpy(scratch, cur_row, DCTSIZE2 * sizeof(cur_row[0]));
+
+ for (int r = 0; r < 5; ++r) {
+ for (int c = 0; c < 5; ++c) {
+ dc_values[r][c] = row_ptrs[r][block_inds[c]];
+ }
+ }
+ // Get the correct coef_bits: In case of an incomplete scan, we use the
+ // prev coeficients.
+ if (cinfo->output_iMCU_row + 1 > cinfo->input_iMCU_row) {
+ coef_bits = cinfo->master->prev_coef_bits_latch[component];
+ } else {
+ coef_bits = cinfo->master->coef_bits_latch[component];
+ }
+
+ bool change_dc = true;
+ for (int i = 1; i < SAVED_COEFS; i++) {
+ if (coef_bits[i] != -1) {
+ change_dc = false;
+ break;
+ }
+ }
+
+ JQUANT_TBL* quanttbl = cinfo->quant_tbl_ptrs[compinfo.quant_tbl_no];
+ for (size_t i = 0; i < 6; ++i) {
+ Q_VAL[i] = quanttbl->quantval[Q_POS[i]];
+ }
+ if (change_dc) {
+ for (size_t i = 6; i < SAVED_COEFS; ++i) {
+ Q_VAL[i] = quanttbl->quantval[Q_POS[i]];
+ }
+ }
+ auto calculate_dct_value = [&](int coef_index) {
+ int64_t num = 0;
+ int pred;
+ int Al;
+ // we use the symmetry of the smoothing matrices by transposing the 5x5 dc
+ // matrix in that case.
+ bool swap_indices = coef_index == 2 || coef_index == 5 || coef_index == 8 ||
+ coef_index == 9;
+ auto dc = [&](int i, int j) {
+ return swap_indices ? dc_values[j][i] : dc_values[i][j];
+ };
+ Al = coef_bits[coef_index];
+ switch (coef_index) {
+ case 0:
+ // set the DC
+ num = (-2 * dc(0, 0) - 6 * dc(0, 1) - 8 * dc(0, 2) - 6 * dc(0, 3) -
+ 2 * dc(0, 4) - 6 * dc(1, 0) + 6 * dc(1, 1) + 42 * dc(1, 2) +
+ 6 * dc(1, 3) - 6 * dc(1, 4) - 8 * dc(2, 0) + 42 * dc(2, 1) +
+ 152 * dc(2, 2) + 42 * dc(2, 3) - 8 * dc(2, 4) - 6 * dc(3, 0) +
+ 6 * dc(3, 1) + 42 * dc(3, 2) + 6 * dc(3, 3) - 6 * dc(3, 4) -
+ 2 * dc(4, 0) - 6 * dc(4, 1) - 8 * dc(4, 2) - 6 * dc(4, 3) -
+ 2 * dc(4, 4));
+ // special case: for the DC the dequantization is different
+ Al = 0;
+ break;
+ case 1:
+ case 2:
+ // set Q01 or Q10
+ num = (change_dc ? (-dc(0, 0) - dc(0, 1) + dc(0, 3) + dc(0, 4) -
+ 3 * dc(1, 0) + 13 * dc(1, 1) - 13 * dc(1, 3) +
+ 3 * dc(1, 4) - 3 * dc(2, 0) + 38 * dc(2, 1) -
+ 38 * dc(2, 3) + 3 * dc(2, 4) - 3 * dc(3, 0) +
+ 13 * dc(3, 1) - 13 * dc(3, 3) + 3 * dc(3, 4) -
+ dc(4, 0) - dc(4, 1) + dc(4, 3) + dc(4, 4))
+ : (-7 * dc(2, 0) + 50 * dc(2, 1) - 50 * dc(2, 3) +
+ 7 * dc(2, 4)));
+ break;
+ case 3:
+ case 5:
+ // set Q02 or Q20
+ num = (change_dc
+ ? dc(0, 2) + 2 * dc(1, 1) + 7 * dc(1, 2) + 2 * dc(1, 3) -
+ 5 * dc(2, 1) - 14 * dc(2, 2) - 5 * dc(2, 3) +
+ 2 * dc(3, 1) + 7 * dc(3, 2) + 2 * dc(3, 3) + dc(4, 2)
+ : (-dc(0, 2) + 13 * dc(1, 2) - 24 * dc(2, 2) +
+ 13 * dc(3, 2) - dc(4, 2)));
+ break;
+ case 4:
+ // set Q11
+ num =
+ (change_dc ? -dc(0, 0) + dc(0, 4) + 9 * dc(1, 1) - 9 * dc(1, 3) -
+ 9 * dc(3, 1) + 9 * dc(3, 3) + dc(4, 0) - dc(4, 4)
+ : (dc(1, 4) + dc(3, 0) - 10 * dc(3, 1) + 10 * dc(3, 3) -
+ dc(0, 1) - dc(3, 4) + dc(4, 1) - dc(4, 3) + dc(0, 3) -
+ dc(1, 0) + 10 * dc(1, 1) - 10 * dc(1, 3)));
+ break;
+ case 6:
+ case 9:
+ // set Q03 or Q30
+ num = (dc(1, 1) - dc(1, 3) + 2 * dc(2, 1) - 2 * dc(2, 3) + dc(3, 1) -
+ dc(3, 3));
+ break;
+ case 7:
+ case 8:
+ // set Q12 and Q21
+ num = (dc(1, 1) - 3 * dc(1, 2) + dc(1, 3) - dc(3, 1) + 3 * dc(3, 2) -
+ dc(3, 3));
+ break;
+ }
+ num = Q_VAL[0] * num;
+ if (num >= 0) {
+ pred = ((Q_VAL[coef_index] << 7) + num) / (Q_VAL[coef_index] << 8);
+ if (Al > 0 && pred >= (1 << Al)) pred = (1 << Al) - 1;
+ } else {
+ pred = ((Q_VAL[coef_index] << 7) - num) / (Q_VAL[coef_index] << 8);
+ if (Al > 0 && pred >= (1 << Al)) pred = (1 << Al) - 1;
+ pred = -pred;
+ }
+ return static_cast<int16_t>(pred);
+ };
+
+ int loop_end = change_dc ? SAVED_COEFS : 6;
+ for (int i = 1; i < loop_end; ++i) {
+ if (coef_bits[i] != 0 && scratch[Q_POS[i]] == 0) {
+ scratch[Q_POS[i]] = calculate_dct_value(i);
+ }
+ }
+ if (change_dc) {
+ scratch[0] = calculate_dct_value(0);
+ }
+}
+
+void PrepareForOutput(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ size_t iMCU_width = cinfo->max_h_samp_factor * m->min_scaled_dct_size;
+ size_t output_stride = m->iMCU_cols_ * iMCU_width;
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ const auto& comp = cinfo->comp_info[c];
+ size_t cheight = comp.v_samp_factor * m->scaled_dct_size[c];
+ m->raw_height_[c] = cinfo->total_iMCU_rows * cheight;
+ m->raw_output_[c].Allocate(cinfo, 3 * cheight, output_stride);
+ }
+ int num_all_components =
+ std::max(cinfo->out_color_components, cinfo->num_components);
+ for (int c = 0; c < num_all_components; ++c) {
+ m->render_output_[c].Allocate(cinfo, cinfo->max_v_samp_factor,
+ output_stride);
+ }
+ m->idct_scratch_ = Allocate<float>(cinfo, 5 * DCTSIZE2, JPOOL_IMAGE_ALIGNED);
+ m->upsample_scratch_ = Allocate<float>(
+ cinfo, output_stride + kPaddingLeft + kPaddingRight, JPOOL_IMAGE_ALIGNED);
+ size_t bytes_per_sample = jpegli_bytes_per_sample(m->output_data_type_);
+ size_t bytes_per_pixel = cinfo->out_color_components * bytes_per_sample;
+ size_t scratch_stride = RoundUpTo(output_stride, HWY_ALIGNMENT);
+ m->output_scratch_ = Allocate<uint8_t>(
+ cinfo, bytes_per_pixel * scratch_stride, JPOOL_IMAGE_ALIGNED);
+ m->smoothing_scratch_ =
+ Allocate<int16_t>(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED);
+ bool smoothing = do_smoothing(cinfo);
+ m->apply_smoothing = smoothing && cinfo->do_block_smoothing;
+ size_t coeffs_per_block = cinfo->num_components * DCTSIZE2;
+ m->nonzeros_ = Allocate<int>(cinfo, coeffs_per_block, JPOOL_IMAGE_ALIGNED);
+ m->sumabs_ = Allocate<int>(cinfo, coeffs_per_block, JPOOL_IMAGE_ALIGNED);
+ memset(m->nonzeros_, 0, coeffs_per_block * sizeof(m->nonzeros_[0]));
+ memset(m->sumabs_, 0, coeffs_per_block * sizeof(m->sumabs_[0]));
+ memset(m->num_processed_blocks_, 0, sizeof(m->num_processed_blocks_));
+ m->biases_ = Allocate<float>(cinfo, coeffs_per_block, JPOOL_IMAGE_ALIGNED);
+ memset(m->biases_, 0, coeffs_per_block * sizeof(m->biases_[0]));
+ cinfo->output_iMCU_row = 0;
+ cinfo->output_scanline = 0;
+ const float kDequantScale = 1.0f / (8 * 255);
+ if (m->dequant_ == nullptr) {
+ m->dequant_ = Allocate<float>(cinfo, coeffs_per_block, JPOOL_IMAGE_ALIGNED);
+ memset(m->dequant_, 0, coeffs_per_block * sizeof(float));
+ }
+ for (int c = 0; c < cinfo->num_components; c++) {
+ const auto& comp = cinfo->comp_info[c];
+ JQUANT_TBL* table = comp.quant_table;
+ if (table == nullptr) continue;
+ for (size_t k = 0; k < DCTSIZE2; ++k) {
+ m->dequant_[c * DCTSIZE2 + k] = table->quantval[k] * kDequantScale;
+ }
+ }
+ ChooseInverseTransform(cinfo);
+ ChooseColorTransform(cinfo);
+}
+
+void DecodeCurrentiMCURow(j_decompress_ptr cinfo) {
+ jpeg_decomp_master* m = cinfo->master;
+ const size_t imcu_row = cinfo->output_iMCU_row;
+ JBLOCKARRAY ba[kMaxComponents];
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ const jpeg_component_info* comp = &cinfo->comp_info[c];
+ int by0 = imcu_row * comp->v_samp_factor;
+ int block_rows_left = comp->height_in_blocks - by0;
+ int max_block_rows = std::min(comp->v_samp_factor, block_rows_left);
+ int offset = m->streaming_mode_ ? 0 : by0;
+ ba[c] = (*cinfo->mem->access_virt_barray)(
+ reinterpret_cast<j_common_ptr>(cinfo), m->coef_arrays[c], offset,
+ max_block_rows, false);
+ }
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ size_t k0 = c * DCTSIZE2;
+ auto& compinfo = cinfo->comp_info[c];
+ size_t block_row = imcu_row * compinfo.v_samp_factor;
+ if (ShouldApplyDequantBiases(cinfo, c)) {
+ // Update statistics for this iMCU row.
+ for (int iy = 0; iy < compinfo.v_samp_factor; ++iy) {
+ size_t by = block_row + iy;
+ if (by >= compinfo.height_in_blocks) {
+ continue;
+ }
+ int16_t* JXL_RESTRICT coeffs = &ba[c][iy][0][0];
+ size_t num = compinfo.width_in_blocks * DCTSIZE2;
+ GatherBlockStats(coeffs, num, &m->nonzeros_[k0], &m->sumabs_[k0]);
+ m->num_processed_blocks_[c] += compinfo.width_in_blocks;
+ }
+ if (imcu_row % 4 == 3) {
+ // Re-compute optimal biases every few iMCU-rows.
+ ComputeOptimalLaplacianBiases(m->num_processed_blocks_[c],
+ &m->nonzeros_[k0], &m->sumabs_[k0],
+ &m->biases_[k0]);
+ }
+ }
+ RowBuffer<float>* raw_out = &m->raw_output_[c];
+ for (int iy = 0; iy < compinfo.v_samp_factor; ++iy) {
+ size_t by = block_row + iy;
+ if (by >= compinfo.height_in_blocks) {
+ continue;
+ }
+ size_t dctsize = m->scaled_dct_size[c];
+ int16_t* JXL_RESTRICT row_in = &ba[c][iy][0][0];
+ float* JXL_RESTRICT row_out = raw_out->Row(by * dctsize);
+ for (size_t bx = 0; bx < compinfo.width_in_blocks; ++bx) {
+ if (m->apply_smoothing) {
+ PredictSmooth(cinfo, ba[c], c, bx, iy);
+ (*m->inverse_transform[c])(m->smoothing_scratch_, &m->dequant_[k0],
+ &m->biases_[k0], m->idct_scratch_,
+ &row_out[bx * dctsize], raw_out->stride(),
+ dctsize);
+ } else {
+ (*m->inverse_transform[c])(&row_in[bx * DCTSIZE2], &m->dequant_[k0],
+ &m->biases_[k0], m->idct_scratch_,
+ &row_out[bx * dctsize], raw_out->stride(),
+ dctsize);
+ }
+ }
+ if (m->streaming_mode_) {
+ memset(row_in, 0, compinfo.width_in_blocks * sizeof(JBLOCK));
+ }
+ }
+ }
+}
+
+void ProcessRawOutput(j_decompress_ptr cinfo, JSAMPIMAGE data) {
+ jpegli::DecodeCurrentiMCURow(cinfo);
+ jpeg_decomp_master* m = cinfo->master;
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ const auto& compinfo = cinfo->comp_info[c];
+ size_t comp_width = compinfo.width_in_blocks * DCTSIZE;
+ size_t comp_height = compinfo.height_in_blocks * DCTSIZE;
+ size_t comp_nrows = compinfo.v_samp_factor * DCTSIZE;
+ size_t y0 = cinfo->output_iMCU_row * compinfo.v_samp_factor * DCTSIZE;
+ size_t y1 = std::min(y0 + comp_nrows, comp_height);
+ for (size_t y = y0; y < y1; ++y) {
+ float* rows[1] = {m->raw_output_[c].Row(y)};
+ uint8_t* output = data[c][y - y0];
+ DecenterRow(rows[0], comp_width);
+ WriteToOutput(cinfo, rows, 0, comp_width, 1, output);
+ }
+ }
+ ++cinfo->output_iMCU_row;
+ cinfo->output_scanline += cinfo->max_v_samp_factor * DCTSIZE;
+ if (cinfo->output_scanline >= cinfo->output_height) {
+ ++m->output_passes_done_;
+ }
+}
+
+void ProcessOutput(j_decompress_ptr cinfo, size_t* num_output_rows,
+ JSAMPARRAY scanlines, size_t max_output_rows) {
+ jpeg_decomp_master* m = cinfo->master;
+ const int vfactor = cinfo->max_v_samp_factor;
+ const int hfactor = cinfo->max_h_samp_factor;
+ const size_t imcu_row = cinfo->output_iMCU_row;
+ const size_t imcu_height = vfactor * m->min_scaled_dct_size;
+ const size_t imcu_width = hfactor * m->min_scaled_dct_size;
+ const size_t output_width = m->iMCU_cols_ * imcu_width;
+ if (imcu_row == cinfo->total_iMCU_rows ||
+ (imcu_row > 1 && cinfo->output_scanline < (imcu_row - 1) * imcu_height)) {
+ // We are ready to output some scanlines.
+ size_t ybegin = cinfo->output_scanline;
+ size_t yend =
+ (imcu_row == cinfo->total_iMCU_rows ? cinfo->output_height
+ : (imcu_row - 1) * imcu_height);
+ yend = std::min<size_t>(yend, ybegin + max_output_rows - *num_output_rows);
+ size_t yb = (ybegin / vfactor) * vfactor;
+ size_t ye = DivCeil(yend, vfactor) * vfactor;
+ for (size_t y = yb; y < ye; y += vfactor) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ RowBuffer<float>* raw_out = &m->raw_output_[c];
+ RowBuffer<float>* render_out = &m->render_output_[c];
+ int line_groups = vfactor / m->v_factor[c];
+ size_t yc = y / m->v_factor[c];
+ for (int dy = 0; dy < line_groups; ++dy) {
+ if (cinfo->do_fancy_upsampling && m->v_factor[c] == 2) {
+ size_t ymid = yc + dy;
+ const float* JXL_RESTRICT row_mid = raw_out->Row(ymid);
+ const float* JXL_RESTRICT row_top =
+ ymid == 0 ? row_mid : raw_out->Row(ymid - 1);
+ const float* JXL_RESTRICT row_bot = ymid + 1 == m->raw_height_[c]
+ ? row_mid
+ : raw_out->Row(ymid + 1);
+ Upsample2Vertical(row_top, row_mid, row_bot,
+ render_out->Row(2 * dy),
+ render_out->Row(2 * dy + 1), output_width);
+ } else {
+ for (int yix = 0; yix < m->v_factor[c]; ++yix) {
+ size_t ymid = yc + dy;
+ memcpy(render_out->Row(m->v_factor[c] * dy + yix),
+ raw_out->Row(ymid), raw_out->xsize() * sizeof(float));
+ }
+ }
+ }
+ }
+ for (int yix = 0; yix < vfactor; ++yix) {
+ if (y + yix < ybegin || y + yix >= yend) continue;
+ float* rows[kMaxComponents];
+ int num_all_components =
+ std::max(cinfo->out_color_components, cinfo->num_components);
+ for (int c = 0; c < num_all_components; ++c) {
+ rows[c] = m->render_output_[c].Row(yix);
+ }
+ (*m->color_transform)(rows, output_width);
+ for (int c = 0; c < cinfo->out_color_components; ++c) {
+ // Undo the centering of the sample values around zero.
+ DecenterRow(rows[c], output_width);
+ }
+ if (scanlines) {
+ uint8_t* output = scanlines[*num_output_rows];
+ WriteToOutput(cinfo, rows, m->xoffset_, cinfo->output_width,
+ cinfo->out_color_components, output);
+ }
+ JXL_ASSERT(cinfo->output_scanline == y + yix);
+ ++cinfo->output_scanline;
+ ++(*num_output_rows);
+ if (cinfo->output_scanline == cinfo->output_height) {
+ ++m->output_passes_done_;
+ }
+ }
+ }
+ } else {
+ DecodeCurrentiMCURow(cinfo);
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ if (m->h_factor[c] == 1) continue;
+ const auto& compinfo = cinfo->comp_info[c];
+ RowBuffer<float>* raw_out = &m->raw_output_[c];
+ size_t cheight = compinfo.v_samp_factor * m->scaled_dct_size[c];
+ size_t y0 = imcu_row * cheight;
+ if (cinfo->do_fancy_upsampling && m->h_factor[c] == 2) {
+ for (size_t iy = 0; iy < cheight; ++iy) {
+ float* JXL_RESTRICT row = raw_out->Row(y0 + iy);
+ Upsample2Horizontal(row, m->upsample_scratch_, output_width);
+ }
+ } else {
+ for (size_t iy = 0; iy < cheight; ++iy) {
+ float* JXL_RESTRICT row = raw_out->Row(y0 + iy);
+ float* JXL_RESTRICT tmp = m->upsample_scratch_;
+ // TODO(szabadka) SIMDify this.
+ for (size_t x = 0; x < output_width; ++x) {
+ tmp[x] = row[x / m->h_factor[c]];
+ }
+ memcpy(row, tmp, output_width * sizeof(tmp[0]));
+ }
+ }
+ }
+ ++cinfo->output_iMCU_row;
+ }
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jpegli/render.h b/third_party/jpeg-xl/lib/jpegli/render.h
new file mode 100644
index 0000000000..93b80d975a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/render.h
@@ -0,0 +1,28 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_RENDER_H_
+#define LIB_JPEGLI_RENDER_H_
+
+/* clang-format off */
+#include <stdint.h>
+#include <stdio.h>
+#include <jpeglib.h>
+/* clang-format on */
+
+#include <vector>
+
+namespace jpegli {
+
+void PrepareForOutput(j_decompress_ptr cinfo);
+
+void ProcessOutput(j_decompress_ptr cinfo, size_t* num_output_rows,
+ JSAMPARRAY scanlines, size_t max_output_rows);
+
+void ProcessRawOutput(j_decompress_ptr cinfo, JSAMPIMAGE data);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_RENDER_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/simd.cc b/third_party/jpeg-xl/lib/jpegli/simd.cc
new file mode 100644
index 0000000000..5e84939342
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/simd.cc
@@ -0,0 +1,38 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/simd.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/simd.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+size_t GetVectorSize() { return HWY_LANES(uint8_t); }
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+namespace {
+
+HWY_EXPORT(GetVectorSize); // Local function.
+
+} // namespace
+
+size_t VectorSize() {
+ static size_t bytes = HWY_DYNAMIC_DISPATCH(GetVectorSize)();
+ return bytes;
+}
+
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jpegli/simd.h b/third_party/jpeg-xl/lib/jpegli/simd.h
new file mode 100644
index 0000000000..aec772e2d4
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/simd.h
@@ -0,0 +1,18 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_SIMD_H_
+#define LIB_JPEGLI_SIMD_H_
+
+#include <stddef.h>
+
+namespace jpegli {
+
+// Returns SIMD vector size in bytes.
+size_t VectorSize();
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_SIMD_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/source_manager.cc b/third_party/jpeg-xl/lib/jpegli/source_manager.cc
new file mode 100644
index 0000000000..0b8e0a5c8c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/source_manager.cc
@@ -0,0 +1,90 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+
+namespace jpegli {
+
+void init_mem_source(j_decompress_ptr cinfo) {}
+void init_stdio_source(j_decompress_ptr cinfo) {}
+
+void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {
+ if (num_bytes <= 0) return;
+ while (num_bytes > static_cast<long>(cinfo->src->bytes_in_buffer)) {
+ num_bytes -= cinfo->src->bytes_in_buffer;
+ (*cinfo->src->fill_input_buffer)(cinfo);
+ }
+ cinfo->src->next_input_byte += num_bytes;
+ cinfo->src->bytes_in_buffer -= num_bytes;
+}
+
+void term_source(j_decompress_ptr cinfo) {}
+
+boolean EmitFakeEoiMarker(j_decompress_ptr cinfo) {
+ static constexpr uint8_t kFakeEoiMarker[2] = {0xff, 0xd9};
+ cinfo->src->next_input_byte = kFakeEoiMarker;
+ cinfo->src->bytes_in_buffer = 2;
+ return TRUE;
+}
+
+constexpr size_t kStdioBufferSize = 64 << 10;
+
+struct StdioSourceManager {
+ jpeg_source_mgr pub;
+ FILE* f;
+ uint8_t* buffer;
+
+ static boolean fill_input_buffer(j_decompress_ptr cinfo) {
+ auto src = reinterpret_cast<StdioSourceManager*>(cinfo->src);
+ size_t num_bytes_read = fread(src->buffer, 1, kStdioBufferSize, src->f);
+ if (num_bytes_read == 0) {
+ return EmitFakeEoiMarker(cinfo);
+ }
+ src->pub.next_input_byte = src->buffer;
+ src->pub.bytes_in_buffer = num_bytes_read;
+ return TRUE;
+ }
+};
+
+} // namespace jpegli
+
+void jpegli_mem_src(j_decompress_ptr cinfo, const unsigned char* inbuffer,
+ unsigned long insize) {
+ if (cinfo->src && cinfo->src->init_source != jpegli::init_mem_source) {
+ JPEGLI_ERROR("jpegli_mem_src: a different source manager was already set");
+ }
+ if (!cinfo->src) {
+ cinfo->src = jpegli::Allocate<jpeg_source_mgr>(cinfo, 1);
+ }
+ cinfo->src->next_input_byte = inbuffer;
+ cinfo->src->bytes_in_buffer = insize;
+ cinfo->src->init_source = jpegli::init_mem_source;
+ cinfo->src->fill_input_buffer = jpegli::EmitFakeEoiMarker;
+ cinfo->src->skip_input_data = jpegli::skip_input_data;
+ cinfo->src->resync_to_restart = jpegli_resync_to_restart;
+ cinfo->src->term_source = jpegli::term_source;
+}
+
+void jpegli_stdio_src(j_decompress_ptr cinfo, FILE* infile) {
+ if (cinfo->src && cinfo->src->init_source != jpegli::init_stdio_source) {
+ JPEGLI_ERROR("jpeg_stdio_src: a different source manager was already set");
+ }
+ if (!cinfo->src) {
+ cinfo->src = reinterpret_cast<jpeg_source_mgr*>(
+ jpegli::Allocate<jpegli::StdioSourceManager>(cinfo, 1));
+ }
+ auto src = reinterpret_cast<jpegli::StdioSourceManager*>(cinfo->src);
+ src->f = infile;
+ src->buffer = jpegli::Allocate<uint8_t>(cinfo, jpegli::kStdioBufferSize);
+ src->pub.next_input_byte = src->buffer;
+ src->pub.bytes_in_buffer = 0;
+ src->pub.init_source = jpegli::init_stdio_source;
+ src->pub.fill_input_buffer = jpegli::StdioSourceManager::fill_input_buffer;
+ src->pub.skip_input_data = jpegli::skip_input_data;
+ src->pub.resync_to_restart = jpegli_resync_to_restart;
+ src->pub.term_source = jpegli::term_source;
+}
diff --git a/third_party/jpeg-xl/lib/jpegli/source_manager_test.cc b/third_party/jpeg-xl/lib/jpegli/source_manager_test.cc
new file mode 100644
index 0000000000..c8d1fbc053
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/source_manager_test.cc
@@ -0,0 +1,141 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include <cmath>
+#include <vector>
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/base/file_io.h"
+#include "lib/jxl/base/status.h"
+
+namespace jpegli {
+namespace {
+
+void ReadOutputImage(j_decompress_ptr cinfo, TestImage* output) {
+ jpegli_read_header(cinfo, /*require_image=*/TRUE);
+ jpegli_start_decompress(cinfo);
+ output->ysize = cinfo->output_height;
+ output->xsize = cinfo->output_width;
+ output->components = cinfo->num_components;
+ output->AllocatePixels();
+ size_t stride = cinfo->output_width * cinfo->num_components;
+ while (cinfo->output_scanline < cinfo->output_height) {
+ JSAMPROW scanline = &output->pixels[cinfo->output_scanline * stride];
+ jpegli_read_scanlines(cinfo, &scanline, 1);
+ }
+ jpegli_finish_decompress(cinfo);
+}
+
+struct TestConfig {
+ std::string fn;
+ std::string fn_desc;
+ DecompressParams dparams;
+};
+
+class SourceManagerTestParam : public ::testing::TestWithParam<TestConfig> {};
+
+TEST_P(SourceManagerTestParam, TestStdioSourceManager) {
+ TestConfig config = GetParam();
+ jxl::FileWrapper testfile(GetTestDataPath(config.fn), "rb");
+ FILE* src = nullptr;
+ if (config.dparams.size_factor != 1.0) {
+ std::vector<uint8_t> compressed = ReadTestData(config.fn.c_str());
+ compressed.resize(compressed.size() * config.dparams.size_factor);
+ src = tmpfile();
+ ASSERT_TRUE(src != nullptr);
+ fwrite(compressed.data(), 1, compressed.size(), src);
+ rewind(src);
+ return;
+ } else {
+ src = testfile;
+ }
+ ASSERT_TRUE(src != nullptr);
+ TestImage output0;
+ jpeg_decompress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ jpegli_stdio_src(&cinfo, src);
+ ReadOutputImage(&cinfo, &output0);
+ return true;
+ };
+ ASSERT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+
+ TestImage output1;
+ DecodeWithLibjpeg(CompressParams(), DecompressParams(),
+ ReadTestData(config.fn.c_str()), &output1);
+ VerifyOutputImage(output1, output0, 1.0f);
+}
+
+TEST_P(SourceManagerTestParam, TestMemSourceManager) {
+ TestConfig config = GetParam();
+ std::vector<uint8_t> compressed = ReadTestData(config.fn.c_str());
+ if (config.dparams.size_factor < 1.0f) {
+ compressed.resize(compressed.size() * config.dparams.size_factor);
+ }
+ TestImage output0;
+ jpeg_decompress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_decompress(&cinfo);
+ jpegli_mem_src(&cinfo, compressed.data(), compressed.size());
+ ReadOutputImage(&cinfo, &output0);
+ return true;
+ };
+ ASSERT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&cinfo);
+
+ TestImage output1;
+ DecodeWithLibjpeg(CompressParams(), DecompressParams(), compressed, &output1);
+ VerifyOutputImage(output1, output0, 1.0f);
+}
+
+std::vector<TestConfig> GenerateTests() {
+ std::vector<TestConfig> all_tests;
+ {
+ std::vector<std::pair<std::string, std::string>> testfiles({
+ {"jxl/flower/flower.png.im_q85_444.jpg", "Q85YUV444"},
+ {"jxl/flower/flower.png.im_q85_420.jpg", "Q85YUV420"},
+ {"jxl/flower/flower.png.im_q85_420_R13B.jpg", "Q85YUV420R13B"},
+ });
+ for (const auto& it : testfiles) {
+ for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f}) {
+ TestConfig config;
+ config.fn = it.first;
+ config.fn_desc = it.second;
+ config.dparams.size_factor = size_factor;
+ all_tests.push_back(config);
+ }
+ }
+ return all_tests;
+ }
+}
+
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+ os << c.fn_desc;
+ if (c.dparams.size_factor < 1.0f) {
+ os << "Partial" << static_cast<int>(c.dparams.size_factor * 100) << "p";
+ }
+ return os;
+}
+
+std::string TestDescription(
+ const testing::TestParamInfo<SourceManagerTestParam::ParamType>& info) {
+ std::stringstream name;
+ name << info.param;
+ return name.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(SourceManagerTest, SourceManagerTestParam,
+ testing::ValuesIn(GenerateTests()),
+ TestDescription);
+
+} // namespace
+} // namespace jpegli
diff --git a/third_party/jpeg-xl/lib/jpegli/streaming_test.cc b/third_party/jpeg-xl/lib/jpegli/streaming_test.cc
new file mode 100644
index 0000000000..4a8981ee6a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/streaming_test.cc
@@ -0,0 +1,233 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+
+namespace jpegli {
+namespace {
+
+// A simple suspending source manager with an input buffer.
+struct SourceManager {
+ jpeg_source_mgr pub;
+ std::vector<uint8_t> buffer;
+
+ SourceManager() {
+ pub.next_input_byte = nullptr;
+ pub.bytes_in_buffer = 0;
+ pub.init_source = init_source;
+ pub.fill_input_buffer = fill_input_buffer;
+ pub.skip_input_data = skip_input_data;
+ pub.resync_to_restart = jpegli_resync_to_restart;
+ pub.term_source = term_source;
+ }
+
+ static void init_source(j_decompress_ptr cinfo) {}
+ static boolean fill_input_buffer(j_decompress_ptr cinfo) { return FALSE; }
+ static void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {}
+ static void term_source(j_decompress_ptr cinfo) {}
+};
+
+// A destination manager that empties its output buffer into a SourceManager's
+// input buffer. The buffer size is kept short because empty_output_buffer() is
+// called only when the output buffer is full, and we want to update the decoder
+// input frequently to demostrate that streaming works.
+static constexpr size_t kOutputBufferSize = 1024;
+struct DestinationManager {
+ jpeg_destination_mgr pub;
+ std::vector<uint8_t> buffer;
+ SourceManager* dest;
+
+ DestinationManager(SourceManager* src)
+ : buffer(kOutputBufferSize), dest(src) {
+ pub.next_output_byte = buffer.data();
+ pub.free_in_buffer = buffer.size();
+ pub.init_destination = init_destination;
+ pub.empty_output_buffer = empty_output_buffer;
+ pub.term_destination = term_destination;
+ }
+
+ static void init_destination(j_compress_ptr cinfo) {}
+
+ static boolean empty_output_buffer(j_compress_ptr cinfo) {
+ auto us = reinterpret_cast<DestinationManager*>(cinfo->dest);
+ jpeg_destination_mgr* src = &us->pub;
+ jpeg_source_mgr* dst = &us->dest->pub;
+ std::vector<uint8_t>& src_buf = us->buffer;
+ std::vector<uint8_t>& dst_buf = us->dest->buffer;
+ if (dst->bytes_in_buffer > 0 && dst->bytes_in_buffer < dst_buf.size()) {
+ memmove(dst_buf.data(), dst->next_input_byte, dst->bytes_in_buffer);
+ }
+ size_t src_len = src_buf.size() - src->free_in_buffer;
+ dst_buf.resize(dst->bytes_in_buffer + src_len);
+ memcpy(&dst_buf[dst->bytes_in_buffer], src_buf.data(), src_len);
+ dst->next_input_byte = dst_buf.data();
+ dst->bytes_in_buffer = dst_buf.size();
+ src->next_output_byte = src_buf.data();
+ src->free_in_buffer = src_buf.size();
+ return true;
+ }
+
+ static void term_destination(j_compress_ptr cinfo) {
+ empty_output_buffer(cinfo);
+ }
+};
+
+struct TestConfig {
+ TestImage input;
+ CompressParams jparams;
+};
+
+class StreamingTestParam : public ::testing::TestWithParam<TestConfig> {};
+
+TEST_P(StreamingTestParam, TestStreaming) {
+ jpeg_decompress_struct dinfo = {};
+ jpeg_compress_struct cinfo = {};
+ TestConfig config = GetParam();
+ TestImage& input = config.input;
+ TestImage output;
+ GeneratePixels(&input);
+ const auto try_catch_block = [&]() {
+ ERROR_HANDLER_SETUP(jpegli);
+ dinfo.err = cinfo.err;
+ dinfo.client_data = cinfo.client_data;
+ // Create a pair of compressor and decompressor objects, where the
+ // compressor's output is connected to the decompressor's input.
+ jpegli_create_decompress(&dinfo);
+ jpegli_create_compress(&cinfo);
+ SourceManager src;
+ dinfo.src = reinterpret_cast<jpeg_source_mgr*>(&src);
+ DestinationManager dest(&src);
+ cinfo.dest = reinterpret_cast<jpeg_destination_mgr*>(&dest);
+
+ cinfo.image_width = input.xsize;
+ cinfo.image_height = input.ysize;
+ cinfo.input_components = input.components;
+ cinfo.in_color_space = input.color_space;
+ jpegli_set_defaults(&cinfo);
+ cinfo.comp_info[0].v_samp_factor = config.jparams.v_sampling[0];
+ jpegli_set_progressive_level(&cinfo, 0);
+ cinfo.optimize_coding = FALSE;
+ jpegli_start_compress(&cinfo, TRUE);
+
+ size_t stride = cinfo.image_width * cinfo.input_components;
+ size_t iMCU_height = 8 * cinfo.max_v_samp_factor;
+ std::vector<uint8_t> row_bytes(iMCU_height * stride);
+ size_t yin = 0;
+ size_t yout = 0;
+ while (yin < cinfo.image_height) {
+ // Feed one iMCU row at a time to the compressor.
+ size_t lines_in = std::min(iMCU_height, cinfo.image_height - yin);
+ memcpy(&row_bytes[0], &input.pixels[yin * stride], lines_in * stride);
+ std::vector<JSAMPROW> rows_in(lines_in);
+ for (size_t i = 0; i < lines_in; ++i) {
+ rows_in[i] = &row_bytes[i * stride];
+ }
+ EXPECT_EQ(lines_in,
+ jpegli_write_scanlines(&cinfo, &rows_in[0], lines_in));
+ yin += lines_in;
+ if (yin == cinfo.image_height) {
+ jpegli_finish_compress(&cinfo);
+ }
+
+ // Atfer the first iMCU row, we don't yet expect any output because the
+ // compressor delays processing to have context rows after the iMCU row.
+ if (yin < std::min<size_t>(2 * iMCU_height, cinfo.image_height)) {
+ continue;
+ }
+
+ // After two iMCU rows, the compressor has started emitting compressed
+ // data. We check here that at least the scan header was output, because
+ // we expect that the compressor's output buffer was filled at least once
+ // while emitting the first compressed iMCU row.
+ if (yin == std::min<size_t>(2 * iMCU_height, cinfo.image_height)) {
+ EXPECT_EQ(JPEG_REACHED_SOS,
+ jpegli_read_header(&dinfo, /*require_image=*/TRUE));
+ output.xsize = dinfo.image_width;
+ output.ysize = dinfo.image_height;
+ output.components = dinfo.num_components;
+ EXPECT_EQ(output.xsize, input.xsize);
+ EXPECT_EQ(output.ysize, input.ysize);
+ EXPECT_EQ(output.components, input.components);
+ EXPECT_TRUE(jpegli_start_decompress(&dinfo));
+ output.pixels.resize(output.ysize * stride);
+ if (yin < cinfo.image_height) {
+ continue;
+ }
+ }
+
+ // After six iMCU rows, the compressor has emitted five iMCU rows of
+ // compressed data, of which we expect four full iMCU row of compressed
+ // data to be in the decoder's input buffer, but since the decoder also
+ // needs context rows for upsampling and smoothing, we don't expect any
+ // output to be ready yet.
+ if (yin < 7 * iMCU_height && yin < cinfo.image_height) {
+ continue;
+ }
+
+ // After five iMCU rows, we expect the decoder to have rendered the output
+ // with four iMCU rows of delay.
+ // TODO(szabadka) Reduce the processing delay in the decoder if possible.
+ size_t lines_out =
+ (yin == cinfo.image_height ? cinfo.image_height - yout : iMCU_height);
+ std::vector<JSAMPROW> rows_out(lines_out);
+ for (size_t i = 0; i < lines_out; ++i) {
+ rows_out[i] =
+ reinterpret_cast<JSAMPLE*>(&output.pixels[(yout + i) * stride]);
+ }
+ EXPECT_EQ(lines_out,
+ jpegli_read_scanlines(&dinfo, &rows_out[0], lines_out));
+ VerifyOutputImage(input, output, yout, lines_out, 3.8f);
+ yout += lines_out;
+
+ if (yout == cinfo.image_height) {
+ EXPECT_TRUE(jpegli_finish_decompress(&dinfo));
+ }
+ }
+ return true;
+ };
+ EXPECT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&dinfo);
+ jpegli_destroy_compress(&cinfo);
+}
+
+std::vector<TestConfig> GenerateTests() {
+ std::vector<TestConfig> all_tests;
+ const size_t xsize0 = 1920;
+ const size_t ysize0 = 1080;
+ for (int dysize : {0, 1, 8, 9}) {
+ for (int v_sampling : {1, 2}) {
+ TestConfig config;
+ config.input.xsize = xsize0;
+ config.input.ysize = ysize0 + dysize;
+ config.jparams.h_sampling = {1, 1, 1};
+ config.jparams.v_sampling = {v_sampling, 1, 1};
+ all_tests.push_back(config);
+ }
+ }
+ return all_tests;
+}
+
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+ os << c.input;
+ os << c.jparams;
+ return os;
+}
+
+std::string TestDescription(
+ const testing::TestParamInfo<StreamingTestParam::ParamType>& info) {
+ std::stringstream name;
+ name << info.param;
+ return name.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(StreamingTest, StreamingTestParam,
+ testing::ValuesIn(GenerateTests()),
+ TestDescription);
+
+} // namespace
+} // namespace jpegli
diff --git a/third_party/jpeg-xl/lib/jpegli/test_utils.cc b/third_party/jpeg-xl/lib/jpegli/test_utils.cc
new file mode 100644
index 0000000000..1ae5483d4a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/test_utils.cc
@@ -0,0 +1,1240 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/test_utils.h"
+
+#include <cmath>
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/file_io.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/sanitizers.h"
+
+#if !defined(TEST_DATA_PATH)
+#include "tools/cpp/runfiles/runfiles.h"
+#endif
+
+namespace jpegli {
+
+#if defined(TEST_DATA_PATH)
+std::string GetTestDataPath(const std::string& filename) {
+ return std::string(TEST_DATA_PATH "/") + filename;
+}
+#else
+using bazel::tools::cpp::runfiles::Runfiles;
+const std::unique_ptr<Runfiles> kRunfiles(Runfiles::Create(""));
+std::string GetTestDataPath(const std::string& filename) {
+ std::string root(JPEGXL_ROOT_PACKAGE "/testdata/");
+ return kRunfiles->Rlocation(root + filename);
+}
+#endif
+
+std::vector<uint8_t> ReadTestData(const std::string& filename) {
+ std::string full_path = GetTestDataPath(filename);
+ std::vector<uint8_t> data;
+ fprintf(stderr, "ReadTestData %s\n", full_path.c_str());
+ JXL_CHECK(jxl::ReadFile(full_path, &data));
+ printf("Test data %s is %d bytes long.\n", filename.c_str(),
+ static_cast<int>(data.size()));
+ return data;
+}
+
+void CustomQuantTable::Generate() {
+ basic_table.resize(DCTSIZE2);
+ quantval.resize(DCTSIZE2);
+ switch (table_type) {
+ case 0: {
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ basic_table[k] = k + 1;
+ }
+ break;
+ }
+ default:
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ basic_table[k] = table_type;
+ }
+ }
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ quantval[k] = (basic_table[k] * scale_factor + 50U) / 100U;
+ quantval[k] = std::max(quantval[k], 1U);
+ quantval[k] = std::min(quantval[k], 65535U);
+ if (!add_raw) {
+ quantval[k] = std::min(quantval[k], force_baseline ? 255U : 32767U);
+ }
+ }
+}
+
+bool PNMParser::ParseHeader(const uint8_t** pos, size_t* xsize, size_t* ysize,
+ size_t* num_channels, size_t* bitdepth) {
+ if (pos_[0] != 'P' || (pos_[1] != '5' && pos_[1] != '6')) {
+ fprintf(stderr, "Invalid PNM header.");
+ return false;
+ }
+ *num_channels = (pos_[1] == '5' ? 1 : 3);
+ pos_ += 2;
+
+ size_t maxval;
+ if (!SkipWhitespace() || !ParseUnsigned(xsize) || !SkipWhitespace() ||
+ !ParseUnsigned(ysize) || !SkipWhitespace() || !ParseUnsigned(&maxval) ||
+ !SkipWhitespace()) {
+ return false;
+ }
+ if (maxval == 0 || maxval >= 65536) {
+ fprintf(stderr, "Invalid maxval value.\n");
+ return false;
+ }
+ bool found_bitdepth = false;
+ for (int bits = 1; bits <= 16; ++bits) {
+ if (maxval == (1u << bits) - 1) {
+ *bitdepth = bits;
+ found_bitdepth = true;
+ break;
+ }
+ }
+ if (!found_bitdepth) {
+ fprintf(stderr, "Invalid maxval value.\n");
+ return false;
+ }
+
+ *pos = pos_;
+ return true;
+}
+
+bool PNMParser::ParseUnsigned(size_t* number) {
+ if (pos_ == end_ || *pos_ < '0' || *pos_ > '9') {
+ fprintf(stderr, "Expected unsigned number.\n");
+ return false;
+ }
+ *number = 0;
+ while (pos_ < end_ && *pos_ >= '0' && *pos_ <= '9') {
+ *number *= 10;
+ *number += *pos_ - '0';
+ ++pos_;
+ }
+
+ return true;
+}
+
+bool PNMParser::SkipWhitespace() {
+ if (pos_ == end_ || !IsWhitespace(*pos_)) {
+ fprintf(stderr, "Expected whitespace.\n");
+ return false;
+ }
+ while (pos_ < end_ && IsWhitespace(*pos_)) {
+ ++pos_;
+ }
+ return true;
+}
+
+bool ReadPNM(const std::vector<uint8_t>& data, size_t* xsize, size_t* ysize,
+ size_t* num_channels, size_t* bitdepth,
+ std::vector<uint8_t>* pixels) {
+ if (data.size() < 2) {
+ fprintf(stderr, "PNM file too small.\n");
+ return false;
+ }
+ PNMParser parser(data.data(), data.size());
+ const uint8_t* pos = nullptr;
+ if (!parser.ParseHeader(&pos, xsize, ysize, num_channels, bitdepth)) {
+ return false;
+ }
+ pixels->resize(data.data() + data.size() - pos);
+ memcpy(&(*pixels)[0], pos, pixels->size());
+ return true;
+}
+
+std::string ColorSpaceName(J_COLOR_SPACE colorspace) {
+ switch (colorspace) {
+ case JCS_UNKNOWN:
+ return "UNKNOWN";
+ case JCS_GRAYSCALE:
+ return "GRAYSCALE";
+ case JCS_RGB:
+ return "RGB";
+ case JCS_YCbCr:
+ return "YCbCr";
+ case JCS_CMYK:
+ return "CMYK";
+ case JCS_YCCK:
+ return "YCCK";
+ default:
+ return "";
+ }
+}
+
+std::string IOMethodName(JpegliDataType data_type,
+ JpegliEndianness endianness) {
+ std::string retval;
+ if (data_type == JPEGLI_TYPE_UINT8) {
+ return "";
+ } else if (data_type == JPEGLI_TYPE_UINT16) {
+ retval = "UINT16";
+ } else if (data_type == JPEGLI_TYPE_FLOAT) {
+ retval = "FLOAT";
+ }
+ if (endianness == JPEGLI_LITTLE_ENDIAN) {
+ retval += "LE";
+ } else if (endianness == JPEGLI_BIG_ENDIAN) {
+ retval += "BE";
+ }
+ return retval;
+}
+
+std::string SamplingId(const CompressParams& jparams) {
+ std::stringstream os;
+ JXL_CHECK(jparams.h_sampling.size() == jparams.v_sampling.size());
+ if (!jparams.h_sampling.empty()) {
+ size_t len = jparams.h_sampling.size();
+ while (len > 1 && jparams.h_sampling[len - 1] == 1 &&
+ jparams.v_sampling[len - 1] == 1) {
+ --len;
+ }
+ os << "SAMP";
+ for (size_t i = 0; i < len; ++i) {
+ if (i > 0) os << "_";
+ os << jparams.h_sampling[i] << "x" << jparams.v_sampling[i];
+ }
+ }
+ return os.str();
+}
+
+std::ostream& operator<<(std::ostream& os, const TestImage& input) {
+ os << input.xsize << "x" << input.ysize;
+ os << IOMethodName(input.data_type, input.endianness);
+ if (input.color_space != JCS_RGB) {
+ os << "InputColor" << ColorSpaceName(input.color_space);
+ }
+ if (input.color_space == JCS_UNKNOWN) {
+ os << input.components;
+ }
+ return os;
+}
+
+std::ostream& operator<<(std::ostream& os, const CompressParams& jparams) {
+ os << "Q" << jparams.quality;
+ os << SamplingId(jparams);
+ if (jparams.set_jpeg_colorspace) {
+ os << "JpegColor" << ColorSpaceName(jparams.jpeg_color_space);
+ }
+ if (!jparams.comp_ids.empty()) {
+ os << "CID";
+ for (size_t i = 0; i < jparams.comp_ids.size(); ++i) {
+ os << jparams.comp_ids[i];
+ }
+ }
+ if (!jparams.quant_indexes.empty()) {
+ os << "QIDX";
+ for (size_t i = 0; i < jparams.quant_indexes.size(); ++i) {
+ os << jparams.quant_indexes[i];
+ }
+ for (const auto& table : jparams.quant_tables) {
+ os << "TABLE" << table.slot_idx << "T" << table.table_type << "F"
+ << table.scale_factor
+ << (table.add_raw ? "R"
+ : table.force_baseline ? "B"
+ : "");
+ }
+ }
+ if (jparams.progressive_mode >= 0) {
+ os << "P" << jparams.progressive_mode;
+ } else if (jparams.simple_progression) {
+ os << "Psimple";
+ }
+ if (jparams.optimize_coding == 1) {
+ JXL_CHECK(jparams.progressive_mode <= 0 && !jparams.simple_progression);
+ os << "OptimizedCode";
+ } else if (jparams.optimize_coding == 0) {
+ JXL_CHECK(jparams.progressive_mode <= 0 && !jparams.simple_progression);
+ os << "FixedCode";
+ if (jparams.use_flat_dc_luma_code) {
+ os << "FlatDCLuma";
+ } else if (jparams.omit_standard_tables) {
+ os << "OmitDHT";
+ }
+ }
+ if (!jparams.use_adaptive_quantization) {
+ os << "NoAQ";
+ }
+ if (jparams.restart_interval > 0) {
+ os << "R" << jparams.restart_interval;
+ }
+ if (jparams.restart_in_rows > 0) {
+ os << "RR" << jparams.restart_in_rows;
+ }
+ if (jparams.xyb_mode) {
+ os << "XYB";
+ } else if (jparams.libjpeg_mode) {
+ os << "Libjpeg";
+ }
+ if (jparams.override_JFIF >= 0) {
+ os << (jparams.override_JFIF ? "AddJFIF" : "NoJFIF");
+ }
+ if (jparams.override_Adobe >= 0) {
+ os << (jparams.override_Adobe ? "AddAdobe" : "NoAdobe");
+ }
+ if (jparams.add_marker) {
+ os << "AddMarker";
+ }
+ if (!jparams.icc.empty()) {
+ os << "ICCSize" << jparams.icc.size();
+ }
+ if (jparams.smoothing_factor != 0) {
+ os << "SF" << jparams.smoothing_factor;
+ }
+ return os;
+}
+
+void SetNumChannels(J_COLOR_SPACE colorspace, size_t* channels) {
+ if (colorspace == JCS_GRAYSCALE) {
+ *channels = 1;
+ } else if (colorspace == JCS_RGB || colorspace == JCS_YCbCr) {
+ *channels = 3;
+ } else if (colorspace == JCS_CMYK || colorspace == JCS_YCCK) {
+ *channels = 4;
+ } else if (colorspace == JCS_UNKNOWN) {
+ JXL_CHECK(*channels <= 4);
+ } else {
+ JXL_ABORT();
+ }
+}
+
+void RGBToYCbCr(float r, float g, float b, float* y, float* cb, float* cr) {
+ *y = 0.299f * r + 0.587f * g + 0.114f * b;
+ *cb = -0.168736f * r - 0.331264f * g + 0.5f * b + 0.5f;
+ *cr = 0.5f * r - 0.418688f * g - 0.081312f * b + 0.5f;
+}
+
+void ConvertPixel(const uint8_t* input_rgb, uint8_t* out,
+ J_COLOR_SPACE colorspace, size_t num_channels,
+ JpegliDataType data_type = JPEGLI_TYPE_UINT8,
+ bool swap_endianness = JPEGLI_NATIVE_ENDIAN) {
+ const float kMul = 255.0f;
+ float r = input_rgb[0] / kMul;
+ float g = input_rgb[1] / kMul;
+ float b = input_rgb[2] / kMul;
+ uint8_t out8[MAX_COMPONENTS];
+ if (colorspace == JCS_GRAYSCALE) {
+ const float Y = 0.299f * r + 0.587f * g + 0.114f * b;
+ out8[0] = static_cast<uint8_t>(std::round(Y * kMul));
+ } else if (colorspace == JCS_RGB || colorspace == JCS_UNKNOWN) {
+ for (size_t c = 0; c < num_channels; ++c) {
+ out8[c] = input_rgb[std::min<size_t>(2, c)];
+ }
+ } else if (colorspace == JCS_YCbCr) {
+ float Y, Cb, Cr;
+ RGBToYCbCr(r, g, b, &Y, &Cb, &Cr);
+ out8[0] = static_cast<uint8_t>(std::round(Y * kMul));
+ out8[1] = static_cast<uint8_t>(std::round(Cb * kMul));
+ out8[2] = static_cast<uint8_t>(std::round(Cr * kMul));
+ } else if (colorspace == JCS_CMYK || colorspace == JCS_YCCK) {
+ float K = 1.0f - std::max(r, std::max(g, b));
+ float scaleK = 1.0f / (1.0f - K);
+ r *= scaleK;
+ g *= scaleK;
+ b *= scaleK;
+ if (colorspace == JCS_CMYK) {
+ out8[0] = static_cast<uint8_t>(std::round((1.0f - r) * kMul));
+ out8[1] = static_cast<uint8_t>(std::round((1.0f - g) * kMul));
+ out8[2] = static_cast<uint8_t>(std::round((1.0f - b) * kMul));
+ } else if (colorspace == JCS_YCCK) {
+ float Y, Cb, Cr;
+ RGBToYCbCr(r, g, b, &Y, &Cb, &Cr);
+ out8[0] = static_cast<uint8_t>(std::round(Y * kMul));
+ out8[1] = static_cast<uint8_t>(std::round(Cb * kMul));
+ out8[2] = static_cast<uint8_t>(std::round(Cr * kMul));
+ }
+ out8[3] = static_cast<uint8_t>(std::round(K * kMul));
+ } else {
+ JXL_ABORT("Colorspace %d not supported", colorspace);
+ }
+ if (data_type == JPEGLI_TYPE_UINT8) {
+ memcpy(out, out8, num_channels);
+ } else if (data_type == JPEGLI_TYPE_UINT16) {
+ for (size_t c = 0; c < num_channels; ++c) {
+ uint16_t val = (out8[c] << 8) + out8[c];
+ val |= 0x40; // Make little-endian and big-endian asymmetric
+ if (swap_endianness) {
+ val = JXL_BSWAP16(val);
+ }
+ memcpy(&out[sizeof(val) * c], &val, sizeof(val));
+ }
+ } else if (data_type == JPEGLI_TYPE_FLOAT) {
+ for (size_t c = 0; c < num_channels; ++c) {
+ float val = out8[c] / 255.0f;
+ if (swap_endianness) {
+ val = BSwapFloat(val);
+ }
+ memcpy(&out[sizeof(val) * c], &val, sizeof(val));
+ }
+ }
+}
+
+void ConvertToGrayscale(TestImage* img) {
+ if (img->color_space == JCS_GRAYSCALE) return;
+ JXL_CHECK(img->data_type == JPEGLI_TYPE_UINT8);
+ for (size_t i = 0; i < img->pixels.size(); i += 3) {
+ if (img->color_space == JCS_RGB) {
+ ConvertPixel(&img->pixels[i], &img->pixels[i / 3], JCS_GRAYSCALE, 1);
+ } else if (img->color_space == JCS_YCbCr) {
+ img->pixels[i / 3] = img->pixels[i];
+ }
+ }
+ img->pixels.resize(img->pixels.size() / 3);
+ img->color_space = JCS_GRAYSCALE;
+ img->components = 1;
+}
+
+void GeneratePixels(TestImage* img) {
+ const std::vector<uint8_t> imgdata = ReadTestData("jxl/flower/flower.pnm");
+ size_t xsize, ysize, channels, bitdepth;
+ std::vector<uint8_t> pixels;
+ JXL_CHECK(ReadPNM(imgdata, &xsize, &ysize, &channels, &bitdepth, &pixels));
+ if (img->xsize == 0) img->xsize = xsize;
+ if (img->ysize == 0) img->ysize = ysize;
+ JXL_CHECK(img->xsize <= xsize);
+ JXL_CHECK(img->ysize <= ysize);
+ JXL_CHECK(3 == channels);
+ JXL_CHECK(8 == bitdepth);
+ size_t in_bytes_per_pixel = channels;
+ size_t in_stride = xsize * in_bytes_per_pixel;
+ size_t x0 = (xsize - img->xsize) / 2;
+ size_t y0 = (ysize - img->ysize) / 2;
+ SetNumChannels(img->color_space, &img->components);
+ size_t out_bytes_per_pixel =
+ jpegli_bytes_per_sample(img->data_type) * img->components;
+ size_t out_stride = img->xsize * out_bytes_per_pixel;
+ bool swap_endianness =
+ (img->endianness == JPEGLI_LITTLE_ENDIAN && !IsLittleEndian()) ||
+ (img->endianness == JPEGLI_BIG_ENDIAN && IsLittleEndian());
+ img->pixels.resize(img->ysize * out_stride);
+ for (size_t iy = 0; iy < img->ysize; ++iy) {
+ size_t y = y0 + iy;
+ for (size_t ix = 0; ix < img->xsize; ++ix) {
+ size_t x = x0 + ix;
+ size_t idx_in = y * in_stride + x * in_bytes_per_pixel;
+ size_t idx_out = iy * out_stride + ix * out_bytes_per_pixel;
+ ConvertPixel(&pixels[idx_in], &img->pixels[idx_out], img->color_space,
+ img->components, img->data_type, swap_endianness);
+ }
+ }
+}
+
+void GenerateRawData(const CompressParams& jparams, TestImage* img) {
+ for (size_t c = 0; c < img->components; ++c) {
+ size_t xsize = jparams.comp_width(*img, c);
+ size_t ysize = jparams.comp_height(*img, c);
+ size_t factor_y = jparams.max_v_sample() / jparams.v_samp(c);
+ size_t factor_x = jparams.max_h_sample() / jparams.h_samp(c);
+ size_t factor = factor_x * factor_y;
+ std::vector<uint8_t> plane(ysize * xsize);
+ size_t bytes_per_pixel = img->components;
+ for (size_t y = 0; y < ysize; ++y) {
+ for (size_t x = 0; x < xsize; ++x) {
+ int result = 0;
+ for (size_t iy = 0; iy < factor_y; ++iy) {
+ size_t yy = std::min(y * factor_y + iy, img->ysize - 1);
+ for (size_t ix = 0; ix < factor_x; ++ix) {
+ size_t xx = std::min(x * factor_x + ix, img->xsize - 1);
+ size_t pixel_ix = (yy * img->xsize + xx) * bytes_per_pixel + c;
+ result += img->pixels[pixel_ix];
+ }
+ }
+ result = static_cast<uint8_t>((result + factor / 2) / factor);
+ plane[y * xsize + x] = result;
+ }
+ }
+ img->raw_data.emplace_back(std::move(plane));
+ }
+}
+
+void GenerateCoeffs(const CompressParams& jparams, TestImage* img) {
+ for (size_t c = 0; c < img->components; ++c) {
+ int xsize_blocks = jparams.comp_width(*img, c) / DCTSIZE;
+ int ysize_blocks = jparams.comp_height(*img, c) / DCTSIZE;
+ std::vector<JCOEF> plane(ysize_blocks * xsize_blocks * DCTSIZE2);
+ for (int by = 0; by < ysize_blocks; ++by) {
+ for (int bx = 0; bx < xsize_blocks; ++bx) {
+ JCOEF* block = &plane[(by * xsize_blocks + bx) * DCTSIZE2];
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ block[k] = (bx - by) / (k + 1);
+ }
+ }
+ }
+ img->coeffs.emplace_back(std::move(plane));
+ }
+}
+
+void EncodeWithJpegli(const TestImage& input, const CompressParams& jparams,
+ j_compress_ptr cinfo) {
+ cinfo->image_width = input.xsize;
+ cinfo->image_height = input.ysize;
+ cinfo->input_components = input.components;
+ if (jparams.xyb_mode) {
+ jpegli_set_xyb_mode(cinfo);
+ }
+ if (jparams.libjpeg_mode) {
+ jpegli_enable_adaptive_quantization(cinfo, FALSE);
+ jpegli_use_standard_quant_tables(cinfo);
+ jpegli_set_progressive_level(cinfo, 0);
+ }
+ jpegli_set_defaults(cinfo);
+ cinfo->in_color_space = input.color_space;
+ jpegli_default_colorspace(cinfo);
+ if (jparams.override_JFIF >= 0) {
+ cinfo->write_JFIF_header = jparams.override_JFIF;
+ }
+ if (jparams.override_Adobe >= 0) {
+ cinfo->write_Adobe_marker = jparams.override_Adobe;
+ }
+ if (jparams.set_jpeg_colorspace) {
+ jpegli_set_colorspace(cinfo, jparams.jpeg_color_space);
+ }
+ if (!jparams.comp_ids.empty()) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ cinfo->comp_info[c].component_id = jparams.comp_ids[c];
+ }
+ }
+ if (!jparams.h_sampling.empty()) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ cinfo->comp_info[c].h_samp_factor = jparams.h_sampling[c];
+ cinfo->comp_info[c].v_samp_factor = jparams.v_sampling[c];
+ }
+ }
+ jpegli_set_quality(cinfo, jparams.quality, TRUE);
+ if (!jparams.quant_indexes.empty()) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ cinfo->comp_info[c].quant_tbl_no = jparams.quant_indexes[c];
+ }
+ for (const auto& table : jparams.quant_tables) {
+ if (table.add_raw) {
+ cinfo->quant_tbl_ptrs[table.slot_idx] =
+ jpegli_alloc_quant_table((j_common_ptr)cinfo);
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ cinfo->quant_tbl_ptrs[table.slot_idx]->quantval[k] =
+ table.quantval[k];
+ }
+ cinfo->quant_tbl_ptrs[table.slot_idx]->sent_table = FALSE;
+ } else {
+ jpegli_add_quant_table(cinfo, table.slot_idx, &table.basic_table[0],
+ table.scale_factor, table.force_baseline);
+ }
+ }
+ }
+ if (jparams.simple_progression) {
+ jpegli_simple_progression(cinfo);
+ JXL_CHECK(jparams.progressive_mode == -1);
+ }
+ if (jparams.progressive_mode > 2) {
+ const ScanScript& script = kTestScript[jparams.progressive_mode - 3];
+ cinfo->scan_info = script.scans;
+ cinfo->num_scans = script.num_scans;
+ } else if (jparams.progressive_mode >= 0) {
+ jpegli_set_progressive_level(cinfo, jparams.progressive_mode);
+ }
+ jpegli_set_input_format(cinfo, input.data_type, input.endianness);
+ jpegli_enable_adaptive_quantization(cinfo, jparams.use_adaptive_quantization);
+ cinfo->restart_interval = jparams.restart_interval;
+ cinfo->restart_in_rows = jparams.restart_in_rows;
+ cinfo->smoothing_factor = jparams.smoothing_factor;
+ if (jparams.optimize_coding == 1) {
+ cinfo->optimize_coding = TRUE;
+ } else if (jparams.optimize_coding == 0) {
+ cinfo->optimize_coding = FALSE;
+ }
+ cinfo->raw_data_in = !input.raw_data.empty();
+ if (jparams.optimize_coding == 0 && jparams.use_flat_dc_luma_code) {
+ JHUFF_TBL* tbl = cinfo->dc_huff_tbl_ptrs[0];
+ memset(tbl, 0, sizeof(*tbl));
+ tbl->bits[4] = 15;
+ for (int i = 0; i < 15; ++i) tbl->huffval[i] = i;
+ }
+ if (input.coeffs.empty()) {
+ bool write_all_tables = TRUE;
+ if (jparams.optimize_coding == 0 && !jparams.use_flat_dc_luma_code &&
+ jparams.omit_standard_tables) {
+ write_all_tables = FALSE;
+ cinfo->dc_huff_tbl_ptrs[0]->sent_table = TRUE;
+ cinfo->dc_huff_tbl_ptrs[1]->sent_table = TRUE;
+ cinfo->ac_huff_tbl_ptrs[0]->sent_table = TRUE;
+ cinfo->ac_huff_tbl_ptrs[1]->sent_table = TRUE;
+ }
+ jpegli_start_compress(cinfo, write_all_tables);
+ if (jparams.add_marker) {
+ jpegli_write_marker(cinfo, kSpecialMarker0, kMarkerData,
+ sizeof(kMarkerData));
+ jpegli_write_m_header(cinfo, kSpecialMarker1, sizeof(kMarkerData));
+ for (size_t p = 0; p < sizeof(kMarkerData); ++p) {
+ jpegli_write_m_byte(cinfo, kMarkerData[p]);
+ }
+ for (size_t i = 0; i < kMarkerSequenceLen; ++i) {
+ jpegli_write_marker(cinfo, kMarkerSequence[i], kMarkerData,
+ ((i + 2) % sizeof(kMarkerData)));
+ }
+ }
+ if (!jparams.icc.empty()) {
+ jpegli_write_icc_profile(cinfo, jparams.icc.data(), jparams.icc.size());
+ }
+ }
+ if (cinfo->raw_data_in) {
+ // Need to copy because jpeg API requires non-const pointers.
+ std::vector<std::vector<uint8_t>> raw_data = input.raw_data;
+ size_t max_lines = jparams.max_v_sample() * DCTSIZE;
+ std::vector<std::vector<JSAMPROW>> rowdata(cinfo->num_components);
+ std::vector<JSAMPARRAY> data(cinfo->num_components);
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ rowdata[c].resize(jparams.v_samp(c) * DCTSIZE);
+ data[c] = &rowdata[c][0];
+ }
+ while (cinfo->next_scanline < cinfo->image_height) {
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ size_t cwidth = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+ size_t cheight = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+ size_t num_lines = jparams.v_samp(c) * DCTSIZE;
+ size_t y0 = (cinfo->next_scanline / max_lines) * num_lines;
+ for (size_t i = 0; i < num_lines; ++i) {
+ rowdata[c][i] =
+ (y0 + i < cheight ? &raw_data[c][(y0 + i) * cwidth] : nullptr);
+ }
+ }
+ size_t num_lines = jpegli_write_raw_data(cinfo, &data[0], max_lines);
+ JXL_CHECK(num_lines == max_lines);
+ }
+ } else if (!input.coeffs.empty()) {
+ j_common_ptr comptr = reinterpret_cast<j_common_ptr>(cinfo);
+ jvirt_barray_ptr* coef_arrays = reinterpret_cast<jvirt_barray_ptr*>((
+ *cinfo->mem->alloc_small)(
+ comptr, JPOOL_IMAGE, cinfo->num_components * sizeof(jvirt_barray_ptr)));
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ size_t xsize_blocks = jparams.comp_width(input, c) / DCTSIZE;
+ size_t ysize_blocks = jparams.comp_height(input, c) / DCTSIZE;
+ coef_arrays[c] = (*cinfo->mem->request_virt_barray)(
+ comptr, JPOOL_IMAGE, FALSE, xsize_blocks, ysize_blocks,
+ cinfo->comp_info[c].v_samp_factor);
+ }
+ jpegli_write_coefficients(cinfo, coef_arrays);
+ if (jparams.add_marker) {
+ jpegli_write_marker(cinfo, kSpecialMarker0, kMarkerData,
+ sizeof(kMarkerData));
+ jpegli_write_m_header(cinfo, kSpecialMarker1, sizeof(kMarkerData));
+ for (size_t p = 0; p < sizeof(kMarkerData); ++p) {
+ jpegli_write_m_byte(cinfo, kMarkerData[p]);
+ }
+ }
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ for (size_t by = 0; by < comp->height_in_blocks; ++by) {
+ JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)(
+ comptr, coef_arrays[c], by, 1, true);
+ size_t stride = comp->width_in_blocks * sizeof(JBLOCK);
+ size_t offset = by * comp->width_in_blocks * DCTSIZE2;
+ memcpy(ba[0], &input.coeffs[c][offset], stride);
+ }
+ }
+ } else {
+ size_t stride = cinfo->image_width * cinfo->input_components *
+ jpegli_bytes_per_sample(input.data_type);
+ std::vector<uint8_t> row_bytes(stride);
+ for (size_t y = 0; y < cinfo->image_height; ++y) {
+ memcpy(&row_bytes[0], &input.pixels[y * stride], stride);
+ JSAMPROW row[] = {row_bytes.data()};
+ jpegli_write_scanlines(cinfo, row, 1);
+ }
+ }
+ jpegli_finish_compress(cinfo);
+}
+
+bool EncodeWithJpegli(const TestImage& input, const CompressParams& jparams,
+ std::vector<uint8_t>* compressed) {
+ uint8_t* buffer = nullptr;
+ unsigned long buffer_size = 0;
+ jpeg_compress_struct cinfo;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+ EncodeWithJpegli(input, jparams, &cinfo);
+ return true;
+ };
+ bool success = try_catch_block();
+ jpegli_destroy_compress(&cinfo);
+ if (success) {
+ compressed->resize(buffer_size);
+ std::copy_n(buffer, buffer_size, compressed->data());
+ }
+ if (buffer) std::free(buffer);
+ return success;
+}
+
+void SetScanDecompressParams(const DecompressParams& dparams,
+ j_decompress_ptr cinfo, int scan_number,
+ bool is_jpegli) {
+ const ScanDecompressParams* sparams = nullptr;
+ for (const auto& sp : dparams.scan_params) {
+ if (scan_number <= sp.max_scan_number) {
+ sparams = &sp;
+ break;
+ }
+ }
+ if (sparams == nullptr) {
+ return;
+ }
+ if (dparams.quantize_colors) {
+ cinfo->dither_mode = sparams->dither_mode;
+ if (sparams->color_quant_mode == CQUANT_1PASS) {
+ cinfo->two_pass_quantize = FALSE;
+ cinfo->colormap = nullptr;
+ } else if (sparams->color_quant_mode == CQUANT_2PASS) {
+ JXL_CHECK(cinfo->out_color_space = JCS_RGB);
+ cinfo->two_pass_quantize = TRUE;
+ cinfo->colormap = nullptr;
+ } else if (sparams->color_quant_mode == CQUANT_EXTERNAL) {
+ JXL_CHECK(cinfo->out_color_space = JCS_RGB);
+ cinfo->two_pass_quantize = FALSE;
+ bool have_colormap = cinfo->colormap != nullptr;
+ cinfo->actual_number_of_colors = kTestColorMapNumColors;
+ cinfo->colormap = (*cinfo->mem->alloc_sarray)(
+ reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE,
+ cinfo->actual_number_of_colors, 3);
+ jxl::msan::UnpoisonMemory(cinfo->colormap, 3 * sizeof(JSAMPROW));
+ for (int i = 0; i < kTestColorMapNumColors; ++i) {
+ cinfo->colormap[0][i] = (kTestColorMap[i] >> 16) & 0xff;
+ cinfo->colormap[1][i] = (kTestColorMap[i] >> 8) & 0xff;
+ cinfo->colormap[2][i] = (kTestColorMap[i] >> 0) & 0xff;
+ }
+ if (have_colormap) {
+ if (is_jpegli) {
+ jpegli_new_colormap(cinfo);
+ } else {
+ jpeg_new_colormap(cinfo);
+ }
+ }
+ } else if (sparams->color_quant_mode == CQUANT_REUSE) {
+ JXL_CHECK(cinfo->out_color_space = JCS_RGB);
+ JXL_CHECK(cinfo->colormap);
+ }
+ }
+}
+
+void SetDecompressParams(const DecompressParams& dparams,
+ j_decompress_ptr cinfo, bool is_jpegli) {
+ cinfo->do_block_smoothing = dparams.do_block_smoothing;
+ cinfo->do_fancy_upsampling = dparams.do_fancy_upsampling;
+ if (dparams.output_mode == RAW_DATA) {
+ cinfo->raw_data_out = TRUE;
+ }
+ if (dparams.set_out_color_space) {
+ cinfo->out_color_space = dparams.out_color_space;
+ if (dparams.out_color_space == JCS_UNKNOWN) {
+ cinfo->jpeg_color_space = JCS_UNKNOWN;
+ }
+ }
+ cinfo->scale_num = dparams.scale_num;
+ cinfo->scale_denom = dparams.scale_denom;
+ cinfo->quantize_colors = dparams.quantize_colors;
+ cinfo->desired_number_of_colors = dparams.desired_number_of_colors;
+ if (!dparams.scan_params.empty()) {
+ if (cinfo->buffered_image) {
+ for (const auto& sparams : dparams.scan_params) {
+ if (sparams.color_quant_mode == CQUANT_1PASS) {
+ cinfo->enable_1pass_quant = TRUE;
+ } else if (sparams.color_quant_mode == CQUANT_2PASS) {
+ cinfo->enable_2pass_quant = TRUE;
+ } else if (sparams.color_quant_mode == CQUANT_EXTERNAL) {
+ cinfo->enable_external_quant = TRUE;
+ }
+ }
+ SetScanDecompressParams(dparams, cinfo, 1, is_jpegli);
+ } else {
+ SetScanDecompressParams(dparams, cinfo, kLastScan, is_jpegli);
+ }
+ }
+ if (is_jpegli) {
+ jpegli_set_output_format(cinfo, dparams.data_type, dparams.endianness);
+ }
+}
+
+void CheckMarkerPresent(j_decompress_ptr cinfo, uint8_t marker_type) {
+ bool marker_found = false;
+ for (jpeg_saved_marker_ptr marker = cinfo->marker_list; marker != nullptr;
+ marker = marker->next) {
+ jxl::msan::UnpoisonMemory(marker, sizeof(*marker));
+ jxl::msan::UnpoisonMemory(marker->data, marker->data_length);
+ if (marker->marker == marker_type &&
+ marker->data_length == sizeof(kMarkerData) &&
+ memcmp(marker->data, kMarkerData, sizeof(kMarkerData)) == 0) {
+ marker_found = true;
+ }
+ }
+ JXL_CHECK(marker_found);
+}
+
+void VerifyHeader(const CompressParams& jparams, j_decompress_ptr cinfo) {
+ if (jparams.set_jpeg_colorspace) {
+ JXL_CHECK(cinfo->jpeg_color_space == jparams.jpeg_color_space);
+ }
+ if (jparams.override_JFIF >= 0) {
+ JXL_CHECK(cinfo->saw_JFIF_marker == jparams.override_JFIF);
+ }
+ if (jparams.override_Adobe >= 0) {
+ JXL_CHECK(cinfo->saw_Adobe_marker == jparams.override_Adobe);
+ }
+ if (jparams.add_marker) {
+ CheckMarkerPresent(cinfo, kSpecialMarker0);
+ CheckMarkerPresent(cinfo, kSpecialMarker1);
+ }
+ jxl::msan::UnpoisonMemory(
+ cinfo->comp_info, cinfo->num_components * sizeof(cinfo->comp_info[0]));
+ int max_h_samp_factor = 1;
+ int max_v_samp_factor = 1;
+ for (int i = 0; i < cinfo->num_components; ++i) {
+ jpeg_component_info* comp = &cinfo->comp_info[i];
+ if (!jparams.comp_ids.empty()) {
+ JXL_CHECK(comp->component_id == jparams.comp_ids[i]);
+ }
+ if (!jparams.h_sampling.empty()) {
+ JXL_CHECK(comp->h_samp_factor == jparams.h_sampling[i]);
+ }
+ if (!jparams.v_sampling.empty()) {
+ JXL_CHECK(comp->v_samp_factor == jparams.v_sampling[i]);
+ }
+ if (!jparams.quant_indexes.empty()) {
+ JXL_CHECK(comp->quant_tbl_no == jparams.quant_indexes[i]);
+ }
+ max_h_samp_factor = std::max(max_h_samp_factor, comp->h_samp_factor);
+ max_v_samp_factor = std::max(max_v_samp_factor, comp->v_samp_factor);
+ }
+ JXL_CHECK(max_h_samp_factor == cinfo->max_h_samp_factor);
+ JXL_CHECK(max_v_samp_factor == cinfo->max_v_samp_factor);
+ int referenced_tables[NUM_QUANT_TBLS] = {};
+ for (int i = 0; i < cinfo->num_components; ++i) {
+ jpeg_component_info* comp = &cinfo->comp_info[i];
+ JXL_CHECK(comp->width_in_blocks ==
+ DivCeil(cinfo->image_width * comp->h_samp_factor,
+ max_h_samp_factor * DCTSIZE));
+ JXL_CHECK(comp->height_in_blocks ==
+ DivCeil(cinfo->image_height * comp->v_samp_factor,
+ max_v_samp_factor * DCTSIZE));
+ referenced_tables[comp->quant_tbl_no] = 1;
+ }
+ for (const auto& table : jparams.quant_tables) {
+ JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[table.slot_idx];
+ if (!referenced_tables[table.slot_idx]) {
+ JXL_CHECK(quant_table == nullptr);
+ continue;
+ }
+ JXL_CHECK(quant_table != nullptr);
+ jxl::msan::UnpoisonMemory(quant_table, sizeof(*quant_table));
+ for (int k = 0; k < DCTSIZE2; ++k) {
+ JXL_CHECK(quant_table->quantval[k] == table.quantval[k]);
+ }
+ }
+}
+
+void VerifyScanHeader(const CompressParams& jparams, j_decompress_ptr cinfo) {
+ JXL_CHECK(cinfo->input_scan_number > 0);
+ if (cinfo->progressive_mode) {
+ JXL_CHECK(cinfo->Ss != 0 || cinfo->Se != 63);
+ } else {
+ JXL_CHECK(cinfo->Ss == 0 && cinfo->Se == 63);
+ }
+ if (jparams.progressive_mode > 2) {
+ JXL_CHECK(jparams.progressive_mode < 3 + kNumTestScripts);
+ const ScanScript& script = kTestScript[jparams.progressive_mode - 3];
+ JXL_CHECK(cinfo->input_scan_number <= script.num_scans);
+ const jpeg_scan_info& scan = script.scans[cinfo->input_scan_number - 1];
+ JXL_CHECK(cinfo->comps_in_scan == scan.comps_in_scan);
+ for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+ JXL_CHECK(cinfo->cur_comp_info[i]->component_index ==
+ scan.component_index[i]);
+ }
+ JXL_CHECK(cinfo->Ss == scan.Ss);
+ JXL_CHECK(cinfo->Se == scan.Se);
+ JXL_CHECK(cinfo->Ah == scan.Ah);
+ JXL_CHECK(cinfo->Al == scan.Al);
+ }
+ if (jparams.restart_interval > 0) {
+ JXL_CHECK(cinfo->restart_interval == jparams.restart_interval);
+ } else if (jparams.restart_in_rows > 0) {
+ JXL_CHECK(cinfo->restart_interval ==
+ jparams.restart_in_rows * cinfo->MCUs_per_row);
+ }
+ if (jparams.progressive_mode == 0 && jparams.optimize_coding == 0) {
+ if (cinfo->jpeg_color_space == JCS_RGB) {
+ JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 0);
+ } else if (cinfo->jpeg_color_space == JCS_YCbCr) {
+ JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 1);
+ JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 1);
+ JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 1);
+ JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 1);
+ } else if (cinfo->jpeg_color_space == JCS_CMYK) {
+ JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[3].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[3].ac_tbl_no == 0);
+ } else if (cinfo->jpeg_color_space == JCS_YCCK) {
+ JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 1);
+ JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 1);
+ JXL_CHECK(cinfo->comp_info[3].dc_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0);
+ JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 1);
+ JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 1);
+ JXL_CHECK(cinfo->comp_info[3].ac_tbl_no == 0);
+ }
+ if (jparams.use_flat_dc_luma_code) {
+ JHUFF_TBL* tbl = cinfo->dc_huff_tbl_ptrs[0];
+ jxl::msan::UnpoisonMemory(tbl, sizeof(*tbl));
+ for (int i = 0; i < 15; ++i) {
+ JXL_CHECK(tbl->huffval[i] == i);
+ }
+ }
+ }
+}
+
+void UnmapColors(uint8_t* row, size_t xsize, int components,
+ JSAMPARRAY colormap, size_t num_colors) {
+ JXL_CHECK(colormap != nullptr);
+ std::vector<uint8_t> tmp(xsize * components);
+ for (size_t x = 0; x < xsize; ++x) {
+ JXL_CHECK(row[x] < num_colors);
+ for (int c = 0; c < components; ++c) {
+ tmp[x * components + c] = colormap[c][row[x]];
+ }
+ }
+ memcpy(row, tmp.data(), tmp.size());
+}
+
+void ReadOutputPass(j_decompress_ptr cinfo, const DecompressParams& dparams,
+ TestImage* output) {
+ JDIMENSION xoffset = 0;
+ JDIMENSION yoffset = 0;
+ JDIMENSION xsize_cropped = cinfo->output_width;
+ JDIMENSION ysize_cropped = cinfo->output_height;
+ if (dparams.crop_output) {
+ xoffset = xsize_cropped = cinfo->output_width / 3;
+ yoffset = ysize_cropped = cinfo->output_height / 3;
+ jpeg_crop_scanline(cinfo, &xoffset, &xsize_cropped);
+ JXL_CHECK(xsize_cropped == cinfo->output_width);
+ }
+ output->xsize = xsize_cropped;
+ output->ysize = ysize_cropped;
+ output->components = cinfo->out_color_components;
+ if (cinfo->quantize_colors) {
+ jxl::msan::UnpoisonMemory(cinfo->colormap, cinfo->out_color_components *
+ sizeof(cinfo->colormap[0]));
+ for (int c = 0; c < cinfo->out_color_components; ++c) {
+ jxl::msan::UnpoisonMemory(
+ cinfo->colormap[c],
+ cinfo->actual_number_of_colors * sizeof(cinfo->colormap[c][0]));
+ }
+ }
+ if (!cinfo->raw_data_out) {
+ size_t stride = output->xsize * output->components;
+ output->pixels.resize(output->ysize * stride);
+ output->color_space = cinfo->out_color_space;
+ if (yoffset > 0) {
+ jpeg_skip_scanlines(cinfo, yoffset);
+ }
+ for (size_t y = 0; y < output->ysize; ++y) {
+ JSAMPROW rows[] = {
+ reinterpret_cast<JSAMPLE*>(&output->pixels[y * stride])};
+ JXL_CHECK(1 == jpeg_read_scanlines(cinfo, rows, 1));
+ jxl::msan::UnpoisonMemory(
+ rows[0], sizeof(JSAMPLE) * cinfo->output_components * output->xsize);
+ if (cinfo->quantize_colors) {
+ UnmapColors(rows[0], cinfo->output_width, cinfo->out_color_components,
+ cinfo->colormap, cinfo->actual_number_of_colors);
+ }
+ }
+ if (cinfo->output_scanline < cinfo->output_height) {
+ jpeg_skip_scanlines(cinfo, cinfo->output_height - cinfo->output_scanline);
+ }
+ } else {
+ output->color_space = cinfo->jpeg_color_space;
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+ size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+ std::vector<uint8_t> plane(ysize * xsize);
+ output->raw_data.emplace_back(std::move(plane));
+ }
+ while (cinfo->output_scanline < cinfo->output_height) {
+ size_t iMCU_height = cinfo->max_v_samp_factor * DCTSIZE;
+ JXL_CHECK(cinfo->output_scanline == cinfo->output_iMCU_row * iMCU_height);
+ std::vector<std::vector<JSAMPROW>> rowdata(cinfo->num_components);
+ std::vector<JSAMPARRAY> data(cinfo->num_components);
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE;
+ size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE;
+ size_t num_lines = cinfo->comp_info[c].v_samp_factor * DCTSIZE;
+ rowdata[c].resize(num_lines);
+ size_t y0 = cinfo->output_iMCU_row * num_lines;
+ for (size_t i = 0; i < num_lines; ++i) {
+ rowdata[c][i] =
+ y0 + i < ysize ? &output->raw_data[c][(y0 + i) * xsize] : nullptr;
+ }
+ data[c] = &rowdata[c][0];
+ }
+ JXL_CHECK(iMCU_height ==
+ jpeg_read_raw_data(cinfo, &data[0], iMCU_height));
+ }
+ }
+ JXL_CHECK(cinfo->total_iMCU_rows ==
+ DivCeil(cinfo->image_height, cinfo->max_v_samp_factor * DCTSIZE));
+}
+
+void CopyCoefficients(j_decompress_ptr cinfo, jvirt_barray_ptr* coef_arrays,
+ TestImage* output) {
+ output->xsize = cinfo->image_width;
+ output->ysize = cinfo->image_height;
+ output->components = cinfo->num_components;
+ output->color_space = cinfo->out_color_space;
+ j_common_ptr comptr = reinterpret_cast<j_common_ptr>(cinfo);
+ for (int c = 0; c < cinfo->num_components; ++c) {
+ jpeg_component_info* comp = &cinfo->comp_info[c];
+ std::vector<JCOEF> coeffs(comp->width_in_blocks * comp->height_in_blocks *
+ DCTSIZE2);
+ for (size_t by = 0; by < comp->height_in_blocks; ++by) {
+ JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)(comptr, coef_arrays[c],
+ by, 1, true);
+ size_t stride = comp->width_in_blocks * sizeof(JBLOCK);
+ size_t offset = by * comp->width_in_blocks * DCTSIZE2;
+ memcpy(&coeffs[offset], ba[0], stride);
+ }
+ output->coeffs.emplace_back(std::move(coeffs));
+ }
+}
+
+// Verifies that an image encoded with libjpegli can be decoded with libjpeg,
+// and checks that the jpeg coding metadata matches jparams.
+void DecodeAllScansWithLibjpeg(const CompressParams& jparams,
+ const DecompressParams& dparams,
+ const std::vector<uint8_t>& compressed,
+ std::vector<TestImage>* output_progression) {
+ jpeg_decompress_struct cinfo = {};
+ const auto try_catch_block = [&]() {
+ ERROR_HANDLER_SETUP(jpeg);
+ jpeg_create_decompress(&cinfo);
+ jpeg_mem_src(&cinfo, compressed.data(), compressed.size());
+ if (jparams.add_marker) {
+ jpeg_save_markers(&cinfo, kSpecialMarker0, 0xffff);
+ jpeg_save_markers(&cinfo, kSpecialMarker1, 0xffff);
+ }
+ JXL_CHECK(JPEG_REACHED_SOS ==
+ jpeg_read_header(&cinfo, /*require_image=*/TRUE));
+ cinfo.buffered_image = TRUE;
+ SetDecompressParams(dparams, &cinfo, /*is_jpegli=*/false);
+ VerifyHeader(jparams, &cinfo);
+ JXL_CHECK(jpeg_start_decompress(&cinfo));
+ // start decompress should not read the whole input in buffered image mode
+ JXL_CHECK(!jpeg_input_complete(&cinfo));
+ JXL_CHECK(cinfo.output_scan_number == 0);
+ int sos_marker_cnt = 1; // read header reads the first SOS marker
+ while (!jpeg_input_complete(&cinfo)) {
+ JXL_CHECK(cinfo.input_scan_number == sos_marker_cnt);
+ if (dparams.skip_scans && (cinfo.input_scan_number % 2) != 1) {
+ int result = JPEG_SUSPENDED;
+ while (result != JPEG_REACHED_SOS && result != JPEG_REACHED_EOI) {
+ result = jpeg_consume_input(&cinfo);
+ }
+ if (result == JPEG_REACHED_SOS) ++sos_marker_cnt;
+ continue;
+ }
+ SetScanDecompressParams(dparams, &cinfo, cinfo.input_scan_number,
+ /*is_jpegli=*/false);
+ JXL_CHECK(jpeg_start_output(&cinfo, cinfo.input_scan_number));
+ // start output sets output_scan_number, but does not change
+ // input_scan_number
+ JXL_CHECK(cinfo.output_scan_number == cinfo.input_scan_number);
+ JXL_CHECK(cinfo.input_scan_number == sos_marker_cnt);
+ VerifyScanHeader(jparams, &cinfo);
+ TestImage output;
+ ReadOutputPass(&cinfo, dparams, &output);
+ output_progression->emplace_back(std::move(output));
+ // read scanlines/read raw data does not change input/output scan number
+ if (!cinfo.progressive_mode) {
+ JXL_CHECK(cinfo.input_scan_number == sos_marker_cnt);
+ JXL_CHECK(cinfo.output_scan_number == cinfo.input_scan_number);
+ }
+ JXL_CHECK(jpeg_finish_output(&cinfo));
+ ++sos_marker_cnt; // finish output reads the next SOS marker or EOI
+ if (dparams.output_mode == COEFFICIENTS) {
+ jvirt_barray_ptr* coef_arrays = jpeg_read_coefficients(&cinfo);
+ JXL_CHECK(coef_arrays != nullptr);
+ CopyCoefficients(&cinfo, coef_arrays, &output_progression->back());
+ }
+ }
+ JXL_CHECK(jpeg_finish_decompress(&cinfo));
+ return true;
+ };
+ JXL_CHECK(try_catch_block());
+ jpeg_destroy_decompress(&cinfo);
+}
+
+void DecodeWithLibjpeg(const CompressParams& jparams,
+ const DecompressParams& dparams, j_decompress_ptr cinfo,
+ TestImage* output) {
+ if (jparams.add_marker) {
+ jpeg_save_markers(cinfo, kSpecialMarker0, 0xffff);
+ jpeg_save_markers(cinfo, kSpecialMarker1, 0xffff);
+ }
+ if (!jparams.icc.empty()) {
+ jpeg_save_markers(cinfo, JPEG_APP0 + 2, 0xffff);
+ }
+ JXL_CHECK(JPEG_REACHED_SOS ==
+ jpeg_read_header(cinfo, /*require_image=*/TRUE));
+ if (!jparams.icc.empty()) {
+ uint8_t* icc_data = nullptr;
+ unsigned int icc_len;
+ JXL_CHECK(jpeg_read_icc_profile(cinfo, &icc_data, &icc_len));
+ JXL_CHECK(icc_data);
+ jxl::msan::UnpoisonMemory(icc_data, icc_len);
+ JXL_CHECK(0 == memcmp(jparams.icc.data(), icc_data, icc_len));
+ free(icc_data);
+ }
+ SetDecompressParams(dparams, cinfo, /*is_jpegli=*/false);
+ VerifyHeader(jparams, cinfo);
+ if (dparams.output_mode == COEFFICIENTS) {
+ jvirt_barray_ptr* coef_arrays = jpeg_read_coefficients(cinfo);
+ JXL_CHECK(coef_arrays != nullptr);
+ CopyCoefficients(cinfo, coef_arrays, output);
+ } else {
+ JXL_CHECK(jpeg_start_decompress(cinfo));
+ VerifyScanHeader(jparams, cinfo);
+ ReadOutputPass(cinfo, dparams, output);
+ }
+ JXL_CHECK(jpeg_finish_decompress(cinfo));
+}
+
+void DecodeWithLibjpeg(const CompressParams& jparams,
+ const DecompressParams& dparams,
+ const std::vector<uint8_t>& compressed,
+ TestImage* output) {
+ jpeg_decompress_struct cinfo = {};
+ const auto try_catch_block = [&]() {
+ ERROR_HANDLER_SETUP(jpeg);
+ jpeg_create_decompress(&cinfo);
+ jpeg_mem_src(&cinfo, compressed.data(), compressed.size());
+ DecodeWithLibjpeg(jparams, dparams, &cinfo, output);
+ return true;
+ };
+ JXL_CHECK(try_catch_block());
+ jpeg_destroy_decompress(&cinfo);
+}
+
+void DumpImage(const TestImage& image, const std::string fn) {
+ JXL_CHECK(image.components == 1 || image.components == 3);
+ jxl::FileWrapper f(fn.c_str(), "wb");
+ size_t bytes_per_sample = jpegli_bytes_per_sample(image.data_type);
+ uint32_t maxval = (1u << (8 * bytes_per_sample)) - 1;
+ char type = image.components == 1 ? '5' : '6';
+ fprintf(f, "P%c\n%" PRIuS " %" PRIuS "\n%u\n", type, image.xsize, image.ysize,
+ maxval);
+ fwrite(image.pixels.data(), 1, image.pixels.size(), f);
+}
+
+double DistanceRms(const TestImage& input, const TestImage& output,
+ size_t start_line, size_t num_lines, double* max_diff) {
+ size_t stride = input.xsize * input.components;
+ size_t start_offset = start_line * stride;
+ auto get_sample = [&](const TestImage& im, const std::vector<uint8_t>& data,
+ size_t idx) -> double {
+ size_t bytes_per_sample = jpegli_bytes_per_sample(im.data_type);
+ bool is_little_endian =
+ (im.endianness == JPEGLI_LITTLE_ENDIAN ||
+ (im.endianness == JPEGLI_NATIVE_ENDIAN && IsLittleEndian()));
+ size_t offset = start_offset + idx * bytes_per_sample;
+ JXL_CHECK(offset < data.size());
+ const uint8_t* p = &data[offset];
+ if (im.data_type == JPEGLI_TYPE_UINT8) {
+ static const double mul8 = 1.0 / 255.0;
+ return p[0] * mul8;
+ } else if (im.data_type == JPEGLI_TYPE_UINT16) {
+ static const double mul16 = 1.0 / 65535.0;
+ return (is_little_endian ? LoadLE16(p) : LoadBE16(p)) * mul16;
+ } else if (im.data_type == JPEGLI_TYPE_FLOAT) {
+ return (is_little_endian ? LoadLEFloat(p) : LoadBEFloat(p));
+ }
+ return 0.0;
+ };
+ double diff2 = 0.0;
+ size_t num_samples = 0;
+ if (max_diff) *max_diff = 0.0;
+ if (!input.pixels.empty() && !output.pixels.empty()) {
+ num_samples = num_lines * stride;
+ for (size_t i = 0; i < num_samples; ++i) {
+ double sample_orig = get_sample(input, input.pixels, i);
+ double sample_output = get_sample(output, output.pixels, i);
+ double diff = sample_orig - sample_output;
+ if (max_diff) *max_diff = std::max(*max_diff, 255.0 * std::abs(diff));
+ diff2 += diff * diff;
+ }
+ } else {
+ JXL_CHECK(!input.raw_data.empty());
+ JXL_CHECK(!output.raw_data.empty());
+ for (size_t c = 0; c < input.raw_data.size(); ++c) {
+ JXL_CHECK(c < output.raw_data.size());
+ num_samples += input.raw_data[c].size();
+ for (size_t i = 0; i < input.raw_data[c].size(); ++i) {
+ double sample_orig = get_sample(input, input.raw_data[c], i);
+ double sample_output = get_sample(output, output.raw_data[c], i);
+ double diff = sample_orig - sample_output;
+ if (max_diff) *max_diff = std::max(*max_diff, 255.0 * std::abs(diff));
+ diff2 += diff * diff;
+ }
+ }
+ }
+ return std::sqrt(diff2 / num_samples) * 255.0;
+}
+
+double DistanceRms(const TestImage& input, const TestImage& output,
+ double* max_diff) {
+ return DistanceRms(input, output, 0, output.ysize, max_diff);
+}
+
+void VerifyOutputImage(const TestImage& input, const TestImage& output,
+ size_t start_line, size_t num_lines, double max_rms,
+ double max_diff) {
+ double max_d;
+ double rms = DistanceRms(input, output, start_line, num_lines, &max_d);
+ printf("rms: %f, max_rms: %f, max_d: %f, max_diff: %f\n", rms, max_rms,
+ max_d, max_diff);
+ JXL_CHECK(rms <= max_rms);
+ JXL_CHECK(max_d <= max_diff);
+}
+
+void VerifyOutputImage(const TestImage& input, const TestImage& output,
+ double max_rms, double max_diff) {
+ JXL_CHECK(output.xsize == input.xsize);
+ JXL_CHECK(output.ysize == input.ysize);
+ JXL_CHECK(output.components == input.components);
+ JXL_CHECK(output.color_space == input.color_space);
+ if (!input.coeffs.empty()) {
+ JXL_CHECK(input.coeffs.size() == input.components);
+ JXL_CHECK(output.coeffs.size() == input.components);
+ for (size_t c = 0; c < input.components; ++c) {
+ JXL_CHECK(output.coeffs[c].size() == input.coeffs[c].size());
+ JXL_CHECK(0 == memcmp(input.coeffs[c].data(), output.coeffs[c].data(),
+ input.coeffs[c].size()));
+ }
+ } else {
+ VerifyOutputImage(input, output, 0, output.ysize, max_rms, max_diff);
+ }
+}
+
+} // namespace jpegli
diff --git a/third_party/jpeg-xl/lib/jpegli/test_utils.h b/third_party/jpeg-xl/lib/jpegli/test_utils.h
new file mode 100644
index 0000000000..f300b5de9e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/test_utils.h
@@ -0,0 +1,318 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_TEST_UTILS_H_
+#define LIB_JPEGLI_TEST_UTILS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+/* clang-format off */
+#include <stdio.h>
+#include <jpeglib.h>
+#include <setjmp.h>
+/* clang-format on */
+
+#include "lib/jpegli/common.h"
+
+namespace jpegli {
+
+// We define this here as well to make sure that the *_api_test.cc tests only
+// use the public API and therefore we don't include any *_internal.h headers.
+template <typename T1, typename T2>
+constexpr inline T1 DivCeil(T1 a, T2 b) {
+ return (a + b - 1) / b;
+}
+
+#define ERROR_HANDLER_SETUP(flavor) \
+ jpeg_error_mgr jerr; \
+ jmp_buf env; \
+ cinfo.err = flavor##_std_error(&jerr); \
+ if (setjmp(env)) { \
+ return false; \
+ } \
+ cinfo.client_data = reinterpret_cast<void*>(&env); \
+ cinfo.err->error_exit = [](j_common_ptr cinfo) { \
+ (*cinfo->err->output_message)(cinfo); \
+ jmp_buf* env = reinterpret_cast<jmp_buf*>(cinfo->client_data); \
+ flavor##_destroy(cinfo); \
+ longjmp(*env, 1); \
+ };
+
+#define ARRAY_SIZE(X) (sizeof(X) / sizeof((X)[0]))
+
+static constexpr int kSpecialMarker0 = 0xe5;
+static constexpr int kSpecialMarker1 = 0xe9;
+static constexpr uint8_t kMarkerData[] = {0, 1, 255, 0, 17};
+static constexpr uint8_t kMarkerSequence[] = {0xe6, 0xe8, 0xe7,
+ 0xe6, 0xe7, 0xe8};
+static constexpr size_t kMarkerSequenceLen = ARRAY_SIZE(kMarkerSequence);
+
+static constexpr jpeg_scan_info kScript1[] = {
+ {1, {0}, 0, 63, 0, 0},
+ {1, {1}, 0, 63, 0, 0},
+ {1, {2}, 0, 63, 0, 0},
+};
+
+static constexpr jpeg_scan_info kScript2[] = {
+ {3, {0, 1, 2}, 0, 0, 0, 0},
+ {1, {0}, 1, 63, 0, 0},
+ {1, {1}, 1, 63, 0, 0},
+ {1, {2}, 1, 63, 0, 0},
+};
+static constexpr jpeg_scan_info kScript3[] = {
+ {1, {0}, 0, 0, 0, 0}, {1, {1}, 0, 0, 0, 0}, {1, {2}, 0, 0, 0, 0},
+ {1, {0}, 1, 63, 0, 0}, {1, {1}, 1, 63, 0, 0}, {1, {2}, 1, 63, 0, 0},
+};
+static constexpr jpeg_scan_info kScript4[] = {
+ {3, {0, 1, 2}, 0, 0, 0, 0}, {1, {0}, 1, 63, 0, 1}, {1, {1}, 1, 63, 0, 1},
+ {1, {2}, 1, 63, 0, 1}, {1, {0}, 1, 63, 1, 0}, {1, {1}, 1, 63, 1, 0},
+ {1, {2}, 1, 63, 1, 0},
+};
+
+struct ScanScript {
+ int num_scans;
+ const jpeg_scan_info* scans;
+};
+
+static constexpr ScanScript kTestScript[] = {
+ {ARRAY_SIZE(kScript1), kScript1},
+ {ARRAY_SIZE(kScript2), kScript2},
+ {ARRAY_SIZE(kScript3), kScript3},
+ {ARRAY_SIZE(kScript4), kScript4},
+};
+static constexpr int kNumTestScripts = ARRAY_SIZE(kTestScript);
+
+static constexpr int kLastScan = 0xffff;
+
+static uint32_t kTestColorMap[] = {
+ 0x000000, 0xff0000, 0x00ff00, 0x0000ff, 0xffff00, 0x00ffff,
+ 0xff00ff, 0xffffff, 0x6251fc, 0x45d9c7, 0xa7f059, 0xd9a945,
+ 0xfa4e44, 0xceaffc, 0xbad7db, 0xc1f0b1, 0xdbca9a, 0xfacac5,
+ 0xf201ff, 0x0063db, 0x00f01c, 0xdbb204, 0xf12f0c, 0x7ba1dc};
+static constexpr int kTestColorMapNumColors = ARRAY_SIZE(kTestColorMap);
+
+std::string IOMethodName(JpegliDataType data_type, JpegliEndianness endianness);
+
+std::string ColorSpaceName(J_COLOR_SPACE colorspace);
+
+enum JpegIOMode {
+ PIXELS,
+ RAW_DATA,
+ COEFFICIENTS,
+};
+
+struct CustomQuantTable {
+ int slot_idx = 0;
+ uint16_t table_type = 0;
+ int scale_factor = 100;
+ bool add_raw = false;
+ bool force_baseline = true;
+ std::vector<unsigned int> basic_table;
+ std::vector<unsigned int> quantval;
+ void Generate();
+};
+
+struct TestImage {
+ size_t xsize = 2268;
+ size_t ysize = 1512;
+ J_COLOR_SPACE color_space = JCS_RGB;
+ size_t components = 3;
+ JpegliDataType data_type = JPEGLI_TYPE_UINT8;
+ JpegliEndianness endianness = JPEGLI_NATIVE_ENDIAN;
+ std::vector<uint8_t> pixels;
+ std::vector<std::vector<uint8_t>> raw_data;
+ std::vector<std::vector<JCOEF>> coeffs;
+ void AllocatePixels() {
+ pixels.resize(ysize * xsize * components *
+ jpegli_bytes_per_sample(data_type));
+ }
+ void Clear() {
+ pixels.clear();
+ raw_data.clear();
+ coeffs.clear();
+ }
+};
+
+std::ostream& operator<<(std::ostream& os, const TestImage& input);
+
+struct CompressParams {
+ int quality = 90;
+ bool set_jpeg_colorspace = false;
+ J_COLOR_SPACE jpeg_color_space = JCS_UNKNOWN;
+ std::vector<int> quant_indexes;
+ std::vector<CustomQuantTable> quant_tables;
+ std::vector<int> h_sampling;
+ std::vector<int> v_sampling;
+ std::vector<int> comp_ids;
+ int override_JFIF = -1;
+ int override_Adobe = -1;
+ bool add_marker = false;
+ bool simple_progression = false;
+ // -1 is library default
+ // 0, 1, 2 is set through jpegli_set_progressive_level()
+ // 2 + N is kScriptN
+ int progressive_mode = -1;
+ unsigned int restart_interval = 0;
+ int restart_in_rows = 0;
+ int smoothing_factor = 0;
+ int optimize_coding = -1;
+ bool use_flat_dc_luma_code = false;
+ bool omit_standard_tables = false;
+ bool xyb_mode = false;
+ bool libjpeg_mode = false;
+ bool use_adaptive_quantization = true;
+ std::vector<uint8_t> icc;
+
+ int h_samp(int c) const { return h_sampling.empty() ? 1 : h_sampling[c]; }
+ int v_samp(int c) const { return v_sampling.empty() ? 1 : v_sampling[c]; }
+ int max_h_sample() const {
+ auto it = std::max_element(h_sampling.begin(), h_sampling.end());
+ return it == h_sampling.end() ? 1 : *it;
+ }
+ int max_v_sample() const {
+ auto it = std::max_element(v_sampling.begin(), v_sampling.end());
+ return it == v_sampling.end() ? 1 : *it;
+ }
+ int comp_width(const TestImage& input, int c) const {
+ return DivCeil(input.xsize * h_samp(c), max_h_sample() * DCTSIZE) * DCTSIZE;
+ }
+ int comp_height(const TestImage& input, int c) const {
+ return DivCeil(input.ysize * v_samp(c), max_v_sample() * DCTSIZE) * DCTSIZE;
+ }
+};
+
+std::ostream& operator<<(std::ostream& os, const CompressParams& jparams);
+
+void VerifyHeader(const CompressParams& jparams, j_decompress_ptr cinfo);
+void VerifyScanHeader(const CompressParams& jparams, j_decompress_ptr cinfo);
+
+enum ColorQuantMode {
+ CQUANT_1PASS,
+ CQUANT_2PASS,
+ CQUANT_EXTERNAL,
+ CQUANT_REUSE,
+};
+
+struct ScanDecompressParams {
+ int max_scan_number;
+ J_DITHER_MODE dither_mode;
+ ColorQuantMode color_quant_mode;
+};
+
+struct DecompressParams {
+ float size_factor = 1.0f;
+ size_t chunk_size = 65536;
+ size_t max_output_lines = 16;
+ JpegIOMode output_mode = PIXELS;
+ JpegliDataType data_type = JPEGLI_TYPE_UINT8;
+ JpegliEndianness endianness = JPEGLI_NATIVE_ENDIAN;
+ bool set_out_color_space = false;
+ J_COLOR_SPACE out_color_space = JCS_UNKNOWN;
+ bool crop_output = false;
+ bool do_block_smoothing = false;
+ bool do_fancy_upsampling = true;
+ bool skip_scans = false;
+ int scale_num = 1;
+ int scale_denom = 1;
+ bool quantize_colors = false;
+ int desired_number_of_colors = 256;
+ std::vector<ScanDecompressParams> scan_params;
+};
+
+void SetDecompressParams(const DecompressParams& dparams,
+ j_decompress_ptr cinfo, bool is_jpegli);
+
+void SetScanDecompressParams(const DecompressParams& dparams,
+ j_decompress_ptr cinfo, int scan_number,
+ bool is_jpegli);
+
+void CopyCoefficients(j_decompress_ptr cinfo, jvirt_barray_ptr* coef_arrays,
+ TestImage* output);
+
+void UnmapColors(uint8_t* row, size_t xsize, int components,
+ JSAMPARRAY colormap, size_t num_colors);
+
+std::string GetTestDataPath(const std::string& filename);
+std::vector<uint8_t> ReadTestData(const std::string& filename);
+
+class PNMParser {
+ public:
+ explicit PNMParser(const uint8_t* data, const size_t len)
+ : pos_(data), end_(data + len) {}
+
+ // Sets "pos" to the first non-header byte/pixel on success.
+ bool ParseHeader(const uint8_t** pos, size_t* xsize, size_t* ysize,
+ size_t* num_channels, size_t* bitdepth);
+
+ private:
+ static bool IsLineBreak(const uint8_t c) { return c == '\r' || c == '\n'; }
+ static bool IsWhitespace(const uint8_t c) {
+ return IsLineBreak(c) || c == '\t' || c == ' ';
+ }
+
+ bool ParseUnsigned(size_t* number);
+
+ bool SkipWhitespace();
+
+ const uint8_t* pos_;
+ const uint8_t* const end_;
+};
+
+bool ReadPNM(const std::vector<uint8_t>& data, size_t* xsize, size_t* ysize,
+ size_t* num_channels, size_t* bitdepth,
+ std::vector<uint8_t>* pixels);
+
+void SetNumChannels(J_COLOR_SPACE colorspace, size_t* channels);
+
+void ConvertToGrayscale(TestImage* img);
+
+void GeneratePixels(TestImage* img);
+
+void GenerateRawData(const CompressParams& jparams, TestImage* img);
+
+void GenerateCoeffs(const CompressParams& jparams, TestImage* img);
+
+void EncodeWithJpegli(const TestImage& input, const CompressParams& jparams,
+ j_compress_ptr cinfo);
+
+bool EncodeWithJpegli(const TestImage& input, const CompressParams& jparams,
+ std::vector<uint8_t>* compressed);
+
+// Verifies that an image encoded with libjpegli can be decoded with libjpeg,
+// and checks that the jpeg coding metadata matches jparams.
+void DecodeAllScansWithLibjpeg(const CompressParams& jparams,
+ const DecompressParams& dparams,
+ const std::vector<uint8_t>& compressed,
+ std::vector<TestImage>* output_progression);
+void DecodeWithLibjpeg(const CompressParams& jparams,
+ const DecompressParams& dparams, j_decompress_ptr cinfo,
+ TestImage* output);
+void DecodeWithLibjpeg(const CompressParams& jparams,
+ const DecompressParams& dparams,
+ const std::vector<uint8_t>& compressed,
+ TestImage* output);
+
+double DistanceRms(const TestImage& input, const TestImage& output,
+ size_t start_line, size_t num_lines,
+ double* max_diff = nullptr);
+
+double DistanceRms(const TestImage& input, const TestImage& output,
+ double* max_diff = nullptr);
+
+void VerifyOutputImage(const TestImage& input, const TestImage& output,
+ size_t start_line, size_t num_lines, double max_rms,
+ double max_diff = 255.0);
+
+void VerifyOutputImage(const TestImage& input, const TestImage& output,
+ double max_rms, double max_diff = 255.0);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_TEST_UTILS_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/testing.h b/third_party/jpeg-xl/lib/jpegli/testing.h
new file mode 100644
index 0000000000..873a0171e7
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/testing.h
@@ -0,0 +1,35 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_TESTING_H_
+#define LIB_JPEGLI_TESTING_H_
+
+// GTest/GMock specific macros / wrappers.
+
+// gmock unconditionally redefines those macros (to wrong values).
+// Lets include it only here and mitigate the problem.
+#pragma push_macro("PRIdS")
+#pragma push_macro("PRIuS")
+#include "gmock/gmock.h"
+#pragma pop_macro("PRIuS")
+#pragma pop_macro("PRIdS")
+
+#include "gtest/gtest.h"
+
+// googletest before 1.10 didn't define INSTANTIATE_TEST_SUITE_P() but instead
+// used INSTANTIATE_TEST_CASE_P which is now deprecated.
+#ifdef INSTANTIATE_TEST_SUITE_P
+#define JPEGLI_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_SUITE_P
+#else
+#define JPEGLI_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_CASE_P
+#endif
+
+// Ensures that we don't make our test bounds too lax, effectively disabling the
+// tests.
+MATCHER_P(IsSlightlyBelow, max, "") {
+ return max * 0.75 <= arg && arg <= max * 1.0;
+}
+
+#endif // LIB_JPEGLI_TESTING_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/transcode_api_test.cc b/third_party/jpeg-xl/lib/jpegli/transcode_api_test.cc
new file mode 100644
index 0000000000..1d99ce37fa
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/transcode_api_test.cc
@@ -0,0 +1,133 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <vector>
+
+#include "lib/jpegli/decode.h"
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/base/status.h"
+
+namespace jpegli {
+namespace {
+
+void TranscodeWithJpegli(const std::vector<uint8_t>& jpeg_input,
+ const CompressParams& jparams,
+ std::vector<uint8_t>* jpeg_output) {
+ jpeg_decompress_struct dinfo = {};
+ jpeg_compress_struct cinfo = {};
+ uint8_t* transcoded_data = nullptr;
+ unsigned long transcoded_size;
+ const auto try_catch_block = [&]() -> bool {
+ ERROR_HANDLER_SETUP(jpegli);
+ dinfo.err = cinfo.err;
+ dinfo.client_data = cinfo.client_data;
+ jpegli_create_decompress(&dinfo);
+ jpegli_mem_src(&dinfo, jpeg_input.data(), jpeg_input.size());
+ EXPECT_EQ(JPEG_REACHED_SOS,
+ jpegli_read_header(&dinfo, /*require_image=*/TRUE));
+ jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&dinfo);
+ JXL_CHECK(coef_arrays != nullptr);
+ jpegli_create_compress(&cinfo);
+ jpegli_mem_dest(&cinfo, &transcoded_data, &transcoded_size);
+ jpegli_copy_critical_parameters(&dinfo, &cinfo);
+ jpegli_set_progressive_level(&cinfo, jparams.progressive_mode);
+ cinfo.optimize_coding = jparams.optimize_coding;
+ jpegli_write_coefficients(&cinfo, coef_arrays);
+ jpegli_finish_compress(&cinfo);
+ jpegli_finish_decompress(&dinfo);
+ return true;
+ };
+ ASSERT_TRUE(try_catch_block());
+ jpegli_destroy_decompress(&dinfo);
+ jpegli_destroy_compress(&cinfo);
+ if (transcoded_data) {
+ jpeg_output->assign(transcoded_data, transcoded_data + transcoded_size);
+ free(transcoded_data);
+ }
+}
+
+struct TestConfig {
+ TestImage input;
+ CompressParams jparams;
+};
+
+class TranscodeAPITestParam : public ::testing::TestWithParam<TestConfig> {};
+
+TEST_P(TranscodeAPITestParam, TestAPI) {
+ TestConfig config = GetParam();
+ CompressParams& jparams = config.jparams;
+ GeneratePixels(&config.input);
+
+ // Start with sequential non-optimized jpeg.
+ jparams.progressive_mode = 0;
+ jparams.optimize_coding = 0;
+ std::vector<uint8_t> compressed;
+ ASSERT_TRUE(EncodeWithJpegli(config.input, jparams, &compressed));
+ TestImage output0;
+ DecodeWithLibjpeg(jparams, DecompressParams(), compressed, &output0);
+
+ // Transcode first to a sequential optimized jpeg, and then further to
+ // a progressive jpeg.
+ for (int progr : {0, 2}) {
+ std::vector<uint8_t> transcoded;
+ jparams.progressive_mode = progr;
+ jparams.optimize_coding = 1;
+ TranscodeWithJpegli(compressed, jparams, &transcoded);
+
+ // We expect a size reduction of at least 2%.
+ EXPECT_LT(transcoded.size(), compressed.size() * 0.98f);
+
+ // Verify that transcoding is lossless.
+ TestImage output1;
+ DecodeWithLibjpeg(jparams, DecompressParams(), transcoded, &output1);
+ ASSERT_EQ(output0.pixels.size(), output1.pixels.size());
+ EXPECT_EQ(0, memcmp(output0.pixels.data(), output1.pixels.data(),
+ output0.pixels.size()));
+ compressed = transcoded;
+ }
+}
+
+std::vector<TestConfig> GenerateTests() {
+ std::vector<TestConfig> all_tests;
+ const size_t xsize0 = 1024;
+ const size_t ysize0 = 768;
+ for (int dxsize : {0, 1, 8, 9}) {
+ for (int dysize : {0, 1, 8, 9}) {
+ for (int h_sampling : {1, 2}) {
+ for (int v_sampling : {1, 2}) {
+ TestConfig config;
+ config.input.xsize = xsize0 + dxsize;
+ config.input.ysize = ysize0 + dysize;
+ config.jparams.h_sampling = {h_sampling, 1, 1};
+ config.jparams.v_sampling = {v_sampling, 1, 1};
+ all_tests.push_back(config);
+ }
+ }
+ }
+ }
+ return all_tests;
+}
+
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+ os << c.input;
+ os << c.jparams;
+ return os;
+}
+
+std::string TestDescription(
+ const testing::TestParamInfo<TranscodeAPITestParam::ParamType>& info) {
+ std::stringstream name;
+ name << info.param;
+ return name.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(TranscodeAPITest, TranscodeAPITestParam,
+ testing::ValuesIn(GenerateTests()),
+ TestDescription);
+
+} // namespace
+} // namespace jpegli
diff --git a/third_party/jpeg-xl/lib/jpegli/transpose-inl.h b/third_party/jpeg-xl/lib/jpegli/transpose-inl.h
new file mode 100644
index 0000000000..9fdd222f4e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/transpose-inl.h
@@ -0,0 +1,111 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JPEGLI_TRANSPOSE_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JPEGLI_TRANSPOSE_INL_H_
+#undef LIB_JPEGLI_TRANSPOSE_INL_H_
+#else
+#define LIB_JPEGLI_TRANSPOSE_INL_H_
+#endif
+
+#include "lib/jxl/base/compiler_specific.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+namespace {
+
+#if HWY_CAP_GE256
+static JXL_INLINE void Transpose8x8Block(const float* JXL_RESTRICT from,
+ float* JXL_RESTRICT to) {
+ const HWY_CAPPED(float, 8) d;
+ auto i0 = Load(d, from);
+ auto i1 = Load(d, from + 1 * 8);
+ auto i2 = Load(d, from + 2 * 8);
+ auto i3 = Load(d, from + 3 * 8);
+ auto i4 = Load(d, from + 4 * 8);
+ auto i5 = Load(d, from + 5 * 8);
+ auto i6 = Load(d, from + 6 * 8);
+ auto i7 = Load(d, from + 7 * 8);
+
+ const auto q0 = InterleaveLower(d, i0, i2);
+ const auto q1 = InterleaveLower(d, i1, i3);
+ const auto q2 = InterleaveUpper(d, i0, i2);
+ const auto q3 = InterleaveUpper(d, i1, i3);
+ const auto q4 = InterleaveLower(d, i4, i6);
+ const auto q5 = InterleaveLower(d, i5, i7);
+ const auto q6 = InterleaveUpper(d, i4, i6);
+ const auto q7 = InterleaveUpper(d, i5, i7);
+
+ const auto r0 = InterleaveLower(d, q0, q1);
+ const auto r1 = InterleaveUpper(d, q0, q1);
+ const auto r2 = InterleaveLower(d, q2, q3);
+ const auto r3 = InterleaveUpper(d, q2, q3);
+ const auto r4 = InterleaveLower(d, q4, q5);
+ const auto r5 = InterleaveUpper(d, q4, q5);
+ const auto r6 = InterleaveLower(d, q6, q7);
+ const auto r7 = InterleaveUpper(d, q6, q7);
+
+ i0 = ConcatLowerLower(d, r4, r0);
+ i1 = ConcatLowerLower(d, r5, r1);
+ i2 = ConcatLowerLower(d, r6, r2);
+ i3 = ConcatLowerLower(d, r7, r3);
+ i4 = ConcatUpperUpper(d, r4, r0);
+ i5 = ConcatUpperUpper(d, r5, r1);
+ i6 = ConcatUpperUpper(d, r6, r2);
+ i7 = ConcatUpperUpper(d, r7, r3);
+
+ Store(i0, d, to);
+ Store(i1, d, to + 1 * 8);
+ Store(i2, d, to + 2 * 8);
+ Store(i3, d, to + 3 * 8);
+ Store(i4, d, to + 4 * 8);
+ Store(i5, d, to + 5 * 8);
+ Store(i6, d, to + 6 * 8);
+ Store(i7, d, to + 7 * 8);
+}
+#elif HWY_TARGET != HWY_SCALAR
+static JXL_INLINE void Transpose8x8Block(const float* JXL_RESTRICT from,
+ float* JXL_RESTRICT to) {
+ const HWY_CAPPED(float, 4) d;
+ for (size_t n = 0; n < 8; n += 4) {
+ for (size_t m = 0; m < 8; m += 4) {
+ auto p0 = Load(d, from + n * 8 + m);
+ auto p1 = Load(d, from + (n + 1) * 8 + m);
+ auto p2 = Load(d, from + (n + 2) * 8 + m);
+ auto p3 = Load(d, from + (n + 3) * 8 + m);
+ const auto q0 = InterleaveLower(d, p0, p2);
+ const auto q1 = InterleaveLower(d, p1, p3);
+ const auto q2 = InterleaveUpper(d, p0, p2);
+ const auto q3 = InterleaveUpper(d, p1, p3);
+
+ const auto r0 = InterleaveLower(d, q0, q1);
+ const auto r1 = InterleaveUpper(d, q0, q1);
+ const auto r2 = InterleaveLower(d, q2, q3);
+ const auto r3 = InterleaveUpper(d, q2, q3);
+ Store(r0, d, to + m * 8 + n);
+ Store(r1, d, to + (1 + m) * 8 + n);
+ Store(r2, d, to + (2 + m) * 8 + n);
+ Store(r3, d, to + (3 + m) * 8 + n);
+ }
+ }
+}
+#else
+static JXL_INLINE void Transpose8x8Block(const float* JXL_RESTRICT from,
+ float* JXL_RESTRICT to) {
+ for (size_t n = 0; n < 8; ++n) {
+ for (size_t m = 0; m < 8; ++m) {
+ to[8 * n + m] = from[8 * m + n];
+ }
+ }
+}
+#endif
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+#endif // LIB_JPEGLI_TRANSPOSE_INL_H_
diff --git a/third_party/jpeg-xl/lib/jpegli/upsample.cc b/third_party/jpeg-xl/lib/jpegli/upsample.cc
new file mode 100644
index 0000000000..5559aa78a6
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/upsample.cc
@@ -0,0 +1,137 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/upsample.h"
+
+#include <string.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/upsample.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Vec;
+
+#if HWY_CAP_GE512
+using hwy::HWY_NAMESPACE::Half;
+using hwy::HWY_NAMESPACE::Vec;
+template <size_t i, class DF, class V>
+HWY_INLINE Vec<Half<Half<DF>>> Quarter(const DF df, V v) {
+ using HF = Half<DF>;
+ using HHF = Half<HF>;
+ auto half = i >= 2 ? UpperHalf(HF(), v) : LowerHalf(HF(), v);
+ return i & 1 ? UpperHalf(HHF(), half) : LowerHalf(HHF(), half);
+}
+
+template <class DF, class V>
+HWY_INLINE Vec<DF> Concat4(const DF df, V v0, V v1, V v2, V v3) {
+ using HF = Half<DF>;
+ return Combine(DF(), Combine(HF(), v3, v2), Combine(HF(), v1, v0));
+}
+
+#endif
+
+// Stores v0[0], v1[0], v0[1], v1[1], ... to mem, in this order. Mem must be
+// aligned.
+template <class DF, class V, typename T>
+void StoreInterleaved(const DF df, V v0, V v1, T* mem) {
+ static_assert(sizeof(T) == 4, "only use StoreInterleaved for 4-byte types");
+#if HWY_TARGET == HWY_SCALAR
+ Store(v0, df, mem);
+ Store(v1, df, mem + 1);
+#elif !HWY_CAP_GE256
+ Store(InterleaveLower(df, v0, v1), df, mem);
+ Store(InterleaveUpper(df, v0, v1), df, mem + Lanes(df));
+#else
+ if (!HWY_CAP_GE512 || Lanes(df) == 8) {
+ auto t0 = InterleaveLower(df, v0, v1);
+ auto t1 = InterleaveUpper(df, v0, v1);
+ Store(ConcatLowerLower(df, t1, t0), df, mem);
+ Store(ConcatUpperUpper(df, t1, t0), df, mem + Lanes(df));
+ } else {
+#if HWY_CAP_GE512
+ auto t0 = InterleaveLower(df, v0, v1);
+ auto t1 = InterleaveUpper(df, v0, v1);
+ Store(Concat4(df, Quarter<0>(df, t0), Quarter<0>(df, t1),
+ Quarter<1>(df, t0), Quarter<1>(df, t1)),
+ df, mem);
+ Store(Concat4(df, Quarter<2>(df, t0), Quarter<2>(df, t1),
+ Quarter<3>(df, t0), Quarter<3>(df, t1)),
+ df, mem + Lanes(df));
+#endif
+ }
+#endif
+}
+
+void Upsample2Horizontal(float* JXL_RESTRICT row,
+ float* JXL_RESTRICT scratch_space, size_t len_out) {
+ HWY_FULL(float) df;
+ auto threefour = Set(df, 0.75f);
+ auto onefour = Set(df, 0.25f);
+ const size_t len_in = (len_out + 1) >> 1;
+ memcpy(scratch_space, row, len_in * sizeof(row[0]));
+ scratch_space[-1] = scratch_space[0];
+ scratch_space[len_in] = scratch_space[len_in - 1];
+ for (size_t x = 0; x < len_in; x += Lanes(df)) {
+ auto current = Mul(Load(df, scratch_space + x), threefour);
+ auto prev = LoadU(df, scratch_space + x - 1);
+ auto next = LoadU(df, scratch_space + x + 1);
+ auto left = MulAdd(onefour, prev, current);
+ auto right = MulAdd(onefour, next, current);
+ StoreInterleaved(df, left, right, row + x * 2);
+ }
+}
+
+void Upsample2Vertical(const float* JXL_RESTRICT row_top,
+ const float* JXL_RESTRICT row_mid,
+ const float* JXL_RESTRICT row_bot,
+ float* JXL_RESTRICT row_out0,
+ float* JXL_RESTRICT row_out1, size_t len) {
+ HWY_FULL(float) df;
+ auto threefour = Set(df, 0.75f);
+ auto onefour = Set(df, 0.25f);
+ for (size_t x = 0; x < len; x += Lanes(df)) {
+ auto it = Load(df, row_top + x);
+ auto im = Load(df, row_mid + x);
+ auto ib = Load(df, row_bot + x);
+ auto im_scaled = Mul(im, threefour);
+ Store(MulAdd(it, onefour, im_scaled), df, row_out0 + x);
+ Store(MulAdd(ib, onefour, im_scaled), df, row_out1 + x);
+ }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+
+HWY_EXPORT(Upsample2Horizontal);
+HWY_EXPORT(Upsample2Vertical);
+
+void Upsample2Horizontal(float* JXL_RESTRICT row,
+ float* JXL_RESTRICT scratch_space, size_t len_out) {
+ return HWY_DYNAMIC_DISPATCH(Upsample2Horizontal)(row, scratch_space, len_out);
+}
+
+void Upsample2Vertical(const float* JXL_RESTRICT row_top,
+ const float* JXL_RESTRICT row_mid,
+ const float* JXL_RESTRICT row_bot,
+ float* JXL_RESTRICT row_out0,
+ float* JXL_RESTRICT row_out1, size_t len) {
+ return HWY_DYNAMIC_DISPATCH(Upsample2Vertical)(row_top, row_mid, row_bot,
+ row_out0, row_out1, len);
+}
+} // namespace jpegli
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jpegli/upsample.h b/third_party/jpeg-xl/lib/jpegli/upsample.h
new file mode 100644
index 0000000000..1a057208dc
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jpegli/upsample.h
@@ -0,0 +1,26 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_UPSAMPLE_H_
+#define LIB_JPEGLI_UPSAMPLE_H_
+
+#include <stddef.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jpegli {
+
+void Upsample2Horizontal(float* JXL_RESTRICT row,
+ float* JXL_RESTRICT scratch_space, size_t len_out);
+
+void Upsample2Vertical(const float* JXL_RESTRICT row_top,
+ const float* JXL_RESTRICT row_mid,
+ const float* JXL_RESTRICT row_bot,
+ float* JXL_RESTRICT row_out0,
+ float* JXL_RESTRICT row_out1, size_t len);
+
+} // namespace jpegli
+
+#endif // LIB_JPEGLI_UPSAMPLE_H_
diff --git a/third_party/jpeg-xl/lib/jxl.cmake b/third_party/jpeg-xl/lib/jxl.cmake
new file mode 100644
index 0000000000..2672cb4c77
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl.cmake
@@ -0,0 +1,329 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include(compatibility.cmake)
+include(jxl_lists.cmake)
+
+if (JPEGXL_ENABLE_TOOLS OR JPEGXL_ENABLE_DEVTOOLS OR JPEGXL_ENABLE_BOXES)
+list(APPEND JPEGXL_INTERNAL_DEC_SOURCES ${JPEGXL_INTERNAL_DEC_BOX_SOURCES})
+endif()
+
+if (JPEGXL_ENABLE_TRANSCODE_JPEG OR JPEGXL_ENABLE_TOOLS OR JPEGXL_ENABLE_DEVTOOLS)
+list(APPEND JPEGXL_INTERNAL_DEC_SOURCES ${JPEGXL_INTERNAL_DEC_JPEG_SOURCES})
+endif()
+
+set_source_files_properties(jxl/enc_fast_lossless.cc PROPERTIES COMPILE_FLAGS -O3)
+
+set(JPEGXL_DEC_INTERNAL_LIBS
+ hwy
+ Threads::Threads
+ ${ATOMICS_LIBRARIES}
+)
+
+if (JPEGXL_ENABLE_TRANSCODE_JPEG OR JPEGXL_ENABLE_BOXES)
+list(APPEND JPEGXL_DEC_INTERNAL_LIBS brotlidec brotlicommon)
+endif()
+
+set(JPEGXL_INTERNAL_LIBS
+ ${JPEGXL_DEC_INTERNAL_LIBS}
+ brotlienc
+)
+
+if (JPEGXL_ENABLE_SKCMS)
+ list(APPEND JPEGXL_INTERNAL_FLAGS -DJPEGXL_ENABLE_SKCMS=1)
+ if (JPEGXL_BUNDLE_SKCMS)
+ list(APPEND JPEGXL_INTERNAL_FLAGS -DJPEGXL_BUNDLE_SKCMS=1)
+ # skcms objects are later added to JPEGXL_INTERNAL_OBJECTS
+ else ()
+ list(APPEND JPEGXL_INTERNAL_LIBS skcms)
+ endif ()
+else ()
+ list(APPEND JPEGXL_INTERNAL_LIBS lcms2)
+endif ()
+
+if (JPEGXL_ENABLE_TRANSCODE_JPEG)
+ list(APPEND JPEGXL_INTERNAL_FLAGS -DJPEGXL_ENABLE_TRANSCODE_JPEG=1)
+else()
+ list(APPEND JPEGXL_INTERNAL_FLAGS -DJPEGXL_ENABLE_TRANSCODE_JPEG=0)
+endif ()
+
+if (JPEGXL_ENABLE_BOXES)
+ list(APPEND JPEGXL_INTERNAL_FLAGS -DJPEGXL_ENABLE_BOXES=1)
+else()
+ list(APPEND JPEGXL_INTERNAL_FLAGS -DJPEGXL_ENABLE_BOXES=0)
+endif ()
+
+set(OBJ_COMPILE_DEFINITIONS
+ JPEGXL_MAJOR_VERSION=${JPEGXL_MAJOR_VERSION}
+ JPEGXL_MINOR_VERSION=${JPEGXL_MINOR_VERSION}
+ JPEGXL_PATCH_VERSION=${JPEGXL_PATCH_VERSION}
+ # Used to determine if we are building the library when defined or just
+ # including the library when not defined. This is public so libjxl shared
+ # library gets this define too.
+ JXL_INTERNAL_LIBRARY_BUILD
+)
+
+# Generate version.h
+configure_file("jxl/version.h.in" "include/jxl/version.h")
+
+# Headers for exporting/importing public headers
+include(GenerateExportHeader)
+
+# CMake does not allow generate_export_header for INTERFACE library, so we
+# add this stub library just for file generation.
+add_library(jxl_export OBJECT ${JPEGXL_INTERNAL_PUBLIC_HEADERS})
+set_target_properties(jxl_export PROPERTIES
+ CXX_VISIBILITY_PRESET hidden
+ VISIBILITY_INLINES_HIDDEN 1
+ DEFINE_SYMBOL JXL_INTERNAL_LIBRARY_BUILD
+ LINKER_LANGUAGE CXX
+)
+generate_export_header(jxl_export
+ BASE_NAME JXL
+ EXPORT_FILE_NAME include/jxl/jxl_export.h)
+# Place all public headers in a single directory.
+foreach(path ${JPEGXL_INTERNAL_PUBLIC_HEADERS})
+ configure_file(
+ ${path}
+ ${path}
+ COPYONLY
+ )
+endforeach()
+
+add_library(jxl_includes INTERFACE)
+target_include_directories(jxl_includes SYSTEM INTERFACE
+ "$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/include>"
+)
+add_dependencies(jxl_includes jxl_export)
+
+# Base headers / utilities.
+add_library(jxl_base-obj OBJECT ${JPEGXL_INTERNAL_BASE_SOURCES})
+target_compile_options(jxl_base-obj PRIVATE ${JPEGXL_INTERNAL_FLAGS})
+target_compile_options(jxl_base-obj PUBLIC ${JPEGXL_COVERAGE_FLAGS})
+set_property(TARGET jxl_base-obj PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_include_directories(jxl_base-obj PUBLIC
+ ${PROJECT_SOURCE_DIR}
+ ${JXL_HWY_INCLUDE_DIRS}
+)
+
+jxl_link_libraries(jxl_base-obj jxl_includes)
+
+if(JPEGXL_ENABLE_PROFILER)
+ target_compile_definitions(jxl_base-obj PUBLIC -DJXL_PROFILER_ENABLED=1)
+endif()
+
+# Decoder-only object library
+add_library(jxl_dec-obj OBJECT ${JPEGXL_INTERNAL_DEC_SOURCES})
+target_compile_options(jxl_dec-obj PRIVATE ${JPEGXL_INTERNAL_FLAGS})
+target_compile_options(jxl_dec-obj PUBLIC ${JPEGXL_COVERAGE_FLAGS})
+set_property(TARGET jxl_dec-obj PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_include_directories(jxl_dec-obj PUBLIC
+ "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>"
+ "${JXL_HWY_INCLUDE_DIRS}"
+ "$<BUILD_INTERFACE:$<TARGET_PROPERTY:brotlicommon,INTERFACE_INCLUDE_DIRECTORIES>>"
+)
+target_compile_definitions(jxl_dec-obj PUBLIC
+ ${OBJ_COMPILE_DEFINITIONS}
+)
+jxl_link_libraries(jxl_dec-obj jxl_base-obj)
+
+# Object library. This is used to hold the set of objects and properties.
+add_library(jxl_enc-obj OBJECT ${JPEGXL_INTERNAL_ENC_SOURCES})
+target_compile_options(jxl_enc-obj PRIVATE ${JPEGXL_INTERNAL_FLAGS})
+target_compile_options(jxl_enc-obj PUBLIC ${JPEGXL_COVERAGE_FLAGS})
+set_property(TARGET jxl_enc-obj PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_include_directories(jxl_enc-obj PUBLIC
+ ${PROJECT_SOURCE_DIR}
+ ${JXL_HWY_INCLUDE_DIRS}
+ $<TARGET_PROPERTY:brotlicommon,INTERFACE_INCLUDE_DIRECTORIES>
+)
+target_compile_definitions(jxl_enc-obj PUBLIC
+ ${OBJ_COMPILE_DEFINITIONS}
+)
+jxl_link_libraries(jxl_enc-obj jxl_base-obj)
+
+#TODO(lode): don't depend on CMS for the core library
+if (JPEGXL_ENABLE_SKCMS)
+ target_include_directories(jxl_enc-obj PRIVATE
+ $<TARGET_PROPERTY:skcms,INCLUDE_DIRECTORIES>
+ )
+else ()
+ target_include_directories(jxl_enc-obj PRIVATE
+ $<TARGET_PROPERTY:lcms2,INCLUDE_DIRECTORIES>
+ )
+endif ()
+
+set_target_properties(jxl_dec-obj PROPERTIES
+ CXX_VISIBILITY_PRESET hidden
+ VISIBILITY_INLINES_HIDDEN 1
+ DEFINE_SYMBOL JXL_INTERNAL_LIBRARY_BUILD
+)
+
+set_target_properties(jxl_enc-obj PROPERTIES
+ CXX_VISIBILITY_PRESET hidden
+ VISIBILITY_INLINES_HIDDEN 1
+ DEFINE_SYMBOL JXL_INTERNAL_LIBRARY_BUILD
+)
+
+# Private static library. This exposes all the internal functions and is used
+# for tests.
+add_library(jxl_dec-static STATIC
+ $<TARGET_OBJECTS:jxl_base-obj>
+ $<TARGET_OBJECTS:jxl_dec-obj>
+)
+target_link_libraries(jxl_dec-static
+ PUBLIC ${JPEGXL_COVERAGE_FLAGS} ${JPEGXL_DEC_INTERNAL_LIBS} jxl_includes)
+
+# The list of objects in the static and shared libraries.
+set(JPEGXL_INTERNAL_OBJECTS
+ $<TARGET_OBJECTS:jxl_base-obj>
+ $<TARGET_OBJECTS:jxl_enc-obj>
+ $<TARGET_OBJECTS:jxl_dec-obj>
+)
+if (JPEGXL_ENABLE_SKCMS AND JPEGXL_BUNDLE_SKCMS)
+ list(APPEND JPEGXL_INTERNAL_OBJECTS $<TARGET_OBJECTS:skcms-obj>)
+endif()
+
+# Private static library. This exposes all the internal functions and is used
+# for tests.
+# TODO(lode): once the source files are correctly split so that it is possible
+# to do, remove $<TARGET_OBJECTS:jxl_dec-obj> here and depend on jxl_dec-static
+add_library(jxl-static STATIC ${JPEGXL_INTERNAL_OBJECTS})
+target_link_libraries(jxl-static
+ PUBLIC ${JPEGXL_COVERAGE_FLAGS} ${JPEGXL_INTERNAL_LIBS} jxl_includes)
+target_include_directories(jxl-static PUBLIC
+ "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>")
+
+# JXL_EXPORT is defined to "__declspec(dllimport)" automatically by CMake
+# in Windows builds when including headers from the C API and compiling from
+# outside the jxl library. This is required when using the shared library,
+# however in windows this causes the function to not be found when linking
+# against the static library. This define JXL_EXPORT= here forces it to not
+# use dllimport in tests and other tools that require the static library.
+target_compile_definitions(jxl-static INTERFACE -DJXL_EXPORT=)
+target_compile_definitions(jxl_dec-static INTERFACE -DJXL_EXPORT=)
+
+# TODO(deymo): Move TCMalloc linkage to the tools/ directory since the library
+# shouldn't do any allocs anyway.
+if(JPEGXL_ENABLE_TCMALLOC)
+ pkg_check_modules(TCMallocMinimal REQUIRED IMPORTED_TARGET
+ libtcmalloc_minimal)
+ # tcmalloc 2.8 has concurrency issues that makes it sometimes return nullptr
+ # for large allocs. See https://github.com/gperftools/gperftools/issues/1204
+ # for details.
+ if(TCMallocMinimal_VERSION VERSION_EQUAL 2.8)
+ message(FATAL_ERROR
+ "tcmalloc version 2.8 has a concurrency bug. You have installed "
+ "version ${TCMallocMinimal_VERSION}, please either downgrade tcmalloc "
+ "to version 2.7, upgrade to 2.8.1 or newer or pass "
+ "-DJPEGXL_ENABLE_TCMALLOC=OFF to jpeg-xl cmake line. See the following "
+ "bug for details:\n"
+ " https://github.com/gperftools/gperftools/issues/1204\n")
+ endif()
+ target_link_libraries(jxl-static PUBLIC PkgConfig::TCMallocMinimal)
+endif() # JPEGXL_ENABLE_TCMALLOC
+
+# Install the static library too, but as jxl.a file without the -static except
+# in Windows.
+if (NOT WIN32 OR MINGW)
+ set_target_properties(jxl-static PROPERTIES OUTPUT_NAME "jxl")
+ set_target_properties(jxl_dec-static PROPERTIES OUTPUT_NAME "jxl_dec")
+endif()
+install(TARGETS jxl-static DESTINATION ${CMAKE_INSTALL_LIBDIR})
+install(TARGETS jxl_dec-static DESTINATION ${CMAKE_INSTALL_LIBDIR})
+
+if (BUILD_SHARED_LIBS)
+
+# Public shared library.
+add_library(jxl SHARED ${JPEGXL_INTERNAL_OBJECTS})
+strip_static(JPEGXL_INTERNAL_SHARED_LIBS JPEGXL_INTERNAL_LIBS)
+target_link_libraries(jxl PUBLIC ${JPEGXL_COVERAGE_FLAGS} jxl_includes)
+target_link_libraries(jxl PRIVATE ${JPEGXL_INTERNAL_SHARED_LIBS})
+# Shared library include path contains only the "include/" paths.
+set_target_properties(jxl PROPERTIES
+ VERSION ${JPEGXL_LIBRARY_VERSION}
+ SOVERSION ${JPEGXL_LIBRARY_SOVERSION}
+ LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}"
+ RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
+
+# Public shared decoder library.
+add_library(jxl_dec SHARED $<TARGET_OBJECTS:jxl_base-obj> $<TARGET_OBJECTS:jxl_dec-obj>)
+strip_static(JPEGXL_DEC_INTERNAL_SHARED_LIBS JPEGXL_DEC_INTERNAL_LIBS)
+target_link_libraries(jxl_dec PUBLIC ${JPEGXL_COVERAGE_FLAGS} jxl_includes)
+target_link_libraries(jxl_dec PRIVATE ${JPEGXL_DEC_INTERNAL_SHARED_LIBS})
+# Shared library include path contains only the "include/" paths.
+set_target_properties(jxl_dec PROPERTIES
+ VERSION ${JPEGXL_LIBRARY_VERSION}
+ SOVERSION ${JPEGXL_LIBRARY_SOVERSION}
+ LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}"
+ RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
+
+# Check whether the linker support excluding libs
+set(LINKER_EXCLUDE_LIBS_FLAG "-Wl,--exclude-libs=ALL")
+include(CheckCSourceCompiles)
+list(APPEND CMAKE_EXE_LINKER_FLAGS ${LINKER_EXCLUDE_LIBS_FLAG})
+check_c_source_compiles("int main(){return 0;}" LINKER_SUPPORT_EXCLUDE_LIBS)
+list(REMOVE_ITEM CMAKE_EXE_LINKER_FLAGS ${LINKER_EXCLUDE_LIBS_FLAG})
+
+# Add a jxl.version file as a version script to tag symbols with the
+# appropriate version number. This script is also used to limit what's exposed
+# in the shared library from the static dependencies bundled here.
+foreach(target IN ITEMS jxl jxl_dec)
+ set_target_properties(${target} PROPERTIES
+ LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/jxl/jxl.version)
+ if(APPLE)
+ set_property(TARGET ${target} APPEND_STRING PROPERTY
+ LINK_FLAGS "-Wl,-exported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/jxl/jxl_osx.syms")
+ elseif(WIN32)
+ # Nothing needed here, we use __declspec(dllexport) (jxl_export.h)
+ else()
+ set_property(TARGET ${target} APPEND_STRING PROPERTY
+ LINK_FLAGS " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/jxl/jxl.version")
+ endif() # APPLE
+ # This hides the default visibility symbols from static libraries bundled into
+ # the shared library. In particular this prevents exposing symbols from hwy
+ # and skcms in the shared library.
+ if(LINKER_SUPPORT_EXCLUDE_LIBS)
+ set_property(TARGET ${target} APPEND_STRING PROPERTY
+ LINK_FLAGS " ${LINKER_EXCLUDE_LIBS_FLAG}")
+ endif()
+endforeach()
+
+# Only install libjxl shared library. The libjxl_dec is not installed since it
+# contains symbols also in libjxl which would conflict if programs try to use
+# both.
+install(TARGETS jxl
+ RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+ LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+else()
+add_library(jxl ALIAS jxl-static)
+add_library(jxl_dec ALIAS jxl_dec-static)
+endif() # BUILD_SHARED_LIBS
+
+# Add a pkg-config file for libjxl.
+set(JPEGXL_LIBRARY_REQUIRES
+ "libhwy libbrotlienc libbrotlidec")
+if(NOT JPEGXL_ENABLE_SKCMS)
+ set(JPEGXL_LIBRARY_REQUIRES "${JPEGXL_LIBRARY_REQUIRES} lcms2")
+endif()
+
+# Allow adding prefix if CMAKE_INSTALL_INCLUDEDIR not absolute.
+if(IS_ABSOLUTE "${CMAKE_INSTALL_INCLUDEDIR}")
+ set(PKGCONFIG_TARGET_INCLUDES "${CMAKE_INSTALL_INCLUDEDIR}")
+else()
+ set(PKGCONFIG_TARGET_INCLUDES "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}")
+endif()
+# Allow adding prefix if CMAKE_INSTALL_LIBDIR not absolute.
+if(IS_ABSOLUTE "${CMAKE_INSTALL_LIBDIR}")
+ set(PKGCONFIG_TARGET_LIBS "${CMAKE_INSTALL_LIBDIR}")
+else()
+ set(PKGCONFIG_TARGET_LIBS "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}")
+endif()
+
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/jxl/libjxl.pc.in"
+ "libjxl.pc" @ONLY)
+install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libjxl.pc"
+ DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
diff --git a/third_party/jpeg-xl/lib/jxl/ac_context.h b/third_party/jpeg-xl/lib/jxl/ac_context.h
new file mode 100644
index 0000000000..a2b9e046d1
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/ac_context.h
@@ -0,0 +1,149 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_AC_CONTEXT_H_
+#define LIB_JXL_AC_CONTEXT_H_
+
+#include <algorithm>
+#include <vector>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+
+namespace jxl {
+
+// Block context used for scanning order, number of non-zeros, AC coefficients.
+// Equal to the channel.
+constexpr uint32_t kDCTOrderContextStart = 0;
+
+// The number of predicted nonzeros goes from 0 to 1008. We use
+// ceil(log2(predicted+1)) as a context for the number of nonzeros, so from 0 to
+// 10, inclusive.
+constexpr uint32_t kNonZeroBuckets = 37;
+
+static const uint16_t kCoeffFreqContext[64] = {
+ 0xBAD, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
+ 23, 23, 23, 23, 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26,
+ 27, 27, 27, 27, 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30,
+};
+
+static const uint16_t kCoeffNumNonzeroContext[64] = {
+ 0xBAD, 0, 31, 62, 62, 93, 93, 93, 93, 123, 123, 123, 123,
+ 152, 152, 152, 152, 152, 152, 152, 152, 180, 180, 180, 180, 180,
+ 180, 180, 180, 180, 180, 180, 180, 206, 206, 206, 206, 206, 206,
+ 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206,
+ 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206,
+};
+
+// Supremum of ZeroDensityContext(x, y) + 1, when x + y < 64.
+constexpr int kZeroDensityContextCount = 458;
+// Supremum of ZeroDensityContext(x, y) + 1.
+constexpr int kZeroDensityContextLimit = 474;
+
+/* This function is used for entropy-sources pre-clustering.
+ *
+ * Ideally, each combination of |nonzeros_left| and |k| should go to its own
+ * bucket; but it implies (64 * 63 / 2) == 2016 buckets. If there is other
+ * dimension (e.g. block context), then number of primary clusters becomes too
+ * big.
+ *
+ * To solve this problem, |nonzeros_left| and |k| values are clustered. It is
+ * known that their sum is at most 64, consequently, the total number buckets
+ * is at most A(64) * B(64).
+ */
+// TODO(user): investigate, why disabling pre-clustering makes entropy code
+// less dense. Perhaps we would need to add HQ clustering algorithm that would
+// be able to squeeze better by spending more CPU cycles.
+static JXL_INLINE size_t ZeroDensityContext(size_t nonzeros_left, size_t k,
+ size_t covered_blocks,
+ size_t log2_covered_blocks,
+ size_t prev) {
+ JXL_DASSERT((1u << log2_covered_blocks) == covered_blocks);
+ nonzeros_left = (nonzeros_left + covered_blocks - 1) >> log2_covered_blocks;
+ k >>= log2_covered_blocks;
+ JXL_DASSERT(k > 0);
+ JXL_DASSERT(k < 64);
+ JXL_DASSERT(nonzeros_left > 0);
+ // Asserting nonzeros_left + k < 65 here causes crashes in debug mode with
+ // invalid input, since the (hot) decoding loop does not check this condition.
+ // As no out-of-bound memory reads are issued even if that condition is
+ // broken, we check this simpler condition which holds anyway. The decoder
+ // will still mark a file in which that condition happens as not valid at the
+ // end of the decoding loop, as `nzeros` will not be `0`.
+ JXL_DASSERT(nonzeros_left < 64);
+ return (kCoeffNumNonzeroContext[nonzeros_left] + kCoeffFreqContext[k]) * 2 +
+ prev;
+}
+
+struct BlockCtxMap {
+ std::vector<int> dc_thresholds[3];
+ std::vector<uint32_t> qf_thresholds;
+ std::vector<uint8_t> ctx_map;
+ size_t num_ctxs, num_dc_ctxs;
+
+ static constexpr uint8_t kDefaultCtxMap[] = {
+ // Default ctx map clusters all the large transforms together.
+ 0, 1, 2, 2, 3, 3, 4, 5, 6, 6, 6, 6, 6, //
+ 7, 8, 9, 9, 10, 11, 12, 13, 14, 14, 14, 14, 14, //
+ 7, 8, 9, 9, 10, 11, 12, 13, 14, 14, 14, 14, 14, //
+ };
+ static_assert(3 * kNumOrders ==
+ sizeof(kDefaultCtxMap) / sizeof *kDefaultCtxMap,
+ "Update default context map");
+
+ size_t Context(int dc_idx, uint32_t qf, size_t ord, size_t c) const {
+ size_t qf_idx = 0;
+ for (uint32_t t : qf_thresholds) {
+ if (qf > t) qf_idx++;
+ }
+ size_t idx = c < 2 ? c ^ 1 : 2;
+ idx = idx * kNumOrders + ord;
+ idx = idx * (qf_thresholds.size() + 1) + qf_idx;
+ idx = idx * num_dc_ctxs + dc_idx;
+ return ctx_map[idx];
+ }
+ // Non-zero context is based on number of non-zeros and block context.
+ // For better clustering, contexts with same number of non-zeros are grouped.
+ constexpr uint32_t ZeroDensityContextsOffset(uint32_t block_ctx) const {
+ return num_ctxs * kNonZeroBuckets + kZeroDensityContextCount * block_ctx;
+ }
+
+ // Context map for AC coefficients consists of 2 blocks:
+ // |num_ctxs x : context for number of non-zeros in the block
+ // kNonZeroBuckets| computed from block context and predicted
+ // value (based top and left values)
+ // |num_ctxs x : context for AC coefficient symbols,
+ // kZeroDensityContextCount| computed from block context,
+ // number of non-zeros left and
+ // index in scan order
+ constexpr uint32_t NumACContexts() const {
+ return num_ctxs * (kNonZeroBuckets + kZeroDensityContextCount);
+ }
+
+ // Non-zero context is based on number of non-zeros and block context.
+ // For better clustering, contexts with same number of non-zeros are grouped.
+ inline uint32_t NonZeroContext(uint32_t non_zeros, uint32_t block_ctx) const {
+ uint32_t ctx;
+ if (non_zeros >= 64) non_zeros = 64;
+ if (non_zeros < 8) {
+ ctx = non_zeros;
+ } else {
+ ctx = 4 + non_zeros / 2;
+ }
+ return ctx * num_ctxs + block_ctx;
+ }
+
+ BlockCtxMap() {
+ ctx_map.assign(std::begin(kDefaultCtxMap), std::end(kDefaultCtxMap));
+ num_ctxs = *std::max_element(ctx_map.begin(), ctx_map.end()) + 1;
+ num_dc_ctxs = 1;
+ }
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_AC_CONTEXT_H_
diff --git a/third_party/jpeg-xl/lib/jxl/ac_strategy.cc b/third_party/jpeg-xl/lib/jxl/ac_strategy.cc
new file mode 100644
index 0000000000..ada3bcb6f5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/ac_strategy.cc
@@ -0,0 +1,108 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/ac_strategy.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <numeric> // iota
+#include <type_traits>
+#include <utility>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+// Tries to generalize zig-zag order to non-square blocks. Surprisingly, in
+// square block frequency along the (i + j == const) diagonals is roughly the
+// same. For historical reasons, consecutive diagonals are traversed
+// in alternating directions - so called "zig-zag" (or "snake") order.
+template <bool is_lut>
+static void CoeffOrderAndLut(AcStrategy acs, coeff_order_t* out) {
+ size_t cx = acs.covered_blocks_x();
+ size_t cy = acs.covered_blocks_y();
+ CoefficientLayout(&cy, &cx);
+
+ // CoefficientLayout ensures cx >= cy.
+ // We compute the zigzag order for a cx x cx block, then discard all the
+ // lines that are not multiple of the ratio between cx and cy.
+ size_t xs = cx / cy;
+ size_t xsm = xs - 1;
+ size_t xss = CeilLog2Nonzero(xs);
+ // First half of the block
+ size_t cur = cx * cy;
+ for (size_t i = 0; i < cx * kBlockDim; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ size_t x = j;
+ size_t y = i - j;
+ if (i % 2) std::swap(x, y);
+ if ((y & xsm) != 0) continue;
+ y >>= xss;
+ size_t val = 0;
+ if (x < cx && y < cy) {
+ val = y * cx + x;
+ } else {
+ val = cur++;
+ }
+ if (is_lut) {
+ out[y * cx * kBlockDim + x] = val;
+ } else {
+ out[val] = y * cx * kBlockDim + x;
+ }
+ }
+ }
+ // Second half
+ for (size_t ip = cx * kBlockDim - 1; ip > 0; ip--) {
+ size_t i = ip - 1;
+ for (size_t j = 0; j <= i; j++) {
+ size_t x = cx * kBlockDim - 1 - (i - j);
+ size_t y = cx * kBlockDim - 1 - j;
+ if (i % 2) std::swap(x, y);
+ if ((y & xsm) != 0) continue;
+ y >>= xss;
+ size_t val = cur++;
+ if (is_lut) {
+ out[y * cx * kBlockDim + x] = val;
+ } else {
+ out[val] = y * cx * kBlockDim + x;
+ }
+ }
+ }
+}
+
+void AcStrategy::ComputeNaturalCoeffOrder(coeff_order_t* order) const {
+ CoeffOrderAndLut</*is_lut=*/false>(*this, order);
+}
+void AcStrategy::ComputeNaturalCoeffOrderLut(coeff_order_t* lut) const {
+ CoeffOrderAndLut</*is_lut=*/true>(*this, lut);
+}
+
+// These definitions are needed before C++17.
+constexpr size_t AcStrategy::kMaxCoeffBlocks;
+constexpr size_t AcStrategy::kMaxBlockDim;
+constexpr size_t AcStrategy::kMaxCoeffArea;
+
+AcStrategyImage::AcStrategyImage(size_t xsize, size_t ysize)
+ : layers_(xsize, ysize) {
+ row_ = layers_.Row(0);
+ stride_ = layers_.PixelsPerRow();
+}
+
+size_t AcStrategyImage::CountBlocks(AcStrategy::Type type) const {
+ size_t ret = 0;
+ for (size_t y = 0; y < layers_.ysize(); y++) {
+ const uint8_t* JXL_RESTRICT row = layers_.ConstRow(y);
+ for (size_t x = 0; x < layers_.xsize(); x++) {
+ if (row[x] == ((static_cast<uint8_t>(type) << 1) | 1)) ret++;
+ }
+ }
+ return ret;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/ac_strategy.h b/third_party/jpeg-xl/lib/jxl/ac_strategy.h
new file mode 100644
index 0000000000..7d21167e6e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/ac_strategy.h
@@ -0,0 +1,261 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_AC_STRATEGY_H_
+#define LIB_JXL_AC_STRATEGY_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <hwy/base.h> // kMaxVectorSize
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image_ops.h"
+
+// Defines the different kinds of transforms, and heuristics to choose between
+// them.
+// `AcStrategy` represents what transform should be used, and which sub-block of
+// that transform we are currently in. Note that DCT4x4 is applied on all four
+// 4x4 sub-blocks of an 8x8 block.
+// `AcStrategyImage` defines which strategy should be used for each 8x8 block
+// of the image. The highest 4 bits represent the strategy to be used, the
+// lowest 4 represent the index of the block inside that strategy.
+
+namespace jxl {
+
+class AcStrategy {
+ public:
+ // Extremal values for the number of blocks/coefficients of a single strategy.
+ static constexpr size_t kMaxCoeffBlocks = 32;
+ static constexpr size_t kMaxBlockDim = kBlockDim * kMaxCoeffBlocks;
+ // Maximum number of coefficients in a block. Guaranteed to be a multiple of
+ // the vector size.
+ static constexpr size_t kMaxCoeffArea = kMaxBlockDim * kMaxBlockDim;
+ static_assert((kMaxCoeffArea * sizeof(float)) % hwy::kMaxVectorSize == 0,
+ "Coefficient area is not a multiple of vector size");
+
+ // Raw strategy types.
+ enum Type : uint32_t {
+ // Regular block size DCT
+ DCT = 0,
+ // Encode pixels without transforming
+ IDENTITY = 1,
+ // Use 2-by-2 DCT
+ DCT2X2 = 2,
+ // Use 4-by-4 DCT
+ DCT4X4 = 3,
+ // Use 16-by-16 DCT
+ DCT16X16 = 4,
+ // Use 32-by-32 DCT
+ DCT32X32 = 5,
+ // Use 16-by-8 DCT
+ DCT16X8 = 6,
+ // Use 8-by-16 DCT
+ DCT8X16 = 7,
+ // Use 32-by-8 DCT
+ DCT32X8 = 8,
+ // Use 8-by-32 DCT
+ DCT8X32 = 9,
+ // Use 32-by-16 DCT
+ DCT32X16 = 10,
+ // Use 16-by-32 DCT
+ DCT16X32 = 11,
+ // 4x8 and 8x4 DCT
+ DCT4X8 = 12,
+ DCT8X4 = 13,
+ // Corner-DCT.
+ AFV0 = 14,
+ AFV1 = 15,
+ AFV2 = 16,
+ AFV3 = 17,
+ // Larger DCTs
+ DCT64X64 = 18,
+ DCT64X32 = 19,
+ DCT32X64 = 20,
+ DCT128X128 = 21,
+ DCT128X64 = 22,
+ DCT64X128 = 23,
+ DCT256X256 = 24,
+ DCT256X128 = 25,
+ DCT128X256 = 26,
+ // Marker for num of valid strategies.
+ kNumValidStrategies
+ };
+
+ static constexpr uint32_t TypeBit(const Type type) {
+ return 1u << static_cast<uint32_t>(type);
+ }
+
+ // Returns true if this block is the first 8x8 block (i.e. top-left) of a
+ // possibly multi-block strategy.
+ JXL_INLINE bool IsFirstBlock() const { return is_first_; }
+
+ JXL_INLINE bool IsMultiblock() const {
+ constexpr uint32_t bits =
+ TypeBit(Type::DCT16X16) | TypeBit(Type::DCT32X32) |
+ TypeBit(Type::DCT16X8) | TypeBit(Type::DCT8X16) |
+ TypeBit(Type::DCT32X8) | TypeBit(Type::DCT8X32) |
+ TypeBit(Type::DCT16X32) | TypeBit(Type::DCT32X16) |
+ TypeBit(Type::DCT32X64) | TypeBit(Type::DCT64X32) |
+ TypeBit(Type::DCT64X64) | TypeBit(DCT64X128) | TypeBit(DCT128X64) |
+ TypeBit(DCT128X128) | TypeBit(DCT128X256) | TypeBit(DCT256X128) |
+ TypeBit(DCT256X256);
+ JXL_DASSERT(Strategy() < kNumValidStrategies);
+ return ((1u << static_cast<uint32_t>(Strategy())) & bits) != 0;
+ }
+
+ // Returns the raw strategy value. Should only be used for tokenization.
+ JXL_INLINE uint8_t RawStrategy() const {
+ return static_cast<uint8_t>(strategy_);
+ }
+
+ JXL_INLINE Type Strategy() const { return strategy_; }
+
+ // Inverse check
+ static JXL_INLINE constexpr bool IsRawStrategyValid(int raw_strategy) {
+ return raw_strategy < static_cast<int32_t>(kNumValidStrategies) &&
+ raw_strategy >= 0;
+ }
+ static JXL_INLINE AcStrategy FromRawStrategy(uint8_t raw_strategy) {
+ return FromRawStrategy(static_cast<Type>(raw_strategy));
+ }
+ static JXL_INLINE AcStrategy FromRawStrategy(Type raw_strategy) {
+ JXL_DASSERT(IsRawStrategyValid(static_cast<uint32_t>(raw_strategy)));
+ return AcStrategy(raw_strategy, /*is_first=*/true);
+ }
+
+ // "Natural order" means the order of increasing of "anisotropic" frequency of
+ // continuous version of DCT basis.
+ // Round-trip, for any given strategy s:
+ // X = NaturalCoeffOrder(s)[NaturalCoeffOrderLutN(s)[X]]
+ // X = NaturalCoeffOrderLut(s)[NaturalCoeffOrderN(s)[X]]
+ void ComputeNaturalCoeffOrder(coeff_order_t* order) const;
+ void ComputeNaturalCoeffOrderLut(coeff_order_t* lut) const;
+
+ // Number of 8x8 blocks that this strategy will cover. 0 for non-top-left
+ // blocks inside a multi-block transform.
+ JXL_INLINE size_t covered_blocks_x() const {
+ static constexpr uint8_t kLut[] = {1, 1, 1, 1, 2, 4, 1, 2, 1,
+ 4, 2, 4, 1, 1, 1, 1, 1, 1,
+ 8, 4, 8, 16, 8, 16, 32, 16, 32};
+ static_assert(sizeof(kLut) / sizeof(*kLut) == kNumValidStrategies,
+ "Update LUT");
+ return kLut[size_t(strategy_)];
+ }
+
+ JXL_INLINE size_t covered_blocks_y() const {
+ static constexpr uint8_t kLut[] = {1, 1, 1, 1, 2, 4, 2, 1, 4,
+ 1, 4, 2, 1, 1, 1, 1, 1, 1,
+ 8, 8, 4, 16, 16, 8, 32, 32, 16};
+ static_assert(sizeof(kLut) / sizeof(*kLut) == kNumValidStrategies,
+ "Update LUT");
+ return kLut[size_t(strategy_)];
+ }
+
+ JXL_INLINE size_t log2_covered_blocks() const {
+ static constexpr uint8_t kLut[] = {0, 0, 0, 0, 2, 4, 1, 1, 2,
+ 2, 3, 3, 0, 0, 0, 0, 0, 0,
+ 6, 5, 5, 8, 7, 7, 10, 9, 9};
+ static_assert(sizeof(kLut) / sizeof(*kLut) == kNumValidStrategies,
+ "Update LUT");
+ return kLut[size_t(strategy_)];
+ }
+
+ private:
+ friend class AcStrategyRow;
+ JXL_INLINE AcStrategy(Type strategy, bool is_first)
+ : strategy_(strategy), is_first_(is_first) {
+ JXL_DASSERT(IsMultiblock() || is_first == true);
+ }
+
+ Type strategy_;
+ bool is_first_;
+};
+
+// Class to use a certain row of the AC strategy.
+class AcStrategyRow {
+ public:
+ explicit AcStrategyRow(const uint8_t* row) : row_(row) {}
+ AcStrategy operator[](size_t x) const {
+ return AcStrategy(static_cast<AcStrategy::Type>(row_[x] >> 1), row_[x] & 1);
+ }
+
+ private:
+ const uint8_t* JXL_RESTRICT row_;
+};
+
+class AcStrategyImage {
+ public:
+ AcStrategyImage() = default;
+ AcStrategyImage(size_t xsize, size_t ysize);
+ AcStrategyImage(AcStrategyImage&&) = default;
+ AcStrategyImage& operator=(AcStrategyImage&&) = default;
+
+ void FillDCT8(const Rect& rect) {
+ FillPlane<uint8_t>((static_cast<uint8_t>(AcStrategy::Type::DCT) << 1) | 1,
+ &layers_, rect);
+ }
+ void FillDCT8() { FillDCT8(Rect(layers_)); }
+
+ void FillInvalid() { FillImage(INVALID, &layers_); }
+
+ void Set(size_t x, size_t y, AcStrategy::Type type) {
+#if JXL_ENABLE_ASSERT
+ AcStrategy acs = AcStrategy::FromRawStrategy(type);
+#endif // JXL_ENABLE_ASSERT
+ JXL_ASSERT(y + acs.covered_blocks_y() <= layers_.ysize());
+ JXL_ASSERT(x + acs.covered_blocks_x() <= layers_.xsize());
+ JXL_CHECK(SetNoBoundsCheck(x, y, type, /*check=*/false));
+ }
+
+ Status SetNoBoundsCheck(size_t x, size_t y, AcStrategy::Type type,
+ bool check = true) {
+ AcStrategy acs = AcStrategy::FromRawStrategy(type);
+ for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+ for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+ size_t pos = (y + iy) * stride_ + x + ix;
+ if (check && row_[pos] != INVALID) {
+ return JXL_FAILURE("Invalid AC strategy: block overlap");
+ }
+ row_[pos] =
+ (static_cast<uint8_t>(type) << 1) | ((iy | ix) == 0 ? 1 : 0);
+ }
+ }
+ return true;
+ }
+
+ bool IsValid(size_t x, size_t y) { return row_[y * stride_ + x] != INVALID; }
+
+ AcStrategyRow ConstRow(size_t y, size_t x_prefix = 0) const {
+ return AcStrategyRow(layers_.ConstRow(y) + x_prefix);
+ }
+
+ AcStrategyRow ConstRow(const Rect& rect, size_t y) const {
+ return ConstRow(rect.y0() + y, rect.x0());
+ }
+
+ size_t PixelsPerRow() const { return layers_.PixelsPerRow(); }
+
+ size_t xsize() const { return layers_.xsize(); }
+ size_t ysize() const { return layers_.ysize(); }
+
+ // Count the number of blocks of a given type.
+ size_t CountBlocks(AcStrategy::Type type) const;
+
+ private:
+ ImageB layers_;
+ uint8_t* JXL_RESTRICT row_;
+ size_t stride_;
+
+ // A value that does not represent a valid combined AC strategy
+ // value. Used as a sentinel.
+ static constexpr uint8_t INVALID = 0xFF;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_AC_STRATEGY_H_
diff --git a/third_party/jpeg-xl/lib/jxl/ac_strategy_test.cc b/third_party/jpeg-xl/lib/jxl/ac_strategy_test.cc
new file mode 100644
index 0000000000..d366aa3f82
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/ac_strategy_test.cc
@@ -0,0 +1,237 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/ac_strategy.h"
+
+#include <string.h>
+
+#include <cmath>
+#include <hwy/aligned_allocator.h>
+#include <hwy/base.h> // HWY_ALIGN_MAX
+#include <hwy/tests/test_util-inl.h>
+#include <utility>
+
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/dec_transforms_testonly.h"
+#include "lib/jxl/enc_transforms.h"
+
+namespace jxl {
+namespace {
+
+// Test that DCT -> IDCT is a noop.
+class AcStrategyRoundtrip : public ::hwy::TestWithParamTargetAndT<int> {
+ protected:
+ void Run() {
+ const AcStrategy::Type type = static_cast<AcStrategy::Type>(GetParam());
+ const AcStrategy acs = AcStrategy::FromRawStrategy(type);
+
+ auto mem = hwy::AllocateAligned<float>(4 * AcStrategy::kMaxCoeffArea);
+ float* scratch_space = mem.get();
+ float* coeffs = scratch_space + AcStrategy::kMaxCoeffArea;
+ float* idct = coeffs + AcStrategy::kMaxCoeffArea;
+ Rng rng(type * 65537 + 13);
+
+ for (size_t j = 0; j < 64; j++) {
+ size_t i = (acs.log2_covered_blocks()
+ ? rng.UniformU(0, 64u << acs.log2_covered_blocks())
+ : j);
+ float* input = idct + AcStrategy::kMaxCoeffArea;
+ std::fill_n(input, AcStrategy::kMaxCoeffArea, 0);
+ input[i] = 0.2f;
+ TransformFromPixels(type, input, acs.covered_blocks_x() * 8, coeffs,
+ scratch_space);
+ ASSERT_NEAR(coeffs[0], 0.2 / (64 << acs.log2_covered_blocks()), 1e-6)
+ << " i = " << i;
+ TransformToPixels(type, coeffs, idct, acs.covered_blocks_x() * 8,
+ scratch_space);
+ for (size_t j = 0; j < 64u << acs.log2_covered_blocks(); j++) {
+ ASSERT_NEAR(idct[j], j == i ? 0.2f : 0, 2e-6)
+ << "j = " << j << " i = " << i << " acs " << type;
+ }
+ }
+ // Test DC.
+ std::fill_n(idct, AcStrategy::kMaxCoeffArea, 0);
+ for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
+ for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
+ float* dc = idct + AcStrategy::kMaxCoeffArea;
+ std::fill_n(dc, AcStrategy::kMaxCoeffArea, 0);
+ dc[y * acs.covered_blocks_x() * 8 + x] = 0.2;
+ LowestFrequenciesFromDC(type, dc, acs.covered_blocks_x() * 8, coeffs);
+ DCFromLowestFrequencies(type, coeffs, idct, acs.covered_blocks_x() * 8);
+ std::fill_n(dc, AcStrategy::kMaxCoeffArea, 0);
+ dc[y * acs.covered_blocks_x() * 8 + x] = 0.2;
+ for (size_t j = 0; j < 64u << acs.log2_covered_blocks(); j++) {
+ ASSERT_NEAR(idct[j], dc[j], 1e-6)
+ << "j = " << j << " x = " << x << " y = " << y << " acs " << type;
+ }
+ }
+ }
+ }
+};
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(
+ AcStrategyRoundtrip,
+ ::testing::Range(0, int(AcStrategy::Type::kNumValidStrategies)));
+
+TEST_P(AcStrategyRoundtrip, Test) { Run(); }
+
+// Test that DC(2x2) -> DCT coefficients -> IDCT -> downsampled IDCT is a noop.
+class AcStrategyRoundtripDownsample
+ : public ::hwy::TestWithParamTargetAndT<int> {
+ protected:
+ void Run() {
+ const AcStrategy::Type type = static_cast<AcStrategy::Type>(GetParam());
+ const AcStrategy acs = AcStrategy::FromRawStrategy(type);
+
+ auto mem = hwy::AllocateAligned<float>(4 * AcStrategy::kMaxCoeffArea);
+ float* scratch_space = mem.get();
+ float* coeffs = scratch_space + AcStrategy::kMaxCoeffArea;
+ std::fill_n(coeffs, AcStrategy::kMaxCoeffArea, 0.0f);
+ float* idct = coeffs + AcStrategy::kMaxCoeffArea;
+ Rng rng(type * 65537 + 13);
+
+ for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
+ for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
+ if (x > 4 || y > 4) {
+ if (rng.Bernoulli(0.9f)) continue;
+ }
+ float* dc = idct + AcStrategy::kMaxCoeffArea;
+ std::fill_n(dc, AcStrategy::kMaxCoeffArea, 0);
+ dc[y * acs.covered_blocks_x() * 8 + x] = 0.2f;
+ LowestFrequenciesFromDC(type, dc, acs.covered_blocks_x() * 8, coeffs);
+ TransformToPixels(type, coeffs, idct, acs.covered_blocks_x() * 8,
+ scratch_space);
+ std::fill_n(coeffs, AcStrategy::kMaxCoeffArea, 0.0f);
+ std::fill_n(dc, AcStrategy::kMaxCoeffArea, 0);
+ dc[y * acs.covered_blocks_x() * 8 + x] = 0.2f;
+ // Downsample
+ for (size_t dy = 0; dy < acs.covered_blocks_y(); dy++) {
+ for (size_t dx = 0; dx < acs.covered_blocks_x(); dx++) {
+ float sum = 0;
+ for (size_t iy = 0; iy < 8; iy++) {
+ for (size_t ix = 0; ix < 8; ix++) {
+ sum += idct[(dy * 8 + iy) * 8 * acs.covered_blocks_x() +
+ dx * 8 + ix];
+ }
+ }
+ sum /= 64.0f;
+ ASSERT_NEAR(sum, dc[dy * 8 * acs.covered_blocks_x() + dx], 1e-6)
+ << "acs " << type;
+ }
+ }
+ }
+ }
+ }
+};
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(
+ AcStrategyRoundtripDownsample,
+ ::testing::Range(0, int(AcStrategy::Type::kNumValidStrategies)));
+
+TEST_P(AcStrategyRoundtripDownsample, Test) { Run(); }
+
+// Test that IDCT(block with zeros in the non-topleft corner) -> downsampled
+// IDCT is the same as IDCT -> DC(2x2) of the same block.
+class AcStrategyDownsample : public ::hwy::TestWithParamTargetAndT<int> {
+ protected:
+ void Run() {
+ const AcStrategy::Type type = static_cast<AcStrategy::Type>(GetParam());
+ const AcStrategy acs = AcStrategy::FromRawStrategy(type);
+ size_t cx = acs.covered_blocks_y();
+ size_t cy = acs.covered_blocks_x();
+ CoefficientLayout(&cy, &cx);
+
+ auto mem = hwy::AllocateAligned<float>(4 * AcStrategy::kMaxCoeffArea);
+ float* scratch_space = mem.get();
+ float* idct = scratch_space + AcStrategy::kMaxCoeffArea;
+ float* idct_acs_downsampled = idct + AcStrategy::kMaxCoeffArea;
+ Rng rng(type * 65537 + 13);
+
+ for (size_t y = 0; y < cy; y++) {
+ for (size_t x = 0; x < cx; x++) {
+ if (x > 4 || y > 4) {
+ if (rng.Bernoulli(0.9f)) continue;
+ }
+ float* coeffs = idct + AcStrategy::kMaxCoeffArea;
+ std::fill_n(coeffs, AcStrategy::kMaxCoeffArea, 0);
+ coeffs[y * cx * 8 + x] = 0.2f;
+ TransformToPixels(type, coeffs, idct, acs.covered_blocks_x() * 8,
+ scratch_space);
+ std::fill_n(coeffs, AcStrategy::kMaxCoeffArea, 0);
+ coeffs[y * cx * 8 + x] = 0.2f;
+ DCFromLowestFrequencies(type, coeffs, idct_acs_downsampled,
+ acs.covered_blocks_x() * 8);
+ // Downsample
+ for (size_t dy = 0; dy < acs.covered_blocks_y(); dy++) {
+ for (size_t dx = 0; dx < acs.covered_blocks_x(); dx++) {
+ float sum = 0;
+ for (size_t iy = 0; iy < 8; iy++) {
+ for (size_t ix = 0; ix < 8; ix++) {
+ sum += idct[(dy * 8 + iy) * 8 * acs.covered_blocks_x() +
+ dx * 8 + ix];
+ }
+ }
+ sum /= 64;
+ ASSERT_NEAR(
+ sum, idct_acs_downsampled[dy * 8 * acs.covered_blocks_x() + dx],
+ 1e-6)
+ << " acs " << type;
+ }
+ }
+ }
+ }
+ }
+};
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(
+ AcStrategyDownsample,
+ ::testing::Range(0, int(AcStrategy::Type::kNumValidStrategies)));
+
+TEST_P(AcStrategyDownsample, Test) { Run(); }
+
+class AcStrategyTargetTest : public ::hwy::TestWithParamTarget {};
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(AcStrategyTargetTest);
+
+TEST_P(AcStrategyTargetTest, RoundtripAFVDCT) {
+ HWY_ALIGN_MAX float idct[16];
+ for (size_t i = 0; i < 16; i++) {
+ HWY_ALIGN_MAX float pixels[16] = {};
+ pixels[i] = 1;
+ HWY_ALIGN_MAX float coeffs[16] = {};
+
+ AFVDCT4x4(pixels, coeffs);
+ AFVIDCT4x4(coeffs, idct);
+ for (size_t j = 0; j < 16; j++) {
+ EXPECT_NEAR(idct[j], pixels[j], 1e-6);
+ }
+ }
+}
+
+TEST_P(AcStrategyTargetTest, BenchmarkAFV) {
+ const AcStrategy::Type type = AcStrategy::Type::AFV0;
+ HWY_ALIGN_MAX float pixels[64] = {1};
+ HWY_ALIGN_MAX float coeffs[64] = {};
+ HWY_ALIGN_MAX float scratch_space[64] = {};
+ for (size_t i = 0; i < 1 << 14; i++) {
+ TransformToPixels(type, coeffs, pixels, 8, scratch_space);
+ TransformFromPixels(type, pixels, 8, coeffs, scratch_space);
+ }
+ EXPECT_NEAR(pixels[0], 0.0, 1E-6);
+}
+
+TEST_P(AcStrategyTargetTest, BenchmarkAFVDCT) {
+ HWY_ALIGN_MAX float pixels[64] = {1};
+ HWY_ALIGN_MAX float coeffs[64] = {};
+ for (size_t i = 0; i < 1 << 14; i++) {
+ AFVDCT4x4(pixels, coeffs);
+ AFVIDCT4x4(coeffs, pixels);
+ }
+ EXPECT_NEAR(pixels[0], 1.0, 1E-6);
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/alpha.cc b/third_party/jpeg-xl/lib/jxl/alpha.cc
new file mode 100644
index 0000000000..48d7e7ee92
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/alpha.cc
@@ -0,0 +1,115 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/alpha.h"
+
+#include <string.h>
+
+#include <algorithm>
+
+namespace jxl {
+
+static float Clamp(float x) { return std::max(std::min(1.0f, x), 0.0f); }
+
+void PerformAlphaBlending(const AlphaBlendingInputLayer& bg,
+ const AlphaBlendingInputLayer& fg,
+ const AlphaBlendingOutput& out, size_t num_pixels,
+ bool alpha_is_premultiplied, bool clamp) {
+ if (alpha_is_premultiplied) {
+ for (size_t x = 0; x < num_pixels; ++x) {
+ float fga = clamp ? Clamp(fg.a[x]) : fg.a[x];
+ out.r[x] = (fg.r[x] + bg.r[x] * (1.f - fga));
+ out.g[x] = (fg.g[x] + bg.g[x] * (1.f - fga));
+ out.b[x] = (fg.b[x] + bg.b[x] * (1.f - fga));
+ out.a[x] = (1.f - (1.f - fga) * (1.f - bg.a[x]));
+ }
+ } else {
+ for (size_t x = 0; x < num_pixels; ++x) {
+ float fga = clamp ? Clamp(fg.a[x]) : fg.a[x];
+ const float new_a = 1.f - (1.f - fga) * (1.f - bg.a[x]);
+ const float rnew_a = (new_a > 0 ? 1.f / new_a : 0.f);
+ out.r[x] = (fg.r[x] * fga + bg.r[x] * bg.a[x] * (1.f - fga)) * rnew_a;
+ out.g[x] = (fg.g[x] * fga + bg.g[x] * bg.a[x] * (1.f - fga)) * rnew_a;
+ out.b[x] = (fg.b[x] * fga + bg.b[x] * bg.a[x] * (1.f - fga)) * rnew_a;
+ out.a[x] = new_a;
+ }
+ }
+}
+void PerformAlphaBlending(const float* bg, const float* bga, const float* fg,
+ const float* fga, float* out, size_t num_pixels,
+ bool alpha_is_premultiplied, bool clamp) {
+ if (bg == bga && fg == fga) {
+ for (size_t x = 0; x < num_pixels; ++x) {
+ float fa = clamp ? fga[x] : Clamp(fga[x]);
+ out[x] = (1.f - (1.f - fa) * (1.f - bga[x]));
+ }
+ } else {
+ if (alpha_is_premultiplied) {
+ for (size_t x = 0; x < num_pixels; ++x) {
+ float fa = clamp ? fga[x] : Clamp(fga[x]);
+ out[x] = (fg[x] + bg[x] * (1.f - fa));
+ }
+ } else {
+ for (size_t x = 0; x < num_pixels; ++x) {
+ float fa = clamp ? fga[x] : Clamp(fga[x]);
+ const float new_a = 1.f - (1.f - fa) * (1.f - bga[x]);
+ const float rnew_a = (new_a > 0 ? 1.f / new_a : 0.f);
+ out[x] = (fg[x] * fa + bg[x] * bga[x] * (1.f - fa)) * rnew_a;
+ }
+ }
+ }
+}
+
+void PerformAlphaWeightedAdd(const float* bg, const float* fg, const float* fga,
+ float* out, size_t num_pixels, bool clamp) {
+ if (fg == fga) {
+ memcpy(out, bg, num_pixels * sizeof(*out));
+ } else if (clamp) {
+ for (size_t x = 0; x < num_pixels; ++x) {
+ out[x] = bg[x] + fg[x] * Clamp(fga[x]);
+ }
+ } else {
+ for (size_t x = 0; x < num_pixels; ++x) {
+ out[x] = bg[x] + fg[x] * fga[x];
+ }
+ }
+}
+
+void PerformMulBlending(const float* bg, const float* fg, float* out,
+ size_t num_pixels, bool clamp) {
+ if (clamp) {
+ for (size_t x = 0; x < num_pixels; ++x) {
+ out[x] = bg[x] * Clamp(fg[x]);
+ }
+ } else {
+ for (size_t x = 0; x < num_pixels; ++x) {
+ out[x] = bg[x] * fg[x];
+ }
+ }
+}
+
+void PremultiplyAlpha(float* JXL_RESTRICT r, float* JXL_RESTRICT g,
+ float* JXL_RESTRICT b, const float* JXL_RESTRICT a,
+ size_t num_pixels) {
+ for (size_t x = 0; x < num_pixels; ++x) {
+ const float multiplier = std::max(kSmallAlpha, a[x]);
+ r[x] *= multiplier;
+ g[x] *= multiplier;
+ b[x] *= multiplier;
+ }
+}
+
+void UnpremultiplyAlpha(float* JXL_RESTRICT r, float* JXL_RESTRICT g,
+ float* JXL_RESTRICT b, const float* JXL_RESTRICT a,
+ size_t num_pixels) {
+ for (size_t x = 0; x < num_pixels; ++x) {
+ const float multiplier = 1.f / std::max(kSmallAlpha, a[x]);
+ r[x] *= multiplier;
+ g[x] *= multiplier;
+ b[x] *= multiplier;
+ }
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/alpha.h b/third_party/jpeg-xl/lib/jxl/alpha.h
new file mode 100644
index 0000000000..efb76c800f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/alpha.h
@@ -0,0 +1,66 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ALPHA_H_
+#define LIB_JXL_ALPHA_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <limits>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+// A very small value to avoid divisions by zero when converting to
+// unpremultiplied alpha. Page 21 of the technical introduction to OpenEXR
+// (https://www.openexr.com/documentation/TechnicalIntroduction.pdf) recommends
+// "a power of two" that is "less than half of the smallest positive 16-bit
+// floating-point value". That smallest value happens to be the denormal number
+// 2^-24, so 2^-26 should be a good choice.
+static constexpr float kSmallAlpha = 1.f / (1u << 26u);
+
+struct AlphaBlendingInputLayer {
+ const float* r;
+ const float* g;
+ const float* b;
+ const float* a;
+};
+
+struct AlphaBlendingOutput {
+ float* r;
+ float* g;
+ float* b;
+ float* a;
+};
+
+// Note: The pointers in `out` are allowed to alias those in `bg` or `fg`.
+// No pointer shall be null.
+void PerformAlphaBlending(const AlphaBlendingInputLayer& bg,
+ const AlphaBlendingInputLayer& fg,
+ const AlphaBlendingOutput& out, size_t num_pixels,
+ bool alpha_is_premultiplied, bool clamp);
+// Single plane alpha blending
+void PerformAlphaBlending(const float* bg, const float* bga, const float* fg,
+ const float* fga, float* out, size_t num_pixels,
+ bool alpha_is_premultiplied, bool clamp);
+
+void PerformAlphaWeightedAdd(const float* bg, const float* fg, const float* fga,
+ float* out, size_t num_pixels, bool clamp);
+
+void PerformMulBlending(const float* bg, const float* fg, float* out,
+ size_t num_pixels, bool clamp);
+
+void PremultiplyAlpha(float* JXL_RESTRICT r, float* JXL_RESTRICT g,
+ float* JXL_RESTRICT b, const float* JXL_RESTRICT a,
+ size_t num_pixels);
+void UnpremultiplyAlpha(float* JXL_RESTRICT r, float* JXL_RESTRICT g,
+ float* JXL_RESTRICT b, const float* JXL_RESTRICT a,
+ size_t num_pixels);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ALPHA_H_
diff --git a/third_party/jpeg-xl/lib/jxl/alpha_test.cc b/third_party/jpeg-xl/lib/jxl/alpha_test.cc
new file mode 100644
index 0000000000..ddafd829ec
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/alpha_test.cc
@@ -0,0 +1,134 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/alpha.h"
+
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+using ::testing::_;
+using ::testing::ElementsAre;
+using ::testing::FloatNear;
+
+TEST(AlphaTest, BlendingWithNonPremultiplied) {
+ const float bg_rgb[3] = {100, 110, 120};
+ const float bg_a = 180.f / 255;
+ const float fg_rgb[3] = {25, 21, 23};
+ const float fg_a = 15420.f / 65535;
+ const float fg_a2 = 2.0f;
+ float out_rgb[3];
+ float out_a;
+ PerformAlphaBlending(
+ /*bg=*/{&bg_rgb[0], &bg_rgb[1], &bg_rgb[2], &bg_a},
+ /*fg=*/{&fg_rgb[0], &fg_rgb[1], &fg_rgb[2], &fg_a},
+ /*out=*/{&out_rgb[0], &out_rgb[1], &out_rgb[2], &out_a}, 1,
+ /*alpha_is_premultiplied=*/false, /*clamp=*/false);
+ EXPECT_THAT(out_rgb,
+ ElementsAre(FloatNear(77.2f, .05f), FloatNear(83.0f, .05f),
+ FloatNear(90.6f, .05f)));
+ EXPECT_NEAR(out_a, 3174.f / 4095, 1e-5);
+ PerformAlphaBlending(
+ /*bg=*/{&bg_rgb[0], &bg_rgb[1], &bg_rgb[2], &bg_a},
+ /*fg=*/{&fg_rgb[0], &fg_rgb[1], &fg_rgb[2], &fg_a2},
+ /*out=*/{&out_rgb[0], &out_rgb[1], &out_rgb[2], &out_a}, 1,
+ /*alpha_is_premultiplied=*/false, /*clamp=*/true);
+ EXPECT_THAT(out_rgb, ElementsAre(FloatNear(fg_rgb[0], .05f),
+ FloatNear(fg_rgb[1], .05f),
+ FloatNear(fg_rgb[2], .05f)));
+ EXPECT_NEAR(out_a, 1.0f, 1e-5);
+}
+
+TEST(AlphaTest, BlendingWithPremultiplied) {
+ const float bg_rgb[3] = {100, 110, 120};
+ const float bg_a = 180.f / 255;
+ const float fg_rgb[3] = {25, 21, 23};
+ const float fg_a = 15420.f / 65535;
+ const float fg_a2 = 2.0f;
+ float out_rgb[3];
+ float out_a;
+ PerformAlphaBlending(
+ /*bg=*/{&bg_rgb[0], &bg_rgb[1], &bg_rgb[2], &bg_a},
+ /*fg=*/{&fg_rgb[0], &fg_rgb[1], &fg_rgb[2], &fg_a},
+ /*out=*/{&out_rgb[0], &out_rgb[1], &out_rgb[2], &out_a}, 1,
+ /*alpha_is_premultiplied=*/true, /*clamp=*/false);
+ EXPECT_THAT(out_rgb,
+ ElementsAre(FloatNear(101.5f, .05f), FloatNear(105.1f, .05f),
+ FloatNear(114.8f, .05f)));
+ EXPECT_NEAR(out_a, 3174.f / 4095, 1e-5);
+ PerformAlphaBlending(
+ /*bg=*/{&bg_rgb[0], &bg_rgb[1], &bg_rgb[2], &bg_a},
+ /*fg=*/{&fg_rgb[0], &fg_rgb[1], &fg_rgb[2], &fg_a2},
+ /*out=*/{&out_rgb[0], &out_rgb[1], &out_rgb[2], &out_a}, 1,
+ /*alpha_is_premultiplied=*/true, /*clamp=*/true);
+ EXPECT_THAT(out_rgb, ElementsAre(FloatNear(fg_rgb[0], .05f),
+ FloatNear(fg_rgb[1], .05f),
+ FloatNear(fg_rgb[2], .05f)));
+ EXPECT_NEAR(out_a, 1.0f, 1e-5);
+}
+
+TEST(AlphaTest, Mul) {
+ const float bg = 100;
+ const float fg = 25;
+ float out;
+ PerformMulBlending(&bg, &fg, &out, 1, /*clamp=*/false);
+ EXPECT_THAT(out, FloatNear(fg * bg, .05f));
+ PerformMulBlending(&bg, &fg, &out, 1, /*clamp=*/true);
+ EXPECT_THAT(out, FloatNear(bg, .05f));
+}
+
+TEST(AlphaTest, PremultiplyAndUnpremultiply) {
+ const float alpha[] = {0.f, 63.f / 255, 127.f / 255, 1.f};
+ float r[] = {120, 130, 140, 150};
+ float g[] = {124, 134, 144, 154};
+ float b[] = {127, 137, 147, 157};
+
+ PremultiplyAlpha(r, g, b, alpha, 4);
+ EXPECT_THAT(
+ r, ElementsAre(FloatNear(0.f, 1e-5f), FloatNear(130 * 63.f / 255, 1e-5f),
+ FloatNear(140 * 127.f / 255, 1e-5f), 150));
+ EXPECT_THAT(
+ g, ElementsAre(FloatNear(0.f, 1e-5f), FloatNear(134 * 63.f / 255, 1e-5f),
+ FloatNear(144 * 127.f / 255, 1e-5f), 154));
+ EXPECT_THAT(
+ b, ElementsAre(FloatNear(0.f, 1e-5f), FloatNear(137 * 63.f / 255, 1e-5f),
+ FloatNear(147 * 127.f / 255, 1e-5f), 157));
+
+ UnpremultiplyAlpha(r, g, b, alpha, 4);
+ EXPECT_THAT(r, ElementsAre(FloatNear(120, 1e-4f), FloatNear(130, 1e-4f),
+ FloatNear(140, 1e-4f), FloatNear(150, 1e-4f)));
+ EXPECT_THAT(g, ElementsAre(FloatNear(124, 1e-4f), FloatNear(134, 1e-4f),
+ FloatNear(144, 1e-4f), FloatNear(154, 1e-4f)));
+ EXPECT_THAT(b, ElementsAre(FloatNear(127, 1e-4f), FloatNear(137, 1e-4f),
+ FloatNear(147, 1e-4f), FloatNear(157, 1e-4f)));
+}
+
+TEST(AlphaTest, UnpremultiplyAndPremultiply) {
+ const float alpha[] = {0.f, 63.f / 255, 127.f / 255, 1.f};
+ float r[] = {50, 60, 70, 80};
+ float g[] = {54, 64, 74, 84};
+ float b[] = {57, 67, 77, 87};
+
+ UnpremultiplyAlpha(r, g, b, alpha, 4);
+ EXPECT_THAT(r, ElementsAre(_, FloatNear(60 * 255.f / 63, 1e-4f),
+ FloatNear(70 * 255.f / 127, 1e-4f), 80));
+ EXPECT_THAT(g, ElementsAre(_, FloatNear(64 * 255.f / 63, 1e-4f),
+ FloatNear(74 * 255.f / 127, 1e-4f), 84));
+ EXPECT_THAT(b, ElementsAre(_, FloatNear(67 * 255.f / 63, 1e-4f),
+ FloatNear(77 * 255.f / 127, 1e-4f), 87));
+
+ PremultiplyAlpha(r, g, b, alpha, 4);
+ EXPECT_THAT(r, ElementsAre(FloatNear(50, 1e-4f), FloatNear(60, 1e-4f),
+ FloatNear(70, 1e-4f), FloatNear(80, 1e-4f)));
+ EXPECT_THAT(g, ElementsAre(FloatNear(54, 1e-4f), FloatNear(64, 1e-4f),
+ FloatNear(74, 1e-4f), FloatNear(84, 1e-4f)));
+ EXPECT_THAT(b, ElementsAre(FloatNear(57, 1e-4f), FloatNear(67, 1e-4f),
+ FloatNear(77, 1e-4f), FloatNear(87, 1e-4f)));
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/ans_common.cc b/third_party/jpeg-xl/lib/jxl/ans_common.cc
new file mode 100644
index 0000000000..d2cf897ec4
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/ans_common.cc
@@ -0,0 +1,148 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/ans_common.h"
+
+#include <numeric>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+std::vector<int32_t> CreateFlatHistogram(int length, int total_count) {
+ JXL_ASSERT(length > 0);
+ JXL_ASSERT(length <= total_count);
+ const int count = total_count / length;
+ std::vector<int32_t> result(length, count);
+ const int rem_counts = total_count % length;
+ for (int i = 0; i < rem_counts; ++i) {
+ ++result[i];
+ }
+ return result;
+}
+
+// First, all trailing non-occurring symbols are removed from the distribution;
+// if this leaves the distribution empty, a dummy symbol with max weight is
+// added. This ensures that the resulting distribution sums to total table size.
+// Then, `entry_size` is chosen to be the largest power of two so that
+// `table_size` = ANS_TAB_SIZE/`entry_size` is at least as big as the
+// distribution size.
+// Note that each entry will only ever contain two different symbols, and
+// consecutive ranges of offsets, which allows us to use a compact
+// representation.
+// Each entry is initialized with only the (symbol=i, offset) pairs; then
+// positions for which the entry overflows (i.e. distribution[i] > entry_size)
+// or is not full are computed, and put into a stack in increasing order.
+// Missing symbols in the distribution are padded with 0 (because `table_size`
+// >= number of symbols). The `cutoff` value for each entry is initialized to
+// the number of occupied slots in that entry (i.e. `distributions[i]`). While
+// the overflowing-symbol stack is not empty (which implies that the
+// underflowing-symbol stack also is not), the top overfull and underfull
+// positions are popped from the stack; the empty slots in the underfull entry
+// are then filled with as many slots as needed from the overfull entry; such
+// slots are placed after the slots in the overfull entry, and `offsets[1]` is
+// computed accordingly. The formerly underfull entry is thus now neither
+// underfull nor overfull, and represents exactly two symbols. The overfull
+// entry might be either overfull or underfull, and is pushed into the
+// corresponding stack.
+void InitAliasTable(std::vector<int32_t> distribution, uint32_t range,
+ size_t log_alpha_size, AliasTable::Entry* JXL_RESTRICT a) {
+ while (!distribution.empty() && distribution.back() == 0) {
+ distribution.pop_back();
+ }
+ // Ensure that a valid table is always returned, even for an empty
+ // alphabet. Otherwise, a specially-crafted stream might crash the
+ // decoder.
+ if (distribution.empty()) {
+ distribution.emplace_back(range);
+ }
+ const size_t table_size = 1 << log_alpha_size;
+#if JXL_ENABLE_ASSERT
+ int sum = std::accumulate(distribution.begin(), distribution.end(), 0);
+#endif // JXL_ENABLE_ASSERT
+ JXL_ASSERT(static_cast<uint32_t>(sum) == range);
+ // range must be a power of two
+ JXL_ASSERT((range & (range - 1)) == 0);
+ JXL_ASSERT(distribution.size() <= table_size);
+ JXL_ASSERT(table_size <= range);
+ const uint32_t entry_size = range >> log_alpha_size; // this is exact
+ // Special case for single-symbol distributions, that ensures that the state
+ // does not change when decoding from such a distribution. Note that, since we
+ // hardcode offset0 == 0, it is not straightforward (if at all possible) to
+ // fix the general case to produce this result.
+ for (size_t sym = 0; sym < distribution.size(); sym++) {
+ if (distribution[sym] == ANS_TAB_SIZE) {
+ for (size_t i = 0; i < table_size; i++) {
+ a[i].right_value = sym;
+ a[i].cutoff = 0;
+ a[i].offsets1 = entry_size * i;
+ a[i].freq0 = 0;
+ a[i].freq1_xor_freq0 = ANS_TAB_SIZE;
+ }
+ return;
+ }
+ }
+ std::vector<uint32_t> underfull_posn;
+ std::vector<uint32_t> overfull_posn;
+ std::vector<uint32_t> cutoffs(1 << log_alpha_size);
+ // Initialize entries.
+ for (size_t i = 0; i < distribution.size(); i++) {
+ cutoffs[i] = distribution[i];
+ if (cutoffs[i] > entry_size) {
+ overfull_posn.push_back(i);
+ } else if (cutoffs[i] < entry_size) {
+ underfull_posn.push_back(i);
+ }
+ }
+ for (uint32_t i = distribution.size(); i < table_size; i++) {
+ cutoffs[i] = 0;
+ underfull_posn.push_back(i);
+ }
+ // Reassign overflow/underflow values.
+ while (!overfull_posn.empty()) {
+ uint32_t overfull_i = overfull_posn.back();
+ overfull_posn.pop_back();
+ JXL_ASSERT(!underfull_posn.empty());
+ uint32_t underfull_i = underfull_posn.back();
+ underfull_posn.pop_back();
+ uint32_t underfull_by = entry_size - cutoffs[underfull_i];
+ cutoffs[overfull_i] -= underfull_by;
+ // overfull positions have their original symbols
+ a[underfull_i].right_value = overfull_i;
+ a[underfull_i].offsets1 = cutoffs[overfull_i];
+ // Slots in the right part of entry underfull_i were taken from the end
+ // of the symbols in entry overfull_i.
+ if (cutoffs[overfull_i] < entry_size) {
+ underfull_posn.push_back(overfull_i);
+ } else if (cutoffs[overfull_i] > entry_size) {
+ overfull_posn.push_back(overfull_i);
+ }
+ }
+ for (uint32_t i = 0; i < table_size; i++) {
+ // cutoffs[i] is properly initialized but the clang-analyzer doesn't infer
+ // it since it is partially initialized across two for-loops.
+ // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult)
+ if (cutoffs[i] == entry_size) {
+ a[i].right_value = i;
+ a[i].offsets1 = 0;
+ a[i].cutoff = 0;
+ } else {
+ // Note that, if cutoff is not equal to entry_size,
+ // a[i].offsets1 was initialized with (overfull cutoff) -
+ // (entry_size - a[i].cutoff). Thus, subtracting
+ // a[i].cutoff cannot make it negative.
+ a[i].offsets1 -= cutoffs[i];
+ a[i].cutoff = cutoffs[i];
+ }
+ const size_t freq0 = i < distribution.size() ? distribution[i] : 0;
+ const size_t i1 = a[i].right_value;
+ const size_t freq1 = i1 < distribution.size() ? distribution[i1] : 0;
+ a[i].freq0 = static_cast<uint16_t>(freq0);
+ a[i].freq1_xor_freq0 = static_cast<uint16_t>(freq1 ^ freq0);
+ }
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/ans_common.h b/third_party/jpeg-xl/lib/jxl/ans_common.h
new file mode 100644
index 0000000000..fb5058e310
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/ans_common.h
@@ -0,0 +1,143 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ANS_COMMON_H_
+#define LIB_JXL_ANS_COMMON_H_
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <hwy/cache_control.h> // Prefetch
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Returns the precision (number of bits) that should be used to store
+// a histogram count such that Log2Floor(count) == logcount.
+static JXL_INLINE uint32_t GetPopulationCountPrecision(uint32_t logcount,
+ uint32_t shift) {
+ int32_t r = std::min<int>(
+ logcount, int(shift) - int((ANS_LOG_TAB_SIZE - logcount) >> 1));
+ if (r < 0) return 0;
+ return r;
+}
+
+// Returns a histogram where the counts are positive, differ by at most 1,
+// and add up to total_count. The bigger counts (if any) are at the beginning
+// of the histogram.
+std::vector<int32_t> CreateFlatHistogram(int length, int total_count);
+
+// An alias table implements a mapping from the [0, ANS_TAB_SIZE) range into
+// the [0, ANS_MAX_ALPHABET_SIZE) range, satisfying the following conditions:
+// - each symbol occurs as many times as specified by any valid distribution
+// of frequencies of the symbols. A valid distribution here is an array of
+// ANS_MAX_ALPHABET_SIZE that contains numbers in the range [0, ANS_TAB_SIZE],
+// and whose sum is ANS_TAB_SIZE.
+// - lookups can be done in constant time, and also return how many smaller
+// input values map into the same symbol, according to some well-defined order
+// of input values.
+// - the space used by the alias table is given by a small constant times the
+// index of the largest symbol with nonzero probability in the distribution.
+// Each of the entries in the table covers a range of `entry_size` values in the
+// [0, ANS_TAB_SIZE) range; consecutive entries represent consecutive
+// sub-ranges. In the range covered by entry `i`, the first `cutoff` values map
+// to symbol `i`, while the others map to symbol `right_value`.
+//
+// TODO(veluca): consider making the order used for computing offsets easier to
+// define - it is currently defined by the algorithm to compute the alias table.
+// Beware of breaking the implicit assumption that symbols that come after the
+// cutoff value should have an offset at least as big as the cutoff.
+
+struct AliasTable {
+ struct Symbol {
+ size_t value;
+ size_t offset;
+ size_t freq;
+ };
+
+// Working set size matters here (~64 tables x 256 entries).
+// offsets0 is always zero (beginning of [0] side among the same symbol).
+// offsets1 is an offset of (pos >= cutoff) side decremented by cutoff.
+#pragma pack(push, 1)
+ struct Entry {
+ uint8_t cutoff; // < kEntrySizeMinus1 when used by ANS.
+ uint8_t right_value; // < alphabet size.
+ uint16_t freq0;
+
+ // Only used if `greater` (see Lookup)
+ uint16_t offsets1; // <= ANS_TAB_SIZE
+ uint16_t freq1_xor_freq0; // for branchless ternary in Lookup
+ };
+#pragma pack(pop)
+
+ // Dividing `value` by `entry_size` determines `i`, the entry which is
+ // responsible for the input. If the remainder is below `cutoff`, then the
+ // mapped symbol is `i`; since `offsets[0]` stores the number of occurrences
+ // of `i` "before" the start of this entry, the offset of the input will be
+ // `offsets[0] + remainder`. If the remainder is above cutoff, the mapped
+ // symbol is `right_value`; since `offsets[1]` stores the number of
+ // occurrences of `right_value` "before" this entry, minus the `cutoff` value,
+ // the input offset is then `remainder + offsets[1]`.
+ static JXL_INLINE Symbol Lookup(const Entry* JXL_RESTRICT table, size_t value,
+ size_t log_entry_size,
+ size_t entry_size_minus_1) {
+ const size_t i = value >> log_entry_size;
+ const size_t pos = value & entry_size_minus_1;
+
+#if JXL_BYTE_ORDER_LITTLE
+ uint64_t entry;
+ memcpy(&entry, &table[i].cutoff, sizeof(entry));
+ const size_t cutoff = entry & 0xFF; // = MOVZX
+ const size_t right_value = (entry >> 8) & 0xFF; // = MOVZX
+ const size_t freq0 = (entry >> 16) & 0xFFFF;
+#else
+ // Generates multiple loads with complex addressing.
+ const size_t cutoff = table[i].cutoff;
+ const size_t right_value = table[i].right_value;
+ const size_t freq0 = table[i].freq0;
+#endif
+
+ const bool greater = pos >= cutoff;
+
+#if JXL_BYTE_ORDER_LITTLE
+ const uint64_t conditional = greater ? entry : 0; // = CMOV
+ const size_t offsets1_or_0 = (conditional >> 32) & 0xFFFF;
+ const size_t freq1_xor_freq0_or_0 = conditional >> 48;
+#else
+ const size_t offsets1_or_0 = greater ? table[i].offsets1 : 0;
+ const size_t freq1_xor_freq0_or_0 = greater ? table[i].freq1_xor_freq0 : 0;
+#endif
+
+ // WARNING: moving this code may interfere with CMOV heuristics.
+ Symbol s;
+ s.value = greater ? right_value : i;
+ s.offset = offsets1_or_0 + pos;
+ s.freq = freq0 ^ freq1_xor_freq0_or_0; // = greater ? freq1 : freq0
+ // XOR avoids implementation-defined conversion from unsigned to signed.
+ // Alternatives considered: BEXTR is 2 cycles on HSW, SET+shift causes
+ // spills, simple ternary has a long dependency chain.
+
+ return s;
+ }
+
+ static HWY_INLINE void Prefetch(const Entry* JXL_RESTRICT table, size_t value,
+ size_t log_entry_size) {
+ const size_t i = value >> log_entry_size;
+ hwy::Prefetch(table + i);
+ }
+};
+
+// Computes an alias table for a given distribution.
+void InitAliasTable(std::vector<int32_t> distribution, uint32_t range,
+ size_t log_alpha_size, AliasTable::Entry* JXL_RESTRICT a);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ANS_COMMON_H_
diff --git a/third_party/jpeg-xl/lib/jxl/ans_common_test.cc b/third_party/jpeg-xl/lib/jxl/ans_common_test.cc
new file mode 100644
index 0000000000..487b6cf5bd
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/ans_common_test.cc
@@ -0,0 +1,43 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/ans_common.h"
+
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+void VerifyAliasDistribution(const std::vector<int>& distribution,
+ uint32_t range) {
+ constexpr size_t log_alpha_size = 8;
+ AliasTable::Entry table[1 << log_alpha_size];
+ InitAliasTable(distribution, range, log_alpha_size, table);
+ std::vector<std::vector<uint32_t>> offsets(distribution.size());
+ for (uint32_t i = 0; i < range; i++) {
+ AliasTable::Symbol s = AliasTable::Lookup(
+ table, i, ANS_LOG_TAB_SIZE - 8, (1 << (ANS_LOG_TAB_SIZE - 8)) - 1);
+ offsets[s.value].push_back(s.offset);
+ }
+ for (uint32_t i = 0; i < distribution.size(); i++) {
+ ASSERT_EQ(static_cast<size_t>(distribution[i]), offsets[i].size());
+ std::sort(offsets[i].begin(), offsets[i].end());
+ for (uint32_t j = 0; j < offsets[i].size(); j++) {
+ ASSERT_EQ(offsets[i][j], j);
+ }
+ }
+}
+
+TEST(ANSCommonTest, AliasDistributionSmoke) {
+ VerifyAliasDistribution({ANS_TAB_SIZE / 2, ANS_TAB_SIZE / 2}, ANS_TAB_SIZE);
+ VerifyAliasDistribution({ANS_TAB_SIZE}, ANS_TAB_SIZE);
+ VerifyAliasDistribution({0, 0, 0, ANS_TAB_SIZE, 0}, ANS_TAB_SIZE);
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/ans_params.h b/third_party/jpeg-xl/lib/jxl/ans_params.h
new file mode 100644
index 0000000000..4bbc284c0b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/ans_params.h
@@ -0,0 +1,36 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ANS_PARAMS_H_
+#define LIB_JXL_ANS_PARAMS_H_
+
+// Common parameters that are needed for both the ANS entropy encoding and
+// decoding methods.
+
+#include <stdint.h>
+#include <stdlib.h>
+
+namespace jxl {
+
+// TODO(veluca): decide if 12 is the best constant here (valid range is up to
+// 16). This requires recomputing the Huffman tables in {enc,dec}_ans.cc
+// 14 gives a 0.2% improvement at d1 and makes d8 slightly worse. This is
+// likely not worth the increase in encoder complexity.
+#define ANS_LOG_TAB_SIZE 12u
+#define ANS_TAB_SIZE (1 << ANS_LOG_TAB_SIZE)
+#define ANS_TAB_MASK (ANS_TAB_SIZE - 1)
+
+// Largest possible symbol to be encoded by either ANS or prefix coding.
+#define PREFIX_MAX_ALPHABET_SIZE 4096
+#define ANS_MAX_ALPHABET_SIZE 256
+
+// Max number of bits for prefix coding.
+#define PREFIX_MAX_BITS 15
+
+#define ANS_SIGNATURE 0x13 // Initial state, used as CRC.
+
+} // namespace jxl
+
+#endif // LIB_JXL_ANS_PARAMS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/ans_test.cc b/third_party/jpeg-xl/lib/jxl/ans_test.cc
new file mode 100644
index 0000000000..06bc46477f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/ans_test.cc
@@ -0,0 +1,278 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+void RoundtripTestcase(int n_histograms, int alphabet_size,
+ const std::vector<Token>& input_values) {
+ constexpr uint16_t kMagic1 = 0x9e33;
+ constexpr uint16_t kMagic2 = 0x8b04;
+
+ BitWriter writer;
+ // Space for magic bytes.
+ BitWriter::Allotment allotment_magic1(&writer, 16);
+ writer.Write(16, kMagic1);
+ allotment_magic1.ReclaimAndCharge(&writer, 0, nullptr);
+
+ std::vector<uint8_t> context_map;
+ EntropyEncodingData codes;
+ std::vector<std::vector<Token>> input_values_vec;
+ input_values_vec.push_back(input_values);
+
+ BuildAndEncodeHistograms(HistogramParams(), n_histograms, input_values_vec,
+ &codes, &context_map, &writer, 0, nullptr);
+ WriteTokens(input_values_vec[0], codes, context_map, &writer, 0, nullptr);
+
+ // Magic bytes + padding
+ BitWriter::Allotment allotment_magic2(&writer, 24);
+ writer.Write(16, kMagic2);
+ writer.ZeroPadToByte();
+ allotment_magic2.ReclaimAndCharge(&writer, 0, nullptr);
+
+ // We do not truncate the output. Reading past the end reads out zeroes
+ // anyway.
+ BitReader br(writer.GetSpan());
+
+ ASSERT_EQ(br.ReadBits(16), kMagic1);
+
+ std::vector<uint8_t> dec_context_map;
+ ANSCode decoded_codes;
+ ASSERT_TRUE(
+ DecodeHistograms(&br, n_histograms, &decoded_codes, &dec_context_map));
+ ASSERT_EQ(dec_context_map, context_map);
+ ANSSymbolReader reader(&decoded_codes, &br);
+
+ for (const Token& symbol : input_values) {
+ uint32_t read_symbol =
+ reader.ReadHybridUint(symbol.context, &br, dec_context_map);
+ ASSERT_EQ(read_symbol, symbol.value);
+ }
+ ASSERT_TRUE(reader.CheckANSFinalState());
+
+ ASSERT_EQ(br.ReadBits(16), kMagic2);
+ EXPECT_TRUE(br.Close());
+}
+
+TEST(ANSTest, EmptyRoundtrip) {
+ RoundtripTestcase(2, ANS_MAX_ALPHABET_SIZE, std::vector<Token>());
+}
+
+TEST(ANSTest, SingleSymbolRoundtrip) {
+ for (uint32_t i = 0; i < ANS_MAX_ALPHABET_SIZE; i++) {
+ RoundtripTestcase(2, ANS_MAX_ALPHABET_SIZE, {{0, i}});
+ }
+ for (uint32_t i = 0; i < ANS_MAX_ALPHABET_SIZE; i++) {
+ RoundtripTestcase(2, ANS_MAX_ALPHABET_SIZE,
+ std::vector<Token>(1024, {0, i}));
+ }
+}
+
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+ defined(THREAD_SANITIZER)
+constexpr size_t kReps = 3;
+#else
+constexpr size_t kReps = 10;
+#endif
+
+void RoundtripRandomStream(int alphabet_size, size_t reps = kReps,
+ size_t num = 1 << 18) {
+ constexpr int kNumHistograms = 3;
+ Rng rng(0);
+ for (size_t i = 0; i < reps; i++) {
+ std::vector<Token> symbols;
+ for (size_t j = 0; j < num; j++) {
+ int context = rng.UniformI(0, kNumHistograms);
+ int value = rng.UniformU(0, alphabet_size);
+ symbols.emplace_back(context, value);
+ }
+ RoundtripTestcase(kNumHistograms, alphabet_size, symbols);
+ }
+}
+
+void RoundtripRandomUnbalancedStream(int alphabet_size) {
+ constexpr int kNumHistograms = 3;
+ constexpr int kPrecision = 1 << 10;
+ Rng rng(0);
+ for (size_t i = 0; i < kReps; i++) {
+ std::vector<int> distributions[kNumHistograms] = {};
+ for (int j = 0; j < kNumHistograms; j++) {
+ distributions[j].resize(kPrecision);
+ int symbol = 0;
+ int remaining = 1;
+ for (int k = 0; k < kPrecision; k++) {
+ if (remaining == 0) {
+ if (symbol < alphabet_size - 1) symbol++;
+ // There is no meaning behind this distribution: it's anything that
+ // will create a nonuniform distribution and won't have too few
+ // symbols usually. Also we want different distributions we get to be
+ // sufficiently dissimilar.
+ remaining = rng.UniformU(0, kPrecision - k + 1);
+ }
+ distributions[j][k] = symbol;
+ remaining--;
+ }
+ }
+ std::vector<Token> symbols;
+ for (int j = 0; j < 1 << 18; j++) {
+ int context = rng.UniformI(0, kNumHistograms);
+ int value = rng.UniformU(0, kPrecision);
+ symbols.emplace_back(context, value);
+ }
+ RoundtripTestcase(kNumHistograms + 1, alphabet_size, symbols);
+ }
+}
+
+TEST(ANSTest, RandomStreamRoundtrip3Small) { RoundtripRandomStream(3, 1, 16); }
+
+TEST(ANSTest, RandomStreamRoundtrip3) { RoundtripRandomStream(3); }
+
+TEST(ANSTest, RandomStreamRoundtripBig) {
+ RoundtripRandomStream(ANS_MAX_ALPHABET_SIZE);
+}
+
+TEST(ANSTest, RandomUnbalancedStreamRoundtrip3) {
+ RoundtripRandomUnbalancedStream(3);
+}
+
+TEST(ANSTest, RandomUnbalancedStreamRoundtripBig) {
+ RoundtripRandomUnbalancedStream(ANS_MAX_ALPHABET_SIZE);
+}
+
+TEST(ANSTest, UintConfigRoundtrip) {
+ for (size_t log_alpha_size = 5; log_alpha_size <= 8; log_alpha_size++) {
+ std::vector<HybridUintConfig> uint_config, uint_config_dec;
+ for (size_t i = 0; i < log_alpha_size; i++) {
+ for (size_t j = 0; j <= i; j++) {
+ for (size_t k = 0; k <= i - j; k++) {
+ uint_config.emplace_back(i, j, k);
+ }
+ }
+ }
+ uint_config.emplace_back(log_alpha_size, 0, 0);
+ uint_config_dec.resize(uint_config.size());
+ BitWriter writer;
+ BitWriter::Allotment allotment(&writer, 10 * uint_config.size());
+ EncodeUintConfigs(uint_config, &writer, log_alpha_size);
+ allotment.ReclaimAndCharge(&writer, 0, nullptr);
+ writer.ZeroPadToByte();
+ BitReader br(writer.GetSpan());
+ EXPECT_TRUE(DecodeUintConfigs(log_alpha_size, &uint_config_dec, &br));
+ EXPECT_TRUE(br.Close());
+ for (size_t i = 0; i < uint_config.size(); i++) {
+ EXPECT_EQ(uint_config[i].split_token, uint_config_dec[i].split_token);
+ EXPECT_EQ(uint_config[i].msb_in_token, uint_config_dec[i].msb_in_token);
+ EXPECT_EQ(uint_config[i].lsb_in_token, uint_config_dec[i].lsb_in_token);
+ }
+ }
+}
+
+void TestCheckpointing(bool ans, bool lz77) {
+ std::vector<std::vector<Token>> input_values(1);
+ for (size_t i = 0; i < 1024; i++) {
+ input_values[0].push_back(Token(0, i % 4));
+ }
+ // up to lz77 window size.
+ for (size_t i = 0; i < (1 << 20) - 1022; i++) {
+ input_values[0].push_back(Token(0, (i % 5) + 4));
+ }
+ // Ensure that when the window wraps around, new values are different.
+ input_values[0].push_back(Token(0, 0));
+ for (size_t i = 0; i < 1024; i++) {
+ input_values[0].push_back(Token(0, i % 4));
+ }
+
+ std::vector<uint8_t> context_map;
+ EntropyEncodingData codes;
+ HistogramParams params;
+ params.lz77_method = lz77 ? HistogramParams::LZ77Method::kLZ77
+ : HistogramParams::LZ77Method::kNone;
+ params.force_huffman = !ans;
+
+ BitWriter writer;
+ {
+ auto input_values_copy = input_values;
+ BuildAndEncodeHistograms(params, 1, input_values_copy, &codes, &context_map,
+ &writer, 0, nullptr);
+ WriteTokens(input_values_copy[0], codes, context_map, &writer, 0, nullptr);
+ writer.ZeroPadToByte();
+ }
+
+ // We do not truncate the output. Reading past the end reads out zeroes
+ // anyway.
+ BitReader br(writer.GetSpan());
+ Status status = true;
+ {
+ BitReaderScopedCloser bc(&br, &status);
+
+ std::vector<uint8_t> dec_context_map;
+ ANSCode decoded_codes;
+ ASSERT_TRUE(DecodeHistograms(&br, 1, &decoded_codes, &dec_context_map));
+ ASSERT_EQ(dec_context_map, context_map);
+ ANSSymbolReader reader(&decoded_codes, &br);
+
+ ANSSymbolReader::Checkpoint checkpoint;
+ size_t br_pos = 0;
+ constexpr size_t kInterval = ANSSymbolReader::kMaxCheckpointInterval - 2;
+ for (size_t i = 0; i < input_values[0].size(); i++) {
+ if (i % kInterval == 0 && i > 0) {
+ reader.Restore(checkpoint);
+ ASSERT_TRUE(br.Close());
+ br = BitReader(writer.GetSpan());
+ br.SkipBits(br_pos);
+ for (size_t j = i - kInterval; j < i; j++) {
+ Token symbol = input_values[0][j];
+ uint32_t read_symbol =
+ reader.ReadHybridUint(symbol.context, &br, dec_context_map);
+ ASSERT_EQ(read_symbol, symbol.value) << "j = " << j;
+ }
+ }
+ if (i % kInterval == 0) {
+ reader.Save(&checkpoint);
+ br_pos = br.TotalBitsConsumed();
+ }
+ Token symbol = input_values[0][i];
+ uint32_t read_symbol =
+ reader.ReadHybridUint(symbol.context, &br, dec_context_map);
+ ASSERT_EQ(read_symbol, symbol.value) << "i = " << i;
+ }
+ ASSERT_TRUE(reader.CheckANSFinalState());
+ }
+ EXPECT_TRUE(status);
+}
+
+TEST(ANSTest, TestCheckpointingANS) {
+ TestCheckpointing(/*ans=*/true, /*lz77=*/false);
+}
+
+TEST(ANSTest, TestCheckpointingPrefix) {
+ TestCheckpointing(/*ans=*/false, /*lz77=*/false);
+}
+
+TEST(ANSTest, TestCheckpointingANSLZ77) {
+ TestCheckpointing(/*ans=*/true, /*lz77=*/true);
+}
+
+TEST(ANSTest, TestCheckpointingPrefixLZ77) {
+ TestCheckpointing(/*ans=*/false, /*lz77=*/true);
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/base/arch_macros.h b/third_party/jpeg-xl/lib/jxl/base/arch_macros.h
new file mode 100644
index 0000000000..a98301915e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/arch_macros.h
@@ -0,0 +1,33 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_ARCH_MACROS_H_
+#define LIB_JXL_BASE_ARCH_MACROS_H_
+
+// Defines the JXL_ARCH_* macros.
+
+namespace jxl {
+
+#if defined(__x86_64__) || defined(_M_X64)
+#define JXL_ARCH_X64 1
+#else
+#define JXL_ARCH_X64 0
+#endif
+
+#if defined(__powerpc64__) || defined(_M_PPC)
+#define JXL_ARCH_PPC 1
+#else
+#define JXL_ARCH_PPC 0
+#endif
+
+#if defined(__aarch64__) || defined(__arm__)
+#define JXL_ARCH_ARM 1
+#else
+#define JXL_ARCH_ARM 0
+#endif
+
+} // namespace jxl
+
+#endif // LIB_JXL_BASE_ARCH_MACROS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/base/bits.h b/third_party/jpeg-xl/lib/jxl/base/bits.h
new file mode 100644
index 0000000000..9f86118e72
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/bits.h
@@ -0,0 +1,147 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_BITS_H_
+#define LIB_JXL_BASE_BITS_H_
+
+// Specialized instructions for processing register-sized bit arrays.
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+#if JXL_COMPILER_MSVC
+#include <intrin.h>
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace jxl {
+
+// Empty struct used as a size tag type.
+template <size_t N>
+struct SizeTag {};
+
+template <typename T>
+constexpr bool IsSigned() {
+ return T(0) > T(-1);
+}
+
+// Undefined results for x == 0.
+static JXL_INLINE JXL_MAYBE_UNUSED size_t
+Num0BitsAboveMS1Bit_Nonzero(SizeTag<4> /* tag */, const uint32_t x) {
+ JXL_DASSERT(x != 0);
+#if JXL_COMPILER_MSVC
+ unsigned long index;
+ _BitScanReverse(&index, x);
+ return 31 - index;
+#else
+ return static_cast<size_t>(__builtin_clz(x));
+#endif
+}
+static JXL_INLINE JXL_MAYBE_UNUSED size_t
+Num0BitsAboveMS1Bit_Nonzero(SizeTag<8> /* tag */, const uint64_t x) {
+ JXL_DASSERT(x != 0);
+#if JXL_COMPILER_MSVC
+#if JXL_ARCH_X64
+ unsigned long index;
+ _BitScanReverse64(&index, x);
+ return 63 - index;
+#else // JXL_ARCH_X64
+ // _BitScanReverse64 not available
+ uint32_t msb = static_cast<uint32_t>(x >> 32u);
+ unsigned long index;
+ if (msb == 0) {
+ uint32_t lsb = static_cast<uint32_t>(x & 0xFFFFFFFF);
+ _BitScanReverse(&index, lsb);
+ return 63 - index;
+ } else {
+ _BitScanReverse(&index, msb);
+ return 31 - index;
+ }
+#endif // JXL_ARCH_X64
+#else
+ return static_cast<size_t>(__builtin_clzll(x));
+#endif
+}
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED size_t
+Num0BitsAboveMS1Bit_Nonzero(const T x) {
+ static_assert(!IsSigned<T>(), "Num0BitsAboveMS1Bit_Nonzero: use unsigned");
+ return Num0BitsAboveMS1Bit_Nonzero(SizeTag<sizeof(T)>(), x);
+}
+
+// Undefined results for x == 0.
+static JXL_INLINE JXL_MAYBE_UNUSED size_t
+Num0BitsBelowLS1Bit_Nonzero(SizeTag<4> /* tag */, const uint32_t x) {
+ JXL_DASSERT(x != 0);
+#if JXL_COMPILER_MSVC
+ unsigned long index;
+ _BitScanForward(&index, x);
+ return index;
+#else
+ return static_cast<size_t>(__builtin_ctz(x));
+#endif
+}
+static JXL_INLINE JXL_MAYBE_UNUSED size_t
+Num0BitsBelowLS1Bit_Nonzero(SizeTag<8> /* tag */, const uint64_t x) {
+ JXL_DASSERT(x != 0);
+#if JXL_COMPILER_MSVC
+#if JXL_ARCH_X64
+ unsigned long index;
+ _BitScanForward64(&index, x);
+ return index;
+#else // JXL_ARCH_64
+ // _BitScanForward64 not available
+ uint32_t lsb = static_cast<uint32_t>(x & 0xFFFFFFFF);
+ unsigned long index;
+ if (lsb == 0) {
+ uint32_t msb = static_cast<uint32_t>(x >> 32u);
+ _BitScanForward(&index, msb);
+ return 32 + index;
+ } else {
+ _BitScanForward(&index, lsb);
+ return index;
+ }
+#endif // JXL_ARCH_X64
+#else
+ return static_cast<size_t>(__builtin_ctzll(x));
+#endif
+}
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED size_t Num0BitsBelowLS1Bit_Nonzero(T x) {
+ static_assert(!IsSigned<T>(), "Num0BitsBelowLS1Bit_Nonzero: use unsigned");
+ return Num0BitsBelowLS1Bit_Nonzero(SizeTag<sizeof(T)>(), x);
+}
+
+// Returns bit width for x == 0.
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED size_t Num0BitsAboveMS1Bit(const T x) {
+ return (x == 0) ? sizeof(T) * 8 : Num0BitsAboveMS1Bit_Nonzero(x);
+}
+
+// Returns bit width for x == 0.
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED size_t Num0BitsBelowLS1Bit(const T x) {
+ return (x == 0) ? sizeof(T) * 8 : Num0BitsBelowLS1Bit_Nonzero(x);
+}
+
+// Returns base-2 logarithm, rounded down.
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED size_t FloorLog2Nonzero(const T x) {
+ return (sizeof(T) * 8 - 1) ^ Num0BitsAboveMS1Bit_Nonzero(x);
+}
+
+// Returns base-2 logarithm, rounded up.
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED size_t CeilLog2Nonzero(const T x) {
+ const size_t floor_log2 = FloorLog2Nonzero(x);
+ if ((x & (x - 1)) == 0) return floor_log2; // power of two
+ return floor_log2 + 1;
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_BASE_BITS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/base/byte_order.h b/third_party/jpeg-xl/lib/jxl/base/byte_order.h
new file mode 100644
index 0000000000..8966834e08
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/byte_order.h
@@ -0,0 +1,274 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_BYTE_ORDER_H_
+#define LIB_JXL_BASE_BYTE_ORDER_H_
+
+#include <jxl/types.h>
+#include <stdint.h>
+#include <string.h> // memcpy
+
+#include "lib/jxl/base/compiler_specific.h"
+
+#if JXL_COMPILER_MSVC
+#include <intrin.h> // _byteswap_*
+#endif
+
+#if (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
+#define JXL_BYTE_ORDER_LITTLE 1
+#else
+// This means that we don't know that the byte order is little endian, in
+// this case we use endian-neutral code that works for both little- and
+// big-endian.
+#define JXL_BYTE_ORDER_LITTLE 0
+#endif
+
+// Returns whether the system is little-endian (least-significant byte first).
+#if JXL_BYTE_ORDER_LITTLE
+static constexpr bool IsLittleEndian() { return true; }
+#else
+static inline bool IsLittleEndian() {
+ const uint32_t multibyte = 1;
+ uint8_t byte;
+ memcpy(&byte, &multibyte, 1);
+ return byte == 1;
+}
+#endif
+
+static inline bool SwapEndianness(JxlEndianness endianness) {
+ return ((endianness == JXL_BIG_ENDIAN && IsLittleEndian()) ||
+ (endianness == JXL_LITTLE_ENDIAN && !IsLittleEndian()));
+}
+
+#if JXL_COMPILER_MSVC
+#define JXL_BSWAP16(x) _byteswap_ushort(x)
+#define JXL_BSWAP32(x) _byteswap_ulong(x)
+#define JXL_BSWAP64(x) _byteswap_uint64(x)
+#else
+#define JXL_BSWAP16(x) __builtin_bswap16(x)
+#define JXL_BSWAP32(x) __builtin_bswap32(x)
+#define JXL_BSWAP64(x) __builtin_bswap64(x)
+#endif
+
+static JXL_INLINE uint32_t LoadBE16(const uint8_t* p) {
+ const uint32_t byte1 = p[0];
+ const uint32_t byte0 = p[1];
+ return (byte1 << 8) | byte0;
+}
+
+static JXL_INLINE uint32_t LoadLE16(const uint8_t* p) {
+ const uint32_t byte0 = p[0];
+ const uint32_t byte1 = p[1];
+ return (byte1 << 8) | byte0;
+}
+
+static JXL_INLINE uint32_t LoadBE32(const uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+ uint32_t big;
+ memcpy(&big, p, 4);
+ return JXL_BSWAP32(big);
+#else
+ // Byte-order-independent - can't assume this machine is big endian.
+ const uint32_t byte3 = p[0];
+ const uint32_t byte2 = p[1];
+ const uint32_t byte1 = p[2];
+ const uint32_t byte0 = p[3];
+ return (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0;
+#endif
+}
+
+static JXL_INLINE uint64_t LoadBE64(const uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+ uint64_t big;
+ memcpy(&big, p, 8);
+ return JXL_BSWAP64(big);
+#else
+ // Byte-order-independent - can't assume this machine is big endian.
+ const uint64_t byte7 = p[0];
+ const uint64_t byte6 = p[1];
+ const uint64_t byte5 = p[2];
+ const uint64_t byte4 = p[3];
+ const uint64_t byte3 = p[4];
+ const uint64_t byte2 = p[5];
+ const uint64_t byte1 = p[6];
+ const uint64_t byte0 = p[7];
+ return (byte7 << 56ull) | (byte6 << 48ull) | (byte5 << 40ull) |
+ (byte4 << 32ull) | (byte3 << 24ull) | (byte2 << 16ull) |
+ (byte1 << 8ull) | byte0;
+#endif
+}
+
+static JXL_INLINE uint32_t LoadLE32(const uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+ uint32_t little;
+ memcpy(&little, p, 4);
+ return little;
+#else
+ // Byte-order-independent - can't assume this machine is big endian.
+ const uint32_t byte0 = p[0];
+ const uint32_t byte1 = p[1];
+ const uint32_t byte2 = p[2];
+ const uint32_t byte3 = p[3];
+ return (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0;
+#endif
+}
+
+static JXL_INLINE uint64_t LoadLE64(const uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+ uint64_t little;
+ memcpy(&little, p, 8);
+ return little;
+#else
+ // Byte-order-independent - can't assume this machine is big endian.
+ const uint64_t byte0 = p[0];
+ const uint64_t byte1 = p[1];
+ const uint64_t byte2 = p[2];
+ const uint64_t byte3 = p[3];
+ const uint64_t byte4 = p[4];
+ const uint64_t byte5 = p[5];
+ const uint64_t byte6 = p[6];
+ const uint64_t byte7 = p[7];
+ return (byte7 << 56) | (byte6 << 48) | (byte5 << 40) | (byte4 << 32) |
+ (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0;
+#endif
+}
+
+// Loads a Big-Endian float
+static JXL_INLINE float LoadBEFloat(const uint8_t* p) {
+ uint32_t u = LoadBE32(p);
+ float result;
+ memcpy(&result, &u, 4);
+ return result;
+}
+
+// Loads a Little-Endian float
+static JXL_INLINE float LoadLEFloat(const uint8_t* p) {
+ uint32_t u = LoadLE32(p);
+ float result;
+ memcpy(&result, &u, 4);
+ return result;
+}
+
+static JXL_INLINE void StoreBE16(const uint32_t native, uint8_t* p) {
+ p[0] = (native >> 8) & 0xFF;
+ p[1] = native & 0xFF;
+}
+
+static JXL_INLINE void StoreLE16(const uint32_t native, uint8_t* p) {
+ p[1] = (native >> 8) & 0xFF;
+ p[0] = native & 0xFF;
+}
+
+static JXL_INLINE void StoreBE32(const uint32_t native, uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+ const uint32_t big = JXL_BSWAP32(native);
+ memcpy(p, &big, 4);
+#else
+ // Byte-order-independent - can't assume this machine is big endian.
+ p[0] = native >> 24;
+ p[1] = (native >> 16) & 0xFF;
+ p[2] = (native >> 8) & 0xFF;
+ p[3] = native & 0xFF;
+#endif
+}
+
+static JXL_INLINE void StoreBE64(const uint64_t native, uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+ const uint64_t big = JXL_BSWAP64(native);
+ memcpy(p, &big, 8);
+#else
+ // Byte-order-independent - can't assume this machine is big endian.
+ p[0] = native >> 56ull;
+ p[1] = (native >> 48ull) & 0xFF;
+ p[2] = (native >> 40ull) & 0xFF;
+ p[3] = (native >> 32ull) & 0xFF;
+ p[4] = (native >> 24ull) & 0xFF;
+ p[5] = (native >> 16ull) & 0xFF;
+ p[6] = (native >> 8ull) & 0xFF;
+ p[7] = native & 0xFF;
+#endif
+}
+
+static JXL_INLINE void StoreLE32(const uint32_t native, uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+ const uint32_t little = native;
+ memcpy(p, &little, 4);
+#else
+ // Byte-order-independent - can't assume this machine is big endian.
+ p[3] = native >> 24;
+ p[2] = (native >> 16) & 0xFF;
+ p[1] = (native >> 8) & 0xFF;
+ p[0] = native & 0xFF;
+#endif
+}
+
+static JXL_INLINE void StoreLE64(const uint64_t native, uint8_t* p) {
+#if JXL_BYTE_ORDER_LITTLE
+ const uint64_t little = native;
+ memcpy(p, &little, 8);
+#else
+ // Byte-order-independent - can't assume this machine is big endian.
+ p[7] = native >> 56;
+ p[6] = (native >> 48) & 0xFF;
+ p[5] = (native >> 40) & 0xFF;
+ p[4] = (native >> 32) & 0xFF;
+ p[3] = (native >> 24) & 0xFF;
+ p[2] = (native >> 16) & 0xFF;
+ p[1] = (native >> 8) & 0xFF;
+ p[0] = native & 0xFF;
+#endif
+}
+
+static JXL_INLINE float BSwapFloat(float x) {
+ uint32_t u;
+ memcpy(&u, &x, 4);
+ uint32_t uswap = JXL_BSWAP32(u);
+ float xswap;
+ memcpy(&xswap, &uswap, 4);
+ return xswap;
+}
+
+// Big/Little Endian order.
+struct OrderBE {};
+struct OrderLE {};
+
+// Wrappers for calling from generic code.
+static JXL_INLINE void Store16(OrderBE /*tag*/, const uint32_t native,
+ uint8_t* p) {
+ return StoreBE16(native, p);
+}
+
+static JXL_INLINE void Store16(OrderLE /*tag*/, const uint32_t native,
+ uint8_t* p) {
+ return StoreLE16(native, p);
+}
+
+static JXL_INLINE void Store32(OrderBE /*tag*/, const uint32_t native,
+ uint8_t* p) {
+ return StoreBE32(native, p);
+}
+
+static JXL_INLINE void Store32(OrderLE /*tag*/, const uint32_t native,
+ uint8_t* p) {
+ return StoreLE32(native, p);
+}
+
+static JXL_INLINE uint32_t Load16(OrderBE /*tag*/, const uint8_t* p) {
+ return LoadBE16(p);
+}
+
+static JXL_INLINE uint32_t Load16(OrderLE /*tag*/, const uint8_t* p) {
+ return LoadLE16(p);
+}
+
+static JXL_INLINE uint32_t Load32(OrderBE /*tag*/, const uint8_t* p) {
+ return LoadBE32(p);
+}
+
+static JXL_INLINE uint32_t Load32(OrderLE /*tag*/, const uint8_t* p) {
+ return LoadLE32(p);
+}
+
+#endif // LIB_JXL_BASE_BYTE_ORDER_H_
diff --git a/third_party/jpeg-xl/lib/jxl/base/cache_aligned.cc b/third_party/jpeg-xl/lib/jxl/base/cache_aligned.cc
new file mode 100644
index 0000000000..9a9cc585a1
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/cache_aligned.cc
@@ -0,0 +1,157 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/cache_aligned.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+// Disabled: slower than malloc + alignment.
+#define JXL_USE_MMAP 0
+
+#if JXL_USE_MMAP
+#include <sys/mman.h>
+#endif
+
+#include <algorithm> // std::max
+#include <atomic>
+#include <hwy/base.h> // kMaxVectorSize
+#include <limits>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace {
+
+#pragma pack(push, 1)
+struct AllocationHeader {
+ void* allocated;
+ size_t allocated_size;
+ uint8_t left_padding[hwy::kMaxVectorSize];
+};
+#pragma pack(pop)
+
+std::atomic<uint64_t> num_allocations{0};
+std::atomic<uint64_t> bytes_in_use{0};
+std::atomic<uint64_t> max_bytes_in_use{0};
+
+} // namespace
+
+// Avoids linker errors in pre-C++17 builds.
+constexpr size_t CacheAligned::kPointerSize;
+constexpr size_t CacheAligned::kCacheLineSize;
+constexpr size_t CacheAligned::kAlignment;
+constexpr size_t CacheAligned::kAlias;
+
+void CacheAligned::PrintStats() {
+ fprintf(
+ stderr, "Allocations: %" PRIuS " (max bytes in use: %E)\n",
+ static_cast<size_t>(num_allocations.load(std::memory_order_relaxed)),
+ static_cast<double>(max_bytes_in_use.load(std::memory_order_relaxed)));
+}
+
+size_t CacheAligned::NextOffset() {
+ static std::atomic<uint32_t> next{0};
+ constexpr uint32_t kGroups = CacheAligned::kAlias / CacheAligned::kAlignment;
+ const uint32_t group = next.fetch_add(1, std::memory_order_relaxed) % kGroups;
+ return CacheAligned::kAlignment * group;
+}
+
+void* CacheAligned::Allocate(const size_t payload_size, size_t offset) {
+ JXL_ASSERT(payload_size <= std::numeric_limits<size_t>::max() / 2);
+ JXL_ASSERT((offset % kAlignment == 0) && offset <= kAlias);
+
+ // What: | misalign | unused | AllocationHeader |payload
+ // Size: |<= kAlias | offset | |payload_size
+ // ^allocated.^aligned.^header............^payload
+ // The header must immediately precede payload, which must remain aligned.
+ // To avoid wasting space, the header resides at the end of `unused`,
+ // which therefore cannot be empty (offset == 0).
+ if (offset == 0) {
+ // SVE/RVV vectors can be large, so we cannot rely on them (including the
+ // padding at the end of AllocationHeader) to fit in kAlignment.
+ offset = hwy::RoundUpTo(sizeof(AllocationHeader), kAlignment);
+ }
+
+#if JXL_USE_MMAP
+ const size_t allocated_size = offset + payload_size;
+ const int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE;
+ void* allocated =
+ mmap(nullptr, allocated_size, PROT_READ | PROT_WRITE, flags, -1, 0);
+ if (allocated == MAP_FAILED) return nullptr;
+ const uintptr_t aligned = reinterpret_cast<uintptr_t>(allocated);
+#else
+ const size_t allocated_size = kAlias + offset + payload_size;
+ void* allocated = malloc(allocated_size);
+ if (allocated == nullptr) return nullptr;
+ // Always round up even if already aligned - we already asked for kAlias
+ // extra bytes and there's no way to give them back.
+ uintptr_t aligned = reinterpret_cast<uintptr_t>(allocated) + kAlias;
+ static_assert((kAlias & (kAlias - 1)) == 0, "kAlias must be a power of 2");
+ static_assert(kAlias >= kAlignment, "Cannot align to more than kAlias");
+ aligned &= ~(kAlias - 1);
+#endif
+
+#if 0
+ // No effect.
+ uintptr_t page_aligned = reinterpret_cast<uintptr_t>(allocated);
+ page_aligned &= ~(4096 - 1);
+ if (madvise(reinterpret_cast<void*>(page_aligned), allocated_size,
+ MADV_WILLNEED) != 0) {
+ JXL_NOTIFY_ERROR("madvise failed");
+ }
+#elif 0
+ // INCREASES both first and subsequent decode times.
+ if (mlock(allocated, allocated_size) != 0) {
+ JXL_NOTIFY_ERROR("mlock failed");
+ }
+#endif
+
+ // Update statistics (#allocations and max bytes in use)
+ num_allocations.fetch_add(1, std::memory_order_relaxed);
+ const uint64_t prev_bytes =
+ bytes_in_use.fetch_add(allocated_size, std::memory_order_acq_rel);
+ uint64_t expected_max = max_bytes_in_use.load(std::memory_order_acquire);
+ for (;;) {
+ const uint64_t desired =
+ std::max(expected_max, prev_bytes + allocated_size);
+ if (max_bytes_in_use.compare_exchange_strong(expected_max, desired,
+ std::memory_order_acq_rel)) {
+ break;
+ }
+ }
+
+ const uintptr_t payload = aligned + offset; // still aligned
+
+ // Stash `allocated` and payload_size inside header for use by Free().
+ AllocationHeader* header = reinterpret_cast<AllocationHeader*>(payload) - 1;
+ header->allocated = allocated;
+ header->allocated_size = allocated_size;
+
+ return JXL_ASSUME_ALIGNED(reinterpret_cast<void*>(payload), 64);
+}
+
+void CacheAligned::Free(const void* aligned_pointer) {
+ if (aligned_pointer == nullptr) {
+ return;
+ }
+ const uintptr_t payload = reinterpret_cast<uintptr_t>(aligned_pointer);
+ JXL_ASSERT(payload % kAlignment == 0);
+ const AllocationHeader* header =
+ reinterpret_cast<const AllocationHeader*>(payload) - 1;
+
+ // Subtract (2's complement negation).
+ bytes_in_use.fetch_add(~header->allocated_size + 1,
+ std::memory_order_acq_rel);
+
+#if JXL_USE_MMAP
+ munmap(header->allocated, header->allocated_size);
+#else
+ free(header->allocated);
+#endif
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/base/cache_aligned.h b/third_party/jpeg-xl/lib/jxl/base/cache_aligned.h
new file mode 100644
index 0000000000..e57df14837
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/cache_aligned.h
@@ -0,0 +1,74 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_CACHE_ALIGNED_H_
+#define LIB_JXL_BASE_CACHE_ALIGNED_H_
+
+// Memory allocator with support for alignment + misalignment.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <memory>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+// Functions that depend on the cache line size.
+class CacheAligned {
+ public:
+ static void PrintStats();
+
+ static constexpr size_t kPointerSize = sizeof(void*);
+ static constexpr size_t kCacheLineSize = 64;
+ // To avoid RFOs, match L2 fill size (pairs of lines).
+ static constexpr size_t kAlignment = 2 * kCacheLineSize;
+ // Minimum multiple for which cache set conflicts and/or loads blocked by
+ // preceding stores can occur.
+ static constexpr size_t kAlias = 2048;
+
+ // Returns a 'random' (cyclical) offset suitable for Allocate.
+ static size_t NextOffset();
+
+ // Returns null or memory whose address is congruent to `offset` (mod kAlias).
+ // This reduces cache conflicts and load/store stalls, especially with large
+ // allocations that would otherwise have similar alignments. At least
+ // `payload_size` (which can be zero) bytes will be accessible.
+ static void* Allocate(size_t payload_size, size_t offset);
+
+ static void* Allocate(const size_t payload_size) {
+ return Allocate(payload_size, NextOffset());
+ }
+
+ static void Free(const void* aligned_pointer);
+};
+
+// Avoids the need for a function pointer (deleter) in CacheAlignedUniquePtr.
+struct CacheAlignedDeleter {
+ void operator()(uint8_t* aligned_pointer) const {
+ return CacheAligned::Free(aligned_pointer);
+ }
+};
+
+using CacheAlignedUniquePtr = std::unique_ptr<uint8_t[], CacheAlignedDeleter>;
+
+// Does not invoke constructors.
+static inline CacheAlignedUniquePtr AllocateArray(const size_t bytes) {
+ return CacheAlignedUniquePtr(
+ static_cast<uint8_t*>(CacheAligned::Allocate(bytes)),
+ CacheAlignedDeleter());
+}
+
+static inline CacheAlignedUniquePtr AllocateArray(const size_t bytes,
+ const size_t offset) {
+ return CacheAlignedUniquePtr(
+ static_cast<uint8_t*>(CacheAligned::Allocate(bytes, offset)),
+ CacheAlignedDeleter());
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_BASE_CACHE_ALIGNED_H_
diff --git a/third_party/jpeg-xl/lib/jxl/base/compiler_specific.h b/third_party/jpeg-xl/lib/jxl/base/compiler_specific.h
new file mode 100644
index 0000000000..abe1261f48
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/compiler_specific.h
@@ -0,0 +1,157 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_COMPILER_SPECIFIC_H_
+#define LIB_JXL_BASE_COMPILER_SPECIFIC_H_
+
+// Macros for compiler version + nonstandard keywords, e.g. __builtin_expect.
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "lib/jxl/base/sanitizer_definitions.h"
+
+// #if is shorter and safer than #ifdef. *_VERSION are zero if not detected,
+// otherwise 100 * major + minor version. Note that other packages check for
+// #ifdef COMPILER_MSVC, so we cannot use that same name.
+
+#ifdef _MSC_VER
+#define JXL_COMPILER_MSVC _MSC_VER
+#else
+#define JXL_COMPILER_MSVC 0
+#endif
+
+#ifdef __GNUC__
+#define JXL_COMPILER_GCC (__GNUC__ * 100 + __GNUC_MINOR__)
+#else
+#define JXL_COMPILER_GCC 0
+#endif
+
+#ifdef __clang__
+#define JXL_COMPILER_CLANG (__clang_major__ * 100 + __clang_minor__)
+// Clang pretends to be GCC for compatibility.
+#undef JXL_COMPILER_GCC
+#define JXL_COMPILER_GCC 0
+#else
+#define JXL_COMPILER_CLANG 0
+#endif
+
+#if JXL_COMPILER_MSVC
+#define JXL_RESTRICT __restrict
+#elif JXL_COMPILER_GCC || JXL_COMPILER_CLANG
+#define JXL_RESTRICT __restrict__
+#else
+#define JXL_RESTRICT
+#endif
+
+#if JXL_COMPILER_MSVC
+#define JXL_INLINE __forceinline
+#define JXL_NOINLINE __declspec(noinline)
+#else
+#define JXL_INLINE inline __attribute__((always_inline))
+#define JXL_NOINLINE __attribute__((noinline))
+#endif
+
+#if JXL_COMPILER_MSVC
+#define JXL_NORETURN __declspec(noreturn)
+#elif JXL_COMPILER_GCC || JXL_COMPILER_CLANG
+#define JXL_NORETURN __attribute__((noreturn))
+#else
+#define JXL_NORETURN
+#endif
+
+#if JXL_COMPILER_MSVC
+#define JXL_UNREACHABLE __assume(false)
+#elif JXL_COMPILER_CLANG || JXL_COMPILER_GCC >= 405
+#define JXL_UNREACHABLE __builtin_unreachable()
+#else
+#define JXL_UNREACHABLE
+#endif
+
+#if JXL_COMPILER_MSVC
+#define JXL_MAYBE_UNUSED
+#else
+// Encountered "attribute list cannot appear here" when using the C++17
+// [[maybe_unused]], so only use the old style attribute for now.
+#define JXL_MAYBE_UNUSED __attribute__((unused))
+#endif
+
+// MSAN execution won't hurt if some code it not inlined, but this can greatly
+// improve compilation time. Unfortunately this macro can not be used just
+// everywhere - inside header files it leads to "multiple definition" error;
+// though it would be better not to have JXL_INLINE in header overall.
+#if JXL_MEMORY_SANITIZER || JXL_ADDRESS_SANITIZER || JXL_THREAD_SANITIZER
+#define JXL_MAYBE_INLINE JXL_MAYBE_UNUSED
+#else
+#define JXL_MAYBE_INLINE JXL_INLINE
+#endif
+
+#if JXL_COMPILER_MSVC
+// Unsupported, __assume is not the same.
+#define JXL_LIKELY(expr) expr
+#define JXL_UNLIKELY(expr) expr
+#else
+#define JXL_LIKELY(expr) __builtin_expect(!!(expr), 1)
+#define JXL_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
+#endif
+
+// Returns a void* pointer which the compiler then assumes is N-byte aligned.
+// Example: float* JXL_RESTRICT aligned = (float*)JXL_ASSUME_ALIGNED(in, 32);
+//
+// The assignment semantics are required by GCC/Clang. ICC provides an in-place
+// __assume_aligned, whereas MSVC's __assume appears unsuitable.
+#if JXL_COMPILER_CLANG
+// Early versions of Clang did not support __builtin_assume_aligned.
+#define JXL_HAS_ASSUME_ALIGNED __has_builtin(__builtin_assume_aligned)
+#elif JXL_COMPILER_GCC
+#define JXL_HAS_ASSUME_ALIGNED 1
+#else
+#define JXL_HAS_ASSUME_ALIGNED 0
+#endif
+
+#if JXL_HAS_ASSUME_ALIGNED
+#define JXL_ASSUME_ALIGNED(ptr, align) __builtin_assume_aligned((ptr), (align))
+#else
+#define JXL_ASSUME_ALIGNED(ptr, align) (ptr) /* not supported */
+#endif
+
+#ifdef __has_attribute
+#define JXL_HAVE_ATTRIBUTE(x) __has_attribute(x)
+#else
+#define JXL_HAVE_ATTRIBUTE(x) 0
+#endif
+
+// Raises warnings if the function return value is unused. Should appear as the
+// first part of a function definition/declaration.
+#if JXL_HAVE_ATTRIBUTE(nodiscard)
+#define JXL_MUST_USE_RESULT [[nodiscard]]
+#elif JXL_COMPILER_CLANG && JXL_HAVE_ATTRIBUTE(warn_unused_result)
+#define JXL_MUST_USE_RESULT __attribute__((warn_unused_result))
+#else
+#define JXL_MUST_USE_RESULT
+#endif
+
+// Disable certain -fsanitize flags for functions that are expected to include
+// things like unsigned integer overflow. For example use in the function
+// declaration JXL_NO_SANITIZE("unsigned-integer-overflow") to silence unsigned
+// integer overflow ubsan messages.
+#if JXL_COMPILER_CLANG && JXL_HAVE_ATTRIBUTE(no_sanitize)
+#define JXL_NO_SANITIZE(X) __attribute__((no_sanitize(X)))
+#else
+#define JXL_NO_SANITIZE(X)
+#endif
+
+#if JXL_HAVE_ATTRIBUTE(__format__)
+#define JXL_FORMAT(idx_fmt, idx_arg) \
+ __attribute__((__format__(__printf__, idx_fmt, idx_arg)))
+#else
+#define JXL_FORMAT(idx_fmt, idx_arg)
+#endif
+
+#if JXL_COMPILER_MSVC
+using ssize_t = intptr_t;
+#endif
+
+#endif // LIB_JXL_BASE_COMPILER_SPECIFIC_H_
diff --git a/third_party/jpeg-xl/lib/jxl/base/data_parallel.cc b/third_party/jpeg-xl/lib/jxl/base/data_parallel.cc
new file mode 100644
index 0000000000..20a911255c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/data_parallel.cc
@@ -0,0 +1,23 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/data_parallel.h"
+
+namespace jxl {
+
+// static
+JxlParallelRetCode ThreadPool::SequentialRunnerStatic(
+ void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+ JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range) {
+ JxlParallelRetCode init_ret = (*init)(jpegxl_opaque, 1);
+ if (init_ret != 0) return init_ret;
+
+ for (uint32_t i = start_range; i < end_range; i++) {
+ (*func)(jpegxl_opaque, i, 0);
+ }
+ return 0;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/base/data_parallel.h b/third_party/jpeg-xl/lib/jxl/base/data_parallel.h
new file mode 100644
index 0000000000..ba7e7adfad
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/data_parallel.h
@@ -0,0 +1,120 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_DATA_PARALLEL_H_
+#define LIB_JXL_BASE_DATA_PARALLEL_H_
+
+// Portable, low-overhead C++11 ThreadPool alternative to OpenMP for
+// data-parallel computations.
+
+#include <jxl/parallel_runner.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/status.h"
+#if JXL_COMPILER_MSVC
+// suppress warnings about the const & applied to function types
+#pragma warning(disable : 4180)
+#endif
+
+namespace jxl {
+
+class ThreadPool {
+ public:
+ ThreadPool(JxlParallelRunner runner, void* runner_opaque)
+ : runner_(runner ? runner : &ThreadPool::SequentialRunnerStatic),
+ runner_opaque_(runner ? runner_opaque : static_cast<void*>(this)) {}
+
+ ThreadPool(const ThreadPool&) = delete;
+ ThreadPool& operator&(const ThreadPool&) = delete;
+
+ JxlParallelRunner runner() const { return runner_; }
+ void* runner_opaque() const { return runner_opaque_; }
+
+ // Runs init_func(num_threads) followed by data_func(task, thread) on worker
+ // thread(s) for every task in [begin, end). init_func() must return a Status
+ // indicating whether the initialization succeeded.
+ // "thread" is an integer smaller than num_threads.
+ // Not thread-safe - no two calls to Run may overlap.
+ // Subsequent calls will reuse the same threads.
+ //
+ // Precondition: begin <= end.
+ template <class InitFunc, class DataFunc>
+ Status Run(uint32_t begin, uint32_t end, const InitFunc& init_func,
+ const DataFunc& data_func, const char* caller = "") {
+ JXL_ASSERT(begin <= end);
+ if (begin == end) return true;
+ RunCallState<InitFunc, DataFunc> call_state(init_func, data_func);
+ // The runner_ uses the C convention and returns 0 in case of error, so we
+ // convert it to a Status.
+ return (*runner_)(runner_opaque_, static_cast<void*>(&call_state),
+ &call_state.CallInitFunc, &call_state.CallDataFunc, begin,
+ end) == 0;
+ }
+
+ // Use this as init_func when no initialization is needed.
+ static Status NoInit(size_t num_threads) { return true; }
+
+ private:
+ // class holding the state of a Run() call to pass to the runner_ as an
+ // opaque_jpegxl pointer.
+ template <class InitFunc, class DataFunc>
+ class RunCallState final {
+ public:
+ RunCallState(const InitFunc& init_func, const DataFunc& data_func)
+ : init_func_(init_func), data_func_(data_func) {}
+
+ // JxlParallelRunInit interface.
+ static int CallInitFunc(void* jpegxl_opaque, size_t num_threads) {
+ const auto* self =
+ static_cast<RunCallState<InitFunc, DataFunc>*>(jpegxl_opaque);
+ // Returns -1 when the internal init function returns false Status to
+ // indicate an error.
+ return self->init_func_(num_threads) ? 0 : -1;
+ }
+
+ // JxlParallelRunFunction interface.
+ static void CallDataFunc(void* jpegxl_opaque, uint32_t value,
+ size_t thread_id) {
+ const auto* self =
+ static_cast<RunCallState<InitFunc, DataFunc>*>(jpegxl_opaque);
+ return self->data_func_(value, thread_id);
+ }
+
+ private:
+ const InitFunc& init_func_;
+ const DataFunc& data_func_;
+ };
+
+ // Default JxlParallelRunner used when no runner is provided by the
+ // caller. This runner doesn't use any threading and thread_id is always 0.
+ static JxlParallelRetCode SequentialRunnerStatic(
+ void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+ JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range);
+
+ // The caller supplied runner function and its opaque void*.
+ const JxlParallelRunner runner_;
+ void* const runner_opaque_;
+};
+
+template <class InitFunc, class DataFunc>
+Status RunOnPool(ThreadPool* pool, const uint32_t begin, const uint32_t end,
+ const InitFunc& init_func, const DataFunc& data_func,
+ const char* caller) {
+ if (pool == nullptr) {
+ ThreadPool default_pool(nullptr, nullptr);
+ return default_pool.Run(begin, end, init_func, data_func, caller);
+ } else {
+ return pool->Run(begin, end, init_func, data_func, caller);
+ }
+}
+
+} // namespace jxl
+#if JXL_COMPILER_MSVC
+#pragma warning(default : 4180)
+#endif
+
+#endif // LIB_JXL_BASE_DATA_PARALLEL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/base/file_io.h b/third_party/jpeg-xl/lib/jxl/base/file_io.h
new file mode 100644
index 0000000000..64d5860915
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/file_io.h
@@ -0,0 +1,153 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_FILE_IO_H_
+#define LIB_JXL_BASE_FILE_IO_H_
+
+// Helper functions for reading/writing files.
+
+#include <stdio.h>
+#include <sys/stat.h>
+
+#include <list>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Returns extension including the dot, or empty string if none. Assumes
+// filename is not a hidden file (e.g. ".bashrc"). May be called with a pathname
+// if the filename contains a dot and/or no other path component does.
+static inline std::string Extension(const std::string& filename) {
+ const size_t pos = filename.rfind('.');
+ if (pos == std::string::npos) return std::string();
+ return filename.substr(pos);
+}
+
+// RAII, ensures files are closed even when returning early.
+class FileWrapper {
+ public:
+ FileWrapper(const FileWrapper& other) = delete;
+ FileWrapper& operator=(const FileWrapper& other) = delete;
+
+ explicit FileWrapper(const std::string& pathname, const char* mode)
+ : file_(pathname == "-" ? (mode[0] == 'r' ? stdin : stdout)
+ : fopen(pathname.c_str(), mode)),
+ close_on_delete_(pathname != "-") {
+#ifdef _WIN32
+ struct __stat64 s = {};
+ const int err = _stat64(pathname.c_str(), &s);
+ const bool is_file = (s.st_mode & S_IFREG) != 0;
+#else
+ struct stat s = {};
+ const int err = stat(pathname.c_str(), &s);
+ const bool is_file = S_ISREG(s.st_mode);
+#endif
+ if (err == 0 && is_file) {
+ size_ = s.st_size;
+ }
+ }
+
+ ~FileWrapper() {
+ if (file_ != nullptr && close_on_delete_) {
+ const int err = fclose(file_);
+ JXL_CHECK(err == 0);
+ }
+ }
+
+ // We intend to use FileWrapper as a replacement of FILE.
+ // NOLINTNEXTLINE(google-explicit-constructor)
+ operator FILE*() const { return file_; }
+
+ int64_t size() { return size_; }
+
+ private:
+ FILE* const file_;
+ bool close_on_delete_ = true;
+ int64_t size_ = -1;
+};
+
+template <typename ContainerType>
+static inline Status ReadFile(const std::string& pathname,
+ ContainerType* JXL_RESTRICT bytes) {
+ FileWrapper f(pathname, "rb");
+ if (f == nullptr)
+ return JXL_FAILURE("Failed to open file for reading: %s", pathname.c_str());
+
+ // Get size of file in bytes
+ const int64_t size = f.size();
+ if (size < 0) {
+ // Size is unknown, loop reading chunks until EOF.
+ bytes->clear();
+ std::list<std::vector<uint8_t>> chunks;
+
+ size_t total_size = 0;
+ while (true) {
+ std::vector<uint8_t> chunk(16 * 1024);
+ const size_t bytes_read = fread(chunk.data(), 1, chunk.size(), f);
+ if (ferror(f) || bytes_read > chunk.size()) {
+ return JXL_FAILURE("Error reading %s", pathname.c_str());
+ }
+
+ chunk.resize(bytes_read);
+ total_size += bytes_read;
+ if (bytes_read != 0) {
+ chunks.emplace_back(std::move(chunk));
+ }
+ if (feof(f)) {
+ break;
+ }
+ }
+ bytes->resize(total_size);
+ size_t pos = 0;
+ for (const auto& chunk : chunks) {
+ // Needed in case ContainerType is std::string, whose data() is const.
+ char* bytes_writable = reinterpret_cast<char*>(&(*bytes)[0]);
+ memcpy(bytes_writable + pos, chunk.data(), chunk.size());
+ pos += chunk.size();
+ }
+ } else {
+ // Size is known, read the file directly.
+ bytes->resize(static_cast<size_t>(size));
+ size_t pos = 0;
+ while (pos < bytes->size()) {
+ // Needed in case ContainerType is std::string, whose data() is const.
+ char* bytes_writable = reinterpret_cast<char*>(&(*bytes)[0]);
+ const size_t bytes_read =
+ fread(bytes_writable + pos, 1, bytes->size() - pos, f);
+ if (bytes_read == 0) return JXL_FAILURE("Failed to read");
+ pos += bytes_read;
+ }
+ JXL_ASSERT(pos == bytes->size());
+ }
+ return true;
+}
+
+template <typename ContainerType>
+static inline Status WriteFile(const ContainerType& bytes,
+ const std::string& pathname) {
+ FileWrapper f(pathname, "wb");
+ if (f == nullptr)
+ return JXL_FAILURE("Failed to open file for writing: %s", pathname.c_str());
+
+ size_t pos = 0;
+ while (pos < bytes.size()) {
+ const size_t bytes_written =
+ fwrite(bytes.data() + pos, 1, bytes.size() - pos, f);
+ if (bytes_written == 0) return JXL_FAILURE("Failed to write");
+ pos += bytes_written;
+ }
+ JXL_ASSERT(pos == bytes.size());
+
+ return true;
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_BASE_FILE_IO_H_
diff --git a/third_party/jpeg-xl/lib/jxl/base/float.h b/third_party/jpeg-xl/lib/jxl/base/float.h
new file mode 100644
index 0000000000..90bdeedf54
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/float.h
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_FLOAT_H_
+#define LIB_JXL_BASE_FLOAT_H_
+
+#include <jxl/types.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+namespace {
+// Based on highway scalar implementation, for testing
+float LoadFloat16(uint16_t bits16) {
+ const uint32_t sign = bits16 >> 15;
+ const uint32_t biased_exp = (bits16 >> 10) & 0x1F;
+ const uint32_t mantissa = bits16 & 0x3FF;
+
+ // Subnormal or zero
+ if (biased_exp == 0) {
+ const float subnormal = (1.0f / 16384) * (mantissa * (1.0f / 1024));
+ return sign ? -subnormal : subnormal;
+ }
+
+ // Normalized: convert the representation directly (faster than ldexp/tables).
+ const uint32_t biased_exp32 = biased_exp + (127 - 15);
+ const uint32_t mantissa32 = mantissa << (23 - 10);
+ const uint32_t bits32 = (sign << 31) | (biased_exp32 << 23) | mantissa32;
+
+ float result;
+ memcpy(&result, &bits32, 4);
+ return result;
+}
+} // namespace
+
+template <typename SaveFloatAtFn>
+static Status JXL_INLINE LoadFloatRow(const uint8_t* src, size_t count,
+ size_t stride, JxlDataType type,
+ bool little_endian, float scale,
+ SaveFloatAtFn callback) {
+ switch (type) {
+ case JXL_TYPE_FLOAT:
+ if (little_endian) {
+ for (size_t i = 0; i < count; ++i) {
+ callback(i, LoadLEFloat(src + stride * i));
+ }
+ } else {
+ for (size_t i = 0; i < count; ++i) {
+ callback(i, LoadBEFloat(src + stride * i));
+ }
+ }
+ return true;
+
+ case JXL_TYPE_UINT8:
+ for (size_t i = 0; i < count; ++i) {
+ callback(i, src[stride * i] * scale);
+ }
+ return true;
+
+ case JXL_TYPE_UINT16:
+ if (little_endian) {
+ for (size_t i = 0; i < count; ++i) {
+ callback(i, LoadLE16(src + stride * i) * scale);
+ }
+ } else {
+ for (size_t i = 0; i < count; ++i) {
+ callback(i, LoadBE16(src + stride * i) * scale);
+ }
+ }
+ return true;
+
+ case JXL_TYPE_FLOAT16:
+ if (little_endian) {
+ for (size_t i = 0; i < count; ++i) {
+ callback(i, LoadFloat16(LoadLE16(src + stride * i)));
+ }
+ } else {
+ for (size_t i = 0; i < count; ++i) {
+ callback(i, LoadFloat16(LoadBE16(src + stride * i)));
+ }
+ }
+ return true;
+
+ default:
+ return JXL_FAILURE("Unsupported sample format");
+ }
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_BASE_FLOAT_H_
diff --git a/third_party/jpeg-xl/lib/jxl/base/iaca.h b/third_party/jpeg-xl/lib/jxl/base/iaca.h
new file mode 100644
index 0000000000..e5732dae5c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/iaca.h
@@ -0,0 +1,65 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_IACA_H_
+#define LIB_JXL_BASE_IACA_H_
+
+#include "lib/jxl/base/compiler_specific.h"
+
+// IACA (Intel's Code Analyzer) analyzes instruction latencies, but only for
+// code between special markers. These functions embed such markers in an
+// executable, but only for reading via IACA - they deliberately trigger a
+// crash if executed to ensure they are removed in normal builds.
+
+#ifndef JXL_IACA_ENABLED
+#define JXL_IACA_ENABLED 0
+#endif
+
+namespace jxl {
+
+// Call before the region of interest.
+static JXL_INLINE void BeginIACA() {
+#if JXL_IACA_ENABLED && (JXL_COMPILER_GCC || JXL_COMPILER_CLANG)
+ asm volatile(
+ // UD2 "instruction" raises an invalid opcode exception.
+ ".byte 0x0F, 0x0B\n\t"
+ // Magic sequence recognized by IACA (MOV + addr32 fs:NOP). This actually
+ // clobbers EBX, but we don't care because the code won't be run, and we
+ // want IACA to observe the same code the compiler would have generated
+ // without this marker.
+ "movl $111, %%ebx\n\t"
+ ".byte 0x64, 0x67, 0x90\n\t"
+ :
+ :
+ // (Allegedly) clobbering memory may prevent reordering.
+ : "memory");
+#endif
+}
+
+// Call after the region of interest.
+static JXL_INLINE void EndIACA() {
+#if JXL_IACA_ENABLED && (JXL_COMPILER_GCC || JXL_COMPILER_CLANG)
+ asm volatile(
+ // See above.
+ "movl $222, %%ebx\n\t"
+ ".byte 0x64, 0x67, 0x90\n\t"
+ // UD2
+ ".byte 0x0F, 0x0B\n\t"
+ :
+ :
+ // (Allegedly) clobbering memory may prevent reordering.
+ : "memory");
+#endif
+}
+
+// Add to a scope to mark a region.
+struct ScopeIACA {
+ JXL_INLINE ScopeIACA() { BeginIACA(); }
+ JXL_INLINE ~ScopeIACA() { EndIACA(); }
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_BASE_IACA_H_
diff --git a/third_party/jpeg-xl/lib/jxl/base/os_macros.h b/third_party/jpeg-xl/lib/jxl/base/os_macros.h
new file mode 100644
index 0000000000..84d0b82bf5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/os_macros.h
@@ -0,0 +1,50 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_OS_MACROS_H_
+#define LIB_JXL_BASE_OS_MACROS_H_
+
+// Defines the JXL_OS_* macros.
+
+#if defined(_WIN32) || defined(_WIN64)
+#define JXL_OS_WIN 1
+#else
+#define JXL_OS_WIN 0
+#endif
+
+#ifdef __linux__
+#define JXL_OS_LINUX 1
+#else
+#define JXL_OS_LINUX 0
+#endif
+
+#ifdef __APPLE__
+#define JXL_OS_MAC 1
+#else
+#define JXL_OS_MAC 0
+#endif
+
+#define JXL_OS_IOS 0
+#ifdef __APPLE__
+#include <TargetConditionals.h>
+#if TARGET_OS_IPHONE
+#undef JXL_OS_IOS
+#define JXL_OS_IOS 1
+#endif
+#endif
+
+#ifdef __FreeBSD__
+#define JXL_OS_FREEBSD 1
+#else
+#define JXL_OS_FREEBSD 0
+#endif
+
+#ifdef __HAIKU__
+#define JXL_OS_HAIKU 1
+#else
+#define JXL_OS_HAIKU 0
+#endif
+
+#endif // LIB_JXL_BASE_OS_MACROS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/base/override.h b/third_party/jpeg-xl/lib/jxl/base/override.h
new file mode 100644
index 0000000000..1f8b657974
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/override.h
@@ -0,0 +1,29 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_OVERRIDE_H_
+#define LIB_JXL_BASE_OVERRIDE_H_
+
+// 'Trool' for command line arguments: force enable/disable, or use default.
+
+namespace jxl {
+
+// No effect if kDefault, otherwise forces a feature (typically a FrameHeader
+// flag) on or off.
+enum class Override : int { kOn = 1, kOff = 0, kDefault = -1 };
+
+static inline Override OverrideFromBool(bool flag) {
+ return flag ? Override::kOn : Override::kOff;
+}
+
+static inline bool ApplyOverride(Override o, bool default_condition) {
+ if (o == Override::kOn) return true;
+ if (o == Override::kOff) return false;
+ return default_condition;
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_BASE_OVERRIDE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/base/padded_bytes.cc b/third_party/jpeg-xl/lib/jxl/base/padded_bytes.cc
new file mode 100644
index 0000000000..11e4bff6fe
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/padded_bytes.cc
@@ -0,0 +1,63 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/padded_bytes.h"
+
+namespace jxl {
+
+void PaddedBytes::IncreaseCapacityTo(size_t capacity) {
+ JXL_ASSERT(capacity > capacity_);
+
+ size_t new_capacity = std::max(capacity, 3 * capacity_ / 2);
+ new_capacity = std::max<size_t>(64, new_capacity);
+
+ // BitWriter writes up to 7 bytes past the end.
+ CacheAlignedUniquePtr new_data = AllocateArray(new_capacity + 8);
+ if (new_data == nullptr) {
+ // Allocation failed, discard all data to ensure this is noticed.
+ size_ = capacity_ = 0;
+ return;
+ }
+
+ if (data_ == nullptr) {
+ // First allocation: ensure first byte is initialized (won't be copied).
+ new_data[0] = 0;
+ } else {
+ // Subsequent resize: copy existing data to new location.
+ memcpy(new_data.get(), data_.get(), size_);
+ // Ensure that the first new byte is initialized, to allow write_bits to
+ // safely append to the newly-resized PaddedBytes.
+ new_data[size_] = 0;
+ }
+
+ capacity_ = new_capacity;
+ std::swap(new_data, data_);
+}
+
+void PaddedBytes::assign(const uint8_t* new_begin, const uint8_t* new_end) {
+ JXL_DASSERT(new_begin <= new_end);
+ const size_t new_size = static_cast<size_t>(new_end - new_begin);
+
+ // memcpy requires non-overlapping ranges, and resizing might invalidate the
+ // new range. Neither happens if the new range is completely to the left or
+ // right of the _allocated_ range (irrespective of size_).
+ const uint8_t* allocated_end = begin() + capacity_;
+ const bool outside = new_end <= begin() || new_begin >= allocated_end;
+ if (outside) {
+ resize(new_size); // grow or shrink
+ memcpy(data(), new_begin, new_size);
+ return;
+ }
+
+ // There is overlap. The new size cannot be larger because we own the memory
+ // and the new range cannot include anything outside the allocated range.
+ JXL_ASSERT(new_size <= capacity_);
+
+ // memmove allows overlap and capacity_ is sufficient.
+ memmove(data(), new_begin, new_size);
+ size_ = new_size; // shrink
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/base/padded_bytes.h b/third_party/jpeg-xl/lib/jxl/base/padded_bytes.h
new file mode 100644
index 0000000000..4534ddf863
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/padded_bytes.h
@@ -0,0 +1,197 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_PADDED_BYTES_H_
+#define LIB_JXL_BASE_PADDED_BYTES_H_
+
+// std::vector replacement with padding to reduce bounds checks in WriteBits
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h> // memcpy
+
+#include <algorithm> // max
+#include <initializer_list>
+#include <utility> // swap
+
+#include "lib/jxl/base/cache_aligned.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Provides a subset of the std::vector interface with some differences:
+// - allows BitWriter to write 64 bits at a time without bounds checking;
+// - ONLY zero-initializes the first byte (required by BitWriter);
+// - ensures cache-line alignment.
+class PaddedBytes {
+ public:
+ // Required for output params.
+ PaddedBytes() : size_(0), capacity_(0) {}
+
+ explicit PaddedBytes(size_t size) : size_(size), capacity_(0) {
+ if (size != 0) IncreaseCapacityTo(size);
+ }
+
+ PaddedBytes(size_t size, uint8_t value) : size_(size), capacity_(0) {
+ if (size != 0) {
+ IncreaseCapacityTo(size);
+ }
+ if (size_ != 0) {
+ memset(data(), value, size);
+ }
+ }
+
+ PaddedBytes(const PaddedBytes& other) : size_(other.size_), capacity_(0) {
+ if (size_ != 0) IncreaseCapacityTo(size_);
+ if (data() != nullptr) memcpy(data(), other.data(), size_);
+ }
+ PaddedBytes& operator=(const PaddedBytes& other) {
+ // Self-assignment is safe.
+ resize(other.size());
+ if (data() != nullptr) memmove(data(), other.data(), size_);
+ return *this;
+ }
+
+ // default is not OK - need to set other.size_ to 0!
+ PaddedBytes(PaddedBytes&& other) noexcept
+ : size_(other.size_),
+ capacity_(other.capacity_),
+ data_(std::move(other.data_)) {
+ other.size_ = other.capacity_ = 0;
+ }
+ PaddedBytes& operator=(PaddedBytes&& other) noexcept {
+ size_ = other.size_;
+ capacity_ = other.capacity_;
+ data_ = std::move(other.data_);
+
+ if (&other != this) {
+ other.size_ = other.capacity_ = 0;
+ }
+ return *this;
+ }
+
+ void swap(PaddedBytes& other) {
+ std::swap(size_, other.size_);
+ std::swap(capacity_, other.capacity_);
+ std::swap(data_, other.data_);
+ }
+
+ void reserve(size_t capacity) {
+ if (capacity > capacity_) IncreaseCapacityTo(capacity);
+ }
+
+ // NOTE: unlike vector, this does not initialize the new data!
+ // However, we guarantee that write_bits can safely append after
+ // the resize, as we zero-initialize the first new byte of data.
+ // If size < capacity(), does not invalidate the memory.
+ void resize(size_t size) {
+ if (size > capacity_) IncreaseCapacityTo(size);
+ size_ = (data() == nullptr) ? 0 : size;
+ }
+
+ // resize(size) plus explicit initialization of the new data with `value`.
+ void resize(size_t size, uint8_t value) {
+ size_t old_size = size_;
+ resize(size);
+ if (size_ > old_size) {
+ memset(data() + old_size, value, size_ - old_size);
+ }
+ }
+
+ // Amortized constant complexity due to exponential growth.
+ void push_back(uint8_t x) {
+ if (size_ == capacity_) {
+ IncreaseCapacityTo(capacity_ + 1);
+ if (data() == nullptr) return;
+ }
+
+ data_[size_++] = x;
+ }
+
+ size_t size() const { return size_; }
+ size_t capacity() const { return capacity_; }
+
+ uint8_t* data() { return data_.get(); }
+ const uint8_t* data() const { return data_.get(); }
+
+ // std::vector operations implemented in terms of the public interface above.
+
+ void clear() { resize(0); }
+ bool empty() const { return size() == 0; }
+
+ void assign(std::initializer_list<uint8_t> il) {
+ resize(il.size());
+ memcpy(data(), il.begin(), il.size());
+ }
+
+ // Replaces data() with [new_begin, new_end); potentially reallocates.
+ void assign(const uint8_t* new_begin, const uint8_t* new_end);
+
+ uint8_t* begin() { return data(); }
+ const uint8_t* begin() const { return data(); }
+ uint8_t* end() { return begin() + size(); }
+ const uint8_t* end() const { return begin() + size(); }
+
+ uint8_t& operator[](const size_t i) {
+ BoundsCheck(i);
+ return data()[i];
+ }
+ const uint8_t& operator[](const size_t i) const {
+ BoundsCheck(i);
+ return data()[i];
+ }
+
+ uint8_t& back() {
+ JXL_ASSERT(size() != 0);
+ return data()[size() - 1];
+ }
+ const uint8_t& back() const {
+ JXL_ASSERT(size() != 0);
+ return data()[size() - 1];
+ }
+
+ template <typename T>
+ void append(const T& other) {
+ append(reinterpret_cast<const uint8_t*>(other.data()),
+ reinterpret_cast<const uint8_t*>(other.data()) + other.size());
+ }
+
+ void append(const uint8_t* begin, const uint8_t* end) {
+ if (end - begin > 0) {
+ size_t old_size = size();
+ resize(size() + (end - begin));
+ memcpy(data() + old_size, begin, end - begin);
+ }
+ }
+
+ private:
+ void BoundsCheck(size_t i) const {
+ // <= is safe due to padding and required by BitWriter.
+ JXL_ASSERT(i <= size());
+ }
+
+ // Copies existing data to newly allocated "data_". If allocation fails,
+ // data() == nullptr and size_ = capacity_ = 0.
+ // The new capacity will be at least 1.5 times the old capacity. This ensures
+ // that we avoid quadratic behaviour.
+ void IncreaseCapacityTo(size_t capacity);
+
+ size_t size_;
+ size_t capacity_;
+ CacheAlignedUniquePtr data_;
+};
+
+template <typename T>
+static inline void Append(const T& s, PaddedBytes* out,
+ size_t* JXL_RESTRICT byte_pos) {
+ memcpy(out->data() + *byte_pos, s.data(), s.size());
+ *byte_pos += s.size();
+ JXL_CHECK(*byte_pos <= out->size());
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_BASE_PADDED_BYTES_H_
diff --git a/third_party/jpeg-xl/lib/jxl/base/printf_macros.h b/third_party/jpeg-xl/lib/jxl/base/printf_macros.h
new file mode 100644
index 0000000000..3215052afd
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/printf_macros.h
@@ -0,0 +1,34 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_PRINTF_MACROS_H_
+#define LIB_JXL_BASE_PRINTF_MACROS_H_
+
+// Format string macros. These should be included after any other system
+// library since those may unconditionally define these, depending on the
+// platform.
+
+// PRIuS and PRIdS macros to print size_t and ssize_t respectively.
+#if !defined(PRIdS)
+#if defined(_WIN64)
+#define PRIdS "lld"
+#elif defined(_WIN32)
+#define PRIdS "d"
+#else
+#define PRIdS "zd"
+#endif
+#endif // PRIdS
+
+#if !defined(PRIuS)
+#if defined(_WIN64)
+#define PRIuS "llu"
+#elif defined(_WIN32)
+#define PRIuS "u"
+#else
+#define PRIuS "zu"
+#endif
+#endif // PRIuS
+
+#endif // LIB_JXL_BASE_PRINTF_MACROS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/base/profiler.cc b/third_party/jpeg-xl/lib/jxl/base/profiler.cc
new file mode 100644
index 0000000000..a38d9b82b7
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/profiler.cc
@@ -0,0 +1,540 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/profiler.h"
+
+#if JXL_PROFILER_ENABLED
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> // memcpy
+
+#include <algorithm> // sort
+#include <atomic>
+#include <cinttypes> // PRIu64
+#include <hwy/cache_control.h>
+#include <limits>
+#include <new>
+
+// Optionally use SIMD in StreamCacheLine if available.
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/base/profiler.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace profiler {
+namespace HWY_NAMESPACE {
+
+// Overwrites `to` without loading it into cache (read-for-ownership).
+// Copies 64 bytes from/to naturally aligned addresses.
+void StreamCacheLine(const Packet* HWY_RESTRICT from, Packet* HWY_RESTRICT to) {
+#if HWY_TARGET == HWY_SCALAR
+ hwy::CopyBytes<64>(from, to);
+#else
+ const HWY_CAPPED(uint64_t, 2) d;
+ HWY_FENCE;
+ const uint64_t* HWY_RESTRICT from64 = reinterpret_cast<const uint64_t*>(from);
+ const auto v0 = Load(d, from64 + 0);
+ const auto v1 = Load(d, from64 + 2);
+ const auto v2 = Load(d, from64 + 4);
+ const auto v3 = Load(d, from64 + 6);
+ // Fences prevent the compiler from reordering loads/stores, which may
+ // interfere with write-combining.
+ HWY_FENCE;
+ uint64_t* HWY_RESTRICT to64 = reinterpret_cast<uint64_t*>(to);
+ Stream(v0, d, to64 + 0);
+ Stream(v1, d, to64 + 2);
+ Stream(v2, d, to64 + 4);
+ Stream(v3, d, to64 + 6);
+ HWY_FENCE;
+#endif
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace profiler
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+namespace profiler {
+
+HWY_EXPORT(StreamCacheLine);
+
+namespace {
+
+// How many mebibytes to allocate (if JXL_PROFILER_ENABLED) per thread that
+// enters at least one zone. Once this buffer is full, the thread will analyze
+// packets (two per zone), which introduces observer overhead.
+#ifndef PROFILER_THREAD_STORAGE
+#define PROFILER_THREAD_STORAGE 32ULL
+#endif
+
+#define PROFILER_PRINT_OVERHEAD 0
+
+// Upper bounds for fixed-size data structures (guarded via HWY_ASSERT):
+constexpr size_t kMaxDepth = 64; // Maximum nesting of zones.
+constexpr size_t kMaxZones = 256; // Total number of zones.
+
+// Stack of active (entered but not exited) zones. POD, uninitialized.
+// Used to deduct child duration from the parent's self time.
+struct ActiveZone {
+ const char* name;
+ uint64_t entry_timestamp;
+ uint64_t child_total;
+};
+
+// Totals for all Zones with the same name. POD, must be zero-initialized.
+struct ZoneTotals {
+ uint64_t total_duration;
+ const char* name;
+ uint64_t num_calls;
+};
+
+template <typename T>
+inline T ClampedSubtract(const T minuend, const T subtrahend) {
+ if (subtrahend > minuend) {
+ return 0;
+ }
+ return minuend - subtrahend;
+}
+
+} // namespace
+
+// Per-thread call graph (stack) and ZoneTotals for each zone.
+class Results {
+ public:
+ Results() {
+ // Zero-initialize all accumulators (avoids a check for num_zones_ == 0).
+ memset(zones_, 0, sizeof(zones_));
+ }
+
+ // Used for computing overhead when this thread encounters its first Zone.
+ // This has no observable effect apart from increasing "analyze_elapsed_".
+ uint64_t ZoneDuration(const Packet* packets) {
+ HWY_ASSERT(depth_ == 0);
+ HWY_ASSERT(num_zones_ == 0);
+ AnalyzePackets(packets, 2);
+ const uint64_t duration = zones_[0].total_duration;
+ zones_[0].num_calls = 0;
+ zones_[0].total_duration = 0;
+ HWY_ASSERT(depth_ == 0);
+ num_zones_ = 0;
+ return duration;
+ }
+
+ void SetSelfOverhead(const uint64_t self_overhead) {
+ self_overhead_ = self_overhead;
+ }
+
+ void SetChildOverhead(const uint64_t child_overhead) {
+ child_overhead_ = child_overhead;
+ }
+
+ // Draw all required information from the packets, which can be discarded
+ // afterwards. Called whenever this thread's storage is full.
+ void AnalyzePackets(const Packet* HWY_RESTRICT packets,
+ const size_t num_packets) {
+ // Ensures prior weakly-ordered streaming stores are globally visible.
+ hwy::FlushStream();
+
+ const uint64_t t0 = TicksBefore();
+
+ for (size_t i = 0; i < num_packets; ++i) {
+ const uint64_t timestamp = packets[i].timestamp;
+ // Entering a zone
+ if (packets[i].name != nullptr) {
+ HWY_ASSERT(depth_ < kMaxDepth);
+ zone_stack_[depth_].name = packets[i].name;
+ zone_stack_[depth_].entry_timestamp = timestamp;
+ zone_stack_[depth_].child_total = 0;
+ ++depth_;
+ continue;
+ }
+
+ HWY_ASSERT(depth_ != 0);
+ const ActiveZone& active = zone_stack_[depth_ - 1];
+ const uint64_t duration = timestamp - active.entry_timestamp;
+ const uint64_t self_duration = ClampedSubtract(
+ duration, self_overhead_ + child_overhead_ + active.child_total);
+
+ UpdateOrAdd(active.name, 1, self_duration);
+ --depth_;
+
+ // "Deduct" the nested time from its parent's self_duration.
+ if (depth_ != 0) {
+ zone_stack_[depth_ - 1].child_total += duration + child_overhead_;
+ }
+ }
+
+ const uint64_t t1 = TicksAfter();
+ analyze_elapsed_ += t1 - t0;
+ }
+
+ // Incorporates results from another thread. Call after all threads have
+ // exited any zones.
+ void Assimilate(const Results& other) {
+ const uint64_t t0 = TicksBefore();
+ HWY_ASSERT(depth_ == 0);
+ HWY_ASSERT(other.depth_ == 0);
+
+ for (size_t i = 0; i < other.num_zones_; ++i) {
+ const ZoneTotals& zone = other.zones_[i];
+ UpdateOrAdd(zone.name, zone.num_calls, zone.total_duration);
+ }
+ const uint64_t t1 = TicksAfter();
+ analyze_elapsed_ += t1 - t0 + other.analyze_elapsed_;
+ }
+
+ // Single-threaded.
+ void Print() {
+ const uint64_t t0 = TicksBefore();
+ MergeDuplicates();
+
+ // Sort by decreasing total (self) cost.
+ std::sort(zones_, zones_ + num_zones_,
+ [](const ZoneTotals& r1, const ZoneTotals& r2) {
+ return r1.total_duration > r2.total_duration;
+ });
+
+ uint64_t total_visible_duration = 0;
+ for (size_t i = 0; i < num_zones_; ++i) {
+ const ZoneTotals& r = zones_[i];
+ if (r.name[0] != '@') {
+ total_visible_duration += r.total_duration;
+ printf("%-40s: %10" PRIu64 " x %15" PRIu64 "= %15" PRIu64 "\n", r.name,
+ r.num_calls, r.total_duration / r.num_calls, r.total_duration);
+ }
+ }
+
+ const uint64_t t1 = TicksAfter();
+ analyze_elapsed_ += t1 - t0;
+ printf("Total clocks during analysis: %" PRIu64 "\n", analyze_elapsed_);
+ printf("Total clocks measured: %" PRIu64 "\n", total_visible_duration);
+ }
+
+ // Single-threaded. Clears all results as if no zones had been recorded.
+ void Reset() {
+ analyze_elapsed_ = 0;
+ HWY_ASSERT(depth_ == 0);
+ num_zones_ = 0;
+ memset(zone_stack_, 0, sizeof(zone_stack_));
+ memset(zones_, 0, sizeof(zones_));
+ }
+
+ private:
+ // Updates ZoneTotals of the same name, or inserts a new one if this thread
+ // has not yet seen that name. Uses a self-organizing list data structure,
+ // which avoids dynamic memory allocations and is faster than unordered_map.
+ void UpdateOrAdd(const char* name, const uint64_t num_calls,
+ const uint64_t duration) {
+ // Special case for first zone: (maybe) update, without swapping.
+ if (zones_[0].name == name) {
+ zones_[0].total_duration += duration;
+ zones_[0].num_calls += num_calls;
+ return;
+ }
+
+ // Look for a zone with the same name.
+ for (size_t i = 1; i < num_zones_; ++i) {
+ if (zones_[i].name == name) {
+ zones_[i].total_duration += duration;
+ zones_[i].num_calls += num_calls;
+ // Swap with predecessor (more conservative than move to front,
+ // but at least as successful).
+ std::swap(zones_[i - 1], zones_[i]);
+ return;
+ }
+ }
+
+ // Not found; create a new ZoneTotals.
+ HWY_ASSERT(num_zones_ < kMaxZones);
+ ZoneTotals* HWY_RESTRICT zone = zones_ + num_zones_;
+ zone->name = name;
+ zone->num_calls = num_calls;
+ zone->total_duration = duration;
+ ++num_zones_;
+ }
+
+ // Each instantiation of a function template seems to get its own copy of
+ // __func__ and GCC doesn't merge them. An N^2 search for duplicates is
+ // acceptable because we only expect a few dozen zones.
+ void MergeDuplicates() {
+ for (size_t i = 0; i < num_zones_; ++i) {
+ // Add any subsequent duplicates to num_calls and total_duration.
+ for (size_t j = i + 1; j < num_zones_;) {
+ if (!strcmp(zones_[i].name, zones_[j].name)) {
+ zones_[i].num_calls += zones_[j].num_calls;
+ zones_[i].total_duration += zones_[j].total_duration;
+ // Fill hole with last item.
+ zones_[j] = zones_[--num_zones_];
+ } else { // Name differed, try next ZoneTotals.
+ ++j;
+ }
+ }
+ }
+ }
+
+ uint64_t analyze_elapsed_ = 0;
+ uint64_t self_overhead_ = 0;
+ uint64_t child_overhead_ = 0;
+
+ size_t depth_ = 0; // Number of active zones <= kMaxDepth.
+ size_t num_zones_ = 0; // Number of unique zones <= kMaxZones.
+
+ // After other members to avoid large pointer offsets.
+ alignas(64) ActiveZone zone_stack_[kMaxDepth]; // Last = newest
+ alignas(64) ZoneTotals zones_[kMaxZones]; // Self-organizing list
+};
+
+ThreadSpecific::ThreadSpecific()
+ : max_packets_(PROFILER_THREAD_STORAGE << 16), // MiB / sizeof(Packet)
+ packets_(hwy::AllocateAligned<Packet>(max_packets_)),
+ num_packets_(0),
+ results_(hwy::MakeUniqueAligned<Results>()) {}
+
+ThreadSpecific::~ThreadSpecific() {}
+
+void ThreadSpecific::FlushBuffer() {
+ if (num_packets_ + kBufferCapacity > max_packets_) {
+ results_->AnalyzePackets(packets_.get(), num_packets_);
+ num_packets_ = 0;
+ }
+ // This buffering halves observer overhead and decreases the overall
+ // runtime by about 3%.
+ HWY_DYNAMIC_DISPATCH(StreamCacheLine)
+ (buffer_, packets_.get() + num_packets_);
+ num_packets_ += kBufferCapacity;
+ buffer_size_ = 0;
+}
+
+void ThreadSpecific::AnalyzeRemainingPackets() {
+ // Storage full => empty it.
+ if (num_packets_ + buffer_size_ > max_packets_) {
+ results_->AnalyzePackets(packets_.get(), num_packets_);
+ num_packets_ = 0;
+ }
+
+ // Move buffer to storage
+ memcpy(packets_.get() + num_packets_, buffer_, buffer_size_ * sizeof(Packet));
+ num_packets_ += buffer_size_;
+ buffer_size_ = 0;
+
+ results_->AnalyzePackets(packets_.get(), num_packets_);
+ num_packets_ = 0;
+}
+
+namespace {
+
+class HalfSampleMode {
+ public:
+ // Returns mode. "sorted" must be in ascending order.
+ template <typename T>
+ T operator()(const T* const HWY_RESTRICT sorted,
+ const size_t num_values) const {
+ int64_t center = num_values / 2;
+ int64_t width = num_values;
+
+ // Zoom in on modal intervals of decreasing width. Stop before we reach
+ // width=1, i.e. single values, for which there is no "slope".
+ while (width > 2) {
+ // Round up so we can still reach the outer edges of odd widths.
+ width = (width + 1) / 2;
+
+ center = CenterOfIntervalWithMinSlope(sorted, num_values, center, width);
+ }
+
+ return sorted[center]; // mode := middle value in modal interval.
+ }
+
+ private:
+ // Returns center of the densest region [c-radius, c+radius].
+ template <typename T>
+ static HWY_INLINE int64_t CenterOfIntervalWithMinSlope(
+ const T* HWY_RESTRICT sorted, const int64_t total_values,
+ const int64_t center, const int64_t width) {
+ const int64_t radius = (width + 1) / 2;
+
+ auto compute_slope = [radius, total_values, sorted](
+ int64_t c, int64_t* actual_center = nullptr) {
+ // For symmetry, check 2*radius+1 values, i.e. [min, max].
+ const int64_t min = std::max(c - radius, int64_t(0));
+ const int64_t max = std::min(c + radius, total_values - 1);
+ HWY_ASSERT(min < max);
+ HWY_ASSERT(sorted[min] <=
+ sorted[max] + std::numeric_limits<float>::epsilon());
+ const float dx = max - min + 1;
+ const float slope = (sorted[max] - sorted[min]) / dx;
+
+ if (actual_center != nullptr) {
+ // c may be out of bounds, so return center of the clamped bounds.
+ *actual_center = (min + max + 1) / 2;
+ }
+ return slope;
+ };
+
+ // First find min_slope for all centers.
+ float min_slope = std::numeric_limits<float>::max();
+ for (int64_t c = center - radius; c <= center + radius; ++c) {
+ min_slope = std::min(min_slope, compute_slope(c));
+ }
+
+ // Candidates := centers with slope ~= min_slope.
+ std::vector<int64_t> candidates;
+ for (int64_t c = center - radius; c <= center + radius; ++c) {
+ int64_t actual_center;
+ const float slope = compute_slope(c, &actual_center);
+ if (slope <= min_slope * 1.001f) {
+ candidates.push_back(actual_center);
+ }
+ }
+
+ // Keep the median.
+ HWY_ASSERT(!candidates.empty());
+ if (candidates.size() == 1) return candidates[0];
+ std::nth_element(candidates.begin(),
+ candidates.begin() + candidates.size() / 2,
+ candidates.end());
+ return candidates[candidates.size() / 2];
+ }
+};
+
+} // namespace
+
+void ThreadSpecific::ComputeOverhead() {
+ // Delay after capturing timestamps before/after the actual zone runs. Even
+ // with frequency throttling disabled, this has a multimodal distribution,
+ // including 32, 34, 48, 52, 59, 62.
+ uint64_t self_overhead;
+ {
+ const size_t kNumSamples = 32;
+ uint32_t samples[kNumSamples];
+ for (size_t idx_sample = 0; idx_sample < kNumSamples; ++idx_sample) {
+ const size_t kNumDurations = 1024;
+ uint32_t durations[kNumDurations];
+
+ for (size_t idx_duration = 0; idx_duration < kNumDurations;
+ ++idx_duration) {
+ { //
+ PROFILER_ZONE("Dummy Zone (never shown)");
+ }
+ const uint64_t duration = results_->ZoneDuration(buffer_);
+ buffer_size_ = 0;
+ durations[idx_duration] = static_cast<uint32_t>(duration);
+ HWY_ASSERT(num_packets_ == 0);
+ }
+ std::sort(durations, durations + kNumDurations);
+ samples[idx_sample] = HalfSampleMode()(durations, kNumDurations);
+ }
+ // Median.
+ std::sort(samples, samples + kNumSamples);
+ self_overhead = samples[kNumSamples / 2];
+#if PROFILER_PRINT_OVERHEAD
+ printf("Overhead: %" PRIu64 "\n", static_cast<uint64_t>(self_overhead));
+#endif
+ results_->SetSelfOverhead(self_overhead);
+ }
+
+ // Delay before capturing start timestamp / after end timestamp.
+ const size_t kNumSamples = 32;
+ uint32_t samples[kNumSamples];
+ for (size_t idx_sample = 0; idx_sample < kNumSamples; ++idx_sample) {
+ const size_t kNumDurations = 16;
+ uint32_t durations[kNumDurations];
+ for (size_t idx_duration = 0; idx_duration < kNumDurations;
+ ++idx_duration) {
+ const size_t kReps = 10000;
+ // Analysis time should not be included => must fit within buffer.
+ HWY_ASSERT(kReps * 2 < max_packets_);
+ hwy::FlushStream();
+ const uint64_t t0 = TicksBefore();
+ for (size_t i = 0; i < kReps; ++i) {
+ PROFILER_ZONE("Dummy");
+ }
+ hwy::FlushStream();
+ const uint64_t t1 = TicksAfter();
+ HWY_ASSERT(num_packets_ + buffer_size_ == kReps * 2);
+ buffer_size_ = 0;
+ num_packets_ = 0;
+ const uint64_t avg_duration = (t1 - t0 + kReps / 2) / kReps;
+ durations[idx_duration] =
+ static_cast<uint32_t>(ClampedSubtract(avg_duration, self_overhead));
+ }
+ std::sort(durations, durations + kNumDurations);
+ samples[idx_sample] = HalfSampleMode()(durations, kNumDurations);
+ }
+ std::sort(samples, samples + kNumSamples);
+ const uint64_t child_overhead = samples[9 * kNumSamples / 10];
+#if PROFILER_PRINT_OVERHEAD
+ printf("Child overhead: %" PRIu64 "\n",
+ static_cast<uint64_t>(child_overhead));
+#endif
+ results_->SetChildOverhead(child_overhead);
+}
+
+namespace {
+
+// Could be a static member of Zone, but that would expose <atomic> in header.
+std::atomic<ThreadSpecific*>& GetHead() {
+ static std::atomic<ThreadSpecific*> head_{nullptr}; // Owning
+ return head_;
+}
+
+} // namespace
+
+// Thread-safe.
+ThreadSpecific* Zone::InitThreadSpecific() {
+ ThreadSpecific* thread_specific =
+ hwy::MakeUniqueAligned<ThreadSpecific>().release();
+
+ // Insert into unordered list
+ std::atomic<ThreadSpecific*>& head = GetHead();
+ ThreadSpecific* old_head = head.load(std::memory_order_relaxed);
+ thread_specific->SetNext(old_head);
+ while (!head.compare_exchange_weak(old_head, thread_specific,
+ std::memory_order_release,
+ std::memory_order_relaxed)) {
+ thread_specific->SetNext(old_head);
+ // TODO(janwas): pause
+ }
+
+ // ComputeOverhead also creates a Zone, so this needs to be set before that
+ // to prevent infinite recursion.
+ GetThreadSpecific() = thread_specific;
+
+ thread_specific->ComputeOverhead();
+ return thread_specific;
+}
+
+// Single-threaded.
+/*static*/ void Zone::PrintResults() {
+ ThreadSpecific* head = GetHead().load(std::memory_order_relaxed);
+ ThreadSpecific* p = head;
+ while (p) {
+ p->AnalyzeRemainingPackets();
+
+ // Combine all threads into a single Result.
+ if (p != head) {
+ head->GetResults().Assimilate(p->GetResults());
+ p->GetResults().Reset();
+ }
+
+ p = p->GetNext();
+ }
+
+ if (head != nullptr) {
+ head->GetResults().Print();
+ head->GetResults().Reset();
+ }
+}
+
+} // namespace profiler
+} // namespace jxl
+
+#endif // HWY_ONCE
+#endif // JXL_PROFILER_ENABLED
diff --git a/third_party/jpeg-xl/lib/jxl/base/profiler.h b/third_party/jpeg-xl/lib/jxl/base/profiler.h
new file mode 100644
index 0000000000..4c0efa4b3a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/profiler.h
@@ -0,0 +1,170 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_PROFILER_H_
+#define LIB_JXL_BASE_PROFILER_H_
+
+// High precision, low overhead time measurements. Returns exact call counts and
+// total elapsed time for user-defined 'zones' (code regions, i.e. C++ scopes).
+//
+// To use the profiler you must set the JPEGXL_ENABLE_PROFILER CMake flag, which
+// defines JXL_PROFILER_ENABLED.
+//
+// Usage: instrument regions of interest: { PROFILER_ZONE("name"); /*code*/ } or
+// void FuncToMeasure() { PROFILER_FUNC; /*code*/ }.
+// After all threads have exited any zones, invoke PROFILER_PRINT_RESULTS() to
+// print call counts and average durations [CPU cycles] to stdout, sorted in
+// descending order of total duration.
+
+// If zero, this file has no effect and no measurements will be recorded.
+#ifndef JXL_PROFILER_ENABLED
+#define JXL_PROFILER_ENABLED 0
+#endif
+#if JXL_PROFILER_ENABLED
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <hwy/aligned_allocator.h>
+#include <hwy/base.h>
+
+#include "lib/jxl/base/tsc_timer.h"
+
+#if HWY_COMPILER_MSVC
+#define PROFILER_PUBLIC
+#else
+#define PROFILER_PUBLIC __attribute__((visibility("default")))
+#endif
+
+namespace jxl {
+namespace profiler {
+
+// Represents zone entry/exit events. POD.
+#pragma pack(push, 1)
+struct Packet {
+ // Computing a hash or string table is likely too expensive, and offsets
+ // from other libraries' string literals can be too large to combine them and
+ // a full-resolution timestamp into 64 bits.
+ uint64_t timestamp;
+ const char* name; // nullptr for exit packets
+#if UINTPTR_MAX <= 0xFFFFFFFFu
+ uint32_t padding;
+#endif
+};
+#pragma pack(pop)
+static_assert(sizeof(Packet) == 16, "Wrong Packet size");
+
+class Results; // pImpl
+
+// Per-thread packet storage, dynamically allocated and aligned.
+class ThreadSpecific {
+ static constexpr size_t kBufferCapacity = 64 / sizeof(Packet);
+
+ public:
+ PROFILER_PUBLIC explicit ThreadSpecific();
+ PROFILER_PUBLIC ~ThreadSpecific();
+
+ // Depends on Zone => defined out of line.
+ PROFILER_PUBLIC void ComputeOverhead();
+
+ HWY_INLINE void WriteEntry(const char* name) { Write(name, TicksBefore()); }
+ HWY_INLINE void WriteExit() { Write(nullptr, TicksAfter()); }
+
+ PROFILER_PUBLIC void AnalyzeRemainingPackets();
+
+ // Accessors instead of public member for well-defined data layout.
+ void SetNext(ThreadSpecific* next) { next_ = next; }
+ ThreadSpecific* GetNext() const { return next_; }
+
+ Results& GetResults() { return *results_; }
+
+ private:
+ PROFILER_PUBLIC void FlushBuffer();
+
+ // Write packet to buffer/storage, emptying them as needed.
+ void Write(const char* name, const uint64_t timestamp) {
+ if (buffer_size_ == kBufferCapacity) { // Full
+ FlushBuffer();
+ }
+ buffer_[buffer_size_].name = name;
+ buffer_[buffer_size_].timestamp = timestamp;
+ ++buffer_size_;
+ }
+
+ // Write-combining buffer to avoid cache pollution. Must be the first
+ // non-static member to ensure cache-line alignment.
+ Packet buffer_[kBufferCapacity];
+ size_t buffer_size_ = 0;
+
+ // Contiguous storage for zone enter/exit packets.
+ const size_t max_packets_;
+ hwy::AlignedFreeUniquePtr<Packet[]> packets_;
+ size_t num_packets_;
+
+ // Linked list of all threads.
+ ThreadSpecific* next_ = nullptr; // Owned, never released.
+
+ hwy::AlignedUniquePtr<Results> results_;
+};
+
+// RAII zone enter/exit recorder constructed by PROFILER_ZONE; also
+// responsible for initializing ThreadSpecific.
+class Zone {
+ public:
+ HWY_NOINLINE explicit Zone(const char* name) {
+ HWY_FENCE;
+ ThreadSpecific* HWY_RESTRICT thread_specific = GetThreadSpecific();
+ if (HWY_UNLIKELY(thread_specific == nullptr)) {
+ thread_specific = InitThreadSpecific();
+ }
+
+ thread_specific->WriteEntry(name);
+ }
+
+ HWY_NOINLINE ~Zone() { GetThreadSpecific()->WriteExit(); }
+
+ // Call exactly once after all threads have exited all zones.
+ PROFILER_PUBLIC static void PrintResults();
+
+ private:
+ // Returns reference to the thread's ThreadSpecific pointer (initially null).
+ // Function-local static avoids needing a separate definition.
+ static ThreadSpecific*& GetThreadSpecific() {
+ static thread_local ThreadSpecific* thread_specific;
+ return thread_specific;
+ }
+
+ // Non time-critical.
+ PROFILER_PUBLIC ThreadSpecific* InitThreadSpecific();
+};
+
+// Creates a zone starting from here until the end of the current scope.
+// Timestamps will be recorded when entering and exiting the zone.
+// To ensure the name pointer remains valid, we require it to be a string
+// literal (by merging with ""). We also compare strings by address.
+#define PROFILER_ZONE(name) \
+ HWY_FENCE; \
+ const ::jxl::profiler::Zone zone("" name); \
+ HWY_FENCE
+
+// Creates a zone for an entire function (when placed at its beginning).
+// Shorter/more convenient than ZONE.
+#define PROFILER_FUNC \
+ HWY_FENCE; \
+ const ::jxl::profiler::Zone zone(__func__); \
+ HWY_FENCE
+
+#define PROFILER_PRINT_RESULTS ::jxl::profiler::Zone::PrintResults
+
+} // namespace profiler
+} // namespace jxl
+
+#else // !JXL_PROFILER_ENABLED
+#define PROFILER_ZONE(name)
+#define PROFILER_FUNC
+#define PROFILER_PRINT_RESULTS()
+#endif
+
+#endif // LIB_JXL_BASE_PROFILER_H_
diff --git a/third_party/jpeg-xl/lib/jxl/base/random.cc b/third_party/jpeg-xl/lib/jxl/base/random.cc
new file mode 100644
index 0000000000..c99f88921c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/random.cc
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/random.h"
+
+#include <cmath>
+
+namespace jxl {
+
+Rng::GeometricDistribution::GeometricDistribution(float p)
+ : inv_log_1mp(1.0 / std::log(1 - p)) {}
+
+uint32_t Rng::Geometric(const GeometricDistribution& dist) {
+ float f = UniformF(0, 1);
+ float log = std::log(1 - f) * dist.inv_log_1mp;
+ return static_cast<uint32_t>(log);
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/base/random.h b/third_party/jpeg-xl/lib/jxl/base/random.h
new file mode 100644
index 0000000000..663b88c95d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/random.h
@@ -0,0 +1,95 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_RANDOM_
+#define LIB_JXL_BASE_RANDOM_
+
+// Random number generator + distributions.
+// We don't use <random> because the implementation (and thus results) differs
+// between libstdc++ and libc++.
+
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+struct Rng {
+ explicit Rng(size_t seed)
+ : s{static_cast<uint64_t>(0x94D049BB133111EBull),
+ static_cast<uint64_t>(0xBF58476D1CE4E5B9ull) + seed} {}
+
+ // Xorshift128+ adapted from xorshift128+-inl.h
+ uint64_t operator()() {
+ uint64_t s1 = s[0];
+ const uint64_t s0 = s[1];
+ const uint64_t bits = s1 + s0; // b, c
+ s[0] = s0;
+ s1 ^= s1 << 23;
+ s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
+ s[1] = s1;
+ return bits;
+ }
+
+ // Uniformly distributed int64_t in [begin, end), under the assumption that
+ // `end-begin` is significantly smaller than 1<<64, otherwise there is some
+ // bias.
+ int64_t UniformI(int64_t begin, int64_t end) {
+ JXL_DASSERT(end > begin);
+ return static_cast<int64_t>((*this)() %
+ static_cast<uint64_t>(end - begin)) +
+ begin;
+ }
+
+ // Same as UniformI, but for uint64_t.
+ uint64_t UniformU(uint64_t begin, uint64_t end) {
+ JXL_DASSERT(end > begin);
+ return (*this)() % (end - begin) + begin;
+ }
+
+ // Uniformly distributed float in [begin, end) range. Note: only 23 bits of
+ // randomness.
+ float UniformF(float begin, float end) {
+ float f;
+ // Bits of a random [1, 2) float.
+ uint32_t u = ((*this)() >> (64 - 23)) | 0x3F800000;
+ static_assert(sizeof(f) == sizeof(u),
+ "Float and U32 must have the same size");
+ memcpy(&f, &u, sizeof(f));
+ // Note: (end-begin) * f + (2*begin-end) may fail to return a number >=
+ // begin.
+ return (end - begin) * (f - 1.0f) + begin;
+ }
+
+ // Bernoulli trial
+ bool Bernoulli(float p) { return UniformF(0, 1) < p; }
+
+ // State for geometric distributions.
+ struct GeometricDistribution {
+ explicit GeometricDistribution(float p);
+
+ private:
+ float inv_log_1mp;
+ friend struct Rng;
+ };
+
+ uint32_t Geometric(const GeometricDistribution& dist);
+
+ template <typename T>
+ void Shuffle(T* t, size_t n) {
+ for (size_t i = 0; i + 1 < n; i++) {
+ size_t a = UniformU(i, n);
+ std::swap(t[a], t[i]);
+ }
+ }
+
+ private:
+ uint64_t s[2];
+};
+
+} // namespace jxl
+#endif // LIB_JXL_BASE_RANDOM_
diff --git a/third_party/jpeg-xl/lib/jxl/base/sanitizer_definitions.h b/third_party/jpeg-xl/lib/jxl/base/sanitizer_definitions.h
new file mode 100644
index 0000000000..315f3bd003
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/sanitizer_definitions.h
@@ -0,0 +1,44 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_SANITIZER_DEFINITIONS_H_
+#define LIB_JXL_BASE_SANITIZER_DEFINITIONS_H_
+
+#ifdef MEMORY_SANITIZER
+#define JXL_MEMORY_SANITIZER 1
+#elif defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#define JXL_MEMORY_SANITIZER 1
+#else
+#define JXL_MEMORY_SANITIZER 0
+#endif
+#else
+#define JXL_MEMORY_SANITIZER 0
+#endif
+
+#ifdef ADDRESS_SANITIZER
+#define JXL_ADDRESS_SANITIZER 1
+#elif defined(__has_feature)
+#if __has_feature(address_sanitizer)
+#define JXL_ADDRESS_SANITIZER 1
+#else
+#define JXL_ADDRESS_SANITIZER 0
+#endif
+#else
+#define JXL_ADDRESS_SANITIZER 0
+#endif
+
+#ifdef THREAD_SANITIZER
+#define JXL_THREAD_SANITIZER 1
+#elif defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+#define JXL_THREAD_SANITIZER 1
+#else
+#define JXL_THREAD_SANITIZER 0
+#endif
+#else
+#define JXL_THREAD_SANITIZER 0
+#endif
+#endif // LIB_JXL_BASE_SANITIZER_DEFINITIONS_H
diff --git a/third_party/jpeg-xl/lib/jxl/base/scope_guard.h b/third_party/jpeg-xl/lib/jxl/base/scope_guard.h
new file mode 100644
index 0000000000..a18a44cb79
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/scope_guard.h
@@ -0,0 +1,48 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_SCOPE_GUARD_H_
+#define LIB_JXL_BASE_SCOPE_GUARD_H_
+
+#include <utility>
+
+namespace jxl {
+
+template <typename Callback>
+class ScopeGuard {
+ public:
+ // Discourage unnecessary moves / copies.
+ ScopeGuard(const ScopeGuard &) = delete;
+ ScopeGuard &operator=(const ScopeGuard &) = delete;
+ ScopeGuard &operator=(ScopeGuard &&) = delete;
+
+ // Pre-C++17 does not guarantee RVO -> require move constructor.
+ ScopeGuard(ScopeGuard &&other) : callback_(std::move(other.callback_)) {
+ other.armed_ = false;
+ }
+
+ template <typename CallbackParam>
+ explicit ScopeGuard(CallbackParam &&callback)
+ : callback_(std::forward<CallbackParam>(callback)), armed_(true) {}
+
+ ~ScopeGuard() {
+ if (armed_) callback_();
+ }
+
+ void Disarm() { armed_ = false; }
+
+ private:
+ Callback callback_;
+ bool armed_;
+};
+
+template <typename Callback>
+ScopeGuard<Callback> MakeScopeGuard(Callback &&callback) {
+ return ScopeGuard<Callback>{std::forward<Callback>(callback)};
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_BASE_SCOPE_GUARD_H_
diff --git a/third_party/jpeg-xl/lib/jxl/base/span.h b/third_party/jpeg-xl/lib/jxl/base/span.h
new file mode 100644
index 0000000000..41c3623a4b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/span.h
@@ -0,0 +1,60 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_SPAN_H_
+#define LIB_JXL_BASE_SPAN_H_
+
+// Span (array view) is a non-owning container that provides cheap "cut"
+// operations and could be used as "ArrayLike" data source for PaddedBytes.
+
+#include <stddef.h>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+template <typename T>
+class Span {
+ public:
+ constexpr Span() noexcept : Span(nullptr, 0) {}
+
+ constexpr Span(T* array, size_t length) noexcept
+ : ptr_(array), len_(length) {}
+
+ template <size_t N>
+ explicit constexpr Span(T (&a)[N]) noexcept : Span(a, N) {}
+
+ template <typename ArrayLike>
+ explicit constexpr Span(const ArrayLike& other) noexcept
+ : Span(reinterpret_cast<T*>(other.data()), other.size()) {
+ static_assert(sizeof(*other.data()) == sizeof(T),
+ "Incompatible type of source.");
+ }
+
+ constexpr T* data() const noexcept { return ptr_; }
+
+ constexpr size_t size() const noexcept { return len_; }
+
+ constexpr bool empty() const noexcept { return len_ == 0; }
+
+ constexpr T& operator[](size_t i) const noexcept {
+ // MSVC 2015 accepts this as constexpr, but not ptr_[i]
+ return *(data() + i);
+ }
+
+ void remove_prefix(size_t n) noexcept {
+ JXL_ASSERT(size() >= n);
+ ptr_ += n;
+ len_ -= n;
+ }
+
+ private:
+ T* ptr_;
+ size_t len_;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_BASE_SPAN_H_
diff --git a/third_party/jpeg-xl/lib/jxl/base/status.h b/third_party/jpeg-xl/lib/jxl/base/status.h
new file mode 100644
index 0000000000..f40be0c434
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/status.h
@@ -0,0 +1,326 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_STATUS_H_
+#define LIB_JXL_BASE_STATUS_H_
+
+// Error handling: Status return type + helper macros.
+
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/sanitizer_definitions.h"
+
+#if JXL_ADDRESS_SANITIZER || JXL_MEMORY_SANITIZER || JXL_THREAD_SANITIZER
+#include "sanitizer/common_interface_defs.h" // __sanitizer_print_stack_trace
+#endif // defined(*_SANITIZER)
+
+namespace jxl {
+
+// Uncomment to abort when JXL_FAILURE or JXL_STATUS with a fatal error is
+// reached:
+// #define JXL_CRASH_ON_ERROR
+
+#ifndef JXL_ENABLE_ASSERT
+#define JXL_ENABLE_ASSERT 1
+#endif
+
+#ifndef JXL_ENABLE_CHECK
+#define JXL_ENABLE_CHECK 1
+#endif
+
+// Pass -DJXL_DEBUG_ON_ERROR at compile time to print debug messages when a
+// function returns JXL_FAILURE or calls JXL_NOTIFY_ERROR. Note that this is
+// irrelevant if you also pass -DJXL_CRASH_ON_ERROR.
+#if defined(JXL_DEBUG_ON_ERROR) || defined(JXL_CRASH_ON_ERROR)
+#undef JXL_DEBUG_ON_ERROR
+#define JXL_DEBUG_ON_ERROR 1
+#else // JXL_DEBUG_ON_ERROR || JXL_CRASH_ON_ERROR
+#ifdef NDEBUG
+#define JXL_DEBUG_ON_ERROR 0
+#else // NDEBUG
+#define JXL_DEBUG_ON_ERROR 1
+#endif // NDEBUG
+#endif // JXL_DEBUG_ON_ERROR || JXL_CRASH_ON_ERROR
+
+// Pass -DJXL_DEBUG_ON_ALL_ERROR at compile time to print debug messages on
+// all error (fatal and non-fatal) status. This implies JXL_DEBUG_ON_ERROR.
+#if defined(JXL_DEBUG_ON_ALL_ERROR)
+#undef JXL_DEBUG_ON_ALL_ERROR
+#define JXL_DEBUG_ON_ALL_ERROR 1
+// JXL_DEBUG_ON_ALL_ERROR implies JXL_DEBUG_ON_ERROR too.
+#undef JXL_DEBUG_ON_ERROR
+#define JXL_DEBUG_ON_ERROR 1
+#else // JXL_DEBUG_ON_ALL_ERROR
+#define JXL_DEBUG_ON_ALL_ERROR 0
+#endif // JXL_DEBUG_ON_ALL_ERROR
+
+// The Verbose level for the library
+#ifndef JXL_DEBUG_V_LEVEL
+#define JXL_DEBUG_V_LEVEL 0
+#endif // JXL_DEBUG_V_LEVEL
+
+// Pass -DJXL_DEBUG_ON_ABORT=0 to disable the debug messages on JXL_ASSERT,
+// JXL_CHECK and JXL_ABORT.
+#ifndef JXL_DEBUG_ON_ABORT
+#define JXL_DEBUG_ON_ABORT 1
+#endif // JXL_DEBUG_ON_ABORT
+
+// Print a debug message on standard error. You should use the JXL_DEBUG macro
+// instead of calling Debug directly. This function returns false, so it can be
+// used as a return value in JXL_FAILURE.
+JXL_FORMAT(1, 2)
+inline JXL_NOINLINE bool Debug(const char* format, ...) {
+ va_list args;
+ va_start(args, format);
+ vfprintf(stderr, format, args);
+ va_end(args);
+ return false;
+}
+
+// Print a debug message on standard error if "enabled" is true. "enabled" is
+// normally a macro that evaluates to 0 or 1 at compile time, so the Debug
+// function is never called and optimized out in release builds. Note that the
+// arguments are compiled but not evaluated when enabled is false. The format
+// string must be a explicit string in the call, for example:
+// JXL_DEBUG(JXL_DEBUG_MYMODULE, "my module message: %d", some_var);
+// Add a header at the top of your module's .cc or .h file (depending on whether
+// you have JXL_DEBUG calls from the .h as well) like this:
+// #ifndef JXL_DEBUG_MYMODULE
+// #define JXL_DEBUG_MYMODULE 0
+// #endif JXL_DEBUG_MYMODULE
+#define JXL_DEBUG_TMP(format, ...) \
+ ::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__)
+
+#define JXL_DEBUG(enabled, format, ...) \
+ do { \
+ if (enabled) { \
+ JXL_DEBUG_TMP(format, ##__VA_ARGS__); \
+ } \
+ } while (0)
+
+// JXL_DEBUG version that prints the debug message if the global verbose level
+// defined at compile time by JXL_DEBUG_V_LEVEL is greater or equal than the
+// passed level.
+#define JXL_DEBUG_V(level, format, ...) \
+ JXL_DEBUG(level <= JXL_DEBUG_V_LEVEL, format, ##__VA_ARGS__)
+
+// Warnings (via JXL_WARNING) are enabled by default in debug builds (opt and
+// debug).
+#ifdef JXL_DEBUG_WARNING
+#undef JXL_DEBUG_WARNING
+#define JXL_DEBUG_WARNING 1
+#else // JXL_DEBUG_WARNING
+#ifdef NDEBUG
+#define JXL_DEBUG_WARNING 0
+#else // JXL_DEBUG_WARNING
+#define JXL_DEBUG_WARNING 1
+#endif // NDEBUG
+#endif // JXL_DEBUG_WARNING
+#define JXL_WARNING(format, ...) \
+ JXL_DEBUG(JXL_DEBUG_WARNING, format, ##__VA_ARGS__)
+
+// Exits the program after printing a stack trace when possible.
+JXL_NORETURN inline JXL_NOINLINE bool Abort() {
+#if JXL_ADDRESS_SANITIZER || JXL_MEMORY_SANITIZER || JXL_THREAD_SANITIZER
+ // If compiled with any sanitizer print a stack trace. This call doesn't crash
+ // the program, instead the trap below will crash it also allowing gdb to
+ // break there.
+ __sanitizer_print_stack_trace();
+#endif // *_SANITIZER)
+
+#if JXL_COMPILER_MSVC
+ __debugbreak();
+ abort();
+#else
+ __builtin_trap();
+#endif
+}
+
+// Exits the program after printing file/line plus a formatted string.
+#define JXL_ABORT(format, ...) \
+ ((JXL_DEBUG_ON_ABORT) && ::jxl::Debug(("%s:%d: JXL_ABORT: " format "\n"), \
+ __FILE__, __LINE__, ##__VA_ARGS__), \
+ ::jxl::Abort())
+
+// Does not guarantee running the code, use only for debug mode checks.
+#if JXL_ENABLE_ASSERT
+#define JXL_ASSERT(condition) \
+ do { \
+ if (!(condition)) { \
+ JXL_DEBUG(JXL_DEBUG_ON_ABORT, "JXL_ASSERT: %s", #condition); \
+ ::jxl::Abort(); \
+ } \
+ } while (0)
+#else
+#define JXL_ASSERT(condition) \
+ do { \
+ } while (0)
+#endif
+
+// Define JXL_IS_DEBUG_BUILD that denotes asan, msan and other debug builds,
+// but not opt or release.
+#ifndef JXL_IS_DEBUG_BUILD
+#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || \
+ defined(MEMORY_SANITIZER) || defined(THREAD_SANITIZER) || \
+ defined(__clang_analyzer__)
+#define JXL_IS_DEBUG_BUILD 1
+#else
+#define JXL_IS_DEBUG_BUILD 0
+#endif
+#endif // JXL_IS_DEBUG_BUILD
+
+// Same as above, but only runs in debug builds (builds where NDEBUG is not
+// defined). This is useful for slower asserts that we want to run more rarely
+// than usual. These will run on asan, msan and other debug builds, but not in
+// opt or release.
+#if JXL_IS_DEBUG_BUILD
+#define JXL_DASSERT(condition) \
+ do { \
+ if (!(condition)) { \
+ JXL_DEBUG(JXL_DEBUG_ON_ABORT, "JXL_DASSERT: %s", #condition); \
+ ::jxl::Abort(); \
+ } \
+ } while (0)
+#else
+#define JXL_DASSERT(condition) \
+ do { \
+ } while (0)
+#endif
+
+// Always runs the condition, so can be used for non-debug calls.
+#if JXL_ENABLE_CHECK
+#define JXL_CHECK(condition) \
+ do { \
+ if (!(condition)) { \
+ JXL_DEBUG(JXL_DEBUG_ON_ABORT, "JXL_CHECK: %s", #condition); \
+ ::jxl::Abort(); \
+ } \
+ } while (0)
+#else
+#define JXL_CHECK(condition) \
+ do { \
+ (void)(condition); \
+ } while (0)
+#endif
+
+// A jxl::Status value from a StatusCode or Status which prints a debug message
+// when enabled.
+#define JXL_STATUS(status, format, ...) \
+ ::jxl::StatusMessage(::jxl::Status(status), "%s:%d: " format "\n", __FILE__, \
+ __LINE__, ##__VA_ARGS__)
+
+// Notify of an error but discard the resulting Status value. This is only
+// useful for debug builds or when building with JXL_CRASH_ON_ERROR.
+#define JXL_NOTIFY_ERROR(format, ...) \
+ (void)JXL_STATUS(::jxl::StatusCode::kGenericError, "JXL_ERROR: " format, \
+ ##__VA_ARGS__)
+
+// An error Status with a message. The JXL_STATUS() macro will return a Status
+// object with a kGenericError code, but the comma operator helps with
+// clang-tidy inference and potentially with optimizations.
+#define JXL_FAILURE(format, ...) \
+ ((void)JXL_STATUS(::jxl::StatusCode::kGenericError, "JXL_FAILURE: " format, \
+ ##__VA_ARGS__), \
+ ::jxl::Status(::jxl::StatusCode::kGenericError))
+
+// Always evaluates the status exactly once, so can be used for non-debug calls.
+// Returns from the current context if the passed Status expression is an error
+// (fatal or non-fatal). The return value is the passed Status.
+#define JXL_RETURN_IF_ERROR(status) \
+ do { \
+ ::jxl::Status jxl_return_if_error_status = (status); \
+ if (!jxl_return_if_error_status) { \
+ (void)::jxl::StatusMessage( \
+ jxl_return_if_error_status, \
+ "%s:%d: JXL_RETURN_IF_ERROR code=%d: %s\n", __FILE__, __LINE__, \
+ static_cast<int>(jxl_return_if_error_status.code()), #status); \
+ return jxl_return_if_error_status; \
+ } \
+ } while (0)
+
+// As above, but without calling StatusMessage. Intended for bundles (see
+// fields.h), which have numerous call sites (-> relevant for code size) and do
+// not want to generate excessive messages when decoding partial headers.
+#define JXL_QUIET_RETURN_IF_ERROR(status) \
+ do { \
+ ::jxl::Status jxl_return_if_error_status = (status); \
+ if (!jxl_return_if_error_status) { \
+ return jxl_return_if_error_status; \
+ } \
+ } while (0)
+
+enum class StatusCode : int32_t {
+ // Non-fatal errors (negative values).
+ kNotEnoughBytes = -1,
+
+ // The only non-error status code.
+ kOk = 0,
+
+ // Fatal-errors (positive values)
+ kGenericError = 1,
+};
+
+// Drop-in replacement for bool that raises compiler warnings if not used
+// after being returned from a function. Example:
+// Status LoadFile(...) { return true; } is more compact than
+// bool JXL_MUST_USE_RESULT LoadFile(...) { return true; }
+// In case of error, the status can carry an extra error code in its value which
+// is split between fatal and non-fatal error codes.
+class JXL_MUST_USE_RESULT Status {
+ public:
+ // We want implicit constructor from bool to allow returning "true" or "false"
+ // on a function when using Status. "true" means kOk while "false" means a
+ // generic fatal error.
+ // NOLINTNEXTLINE(google-explicit-constructor)
+ constexpr Status(bool ok)
+ : code_(ok ? StatusCode::kOk : StatusCode::kGenericError) {}
+
+ // NOLINTNEXTLINE(google-explicit-constructor)
+ constexpr Status(StatusCode code) : code_(code) {}
+
+ // We also want implicit cast to bool to check for return values of functions.
+ // NOLINTNEXTLINE(google-explicit-constructor)
+ constexpr operator bool() const { return code_ == StatusCode::kOk; }
+
+ constexpr StatusCode code() const { return code_; }
+
+ // Returns whether the status code is a fatal error.
+ constexpr bool IsFatalError() const {
+ return static_cast<int32_t>(code_) > 0;
+ }
+
+ private:
+ StatusCode code_;
+};
+
+// Helper function to create a Status and print the debug message or abort when
+// needed.
+inline JXL_FORMAT(2, 3) Status
+ StatusMessage(const Status status, const char* format, ...) {
+ // This block will be optimized out when JXL_DEBUG_ON_ERROR and
+ // JXL_DEBUG_ON_ALL_ERROR are both disabled.
+ if ((JXL_DEBUG_ON_ERROR && status.IsFatalError()) ||
+ (JXL_DEBUG_ON_ALL_ERROR && !status)) {
+ va_list args;
+ va_start(args, format);
+ vfprintf(stderr, format, args);
+ va_end(args);
+ }
+#ifdef JXL_CRASH_ON_ERROR
+ // JXL_CRASH_ON_ERROR means to Abort() only on non-fatal errors.
+ if (status.IsFatalError()) {
+ Abort();
+ }
+#endif // JXL_CRASH_ON_ERROR
+ return status;
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_BASE_STATUS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/base/tsc_timer.h b/third_party/jpeg-xl/lib/jxl/base/tsc_timer.h
new file mode 100644
index 0000000000..74d51f72d1
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/base/tsc_timer.h
@@ -0,0 +1,172 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BASE_TSC_TIMER_H_
+#define LIB_JXL_BASE_TSC_TIMER_H_
+
+// High-resolution (~10 ns) timestamps, using fences to prevent reordering and
+// ensure exactly the desired regions are measured.
+
+#include <stdint.h>
+#include <time.h> // clock_gettime
+
+#if defined(_WIN32) || defined(_WIN64)
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif // WIN32_LEAN_AND_MEAN
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif // NOMINMAX
+#ifndef NOGDI
+#define NOGDI
+#endif // NOGDI
+#include <windows.h>
+// Undef macros to avoid collisions
+#undef LoadFence
+#endif
+
+#if defined(__APPLE__)
+#include <mach/mach.h>
+#include <mach/mach_time.h>
+#endif
+
+#if defined(__HAIKU__)
+#include <OS.h>
+#endif
+
+#include <ctime>
+#include <hwy/base.h>
+#include <hwy/cache_control.h> // LoadFence
+
+namespace jxl {
+namespace profiler {
+
+// Ticks := platform-specific timer values (CPU cycles on x86). Must be
+// unsigned to guarantee wraparound on overflow.
+using Ticks = uint64_t;
+
+// TicksBefore/After return absolute timestamps and must be placed immediately
+// before and after the region to measure. We provide separate Before/After
+// functions because they use different fences.
+//
+// Background: RDTSC is not 'serializing'; earlier instructions may complete
+// after it, and/or later instructions may complete before it. 'Fences' ensure
+// regions' elapsed times are independent of such reordering. The only
+// documented unprivileged serializing instruction is CPUID, which acts as a
+// full fence (no reordering across it in either direction). Unfortunately
+// the latency of CPUID varies wildly (perhaps made worse by not initializing
+// its EAX input). Because it cannot reliably be deducted from the region's
+// elapsed time, it must not be included in the region to measure (i.e.
+// between the two RDTSC).
+//
+// The newer RDTSCP is sometimes described as serializing, but it actually
+// only serves as a half-fence with release semantics. Although all
+// instructions in the region will complete before the final timestamp is
+// captured, subsequent instructions may leak into the region and increase the
+// elapsed time. Inserting another fence after the final RDTSCP would prevent
+// such reordering without affecting the measured region.
+//
+// Fortunately, such a fence exists. The LFENCE instruction is only documented
+// to delay later loads until earlier loads are visible. However, Intel's
+// reference manual says it acts as a full fence (waiting until all earlier
+// instructions have completed, and delaying later instructions until it
+// completes). AMD assigns the same behavior to MFENCE.
+//
+// We need a fence before the initial RDTSC to prevent earlier instructions
+// from leaking into the region, and arguably another after RDTSC to avoid
+// region instructions from completing before the timestamp is recorded.
+// When surrounded by fences, the additional RDTSCP half-fence provides no
+// benefit, so the initial timestamp can be recorded via RDTSC, which has
+// lower overhead than RDTSCP because it does not read TSC_AUX. In summary,
+// we define Before = LFENCE/RDTSC/LFENCE; After = RDTSCP/LFENCE.
+//
+// Using Before+Before leads to higher variance and overhead than After+After.
+// However, After+After includes an LFENCE in the region measurements, which
+// adds a delay dependent on earlier loads. The combination of Before+After
+// is faster than Before+Before and more consistent than After+After because
+// the first LFENCE already delayed subsequent loads before the measured
+// region. This combination seems not to have been considered in prior work:
+// http://akaros.cs.berkeley.edu/lxr/akaros/kern/arch/x86/rdtsc_test.c
+//
+// Note: performance counters can measure 'exact' instructions-retired or
+// (unhalted) cycle counts. The RDPMC instruction is not serializing and also
+// requires fences. Unfortunately, it is not accessible on all OSes and we
+// prefer to avoid kernel-mode drivers. Performance counters are also affected
+// by several under/over-count errata, so we use the TSC instead.
+
+// Returns a 64-bit timestamp in unit of 'ticks'; to convert to seconds,
+// divide by InvariantTicksPerSecond.
+static HWY_INLINE HWY_MAYBE_UNUSED Ticks TicksBefore() {
+ Ticks t;
+#if HWY_ARCH_PPC && defined(__GLIBC__)
+ asm volatile("mfspr %0, %1" : "=r"(t) : "i"(268));
+#elif HWY_ARCH_X86 && HWY_COMPILER_MSVC
+ hwy::LoadFence();
+ HWY_FENCE;
+ t = __rdtsc();
+ hwy::LoadFence();
+ HWY_FENCE;
+#elif HWY_ARCH_X86_64
+ asm volatile(
+ "lfence\n\t"
+ "rdtsc\n\t"
+ "shl $32, %%rdx\n\t"
+ "or %%rdx, %0\n\t"
+ "lfence"
+ : "=a"(t)
+ :
+ // "memory" avoids reordering. rdx = TSC >> 32.
+ // "cc" = flags modified by SHL.
+ : "rdx", "memory", "cc");
+#elif HWY_ARCH_RVV
+ asm volatile("rdcycle %0" : "=r"(t));
+#elif defined(_WIN32) || defined(_WIN64)
+ LARGE_INTEGER counter;
+ (void)QueryPerformanceCounter(&counter);
+ t = counter.QuadPart;
+#elif defined(__APPLE__)
+ t = mach_absolute_time();
+#elif defined(__HAIKU__)
+ t = system_time_nsecs(); // since boot
+#else // POSIX
+ timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ t = static_cast<Ticks>(ts.tv_sec * 1000000000LL + ts.tv_nsec);
+#endif
+ return t;
+}
+
+static HWY_INLINE HWY_MAYBE_UNUSED Ticks TicksAfter() {
+ Ticks t;
+#if HWY_ARCH_PPC && defined(__GLIBC__)
+ asm volatile("mfspr %0, %1" : "=r"(t) : "i"(268));
+#elif HWY_ARCH_X86 && HWY_COMPILER_MSVC
+ HWY_FENCE;
+ unsigned aux;
+ t = __rdtscp(&aux);
+ hwy::LoadFence();
+ HWY_FENCE;
+#elif HWY_ARCH_X86_64
+ // Use inline asm because __rdtscp generates code to store TSC_AUX (ecx).
+ asm volatile(
+ "rdtscp\n\t"
+ "shl $32, %%rdx\n\t"
+ "or %%rdx, %0\n\t"
+ "lfence"
+ : "=a"(t)
+ :
+ // "memory" avoids reordering. rcx = TSC_AUX. rdx = TSC >> 32.
+ // "cc" = flags modified by SHL.
+ : "rcx", "rdx", "memory", "cc");
+#else
+ t = TicksBefore(); // no difference on other platforms.
+#endif
+ return t;
+}
+
+} // namespace profiler
+} // namespace jxl
+
+#endif // LIB_JXL_BASE_TSC_TIMER_H_
diff --git a/third_party/jpeg-xl/lib/jxl/bit_reader_test.cc b/third_party/jpeg-xl/lib/jxl/bit_reader_test.cc
new file mode 100644
index 0000000000..24cc9b64e8
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/bit_reader_test.cc
@@ -0,0 +1,262 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(BitReaderTest, ExtendsWithZeroes) {
+ for (size_t size = 4; size < 32; ++size) {
+ std::vector<uint8_t> data(size, 0xff);
+
+ for (size_t n_bytes = 0; n_bytes < size; n_bytes++) {
+ BitReader br(Span<const uint8_t>(data.data(), n_bytes));
+ // Read all the bits
+ for (size_t i = 0; i < n_bytes * kBitsPerByte; i++) {
+ ASSERT_EQ(br.ReadBits(1), 1u) << "n_bytes=" << n_bytes << " i=" << i;
+ }
+
+ // PEEK more than the declared size - all will be zero. Cannot consume.
+ for (size_t i = 0; i < BitReader::kMaxBitsPerCall; i++) {
+ ASSERT_EQ(br.PeekBits(i), 0u)
+ << "size=" << size << "n_bytes=" << n_bytes << " i=" << i;
+ }
+
+ EXPECT_TRUE(br.Close());
+ }
+ }
+}
+
+struct Symbol {
+ uint32_t num_bits;
+ uint32_t value;
+};
+
+// Reading from output gives the same values.
+TEST(BitReaderTest, TestRoundTrip) {
+ test::ThreadPoolForTests pool(8);
+ EXPECT_TRUE(RunOnPool(
+ &pool, 0, 1000, ThreadPool::NoInit,
+ [](const uint32_t task, size_t /* thread */) {
+ constexpr size_t kMaxBits = 8000;
+ BitWriter writer;
+ BitWriter::Allotment allotment(&writer, kMaxBits);
+
+ std::vector<Symbol> symbols;
+ symbols.reserve(1000);
+
+ Rng rng(55537 + 129 * task);
+
+ for (;;) {
+ const uint32_t num_bits = rng.UniformU(1, 33);
+ if (writer.BitsWritten() + num_bits > kMaxBits) break;
+ const uint32_t value = rng.UniformU(0, 1ULL << num_bits);
+ symbols.push_back({num_bits, value});
+ writer.Write(num_bits, value);
+ }
+
+ writer.ZeroPadToByte();
+ allotment.ReclaimAndCharge(&writer, 0, nullptr);
+ BitReader reader(writer.GetSpan());
+ for (const Symbol& s : symbols) {
+ EXPECT_EQ(s.value, reader.ReadBits(s.num_bits));
+ }
+ EXPECT_TRUE(reader.Close());
+ },
+ "TestTBitReaderRoundTrip"));
+}
+
+// SkipBits is the same as reading that many bits.
+TEST(BitReaderTest, TestSkip) {
+ test::ThreadPoolForTests pool(8);
+ EXPECT_TRUE(RunOnPool(
+ &pool, 0, 96, ThreadPool::NoInit,
+ [](const uint32_t task, size_t /* thread */) {
+ constexpr size_t kSize = 100;
+
+ for (size_t skip = 0; skip < 128; ++skip) {
+ BitWriter writer;
+ BitWriter::Allotment allotment(&writer, kSize * kBitsPerByte);
+ // Start with "task" 1-bits.
+ for (size_t i = 0; i < task; ++i) {
+ writer.Write(1, 1);
+ }
+
+ // Write 0-bits that we will skip over
+ for (size_t i = 0; i < skip; ++i) {
+ writer.Write(1, 0);
+ }
+
+ // Write terminator bits '101'
+ writer.Write(3, 5);
+ EXPECT_EQ(task + skip + 3, writer.BitsWritten());
+ writer.ZeroPadToByte();
+ AuxOut aux_out;
+ allotment.ReclaimAndCharge(&writer, 0, &aux_out);
+ EXPECT_LT(aux_out.layers[0].total_bits, kSize * 8);
+
+ BitReader reader1(writer.GetSpan());
+ BitReader reader2(writer.GetSpan());
+ // Verify initial 1-bits
+ for (size_t i = 0; i < task; ++i) {
+ EXPECT_EQ(1u, reader1.ReadBits(1));
+ EXPECT_EQ(1u, reader2.ReadBits(1));
+ }
+
+ // SkipBits or manually read "skip" bits
+ reader1.SkipBits(skip);
+ for (size_t i = 0; i < skip; ++i) {
+ EXPECT_EQ(0u, reader2.ReadBits(1))
+ << " skip=" << skip << " i=" << i;
+ }
+ EXPECT_EQ(reader1.TotalBitsConsumed(), reader2.TotalBitsConsumed());
+
+ // Ensure both readers see the terminator bits.
+ EXPECT_EQ(5u, reader1.ReadBits(3));
+ EXPECT_EQ(5u, reader2.ReadBits(3));
+
+ EXPECT_TRUE(reader1.Close());
+ EXPECT_TRUE(reader2.Close());
+ }
+ },
+ "TestSkip"));
+}
+
+// Verifies byte order and different groupings of bits.
+TEST(BitReaderTest, TestOrder) {
+ constexpr size_t kMaxBits = 16;
+
+ // u(1) - bits written into LSBs of first byte
+ {
+ BitWriter writer;
+ BitWriter::Allotment allotment(&writer, kMaxBits);
+ for (size_t i = 0; i < 5; ++i) {
+ writer.Write(1, 1);
+ }
+ for (size_t i = 0; i < 5; ++i) {
+ writer.Write(1, 0);
+ }
+ for (size_t i = 0; i < 6; ++i) {
+ writer.Write(1, 1);
+ }
+
+ writer.ZeroPadToByte();
+ allotment.ReclaimAndCharge(&writer, 0, nullptr);
+ BitReader reader(writer.GetSpan());
+ EXPECT_EQ(0x1Fu, reader.ReadFixedBits<8>());
+ EXPECT_EQ(0xFCu, reader.ReadFixedBits<8>());
+ EXPECT_TRUE(reader.Close());
+ }
+
+ // u(8) - get bytes in the same order
+ {
+ BitWriter writer;
+ BitWriter::Allotment allotment(&writer, kMaxBits);
+ writer.Write(8, 0xF8);
+ writer.Write(8, 0x3F);
+
+ writer.ZeroPadToByte();
+ allotment.ReclaimAndCharge(&writer, 0, nullptr);
+ BitReader reader(writer.GetSpan());
+ EXPECT_EQ(0xF8u, reader.ReadFixedBits<8>());
+ EXPECT_EQ(0x3Fu, reader.ReadFixedBits<8>());
+ EXPECT_TRUE(reader.Close());
+ }
+
+ // u(16) - little-endian bytes
+ {
+ BitWriter writer;
+ BitWriter::Allotment allotment(&writer, kMaxBits);
+ writer.Write(16, 0xF83F);
+
+ writer.ZeroPadToByte();
+ allotment.ReclaimAndCharge(&writer, 0, nullptr);
+ BitReader reader(writer.GetSpan());
+ EXPECT_EQ(0x3Fu, reader.ReadFixedBits<8>());
+ EXPECT_EQ(0xF8u, reader.ReadFixedBits<8>());
+ EXPECT_TRUE(reader.Close());
+ }
+
+ // Non-byte-aligned, mixed sizes
+ {
+ BitWriter writer;
+ BitWriter::Allotment allotment(&writer, kMaxBits);
+ writer.Write(1, 1);
+ writer.Write(3, 6);
+ writer.Write(8, 0xDB);
+ writer.Write(4, 8);
+
+ writer.ZeroPadToByte();
+ allotment.ReclaimAndCharge(&writer, 0, nullptr);
+ BitReader reader(writer.GetSpan());
+ EXPECT_EQ(0xBDu, reader.ReadFixedBits<8>());
+ EXPECT_EQ(0x8Du, reader.ReadFixedBits<8>());
+ EXPECT_TRUE(reader.Close());
+ }
+}
+
+TEST(BitReaderTest, TotalCountersTest) {
+ uint8_t buf[8] = {1, 2, 3, 4};
+ BitReader reader(Span<const uint8_t>(buf, sizeof(buf)));
+
+ EXPECT_EQ(sizeof(buf), reader.TotalBytes());
+ EXPECT_EQ(0u, reader.TotalBitsConsumed());
+ reader.ReadFixedBits<1>();
+ EXPECT_EQ(1u, reader.TotalBitsConsumed());
+
+ reader.ReadFixedBits<10>();
+ EXPECT_EQ(11u, reader.TotalBitsConsumed());
+
+ reader.ReadFixedBits<4>();
+ EXPECT_EQ(15u, reader.TotalBitsConsumed());
+
+ reader.ReadFixedBits<1>();
+ EXPECT_EQ(16u, reader.TotalBitsConsumed());
+
+ reader.ReadFixedBits<16>();
+ EXPECT_EQ(32u, reader.TotalBitsConsumed());
+
+ EXPECT_TRUE(reader.Close());
+}
+
+TEST(BitReaderTest, MoveTest) {
+ uint8_t buf[8] = {1, 2, 3, 4};
+ BitReader reader2;
+ {
+ BitReader reader1(Span<const uint8_t>(buf, sizeof(buf)));
+
+ EXPECT_EQ(0u, reader1.TotalBitsConsumed());
+ reader1.ReadFixedBits<16>();
+ EXPECT_EQ(16u, reader1.TotalBitsConsumed());
+
+ reader2 = std::move(reader1);
+ // From this point reader1 is invalid, but can continue to access reader2
+ // and we don't need to call Close() on reader1.
+ }
+
+ EXPECT_EQ(16u, reader2.TotalBitsConsumed());
+ EXPECT_EQ(3U, reader2.ReadFixedBits<8>());
+ EXPECT_EQ(24u, reader2.TotalBitsConsumed());
+
+ EXPECT_TRUE(reader2.Close());
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/bits_test.cc b/third_party/jpeg-xl/lib/jxl/bits_test.cc
new file mode 100644
index 0000000000..bd7aa548c8
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/bits_test.cc
@@ -0,0 +1,87 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/bits.h"
+
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(BitsTest, TestNumZeroBits) {
+ // Zero input is well-defined.
+ EXPECT_EQ(32u, Num0BitsAboveMS1Bit(0u));
+ EXPECT_EQ(64u, Num0BitsAboveMS1Bit(0ull));
+ EXPECT_EQ(32u, Num0BitsBelowLS1Bit(0u));
+ EXPECT_EQ(64u, Num0BitsBelowLS1Bit(0ull));
+
+ EXPECT_EQ(31u, Num0BitsAboveMS1Bit(1u));
+ EXPECT_EQ(30u, Num0BitsAboveMS1Bit(2u));
+ EXPECT_EQ(63u, Num0BitsAboveMS1Bit(1ull));
+ EXPECT_EQ(62u, Num0BitsAboveMS1Bit(2ull));
+
+ EXPECT_EQ(0u, Num0BitsBelowLS1Bit(1u));
+ EXPECT_EQ(0u, Num0BitsBelowLS1Bit(1ull));
+ EXPECT_EQ(1u, Num0BitsBelowLS1Bit(2u));
+ EXPECT_EQ(1u, Num0BitsBelowLS1Bit(2ull));
+
+ EXPECT_EQ(0u, Num0BitsAboveMS1Bit(0x80000000u));
+ EXPECT_EQ(0u, Num0BitsAboveMS1Bit(0x8000000000000000ull));
+ EXPECT_EQ(31u, Num0BitsBelowLS1Bit(0x80000000u));
+ EXPECT_EQ(63u, Num0BitsBelowLS1Bit(0x8000000000000000ull));
+}
+
+TEST(BitsTest, TestFloorLog2) {
+ // for input = [1, 7]
+ const size_t expected[7] = {0, 1, 1, 2, 2, 2, 2};
+ for (uint32_t i = 1; i <= 7; ++i) {
+ EXPECT_EQ(expected[i - 1], FloorLog2Nonzero(i)) << " " << i;
+ EXPECT_EQ(expected[i - 1], FloorLog2Nonzero(uint64_t(i))) << " " << i;
+ }
+
+ EXPECT_EQ(11u, FloorLog2Nonzero(0x00000fffu)); // 4095
+ EXPECT_EQ(12u, FloorLog2Nonzero(0x00001000u)); // 4096
+ EXPECT_EQ(12u, FloorLog2Nonzero(0x00001001u)); // 4097
+
+ EXPECT_EQ(31u, FloorLog2Nonzero(0x80000000u));
+ EXPECT_EQ(31u, FloorLog2Nonzero(0x80000001u));
+ EXPECT_EQ(31u, FloorLog2Nonzero(0xFFFFFFFFu));
+
+ EXPECT_EQ(31u, FloorLog2Nonzero(0x80000000ull));
+ EXPECT_EQ(31u, FloorLog2Nonzero(0x80000001ull));
+ EXPECT_EQ(31u, FloorLog2Nonzero(0xFFFFFFFFull));
+
+ EXPECT_EQ(63u, FloorLog2Nonzero(0x8000000000000000ull));
+ EXPECT_EQ(63u, FloorLog2Nonzero(0x8000000000000001ull));
+ EXPECT_EQ(63u, FloorLog2Nonzero(0xFFFFFFFFFFFFFFFFull));
+}
+
+TEST(BitsTest, TestCeilLog2) {
+ // for input = [1, 7]
+ const size_t expected[7] = {0, 1, 2, 2, 3, 3, 3};
+ for (uint32_t i = 1; i <= 7; ++i) {
+ EXPECT_EQ(expected[i - 1], CeilLog2Nonzero(i)) << " " << i;
+ EXPECT_EQ(expected[i - 1], CeilLog2Nonzero(uint64_t(i))) << " " << i;
+ }
+
+ EXPECT_EQ(12u, CeilLog2Nonzero(0x00000fffu)); // 4095
+ EXPECT_EQ(12u, CeilLog2Nonzero(0x00001000u)); // 4096
+ EXPECT_EQ(13u, CeilLog2Nonzero(0x00001001u)); // 4097
+
+ EXPECT_EQ(31u, CeilLog2Nonzero(0x80000000u));
+ EXPECT_EQ(32u, CeilLog2Nonzero(0x80000001u));
+ EXPECT_EQ(32u, CeilLog2Nonzero(0xFFFFFFFFu));
+
+ EXPECT_EQ(31u, CeilLog2Nonzero(0x80000000ull));
+ EXPECT_EQ(32u, CeilLog2Nonzero(0x80000001ull));
+ EXPECT_EQ(32u, CeilLog2Nonzero(0xFFFFFFFFull));
+
+ EXPECT_EQ(63u, CeilLog2Nonzero(0x8000000000000000ull));
+ EXPECT_EQ(64u, CeilLog2Nonzero(0x8000000000000001ull));
+ EXPECT_EQ(64u, CeilLog2Nonzero(0xFFFFFFFFFFFFFFFFull));
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/blending.cc b/third_party/jpeg-xl/lib/jxl/blending.cc
new file mode 100644
index 0000000000..ab37fdabb5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/blending.cc
@@ -0,0 +1,152 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/blending.h"
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+bool NeedsBlending(PassesDecoderState* dec_state) {
+ const PassesSharedState& state = *dec_state->shared;
+ if (!(state.frame_header.frame_type == FrameType::kRegularFrame ||
+ state.frame_header.frame_type == FrameType::kSkipProgressive)) {
+ return false;
+ }
+ const auto& info = state.frame_header.blending_info;
+ bool replace_all = (info.mode == BlendMode::kReplace);
+ for (const auto& ec_i : state.frame_header.extra_channel_blending_info) {
+ if (ec_i.mode != BlendMode::kReplace) {
+ replace_all = false;
+ }
+ }
+ // Replace the full frame: nothing to do.
+ if (!state.frame_header.custom_size_or_origin && replace_all) {
+ return false;
+ }
+ return true;
+}
+
+void PerformBlending(const float* const* bg, const float* const* fg,
+ float* const* out, size_t x0, size_t xsize,
+ const PatchBlending& color_blending,
+ const PatchBlending* ec_blending,
+ const std::vector<ExtraChannelInfo>& extra_channel_info) {
+ bool has_alpha = false;
+ size_t num_ec = extra_channel_info.size();
+ for (size_t i = 0; i < num_ec; i++) {
+ if (extra_channel_info[i].type == jxl::ExtraChannel::kAlpha) {
+ has_alpha = true;
+ break;
+ }
+ }
+ ImageF tmp(xsize, 3 + num_ec);
+ // Blend extra channels first so that we use the pre-blending alpha.
+ for (size_t i = 0; i < num_ec; i++) {
+ if (ec_blending[i].mode == PatchBlendMode::kAdd) {
+ for (size_t x = 0; x < xsize; x++) {
+ tmp.Row(3 + i)[x] = bg[3 + i][x + x0] + fg[3 + i][x + x0];
+ }
+ } else if (ec_blending[i].mode == PatchBlendMode::kBlendAbove) {
+ size_t alpha = ec_blending[i].alpha_channel;
+ bool is_premultiplied = extra_channel_info[alpha].alpha_associated;
+ PerformAlphaBlending(bg[3 + i] + x0, bg[3 + alpha] + x0, fg[3 + i] + x0,
+ fg[3 + alpha] + x0, tmp.Row(3 + i), xsize,
+ is_premultiplied, ec_blending[i].clamp);
+ } else if (ec_blending[i].mode == PatchBlendMode::kBlendBelow) {
+ size_t alpha = ec_blending[i].alpha_channel;
+ bool is_premultiplied = extra_channel_info[alpha].alpha_associated;
+ PerformAlphaBlending(fg[3 + i] + x0, fg[3 + alpha] + x0, bg[3 + i] + x0,
+ bg[3 + alpha] + x0, tmp.Row(3 + i), xsize,
+ is_premultiplied, ec_blending[i].clamp);
+ } else if (ec_blending[i].mode == PatchBlendMode::kAlphaWeightedAddAbove) {
+ size_t alpha = ec_blending[i].alpha_channel;
+ PerformAlphaWeightedAdd(bg[3 + i] + x0, fg[3 + i] + x0,
+ fg[3 + alpha] + x0, tmp.Row(3 + i), xsize,
+ ec_blending[i].clamp);
+ } else if (ec_blending[i].mode == PatchBlendMode::kAlphaWeightedAddBelow) {
+ size_t alpha = ec_blending[i].alpha_channel;
+ PerformAlphaWeightedAdd(fg[3 + i] + x0, bg[3 + i] + x0,
+ bg[3 + alpha] + x0, tmp.Row(3 + i), xsize,
+ ec_blending[i].clamp);
+ } else if (ec_blending[i].mode == PatchBlendMode::kMul) {
+ PerformMulBlending(bg[3 + i] + x0, fg[3 + i] + x0, tmp.Row(3 + i), xsize,
+ ec_blending[i].clamp);
+ } else if (ec_blending[i].mode == PatchBlendMode::kReplace) {
+ memcpy(tmp.Row(3 + i), fg[3 + i] + x0, xsize * sizeof(**fg));
+ } else if (ec_blending[i].mode == PatchBlendMode::kNone) {
+ if (xsize) memcpy(tmp.Row(3 + i), bg[3 + i] + x0, xsize * sizeof(**fg));
+ } else {
+ JXL_ABORT("Unreachable");
+ }
+ }
+ size_t alpha = color_blending.alpha_channel;
+
+ if (color_blending.mode == PatchBlendMode::kAdd ||
+ (color_blending.mode == PatchBlendMode::kAlphaWeightedAddAbove &&
+ !has_alpha) ||
+ (color_blending.mode == PatchBlendMode::kAlphaWeightedAddBelow &&
+ !has_alpha)) {
+ for (int p = 0; p < 3; p++) {
+ float* out = tmp.Row(p);
+ for (size_t x = 0; x < xsize; x++) {
+ out[x] = bg[p][x + x0] + fg[p][x + x0];
+ }
+ }
+ } else if (color_blending.mode == PatchBlendMode::kBlendAbove
+ // blend without alpha is just replace
+ && has_alpha) {
+ bool is_premultiplied = extra_channel_info[alpha].alpha_associated;
+ PerformAlphaBlending(
+ {bg[0] + x0, bg[1] + x0, bg[2] + x0, bg[3 + alpha] + x0},
+ {fg[0] + x0, fg[1] + x0, fg[2] + x0, fg[3 + alpha] + x0},
+ {tmp.Row(0), tmp.Row(1), tmp.Row(2), tmp.Row(3 + alpha)}, xsize,
+ is_premultiplied, color_blending.clamp);
+ } else if (color_blending.mode == PatchBlendMode::kBlendBelow
+ // blend without alpha is just replace
+ && has_alpha) {
+ bool is_premultiplied = extra_channel_info[alpha].alpha_associated;
+ PerformAlphaBlending(
+ {fg[0] + x0, fg[1] + x0, fg[2] + x0, fg[3 + alpha] + x0},
+ {bg[0] + x0, bg[1] + x0, bg[2] + x0, bg[3 + alpha] + x0},
+ {tmp.Row(0), tmp.Row(1), tmp.Row(2), tmp.Row(3 + alpha)}, xsize,
+ is_premultiplied, color_blending.clamp);
+ } else if (color_blending.mode == PatchBlendMode::kAlphaWeightedAddAbove) {
+ JXL_DASSERT(has_alpha);
+ for (size_t c = 0; c < 3; c++) {
+ PerformAlphaWeightedAdd(bg[c] + x0, fg[c] + x0, fg[3 + alpha] + x0,
+ tmp.Row(c), xsize, color_blending.clamp);
+ }
+ } else if (color_blending.mode == PatchBlendMode::kAlphaWeightedAddBelow) {
+ JXL_DASSERT(has_alpha);
+ for (size_t c = 0; c < 3; c++) {
+ PerformAlphaWeightedAdd(fg[c] + x0, bg[c] + x0, bg[3 + alpha] + x0,
+ tmp.Row(c), xsize, color_blending.clamp);
+ }
+ } else if (color_blending.mode == PatchBlendMode::kMul) {
+ for (int p = 0; p < 3; p++) {
+ PerformMulBlending(bg[p] + x0, fg[p] + x0, tmp.Row(p), xsize,
+ color_blending.clamp);
+ }
+ } else if (color_blending.mode == PatchBlendMode::kReplace ||
+ color_blending.mode == PatchBlendMode::kBlendAbove ||
+ color_blending.mode == PatchBlendMode::kBlendBelow) { // kReplace
+ for (size_t p = 0; p < 3; p++) {
+ memcpy(tmp.Row(p), fg[p] + x0, xsize * sizeof(**fg));
+ }
+ } else if (color_blending.mode == PatchBlendMode::kNone) {
+ for (size_t p = 0; p < 3; p++) {
+ memcpy(tmp.Row(p), bg[p] + x0, xsize * sizeof(**fg));
+ }
+ } else {
+ JXL_ABORT("Unreachable");
+ }
+ for (size_t i = 0; i < 3 + num_ec; i++) {
+ if (xsize != 0) memcpy(out[i] + x0, tmp.Row(i), xsize * sizeof(**out));
+ }
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/blending.h b/third_party/jpeg-xl/lib/jxl/blending.h
new file mode 100644
index 0000000000..7eab7d50cd
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/blending.h
@@ -0,0 +1,24 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BLENDING_H_
+#define LIB_JXL_BLENDING_H_
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+bool NeedsBlending(PassesDecoderState* dec_state);
+
+void PerformBlending(const float* const* bg, const float* const* fg,
+ float* const* out, size_t x0, size_t xsize,
+ const PatchBlending& color_blending,
+ const PatchBlending* ec_blending,
+ const std::vector<ExtraChannelInfo>& extra_channel_info);
+
+} // namespace jxl
+
+#endif // LIB_JXL_BLENDING_H_
diff --git a/third_party/jpeg-xl/lib/jxl/blending_test.cc b/third_party/jpeg-xl/lib/jxl/blending_test.cc
new file mode 100644
index 0000000000..ff4c46c529
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/blending_test.cc
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+using ::testing::SizeIs;
+
+TEST(BlendingTest, Crops) {
+ const PaddedBytes compressed =
+ jxl::test::ReadTestData("jxl/blending/cropped_traffic_light.jxl");
+ CodecInOut decoded;
+ ASSERT_TRUE(test::DecodeFile({}, Span<const uint8_t>(compressed), &decoded));
+ ASSERT_THAT(decoded.frames, SizeIs(4));
+
+ int i = 0;
+ for (const ImageBundle& ib : decoded.frames) {
+ std::ostringstream filename;
+ filename << "jxl/blending/cropped_traffic_light_frame-" << i << ".png";
+ const PaddedBytes compressed_frame =
+ jxl::test::ReadTestData(filename.str());
+ CodecInOut frame;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(compressed_frame), &frame));
+ JXL_EXPECT_OK(SamePixels(ib.color(), *frame.Main().color(), _));
+ ++i;
+ }
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/box_content_decoder.cc b/third_party/jpeg-xl/lib/jxl/box_content_decoder.cc
new file mode 100644
index 0000000000..c4cba3a31a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/box_content_decoder.cc
@@ -0,0 +1,101 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/box_content_decoder.h"
+
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+
+JxlBoxContentDecoder::JxlBoxContentDecoder() {}
+
+JxlBoxContentDecoder::~JxlBoxContentDecoder() {
+ if (brotli_dec) {
+ BrotliDecoderDestroyInstance(brotli_dec);
+ }
+}
+
+void JxlBoxContentDecoder::StartBox(bool brob_decode, bool box_until_eof,
+ size_t contents_size) {
+ if (brotli_dec) {
+ BrotliDecoderDestroyInstance(brotli_dec);
+ brotli_dec = nullptr;
+ }
+ header_done_ = false;
+ brob_decode_ = brob_decode;
+ box_until_eof_ = box_until_eof;
+ remaining_ = box_until_eof ? 0 : contents_size;
+ pos_ = 0;
+}
+
+JxlDecoderStatus JxlBoxContentDecoder::Process(const uint8_t* next_in,
+ size_t avail_in, size_t box_pos,
+ uint8_t** next_out,
+ size_t* avail_out) {
+ next_in += pos_ - box_pos;
+ avail_in -= pos_ - box_pos;
+
+ if (brob_decode_) {
+ if (!header_done_) {
+ if (avail_in < 4) return JXL_DEC_NEED_MORE_INPUT;
+ if (!box_until_eof_) {
+ if (remaining_ < 4) return JXL_DEC_ERROR;
+ remaining_ -= 4;
+ }
+ next_in += 4;
+ avail_in -= 4;
+ pos_ += 4;
+ header_done_ = true;
+ }
+
+ if (!brotli_dec) {
+ brotli_dec = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
+ }
+
+ const uint8_t* next_in_before = next_in;
+ uint8_t* next_out_before = *next_out;
+ msan::MemoryIsInitialized(next_in, avail_in);
+ BrotliDecoderResult res = BrotliDecoderDecompressStream(
+ brotli_dec, &avail_in, &next_in, avail_out, next_out, nullptr);
+ size_t consumed = next_in - next_in_before;
+ size_t produced = *next_out - next_out_before;
+ if (res == BROTLI_DECODER_RESULT_ERROR) {
+ return JXL_DEC_ERROR;
+ }
+ msan::UnpoisonMemory(next_out_before, produced);
+ pos_ += consumed;
+ if (!box_until_eof_) remaining_ -= consumed;
+ if (res == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
+ return JXL_DEC_NEED_MORE_INPUT;
+ }
+ if (res == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
+ return JXL_DEC_BOX_NEED_MORE_OUTPUT;
+ }
+ if (res == BROTLI_DECODER_RESULT_SUCCESS) {
+ return JXL_DEC_SUCCESS;
+ }
+ // unknown Brotli result
+ return JXL_DEC_ERROR;
+ } else {
+ // remaining box bytes as seen from dec->file_pos
+ size_t can_read = avail_in;
+ if (!box_until_eof_) can_read = std::min<size_t>(can_read, remaining_);
+ size_t to_write = std::min<size_t>(can_read, *avail_out);
+ memcpy(*next_out, next_in, to_write);
+
+ *next_out += to_write;
+ *avail_out -= to_write;
+ if (!box_until_eof_) remaining_ -= to_write;
+ pos_ += to_write;
+
+ if (to_write < can_read) return JXL_DEC_BOX_NEED_MORE_OUTPUT;
+
+ if (!box_until_eof_ && remaining_ > 0) return JXL_DEC_NEED_MORE_INPUT;
+
+ return JXL_DEC_SUCCESS;
+ }
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/box_content_decoder.h b/third_party/jpeg-xl/lib/jxl/box_content_decoder.h
new file mode 100644
index 0000000000..6153360a8e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/box_content_decoder.h
@@ -0,0 +1,49 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_BOX_CONTENT_DECODER_H_
+#define LIB_JXL_BOX_CONTENT_DECODER_H_
+
+#include <brotli/decode.h>
+#include <jxl/decode.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <memory>
+#include <vector>
+
+namespace jxl {
+
+/** Outputs the contents of a box in a streaming fashion, either directly, or
+ * optionally decoding with Brotli, in case of a brob box. The input must be
+ * the contents of a box, excluding the box header.
+ */
+class JxlBoxContentDecoder {
+ public:
+ JxlBoxContentDecoder();
+ ~JxlBoxContentDecoder();
+
+ void StartBox(bool brob_decode, bool box_until_eof, size_t contents_size);
+
+ // Outputs decoded bytes from the box, decoding with brotli if needed.
+ // box_pos is the position in the box content which next_in points to.
+ // Returns success, whether more input or output bytes are needed, or error.
+ JxlDecoderStatus Process(const uint8_t* next_in, size_t avail_in,
+ size_t box_pos, uint8_t** next_out,
+ size_t* avail_out);
+
+ private:
+ BrotliDecoderState* brotli_dec;
+
+ bool header_done_;
+ bool brob_decode_;
+ bool box_until_eof_;
+ size_t remaining_;
+ size_t pos_;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_BOX_CONTENT_DECODER_H_
diff --git a/third_party/jpeg-xl/lib/jxl/butteraugli/butteraugli.cc b/third_party/jpeg-xl/lib/jxl/butteraugli/butteraugli.cc
new file mode 100644
index 0000000000..a412becd0d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/butteraugli/butteraugli.cc
@@ -0,0 +1,1988 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// Author: Jyrki Alakuijala (jyrki.alakuijala@gmail.com)
+//
+// The physical architecture of butteraugli is based on the following naming
+// convention:
+// * Opsin - dynamics of the photosensitive chemicals in the retina
+// with their immediate electrical processing
+// * Xyb - hybrid opponent/trichromatic color space
+// x is roughly red-subtract-green.
+// y is yellow.
+// b is blue.
+// Xyb values are computed from Opsin mixing, not directly from rgb.
+// * Mask - for visual masking
+// * Hf - color modeling for spatially high-frequency features
+// * Lf - color modeling for spatially low-frequency features
+// * Diffmap - to cluster and build an image of error between the images
+// * Blur - to hold the smoothing code
+
+#include "lib/jxl/butteraugli/butteraugli.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <new>
+#include <vector>
+
+#if JXL_PROFILER_ENABLED
+#include <chrono>
+#endif // JXL_PROFILER_ENABLED
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/butteraugli/butteraugli.cc"
+#include <hwy/foreach_target.h>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/gauss_blur.h"
+#include "lib/jxl/image_ops.h"
+
+#ifndef JXL_BUTTERAUGLI_ONCE
+#define JXL_BUTTERAUGLI_ONCE
+
+namespace jxl {
+
+std::vector<float> ComputeKernel(float sigma) {
+ const float m = 2.25; // Accuracy increases when m is increased.
+ const double scaler = -1.0 / (2.0 * sigma * sigma);
+ const int diff = std::max<int>(1, m * std::fabs(sigma));
+ std::vector<float> kernel(2 * diff + 1);
+ for (int i = -diff; i <= diff; ++i) {
+ kernel[i + diff] = std::exp(scaler * i * i);
+ }
+ return kernel;
+}
+
+void ConvolveBorderColumn(const ImageF& in, const std::vector<float>& kernel,
+ const size_t x, float* BUTTERAUGLI_RESTRICT row_out) {
+ const size_t offset = kernel.size() / 2;
+ int minx = x < offset ? 0 : x - offset;
+ int maxx = std::min<int>(in.xsize() - 1, x + offset);
+ float weight = 0.0f;
+ for (int j = minx; j <= maxx; ++j) {
+ weight += kernel[j - x + offset];
+ }
+ float scale = 1.0f / weight;
+ for (size_t y = 0; y < in.ysize(); ++y) {
+ const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y);
+ float sum = 0.0f;
+ for (int j = minx; j <= maxx; ++j) {
+ sum += row_in[j] * kernel[j - x + offset];
+ }
+ row_out[y] = sum * scale;
+ }
+}
+
+// Computes a horizontal convolution and transposes the result.
+void ConvolutionWithTranspose(const ImageF& in,
+ const std::vector<float>& kernel,
+ ImageF* BUTTERAUGLI_RESTRICT out) {
+ PROFILER_FUNC;
+ JXL_CHECK(out->xsize() == in.ysize());
+ JXL_CHECK(out->ysize() == in.xsize());
+ const size_t len = kernel.size();
+ const size_t offset = len / 2;
+ float weight_no_border = 0.0f;
+ for (size_t j = 0; j < len; ++j) {
+ weight_no_border += kernel[j];
+ }
+ const float scale_no_border = 1.0f / weight_no_border;
+ const size_t border1 = std::min(in.xsize(), offset);
+ const size_t border2 = in.xsize() > offset ? in.xsize() - offset : 0;
+ std::vector<float> scaled_kernel(len / 2 + 1);
+ for (size_t i = 0; i <= len / 2; ++i) {
+ scaled_kernel[i] = kernel[i] * scale_no_border;
+ }
+
+ // middle
+ switch (len) {
+ case 7: {
+ PROFILER_ZONE("conv7");
+ const float sk0 = scaled_kernel[0];
+ const float sk1 = scaled_kernel[1];
+ const float sk2 = scaled_kernel[2];
+ const float sk3 = scaled_kernel[3];
+ for (size_t y = 0; y < in.ysize(); ++y) {
+ const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+ for (size_t x = border1; x < border2; ++x, ++row_in) {
+ const float sum0 = (row_in[0] + row_in[6]) * sk0;
+ const float sum1 = (row_in[1] + row_in[5]) * sk1;
+ const float sum2 = (row_in[2] + row_in[4]) * sk2;
+ const float sum = (row_in[3]) * sk3 + sum0 + sum1 + sum2;
+ float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+ row_out[y] = sum;
+ }
+ }
+ } break;
+ case 13: {
+ PROFILER_ZONE("conv15");
+ for (size_t y = 0; y < in.ysize(); ++y) {
+ const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+ for (size_t x = border1; x < border2; ++x, ++row_in) {
+ float sum0 = (row_in[0] + row_in[12]) * scaled_kernel[0];
+ float sum1 = (row_in[1] + row_in[11]) * scaled_kernel[1];
+ float sum2 = (row_in[2] + row_in[10]) * scaled_kernel[2];
+ float sum3 = (row_in[3] + row_in[9]) * scaled_kernel[3];
+ sum0 += (row_in[4] + row_in[8]) * scaled_kernel[4];
+ sum1 += (row_in[5] + row_in[7]) * scaled_kernel[5];
+ const float sum = (row_in[6]) * scaled_kernel[6];
+ float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+ row_out[y] = sum + sum0 + sum1 + sum2 + sum3;
+ }
+ }
+ break;
+ }
+ case 15: {
+ PROFILER_ZONE("conv15");
+ for (size_t y = 0; y < in.ysize(); ++y) {
+ const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+ for (size_t x = border1; x < border2; ++x, ++row_in) {
+ float sum0 = (row_in[0] + row_in[14]) * scaled_kernel[0];
+ float sum1 = (row_in[1] + row_in[13]) * scaled_kernel[1];
+ float sum2 = (row_in[2] + row_in[12]) * scaled_kernel[2];
+ float sum3 = (row_in[3] + row_in[11]) * scaled_kernel[3];
+ sum0 += (row_in[4] + row_in[10]) * scaled_kernel[4];
+ sum1 += (row_in[5] + row_in[9]) * scaled_kernel[5];
+ sum2 += (row_in[6] + row_in[8]) * scaled_kernel[6];
+ const float sum = (row_in[7]) * scaled_kernel[7];
+ float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+ row_out[y] = sum + sum0 + sum1 + sum2 + sum3;
+ }
+ }
+ break;
+ }
+ case 33: {
+ PROFILER_ZONE("conv33");
+ for (size_t y = 0; y < in.ysize(); ++y) {
+ const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset;
+ for (size_t x = border1; x < border2; ++x, ++row_in) {
+ float sum0 = (row_in[0] + row_in[32]) * scaled_kernel[0];
+ float sum1 = (row_in[1] + row_in[31]) * scaled_kernel[1];
+ float sum2 = (row_in[2] + row_in[30]) * scaled_kernel[2];
+ float sum3 = (row_in[3] + row_in[29]) * scaled_kernel[3];
+ sum0 += (row_in[4] + row_in[28]) * scaled_kernel[4];
+ sum1 += (row_in[5] + row_in[27]) * scaled_kernel[5];
+ sum2 += (row_in[6] + row_in[26]) * scaled_kernel[6];
+ sum3 += (row_in[7] + row_in[25]) * scaled_kernel[7];
+ sum0 += (row_in[8] + row_in[24]) * scaled_kernel[8];
+ sum1 += (row_in[9] + row_in[23]) * scaled_kernel[9];
+ sum2 += (row_in[10] + row_in[22]) * scaled_kernel[10];
+ sum3 += (row_in[11] + row_in[21]) * scaled_kernel[11];
+ sum0 += (row_in[12] + row_in[20]) * scaled_kernel[12];
+ sum1 += (row_in[13] + row_in[19]) * scaled_kernel[13];
+ sum2 += (row_in[14] + row_in[18]) * scaled_kernel[14];
+ sum3 += (row_in[15] + row_in[17]) * scaled_kernel[15];
+ const float sum = (row_in[16]) * scaled_kernel[16];
+ float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+ row_out[y] = sum + sum0 + sum1 + sum2 + sum3;
+ }
+ }
+ break;
+ }
+ default:
+ printf("Warning: Unexpected kernel size! %" PRIuS "\n", len);
+ for (size_t y = 0; y < in.ysize(); ++y) {
+ const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y);
+ for (size_t x = border1; x < border2; ++x) {
+ const int d = x - offset;
+ float* BUTTERAUGLI_RESTRICT row_out = out->Row(x);
+ float sum = 0.0f;
+ size_t j;
+ for (j = 0; j <= len / 2; ++j) {
+ sum += row_in[d + j] * scaled_kernel[j];
+ }
+ for (; j < len; ++j) {
+ sum += row_in[d + j] * scaled_kernel[len - 1 - j];
+ }
+ row_out[y] = sum;
+ }
+ }
+ }
+ // left border
+ for (size_t x = 0; x < border1; ++x) {
+ ConvolveBorderColumn(in, kernel, x, out->Row(x));
+ }
+
+ // right border
+ for (size_t x = border2; x < in.xsize(); ++x) {
+ ConvolveBorderColumn(in, kernel, x, out->Row(x));
+ }
+}
+
+// A blur somewhat similar to a 2D Gaussian blur.
+// See: https://en.wikipedia.org/wiki/Gaussian_blur
+//
+// This is a bottleneck because the sigma can be quite large (>7). We can use
+// gauss_blur.cc (runtime independent of sigma, closer to a 4*sigma truncated
+// Gaussian and our 2.25 in ComputeKernel), but its boundary conditions are
+// zero-valued. This leads to noticeable differences at the edges of diffmaps.
+// We retain a special case for 5x5 kernels (even faster than gauss_blur),
+// optionally use gauss_blur followed by fixup of the borders for large images,
+// or fall back to the previous truncated FIR followed by a transpose.
+void Blur(const ImageF& in, float sigma, const ButteraugliParams& params,
+ BlurTemp* temp, ImageF* out) {
+ std::vector<float> kernel = ComputeKernel(sigma);
+ // Separable5 does an in-place convolution, so this fast path is not safe if
+ // in aliases out.
+ if (kernel.size() == 5 && &in != out) {
+ float sum_weights = 0.0f;
+ for (const float w : kernel) {
+ sum_weights += w;
+ }
+ const float scale = 1.0f / sum_weights;
+ const float w0 = kernel[2] * scale;
+ const float w1 = kernel[1] * scale;
+ const float w2 = kernel[0] * scale;
+ const WeightsSeparable5 weights = {
+ {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)},
+ {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)},
+ };
+ Separable5(in, Rect(in), weights, /*pool=*/nullptr, out);
+ return;
+ }
+
+ ImageF* JXL_RESTRICT temp_t = temp->GetTransposed(in);
+ ConvolutionWithTranspose(in, kernel, temp_t);
+ ConvolutionWithTranspose(*temp_t, kernel, out);
+}
+
+// Allows PaddedMaltaUnit to call either function via overloading.
+struct MaltaTagLF {};
+struct MaltaTag {};
+
+} // namespace jxl
+
+#endif // JXL_BUTTERAUGLI_ONCE
+
+#include <hwy/highway.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Div;
+using hwy::HWY_NAMESPACE::Gt;
+using hwy::HWY_NAMESPACE::IfThenElse;
+using hwy::HWY_NAMESPACE::IfThenElseZero;
+using hwy::HWY_NAMESPACE::Lt;
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::MulSub;
+using hwy::HWY_NAMESPACE::Neg;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::Vec;
+using hwy::HWY_NAMESPACE::ZeroIfNegative;
+
+template <class D, class V>
+HWY_INLINE V MaximumClamp(D d, V v, double kMaxVal) {
+ static const double kMul = 0.724216145665;
+ const V mul = Set(d, kMul);
+ const V maxval = Set(d, kMaxVal);
+ // If greater than maxval or less than -maxval, replace with if_*.
+ const V if_pos = MulAdd(Sub(v, maxval), mul, maxval);
+ const V if_neg = MulSub(Add(v, maxval), mul, maxval);
+ const V pos_or_v = IfThenElse(Ge(v, maxval), if_pos, v);
+ return IfThenElse(Lt(v, Neg(maxval)), if_neg, pos_or_v);
+}
+
+// Make area around zero less important (remove it).
+template <class D, class V>
+HWY_INLINE V RemoveRangeAroundZero(const D d, const double kw, const V x) {
+ const auto w = Set(d, kw);
+ return IfThenElse(Gt(x, w), Sub(x, w),
+ IfThenElseZero(Lt(x, Neg(w)), Add(x, w)));
+}
+
+// Make area around zero more important (2x it until the limit).
+template <class D, class V>
+HWY_INLINE V AmplifyRangeAroundZero(const D d, const double kw, const V x) {
+ const auto w = Set(d, kw);
+ return IfThenElse(Gt(x, w), Add(x, w),
+ IfThenElse(Lt(x, Neg(w)), Sub(x, w), Add(x, x)));
+}
+
+// XybLowFreqToVals converts from low-frequency XYB space to the 'vals' space.
+// Vals space can be converted to L2-norm space (Euclidean and normalized)
+// through visual masking.
+template <class D, class V>
+HWY_INLINE void XybLowFreqToVals(const D d, const V& x, const V& y,
+ const V& b_arg, V* HWY_RESTRICT valx,
+ V* HWY_RESTRICT valy, V* HWY_RESTRICT valb) {
+ static const double xmul_scalar = 33.832837186260;
+ static const double ymul_scalar = 14.458268100570;
+ static const double bmul_scalar = 49.87984651440;
+ static const double y_to_b_mul_scalar = -0.362267051518;
+ const V xmul = Set(d, xmul_scalar);
+ const V ymul = Set(d, ymul_scalar);
+ const V bmul = Set(d, bmul_scalar);
+ const V y_to_b_mul = Set(d, y_to_b_mul_scalar);
+ const V b = MulAdd(y_to_b_mul, y, b_arg);
+ *valb = Mul(b, bmul);
+ *valx = Mul(x, xmul);
+ *valy = Mul(y, ymul);
+}
+
+void SuppressXByY(const ImageF& in_x, const ImageF& in_y, const double yw,
+ ImageF* HWY_RESTRICT out) {
+ JXL_DASSERT(SameSize(in_x, in_y) && SameSize(in_x, *out));
+ const size_t xsize = in_x.xsize();
+ const size_t ysize = in_x.ysize();
+
+ const HWY_FULL(float) d;
+ static const double s = 0.653020556257;
+ const auto sv = Set(d, s);
+ const auto one_minus_s = Set(d, 1.0 - s);
+ const auto ywv = Set(d, yw);
+
+ for (size_t y = 0; y < ysize; ++y) {
+ const float* HWY_RESTRICT row_x = in_x.ConstRow(y);
+ const float* HWY_RESTRICT row_y = in_y.ConstRow(y);
+ float* HWY_RESTRICT row_out = out->Row(y);
+
+ for (size_t x = 0; x < xsize; x += Lanes(d)) {
+ const auto vx = Load(d, row_x + x);
+ const auto vy = Load(d, row_y + x);
+ const auto scaler =
+ MulAdd(Div(ywv, MulAdd(vy, vy, ywv)), one_minus_s, sv);
+ Store(Mul(scaler, vx), d, row_out + x);
+ }
+ }
+}
+
+static void SeparateFrequencies(size_t xsize, size_t ysize,
+ const ButteraugliParams& params,
+ BlurTemp* blur_temp, const Image3F& xyb,
+ PsychoImage& ps) {
+ PROFILER_FUNC;
+ const HWY_FULL(float) d;
+
+ // Extract lf ...
+ static const double kSigmaLf = 7.15593339443;
+ static const double kSigmaHf = 3.22489901262;
+ static const double kSigmaUhf = 1.56416327805;
+ ps.mf = Image3F(xsize, ysize);
+ ps.hf[0] = ImageF(xsize, ysize);
+ ps.hf[1] = ImageF(xsize, ysize);
+ ps.lf = Image3F(xyb.xsize(), xyb.ysize());
+ ps.mf = Image3F(xyb.xsize(), xyb.ysize());
+ for (int i = 0; i < 3; ++i) {
+ Blur(xyb.Plane(i), kSigmaLf, params, blur_temp, &ps.lf.Plane(i));
+
+ // ... and keep everything else in mf.
+ for (size_t y = 0; y < ysize; ++y) {
+ const float* BUTTERAUGLI_RESTRICT row_xyb = xyb.PlaneRow(i, y);
+ const float* BUTTERAUGLI_RESTRICT row_lf = ps.lf.ConstPlaneRow(i, y);
+ float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(i, y);
+ for (size_t x = 0; x < xsize; x += Lanes(d)) {
+ const auto mf = Sub(Load(d, row_xyb + x), Load(d, row_lf + x));
+ Store(mf, d, row_mf + x);
+ }
+ }
+ if (i == 2) {
+ Blur(ps.mf.Plane(i), kSigmaHf, params, blur_temp, &ps.mf.Plane(i));
+ break;
+ }
+ // Divide mf into mf and hf.
+ for (size_t y = 0; y < ysize; ++y) {
+ float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(i, y);
+ float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[i].Row(y);
+ for (size_t x = 0; x < xsize; x += Lanes(d)) {
+ Store(Load(d, row_mf + x), d, row_hf + x);
+ }
+ }
+ Blur(ps.mf.Plane(i), kSigmaHf, params, blur_temp, &ps.mf.Plane(i));
+ static const double kRemoveMfRange = 0.29;
+ static const double kAddMfRange = 0.1;
+ if (i == 0) {
+ for (size_t y = 0; y < ysize; ++y) {
+ float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(0, y);
+ float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[0].Row(y);
+ for (size_t x = 0; x < xsize; x += Lanes(d)) {
+ auto mf = Load(d, row_mf + x);
+ auto hf = Sub(Load(d, row_hf + x), mf);
+ mf = RemoveRangeAroundZero(d, kRemoveMfRange, mf);
+ Store(mf, d, row_mf + x);
+ Store(hf, d, row_hf + x);
+ }
+ }
+ } else {
+ for (size_t y = 0; y < ysize; ++y) {
+ float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(1, y);
+ float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[1].Row(y);
+ for (size_t x = 0; x < xsize; x += Lanes(d)) {
+ auto mf = Load(d, row_mf + x);
+ auto hf = Sub(Load(d, row_hf + x), mf);
+
+ mf = AmplifyRangeAroundZero(d, kAddMfRange, mf);
+ Store(mf, d, row_mf + x);
+ Store(hf, d, row_hf + x);
+ }
+ }
+ }
+ }
+
+ // Temporarily used as output of SuppressXByY
+ ps.uhf[0] = ImageF(xsize, ysize);
+ ps.uhf[1] = ImageF(xsize, ysize);
+
+ // Suppress red-green by intensity change in the high freq channels.
+ static const double suppress = 46.0;
+ SuppressXByY(ps.hf[0], ps.hf[1], suppress, &ps.uhf[0]);
+ // hf is the SuppressXByY output, uhf will be written below.
+ ps.hf[0].Swap(ps.uhf[0]);
+
+ for (int i = 0; i < 2; ++i) {
+ // Divide hf into hf and uhf.
+ for (size_t y = 0; y < ysize; ++y) {
+ float* BUTTERAUGLI_RESTRICT row_uhf = ps.uhf[i].Row(y);
+ float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[i].Row(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ row_uhf[x] = row_hf[x];
+ }
+ }
+ Blur(ps.hf[i], kSigmaUhf, params, blur_temp, &ps.hf[i]);
+ static const double kRemoveHfRange = 1.5;
+ static const double kAddHfRange = 0.132;
+ static const double kRemoveUhfRange = 0.04;
+ static const double kMaxclampHf = 28.4691806922;
+ static const double kMaxclampUhf = 5.19175294647;
+ static double kMulYHf = 2.155;
+ static double kMulYUhf = 2.69313763794;
+ if (i == 0) {
+ for (size_t y = 0; y < ysize; ++y) {
+ float* BUTTERAUGLI_RESTRICT row_uhf = ps.uhf[0].Row(y);
+ float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[0].Row(y);
+ for (size_t x = 0; x < xsize; x += Lanes(d)) {
+ auto hf = Load(d, row_hf + x);
+ auto uhf = Sub(Load(d, row_uhf + x), hf);
+ hf = RemoveRangeAroundZero(d, kRemoveHfRange, hf);
+ uhf = RemoveRangeAroundZero(d, kRemoveUhfRange, uhf);
+ Store(hf, d, row_hf + x);
+ Store(uhf, d, row_uhf + x);
+ }
+ }
+ } else {
+ for (size_t y = 0; y < ysize; ++y) {
+ float* BUTTERAUGLI_RESTRICT row_uhf = ps.uhf[1].Row(y);
+ float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[1].Row(y);
+ for (size_t x = 0; x < xsize; x += Lanes(d)) {
+ auto hf = Load(d, row_hf + x);
+ hf = MaximumClamp(d, hf, kMaxclampHf);
+
+ auto uhf = Sub(Load(d, row_uhf + x), hf);
+ uhf = MaximumClamp(d, uhf, kMaxclampUhf);
+ uhf = Mul(uhf, Set(d, kMulYUhf));
+ Store(uhf, d, row_uhf + x);
+
+ hf = Mul(hf, Set(d, kMulYHf));
+ hf = AmplifyRangeAroundZero(d, kAddHfRange, hf);
+ Store(hf, d, row_hf + x);
+ }
+ }
+ }
+ }
+ // Modify range around zero code only concerns the high frequency
+ // planes and only the X and Y channels.
+ // Convert low freq xyb to vals space so that we can do a simple squared sum
+ // diff on the low frequencies later.
+ for (size_t y = 0; y < ysize; ++y) {
+ float* BUTTERAUGLI_RESTRICT row_x = ps.lf.PlaneRow(0, y);
+ float* BUTTERAUGLI_RESTRICT row_y = ps.lf.PlaneRow(1, y);
+ float* BUTTERAUGLI_RESTRICT row_b = ps.lf.PlaneRow(2, y);
+ for (size_t x = 0; x < xsize; x += Lanes(d)) {
+ auto valx = Undefined(d);
+ auto valy = Undefined(d);
+ auto valb = Undefined(d);
+ XybLowFreqToVals(d, Load(d, row_x + x), Load(d, row_y + x),
+ Load(d, row_b + x), &valx, &valy, &valb);
+ Store(valx, d, row_x + x);
+ Store(valy, d, row_y + x);
+ Store(valb, d, row_b + x);
+ }
+ }
+}
+
+namespace {
+template <typename V>
+BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d) {
+ return Add(Add(a, b), Add(c, d));
+}
+template <typename V>
+BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e) {
+ return Sum(a, b, c, Add(d, e));
+}
+template <typename V>
+BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e, V f, V g) {
+ return Sum(a, b, c, Sum(d, e, f, g));
+}
+template <typename V>
+BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e, V f, V g, V h, V i) {
+ return Add(Add(Sum(a, b, c, d), Sum(e, f, g, h)), i);
+}
+} // namespace
+
+template <class D>
+Vec<D> MaltaUnit(MaltaTagLF /*tag*/, const D df,
+ const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) {
+ const intptr_t xs3 = 3 * xs;
+
+ const auto center = LoadU(df, d);
+
+ // x grows, y constant
+ const auto sum_yconst = Sum(LoadU(df, d - 4), LoadU(df, d - 2), center,
+ LoadU(df, d + 2), LoadU(df, d + 4));
+ // Will return this, sum of all line kernels
+ auto retval = Mul(sum_yconst, sum_yconst);
+ {
+ // y grows, x constant
+ auto sum = Sum(LoadU(df, d - xs3 - xs), LoadU(df, d - xs - xs), center,
+ LoadU(df, d + xs + xs), LoadU(df, d + xs3 + xs));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ // both grow
+ auto sum = Sum(LoadU(df, d - xs3 - 3), LoadU(df, d - xs - xs - 2), center,
+ LoadU(df, d + xs + xs + 2), LoadU(df, d + xs3 + 3));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ // y grows, x shrinks
+ auto sum = Sum(LoadU(df, d - xs3 + 3), LoadU(df, d - xs - xs + 2), center,
+ LoadU(df, d + xs + xs - 2), LoadU(df, d + xs3 - 3));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ // y grows -4 to 4, x shrinks 1 -> -1
+ auto sum =
+ Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs - xs + 1), center,
+ LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 + xs - 1));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ // y grows -4 to 4, x grows -1 -> 1
+ auto sum =
+ Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs - xs - 1), center,
+ LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + xs + 1));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ // x grows -4 to 4, y grows -1 to 1
+ auto sum = Sum(LoadU(df, d - 4 - xs), LoadU(df, d - 2 - xs), center,
+ LoadU(df, d + 2 + xs), LoadU(df, d + 4 + xs));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ // x grows -4 to 4, y shrinks 1 to -1
+ auto sum = Sum(LoadU(df, d - 4 + xs), LoadU(df, d - 2 + xs), center,
+ LoadU(df, d + 2 - xs), LoadU(df, d + 4 - xs));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ /* 0_________
+ 1__*______
+ 2___*_____
+ 3_________
+ 4____0____
+ 5_________
+ 6_____*___
+ 7______*__
+ 8_________ */
+ auto sum = Sum(LoadU(df, d - xs3 - 2), LoadU(df, d - xs - xs - 1), center,
+ LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + 2));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ /* 0_________
+ 1______*__
+ 2_____*___
+ 3_________
+ 4____0____
+ 5_________
+ 6___*_____
+ 7__*______
+ 8_________ */
+ auto sum = Sum(LoadU(df, d - xs3 + 2), LoadU(df, d - xs - xs + 1), center,
+ LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 - 2));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ /* 0_________
+ 1_________
+ 2_*_______
+ 3__*______
+ 4____0____
+ 5______*__
+ 6_______*_
+ 7_________
+ 8_________ */
+ auto sum = Sum(LoadU(df, d - xs - xs - 3), LoadU(df, d - xs - 2), center,
+ LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 3));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ /* 0_________
+ 1_________
+ 2_______*_
+ 3______*__
+ 4____0____
+ 5__*______
+ 6_*_______
+ 7_________
+ 8_________ */
+ auto sum = Sum(LoadU(df, d - xs - xs + 3), LoadU(df, d - xs + 2), center,
+ LoadU(df, d + xs - 2), LoadU(df, d + xs + xs - 3));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ /* 0_________
+ 1_________
+ 2________*
+ 3______*__
+ 4____0____
+ 5__*______
+ 6*________
+ 7_________
+ 8_________ */
+
+ auto sum = Sum(LoadU(df, d + xs + xs - 4), LoadU(df, d + xs - 2), center,
+ LoadU(df, d - xs + 2), LoadU(df, d - xs - xs + 4));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ /* 0_________
+ 1_________
+ 2*________
+ 3__*______
+ 4____0____
+ 5______*__
+ 6________*
+ 7_________
+ 8_________ */
+ auto sum = Sum(LoadU(df, d - xs - xs - 4), LoadU(df, d - xs - 2), center,
+ LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 4));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ /* 0__*______
+ 1_________
+ 2___*_____
+ 3_________
+ 4____0____
+ 5_________
+ 6_____*___
+ 7_________
+ 8______*__ */
+ auto sum =
+ Sum(LoadU(df, d - xs3 - xs - 2), LoadU(df, d - xs - xs - 1), center,
+ LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + xs + 2));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ /* 0______*__
+ 1_________
+ 2_____*___
+ 3_________
+ 4____0____
+ 5_________
+ 6___*_____
+ 7_________
+ 8__*______ */
+ auto sum =
+ Sum(LoadU(df, d - xs3 - xs + 2), LoadU(df, d - xs - xs + 1), center,
+ LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 + xs - 2));
+ retval = MulAdd(sum, sum, retval);
+ }
+ return retval;
+}
+
+template <class D>
+Vec<D> MaltaUnit(MaltaTag /*tag*/, const D df,
+ const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) {
+ const intptr_t xs3 = 3 * xs;
+
+ const auto center = LoadU(df, d);
+
+ // x grows, y constant
+ const auto sum_yconst =
+ Sum(LoadU(df, d - 4), LoadU(df, d - 3), LoadU(df, d - 2),
+ LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2),
+ LoadU(df, d + 3), LoadU(df, d + 4));
+ // Will return this, sum of all line kernels
+ auto retval = Mul(sum_yconst, sum_yconst);
+
+ {
+ // y grows, x constant
+ auto sum = Sum(LoadU(df, d - xs3 - xs), LoadU(df, d - xs3),
+ LoadU(df, d - xs - xs), LoadU(df, d - xs), center,
+ LoadU(df, d + xs), LoadU(df, d + xs + xs),
+ LoadU(df, d + xs3), LoadU(df, d + xs3 + xs));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ // both grow
+ auto sum = Sum(LoadU(df, d - xs3 - 3), LoadU(df, d - xs - xs - 2),
+ LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1),
+ LoadU(df, d + xs + xs + 2), LoadU(df, d + xs3 + 3));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ // y grows, x shrinks
+ auto sum = Sum(LoadU(df, d - xs3 + 3), LoadU(df, d - xs - xs + 2),
+ LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1),
+ LoadU(df, d + xs + xs - 2), LoadU(df, d + xs3 - 3));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ // y grows -4 to 4, x shrinks 1 -> -1
+ auto sum = Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs3 + 1),
+ LoadU(df, d - xs - xs + 1), LoadU(df, d - xs), center,
+ LoadU(df, d + xs), LoadU(df, d + xs + xs - 1),
+ LoadU(df, d + xs3 - 1), LoadU(df, d + xs3 + xs - 1));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ // y grows -4 to 4, x grows -1 -> 1
+ auto sum = Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs3 - 1),
+ LoadU(df, d - xs - xs - 1), LoadU(df, d - xs), center,
+ LoadU(df, d + xs), LoadU(df, d + xs + xs + 1),
+ LoadU(df, d + xs3 + 1), LoadU(df, d + xs3 + xs + 1));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ // x grows -4 to 4, y grows -1 to 1
+ auto sum =
+ Sum(LoadU(df, d - 4 - xs), LoadU(df, d - 3 - xs), LoadU(df, d - 2 - xs),
+ LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2 + xs),
+ LoadU(df, d + 3 + xs), LoadU(df, d + 4 + xs));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ // x grows -4 to 4, y shrinks 1 to -1
+ auto sum =
+ Sum(LoadU(df, d - 4 + xs), LoadU(df, d - 3 + xs), LoadU(df, d - 2 + xs),
+ LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2 - xs),
+ LoadU(df, d + 3 - xs), LoadU(df, d + 4 - xs));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ /* 0_________
+ 1__*______
+ 2___*_____
+ 3___*_____
+ 4____0____
+ 5_____*___
+ 6_____*___
+ 7______*__
+ 8_________ */
+ auto sum = Sum(LoadU(df, d - xs3 - 2), LoadU(df, d - xs - xs - 1),
+ LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1),
+ LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + 2));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ /* 0_________
+ 1______*__
+ 2_____*___
+ 3_____*___
+ 4____0____
+ 5___*_____
+ 6___*_____
+ 7__*______
+ 8_________ */
+ auto sum = Sum(LoadU(df, d - xs3 + 2), LoadU(df, d - xs - xs + 1),
+ LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1),
+ LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 - 2));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ /* 0_________
+ 1_________
+ 2_*_______
+ 3__**_____
+ 4____0____
+ 5_____**__
+ 6_______*_
+ 7_________
+ 8_________ */
+ auto sum = Sum(LoadU(df, d - xs - xs - 3), LoadU(df, d - xs - 2),
+ LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1),
+ LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 3));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ /* 0_________
+ 1_________
+ 2_______*_
+ 3_____**__
+ 4____0____
+ 5__**_____
+ 6_*_______
+ 7_________
+ 8_________ */
+ auto sum = Sum(LoadU(df, d - xs - xs + 3), LoadU(df, d - xs + 2),
+ LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1),
+ LoadU(df, d + xs - 2), LoadU(df, d + xs + xs - 3));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ /* 0_________
+ 1_________
+ 2_________
+ 3______***
+ 4___*0*___
+ 5***______
+ 6_________
+ 7_________
+ 8_________ */
+
+ auto sum =
+ Sum(LoadU(df, d + xs - 4), LoadU(df, d + xs - 3), LoadU(df, d + xs - 2),
+ LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d - xs + 2),
+ LoadU(df, d - xs + 3), LoadU(df, d - xs + 4));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ /* 0_________
+ 1_________
+ 2_________
+ 3***______
+ 4___*0*___
+ 5______***
+ 6_________
+ 7_________
+ 8_________ */
+ auto sum =
+ Sum(LoadU(df, d - xs - 4), LoadU(df, d - xs - 3), LoadU(df, d - xs - 2),
+ LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + xs + 2),
+ LoadU(df, d + xs + 3), LoadU(df, d + xs + 4));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ /* 0___*_____
+ 1___*_____
+ 2___*_____
+ 3____*____
+ 4____0____
+ 5____*____
+ 6_____*___
+ 7_____*___
+ 8_____*___ */
+ auto sum = Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs3 - 1),
+ LoadU(df, d - xs - xs - 1), LoadU(df, d - xs), center,
+ LoadU(df, d + xs), LoadU(df, d + xs + xs + 1),
+ LoadU(df, d + xs3 + 1), LoadU(df, d + xs3 + xs + 1));
+ retval = MulAdd(sum, sum, retval);
+ }
+ {
+ /* 0_____*___
+ 1_____*___
+ 2____ *___
+ 3____*____
+ 4____0____
+ 5____*____
+ 6___*_____
+ 7___*_____
+ 8___*_____ */
+ auto sum = Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs3 + 1),
+ LoadU(df, d - xs - xs + 1), LoadU(df, d - xs), center,
+ LoadU(df, d + xs), LoadU(df, d + xs + xs - 1),
+ LoadU(df, d + xs3 - 1), LoadU(df, d + xs3 + xs - 1));
+ retval = MulAdd(sum, sum, retval);
+ }
+ return retval;
+}
+
+// Returns MaltaUnit. Avoids bounds-checks when x0 and y0 are known
+// to be far enough from the image borders. "diffs" is a packed image.
+template <class Tag>
+static BUTTERAUGLI_INLINE float PaddedMaltaUnit(const ImageF& diffs,
+ const size_t x0,
+ const size_t y0) {
+ const float* BUTTERAUGLI_RESTRICT d = diffs.ConstRow(y0) + x0;
+ const HWY_CAPPED(float, 1) df;
+ if ((x0 >= 4 && y0 >= 4 && x0 < (diffs.xsize() - 4) &&
+ y0 < (diffs.ysize() - 4))) {
+ return GetLane(MaltaUnit(Tag(), df, d, diffs.PixelsPerRow()));
+ }
+
+ PROFILER_ZONE("Padded Malta");
+ float borderimage[12 * 9]; // round up to 4
+ for (int dy = 0; dy < 9; ++dy) {
+ int y = y0 + dy - 4;
+ if (y < 0 || static_cast<size_t>(y) >= diffs.ysize()) {
+ for (int dx = 0; dx < 12; ++dx) {
+ borderimage[dy * 12 + dx] = 0.0f;
+ }
+ continue;
+ }
+
+ const float* row_diffs = diffs.ConstRow(y);
+ for (int dx = 0; dx < 9; ++dx) {
+ int x = x0 + dx - 4;
+ if (x < 0 || static_cast<size_t>(x) >= diffs.xsize()) {
+ borderimage[dy * 12 + dx] = 0.0f;
+ } else {
+ borderimage[dy * 12 + dx] = row_diffs[x];
+ }
+ }
+ std::fill(borderimage + dy * 12 + 9, borderimage + dy * 12 + 12, 0.0f);
+ }
+ return GetLane(MaltaUnit(Tag(), df, &borderimage[4 * 12 + 4], 12));
+}
+
+template <class Tag>
+static void MaltaDiffMapT(const Tag tag, const ImageF& lum0, const ImageF& lum1,
+ const double w_0gt1, const double w_0lt1,
+ const double norm1, const double len,
+ const double mulli, ImageF* HWY_RESTRICT diffs,
+ Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
+ JXL_DASSERT(SameSize(lum0, lum1) && SameSize(lum0, *diffs));
+ const size_t xsize_ = lum0.xsize();
+ const size_t ysize_ = lum0.ysize();
+
+ const float kWeight0 = 0.5;
+ const float kWeight1 = 0.33;
+
+ const double w_pre0gt1 = mulli * std::sqrt(kWeight0 * w_0gt1) / (len * 2 + 1);
+ const double w_pre0lt1 = mulli * std::sqrt(kWeight1 * w_0lt1) / (len * 2 + 1);
+ const float norm2_0gt1 = w_pre0gt1 * norm1;
+ const float norm2_0lt1 = w_pre0lt1 * norm1;
+
+ for (size_t y = 0; y < ysize_; ++y) {
+ const float* HWY_RESTRICT row0 = lum0.ConstRow(y);
+ const float* HWY_RESTRICT row1 = lum1.ConstRow(y);
+ float* HWY_RESTRICT row_diffs = diffs->Row(y);
+ for (size_t x = 0; x < xsize_; ++x) {
+ const float absval = 0.5f * (std::abs(row0[x]) + std::abs(row1[x]));
+ const float diff = row0[x] - row1[x];
+ const float scaler = norm2_0gt1 / (static_cast<float>(norm1) + absval);
+
+ // Primary symmetric quadratic objective.
+ row_diffs[x] = scaler * diff;
+
+ const float scaler2 = norm2_0lt1 / (static_cast<float>(norm1) + absval);
+ const double fabs0 = std::fabs(row0[x]);
+
+ // Secondary half-open quadratic objectives.
+ const double too_small = 0.55 * fabs0;
+ const double too_big = 1.05 * fabs0;
+
+ if (row0[x] < 0) {
+ if (row1[x] > -too_small) {
+ double impact = scaler2 * (row1[x] + too_small);
+ row_diffs[x] -= impact;
+ } else if (row1[x] < -too_big) {
+ double impact = scaler2 * (-row1[x] - too_big);
+ row_diffs[x] += impact;
+ }
+ } else {
+ if (row1[x] < too_small) {
+ double impact = scaler2 * (too_small - row1[x]);
+ row_diffs[x] += impact;
+ } else if (row1[x] > too_big) {
+ double impact = scaler2 * (row1[x] - too_big);
+ row_diffs[x] -= impact;
+ }
+ }
+ }
+ }
+
+ size_t y0 = 0;
+ // Top
+ for (; y0 < 4; ++y0) {
+ float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->PlaneRow(c, y0);
+ for (size_t x0 = 0; x0 < xsize_; ++x0) {
+ row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0);
+ }
+ }
+
+ const HWY_FULL(float) df;
+ const size_t aligned_x = std::max(size_t(4), Lanes(df));
+ const intptr_t stride = diffs->PixelsPerRow();
+
+ // Middle
+ for (; y0 < ysize_ - 4; ++y0) {
+ const float* BUTTERAUGLI_RESTRICT row_in = diffs->ConstRow(y0);
+ float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->PlaneRow(c, y0);
+ size_t x0 = 0;
+ for (; x0 < aligned_x; ++x0) {
+ row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0);
+ }
+ for (; x0 + Lanes(df) + 4 <= xsize_; x0 += Lanes(df)) {
+ auto diff = Load(df, row_diff + x0);
+ diff = Add(diff, MaltaUnit(Tag(), df, row_in + x0, stride));
+ Store(diff, df, row_diff + x0);
+ }
+
+ for (; x0 < xsize_; ++x0) {
+ row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0);
+ }
+ }
+
+ // Bottom
+ for (; y0 < ysize_; ++y0) {
+ float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->PlaneRow(c, y0);
+ for (size_t x0 = 0; x0 < xsize_; ++x0) {
+ row_diff[x0] += PaddedMaltaUnit<Tag>(*diffs, x0, y0);
+ }
+ }
+}
+
+// Need non-template wrapper functions for HWY_EXPORT.
+void MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
+ const double w_0lt1, const double norm1, const double len,
+ const double mulli, ImageF* HWY_RESTRICT diffs,
+ Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
+ MaltaDiffMapT(MaltaTag(), lum0, lum1, w_0gt1, w_0lt1, norm1, len, mulli,
+ diffs, block_diff_ac, c);
+}
+
+void MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
+ const double w_0lt1, const double norm1, const double len,
+ const double mulli, ImageF* HWY_RESTRICT diffs,
+ Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
+ MaltaDiffMapT(MaltaTagLF(), lum0, lum1, w_0gt1, w_0lt1, norm1, len, mulli,
+ diffs, block_diff_ac, c);
+}
+
+void DiffPrecompute(const ImageF& xyb, float mul, float bias_arg, ImageF* out) {
+ PROFILER_FUNC;
+ const size_t xsize = xyb.xsize();
+ const size_t ysize = xyb.ysize();
+ const float bias = mul * bias_arg;
+ const float sqrt_bias = sqrt(bias);
+ for (size_t y = 0; y < ysize; ++y) {
+ const float* BUTTERAUGLI_RESTRICT row_in = xyb.Row(y);
+ float* BUTTERAUGLI_RESTRICT row_out = out->Row(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ // kBias makes sqrt behave more linearly.
+ row_out[x] = sqrt(mul * std::abs(row_in[x]) + bias) - sqrt_bias;
+ }
+ }
+}
+
+// std::log(80.0) / std::log(255.0);
+constexpr float kIntensityTargetNormalizationHack = 0.79079917404f;
+static const float kInternalGoodQualityThreshold =
+ 17.83f * kIntensityTargetNormalizationHack;
+static const float kGlobalScale = 1.0 / kInternalGoodQualityThreshold;
+
+void StoreMin3(const float v, float& min0, float& min1, float& min2) {
+ if (v < min2) {
+ if (v < min0) {
+ min2 = min1;
+ min1 = min0;
+ min0 = v;
+ } else if (v < min1) {
+ min2 = min1;
+ min1 = v;
+ } else {
+ min2 = v;
+ }
+ }
+}
+
+// Look for smooth areas near the area of degradation.
+// If the areas area generally smooth, don't do masking.
+void FuzzyErosion(const ImageF& from, ImageF* to) {
+ const size_t xsize = from.xsize();
+ const size_t ysize = from.ysize();
+ static const int kStep = 3;
+ for (size_t y = 0; y < ysize; ++y) {
+ for (size_t x = 0; x < xsize; ++x) {
+ float min0 = from.Row(y)[x];
+ float min1 = 2 * min0;
+ float min2 = min1;
+ if (x >= kStep) {
+ float v = from.Row(y)[x - kStep];
+ StoreMin3(v, min0, min1, min2);
+ if (y >= kStep) {
+ float v = from.Row(y - kStep)[x - kStep];
+ StoreMin3(v, min0, min1, min2);
+ }
+ if (y < ysize - kStep) {
+ float v = from.Row(y + kStep)[x - kStep];
+ StoreMin3(v, min0, min1, min2);
+ }
+ }
+ if (x < xsize - kStep) {
+ float v = from.Row(y)[x + kStep];
+ StoreMin3(v, min0, min1, min2);
+ if (y >= kStep) {
+ float v = from.Row(y - kStep)[x + kStep];
+ StoreMin3(v, min0, min1, min2);
+ }
+ if (y < ysize - kStep) {
+ float v = from.Row(y + kStep)[x + kStep];
+ StoreMin3(v, min0, min1, min2);
+ }
+ }
+ if (y >= kStep) {
+ float v = from.Row(y - kStep)[x];
+ StoreMin3(v, min0, min1, min2);
+ }
+ if (y < ysize - kStep) {
+ float v = from.Row(y + kStep)[x];
+ StoreMin3(v, min0, min1, min2);
+ }
+ to->Row(y)[x] = (0.45f * min0 + 0.3f * min1 + 0.25f * min2);
+ }
+ }
+}
+
+// Compute values of local frequency and dc masking based on the activity
+// in the two images. img_diff_ac may be null.
+void Mask(const ImageF& mask0, const ImageF& mask1,
+ const ButteraugliParams& params, BlurTemp* blur_temp,
+ ImageF* BUTTERAUGLI_RESTRICT mask,
+ ImageF* BUTTERAUGLI_RESTRICT diff_ac) {
+ // Only X and Y components are involved in masking. B's influence
+ // is considered less important in the high frequency area, and we
+ // don't model masking from lower frequency signals.
+ PROFILER_FUNC;
+ const size_t xsize = mask0.xsize();
+ const size_t ysize = mask0.ysize();
+ *mask = ImageF(xsize, ysize);
+ static const float kMul = 6.19424080439;
+ static const float kBias = 12.61050594197;
+ static const float kRadius = 2.7;
+ ImageF diff0(xsize, ysize);
+ ImageF diff1(xsize, ysize);
+ ImageF blurred0(xsize, ysize);
+ ImageF blurred1(xsize, ysize);
+ DiffPrecompute(mask0, kMul, kBias, &diff0);
+ DiffPrecompute(mask1, kMul, kBias, &diff1);
+ Blur(diff0, kRadius, params, blur_temp, &blurred0);
+ FuzzyErosion(blurred0, &diff0);
+ Blur(diff1, kRadius, params, blur_temp, &blurred1);
+ FuzzyErosion(blurred1, &diff1);
+ for (size_t y = 0; y < ysize; ++y) {
+ for (size_t x = 0; x < xsize; ++x) {
+ mask->Row(y)[x] = diff0.Row(y)[x];
+ if (diff_ac != nullptr) {
+ static const float kMaskToErrorMul = 10.0;
+ float diff = blurred0.Row(y)[x] - blurred1.Row(y)[x];
+ diff_ac->Row(y)[x] += kMaskToErrorMul * diff * diff;
+ }
+ }
+ }
+}
+
+// `diff_ac` may be null.
+void MaskPsychoImage(const PsychoImage& pi0, const PsychoImage& pi1,
+ const size_t xsize, const size_t ysize,
+ const ButteraugliParams& params, Image3F* temp,
+ BlurTemp* blur_temp, ImageF* BUTTERAUGLI_RESTRICT mask,
+ ImageF* BUTTERAUGLI_RESTRICT diff_ac) {
+ ImageF mask0(xsize, ysize);
+ ImageF mask1(xsize, ysize);
+ static const float muls[3] = {
+ 2.5f,
+ 0.4f,
+ 0.4f,
+ };
+ // Silly and unoptimized approach here. TODO(jyrki): rework this.
+ for (size_t y = 0; y < ysize; ++y) {
+ const float* BUTTERAUGLI_RESTRICT row_y_hf0 = pi0.hf[1].Row(y);
+ const float* BUTTERAUGLI_RESTRICT row_y_hf1 = pi1.hf[1].Row(y);
+ const float* BUTTERAUGLI_RESTRICT row_y_uhf0 = pi0.uhf[1].Row(y);
+ const float* BUTTERAUGLI_RESTRICT row_y_uhf1 = pi1.uhf[1].Row(y);
+ const float* BUTTERAUGLI_RESTRICT row_x_hf0 = pi0.hf[0].Row(y);
+ const float* BUTTERAUGLI_RESTRICT row_x_hf1 = pi1.hf[0].Row(y);
+ const float* BUTTERAUGLI_RESTRICT row_x_uhf0 = pi0.uhf[0].Row(y);
+ const float* BUTTERAUGLI_RESTRICT row_x_uhf1 = pi1.uhf[0].Row(y);
+ float* BUTTERAUGLI_RESTRICT row0 = mask0.Row(y);
+ float* BUTTERAUGLI_RESTRICT row1 = mask1.Row(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ float xdiff0 = (row_x_uhf0[x] + row_x_hf0[x]) * muls[0];
+ float xdiff1 = (row_x_uhf1[x] + row_x_hf1[x]) * muls[0];
+ float ydiff0 = row_y_uhf0[x] * muls[1] + row_y_hf0[x] * muls[2];
+ float ydiff1 = row_y_uhf1[x] * muls[1] + row_y_hf1[x] * muls[2];
+ row0[x] = xdiff0 * xdiff0 + ydiff0 * ydiff0;
+ row0[x] = sqrt(row0[x]);
+ row1[x] = xdiff1 * xdiff1 + ydiff1 * ydiff1;
+ row1[x] = sqrt(row1[x]);
+ }
+ }
+ Mask(mask0, mask1, params, blur_temp, mask, diff_ac);
+}
+
+double MaskY(double delta) {
+ static const double offset = 0.829591754942;
+ static const double scaler = 0.451936922203;
+ static const double mul = 2.5485944793;
+ const double c = mul / ((scaler * delta) + offset);
+ const double retval = kGlobalScale * (1.0 + c);
+ return retval * retval;
+}
+
+double MaskDcY(double delta) {
+ static const double offset = 0.20025578522;
+ static const double scaler = 3.87449418804;
+ static const double mul = 0.505054525019;
+ const double c = mul / ((scaler * delta) + offset);
+ const double retval = kGlobalScale * (1.0 + c);
+ return retval * retval;
+}
+
+inline float MaskColor(const float color[3], const float mask) {
+ return color[0] * mask + color[1] * mask + color[2] * mask;
+}
+
+// Diffmap := sqrt of sum{diff images by multiplied by X and Y/B masks}
+void CombineChannelsToDiffmap(const ImageF& mask, const Image3F& block_diff_dc,
+ const Image3F& block_diff_ac, float xmul,
+ ImageF* result) {
+ PROFILER_FUNC;
+ JXL_CHECK(SameSize(mask, *result));
+ size_t xsize = mask.xsize();
+ size_t ysize = mask.ysize();
+ for (size_t y = 0; y < ysize; ++y) {
+ float* BUTTERAUGLI_RESTRICT row_out = result->Row(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ float val = mask.Row(y)[x];
+ float maskval = MaskY(val);
+ float dc_maskval = MaskDcY(val);
+ float diff_dc[3];
+ float diff_ac[3];
+ for (int i = 0; i < 3; ++i) {
+ diff_dc[i] = block_diff_dc.PlaneRow(i, y)[x];
+ diff_ac[i] = block_diff_ac.PlaneRow(i, y)[x];
+ }
+ diff_ac[0] *= xmul;
+ diff_dc[0] *= xmul;
+ row_out[x] =
+ sqrt(MaskColor(diff_dc, dc_maskval) + MaskColor(diff_ac, maskval));
+ }
+ }
+}
+
+// Adds weighted L2 difference between i0 and i1 to diffmap.
+static void L2Diff(const ImageF& i0, const ImageF& i1, const float w,
+ Image3F* BUTTERAUGLI_RESTRICT diffmap, size_t c) {
+ if (w == 0) return;
+
+ const HWY_FULL(float) d;
+ const auto weight = Set(d, w);
+
+ for (size_t y = 0; y < i0.ysize(); ++y) {
+ const float* BUTTERAUGLI_RESTRICT row0 = i0.ConstRow(y);
+ const float* BUTTERAUGLI_RESTRICT row1 = i1.ConstRow(y);
+ float* BUTTERAUGLI_RESTRICT row_diff = diffmap->PlaneRow(c, y);
+
+ for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) {
+ const auto diff = Sub(Load(d, row0 + x), Load(d, row1 + x));
+ const auto diff2 = Mul(diff, diff);
+ const auto prev = Load(d, row_diff + x);
+ Store(MulAdd(diff2, weight, prev), d, row_diff + x);
+ }
+ }
+}
+
+// Initializes diffmap to the weighted L2 difference between i0 and i1.
+static void SetL2Diff(const ImageF& i0, const ImageF& i1, const float w,
+ Image3F* BUTTERAUGLI_RESTRICT diffmap, size_t c) {
+ if (w == 0) return;
+
+ const HWY_FULL(float) d;
+ const auto weight = Set(d, w);
+
+ for (size_t y = 0; y < i0.ysize(); ++y) {
+ const float* BUTTERAUGLI_RESTRICT row0 = i0.ConstRow(y);
+ const float* BUTTERAUGLI_RESTRICT row1 = i1.ConstRow(y);
+ float* BUTTERAUGLI_RESTRICT row_diff = diffmap->PlaneRow(c, y);
+
+ for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) {
+ const auto diff = Sub(Load(d, row0 + x), Load(d, row1 + x));
+ const auto diff2 = Mul(diff, diff);
+ Store(Mul(diff2, weight), d, row_diff + x);
+ }
+ }
+}
+
+// i0 is the original image.
+// i1 is the deformed copy.
+static void L2DiffAsymmetric(const ImageF& i0, const ImageF& i1, float w_0gt1,
+ float w_0lt1,
+ Image3F* BUTTERAUGLI_RESTRICT diffmap, size_t c) {
+ if (w_0gt1 == 0 && w_0lt1 == 0) {
+ return;
+ }
+
+ const HWY_FULL(float) d;
+ const auto vw_0gt1 = Set(d, w_0gt1 * 0.8);
+ const auto vw_0lt1 = Set(d, w_0lt1 * 0.8);
+
+ for (size_t y = 0; y < i0.ysize(); ++y) {
+ const float* BUTTERAUGLI_RESTRICT row0 = i0.Row(y);
+ const float* BUTTERAUGLI_RESTRICT row1 = i1.Row(y);
+ float* BUTTERAUGLI_RESTRICT row_diff = diffmap->PlaneRow(c, y);
+
+ for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) {
+ const auto val0 = Load(d, row0 + x);
+ const auto val1 = Load(d, row1 + x);
+
+ // Primary symmetric quadratic objective.
+ const auto diff = Sub(val0, val1);
+ auto total = MulAdd(Mul(diff, diff), vw_0gt1, Load(d, row_diff + x));
+
+ // Secondary half-open quadratic objectives.
+ const auto fabs0 = Abs(val0);
+ const auto too_small = Mul(Set(d, 0.4), fabs0);
+ const auto too_big = fabs0;
+
+ const auto if_neg = IfThenElse(
+ Gt(val1, Neg(too_small)), Add(val1, too_small),
+ IfThenElseZero(Lt(val1, Neg(too_big)), Sub(Neg(val1), too_big)));
+ const auto if_pos =
+ IfThenElse(Lt(val1, too_small), Sub(too_small, val1),
+ IfThenElseZero(Gt(val1, too_big), Sub(val1, too_big)));
+ const auto v = IfThenElse(Lt(val0, Zero(d)), if_neg, if_pos);
+ total = MulAdd(vw_0lt1, Mul(v, v), total);
+ Store(total, d, row_diff + x);
+ }
+ }
+}
+
+// A simple HDR compatible gamma function.
+template <class DF, class V>
+V Gamma(const DF df, V v) {
+ // ln(2) constant folded in because we want std::log but have FastLog2f.
+ const auto kRetMul = Set(df, 19.245013259874995f * 0.693147180559945f);
+ const auto kRetAdd = Set(df, -23.16046239805755);
+ // This should happen rarely, but may lead to a NaN in log, which is
+ // undesirable. Since negative photons don't exist we solve the NaNs by
+ // clamping here.
+ v = ZeroIfNegative(v);
+
+ const auto biased = Add(v, Set(df, 9.9710635769299145));
+ const auto log = FastLog2f(df, biased);
+ // We could fold this into a custom Log2 polynomial, but there would be
+ // relatively little gain.
+ return MulAdd(kRetMul, log, kRetAdd);
+}
+
+template <bool Clamp, class DF, class V>
+BUTTERAUGLI_INLINE void OpsinAbsorbance(const DF df, const V& in0, const V& in1,
+ const V& in2, V* JXL_RESTRICT out0,
+ V* JXL_RESTRICT out1,
+ V* JXL_RESTRICT out2) {
+ // https://en.wikipedia.org/wiki/Photopsin absorbance modeling.
+ static const double mixi0 = 0.29956550340058319;
+ static const double mixi1 = 0.63373087833825936;
+ static const double mixi2 = 0.077705617820981968;
+ static const double mixi3 = 1.7557483643287353;
+ static const double mixi4 = 0.22158691104574774;
+ static const double mixi5 = 0.69391388044116142;
+ static const double mixi6 = 0.0987313588422;
+ static const double mixi7 = 1.7557483643287353;
+ static const double mixi8 = 0.02;
+ static const double mixi9 = 0.02;
+ static const double mixi10 = 0.20480129041026129;
+ static const double mixi11 = 12.226454707163354;
+
+ const V mix0 = Set(df, mixi0);
+ const V mix1 = Set(df, mixi1);
+ const V mix2 = Set(df, mixi2);
+ const V mix3 = Set(df, mixi3);
+ const V mix4 = Set(df, mixi4);
+ const V mix5 = Set(df, mixi5);
+ const V mix6 = Set(df, mixi6);
+ const V mix7 = Set(df, mixi7);
+ const V mix8 = Set(df, mixi8);
+ const V mix9 = Set(df, mixi9);
+ const V mix10 = Set(df, mixi10);
+ const V mix11 = Set(df, mixi11);
+
+ *out0 = MulAdd(mix0, in0, MulAdd(mix1, in1, MulAdd(mix2, in2, mix3)));
+ *out1 = MulAdd(mix4, in0, MulAdd(mix5, in1, MulAdd(mix6, in2, mix7)));
+ *out2 = MulAdd(mix8, in0, MulAdd(mix9, in1, MulAdd(mix10, in2, mix11)));
+
+ if (Clamp) {
+ *out0 = Max(*out0, mix3);
+ *out1 = Max(*out1, mix7);
+ *out2 = Max(*out2, mix11);
+ }
+}
+
+// `blurred` is a temporary image used inside this function and not returned.
+Image3F OpsinDynamicsImage(const Image3F& rgb, const ButteraugliParams& params,
+ Image3F* blurred, BlurTemp* blur_temp) {
+ PROFILER_FUNC;
+ Image3F xyb(rgb.xsize(), rgb.ysize());
+ const double kSigma = 1.2;
+ Blur(rgb.Plane(0), kSigma, params, blur_temp, &blurred->Plane(0));
+ Blur(rgb.Plane(1), kSigma, params, blur_temp, &blurred->Plane(1));
+ Blur(rgb.Plane(2), kSigma, params, blur_temp, &blurred->Plane(2));
+ const HWY_FULL(float) df;
+ const auto intensity_target_multiplier = Set(df, params.intensity_target);
+ for (size_t y = 0; y < rgb.ysize(); ++y) {
+ const float* BUTTERAUGLI_RESTRICT row_r = rgb.ConstPlaneRow(0, y);
+ const float* BUTTERAUGLI_RESTRICT row_g = rgb.ConstPlaneRow(1, y);
+ const float* BUTTERAUGLI_RESTRICT row_b = rgb.ConstPlaneRow(2, y);
+ const float* BUTTERAUGLI_RESTRICT row_blurred_r =
+ blurred->ConstPlaneRow(0, y);
+ const float* BUTTERAUGLI_RESTRICT row_blurred_g =
+ blurred->ConstPlaneRow(1, y);
+ const float* BUTTERAUGLI_RESTRICT row_blurred_b =
+ blurred->ConstPlaneRow(2, y);
+ float* BUTTERAUGLI_RESTRICT row_out_x = xyb.PlaneRow(0, y);
+ float* BUTTERAUGLI_RESTRICT row_out_y = xyb.PlaneRow(1, y);
+ float* BUTTERAUGLI_RESTRICT row_out_b = xyb.PlaneRow(2, y);
+ const auto min = Set(df, 1e-4f);
+ for (size_t x = 0; x < rgb.xsize(); x += Lanes(df)) {
+ auto sensitivity0 = Undefined(df);
+ auto sensitivity1 = Undefined(df);
+ auto sensitivity2 = Undefined(df);
+ {
+ // Calculate sensitivity based on the smoothed image gamma derivative.
+ auto pre_mixed0 = Undefined(df);
+ auto pre_mixed1 = Undefined(df);
+ auto pre_mixed2 = Undefined(df);
+ OpsinAbsorbance<true>(
+ df, Mul(Load(df, row_blurred_r + x), intensity_target_multiplier),
+ Mul(Load(df, row_blurred_g + x), intensity_target_multiplier),
+ Mul(Load(df, row_blurred_b + x), intensity_target_multiplier),
+ &pre_mixed0, &pre_mixed1, &pre_mixed2);
+ pre_mixed0 = Max(pre_mixed0, min);
+ pre_mixed1 = Max(pre_mixed1, min);
+ pre_mixed2 = Max(pre_mixed2, min);
+ sensitivity0 = Div(Gamma(df, pre_mixed0), pre_mixed0);
+ sensitivity1 = Div(Gamma(df, pre_mixed1), pre_mixed1);
+ sensitivity2 = Div(Gamma(df, pre_mixed2), pre_mixed2);
+ sensitivity0 = Max(sensitivity0, min);
+ sensitivity1 = Max(sensitivity1, min);
+ sensitivity2 = Max(sensitivity2, min);
+ }
+ auto cur_mixed0 = Undefined(df);
+ auto cur_mixed1 = Undefined(df);
+ auto cur_mixed2 = Undefined(df);
+ OpsinAbsorbance<false>(
+ df, Mul(Load(df, row_r + x), intensity_target_multiplier),
+ Mul(Load(df, row_g + x), intensity_target_multiplier),
+ Mul(Load(df, row_b + x), intensity_target_multiplier), &cur_mixed0,
+ &cur_mixed1, &cur_mixed2);
+ cur_mixed0 = Mul(cur_mixed0, sensitivity0);
+ cur_mixed1 = Mul(cur_mixed1, sensitivity1);
+ cur_mixed2 = Mul(cur_mixed2, sensitivity2);
+ // This is a kludge. The negative values should be zeroed away before
+ // blurring. Ideally there would be no negative values in the first place.
+ const auto min01 = Set(df, 1.7557483643287353f);
+ const auto min2 = Set(df, 12.226454707163354f);
+ cur_mixed0 = Max(cur_mixed0, min01);
+ cur_mixed1 = Max(cur_mixed1, min01);
+ cur_mixed2 = Max(cur_mixed2, min2);
+
+ Store(Sub(cur_mixed0, cur_mixed1), df, row_out_x + x);
+ Store(Add(cur_mixed0, cur_mixed1), df, row_out_y + x);
+ Store(cur_mixed2, df, row_out_b + x);
+ }
+ }
+ return xyb;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(SeparateFrequencies); // Local function.
+HWY_EXPORT(MaskPsychoImage); // Local function.
+HWY_EXPORT(L2DiffAsymmetric); // Local function.
+HWY_EXPORT(L2Diff); // Local function.
+HWY_EXPORT(SetL2Diff); // Local function.
+HWY_EXPORT(CombineChannelsToDiffmap); // Local function.
+HWY_EXPORT(MaltaDiffMap); // Local function.
+HWY_EXPORT(MaltaDiffMapLF); // Local function.
+HWY_EXPORT(OpsinDynamicsImage); // Local function.
+
+#if BUTTERAUGLI_ENABLE_CHECKS
+
+static inline bool IsNan(const float x) {
+ uint32_t bits;
+ memcpy(&bits, &x, sizeof(bits));
+ const uint32_t bitmask_exp = 0x7F800000;
+ return (bits & bitmask_exp) == bitmask_exp && (bits & 0x7FFFFF);
+}
+
+static inline bool IsNan(const double x) {
+ uint64_t bits;
+ memcpy(&bits, &x, sizeof(bits));
+ return (0x7ff0000000000001ULL <= bits && bits <= 0x7fffffffffffffffULL) ||
+ (0xfff0000000000001ULL <= bits && bits <= 0xffffffffffffffffULL);
+}
+
+static inline void CheckImage(const ImageF& image, const char* name) {
+ PROFILER_FUNC;
+ for (size_t y = 0; y < image.ysize(); ++y) {
+ const float* BUTTERAUGLI_RESTRICT row = image.Row(y);
+ for (size_t x = 0; x < image.xsize(); ++x) {
+ if (IsNan(row[x])) {
+ printf("NAN: Image %s @ %" PRIuS ",%" PRIuS " (of %" PRIuS ",%" PRIuS
+ ")\n",
+ name, x, y, image.xsize(), image.ysize());
+ exit(1);
+ }
+ }
+ }
+}
+
+#define CHECK_NAN(x, str) \
+ do { \
+ if (IsNan(x)) { \
+ printf("%d: %s\n", __LINE__, str); \
+ abort(); \
+ } \
+ } while (0)
+
+#define CHECK_IMAGE(image, name) CheckImage(image, name)
+
+#else // BUTTERAUGLI_ENABLE_CHECKS
+
+#define CHECK_NAN(x, str)
+#define CHECK_IMAGE(image, name)
+
+#endif // BUTTERAUGLI_ENABLE_CHECKS
+
+// Calculate a 2x2 subsampled image for purposes of recursive butteraugli at
+// multiresolution.
+static Image3F SubSample2x(const Image3F& in) {
+ size_t xs = (in.xsize() + 1) / 2;
+ size_t ys = (in.ysize() + 1) / 2;
+ Image3F retval(xs, ys);
+ for (size_t c = 0; c < 3; ++c) {
+ for (size_t y = 0; y < ys; ++y) {
+ for (size_t x = 0; x < xs; ++x) {
+ retval.PlaneRow(c, y)[x] = 0;
+ }
+ }
+ }
+ for (size_t c = 0; c < 3; ++c) {
+ for (size_t y = 0; y < in.ysize(); ++y) {
+ for (size_t x = 0; x < in.xsize(); ++x) {
+ retval.PlaneRow(c, y / 2)[x / 2] += 0.25f * in.PlaneRow(c, y)[x];
+ }
+ }
+ if ((in.xsize() & 1) != 0) {
+ for (size_t y = 0; y < retval.ysize(); ++y) {
+ size_t last_column = retval.xsize() - 1;
+ retval.PlaneRow(c, y)[last_column] *= 2.0f;
+ }
+ }
+ if ((in.ysize() & 1) != 0) {
+ for (size_t x = 0; x < retval.xsize(); ++x) {
+ size_t last_row = retval.ysize() - 1;
+ retval.PlaneRow(c, last_row)[x] *= 2.0f;
+ }
+ }
+ }
+ return retval;
+}
+
+// Supersample src by 2x and add it to dest.
+static void AddSupersampled2x(const ImageF& src, float w, ImageF& dest) {
+ for (size_t y = 0; y < dest.ysize(); ++y) {
+ for (size_t x = 0; x < dest.xsize(); ++x) {
+ // There will be less errors from the more averaged images.
+ // We take it into account to some extent using a scaler.
+ static const double kHeuristicMixingValue = 0.3;
+ dest.Row(y)[x] *= 1.0 - kHeuristicMixingValue * w;
+ dest.Row(y)[x] += w * src.Row(y / 2)[x / 2];
+ }
+ }
+}
+
+Image3F* ButteraugliComparator::Temp() const {
+ bool was_in_use = temp_in_use_.test_and_set(std::memory_order_acq_rel);
+ JXL_ASSERT(!was_in_use);
+ (void)was_in_use;
+ return &temp_;
+}
+
+void ButteraugliComparator::ReleaseTemp() const { temp_in_use_.clear(); }
+
+ButteraugliComparator::ButteraugliComparator(const Image3F& rgb0,
+ const ButteraugliParams& params)
+ : xsize_(rgb0.xsize()),
+ ysize_(rgb0.ysize()),
+ params_(params),
+ temp_(xsize_, ysize_) {
+ if (xsize_ < 8 || ysize_ < 8) {
+ return;
+ }
+
+ Image3F xyb0 = HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)(rgb0, params, Temp(),
+ &blur_temp_);
+ ReleaseTemp();
+ HWY_DYNAMIC_DISPATCH(SeparateFrequencies)
+ (xsize_, ysize_, params_, &blur_temp_, xyb0, pi0_);
+
+ // Awful recursive construction of samples of different resolution.
+ // This is an after-thought and possibly somewhat parallel in
+ // functionality with the PsychoImage multi-resolution approach.
+ sub_.reset(new ButteraugliComparator(SubSample2x(rgb0), params));
+}
+
+void ButteraugliComparator::Mask(ImageF* BUTTERAUGLI_RESTRICT mask) const {
+ HWY_DYNAMIC_DISPATCH(MaskPsychoImage)
+ (pi0_, pi0_, xsize_, ysize_, params_, Temp(), &blur_temp_, mask, nullptr);
+ ReleaseTemp();
+}
+
+void ButteraugliComparator::Diffmap(const Image3F& rgb1, ImageF& result) const {
+ PROFILER_FUNC;
+ if (xsize_ < 8 || ysize_ < 8) {
+ ZeroFillImage(&result);
+ return;
+ }
+ const Image3F xyb1 = HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)(
+ rgb1, params_, Temp(), &blur_temp_);
+ ReleaseTemp();
+ DiffmapOpsinDynamicsImage(xyb1, result);
+ if (sub_) {
+ if (sub_->xsize_ < 8 || sub_->ysize_ < 8) {
+ return;
+ }
+ const Image3F sub_xyb = HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)(
+ SubSample2x(rgb1), params_, sub_->Temp(), &sub_->blur_temp_);
+ sub_->ReleaseTemp();
+ ImageF subresult;
+ sub_->DiffmapOpsinDynamicsImage(sub_xyb, subresult);
+ AddSupersampled2x(subresult, 0.5, result);
+ }
+}
+
+void ButteraugliComparator::DiffmapOpsinDynamicsImage(const Image3F& xyb1,
+ ImageF& result) const {
+ PROFILER_FUNC;
+ if (xsize_ < 8 || ysize_ < 8) {
+ ZeroFillImage(&result);
+ return;
+ }
+ PsychoImage pi1;
+ HWY_DYNAMIC_DISPATCH(SeparateFrequencies)
+ (xsize_, ysize_, params_, &blur_temp_, xyb1, pi1);
+ result = ImageF(xsize_, ysize_);
+ DiffmapPsychoImage(pi1, result);
+}
+
+namespace {
+
+void MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
+ const double w_0lt1, const double norm1,
+ ImageF* HWY_RESTRICT diffs,
+ Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
+ PROFILER_FUNC;
+ const double len = 3.75;
+ static const double mulli = 0.39905817637;
+ HWY_DYNAMIC_DISPATCH(MaltaDiffMap)
+ (lum0, lum1, w_0gt1, w_0lt1, norm1, len, mulli, diffs, block_diff_ac, c);
+}
+
+void MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
+ const double w_0lt1, const double norm1,
+ ImageF* HWY_RESTRICT diffs,
+ Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
+ PROFILER_FUNC;
+ const double len = 3.75;
+ static const double mulli = 0.611612573796;
+ HWY_DYNAMIC_DISPATCH(MaltaDiffMapLF)
+ (lum0, lum1, w_0gt1, w_0lt1, norm1, len, mulli, diffs, block_diff_ac, c);
+}
+
+} // namespace
+
+void ButteraugliComparator::DiffmapPsychoImage(const PsychoImage& pi1,
+ ImageF& diffmap) const {
+ PROFILER_FUNC;
+ if (xsize_ < 8 || ysize_ < 8) {
+ ZeroFillImage(&diffmap);
+ return;
+ }
+
+ const float hf_asymmetry_ = params_.hf_asymmetry;
+ const float xmul_ = params_.xmul;
+
+ ImageF diffs(xsize_, ysize_);
+ Image3F block_diff_ac(xsize_, ysize_);
+ ZeroFillImage(&block_diff_ac);
+ static const double wUhfMalta = 1.10039032555;
+ static const double norm1Uhf = 71.7800275169;
+ MaltaDiffMap(pi0_.uhf[1], pi1.uhf[1], wUhfMalta * hf_asymmetry_,
+ wUhfMalta / hf_asymmetry_, norm1Uhf, &diffs, &block_diff_ac, 1);
+
+ static const double wUhfMaltaX = 173.5;
+ static const double norm1UhfX = 5.0;
+ MaltaDiffMap(pi0_.uhf[0], pi1.uhf[0], wUhfMaltaX * hf_asymmetry_,
+ wUhfMaltaX / hf_asymmetry_, norm1UhfX, &diffs, &block_diff_ac,
+ 0);
+
+ static const double wHfMalta = 18.7237414387;
+ static const double norm1Hf = 4498534.45232;
+ MaltaDiffMapLF(pi0_.hf[1], pi1.hf[1], wHfMalta * std::sqrt(hf_asymmetry_),
+ wHfMalta / std::sqrt(hf_asymmetry_), norm1Hf, &diffs,
+ &block_diff_ac, 1);
+
+ static const double wHfMaltaX = 6923.99476109;
+ static const double norm1HfX = 8051.15833247;
+ MaltaDiffMapLF(pi0_.hf[0], pi1.hf[0], wHfMaltaX * std::sqrt(hf_asymmetry_),
+ wHfMaltaX / std::sqrt(hf_asymmetry_), norm1HfX, &diffs,
+ &block_diff_ac, 0);
+
+ static const double wMfMalta = 37.0819870399;
+ static const double norm1Mf = 130262059.556;
+ MaltaDiffMapLF(pi0_.mf.Plane(1), pi1.mf.Plane(1), wMfMalta, wMfMalta, norm1Mf,
+ &diffs, &block_diff_ac, 1);
+
+ static const double wMfMaltaX = 8246.75321353;
+ static const double norm1MfX = 1009002.70582;
+ MaltaDiffMapLF(pi0_.mf.Plane(0), pi1.mf.Plane(0), wMfMaltaX, wMfMaltaX,
+ norm1MfX, &diffs, &block_diff_ac, 0);
+
+ static const double wmul[9] = {
+ 400.0, 1.50815703118, 0,
+ 2150.0, 10.6195433239, 16.2176043152,
+ 29.2353797994, 0.844626970982, 0.703646627719,
+ };
+ Image3F block_diff_dc(xsize_, ysize_);
+ for (size_t c = 0; c < 3; ++c) {
+ if (c < 2) { // No blue channel error accumulated at HF.
+ HWY_DYNAMIC_DISPATCH(L2DiffAsymmetric)
+ (pi0_.hf[c], pi1.hf[c], wmul[c] * hf_asymmetry_, wmul[c] / hf_asymmetry_,
+ &block_diff_ac, c);
+ }
+ HWY_DYNAMIC_DISPATCH(L2Diff)
+ (pi0_.mf.Plane(c), pi1.mf.Plane(c), wmul[3 + c], &block_diff_ac, c);
+ HWY_DYNAMIC_DISPATCH(SetL2Diff)
+ (pi0_.lf.Plane(c), pi1.lf.Plane(c), wmul[6 + c], &block_diff_dc, c);
+ }
+
+ ImageF mask;
+ HWY_DYNAMIC_DISPATCH(MaskPsychoImage)
+ (pi0_, pi1, xsize_, ysize_, params_, Temp(), &blur_temp_, &mask,
+ &block_diff_ac.Plane(1));
+ ReleaseTemp();
+
+ HWY_DYNAMIC_DISPATCH(CombineChannelsToDiffmap)
+ (mask, block_diff_dc, block_diff_ac, xmul_, &diffmap);
+}
+
+double ButteraugliScoreFromDiffmap(const ImageF& diffmap,
+ const ButteraugliParams* params) {
+ PROFILER_FUNC;
+ float retval = 0.0f;
+ for (size_t y = 0; y < diffmap.ysize(); ++y) {
+ const float* BUTTERAUGLI_RESTRICT row = diffmap.ConstRow(y);
+ for (size_t x = 0; x < diffmap.xsize(); ++x) {
+ retval = std::max(retval, row[x]);
+ }
+ }
+ return retval;
+}
+
+bool ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1,
+ double hf_asymmetry, double xmul, ImageF& diffmap) {
+ ButteraugliParams params;
+ params.hf_asymmetry = hf_asymmetry;
+ params.xmul = xmul;
+ return ButteraugliDiffmap(rgb0, rgb1, params, diffmap);
+}
+
+bool ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1,
+ const ButteraugliParams& params, ImageF& diffmap) {
+ PROFILER_FUNC;
+ const size_t xsize = rgb0.xsize();
+ const size_t ysize = rgb0.ysize();
+ if (xsize < 1 || ysize < 1) {
+ return JXL_FAILURE("Zero-sized image");
+ }
+ if (!SameSize(rgb0, rgb1)) {
+ return JXL_FAILURE("Size mismatch");
+ }
+ static const int kMax = 8;
+ if (xsize < kMax || ysize < kMax) {
+ // Butteraugli values for small (where xsize or ysize is smaller
+ // than 8 pixels) images are non-sensical, but most likely it is
+ // less disruptive to try to compute something than just give up.
+ // Temporarily extend the borders of the image to fit 8 x 8 size.
+ size_t xborder = xsize < kMax ? (kMax - xsize) / 2 : 0;
+ size_t yborder = ysize < kMax ? (kMax - ysize) / 2 : 0;
+ size_t xscaled = std::max<size_t>(kMax, xsize);
+ size_t yscaled = std::max<size_t>(kMax, ysize);
+ Image3F scaled0(xscaled, yscaled);
+ Image3F scaled1(xscaled, yscaled);
+ for (int i = 0; i < 3; ++i) {
+ for (size_t y = 0; y < yscaled; ++y) {
+ for (size_t x = 0; x < xscaled; ++x) {
+ size_t x2 =
+ std::min<size_t>(xsize - 1, x > xborder ? x - xborder : 0);
+ size_t y2 =
+ std::min<size_t>(ysize - 1, y > yborder ? y - yborder : 0);
+ scaled0.PlaneRow(i, y)[x] = rgb0.PlaneRow(i, y2)[x2];
+ scaled1.PlaneRow(i, y)[x] = rgb1.PlaneRow(i, y2)[x2];
+ }
+ }
+ }
+ ImageF diffmap_scaled;
+ const bool ok =
+ ButteraugliDiffmap(scaled0, scaled1, params, diffmap_scaled);
+ diffmap = ImageF(xsize, ysize);
+ for (size_t y = 0; y < ysize; ++y) {
+ for (size_t x = 0; x < xsize; ++x) {
+ diffmap.Row(y)[x] = diffmap_scaled.Row(y + yborder)[x + xborder];
+ }
+ }
+ return ok;
+ }
+ ButteraugliComparator butteraugli(rgb0, params);
+ butteraugli.Diffmap(rgb1, diffmap);
+ return true;
+}
+
+bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1,
+ float hf_asymmetry, float xmul, ImageF& diffmap,
+ double& diffvalue) {
+ ButteraugliParams params;
+ params.hf_asymmetry = hf_asymmetry;
+ params.xmul = xmul;
+ return ButteraugliInterface(rgb0, rgb1, params, diffmap, diffvalue);
+}
+
+bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1,
+ const ButteraugliParams& params, ImageF& diffmap,
+ double& diffvalue) {
+#if JXL_PROFILER_ENABLED
+ auto trace_start = std::chrono::steady_clock::now();
+#endif
+ if (!ButteraugliDiffmap(rgb0, rgb1, params, diffmap)) {
+ return false;
+ }
+#if JXL_PROFILER_ENABLED
+ auto trace_end = std::chrono::steady_clock::now();
+ std::chrono::duration<double> elapsed = trace_end - trace_start;
+ const size_t mp = rgb0.xsize() * rgb0.ysize();
+ printf("diff MP/s %f\n", mp / elapsed.count() * 1E-6);
+#endif
+ diffvalue = ButteraugliScoreFromDiffmap(diffmap, &params);
+ return true;
+}
+
+double ButteraugliFuzzyClass(double score) {
+ static const double fuzzy_width_up = 4.8;
+ static const double fuzzy_width_down = 4.8;
+ static const double m0 = 2.0;
+ static const double scaler = 0.7777;
+ double val;
+ if (score < 1.0) {
+ // val in [scaler .. 2.0]
+ val = m0 / (1.0 + exp((score - 1.0) * fuzzy_width_down));
+ val -= 1.0; // from [1 .. 2] to [0 .. 1]
+ val *= 2.0 - scaler; // from [0 .. 1] to [0 .. 2.0 - scaler]
+ val += scaler; // from [0 .. 2.0 - scaler] to [scaler .. 2.0]
+ } else {
+ // val in [0 .. scaler]
+ val = m0 / (1.0 + exp((score - 1.0) * fuzzy_width_up));
+ val *= scaler;
+ }
+ return val;
+}
+
+// #define PRINT_OUT_NORMALIZATION
+
+double ButteraugliFuzzyInverse(double seek) {
+ double pos = 0;
+ // NOLINTNEXTLINE(clang-analyzer-security.FloatLoopCounter)
+ for (double range = 1.0; range >= 1e-10; range *= 0.5) {
+ double cur = ButteraugliFuzzyClass(pos);
+ if (cur < seek) {
+ pos -= range;
+ } else {
+ pos += range;
+ }
+ }
+#ifdef PRINT_OUT_NORMALIZATION
+ if (seek == 1.0) {
+ fprintf(stderr, "Fuzzy inverse %g\n", pos);
+ }
+#endif
+ return pos;
+}
+
+#ifdef PRINT_OUT_NORMALIZATION
+static double print_out_normalization = ButteraugliFuzzyInverse(1.0);
+#endif
+
+namespace {
+
+void ScoreToRgb(double score, double good_threshold, double bad_threshold,
+ float rgb[3]) {
+ double heatmap[12][3] = {
+ {0, 0, 0}, {0, 0, 1},
+ {0, 1, 1}, {0, 1, 0}, // Good level
+ {1, 1, 0}, {1, 0, 0}, // Bad level
+ {1, 0, 1}, {0.5, 0.5, 1.0},
+ {1.0, 0.5, 0.5}, // Pastel colors for the very bad quality range.
+ {1.0, 1.0, 0.5}, {1, 1, 1},
+ {1, 1, 1}, // Last color repeated to have a solid range of white.
+ };
+ if (score < good_threshold) {
+ score = (score / good_threshold) * 0.3;
+ } else if (score < bad_threshold) {
+ score = 0.3 +
+ (score - good_threshold) / (bad_threshold - good_threshold) * 0.15;
+ } else {
+ score = 0.45 + (score - bad_threshold) / (bad_threshold * 12) * 0.5;
+ }
+ static const int kTableSize = sizeof(heatmap) / sizeof(heatmap[0]);
+ score = std::min<double>(std::max<double>(score * (kTableSize - 1), 0.0),
+ kTableSize - 2);
+ int ix = static_cast<int>(score);
+ ix = std::min(std::max(0, ix), kTableSize - 2); // Handle NaN
+ double mix = score - ix;
+ for (int i = 0; i < 3; ++i) {
+ double v = mix * heatmap[ix + 1][i] + (1 - mix) * heatmap[ix][i];
+ rgb[i] = pow(v, 0.5);
+ }
+}
+
+} // namespace
+
+Image3F CreateHeatMapImage(const ImageF& distmap, double good_threshold,
+ double bad_threshold) {
+ Image3F heatmap(distmap.xsize(), distmap.ysize());
+ for (size_t y = 0; y < distmap.ysize(); ++y) {
+ const float* BUTTERAUGLI_RESTRICT row_distmap = distmap.ConstRow(y);
+ float* BUTTERAUGLI_RESTRICT row_h0 = heatmap.PlaneRow(0, y);
+ float* BUTTERAUGLI_RESTRICT row_h1 = heatmap.PlaneRow(1, y);
+ float* BUTTERAUGLI_RESTRICT row_h2 = heatmap.PlaneRow(2, y);
+ for (size_t x = 0; x < distmap.xsize(); ++x) {
+ const float d = row_distmap[x];
+ float rgb[3];
+ ScoreToRgb(d, good_threshold, bad_threshold, rgb);
+ row_h0[x] = rgb[0];
+ row_h1[x] = rgb[1];
+ row_h2[x] = rgb[2];
+ }
+ }
+ return heatmap;
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/butteraugli/butteraugli.h b/third_party/jpeg-xl/lib/jxl/butteraugli/butteraugli.h
new file mode 100644
index 0000000000..652b9528c4
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/butteraugli/butteraugli.h
@@ -0,0 +1,209 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// Author: Jyrki Alakuijala (jyrki.alakuijala@gmail.com)
+
+#ifndef LIB_JXL_BUTTERAUGLI_BUTTERAUGLI_H_
+#define LIB_JXL_BUTTERAUGLI_BUTTERAUGLI_H_
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <atomic>
+#include <cmath>
+#include <memory>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+
+#define BUTTERAUGLI_ENABLE_CHECKS 0
+#define BUTTERAUGLI_RESTRICT JXL_RESTRICT
+
+// This is the main interface to butteraugli image similarity
+// analysis function.
+
+namespace jxl {
+
+struct ButteraugliParams {
+ // Multiplier for penalizing new HF artifacts more than blurring away
+ // features. 1.0=neutral.
+ float hf_asymmetry = 1.0f;
+
+ // Multiplier for the psychovisual difference in the X channel.
+ float xmul = 1.0f;
+
+ // Number of nits that correspond to 1.0f input values.
+ float intensity_target = 80.0f;
+};
+
+// ButteraugliInterface defines the public interface for butteraugli.
+//
+// It calculates the difference between rgb0 and rgb1.
+//
+// rgb0 and rgb1 contain the images. rgb0[c][px] and rgb1[c][px] contains
+// the red image for c == 0, green for c == 1, blue for c == 2. Location index
+// px is calculated as y * xsize + x.
+//
+// Value of pixels of images rgb0 and rgb1 need to be represented as raw
+// intensity. Most image formats store gamma corrected intensity in pixel
+// values. This gamma correction has to be removed, by applying the following
+// function to values in the 0-1 range:
+// butteraugli_val = pow(input_val, gamma);
+// A typical value of gamma is 2.2. It is usually stored in the image header.
+// Take care not to confuse that value with its inverse. The gamma value should
+// be always greater than one.
+// Butteraugli does not work as intended if the caller does not perform
+// gamma correction.
+//
+// hf_asymmetry is a multiplier for penalizing new HF artifacts more than
+// blurring away features (1.0 -> neutral).
+//
+// diffmap will contain an image of the size xsize * ysize, containing
+// localized differences for values px (indexed with the px the same as rgb0
+// and rgb1). diffvalue will give a global score of similarity.
+//
+// A diffvalue smaller than kButteraugliGood indicates that images can be
+// observed as the same image.
+// diffvalue larger than kButteraugliBad indicates that a difference between
+// the images can be observed.
+// A diffvalue between kButteraugliGood and kButteraugliBad indicates that
+// a subtle difference can be observed between the images.
+//
+// Returns true on success.
+bool ButteraugliInterface(const Image3F &rgb0, const Image3F &rgb1,
+ const ButteraugliParams &params, ImageF &diffmap,
+ double &diffvalue);
+
+// Deprecated (calls the previous function)
+bool ButteraugliInterface(const Image3F &rgb0, const Image3F &rgb1,
+ float hf_asymmetry, float xmul, ImageF &diffmap,
+ double &diffvalue);
+
+// Converts the butteraugli score into fuzzy class values that are continuous
+// at the class boundary. The class boundary location is based on human
+// raters, but the slope is arbitrary. Particularly, it does not reflect
+// the expectation value of probabilities of the human raters. It is just
+// expected that a smoother class boundary will allow for higher-level
+// optimization algorithms to work faster.
+//
+// Returns 2.0 for a perfect match, and 1.0 for 'ok', 0.0 for bad. Because the
+// scoring is fuzzy, a butteraugli score of 0.96 would return a class of
+// around 1.9.
+double ButteraugliFuzzyClass(double score);
+
+// Input values should be in range 0 (bad) to 2 (good). Use
+// kButteraugliNormalization as normalization.
+double ButteraugliFuzzyInverse(double seek);
+
+// Implementation details, don't use anything below or your code will
+// break in the future.
+
+#ifdef _MSC_VER
+#define BUTTERAUGLI_INLINE __forceinline
+#else
+#define BUTTERAUGLI_INLINE inline
+#endif
+
+#ifdef __clang__
+// Early versions of Clang did not support __builtin_assume_aligned.
+#define BUTTERAUGLI_HAS_ASSUME_ALIGNED __has_builtin(__builtin_assume_aligned)
+#elif defined(__GNUC__)
+#define BUTTERAUGLI_HAS_ASSUME_ALIGNED 1
+#else
+#define BUTTERAUGLI_HAS_ASSUME_ALIGNED 0
+#endif
+
+// Returns a void* pointer which the compiler then assumes is N-byte aligned.
+// Example: float* JXL_RESTRICT aligned = (float*)JXL_ASSUME_ALIGNED(in, 32);
+//
+// The assignment semantics are required by GCC/Clang. ICC provides an in-place
+// __assume_aligned, whereas MSVC's __assume appears unsuitable.
+#if BUTTERAUGLI_HAS_ASSUME_ALIGNED
+#define BUTTERAUGLI_ASSUME_ALIGNED(ptr, align) \
+ __builtin_assume_aligned((ptr), (align))
+#else
+#define BUTTERAUGLI_ASSUME_ALIGNED(ptr, align) (ptr)
+#endif // BUTTERAUGLI_HAS_ASSUME_ALIGNED
+
+struct PsychoImage {
+ ImageF uhf[2]; // XY
+ ImageF hf[2]; // XY
+ Image3F mf; // XYB
+ Image3F lf; // XYB
+};
+
+// Blur needs a transposed image.
+// Hold it here and only allocate on demand to reduce memory usage.
+struct BlurTemp {
+ ImageF *GetTransposed(const ImageF &in) {
+ if (transposed_temp.xsize() == 0) {
+ transposed_temp = ImageF(in.ysize(), in.xsize());
+ }
+ return &transposed_temp;
+ }
+
+ ImageF transposed_temp;
+};
+
+class ButteraugliComparator {
+ public:
+ // Butteraugli is calibrated at xmul = 1.0. We add a multiplier here so that
+ // we can test the hypothesis that a higher weighing of the X channel would
+ // improve results at higher Butteraugli values.
+ ButteraugliComparator(const Image3F &rgb0, const ButteraugliParams &params);
+ virtual ~ButteraugliComparator() = default;
+
+ // Computes the butteraugli map between the original image given in the
+ // constructor and the distorted image give here.
+ void Diffmap(const Image3F &rgb1, ImageF &result) const;
+
+ // Same as above, but OpsinDynamicsImage() was already applied.
+ void DiffmapOpsinDynamicsImage(const Image3F &xyb1, ImageF &result) const;
+
+ // Same as above, but the frequency decomposition was already applied.
+ void DiffmapPsychoImage(const PsychoImage &pi1, ImageF &diffmap) const;
+
+ void Mask(ImageF *BUTTERAUGLI_RESTRICT mask) const;
+
+ private:
+ Image3F *Temp() const;
+ void ReleaseTemp() const;
+
+ const size_t xsize_;
+ const size_t ysize_;
+ ButteraugliParams params_;
+ PsychoImage pi0_;
+
+ // Shared temporary image storage to reduce the number of allocations;
+ // obtained via Temp(), must call ReleaseTemp when no longer needed.
+ mutable Image3F temp_;
+ mutable std::atomic_flag temp_in_use_ = ATOMIC_FLAG_INIT;
+
+ mutable BlurTemp blur_temp_;
+ std::unique_ptr<ButteraugliComparator> sub_;
+};
+
+// Deprecated.
+bool ButteraugliDiffmap(const Image3F &rgb0, const Image3F &rgb1,
+ double hf_asymmetry, double xmul, ImageF &diffmap);
+
+bool ButteraugliDiffmap(const Image3F &rgb0, const Image3F &rgb1,
+ const ButteraugliParams &params, ImageF &diffmap);
+
+double ButteraugliScoreFromDiffmap(const ImageF &diffmap,
+ const ButteraugliParams *params = nullptr);
+
+// Generate rgb-representation of the distance between two images.
+Image3F CreateHeatMapImage(const ImageF &distmap, double good_threshold,
+ double bad_threshold);
+
+} // namespace jxl
+
+#endif // LIB_JXL_BUTTERAUGLI_BUTTERAUGLI_H_
diff --git a/third_party/jpeg-xl/lib/jxl/butteraugli_test.cc b/third_party/jpeg-xl/lib/jxl/butteraugli_test.cc
new file mode 100644
index 0000000000..3fdec09725
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/butteraugli_test.cc
@@ -0,0 +1,103 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/butteraugli.h>
+#include <jxl/butteraugli_cxx.h>
+
+#include "lib/jxl/test_image.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+TEST(ButteraugliTest, Lossless) {
+ uint32_t xsize = 171;
+ uint32_t ysize = 219;
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+ JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+ JxlButteraugliApiPtr api(JxlButteraugliApiCreate(nullptr));
+ JxlButteraugliResultPtr result(JxlButteraugliCompute(
+ api.get(), xsize, ysize, &pixel_format, pixels.data(), pixels.size(),
+ &pixel_format, pixels.data(), pixels.size()));
+ EXPECT_EQ(0.0, JxlButteraugliResultGetDistance(result.get(), 8.0));
+}
+
+TEST(ButteraugliTest, Distmap) {
+ uint32_t xsize = 171;
+ uint32_t ysize = 219;
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+ JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+ JxlButteraugliApiPtr api(JxlButteraugliApiCreate(nullptr));
+ JxlButteraugliResultPtr result(JxlButteraugliCompute(
+ api.get(), xsize, ysize, &pixel_format, pixels.data(), pixels.size(),
+ &pixel_format, pixels.data(), pixels.size()));
+ EXPECT_EQ(0.0, JxlButteraugliResultGetDistance(result.get(), 8.0));
+ const float* distmap;
+ uint32_t row_stride;
+ JxlButteraugliResultGetDistmap(result.get(), &distmap, &row_stride);
+ for (uint32_t y = 0; y < ysize; y++) {
+ for (uint32_t x = 0; x < xsize; x++) {
+ EXPECT_EQ(0.0, distmap[y * row_stride + x]);
+ }
+ }
+}
+
+TEST(ButteraugliTest, Distorted) {
+ uint32_t xsize = 171;
+ uint32_t ysize = 219;
+ std::vector<uint8_t> orig_pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+ std::vector<uint8_t> dist_pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+ dist_pixels[0] += 128;
+
+ JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+ JxlButteraugliApiPtr api(JxlButteraugliApiCreate(nullptr));
+ JxlButteraugliResultPtr result(JxlButteraugliCompute(
+ api.get(), xsize, ysize, &pixel_format, orig_pixels.data(),
+ orig_pixels.size(), &pixel_format, dist_pixels.data(),
+ dist_pixels.size()));
+ EXPECT_NE(0.0, JxlButteraugliResultGetDistance(result.get(), 8.0));
+}
+
+TEST(ButteraugliTest, Api) {
+ uint32_t xsize = 171;
+ uint32_t ysize = 219;
+ std::vector<uint8_t> orig_pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+ std::vector<uint8_t> dist_pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+ dist_pixels[0] += 128;
+
+ JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+ JxlButteraugliApiPtr api(JxlButteraugliApiCreate(nullptr));
+ JxlButteraugliApiSetHFAsymmetry(api.get(), 1.0f);
+ JxlButteraugliApiSetIntensityTarget(api.get(), 250.0f);
+ JxlButteraugliResultPtr result(JxlButteraugliCompute(
+ api.get(), xsize, ysize, &pixel_format, orig_pixels.data(),
+ orig_pixels.size(), &pixel_format, dist_pixels.data(),
+ dist_pixels.size()));
+ double distance0 = JxlButteraugliResultGetDistance(result.get(), 8.0);
+
+ JxlButteraugliApiSetHFAsymmetry(api.get(), 2.0f);
+ result.reset(JxlButteraugliCompute(api.get(), xsize, ysize, &pixel_format,
+ orig_pixels.data(), orig_pixels.size(),
+ &pixel_format, dist_pixels.data(),
+ dist_pixels.size()));
+ double distance1 = JxlButteraugliResultGetDistance(result.get(), 8.0);
+
+ EXPECT_NE(distance0, distance1);
+
+ JxlButteraugliApiSetIntensityTarget(api.get(), 80.0f);
+ result.reset(JxlButteraugliCompute(api.get(), xsize, ysize, &pixel_format,
+ orig_pixels.data(), orig_pixels.size(),
+ &pixel_format, dist_pixels.data(),
+ dist_pixels.size()));
+ double distance2 = JxlButteraugliResultGetDistance(result.get(), 8.0);
+
+ EXPECT_NE(distance1, distance2);
+}
diff --git a/third_party/jpeg-xl/lib/jxl/butteraugli_wrapper.cc b/third_party/jpeg-xl/lib/jxl/butteraugli_wrapper.cc
new file mode 100644
index 0000000000..c5a1a8e506
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/butteraugli_wrapper.cc
@@ -0,0 +1,203 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/butteraugli.h>
+#include <jxl/parallel_runner.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <atomic>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/butteraugli/butteraugli.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_butteraugli_pnorm.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/memory_manager_internal.h"
+
+namespace {
+
+void SetMetadataFromPixelFormat(const JxlPixelFormat* pixel_format,
+ jxl::ImageMetadata* metadata) {
+ uint32_t potential_alpha_bits = 0;
+ switch (pixel_format->data_type) {
+ case JXL_TYPE_FLOAT:
+ metadata->SetFloat32Samples();
+ potential_alpha_bits = 16;
+ break;
+ case JXL_TYPE_FLOAT16:
+ metadata->SetFloat16Samples();
+ potential_alpha_bits = 16;
+ break;
+ case JXL_TYPE_UINT16:
+ metadata->SetUintSamples(16);
+ potential_alpha_bits = 16;
+ break;
+ case JXL_TYPE_UINT8:
+ metadata->SetUintSamples(8);
+ potential_alpha_bits = 8;
+ break;
+ default:
+ JXL_ABORT("Unhandled JxlDataType");
+ }
+ if (pixel_format->num_channels == 2 || pixel_format->num_channels == 4) {
+ metadata->SetAlphaBits(potential_alpha_bits);
+ }
+}
+
+} // namespace
+
+struct JxlButteraugliResultStruct {
+ JxlMemoryManager memory_manager;
+
+ jxl::ImageF distmap;
+ jxl::ButteraugliParams params;
+};
+
+struct JxlButteraugliApiStruct {
+ // Multiplier for penalizing new HF artifacts more than blurring away
+ // features. 1.0=neutral.
+ float hf_asymmetry = 1.0f;
+
+ // Multiplier for the psychovisual difference in the X channel.
+ float xmul = 1.0f;
+
+ // Number of nits that correspond to 1.0f input values.
+ float intensity_target = jxl::kDefaultIntensityTarget;
+
+ JxlCmsInterface cms;
+ JxlMemoryManager memory_manager;
+ std::unique_ptr<jxl::ThreadPool> thread_pool{nullptr};
+};
+
+JxlButteraugliApi* JxlButteraugliApiCreate(
+ const JxlMemoryManager* memory_manager) {
+ JxlMemoryManager local_memory_manager;
+ if (!jxl::MemoryManagerInit(&local_memory_manager, memory_manager))
+ return nullptr;
+
+ void* alloc =
+ jxl::MemoryManagerAlloc(&local_memory_manager, sizeof(JxlButteraugliApi));
+ if (!alloc) return nullptr;
+ // Placement new constructor on allocated memory
+ JxlButteraugliApi* ret = new (alloc) JxlButteraugliApi();
+ ret->cms = jxl::GetJxlCms();
+ ret->memory_manager = local_memory_manager;
+ return ret;
+}
+
+void JxlButteraugliApiSetParallelRunner(JxlButteraugliApi* api,
+ JxlParallelRunner parallel_runner,
+ void* parallel_runner_opaque) {
+ api->thread_pool = jxl::make_unique<jxl::ThreadPool>(parallel_runner,
+ parallel_runner_opaque);
+}
+
+void JxlButteraugliApiSetHFAsymmetry(JxlButteraugliApi* api, float v) {
+ api->hf_asymmetry = v;
+}
+
+void JxlButteraugliApiSetIntensityTarget(JxlButteraugliApi* api, float v) {
+ api->intensity_target = v;
+}
+
+void JxlButteraugliApiDestroy(JxlButteraugliApi* api) {
+ if (api) {
+ JxlMemoryManager local_memory_manager = api->memory_manager;
+ // Call destructor directly since custom free function is used.
+ api->~JxlButteraugliApi();
+ jxl::MemoryManagerFree(&local_memory_manager, api);
+ }
+}
+
+JxlButteraugliResult* JxlButteraugliCompute(
+ const JxlButteraugliApi* api, uint32_t xsize, uint32_t ysize,
+ const JxlPixelFormat* pixel_format_orig, const void* buffer_orig,
+ size_t size_orig, const JxlPixelFormat* pixel_format_dist,
+ const void* buffer_dist, size_t size_dist) {
+ jxl::ImageMetadata orig_metadata;
+ SetMetadataFromPixelFormat(pixel_format_orig, &orig_metadata);
+ jxl::ImageBundle orig_ib(&orig_metadata);
+ jxl::ColorEncoding c_current;
+ if (pixel_format_orig->data_type == JXL_TYPE_FLOAT) {
+ c_current =
+ jxl::ColorEncoding::LinearSRGB(pixel_format_orig->num_channels < 3);
+ } else {
+ c_current = jxl::ColorEncoding::SRGB(pixel_format_orig->num_channels < 3);
+ }
+ if (!jxl::BufferToImageBundle(*pixel_format_orig, xsize, ysize, buffer_orig,
+ size_orig, api->thread_pool.get(), c_current,
+ &orig_ib)) {
+ return nullptr;
+ }
+
+ jxl::ImageMetadata dist_metadata;
+ SetMetadataFromPixelFormat(pixel_format_dist, &dist_metadata);
+ jxl::ImageBundle dist_ib(&dist_metadata);
+ if (pixel_format_dist->data_type == JXL_TYPE_FLOAT) {
+ c_current =
+ jxl::ColorEncoding::LinearSRGB(pixel_format_dist->num_channels < 3);
+ } else {
+ c_current = jxl::ColorEncoding::SRGB(pixel_format_dist->num_channels < 3);
+ }
+ if (!jxl::BufferToImageBundle(*pixel_format_dist, xsize, ysize, buffer_dist,
+ size_dist, api->thread_pool.get(), c_current,
+ &dist_ib)) {
+ return nullptr;
+ }
+
+ void* alloc = jxl::MemoryManagerAlloc(&api->memory_manager,
+ sizeof(JxlButteraugliResult));
+ if (!alloc) return nullptr;
+ // Placement new constructor on allocated memory
+ JxlButteraugliResult* result = new (alloc) JxlButteraugliResult();
+ result->memory_manager = api->memory_manager;
+ result->params.hf_asymmetry = api->hf_asymmetry;
+ result->params.xmul = api->xmul;
+ result->params.intensity_target = api->intensity_target;
+ jxl::ButteraugliDistance(orig_ib, dist_ib, result->params, api->cms,
+ &result->distmap, api->thread_pool.get());
+
+ return result;
+}
+
+float JxlButteraugliResultGetDistance(const JxlButteraugliResult* result,
+ float pnorm) {
+ return static_cast<float>(
+ jxl::ComputeDistanceP(result->distmap, result->params, pnorm));
+}
+
+void JxlButteraugliResultGetDistmap(const JxlButteraugliResult* result,
+ const float** buffer,
+ uint32_t* row_stride) {
+ *buffer = result->distmap.Row(0);
+ *row_stride = result->distmap.PixelsPerRow();
+}
+
+float JxlButteraugliResultGetMaxDistance(const JxlButteraugliResult* result) {
+ float max_distance = 0.0;
+ for (uint32_t y = 0; y < result->distmap.ysize(); y++) {
+ for (uint32_t x = 0; x < result->distmap.xsize(); x++) {
+ if (result->distmap.ConstRow(y)[x] > max_distance) {
+ max_distance = result->distmap.ConstRow(y)[x];
+ }
+ }
+ }
+ return max_distance;
+}
+
+void JxlButteraugliResultDestroy(JxlButteraugliResult* result) {
+ if (result) {
+ JxlMemoryManager local_memory_manager = result->memory_manager;
+ // Call destructor directly since custom free function is used.
+ result->~JxlButteraugliResult();
+ jxl::MemoryManagerFree(&local_memory_manager, result);
+ }
+}
diff --git a/third_party/jpeg-xl/lib/jxl/byte_order_test.cc b/third_party/jpeg-xl/lib/jxl/byte_order_test.cc
new file mode 100644
index 0000000000..17d7ef6643
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/byte_order_test.cc
@@ -0,0 +1,53 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/byte_order.h"
+
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(ByteOrderTest, TestRoundTripBE16) {
+ const uint32_t in = 0x1234;
+ uint8_t buf[2];
+ StoreBE16(in, buf);
+ EXPECT_EQ(in, LoadBE16(buf));
+ EXPECT_NE(in, LoadLE16(buf));
+}
+
+TEST(ByteOrderTest, TestRoundTripLE16) {
+ const uint32_t in = 0x1234;
+ uint8_t buf[2];
+ StoreLE16(in, buf);
+ EXPECT_EQ(in, LoadLE16(buf));
+ EXPECT_NE(in, LoadBE16(buf));
+}
+
+TEST(ByteOrderTest, TestRoundTripBE32) {
+ const uint32_t in = 0xFEDCBA98u;
+ uint8_t buf[4];
+ StoreBE32(in, buf);
+ EXPECT_EQ(in, LoadBE32(buf));
+ EXPECT_NE(in, LoadLE32(buf));
+}
+
+TEST(ByteOrderTest, TestRoundTripLE32) {
+ const uint32_t in = 0xFEDCBA98u;
+ uint8_t buf[4];
+ StoreLE32(in, buf);
+ EXPECT_EQ(in, LoadLE32(buf));
+ EXPECT_NE(in, LoadBE32(buf));
+}
+
+TEST(ByteOrderTest, TestRoundTripLE64) {
+ const uint64_t in = 0xFEDCBA9876543210ull;
+ uint8_t buf[8];
+ StoreLE64(in, buf);
+ EXPECT_EQ(in, LoadLE64(buf));
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/chroma_from_luma.cc b/third_party/jpeg-xl/lib/jxl/chroma_from_luma.cc
new file mode 100644
index 0000000000..63d21cbb4b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/chroma_from_luma.cc
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/chroma_from_luma.h"
+
+namespace jxl {
+
+ColorCorrelationMap::ColorCorrelationMap(size_t xsize, size_t ysize, bool XYB)
+ : ytox_map(DivCeil(xsize, kColorTileDim), DivCeil(ysize, kColorTileDim)),
+ ytob_map(DivCeil(xsize, kColorTileDim), DivCeil(ysize, kColorTileDim)) {
+ ZeroFillImage(&ytox_map);
+ ZeroFillImage(&ytob_map);
+ if (!XYB) {
+ base_correlation_b_ = 0;
+ }
+ RecomputeDCFactors();
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/chroma_from_luma.h b/third_party/jpeg-xl/lib/jxl/chroma_from_luma.h
new file mode 100644
index 0000000000..9a7f3d45bc
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/chroma_from_luma.h
@@ -0,0 +1,147 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_CHROMA_FROM_LUMA_H_
+#define LIB_JXL_CHROMA_FROM_LUMA_H_
+
+// Chroma-from-luma, computed using heuristics to determine the best linear
+// model for the X and B channels from the Y channel.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quant_weights.h"
+
+namespace jxl {
+
+// Tile is the rectangular grid of blocks that share color correlation
+// parameters ("factor_x/b" such that residual_b = blue - Y * factor_b).
+static constexpr size_t kColorTileDim = 64;
+
+static_assert(kColorTileDim % kBlockDim == 0,
+ "Color tile dim should be divisible by block dim");
+static constexpr size_t kColorTileDimInBlocks = kColorTileDim / kBlockDim;
+
+static_assert(kGroupDimInBlocks % kColorTileDimInBlocks == 0,
+ "Group dim should be divisible by color tile dim");
+
+static constexpr uint8_t kDefaultColorFactor = 84;
+
+// JPEG DCT coefficients are at most 1024. CfL constants are at most 127, and
+// the ratio of two entries in a JPEG quantization table is at most 255. Thus,
+// since the CfL denominator is 84, this leaves 12 bits of mantissa to be used.
+// For extra caution, we use 11.
+static constexpr uint8_t kCFLFixedPointPrecision = 11;
+
+static constexpr U32Enc kColorFactorDist(Val(kDefaultColorFactor), Val(256),
+ BitsOffset(8, 2), BitsOffset(16, 258));
+
+struct ColorCorrelationMap {
+ ColorCorrelationMap() = default;
+ // xsize/ysize are in pixels
+ // set XYB=false to do something close to no-op cmap (needed for now since
+ // cmap is mandatory)
+ ColorCorrelationMap(size_t xsize, size_t ysize, bool XYB = true);
+
+ float YtoXRatio(int32_t x_factor) const {
+ return base_correlation_x_ + x_factor * color_scale_;
+ }
+
+ float YtoBRatio(int32_t b_factor) const {
+ return base_correlation_b_ + b_factor * color_scale_;
+ }
+
+ Status DecodeDC(BitReader* br) {
+ if (br->ReadFixedBits<1>() == 1) {
+ // All default.
+ return true;
+ }
+ SetColorFactor(U32Coder::Read(kColorFactorDist, br));
+ JXL_RETURN_IF_ERROR(F16Coder::Read(br, &base_correlation_x_));
+ if (std::abs(base_correlation_x_) > 4.0f) {
+ return JXL_FAILURE("Base X correlation is out of range");
+ }
+ JXL_RETURN_IF_ERROR(F16Coder::Read(br, &base_correlation_b_));
+ if (std::abs(base_correlation_b_) > 4.0f) {
+ return JXL_FAILURE("Base B correlation is out of range");
+ }
+ ytox_dc_ = static_cast<int>(br->ReadFixedBits<kBitsPerByte>()) +
+ std::numeric_limits<int8_t>::min();
+ ytob_dc_ = static_cast<int>(br->ReadFixedBits<kBitsPerByte>()) +
+ std::numeric_limits<int8_t>::min();
+ RecomputeDCFactors();
+ return true;
+ }
+
+ // We consider a CfL map to be JPEG-reconstruction-compatible if base
+ // correlation is 0, no DC correlation is used, and we use the default color
+ // factor.
+ bool IsJPEGCompatible() const {
+ return base_correlation_x_ == 0 && base_correlation_b_ == 0 &&
+ ytob_dc_ == 0 && ytox_dc_ == 0 &&
+ color_factor_ == kDefaultColorFactor;
+ }
+
+ int32_t RatioJPEG(int32_t factor) const {
+ return factor * (1 << kCFLFixedPointPrecision) / kDefaultColorFactor;
+ }
+
+ void SetColorFactor(uint32_t factor) {
+ color_factor_ = factor;
+ color_scale_ = 1.0f / color_factor_;
+ RecomputeDCFactors();
+ }
+
+ void SetYToBDC(int32_t ytob_dc) {
+ ytob_dc_ = ytob_dc;
+ RecomputeDCFactors();
+ }
+ void SetYToXDC(int32_t ytox_dc) {
+ ytox_dc_ = ytox_dc;
+ RecomputeDCFactors();
+ }
+
+ int32_t GetYToXDC() const { return ytox_dc_; }
+ int32_t GetYToBDC() const { return ytob_dc_; }
+ float GetColorFactor() const { return color_factor_; }
+ float GetBaseCorrelationX() const { return base_correlation_x_; }
+ float GetBaseCorrelationB() const { return base_correlation_b_; }
+
+ const float* DCFactors() const { return dc_factors_; }
+
+ void RecomputeDCFactors() {
+ dc_factors_[0] = YtoXRatio(ytox_dc_);
+ dc_factors_[2] = YtoBRatio(ytob_dc_);
+ }
+
+ ImageSB ytox_map;
+ ImageSB ytob_map;
+
+ private:
+ float dc_factors_[4] = {};
+ // range of factor: -1.51 to +1.52
+ uint32_t color_factor_ = kDefaultColorFactor;
+ float color_scale_ = 1.0f / color_factor_;
+ float base_correlation_x_ = 0.0f;
+ float base_correlation_b_ = kYToBRatio;
+ int32_t ytox_dc_ = 0;
+ int32_t ytob_dc_ = 0;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_CHROMA_FROM_LUMA_H_
diff --git a/third_party/jpeg-xl/lib/jxl/codec_in_out.h b/third_party/jpeg-xl/lib/jxl/codec_in_out.h
new file mode 100644
index 0000000000..9e48b5e937
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/codec_in_out.h
@@ -0,0 +1,116 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_CODEC_IN_OUT_H_
+#define LIB_JXL_CODEC_IN_OUT_H_
+
+// Holds inputs/outputs for decoding/encoding images.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/luminance.h"
+
+namespace jxl {
+
+// Optional text/EXIF metadata.
+struct Blobs {
+ std::vector<uint8_t> exif;
+ std::vector<uint8_t> iptc;
+ std::vector<uint8_t> jumbf;
+ std::vector<uint8_t> xmp;
+};
+
+// Holds a preview, a main image or one or more frames, plus the inputs/outputs
+// to/from decoding/encoding.
+class CodecInOut {
+ public:
+ CodecInOut() : preview_frame(&metadata.m) {
+ frames.reserve(1);
+ frames.emplace_back(&metadata.m);
+ }
+
+ // Move-only.
+ CodecInOut(CodecInOut&&) = default;
+ CodecInOut& operator=(CodecInOut&&) = default;
+
+ size_t LastStillFrame() const {
+ JXL_DASSERT(!frames.empty());
+ size_t last = 0;
+ for (size_t i = 0; i < frames.size(); i++) {
+ last = i;
+ if (frames[i].duration > 0) break;
+ }
+ return last;
+ }
+
+ ImageBundle& Main() { return frames[LastStillFrame()]; }
+ const ImageBundle& Main() const { return frames[LastStillFrame()]; }
+
+ // If c_current.IsGray(), all planes must be identical.
+ void SetFromImage(Image3F&& color, const ColorEncoding& c_current) {
+ Main().SetFromImage(std::move(color), c_current);
+ SetIntensityTarget(&this->metadata.m);
+ SetSize(Main().xsize(), Main().ysize());
+ }
+
+ void SetSize(size_t xsize, size_t ysize) {
+ JXL_CHECK(metadata.size.Set(xsize, ysize));
+ }
+
+ void CheckMetadata() const {
+ JXL_CHECK(metadata.m.bit_depth.bits_per_sample != 0);
+ JXL_CHECK(!metadata.m.color_encoding.ICC().empty());
+
+ if (preview_frame.xsize() != 0) preview_frame.VerifyMetadata();
+ JXL_CHECK(preview_frame.metadata() == &metadata.m);
+
+ for (const ImageBundle& ib : frames) {
+ ib.VerifyMetadata();
+ JXL_CHECK(ib.metadata() == &metadata.m);
+ }
+ }
+
+ size_t xsize() const { return metadata.size.xsize(); }
+ size_t ysize() const { return metadata.size.ysize(); }
+ void ShrinkTo(size_t xsize, size_t ysize) {
+ // preview is unaffected.
+ for (ImageBundle& ib : frames) {
+ ib.ShrinkTo(xsize, ysize);
+ }
+ SetSize(xsize, ysize);
+ }
+
+ // -- DECODER OUTPUT, ENCODER INPUT:
+
+ // Metadata stored into / retrieved from bitstreams.
+
+ Blobs blobs;
+
+ CodecMetadata metadata; // applies to preview and all frames
+
+ // If metadata.have_preview:
+ ImageBundle preview_frame;
+
+ std::vector<ImageBundle> frames; // size=1 if !metadata.have_animation
+
+ // If the image should be written to a JPEG, use this quality for encoding.
+ size_t jpeg_quality;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_CODEC_IN_OUT_H_
diff --git a/third_party/jpeg-xl/lib/jxl/coeff_order.cc b/third_party/jpeg-xl/lib/jxl/coeff_order.cc
new file mode 100644
index 0000000000..43adafd82a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/coeff_order.cc
@@ -0,0 +1,153 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/coeff_order.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/lehmer_code.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+uint32_t CoeffOrderContext(uint32_t val) {
+ uint32_t token, nbits, bits;
+ HybridUintConfig(0, 0, 0).Encode(val, &token, &nbits, &bits);
+ return std::min(token, kPermutationContexts - 1);
+}
+
+namespace {
+Status ReadPermutation(size_t skip, size_t size, coeff_order_t* order,
+ BitReader* br, ANSSymbolReader* reader,
+ const std::vector<uint8_t>& context_map) {
+ std::vector<LehmerT> lehmer(size);
+ // temp space needs to be as large as the next power of 2, so doubling the
+ // allocated size is enough.
+ std::vector<uint32_t> temp(size * 2);
+ uint32_t end =
+ reader->ReadHybridUint(CoeffOrderContext(size), br, context_map) + skip;
+ if (end > size) {
+ return JXL_FAILURE("Invalid permutation size");
+ }
+ uint32_t last = 0;
+ for (size_t i = skip; i < end; ++i) {
+ lehmer[i] =
+ reader->ReadHybridUint(CoeffOrderContext(last), br, context_map);
+ last = lehmer[i];
+ if (lehmer[i] + i >= size) {
+ return JXL_FAILURE("Invalid lehmer code");
+ }
+ }
+ if (order == nullptr) return true;
+ DecodeLehmerCode(lehmer.data(), temp.data(), size, order);
+ return true;
+}
+
+} // namespace
+
+Status DecodePermutation(size_t skip, size_t size, coeff_order_t* order,
+ BitReader* br) {
+ std::vector<uint8_t> context_map;
+ ANSCode code;
+ JXL_RETURN_IF_ERROR(
+ DecodeHistograms(br, kPermutationContexts, &code, &context_map));
+ ANSSymbolReader reader(&code, br);
+ JXL_RETURN_IF_ERROR(
+ ReadPermutation(skip, size, order, br, &reader, context_map));
+ if (!reader.CheckANSFinalState()) {
+ return JXL_FAILURE("Invalid ANS stream");
+ }
+ return true;
+}
+
+namespace {
+
+Status DecodeCoeffOrder(AcStrategy acs, coeff_order_t* order, BitReader* br,
+ ANSSymbolReader* reader,
+ std::vector<coeff_order_t>& natural_order,
+ const std::vector<uint8_t>& context_map) {
+ PROFILER_FUNC;
+ const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
+ const size_t size = kDCTBlockSize * llf;
+
+ JXL_RETURN_IF_ERROR(
+ ReadPermutation(llf, size, order, br, reader, context_map));
+ if (order == nullptr) return true;
+ for (size_t k = 0; k < size; ++k) {
+ order[k] = natural_order[order[k]];
+ }
+ return true;
+}
+
+} // namespace
+
+Status DecodeCoeffOrders(uint16_t used_orders, uint32_t used_acs,
+ coeff_order_t* order, BitReader* br) {
+ uint16_t computed = 0;
+ std::vector<uint8_t> context_map;
+ ANSCode code;
+ std::unique_ptr<ANSSymbolReader> reader;
+ std::vector<coeff_order_t> natural_order;
+ // Bitstream does not have histograms if no coefficient order is used.
+ if (used_orders != 0) {
+ JXL_RETURN_IF_ERROR(
+ DecodeHistograms(br, kPermutationContexts, &code, &context_map));
+ reader = make_unique<ANSSymbolReader>(&code, br);
+ }
+ uint32_t acs_mask = 0;
+ for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+ if ((used_acs & (1 << o)) == 0) continue;
+ acs_mask |= 1 << kStrategyOrder[o];
+ }
+ for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+ uint8_t ord = kStrategyOrder[o];
+ if (computed & (1 << ord)) continue;
+ computed |= 1 << ord;
+ AcStrategy acs = AcStrategy::FromRawStrategy(o);
+ bool used = (acs_mask & (1 << ord)) != 0;
+
+ const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
+ const size_t size = kDCTBlockSize * llf;
+
+ if (used || (used_orders & (1 << ord))) {
+ if (natural_order.size() < size) natural_order.resize(size);
+ acs.ComputeNaturalCoeffOrder(natural_order.data());
+ }
+
+ if ((used_orders & (1 << ord)) == 0) {
+ // No need to set the default order if no ACS uses this order.
+ if (used) {
+ for (size_t c = 0; c < 3; c++) {
+ memcpy(&order[CoeffOrderOffset(ord, c)], natural_order.data(),
+ size * sizeof(*order));
+ }
+ }
+ } else {
+ for (size_t c = 0; c < 3; c++) {
+ coeff_order_t* dest = used ? &order[CoeffOrderOffset(ord, c)] : nullptr;
+ JXL_RETURN_IF_ERROR(DecodeCoeffOrder(acs, dest, br, reader.get(),
+ natural_order, context_map));
+ }
+ }
+ }
+ if (used_orders && !reader->CheckANSFinalState()) {
+ return JXL_FAILURE("Invalid ANS stream");
+ }
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/coeff_order.h b/third_party/jpeg-xl/lib/jxl/coeff_order.h
new file mode 100644
index 0000000000..fb32499f2f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/coeff_order.h
@@ -0,0 +1,64 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COEFF_ORDER_H_
+#define LIB_JXL_COEFF_ORDER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+
+namespace jxl {
+
+class BitReader;
+
+// Those offsets get multiplied by kDCTBlockSize.
+static constexpr size_t kCoeffOrderOffset[] = {
+ 0, 1, 2, 3, 4, 5, 6, 10, 14, 18,
+ 34, 50, 66, 68, 70, 72, 76, 80, 84, 92,
+ 100, 108, 172, 236, 300, 332, 364, 396, 652, 908,
+ 1164, 1292, 1420, 1548, 2572, 3596, 4620, 5132, 5644, 6156,
+};
+static_assert(3 * kNumOrders + 1 ==
+ sizeof(kCoeffOrderOffset) / sizeof(*kCoeffOrderOffset),
+ "Update this array when adding or removing order types.");
+
+static constexpr size_t CoeffOrderOffset(size_t order, size_t c) {
+ return kCoeffOrderOffset[3 * order + c] * kDCTBlockSize;
+}
+
+static constexpr size_t kCoeffOrderMaxSize =
+ kCoeffOrderOffset[3 * kNumOrders] * kDCTBlockSize;
+
+// Mapping from AC strategy to order bucket. Strategies with different natural
+// orders must have different buckets.
+constexpr uint8_t kStrategyOrder[] = {
+ 0, 1, 1, 1, 2, 3, 4, 4, 5, 5, 6, 6, 1, 1,
+ 1, 1, 1, 1, 7, 8, 8, 9, 10, 10, 11, 12, 12,
+};
+
+static_assert(AcStrategy::kNumValidStrategies ==
+ sizeof(kStrategyOrder) / sizeof(*kStrategyOrder),
+ "Update this array when adding or removing AC strategies.");
+
+constexpr uint32_t kPermutationContexts = 8;
+
+uint32_t CoeffOrderContext(uint32_t val);
+
+Status DecodeCoeffOrders(uint16_t used_orders, uint32_t used_acs,
+ coeff_order_t* order, BitReader* br);
+
+Status DecodePermutation(size_t skip, size_t size, coeff_order_t* order,
+ BitReader* br);
+
+} // namespace jxl
+
+#endif // LIB_JXL_COEFF_ORDER_H_
diff --git a/third_party/jpeg-xl/lib/jxl/coeff_order_fwd.h b/third_party/jpeg-xl/lib/jxl/coeff_order_fwd.h
new file mode 100644
index 0000000000..26306575c1
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/coeff_order_fwd.h
@@ -0,0 +1,47 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COEFF_ORDER_FWD_H_
+#define LIB_JXL_COEFF_ORDER_FWD_H_
+
+// Breaks circular dependency between ac_strategy and coeff_order.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+// Needs at least 16 bits. A 32-bit type speeds up DecodeAC by 2% at the cost of
+// more memory.
+using coeff_order_t = uint32_t;
+
+// Maximum number of orders to be used. Note that this needs to be multiplied by
+// the number of channels. One per "size class" (plus one extra for DCT8),
+// shared between transforms of size XxY and of size YxX.
+constexpr uint8_t kNumOrders = 13;
+
+// DCT coefficients are laid out in such a way that the number of rows of
+// coefficients is always the smaller coordinate.
+JXL_INLINE constexpr size_t CoefficientRows(size_t rows, size_t columns) {
+ return rows < columns ? rows : columns;
+}
+
+JXL_INLINE constexpr size_t CoefficientColumns(size_t rows, size_t columns) {
+ return rows < columns ? columns : rows;
+}
+
+JXL_INLINE void CoefficientLayout(size_t* JXL_RESTRICT rows,
+ size_t* JXL_RESTRICT columns) {
+ size_t r = *rows;
+ size_t c = *columns;
+ *rows = CoefficientRows(r, c);
+ *columns = CoefficientColumns(r, c);
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_COEFF_ORDER_FWD_H_
diff --git a/third_party/jpeg-xl/lib/jxl/coeff_order_test.cc b/third_party/jpeg-xl/lib/jxl/coeff_order_test.cc
new file mode 100644
index 0000000000..6fa0775697
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/coeff_order_test.cc
@@ -0,0 +1,97 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/coeff_order.h"
+
+#include <stdio.h>
+
+#include <algorithm>
+#include <numeric> // iota
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_coeff_order.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+void RoundtripPermutation(coeff_order_t* perm, coeff_order_t* out, size_t len,
+ size_t* size) {
+ BitWriter writer;
+ EncodePermutation(perm, 0, len, &writer, 0, nullptr);
+ writer.ZeroPadToByte();
+ Status status = true;
+ {
+ BitReader reader(writer.GetSpan());
+ BitReaderScopedCloser closer(&reader, &status);
+ ASSERT_TRUE(DecodePermutation(0, len, out, &reader));
+ }
+ ASSERT_TRUE(status);
+ *size = writer.GetSpan().size();
+}
+
+enum Permutation { kIdentity, kFewSwaps, kFewSlides, kRandom };
+
+constexpr size_t kSwaps = 32;
+
+void TestPermutation(Permutation kind, size_t len) {
+ std::vector<coeff_order_t> perm(len);
+ std::iota(perm.begin(), perm.end(), 0);
+ Rng rng(0);
+ if (kind == kFewSwaps) {
+ for (size_t i = 0; i < kSwaps; i++) {
+ size_t a = rng.UniformU(0, len - 1);
+ size_t b = rng.UniformU(0, len - 1);
+ std::swap(perm[a], perm[b]);
+ }
+ }
+ if (kind == kFewSlides) {
+ for (size_t i = 0; i < kSwaps; i++) {
+ size_t a = rng.UniformU(0, len - 1);
+ size_t b = rng.UniformU(0, len - 1);
+ size_t from = std::min(a, b);
+ size_t to = std::max(a, b);
+ size_t start = perm[from];
+ for (size_t j = from; j < to; j++) {
+ perm[j] = perm[j + 1];
+ }
+ perm[to] = start;
+ }
+ }
+ if (kind == kRandom) {
+ rng.Shuffle(perm.data(), perm.size());
+ }
+ std::vector<coeff_order_t> out(len);
+ size_t size = 0;
+ RoundtripPermutation(perm.data(), out.data(), len, &size);
+ for (size_t idx = 0; idx < len; idx++) {
+ EXPECT_EQ(perm[idx], out[idx]);
+ }
+ printf("Encoded size: %" PRIuS "\n", size);
+}
+
+TEST(CoeffOrderTest, IdentitySmall) { TestPermutation(kIdentity, 256); }
+TEST(CoeffOrderTest, FewSlidesSmall) { TestPermutation(kFewSlides, 256); }
+TEST(CoeffOrderTest, FewSwapsSmall) { TestPermutation(kFewSwaps, 256); }
+TEST(CoeffOrderTest, RandomSmall) { TestPermutation(kRandom, 256); }
+
+TEST(CoeffOrderTest, IdentityMedium) { TestPermutation(kIdentity, 1 << 12); }
+TEST(CoeffOrderTest, FewSlidesMedium) { TestPermutation(kFewSlides, 1 << 12); }
+TEST(CoeffOrderTest, FewSwapsMedium) { TestPermutation(kFewSwaps, 1 << 12); }
+TEST(CoeffOrderTest, RandomMedium) { TestPermutation(kRandom, 1 << 12); }
+
+TEST(CoeffOrderTest, IdentityBig) { TestPermutation(kIdentity, 1 << 16); }
+TEST(CoeffOrderTest, FewSlidesBig) { TestPermutation(kFewSlides, 1 << 16); }
+TEST(CoeffOrderTest, FewSwapsBig) { TestPermutation(kFewSwaps, 1 << 16); }
+TEST(CoeffOrderTest, RandomBig) { TestPermutation(kRandom, 1 << 16); }
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/color_encoding_internal.cc b/third_party/jpeg-xl/lib/jxl/color_encoding_internal.cc
new file mode 100644
index 0000000000..e496accfed
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/color_encoding_internal.cc
@@ -0,0 +1,753 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/color_encoding_internal.h"
+
+#include <errno.h>
+
+#include <array>
+#include <cmath>
+
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/matrix_ops.h"
+
+namespace jxl {
+namespace {
+
+// Highest reasonable value for the gamma of a transfer curve.
+constexpr uint32_t kMaxGamma = 8192;
+
+// These strings are baked into Description - do not change.
+
+std::string ToString(ColorSpace color_space) {
+ switch (color_space) {
+ case ColorSpace::kRGB:
+ return "RGB";
+ case ColorSpace::kGray:
+ return "Gra";
+ case ColorSpace::kXYB:
+ return "XYB";
+ case ColorSpace::kUnknown:
+ return "CS?";
+ }
+ // Should not happen - visitor fails if enum is invalid.
+ JXL_ABORT("Invalid ColorSpace %u", static_cast<uint32_t>(color_space));
+}
+
+std::string ToString(WhitePoint white_point) {
+ switch (white_point) {
+ case WhitePoint::kD65:
+ return "D65";
+ case WhitePoint::kCustom:
+ return "Cst";
+ case WhitePoint::kE:
+ return "EER";
+ case WhitePoint::kDCI:
+ return "DCI";
+ }
+ // Should not happen - visitor fails if enum is invalid.
+ JXL_ABORT("Invalid WhitePoint %u", static_cast<uint32_t>(white_point));
+}
+
+std::string ToString(Primaries primaries) {
+ switch (primaries) {
+ case Primaries::kSRGB:
+ return "SRG";
+ case Primaries::k2100:
+ return "202";
+ case Primaries::kP3:
+ return "DCI";
+ case Primaries::kCustom:
+ return "Cst";
+ }
+ // Should not happen - visitor fails if enum is invalid.
+ JXL_ABORT("Invalid Primaries %u", static_cast<uint32_t>(primaries));
+}
+
+std::string ToString(TransferFunction transfer_function) {
+ switch (transfer_function) {
+ case TransferFunction::kSRGB:
+ return "SRG";
+ case TransferFunction::kLinear:
+ return "Lin";
+ case TransferFunction::k709:
+ return "709";
+ case TransferFunction::kPQ:
+ return "PeQ";
+ case TransferFunction::kHLG:
+ return "HLG";
+ case TransferFunction::kDCI:
+ return "DCI";
+ case TransferFunction::kUnknown:
+ return "TF?";
+ }
+ // Should not happen - visitor fails if enum is invalid.
+ JXL_ABORT("Invalid TransferFunction %u",
+ static_cast<uint32_t>(transfer_function));
+}
+
+std::string ToString(RenderingIntent rendering_intent) {
+ switch (rendering_intent) {
+ case RenderingIntent::kPerceptual:
+ return "Per";
+ case RenderingIntent::kRelative:
+ return "Rel";
+ case RenderingIntent::kSaturation:
+ return "Sat";
+ case RenderingIntent::kAbsolute:
+ return "Abs";
+ }
+ // Should not happen - visitor fails if enum is invalid.
+ JXL_ABORT("Invalid RenderingIntent %u",
+ static_cast<uint32_t>(rendering_intent));
+}
+
+static double F64FromCustomxyI32(const int32_t i) { return i * 1E-6; }
+static Status F64ToCustomxyI32(const double f, int32_t* JXL_RESTRICT i) {
+ if (!(-4 <= f && f <= 4)) {
+ return JXL_FAILURE("F64 out of bounds for CustomxyI32");
+ }
+ *i = static_cast<int32_t>(roundf(f * 1E6));
+ return true;
+}
+
+Status ConvertExternalToInternalWhitePoint(const JxlWhitePoint external,
+ WhitePoint* internal) {
+ switch (external) {
+ case JXL_WHITE_POINT_D65:
+ *internal = WhitePoint::kD65;
+ return true;
+ case JXL_WHITE_POINT_CUSTOM:
+ *internal = WhitePoint::kCustom;
+ return true;
+ case JXL_WHITE_POINT_E:
+ *internal = WhitePoint::kE;
+ return true;
+ case JXL_WHITE_POINT_DCI:
+ *internal = WhitePoint::kDCI;
+ return true;
+ }
+ return JXL_FAILURE("Invalid WhitePoint enum value");
+}
+
+Status ConvertExternalToInternalPrimaries(const JxlPrimaries external,
+ Primaries* internal) {
+ switch (external) {
+ case JXL_PRIMARIES_SRGB:
+ *internal = Primaries::kSRGB;
+ return true;
+ case JXL_PRIMARIES_CUSTOM:
+ *internal = Primaries::kCustom;
+ return true;
+ case JXL_PRIMARIES_2100:
+ *internal = Primaries::k2100;
+ return true;
+ case JXL_PRIMARIES_P3:
+ *internal = Primaries::kP3;
+ return true;
+ }
+ return JXL_FAILURE("Invalid Primaries enum value");
+}
+
+Status ConvertExternalToInternalTransferFunction(
+ const JxlTransferFunction external, TransferFunction* internal) {
+ switch (external) {
+ case JXL_TRANSFER_FUNCTION_709:
+ *internal = TransferFunction::k709;
+ return true;
+ case JXL_TRANSFER_FUNCTION_UNKNOWN:
+ *internal = TransferFunction::kUnknown;
+ return true;
+ case JXL_TRANSFER_FUNCTION_LINEAR:
+ *internal = TransferFunction::kLinear;
+ return true;
+ case JXL_TRANSFER_FUNCTION_SRGB:
+ *internal = TransferFunction::kSRGB;
+ return true;
+ case JXL_TRANSFER_FUNCTION_PQ:
+ *internal = TransferFunction::kPQ;
+ return true;
+ case JXL_TRANSFER_FUNCTION_DCI:
+ *internal = TransferFunction::kDCI;
+ return true;
+ case JXL_TRANSFER_FUNCTION_HLG:
+ *internal = TransferFunction::kHLG;
+ return true;
+ case JXL_TRANSFER_FUNCTION_GAMMA:
+ return JXL_FAILURE("Gamma should be handled separately");
+ }
+ return JXL_FAILURE("Invalid TransferFunction enum value");
+}
+
+Status ConvertExternalToInternalRenderingIntent(
+ const JxlRenderingIntent external, RenderingIntent* internal) {
+ switch (external) {
+ case JXL_RENDERING_INTENT_PERCEPTUAL:
+ *internal = RenderingIntent::kPerceptual;
+ return true;
+ case JXL_RENDERING_INTENT_RELATIVE:
+ *internal = RenderingIntent::kRelative;
+ return true;
+ case JXL_RENDERING_INTENT_SATURATION:
+ *internal = RenderingIntent::kSaturation;
+ return true;
+ case JXL_RENDERING_INTENT_ABSOLUTE:
+ *internal = RenderingIntent::kAbsolute;
+ return true;
+ }
+ return JXL_FAILURE("Invalid RenderingIntent enum value");
+}
+
+} // namespace
+
+CIExy Customxy::Get() const {
+ CIExy xy;
+ xy.x = F64FromCustomxyI32(x);
+ xy.y = F64FromCustomxyI32(y);
+ return xy;
+}
+
+Status Customxy::Set(const CIExy& xy) {
+ JXL_RETURN_IF_ERROR(F64ToCustomxyI32(xy.x, &x));
+ JXL_RETURN_IF_ERROR(F64ToCustomxyI32(xy.y, &y));
+ size_t extension_bits, total_bits;
+ if (!Bundle::CanEncode(*this, &extension_bits, &total_bits)) {
+ return JXL_FAILURE("Unable to encode XY %f %f", xy.x, xy.y);
+ }
+ return true;
+}
+
+bool CustomTransferFunction::SetImplicit() {
+ if (nonserialized_color_space == ColorSpace::kXYB) {
+ if (!SetGamma(1.0 / 3)) JXL_ASSERT(false);
+ return true;
+ }
+ return false;
+}
+
+Status CustomTransferFunction::SetGamma(double gamma) {
+ if (gamma < (1.0f / kMaxGamma) || gamma > 1.0) {
+ return JXL_FAILURE("Invalid gamma %f", gamma);
+ }
+
+ have_gamma_ = false;
+ if (ApproxEq(gamma, 1.0)) {
+ transfer_function_ = TransferFunction::kLinear;
+ return true;
+ }
+ if (ApproxEq(gamma, 1.0 / 2.6)) {
+ transfer_function_ = TransferFunction::kDCI;
+ return true;
+ }
+ // Don't translate 0.45.. to kSRGB nor k709 - that might change pixel
+ // values because those curves also have a linear part.
+
+ have_gamma_ = true;
+ gamma_ = roundf(gamma * kGammaMul);
+ transfer_function_ = TransferFunction::kUnknown;
+ return true;
+}
+
+namespace {
+
+std::array<ColorEncoding, 2> CreateC2(const Primaries pr,
+ const TransferFunction tf) {
+ std::array<ColorEncoding, 2> c2;
+
+ {
+ ColorEncoding* c_rgb = c2.data() + 0;
+ c_rgb->SetColorSpace(ColorSpace::kRGB);
+ c_rgb->white_point = WhitePoint::kD65;
+ c_rgb->primaries = pr;
+ c_rgb->tf.SetTransferFunction(tf);
+ JXL_CHECK(c_rgb->CreateICC());
+ }
+
+ {
+ ColorEncoding* c_gray = c2.data() + 1;
+ c_gray->SetColorSpace(ColorSpace::kGray);
+ c_gray->white_point = WhitePoint::kD65;
+ c_gray->primaries = pr;
+ c_gray->tf.SetTransferFunction(tf);
+ JXL_CHECK(c_gray->CreateICC());
+ }
+
+ return c2;
+}
+
+} // namespace
+
+const ColorEncoding& ColorEncoding::SRGB(bool is_gray) {
+ static std::array<ColorEncoding, 2> c2 =
+ CreateC2(Primaries::kSRGB, TransferFunction::kSRGB);
+ return c2[is_gray];
+}
+const ColorEncoding& ColorEncoding::LinearSRGB(bool is_gray) {
+ static std::array<ColorEncoding, 2> c2 =
+ CreateC2(Primaries::kSRGB, TransferFunction::kLinear);
+ return c2[is_gray];
+}
+
+CIExy ColorEncoding::GetWhitePoint() const {
+ JXL_DASSERT(have_fields_);
+ CIExy xy;
+ switch (white_point) {
+ case WhitePoint::kCustom:
+ return white_.Get();
+
+ case WhitePoint::kD65:
+ xy.x = 0.3127;
+ xy.y = 0.3290;
+ return xy;
+
+ case WhitePoint::kDCI:
+ // From https://ieeexplore.ieee.org/document/7290729 C.2 page 11
+ xy.x = 0.314;
+ xy.y = 0.351;
+ return xy;
+
+ case WhitePoint::kE:
+ xy.x = xy.y = 1.0 / 3;
+ return xy;
+ }
+ JXL_ABORT("Invalid WhitePoint %u", static_cast<uint32_t>(white_point));
+}
+
+Status ColorEncoding::SetWhitePoint(const CIExy& xy) {
+ JXL_DASSERT(have_fields_);
+ if (xy.x == 0.0 || xy.y == 0.0) {
+ return JXL_FAILURE("Invalid white point %f %f", xy.x, xy.y);
+ }
+ if (ApproxEq(xy.x, 0.3127) && ApproxEq(xy.y, 0.3290)) {
+ white_point = WhitePoint::kD65;
+ return true;
+ }
+ if (ApproxEq(xy.x, 1.0 / 3) && ApproxEq(xy.y, 1.0 / 3)) {
+ white_point = WhitePoint::kE;
+ return true;
+ }
+ if (ApproxEq(xy.x, 0.314) && ApproxEq(xy.y, 0.351)) {
+ white_point = WhitePoint::kDCI;
+ return true;
+ }
+ white_point = WhitePoint::kCustom;
+ return white_.Set(xy);
+}
+
+PrimariesCIExy ColorEncoding::GetPrimaries() const {
+ JXL_DASSERT(have_fields_);
+ JXL_ASSERT(HasPrimaries());
+ PrimariesCIExy xy;
+ switch (primaries) {
+ case Primaries::kCustom:
+ xy.r = red_.Get();
+ xy.g = green_.Get();
+ xy.b = blue_.Get();
+ return xy;
+
+ case Primaries::kSRGB:
+ xy.r.x = 0.639998686;
+ xy.r.y = 0.330010138;
+ xy.g.x = 0.300003784;
+ xy.g.y = 0.600003357;
+ xy.b.x = 0.150002046;
+ xy.b.y = 0.059997204;
+ return xy;
+
+ case Primaries::k2100:
+ xy.r.x = 0.708;
+ xy.r.y = 0.292;
+ xy.g.x = 0.170;
+ xy.g.y = 0.797;
+ xy.b.x = 0.131;
+ xy.b.y = 0.046;
+ return xy;
+
+ case Primaries::kP3:
+ xy.r.x = 0.680;
+ xy.r.y = 0.320;
+ xy.g.x = 0.265;
+ xy.g.y = 0.690;
+ xy.b.x = 0.150;
+ xy.b.y = 0.060;
+ return xy;
+ }
+ JXL_ABORT("Invalid Primaries %u", static_cast<uint32_t>(primaries));
+}
+
+Status ColorEncoding::SetPrimaries(const PrimariesCIExy& xy) {
+ JXL_DASSERT(have_fields_);
+ JXL_ASSERT(HasPrimaries());
+ if (xy.r.x == 0.0 || xy.r.y == 0.0 || xy.g.x == 0.0 || xy.g.y == 0.0 ||
+ xy.b.x == 0.0 || xy.b.y == 0.0) {
+ return JXL_FAILURE("Invalid primaries %f %f %f %f %f %f", xy.r.x, xy.r.y,
+ xy.g.x, xy.g.y, xy.b.x, xy.b.y);
+ }
+
+ if (ApproxEq(xy.r.x, 0.64) && ApproxEq(xy.r.y, 0.33) &&
+ ApproxEq(xy.g.x, 0.30) && ApproxEq(xy.g.y, 0.60) &&
+ ApproxEq(xy.b.x, 0.15) && ApproxEq(xy.b.y, 0.06)) {
+ primaries = Primaries::kSRGB;
+ return true;
+ }
+
+ if (ApproxEq(xy.r.x, 0.708) && ApproxEq(xy.r.y, 0.292) &&
+ ApproxEq(xy.g.x, 0.170) && ApproxEq(xy.g.y, 0.797) &&
+ ApproxEq(xy.b.x, 0.131) && ApproxEq(xy.b.y, 0.046)) {
+ primaries = Primaries::k2100;
+ return true;
+ }
+ if (ApproxEq(xy.r.x, 0.680) && ApproxEq(xy.r.y, 0.320) &&
+ ApproxEq(xy.g.x, 0.265) && ApproxEq(xy.g.y, 0.690) &&
+ ApproxEq(xy.b.x, 0.150) && ApproxEq(xy.b.y, 0.060)) {
+ primaries = Primaries::kP3;
+ return true;
+ }
+
+ primaries = Primaries::kCustom;
+ JXL_RETURN_IF_ERROR(red_.Set(xy.r));
+ JXL_RETURN_IF_ERROR(green_.Set(xy.g));
+ JXL_RETURN_IF_ERROR(blue_.Set(xy.b));
+ return true;
+}
+
+Status ColorEncoding::CreateICC() {
+ InternalRemoveICC();
+ if (!MaybeCreateProfile(*this, &icc_)) {
+ return JXL_FAILURE("Failed to create profile from fields");
+ }
+ return true;
+}
+
+std::string Description(const ColorEncoding& c_in) {
+ // Copy required for Implicit*
+ ColorEncoding c = c_in;
+
+ std::string d = ToString(c.GetColorSpace());
+
+ if (!c.ImplicitWhitePoint()) {
+ d += '_';
+ if (c.white_point == WhitePoint::kCustom) {
+ const CIExy wp = c.GetWhitePoint();
+ d += ToString(wp.x) + ';';
+ d += ToString(wp.y);
+ } else {
+ d += ToString(c.white_point);
+ }
+ }
+
+ if (c.HasPrimaries()) {
+ d += '_';
+ if (c.primaries == Primaries::kCustom) {
+ const PrimariesCIExy pr = c.GetPrimaries();
+ d += ToString(pr.r.x) + ';';
+ d += ToString(pr.r.y) + ';';
+ d += ToString(pr.g.x) + ';';
+ d += ToString(pr.g.y) + ';';
+ d += ToString(pr.b.x) + ';';
+ d += ToString(pr.b.y);
+ } else {
+ d += ToString(c.primaries);
+ }
+ }
+
+ d += '_';
+ d += ToString(c.rendering_intent);
+
+ if (!c.tf.SetImplicit()) {
+ d += '_';
+ if (c.tf.IsGamma()) {
+ d += 'g';
+ d += ToString(c.tf.GetGamma());
+ } else {
+ d += ToString(c.tf.GetTransferFunction());
+ }
+ }
+
+ return d;
+}
+
+Customxy::Customxy() { Bundle::Init(this); }
+Status Customxy::VisitFields(Visitor* JXL_RESTRICT visitor) {
+ uint32_t ux = PackSigned(x);
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Bits(19), BitsOffset(19, 524288),
+ BitsOffset(20, 1048576),
+ BitsOffset(21, 2097152), 0, &ux));
+ x = UnpackSigned(ux);
+ uint32_t uy = PackSigned(y);
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Bits(19), BitsOffset(19, 524288),
+ BitsOffset(20, 1048576),
+ BitsOffset(21, 2097152), 0, &uy));
+ y = UnpackSigned(uy);
+ return true;
+}
+
+CustomTransferFunction::CustomTransferFunction() { Bundle::Init(this); }
+Status CustomTransferFunction::VisitFields(Visitor* JXL_RESTRICT visitor) {
+ if (visitor->Conditional(!SetImplicit())) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_gamma_));
+
+ if (visitor->Conditional(have_gamma_)) {
+ // Gamma is represented as a 24-bit int, the exponent used is
+ // gamma_ / 1e7. Valid values are (0, 1]. On the low end side, we also
+ // limit it to kMaxGamma/1e7.
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(24, kGammaMul, &gamma_));
+ if (gamma_ > kGammaMul ||
+ static_cast<uint64_t>(gamma_) * kMaxGamma < kGammaMul) {
+ return JXL_FAILURE("Invalid gamma %u", gamma_);
+ }
+ }
+
+ if (visitor->Conditional(!have_gamma_)) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->Enum(TransferFunction::kSRGB, &transfer_function_));
+ }
+ }
+
+ return true;
+}
+
+ColorEncoding::ColorEncoding() { Bundle::Init(this); }
+Status ColorEncoding::VisitFields(Visitor* JXL_RESTRICT visitor) {
+ if (visitor->AllDefault(*this, &all_default)) {
+ // Overwrite all serialized fields, but not any nonserialized_*.
+ visitor->SetDefault(this);
+ return true;
+ }
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &want_icc_));
+
+ // Always send even if want_icc_ because this affects decoding.
+ // We can skip the white point/primaries because they do not.
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Enum(ColorSpace::kRGB, &color_space_));
+
+ if (visitor->Conditional(!WantICC())) {
+ // Serialize enums. NOTE: we set the defaults to the most common values so
+ // ImageMetadata.all_default is true in the common case.
+
+ if (visitor->Conditional(!ImplicitWhitePoint())) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Enum(WhitePoint::kD65, &white_point));
+ if (visitor->Conditional(white_point == WhitePoint::kCustom)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&white_));
+ }
+ }
+
+ if (visitor->Conditional(HasPrimaries())) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Enum(Primaries::kSRGB, &primaries));
+ if (visitor->Conditional(primaries == Primaries::kCustom)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&red_));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&green_));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&blue_));
+ }
+ }
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&tf));
+
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->Enum(RenderingIntent::kRelative, &rendering_intent));
+
+ // We didn't have ICC, so all fields should be known.
+ if (color_space_ == ColorSpace::kUnknown || tf.IsUnknown()) {
+ return JXL_FAILURE(
+ "No ICC but cs %u and tf %u%s",
+ static_cast<unsigned int>(color_space_),
+ tf.IsGamma() ? 0
+ : static_cast<unsigned int>(tf.GetTransferFunction()),
+ tf.IsGamma() ? "(gamma)" : "");
+ }
+
+ JXL_RETURN_IF_ERROR(CreateICC());
+ }
+
+ if (WantICC() && visitor->IsReading()) {
+ // Haven't called SetICC() yet, do nothing.
+ } else {
+ if (ICC().empty()) return JXL_FAILURE("Empty ICC");
+ }
+
+ return true;
+}
+
+void ConvertInternalToExternalColorEncoding(const ColorEncoding& internal,
+ JxlColorEncoding* external) {
+ external->color_space = static_cast<JxlColorSpace>(internal.GetColorSpace());
+
+ external->white_point = static_cast<JxlWhitePoint>(internal.white_point);
+
+ jxl::CIExy whitepoint = internal.GetWhitePoint();
+ external->white_point_xy[0] = whitepoint.x;
+ external->white_point_xy[1] = whitepoint.y;
+
+ if (external->color_space == JXL_COLOR_SPACE_RGB ||
+ external->color_space == JXL_COLOR_SPACE_UNKNOWN) {
+ external->primaries = static_cast<JxlPrimaries>(internal.primaries);
+ jxl::PrimariesCIExy primaries = internal.GetPrimaries();
+ external->primaries_red_xy[0] = primaries.r.x;
+ external->primaries_red_xy[1] = primaries.r.y;
+ external->primaries_green_xy[0] = primaries.g.x;
+ external->primaries_green_xy[1] = primaries.g.y;
+ external->primaries_blue_xy[0] = primaries.b.x;
+ external->primaries_blue_xy[1] = primaries.b.y;
+ }
+
+ if (internal.tf.IsGamma()) {
+ external->transfer_function = JXL_TRANSFER_FUNCTION_GAMMA;
+ external->gamma = internal.tf.GetGamma();
+ } else {
+ external->transfer_function =
+ static_cast<JxlTransferFunction>(internal.tf.GetTransferFunction());
+ external->gamma = 0;
+ }
+
+ external->rendering_intent =
+ static_cast<JxlRenderingIntent>(internal.rendering_intent);
+}
+
+Status ConvertExternalToInternalColorEncoding(const JxlColorEncoding& external,
+ ColorEncoding* internal) {
+ internal->SetColorSpace(static_cast<ColorSpace>(external.color_space));
+
+ JXL_RETURN_IF_ERROR(ConvertExternalToInternalWhitePoint(
+ external.white_point, &internal->white_point));
+ if (external.white_point == JXL_WHITE_POINT_CUSTOM) {
+ CIExy wp;
+ wp.x = external.white_point_xy[0];
+ wp.y = external.white_point_xy[1];
+ JXL_RETURN_IF_ERROR(internal->SetWhitePoint(wp));
+ }
+
+ if (external.color_space == JXL_COLOR_SPACE_RGB ||
+ external.color_space == JXL_COLOR_SPACE_UNKNOWN) {
+ JXL_RETURN_IF_ERROR(ConvertExternalToInternalPrimaries(
+ external.primaries, &internal->primaries));
+ if (external.primaries == JXL_PRIMARIES_CUSTOM) {
+ PrimariesCIExy primaries;
+ primaries.r.x = external.primaries_red_xy[0];
+ primaries.r.y = external.primaries_red_xy[1];
+ primaries.g.x = external.primaries_green_xy[0];
+ primaries.g.y = external.primaries_green_xy[1];
+ primaries.b.x = external.primaries_blue_xy[0];
+ primaries.b.y = external.primaries_blue_xy[1];
+ JXL_RETURN_IF_ERROR(internal->SetPrimaries(primaries));
+ }
+ }
+ CustomTransferFunction tf;
+ tf.nonserialized_color_space = internal->GetColorSpace();
+ if (external.transfer_function == JXL_TRANSFER_FUNCTION_GAMMA) {
+ JXL_RETURN_IF_ERROR(tf.SetGamma(external.gamma));
+ } else {
+ TransferFunction tf_enum;
+ // JXL_TRANSFER_FUNCTION_GAMMA is not handled by this function since there's
+ // no internal enum value for it.
+ JXL_RETURN_IF_ERROR(ConvertExternalToInternalTransferFunction(
+ external.transfer_function, &tf_enum));
+ tf.SetTransferFunction(tf_enum);
+ }
+ internal->tf = tf;
+
+ JXL_RETURN_IF_ERROR(ConvertExternalToInternalRenderingIntent(
+ external.rendering_intent, &internal->rendering_intent));
+
+ // The ColorEncoding caches an ICC profile it created earlier that may no
+ // longer match the profile with the changed fields, so re-create it.
+ if (!(internal->CreateICC())) {
+ // This is not an error: for example, it doesn't have ICC profile creation
+ // implemented for XYB. This should not be returned as error, since
+ // ConvertExternalToInternalColorEncoding still worked correctly, and what
+ // matters is that internal->ICC() will not return the wrong profile.
+ }
+
+ return true;
+}
+
+/* Chromatic adaptation matrices*/
+static const float kBradford[9] = {
+ 0.8951f, 0.2664f, -0.1614f, -0.7502f, 1.7135f,
+ 0.0367f, 0.0389f, -0.0685f, 1.0296f,
+};
+
+static const float kBradfordInv[9] = {
+ 0.9869929f, -0.1470543f, 0.1599627f, 0.4323053f, 0.5183603f,
+ 0.0492912f, -0.0085287f, 0.0400428f, 0.9684867f,
+};
+
+// Adapts whitepoint x, y to D50
+Status AdaptToXYZD50(float wx, float wy, float matrix[9]) {
+ if (wx < 0 || wx > 1 || wy <= 0 || wy > 1) {
+ // Out of range values can cause division through zero
+ // further down with the bradford adaptation too.
+ return JXL_FAILURE("Invalid white point");
+ }
+ float w[3] = {wx / wy, 1.0f, (1.0f - wx - wy) / wy};
+ // 1 / tiny float can still overflow
+ JXL_RETURN_IF_ERROR(std::isfinite(w[0]) && std::isfinite(w[2]));
+ float w50[3] = {0.96422f, 1.0f, 0.82521f};
+
+ float lms[3];
+ float lms50[3];
+
+ Mul3x3Vector(kBradford, w, lms);
+ Mul3x3Vector(kBradford, w50, lms50);
+
+ if (lms[0] == 0 || lms[1] == 0 || lms[2] == 0) {
+ return JXL_FAILURE("Invalid white point");
+ }
+ float a[9] = {
+ // /----> 0, 1, 2, 3, /----> 4, 5, 6, 7, /----> 8,
+ lms50[0] / lms[0], 0, 0, 0, lms50[1] / lms[1], 0, 0, 0, lms50[2] / lms[2],
+ };
+ if (!std::isfinite(a[0]) || !std::isfinite(a[4]) || !std::isfinite(a[8])) {
+ return JXL_FAILURE("Invalid white point");
+ }
+
+ float b[9];
+ Mul3x3Matrix(a, kBradford, b);
+ Mul3x3Matrix(kBradfordInv, b, matrix);
+
+ return true;
+}
+
+Status PrimariesToXYZ(float rx, float ry, float gx, float gy, float bx,
+ float by, float wx, float wy, float matrix[9]) {
+ if (wx < 0 || wx > 1 || wy <= 0 || wy > 1) {
+ return JXL_FAILURE("Invalid white point");
+ }
+ // TODO(lode): also require rx, ry, gx, gy, bx, to be in range 0-1? ICC
+ // profiles in theory forbid negative XYZ values, but in practice the ACES P0
+ // color space uses a negative y for the blue primary.
+ float primaries[9] = {
+ rx, gx, bx, ry, gy, by, 1.0f - rx - ry, 1.0f - gx - gy, 1.0f - bx - by};
+ float primaries_inv[9];
+ memcpy(primaries_inv, primaries, sizeof(float) * 9);
+ JXL_RETURN_IF_ERROR(Inv3x3Matrix(primaries_inv));
+
+ float w[3] = {wx / wy, 1.0f, (1.0f - wx - wy) / wy};
+ // 1 / tiny float can still overflow
+ JXL_RETURN_IF_ERROR(std::isfinite(w[0]) && std::isfinite(w[2]));
+ float xyz[3];
+ Mul3x3Vector(primaries_inv, w, xyz);
+
+ float a[9] = {
+ xyz[0], 0, 0, 0, xyz[1], 0, 0, 0, xyz[2],
+ };
+
+ Mul3x3Matrix(primaries, a, matrix);
+ return true;
+}
+
+Status PrimariesToXYZD50(float rx, float ry, float gx, float gy, float bx,
+ float by, float wx, float wy, float matrix[9]) {
+ float toXYZ[9];
+ JXL_RETURN_IF_ERROR(PrimariesToXYZ(rx, ry, gx, gy, bx, by, wx, wy, toXYZ));
+ float d50[9];
+ JXL_RETURN_IF_ERROR(AdaptToXYZD50(wx, wy, d50));
+
+ Mul3x3Matrix(d50, toXYZ, matrix);
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/color_encoding_internal.h b/third_party/jpeg-xl/lib/jxl/color_encoding_internal.h
new file mode 100644
index 0000000000..713f216538
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/color_encoding_internal.h
@@ -0,0 +1,463 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COLOR_ENCODING_INTERNAL_H_
+#define LIB_JXL_COLOR_ENCODING_INTERNAL_H_
+
+// Metadata for color space conversions.
+
+#include <jxl/color_encoding.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <cmath> // std::abs
+#include <ostream>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/field_encodings.h"
+
+namespace jxl {
+
+// (All CIE units are for the standard 1931 2 degree observer)
+
+// Color space the color pixel data is encoded in. The color pixel data is
+// 3-channel in all cases except in case of kGray, where it uses only 1 channel.
+// This also determines the amount of channels used in modular encoding.
+enum class ColorSpace : uint32_t {
+ // Trichromatic color data. This also includes CMYK if a kBlack
+ // ExtraChannelInfo is present. This implies, if there is an ICC profile, that
+ // the ICC profile uses a 3-channel color space if no kBlack extra channel is
+ // present, or uses color space 'CMYK' if a kBlack extra channel is present.
+ kRGB,
+ // Single-channel data. This implies, if there is an ICC profile, that the ICC
+ // profile also represents single-channel data and has the appropriate color
+ // space ('GRAY').
+ kGray,
+ // Like kRGB, but implies fixed values for primaries etc.
+ kXYB,
+ // For non-RGB/gray data, e.g. from non-electro-optical sensors. Otherwise
+ // the same conditions as kRGB apply.
+ kUnknown
+};
+
+static inline const char* EnumName(ColorSpace /*unused*/) {
+ return "ColorSpace";
+}
+static inline constexpr uint64_t EnumBits(ColorSpace /*unused*/) {
+ using CS = ColorSpace;
+ return MakeBit(CS::kRGB) | MakeBit(CS::kGray) | MakeBit(CS::kXYB) |
+ MakeBit(CS::kUnknown);
+}
+
+// Values from CICP ColourPrimaries.
+enum class WhitePoint : uint32_t {
+ kD65 = 1, // sRGB/BT.709/Display P3/BT.2020
+ kCustom = 2, // Actual values encoded in separate fields
+ kE = 10, // XYZ
+ kDCI = 11, // DCI-P3
+};
+
+static inline const char* EnumName(WhitePoint /*unused*/) {
+ return "WhitePoint";
+}
+static inline constexpr uint64_t EnumBits(WhitePoint /*unused*/) {
+ return MakeBit(WhitePoint::kD65) | MakeBit(WhitePoint::kCustom) |
+ MakeBit(WhitePoint::kE) | MakeBit(WhitePoint::kDCI);
+}
+
+// Values from CICP ColourPrimaries
+enum class Primaries : uint32_t {
+ kSRGB = 1, // Same as BT.709
+ kCustom = 2, // Actual values encoded in separate fields
+ k2100 = 9, // Same as BT.2020
+ kP3 = 11,
+};
+
+static inline const char* EnumName(Primaries /*unused*/) { return "Primaries"; }
+static inline constexpr uint64_t EnumBits(Primaries /*unused*/) {
+ using Pr = Primaries;
+ return MakeBit(Pr::kSRGB) | MakeBit(Pr::kCustom) | MakeBit(Pr::k2100) |
+ MakeBit(Pr::kP3);
+}
+
+// Values from CICP TransferCharacteristics
+enum class TransferFunction : uint32_t {
+ k709 = 1,
+ kUnknown = 2,
+ kLinear = 8,
+ kSRGB = 13,
+ kPQ = 16, // from BT.2100
+ kDCI = 17, // from SMPTE RP 431-2 reference projector
+ kHLG = 18, // from BT.2100
+};
+
+static inline const char* EnumName(TransferFunction /*unused*/) {
+ return "TransferFunction";
+}
+static inline constexpr uint64_t EnumBits(TransferFunction /*unused*/) {
+ using TF = TransferFunction;
+ return MakeBit(TF::k709) | MakeBit(TF::kLinear) | MakeBit(TF::kSRGB) |
+ MakeBit(TF::kPQ) | MakeBit(TF::kDCI) | MakeBit(TF::kHLG) |
+ MakeBit(TF::kUnknown);
+}
+
+enum class RenderingIntent : uint32_t {
+ // Values match ICC sRGB encodings.
+ kPerceptual = 0, // good for photos, requires a profile with LUT.
+ kRelative, // good for logos.
+ kSaturation, // perhaps useful for CG with fully saturated colors.
+ kAbsolute, // leaves white point unchanged; good for proofing.
+};
+
+static inline const char* EnumName(RenderingIntent /*unused*/) {
+ return "RenderingIntent";
+}
+static inline constexpr uint64_t EnumBits(RenderingIntent /*unused*/) {
+ using RI = RenderingIntent;
+ return MakeBit(RI::kPerceptual) | MakeBit(RI::kRelative) |
+ MakeBit(RI::kSaturation) | MakeBit(RI::kAbsolute);
+}
+
+// Chromaticity (Y is omitted because it is 1 for primaries/white points)
+struct CIExy {
+ double x = 0.0;
+ double y = 0.0;
+};
+
+struct PrimariesCIExy {
+ CIExy r;
+ CIExy g;
+ CIExy b;
+};
+
+// Serializable form of CIExy.
+struct Customxy : public Fields {
+ Customxy();
+ JXL_FIELDS_NAME(Customxy)
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+ CIExy Get() const;
+ // Returns false if x or y do not fit in the encoding.
+ Status Set(const CIExy& xy);
+
+ int32_t x;
+ int32_t y;
+};
+
+struct CustomTransferFunction : public Fields {
+ CustomTransferFunction();
+ JXL_FIELDS_NAME(CustomTransferFunction)
+
+ // Sets fields and returns true if nonserialized_color_space has an implicit
+ // transfer function, otherwise leaves fields unchanged and returns false.
+ bool SetImplicit();
+
+ // Gamma: only used for PNG inputs
+ bool IsGamma() const { return have_gamma_; }
+ double GetGamma() const {
+ JXL_ASSERT(IsGamma());
+ return gamma_ * 1E-7; // (0, 1)
+ }
+ Status SetGamma(double gamma);
+
+ TransferFunction GetTransferFunction() const {
+ JXL_ASSERT(!IsGamma());
+ return transfer_function_;
+ }
+ void SetTransferFunction(const TransferFunction tf) {
+ have_gamma_ = false;
+ transfer_function_ = tf;
+ }
+
+ bool IsUnknown() const {
+ return !have_gamma_ && (transfer_function_ == TransferFunction::kUnknown);
+ }
+ bool IsSRGB() const {
+ return !have_gamma_ && (transfer_function_ == TransferFunction::kSRGB);
+ }
+ bool IsLinear() const {
+ return !have_gamma_ && (transfer_function_ == TransferFunction::kLinear);
+ }
+ bool IsPQ() const {
+ return !have_gamma_ && (transfer_function_ == TransferFunction::kPQ);
+ }
+ bool IsHLG() const {
+ return !have_gamma_ && (transfer_function_ == TransferFunction::kHLG);
+ }
+ bool Is709() const {
+ return !have_gamma_ && (transfer_function_ == TransferFunction::k709);
+ }
+ bool IsDCI() const {
+ return !have_gamma_ && (transfer_function_ == TransferFunction::kDCI);
+ }
+ bool IsSame(const CustomTransferFunction& other) const {
+ if (have_gamma_ != other.have_gamma_) return false;
+ if (have_gamma_) {
+ if (gamma_ != other.gamma_) return false;
+ } else {
+ if (transfer_function_ != other.transfer_function_) return false;
+ }
+ return true;
+ }
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+ // Must be set before calling VisitFields!
+ ColorSpace nonserialized_color_space = ColorSpace::kRGB;
+
+ private:
+ static constexpr uint32_t kGammaMul = 10000000;
+
+ bool have_gamma_;
+
+ // OETF exponent to go from linear to gamma-compressed.
+ uint32_t gamma_; // Only used if have_gamma_.
+
+ // Can be kUnknown.
+ TransferFunction transfer_function_; // Only used if !have_gamma_.
+};
+
+// Compact encoding of data required to interpret and translate pixels to a
+// known color space. Stored in Metadata. Thread-compatible.
+struct ColorEncoding : public Fields {
+ ColorEncoding();
+ JXL_FIELDS_NAME(ColorEncoding)
+
+ // Returns ready-to-use color encodings (initialized on-demand).
+ static const ColorEncoding& SRGB(bool is_gray = false);
+ static const ColorEncoding& LinearSRGB(bool is_gray = false);
+
+ // Returns true if an ICC profile was successfully created from fields.
+ // Must be called after modifying fields. Defined in color_management.cc.
+ Status CreateICC();
+
+ // Returns non-empty and valid ICC profile, unless:
+ // - between calling InternalRemoveICC() and CreateICC() in tests;
+ // - WantICC() == true and SetICC() was not yet called;
+ // - after a failed call to SetSRGB(), SetICC(), or CreateICC().
+ const PaddedBytes& ICC() const { return icc_; }
+
+ // Internal only, do not call except from tests.
+ void InternalRemoveICC() { icc_.clear(); }
+
+ // Returns true if `icc` is assigned and decoded successfully. If so,
+ // subsequent WantICC() will return true until DecideIfWantICC() changes it.
+ // Returning false indicates data has been lost.
+ Status SetICC(PaddedBytes&& icc) {
+ if (icc.empty()) return false;
+ icc_ = std::move(icc);
+
+ if (!SetFieldsFromICC()) {
+ InternalRemoveICC();
+ return false;
+ }
+
+ want_icc_ = true;
+ return true;
+ }
+
+ // Sets the raw ICC profile bytes, without parsing the ICC, and without
+ // updating the direct fields such as whitepoint, primaries and color
+ // space. Functions to get and set fields, such as SetWhitePoint, cannot be
+ // used anymore after this and functions such as IsSRGB return false no matter
+ // what the contents of the icc profile.
+ Status SetICCRaw(PaddedBytes&& icc) {
+ if (icc.empty()) return false;
+ icc_ = std::move(icc);
+
+ want_icc_ = true;
+ have_fields_ = false;
+ return true;
+ }
+
+ // Returns whether to send the ICC profile in the codestream.
+ bool WantICC() const { return want_icc_; }
+
+ // Return whether the direct fields are set, if false but ICC is set, only
+ // raw ICC bytes are known.
+ bool HaveFields() const { return have_fields_; }
+
+ // Causes WantICC() to return false if ICC() can be reconstructed from fields.
+ // Defined in color_management.cc.
+ void DecideIfWantICC();
+
+ bool IsGray() const { return color_space_ == ColorSpace::kGray; }
+ bool IsCMYK() const { return cmyk_; }
+ size_t Channels() const { return IsGray() ? 1 : 3; }
+
+ // Returns false if the field is invalid and unusable.
+ bool HasPrimaries() const {
+ return !IsGray() && color_space_ != ColorSpace::kXYB;
+ }
+
+ // Returns true after setting the field to a value defined by color_space,
+ // otherwise false and leaves the field unchanged.
+ bool ImplicitWhitePoint() {
+ if (color_space_ == ColorSpace::kXYB) {
+ white_point = WhitePoint::kD65;
+ return true;
+ }
+ return false;
+ }
+
+ // Returns whether the color space is known to be sRGB. If a raw unparsed ICC
+ // profile is set without the fields being set, this returns false, even if
+ // the content of the ICC profile would match sRGB.
+ bool IsSRGB() const {
+ if (!have_fields_) return false;
+ if (!IsGray() && color_space_ != ColorSpace::kRGB) return false;
+ if (white_point != WhitePoint::kD65) return false;
+ if (primaries != Primaries::kSRGB) return false;
+ if (!tf.IsSRGB()) return false;
+ return true;
+ }
+
+ // Returns whether the color space is known to be linear sRGB. If a raw
+ // unparsed ICC profile is set without the fields being set, this returns
+ // false, even if the content of the ICC profile would match linear sRGB.
+ bool IsLinearSRGB() const {
+ if (!have_fields_) return false;
+ if (!IsGray() && color_space_ != ColorSpace::kRGB) return false;
+ if (white_point != WhitePoint::kD65) return false;
+ if (primaries != Primaries::kSRGB) return false;
+ if (!tf.IsLinear()) return false;
+ return true;
+ }
+
+ Status SetSRGB(const ColorSpace cs,
+ const RenderingIntent ri = RenderingIntent::kRelative) {
+ InternalRemoveICC();
+ JXL_ASSERT(cs == ColorSpace::kGray || cs == ColorSpace::kRGB);
+ color_space_ = cs;
+ white_point = WhitePoint::kD65;
+ primaries = Primaries::kSRGB;
+ tf.SetTransferFunction(TransferFunction::kSRGB);
+ rendering_intent = ri;
+ return CreateICC();
+ }
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+ // Accessors ensure tf.nonserialized_color_space is updated at the same time.
+ ColorSpace GetColorSpace() const { return color_space_; }
+ void SetColorSpace(const ColorSpace cs) {
+ color_space_ = cs;
+ tf.nonserialized_color_space = cs;
+ }
+
+ CIExy GetWhitePoint() const;
+ Status SetWhitePoint(const CIExy& xy);
+
+ PrimariesCIExy GetPrimaries() const;
+ Status SetPrimaries(const PrimariesCIExy& xy);
+
+ // Checks if the color spaces (including white point / primaries) are the
+ // same, but ignores the transfer function, rendering intent and ICC bytes.
+ bool SameColorSpace(const ColorEncoding& other) const {
+ if (color_space_ != other.color_space_) return false;
+
+ if (white_point != other.white_point) return false;
+ if (white_point == WhitePoint::kCustom) {
+ if (white_.x != other.white_.x || white_.y != other.white_.y)
+ return false;
+ }
+
+ if (HasPrimaries() != other.HasPrimaries()) return false;
+ if (HasPrimaries()) {
+ if (primaries != other.primaries) return false;
+ if (primaries == Primaries::kCustom) {
+ if (red_.x != other.red_.x || red_.y != other.red_.y) return false;
+ if (green_.x != other.green_.x || green_.y != other.green_.y)
+ return false;
+ if (blue_.x != other.blue_.x || blue_.y != other.blue_.y) return false;
+ }
+ }
+ return true;
+ }
+
+ // Checks if the color space and transfer function are the same, ignoring
+ // rendering intent and ICC bytes
+ bool SameColorEncoding(const ColorEncoding& other) const {
+ return SameColorSpace(other) && tf.IsSame(other.tf);
+ }
+
+ mutable bool all_default;
+
+ // Only valid if HaveFields()
+ WhitePoint white_point;
+ Primaries primaries; // Only valid if HasPrimaries()
+ CustomTransferFunction tf;
+ RenderingIntent rendering_intent;
+
+ private:
+ // Returns true if all fields have been initialized (possibly to kUnknown).
+ // Returns false if the ICC profile is invalid or decoding it fails.
+ // Defined in enc_color_management.cc.
+ Status SetFieldsFromICC();
+
+ // If true, the codestream contains an ICC profile and we do not serialize
+ // fields. Otherwise, fields are serialized and we create an ICC profile.
+ bool want_icc_;
+
+ // When false, fields such as white_point and tf are invalid and must not be
+ // used. This occurs after setting a raw bytes-only ICC profile, only the
+ // ICC bytes may be used. The color_space_ field is still valid.
+ bool have_fields_ = true;
+
+ PaddedBytes icc_; // Valid ICC profile
+
+ ColorSpace color_space_; // Can be kUnknown
+ bool cmyk_ = false;
+
+ // Only used if white_point == kCustom.
+ Customxy white_;
+
+ // Only used if primaries == kCustom.
+ Customxy red_;
+ Customxy green_;
+ Customxy blue_;
+};
+
+// Returns whether the two inputs are approximately equal.
+static inline bool ApproxEq(const double a, const double b,
+#if JPEGXL_ENABLE_SKCMS
+ double max_l1 = 1E-3) {
+#else
+ double max_l1 = 8E-5) {
+#endif
+ // Threshold should be sufficient for ICC's 15-bit fixed-point numbers.
+ // We have seen differences of 7.1E-5 with lcms2 and 1E-3 with skcms.
+ return std::abs(a - b) <= max_l1;
+}
+
+// Returns a representation of the ColorEncoding fields (not icc).
+// Example description: "RGB_D65_SRG_Rel_Lin"
+std::string Description(const ColorEncoding& c);
+static inline std::ostream& operator<<(std::ostream& os,
+ const ColorEncoding& c) {
+ return os << Description(c);
+}
+
+void ConvertInternalToExternalColorEncoding(const jxl::ColorEncoding& internal,
+ JxlColorEncoding* external);
+
+Status ConvertExternalToInternalColorEncoding(const JxlColorEncoding& external,
+ jxl::ColorEncoding* internal);
+
+Status PrimariesToXYZ(float rx, float ry, float gx, float gy, float bx,
+ float by, float wx, float wy, float matrix[9]);
+Status PrimariesToXYZD50(float rx, float ry, float gx, float gy, float bx,
+ float by, float wx, float wy, float matrix[9]);
+Status AdaptToXYZD50(float wx, float wy, float matrix[9]);
+
+} // namespace jxl
+
+#endif // LIB_JXL_COLOR_ENCODING_INTERNAL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/color_encoding_internal_test.cc b/third_party/jpeg-xl/lib/jxl/color_encoding_internal_test.cc
new file mode 100644
index 0000000000..6ad47e1923
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/color_encoding_internal_test.cc
@@ -0,0 +1,157 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/color_encoding_internal.h"
+
+#include <stdio.h>
+
+#include "lib/jxl/encode_internal.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(ColorEncodingTest, RoundTripAll) {
+ for (const test::ColorEncodingDescriptor& cdesc : test::AllEncodings()) {
+ const ColorEncoding c_original = test::ColorEncodingFromDescriptor(cdesc);
+ // Verify Set(Get) yields the same white point/primaries/gamma.
+ {
+ ColorEncoding c;
+ EXPECT_TRUE(c.SetWhitePoint(c_original.GetWhitePoint()));
+ EXPECT_EQ(c_original.white_point, c.white_point);
+ }
+ {
+ ColorEncoding c;
+ EXPECT_TRUE(c.SetPrimaries(c_original.GetPrimaries()));
+ EXPECT_EQ(c_original.primaries, c.primaries);
+ }
+ if (c_original.tf.IsGamma()) {
+ ColorEncoding c;
+ EXPECT_TRUE(c.tf.SetGamma(c_original.tf.GetGamma()));
+ EXPECT_TRUE(c_original.tf.IsSame(c.tf));
+ }
+ }
+}
+
+TEST(ColorEncodingTest, CustomWhitePoint) {
+ ColorEncoding c;
+ // Nonsensical values
+ CIExy xy_in;
+ xy_in.x = 0.8;
+ xy_in.y = 0.01;
+ EXPECT_TRUE(c.SetWhitePoint(xy_in));
+ const CIExy xy = c.GetWhitePoint();
+
+ ColorEncoding c2;
+ EXPECT_TRUE(c2.SetWhitePoint(xy));
+ EXPECT_TRUE(c.SameColorSpace(c2));
+}
+
+TEST(ColorEncodingTest, CustomPrimaries) {
+ ColorEncoding c;
+ PrimariesCIExy xy_in;
+ // Nonsensical values
+ xy_in.r.x = -0.01;
+ xy_in.r.y = 0.2;
+ xy_in.g.x = 0.4;
+ xy_in.g.y = 0.401;
+ xy_in.b.x = 1.1;
+ xy_in.b.y = -1.2;
+ EXPECT_TRUE(c.SetPrimaries(xy_in));
+ const PrimariesCIExy xy = c.GetPrimaries();
+
+ ColorEncoding c2;
+ EXPECT_TRUE(c2.SetPrimaries(xy));
+ EXPECT_TRUE(c.SameColorSpace(c2));
+}
+
+TEST(ColorEncodingTest, CustomGamma) {
+ ColorEncoding c;
+#ifndef JXL_CRASH_ON_ERROR
+ EXPECT_FALSE(c.tf.SetGamma(0.0));
+ EXPECT_FALSE(c.tf.SetGamma(-1E-6));
+ EXPECT_FALSE(c.tf.SetGamma(1.001));
+#endif
+ EXPECT_TRUE(c.tf.SetGamma(1.0));
+ EXPECT_FALSE(c.tf.IsGamma());
+ EXPECT_TRUE(c.tf.IsLinear());
+
+ EXPECT_TRUE(c.tf.SetGamma(0.123));
+ EXPECT_TRUE(c.tf.IsGamma());
+ const double gamma = c.tf.GetGamma();
+
+ ColorEncoding c2;
+ EXPECT_TRUE(c2.tf.SetGamma(gamma));
+ EXPECT_TRUE(c.SameColorEncoding(c2));
+ EXPECT_TRUE(c2.tf.IsGamma());
+}
+
+TEST(ColorEncodingTest, InternalExternalConversion) {
+ ColorEncoding source_internal;
+ JxlColorEncoding external;
+ ColorEncoding destination_internal;
+
+ for (int i = 0; i < 100; i++) {
+ source_internal.SetColorSpace(static_cast<ColorSpace>(rand() % 4));
+ CIExy wp;
+ wp.x = (float(rand()) / float((RAND_MAX)) * 0.5) + 0.25;
+ wp.y = (float(rand()) / float((RAND_MAX)) * 0.5) + 0.25;
+ EXPECT_TRUE(source_internal.SetWhitePoint(wp));
+ if (source_internal.HasPrimaries()) {
+ PrimariesCIExy primaries;
+ primaries.r.x = (float(rand()) / float((RAND_MAX)) * 0.5) + 0.25;
+ primaries.r.y = (float(rand()) / float((RAND_MAX)) * 0.5) + 0.25;
+ primaries.g.x = (float(rand()) / float((RAND_MAX)) * 0.5) + 0.25;
+ primaries.g.y = (float(rand()) / float((RAND_MAX)) * 0.5) + 0.25;
+ primaries.b.x = (float(rand()) / float((RAND_MAX)) * 0.5) + 0.25;
+ primaries.b.y = (float(rand()) / float((RAND_MAX)) * 0.5) + 0.25;
+ EXPECT_TRUE(source_internal.SetPrimaries(primaries));
+ }
+ CustomTransferFunction tf;
+ EXPECT_TRUE(tf.SetGamma((float(rand()) / float((RAND_MAX)) * 0.5) + 0.25));
+ source_internal.tf = tf;
+ source_internal.rendering_intent = static_cast<RenderingIntent>(rand() % 4);
+
+ ConvertInternalToExternalColorEncoding(source_internal, &external);
+ EXPECT_TRUE(ConvertExternalToInternalColorEncoding(external,
+ &destination_internal));
+
+ EXPECT_EQ(source_internal.GetColorSpace(),
+ destination_internal.GetColorSpace());
+ EXPECT_EQ(source_internal.white_point, destination_internal.white_point);
+ EXPECT_EQ(source_internal.GetWhitePoint().x,
+ destination_internal.GetWhitePoint().x);
+ EXPECT_EQ(source_internal.GetWhitePoint().y,
+ destination_internal.GetWhitePoint().y);
+ if (source_internal.HasPrimaries()) {
+ EXPECT_EQ(source_internal.GetPrimaries().r.x,
+ destination_internal.GetPrimaries().r.x);
+ EXPECT_EQ(source_internal.GetPrimaries().r.y,
+ destination_internal.GetPrimaries().r.y);
+ EXPECT_EQ(source_internal.GetPrimaries().g.x,
+ destination_internal.GetPrimaries().g.x);
+ EXPECT_EQ(source_internal.GetPrimaries().g.y,
+ destination_internal.GetPrimaries().g.y);
+ EXPECT_EQ(source_internal.GetPrimaries().b.x,
+ destination_internal.GetPrimaries().b.x);
+ EXPECT_EQ(source_internal.GetPrimaries().b.y,
+ destination_internal.GetPrimaries().b.y);
+ }
+ EXPECT_EQ(source_internal.tf.IsGamma(), destination_internal.tf.IsGamma());
+ if (source_internal.tf.IsGamma()) {
+ EXPECT_EQ(source_internal.tf.GetGamma(),
+ destination_internal.tf.GetGamma());
+ } else {
+ EXPECT_EQ(source_internal.tf.GetTransferFunction(),
+ destination_internal.tf.GetTransferFunction());
+ }
+ EXPECT_EQ(source_internal.rendering_intent,
+ destination_internal.rendering_intent);
+ }
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/color_management.cc b/third_party/jpeg-xl/lib/jxl/color_management.cc
new file mode 100644
index 0000000000..d656888a8b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/color_management.cc
@@ -0,0 +1,682 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/color_management.h"
+
+#include <math.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <memory>
+#include <string>
+#include <utility>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/color_management.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// NOTE: this is only used to provide a reasonable ICC profile that other
+// software can read. Our own transforms use ExtraTF instead because that is
+// more precise and supports unbounded mode.
+std::vector<uint16_t> CreateTableCurve(uint32_t N, const ExtraTF tf) {
+ JXL_ASSERT(N <= 4096); // ICC MFT2 only allows 4K entries
+ JXL_ASSERT(tf == ExtraTF::kPQ || tf == ExtraTF::kHLG);
+ // No point using float - LCMS converts to 16-bit for A2B/MFT.
+ std::vector<uint16_t> table(N);
+ for (uint32_t i = 0; i < N; ++i) {
+ const float x = static_cast<float>(i) / (N - 1); // 1.0 at index N - 1.
+ const double dx = static_cast<double>(x);
+ // LCMS requires EOTF (e.g. 2.4 exponent).
+ double y = (tf == ExtraTF::kHLG) ? TF_HLG().DisplayFromEncoded(dx)
+ : TF_PQ().DisplayFromEncoded(dx);
+ JXL_ASSERT(y >= 0.0);
+ // Clamp to table range - necessary for HLG.
+ if (y > 1.0) y = 1.0;
+ // 1.0 corresponds to table value 0xFFFF.
+ table[i] = static_cast<uint16_t>(roundf(y * 65535.0));
+ }
+ return table;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(CreateTableCurve); // Local function.
+
+Status CIEXYZFromWhiteCIExy(const CIExy& xy, float XYZ[3]) {
+ // Target Y = 1.
+ if (std::abs(xy.y) < 1e-12) return JXL_FAILURE("Y value is too small");
+ const float factor = 1 / xy.y;
+ XYZ[0] = xy.x * factor;
+ XYZ[1] = 1;
+ XYZ[2] = (1 - xy.x - xy.y) * factor;
+ return true;
+}
+
+namespace {
+
+// NOTE: this is only used to provide a reasonable ICC profile that other
+// software can read. Our own transforms use ExtraTF instead because that is
+// more precise and supports unbounded mode.
+template <class Func>
+std::vector<uint16_t> CreateTableCurve(uint32_t N, const Func& func) {
+ JXL_ASSERT(N <= 4096); // ICC MFT2 only allows 4K entries
+ // No point using float - LCMS converts to 16-bit for A2B/MFT.
+ std::vector<uint16_t> table(N);
+ for (uint32_t i = 0; i < N; ++i) {
+ const float x = static_cast<float>(i) / (N - 1); // 1.0 at index N - 1.
+ // LCMS requires EOTF (e.g. 2.4 exponent).
+ double y = func.DisplayFromEncoded(static_cast<double>(x));
+ JXL_ASSERT(y >= 0.0);
+ // Clamp to table range - necessary for HLG.
+ if (y > 1.0) y = 1.0;
+ // 1.0 corresponds to table value 0xFFFF.
+ table[i] = static_cast<uint16_t>(roundf(y * 65535.0));
+ }
+ return table;
+}
+
+void ICCComputeMD5(const PaddedBytes& data, uint8_t sum[16])
+ JXL_NO_SANITIZE("unsigned-integer-overflow") {
+ PaddedBytes data64 = data;
+ data64.push_back(128);
+ // Add bytes such that ((size + 8) & 63) == 0.
+ size_t extra = ((64 - ((data64.size() + 8) & 63)) & 63);
+ data64.resize(data64.size() + extra, 0);
+ for (uint64_t i = 0; i < 64; i += 8) {
+ data64.push_back(static_cast<uint64_t>(data.size() << 3u) >> i);
+ }
+
+ static const uint32_t sineparts[64] = {
+ 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a,
+ 0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
+ 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, 0xf61e2562, 0xc040b340,
+ 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
+ 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8,
+ 0x676f02d9, 0x8d2a4c8a, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
+ 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, 0x289b7ec6, 0xeaa127fa,
+ 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
+ 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92,
+ 0xffeff47d, 0x85845dd1, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
+ 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
+ };
+ static const uint32_t shift[64] = {
+ 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
+ 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
+ 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
+ 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21,
+ };
+
+ uint32_t a0 = 0x67452301, b0 = 0xefcdab89, c0 = 0x98badcfe, d0 = 0x10325476;
+
+ for (size_t i = 0; i < data64.size(); i += 64) {
+ uint32_t a = a0, b = b0, c = c0, d = d0, f, g;
+ for (size_t j = 0; j < 64; j++) {
+ if (j < 16) {
+ f = (b & c) | ((~b) & d);
+ g = j;
+ } else if (j < 32) {
+ f = (d & b) | ((~d) & c);
+ g = (5 * j + 1) & 0xf;
+ } else if (j < 48) {
+ f = b ^ c ^ d;
+ g = (3 * j + 5) & 0xf;
+ } else {
+ f = c ^ (b | (~d));
+ g = (7 * j) & 0xf;
+ }
+ uint32_t dg0 = data64[i + g * 4 + 0], dg1 = data64[i + g * 4 + 1],
+ dg2 = data64[i + g * 4 + 2], dg3 = data64[i + g * 4 + 3];
+ uint32_t u = dg0 | (dg1 << 8u) | (dg2 << 16u) | (dg3 << 24u);
+ f += a + sineparts[j] + u;
+ a = d;
+ d = c;
+ c = b;
+ b += (f << shift[j]) | (f >> (32u - shift[j]));
+ }
+ a0 += a;
+ b0 += b;
+ c0 += c;
+ d0 += d;
+ }
+ sum[0] = a0;
+ sum[1] = a0 >> 8u;
+ sum[2] = a0 >> 16u;
+ sum[3] = a0 >> 24u;
+ sum[4] = b0;
+ sum[5] = b0 >> 8u;
+ sum[6] = b0 >> 16u;
+ sum[7] = b0 >> 24u;
+ sum[8] = c0;
+ sum[9] = c0 >> 8u;
+ sum[10] = c0 >> 16u;
+ sum[11] = c0 >> 24u;
+ sum[12] = d0;
+ sum[13] = d0 >> 8u;
+ sum[14] = d0 >> 16u;
+ sum[15] = d0 >> 24u;
+}
+
+Status CreateICCChadMatrix(CIExy w, float result[9]) {
+ float m[9];
+ if (w.y == 0) { // WhitePoint can not be pitch-black.
+ return JXL_FAILURE("Invalid WhitePoint");
+ }
+ JXL_RETURN_IF_ERROR(AdaptToXYZD50(w.x, w.y, m));
+ memcpy(result, m, sizeof(float) * 9);
+ return true;
+}
+
+// Creates RGB to XYZ matrix given RGB primaries and whitepoint in xy.
+Status CreateICCRGBMatrix(CIExy r, CIExy g, CIExy b, CIExy w, float result[9]) {
+ float m[9];
+ JXL_RETURN_IF_ERROR(
+ PrimariesToXYZD50(r.x, r.y, g.x, g.y, b.x, b.y, w.x, w.y, m));
+ memcpy(result, m, sizeof(float) * 9);
+ return true;
+}
+
+void WriteICCUint32(uint32_t value, size_t pos, PaddedBytes* JXL_RESTRICT icc) {
+ if (icc->size() < pos + 4) icc->resize(pos + 4);
+ (*icc)[pos + 0] = (value >> 24u) & 255;
+ (*icc)[pos + 1] = (value >> 16u) & 255;
+ (*icc)[pos + 2] = (value >> 8u) & 255;
+ (*icc)[pos + 3] = value & 255;
+}
+
+void WriteICCUint16(uint16_t value, size_t pos, PaddedBytes* JXL_RESTRICT icc) {
+ if (icc->size() < pos + 2) icc->resize(pos + 2);
+ (*icc)[pos + 0] = (value >> 8u) & 255;
+ (*icc)[pos + 1] = value & 255;
+}
+
+void WriteICCUint8(uint8_t value, size_t pos, PaddedBytes* JXL_RESTRICT icc) {
+ if (icc->size() < pos + 1) icc->resize(pos + 1);
+ (*icc)[pos] = value;
+}
+
+// Writes a 4-character tag
+void WriteICCTag(const char* value, size_t pos, PaddedBytes* JXL_RESTRICT icc) {
+ if (icc->size() < pos + 4) icc->resize(pos + 4);
+ memcpy(icc->data() + pos, value, 4);
+}
+
+Status WriteICCS15Fixed16(float value, size_t pos,
+ PaddedBytes* JXL_RESTRICT icc) {
+ // "nextafterf" for 32768.0f towards zero are:
+ // 32767.998046875, 32767.99609375, 32767.994140625
+ // Even the first value works well,...
+ bool ok = (-32767.995f <= value) && (value <= 32767.995f);
+ if (!ok) return JXL_FAILURE("ICC value is out of range / NaN");
+ int32_t i = value * 65536.0f + 0.5f;
+ // Use two's complement
+ uint32_t u = static_cast<uint32_t>(i);
+ WriteICCUint32(u, pos, icc);
+ return true;
+}
+
+Status CreateICCHeader(const ColorEncoding& c,
+ PaddedBytes* JXL_RESTRICT header) {
+ // TODO(lode): choose color management engine name, e.g. "skia" if
+ // integrated in skia.
+ static const char* kCmm = "jxl ";
+
+ header->resize(128, 0);
+
+ WriteICCUint32(0, 0, header); // size, correct value filled in at end
+ WriteICCTag(kCmm, 4, header);
+ WriteICCUint32(0x04400000u, 8, header);
+ const char* profile_type =
+ c.GetColorSpace() == ColorSpace::kXYB ? "scnr" : "mntr";
+ WriteICCTag(profile_type, 12, header);
+ WriteICCTag(c.IsGray() ? "GRAY" : "RGB ", 16, header);
+ WriteICCTag("XYZ ", 20, header);
+
+ // Three uint32_t's date/time encoding.
+ // TODO(lode): encode actual date and time, this is a placeholder
+ uint32_t year = 2019, month = 12, day = 1;
+ uint32_t hour = 0, minute = 0, second = 0;
+ WriteICCUint16(year, 24, header);
+ WriteICCUint16(month, 26, header);
+ WriteICCUint16(day, 28, header);
+ WriteICCUint16(hour, 30, header);
+ WriteICCUint16(minute, 32, header);
+ WriteICCUint16(second, 34, header);
+
+ WriteICCTag("acsp", 36, header);
+ WriteICCTag("APPL", 40, header);
+ WriteICCUint32(0, 44, header); // flags
+ WriteICCUint32(0, 48, header); // device manufacturer
+ WriteICCUint32(0, 52, header); // device model
+ WriteICCUint32(0, 56, header); // device attributes
+ WriteICCUint32(0, 60, header); // device attributes
+ WriteICCUint32(static_cast<uint32_t>(c.rendering_intent), 64, header);
+
+ // Mandatory D50 white point of profile connection space
+ WriteICCUint32(0x0000f6d6, 68, header);
+ WriteICCUint32(0x00010000, 72, header);
+ WriteICCUint32(0x0000d32d, 76, header);
+
+ WriteICCTag(kCmm, 80, header);
+
+ return true;
+}
+
+void AddToICCTagTable(const char* tag, size_t offset, size_t size,
+ PaddedBytes* JXL_RESTRICT tagtable,
+ std::vector<size_t>* offsets) {
+ WriteICCTag(tag, tagtable->size(), tagtable);
+ // writing true offset deferred to later
+ WriteICCUint32(0, tagtable->size(), tagtable);
+ offsets->push_back(offset);
+ WriteICCUint32(size, tagtable->size(), tagtable);
+}
+
+void FinalizeICCTag(PaddedBytes* JXL_RESTRICT tags, size_t* offset,
+ size_t* size) {
+ while ((tags->size() & 3) != 0) {
+ tags->push_back(0);
+ }
+ *offset += *size;
+ *size = tags->size() - *offset;
+}
+
+// The input text must be ASCII, writing other characters to UTF-16 is not
+// implemented.
+void CreateICCMlucTag(const std::string& text, PaddedBytes* JXL_RESTRICT tags) {
+ WriteICCTag("mluc", tags->size(), tags);
+ WriteICCUint32(0, tags->size(), tags);
+ WriteICCUint32(1, tags->size(), tags);
+ WriteICCUint32(12, tags->size(), tags);
+ WriteICCTag("enUS", tags->size(), tags);
+ WriteICCUint32(text.size() * 2, tags->size(), tags);
+ WriteICCUint32(28, tags->size(), tags);
+ for (size_t i = 0; i < text.size(); i++) {
+ tags->push_back(0); // prepend 0 for UTF-16
+ tags->push_back(text[i]);
+ }
+}
+
+Status CreateICCXYZTag(float xyz[3], PaddedBytes* JXL_RESTRICT tags) {
+ WriteICCTag("XYZ ", tags->size(), tags);
+ WriteICCUint32(0, tags->size(), tags);
+ for (size_t i = 0; i < 3; ++i) {
+ JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(xyz[i], tags->size(), tags));
+ }
+ return true;
+}
+
+Status CreateICCChadTag(float chad[9], PaddedBytes* JXL_RESTRICT tags) {
+ WriteICCTag("sf32", tags->size(), tags);
+ WriteICCUint32(0, tags->size(), tags);
+ for (size_t i = 0; i < 9; i++) {
+ JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(chad[i], tags->size(), tags));
+ }
+ return true;
+}
+
+void MaybeCreateICCCICPTag(const ColorEncoding& c,
+ PaddedBytes* JXL_RESTRICT tags, size_t* offset,
+ size_t* size, PaddedBytes* JXL_RESTRICT tagtable,
+ std::vector<size_t>* offsets) {
+ if (c.GetColorSpace() != ColorSpace::kRGB) {
+ return;
+ }
+ uint8_t primaries = 0;
+ if (c.primaries == Primaries::kP3) {
+ if (c.white_point == WhitePoint::kD65) {
+ primaries = 12;
+ } else if (c.white_point == WhitePoint::kDCI) {
+ primaries = 11;
+ } else {
+ return;
+ }
+ } else if (c.primaries != Primaries::kCustom &&
+ c.white_point == WhitePoint::kD65) {
+ primaries = static_cast<uint8_t>(c.primaries);
+ } else {
+ return;
+ }
+ if (c.tf.IsUnknown() || c.tf.IsGamma()) {
+ return;
+ }
+ WriteICCTag("cicp", tags->size(), tags);
+ WriteICCUint32(0, tags->size(), tags);
+ WriteICCUint8(primaries, tags->size(), tags);
+ WriteICCUint8(static_cast<uint8_t>(c.tf.GetTransferFunction()), tags->size(),
+ tags);
+ // Matrix
+ WriteICCUint8(0, tags->size(), tags);
+ // Full range
+ WriteICCUint8(1, tags->size(), tags);
+ FinalizeICCTag(tags, offset, size);
+ AddToICCTagTable("cicp", *offset, *size, tagtable, offsets);
+}
+
+void CreateICCCurvCurvTag(const std::vector<uint16_t>& curve,
+ PaddedBytes* JXL_RESTRICT tags) {
+ size_t pos = tags->size();
+ tags->resize(tags->size() + 12 + curve.size() * 2, 0);
+ WriteICCTag("curv", pos, tags);
+ WriteICCUint32(0, pos + 4, tags);
+ WriteICCUint32(curve.size(), pos + 8, tags);
+ for (size_t i = 0; i < curve.size(); i++) {
+ WriteICCUint16(curve[i], pos + 12 + i * 2, tags);
+ }
+}
+
+// Writes 12 + 4*params.size() bytes
+Status CreateICCCurvParaTag(std::vector<float> params, size_t curve_type,
+ PaddedBytes* JXL_RESTRICT tags) {
+ WriteICCTag("para", tags->size(), tags);
+ WriteICCUint32(0, tags->size(), tags);
+ WriteICCUint16(curve_type, tags->size(), tags);
+ WriteICCUint16(0, tags->size(), tags);
+ for (size_t i = 0; i < params.size(); i++) {
+ JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(params[i], tags->size(), tags));
+ }
+ return true;
+}
+
+Status CreateICCLutAtoBTagForXYB(PaddedBytes* JXL_RESTRICT tags) {
+ WriteICCTag("mAB ", tags->size(), tags);
+ // 4 reserved bytes set to 0
+ WriteICCUint32(0, tags->size(), tags);
+ // number of input channels
+ WriteICCUint8(3, tags->size(), tags);
+ // number of output channels
+ WriteICCUint8(3, tags->size(), tags);
+ // 2 reserved bytes for padding
+ WriteICCUint16(0, tags->size(), tags);
+ // offset to first B curve
+ WriteICCUint32(32, tags->size(), tags);
+ // offset to matrix
+ WriteICCUint32(244, tags->size(), tags);
+ // offset to first M curve
+ WriteICCUint32(148, tags->size(), tags);
+ // offset to CLUT
+ WriteICCUint32(80, tags->size(), tags);
+ // offset to first A curve
+ // (reuse linear B curves)
+ WriteICCUint32(32, tags->size(), tags);
+
+ // offset = 32
+ // no-op curves
+ JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({1.0f}, 0, tags));
+ JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({1.0f}, 0, tags));
+ JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({1.0f}, 0, tags));
+ // offset = 80
+ // number of grid points for each input channel
+ for (int i = 0; i < 16; ++i) {
+ WriteICCUint8(i < 3 ? 2 : 0, tags->size(), tags);
+ }
+ // precision = 2
+ WriteICCUint8(2, tags->size(), tags);
+ // 3 bytes of padding
+ WriteICCUint8(0, tags->size(), tags);
+ WriteICCUint16(0, tags->size(), tags);
+ const float kOffsets[3] = {
+ kScaledXYBOffset[0] + kScaledXYBOffset[1],
+ kScaledXYBOffset[1] - kScaledXYBOffset[0] + 1.0f / kScaledXYBScale[0],
+ kScaledXYBOffset[1] + kScaledXYBOffset[2]};
+ const float kScaling[3] = {
+ 1.0f / (1.0f / kScaledXYBScale[0] + 1.0f / kScaledXYBScale[1]),
+ 1.0f / (1.0f / kScaledXYBScale[0] + 1.0f / kScaledXYBScale[1]),
+ 1.0f / (1.0f / kScaledXYBScale[1] + 1.0f / kScaledXYBScale[2])};
+ // 2*2*2*3 entries of 2 bytes each = 48 bytes
+ for (size_t ix = 0; ix < 2; ++ix) {
+ for (size_t iy = 0; iy < 2; ++iy) {
+ for (size_t ib = 0; ib < 2; ++ib) {
+ float in_f[3] = {ix * 1.0f, iy * 1.0f, ib * 1.0f};
+ for (size_t c = 0; c < 3; ++c) {
+ in_f[c] /= kScaledXYBScale[c];
+ in_f[c] -= kScaledXYBOffset[c];
+ }
+ float out_f[3];
+ out_f[0] = in_f[1] + in_f[0];
+ out_f[1] = in_f[1] - in_f[0];
+ out_f[2] = in_f[2] + in_f[1];
+ for (int i = 0; i < 3; ++i) {
+ out_f[i] += kOffsets[i];
+ out_f[i] *= kScaling[i];
+ }
+ for (int i = 0; i < 3; ++i) {
+ JXL_RETURN_IF_ERROR(out_f[i] >= 0.f && out_f[i] <= 1.f);
+ uint16_t val = static_cast<uint16_t>(
+ 0.5f + 65535 * std::max(0.f, std::min(1.f, out_f[i])));
+ WriteICCUint16(val, tags->size(), tags);
+ }
+ }
+ }
+ }
+ // offset = 148
+ // 3 curves with 5 parameters = 3 * (12 + 5 * 4) = 96 bytes
+ for (size_t i = 0; i < 3; ++i) {
+ const float b =
+ -kOffsets[i] - std::cbrt(jxl::kNegOpsinAbsorbanceBiasRGB[i]);
+ std::vector<float> params = {
+ 3,
+ 1.0f / kScaling[i],
+ b,
+ 0, // unused
+ std::max(0.f, -b * kScaling[i]), // make skcms happy
+ };
+ JXL_RETURN_IF_ERROR(CreateICCCurvParaTag(params, 3, tags));
+ }
+ // offset = 244
+ const double matrix[] = {1.5170095, -1.1065225, 0.071623,
+ -0.050022, 0.5683655, -0.018344,
+ -1.387676, 1.1145555, 0.6857255};
+ // 12 * 4 = 48 bytes
+ for (size_t i = 0; i < 9; ++i) {
+ JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(matrix[i], tags->size(), tags));
+ }
+ for (size_t i = 0; i < 3; ++i) {
+ float intercept = 0;
+ for (size_t j = 0; j < 3; ++j) {
+ intercept += matrix[i * 3 + j] * jxl::kNegOpsinAbsorbanceBiasRGB[j];
+ }
+ JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(intercept, tags->size(), tags));
+ }
+ return true;
+}
+} // namespace
+
+Status MaybeCreateProfile(const ColorEncoding& c,
+ PaddedBytes* JXL_RESTRICT icc) {
+ PaddedBytes header, tagtable, tags;
+
+ if (c.GetColorSpace() == ColorSpace::kUnknown || c.tf.IsUnknown()) {
+ return false; // Not an error
+ }
+
+ switch (c.GetColorSpace()) {
+ case ColorSpace::kRGB:
+ case ColorSpace::kGray:
+ case ColorSpace::kXYB:
+ break; // OK
+ default:
+ return JXL_FAILURE("Invalid CS %u",
+ static_cast<unsigned int>(c.GetColorSpace()));
+ }
+
+ if (c.GetColorSpace() == ColorSpace::kXYB &&
+ c.rendering_intent != RenderingIntent::kPerceptual) {
+ return JXL_FAILURE(
+ "Only perceptual rendering intent implemented for XYB "
+ "ICC profile.");
+ }
+
+ JXL_RETURN_IF_ERROR(CreateICCHeader(c, &header));
+
+ std::vector<size_t> offsets;
+ // tag count, deferred to later
+ WriteICCUint32(0, tagtable.size(), &tagtable);
+
+ size_t tag_offset = 0, tag_size = 0;
+
+ CreateICCMlucTag(Description(c), &tags);
+ FinalizeICCTag(&tags, &tag_offset, &tag_size);
+ AddToICCTagTable("desc", tag_offset, tag_size, &tagtable, &offsets);
+
+ const std::string copyright = "CC0";
+ CreateICCMlucTag(copyright, &tags);
+ FinalizeICCTag(&tags, &tag_offset, &tag_size);
+ AddToICCTagTable("cprt", tag_offset, tag_size, &tagtable, &offsets);
+
+ // TODO(eustas): isn't it the other way round: gray image has d50 WhitePoint?
+ if (c.IsGray()) {
+ float wtpt[3];
+ JXL_RETURN_IF_ERROR(CIEXYZFromWhiteCIExy(c.GetWhitePoint(), wtpt));
+ JXL_RETURN_IF_ERROR(CreateICCXYZTag(wtpt, &tags));
+ } else {
+ float d50[3] = {0.964203, 1.0, 0.824905};
+ JXL_RETURN_IF_ERROR(CreateICCXYZTag(d50, &tags));
+ }
+ FinalizeICCTag(&tags, &tag_offset, &tag_size);
+ AddToICCTagTable("wtpt", tag_offset, tag_size, &tagtable, &offsets);
+
+ if (!c.IsGray()) {
+ // Chromatic adaptation matrix
+ float chad[9];
+ JXL_RETURN_IF_ERROR(CreateICCChadMatrix(c.GetWhitePoint(), chad));
+
+ JXL_RETURN_IF_ERROR(CreateICCChadTag(chad, &tags));
+ FinalizeICCTag(&tags, &tag_offset, &tag_size);
+ AddToICCTagTable("chad", tag_offset, tag_size, &tagtable, &offsets);
+ }
+
+ if (c.GetColorSpace() == ColorSpace::kRGB) {
+ MaybeCreateICCCICPTag(c, &tags, &tag_offset, &tag_size, &tagtable,
+ &offsets);
+
+ const PrimariesCIExy primaries = c.GetPrimaries();
+ float m[9];
+ JXL_RETURN_IF_ERROR(CreateICCRGBMatrix(primaries.r, primaries.g,
+ primaries.b, c.GetWhitePoint(), m));
+ float r[3] = {m[0], m[3], m[6]};
+ float g[3] = {m[1], m[4], m[7]};
+ float b[3] = {m[2], m[5], m[8]};
+
+ JXL_RETURN_IF_ERROR(CreateICCXYZTag(r, &tags));
+ FinalizeICCTag(&tags, &tag_offset, &tag_size);
+ AddToICCTagTable("rXYZ", tag_offset, tag_size, &tagtable, &offsets);
+
+ JXL_RETURN_IF_ERROR(CreateICCXYZTag(g, &tags));
+ FinalizeICCTag(&tags, &tag_offset, &tag_size);
+ AddToICCTagTable("gXYZ", tag_offset, tag_size, &tagtable, &offsets);
+
+ JXL_RETURN_IF_ERROR(CreateICCXYZTag(b, &tags));
+ FinalizeICCTag(&tags, &tag_offset, &tag_size);
+ AddToICCTagTable("bXYZ", tag_offset, tag_size, &tagtable, &offsets);
+ }
+
+ if (c.GetColorSpace() == ColorSpace::kXYB) {
+ JXL_RETURN_IF_ERROR(CreateICCLutAtoBTagForXYB(&tags));
+ FinalizeICCTag(&tags, &tag_offset, &tag_size);
+ AddToICCTagTable("A2B0", tag_offset, tag_size, &tagtable, &offsets);
+ } else {
+ if (c.tf.IsGamma()) {
+ float gamma = 1.0 / c.tf.GetGamma();
+ JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({gamma}, 0, &tags));
+ } else if (c.GetColorSpace() != ColorSpace::kXYB) {
+ switch (c.tf.GetTransferFunction()) {
+ case TransferFunction::kHLG:
+ CreateICCCurvCurvTag(
+ HWY_DYNAMIC_DISPATCH(CreateTableCurve)(4096, ExtraTF::kHLG),
+ &tags);
+ break;
+ case TransferFunction::kPQ:
+ CreateICCCurvCurvTag(
+ HWY_DYNAMIC_DISPATCH(CreateTableCurve)(4096, ExtraTF::kPQ),
+ &tags);
+ break;
+ case TransferFunction::kSRGB:
+ JXL_RETURN_IF_ERROR(CreateICCCurvParaTag(
+ {2.4, 1.0 / 1.055, 0.055 / 1.055, 1.0 / 12.92, 0.04045}, 3,
+ &tags));
+ break;
+ case TransferFunction::k709:
+ JXL_RETURN_IF_ERROR(CreateICCCurvParaTag(
+ {1.0 / 0.45, 1.0 / 1.099, 0.099 / 1.099, 1.0 / 4.5, 0.081}, 3,
+ &tags));
+ break;
+ case TransferFunction::kLinear:
+ JXL_RETURN_IF_ERROR(
+ CreateICCCurvParaTag({1.0, 1.0, 0.0, 1.0, 0.0}, 3, &tags));
+ break;
+ case TransferFunction::kDCI:
+ JXL_RETURN_IF_ERROR(
+ CreateICCCurvParaTag({2.6, 1.0, 0.0, 1.0, 0.0}, 3, &tags));
+ break;
+ default:
+ JXL_ABORT("Unknown TF %u",
+ static_cast<unsigned int>(c.tf.GetTransferFunction()));
+ }
+ }
+ FinalizeICCTag(&tags, &tag_offset, &tag_size);
+ if (c.IsGray()) {
+ AddToICCTagTable("kTRC", tag_offset, tag_size, &tagtable, &offsets);
+ } else {
+ AddToICCTagTable("rTRC", tag_offset, tag_size, &tagtable, &offsets);
+ AddToICCTagTable("gTRC", tag_offset, tag_size, &tagtable, &offsets);
+ AddToICCTagTable("bTRC", tag_offset, tag_size, &tagtable, &offsets);
+ }
+ }
+
+ // Tag count
+ WriteICCUint32(offsets.size(), 0, &tagtable);
+ for (size_t i = 0; i < offsets.size(); i++) {
+ WriteICCUint32(offsets[i] + header.size() + tagtable.size(), 4 + 12 * i + 4,
+ &tagtable);
+ }
+
+ // ICC profile size
+ WriteICCUint32(header.size() + tagtable.size() + tags.size(), 0, &header);
+
+ *icc = header;
+ icc->append(tagtable);
+ icc->append(tags);
+
+ // The MD5 checksum must be computed on the profile with profile flags,
+ // rendering intent, and region of the checksum itself, set to 0.
+ // TODO(lode): manually verify with a reliable tool that this creates correct
+ // signature (profile id) for ICC profiles.
+ PaddedBytes icc_sum = *icc;
+ if (icc_sum.size() >= 64 + 4) {
+ memset(icc_sum.data() + 44, 0, 4);
+ memset(icc_sum.data() + 64, 0, 4);
+ }
+ uint8_t checksum[16];
+ ICCComputeMD5(icc_sum, checksum);
+
+ memcpy(icc->data() + 84, checksum, sizeof(checksum));
+
+ return true;
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/color_management.h b/third_party/jpeg-xl/lib/jxl/color_management.h
new file mode 100644
index 0000000000..f623aa1c90
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/color_management.h
@@ -0,0 +1,40 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COLOR_MANAGEMENT_H_
+#define LIB_JXL_COLOR_MANAGEMENT_H_
+
+// ICC profiles and color space conversions.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+enum class ExtraTF {
+ kNone,
+ kPQ,
+ kHLG,
+ kSRGB,
+};
+
+// NOTE: for XYB colorspace, the created profile can be used to transform a
+// *scaled* XYB image (created by ScaleXYB()) to another colorspace.
+Status MaybeCreateProfile(const ColorEncoding& c,
+ PaddedBytes* JXL_RESTRICT icc);
+
+Status CIEXYZFromWhiteCIExy(const CIExy& xy, float XYZ[3]);
+
+} // namespace jxl
+
+#endif // LIB_JXL_COLOR_MANAGEMENT_H_
diff --git a/third_party/jpeg-xl/lib/jxl/color_management_test.cc b/third_party/jpeg-xl/lib/jxl/color_management_test.cc
new file mode 100644
index 0000000000..fc7a1c57ff
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/color_management_test.cc
@@ -0,0 +1,405 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/color_management.h"
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <new>
+#include <string>
+#include <utility>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/file_io.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+
+std::ostream& operator<<(std::ostream& os, const CIExy& xy) {
+ return os << "{x=" << xy.x << ", y=" << xy.y << "}";
+}
+
+std::ostream& operator<<(std::ostream& os, const PrimariesCIExy& primaries) {
+ return os << "{r=" << primaries.r << ", g=" << primaries.g
+ << ", b=" << primaries.b << "}";
+}
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::FloatNear;
+
+// Small enough to be fast. If changed, must update Generate*.
+static constexpr size_t kWidth = 16;
+
+static constexpr size_t kNumThreads = 1; // only have a single row.
+
+struct Globals {
+ // TODO(deymo): Make this a const.
+ static Globals* GetInstance() {
+ static Globals ret;
+ return &ret;
+ }
+
+ private:
+ Globals() {
+ in_gray = GenerateGray();
+ in_color = GenerateColor();
+ out_gray = ImageF(kWidth, 1);
+ out_color = ImageF(kWidth * 3, 1);
+
+ c_native = ColorEncoding::LinearSRGB(/*is_gray=*/false);
+ c_gray = ColorEncoding::LinearSRGB(/*is_gray=*/true);
+ }
+
+ static ImageF GenerateGray() {
+ ImageF gray(kWidth, 1);
+ float* JXL_RESTRICT row = gray.Row(0);
+ // Increasing left to right
+ for (uint32_t x = 0; x < kWidth; ++x) {
+ row[x] = x * 1.0f / (kWidth - 1); // [0, 1]
+ }
+ return gray;
+ }
+
+ static ImageF GenerateColor() {
+ ImageF image(kWidth * 3, 1);
+ float* JXL_RESTRICT interleaved = image.Row(0);
+ std::fill(interleaved, interleaved + kWidth * 3, 0.0f);
+
+ // [0, 4): neutral
+ for (int32_t x = 0; x < 4; ++x) {
+ interleaved[3 * x + 0] = x * 1.0f / 3; // [0, 1]
+ interleaved[3 * x + 2] = interleaved[3 * x + 1] = interleaved[3 * x + 0];
+ }
+
+ // [4, 13): pure RGB with low/medium/high saturation
+ for (int32_t c = 0; c < 3; ++c) {
+ interleaved[3 * (4 + c) + c] = 0.08f + c * 0.01f;
+ interleaved[3 * (7 + c) + c] = 0.75f + c * 0.01f;
+ interleaved[3 * (10 + c) + c] = 1.0f;
+ }
+
+ // [13, 16): impure, not quite saturated RGB
+ interleaved[3 * 13 + 0] = 0.86f;
+ interleaved[3 * 13 + 2] = interleaved[3 * 13 + 1] = 0.16f;
+ interleaved[3 * 14 + 1] = 0.87f;
+ interleaved[3 * 14 + 2] = interleaved[3 * 14 + 0] = 0.16f;
+ interleaved[3 * 15 + 2] = 0.88f;
+ interleaved[3 * 15 + 1] = interleaved[3 * 15 + 0] = 0.16f;
+
+ return image;
+ }
+
+ public:
+ // ImageF so we can use VerifyRelativeError; all are interleaved RGB.
+ ImageF in_gray;
+ ImageF in_color;
+ ImageF out_gray;
+ ImageF out_color;
+ ColorEncoding c_native;
+ ColorEncoding c_gray;
+};
+
+class ColorManagementTest
+ : public ::testing::TestWithParam<test::ColorEncodingDescriptor> {
+ public:
+ static void VerifySameFields(const ColorEncoding& c,
+ const ColorEncoding& c2) {
+ ASSERT_EQ(c.rendering_intent, c2.rendering_intent);
+ ASSERT_EQ(c.GetColorSpace(), c2.GetColorSpace());
+ ASSERT_EQ(c.white_point, c2.white_point);
+ if (c.HasPrimaries()) {
+ ASSERT_EQ(c.primaries, c2.primaries);
+ }
+ ASSERT_TRUE(c.tf.IsSame(c2.tf));
+ }
+
+ // "Same" pixels after converting g->c_native -> c -> g->c_native.
+ static void VerifyPixelRoundTrip(const ColorEncoding& c) {
+ Globals* g = Globals::GetInstance();
+ const ColorEncoding& c_native = c.IsGray() ? g->c_gray : g->c_native;
+ const JxlCmsInterface& cms = GetJxlCms();
+ ColorSpaceTransform xform_fwd(cms);
+ ColorSpaceTransform xform_rev(cms);
+ const float intensity_target =
+ c.tf.IsHLG() ? 1000 : kDefaultIntensityTarget;
+ ASSERT_TRUE(
+ xform_fwd.Init(c_native, c, intensity_target, kWidth, kNumThreads));
+ ASSERT_TRUE(
+ xform_rev.Init(c, c_native, intensity_target, kWidth, kNumThreads));
+
+ const size_t thread = 0;
+ const ImageF& in = c.IsGray() ? g->in_gray : g->in_color;
+ ImageF* JXL_RESTRICT out = c.IsGray() ? &g->out_gray : &g->out_color;
+ ASSERT_TRUE(xform_fwd.Run(thread, in.Row(0), xform_fwd.BufDst(thread)));
+ ASSERT_TRUE(xform_rev.Run(thread, xform_fwd.BufDst(thread), out->Row(0)));
+
+#if JPEGXL_ENABLE_SKCMS
+ double max_l1 = 7E-4;
+ double max_rel = 4E-7;
+#else
+ double max_l1 = 5E-5;
+ // Most are lower; reached 3E-7 with D60 AP0.
+ double max_rel = 4E-7;
+#endif
+ if (c.IsGray()) max_rel = 2E-5;
+ JXL_ASSERT_OK(VerifyRelativeError(in, *out, max_l1, max_rel, _));
+ }
+};
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(ColorManagementTestInstantiation,
+ ColorManagementTest,
+ ::testing::ValuesIn(test::AllEncodings()));
+
+// Exercises the ColorManagement interface for ALL ColorEncoding synthesizable
+// via enums.
+TEST_P(ColorManagementTest, VerifyAllProfiles) {
+ ColorEncoding c = ColorEncodingFromDescriptor(GetParam());
+ printf("%s\n", Description(c).c_str());
+
+ // Can create profile.
+ ASSERT_TRUE(c.CreateICC());
+
+ // Can set an equivalent ColorEncoding from the generated ICC profile.
+ ColorEncoding c3;
+ ASSERT_TRUE(c3.SetICC(PaddedBytes(c.ICC())));
+ VerifySameFields(c, c3);
+
+ VerifyPixelRoundTrip(c);
+}
+
+testing::Matcher<CIExy> CIExyIs(const double x, const double y) {
+ static constexpr double kMaxError = 1e-4;
+ return testing::AllOf(
+ testing::Field(&CIExy::x, testing::DoubleNear(x, kMaxError)),
+ testing::Field(&CIExy::y, testing::DoubleNear(y, kMaxError)));
+}
+
+testing::Matcher<PrimariesCIExy> PrimariesAre(
+ const testing::Matcher<CIExy>& r, const testing::Matcher<CIExy>& g,
+ const testing::Matcher<CIExy>& b) {
+ return testing::AllOf(testing::Field(&PrimariesCIExy::r, r),
+ testing::Field(&PrimariesCIExy::g, g),
+ testing::Field(&PrimariesCIExy::b, b));
+}
+
+TEST_F(ColorManagementTest, sRGBChromaticity) {
+ const ColorEncoding sRGB = ColorEncoding::SRGB();
+ EXPECT_THAT(sRGB.GetWhitePoint(), CIExyIs(0.3127, 0.3290));
+ EXPECT_THAT(sRGB.GetPrimaries(),
+ PrimariesAre(CIExyIs(0.64, 0.33), CIExyIs(0.30, 0.60),
+ CIExyIs(0.15, 0.06)));
+}
+
+TEST_F(ColorManagementTest, D2700Chromaticity) {
+ PaddedBytes icc =
+ jxl::test::ReadTestData("jxl/color_management/sRGB-D2700.icc");
+ ColorEncoding sRGB_D2700;
+ ASSERT_TRUE(sRGB_D2700.SetICC(std::move(icc)));
+
+ EXPECT_THAT(sRGB_D2700.GetWhitePoint(), CIExyIs(0.45986, 0.41060));
+ // The illuminant-relative chromaticities of this profile's primaries are the
+ // same as for sRGB. It is the PCS-relative chromaticities that would be
+ // different.
+ EXPECT_THAT(sRGB_D2700.GetPrimaries(),
+ PrimariesAre(CIExyIs(0.64, 0.33), CIExyIs(0.30, 0.60),
+ CIExyIs(0.15, 0.06)));
+}
+
+TEST_F(ColorManagementTest, D2700ToSRGB) {
+ PaddedBytes icc =
+ jxl::test::ReadTestData("jxl/color_management/sRGB-D2700.icc");
+ ColorEncoding sRGB_D2700;
+ ASSERT_TRUE(sRGB_D2700.SetICC(std::move(icc)));
+
+ ColorSpaceTransform transform(GetJxlCms());
+ ASSERT_TRUE(transform.Init(sRGB_D2700, ColorEncoding::SRGB(),
+ kDefaultIntensityTarget, 1, 1));
+ const float sRGB_D2700_values[3] = {0.863, 0.737, 0.490};
+ float sRGB_values[3];
+ ASSERT_TRUE(transform.Run(0, sRGB_D2700_values, sRGB_values));
+ EXPECT_THAT(sRGB_values,
+ ElementsAre(FloatNear(0.914, 1e-3), FloatNear(0.745, 1e-3),
+ FloatNear(0.601, 1e-3)));
+}
+
+TEST_F(ColorManagementTest, P3HlgTo2020Hlg) {
+ ColorEncoding p3_hlg;
+ p3_hlg.SetColorSpace(ColorSpace::kRGB);
+ p3_hlg.white_point = WhitePoint::kD65;
+ p3_hlg.primaries = Primaries::kP3;
+ p3_hlg.tf.SetTransferFunction(TransferFunction::kHLG);
+ ASSERT_TRUE(p3_hlg.CreateICC());
+
+ ColorEncoding rec2020_hlg = p3_hlg;
+ rec2020_hlg.primaries = Primaries::k2100;
+ ASSERT_TRUE(rec2020_hlg.CreateICC());
+
+ ColorSpaceTransform transform(GetJxlCms());
+ ASSERT_TRUE(transform.Init(p3_hlg, rec2020_hlg, 1000, 1, 1));
+ const float p3_hlg_values[3] = {0., 0.75, 0.};
+ float rec2020_hlg_values[3];
+ ASSERT_TRUE(transform.Run(0, p3_hlg_values, rec2020_hlg_values));
+ EXPECT_THAT(rec2020_hlg_values,
+ ElementsAre(FloatNear(0.3973, 1e-4), FloatNear(0.7382, 1e-4),
+ FloatNear(0.1183, 1e-4)));
+}
+
+TEST_F(ColorManagementTest, HlgOotf) {
+ ColorEncoding p3_hlg;
+ p3_hlg.SetColorSpace(ColorSpace::kRGB);
+ p3_hlg.white_point = WhitePoint::kD65;
+ p3_hlg.primaries = Primaries::kP3;
+ p3_hlg.tf.SetTransferFunction(TransferFunction::kHLG);
+ ASSERT_TRUE(p3_hlg.CreateICC());
+
+ ColorSpaceTransform transform_to_1000(GetJxlCms());
+ ASSERT_TRUE(
+ transform_to_1000.Init(p3_hlg, ColorEncoding::LinearSRGB(), 1000, 1, 1));
+ // HDR reference white: https://www.itu.int/pub/R-REP-BT.2408-4-2021
+ float p3_hlg_values[3] = {0.75, 0.75, 0.75};
+ float linear_srgb_values[3];
+ ASSERT_TRUE(transform_to_1000.Run(0, p3_hlg_values, linear_srgb_values));
+ // On a 1000-nit display, HDR reference white should be 203 cd/m² which is
+ // 0.203 times the maximum.
+ EXPECT_THAT(linear_srgb_values,
+ ElementsAre(FloatNear(0.203, 1e-3), FloatNear(0.203, 1e-3),
+ FloatNear(0.203, 1e-3)));
+
+ ColorSpaceTransform transform_to_400(GetJxlCms());
+ ASSERT_TRUE(
+ transform_to_400.Init(p3_hlg, ColorEncoding::LinearSRGB(), 400, 1, 1));
+ ASSERT_TRUE(transform_to_400.Run(0, p3_hlg_values, linear_srgb_values));
+ // On a 400-nit display, it should be 100 cd/m².
+ EXPECT_THAT(linear_srgb_values,
+ ElementsAre(FloatNear(0.250, 1e-3), FloatNear(0.250, 1e-3),
+ FloatNear(0.250, 1e-3)));
+
+ p3_hlg_values[2] = 0.50;
+ ASSERT_TRUE(transform_to_1000.Run(0, p3_hlg_values, linear_srgb_values));
+ EXPECT_THAT(linear_srgb_values,
+ ElementsAre(FloatNear(0.201, 1e-3), FloatNear(0.201, 1e-3),
+ FloatNear(0.050, 1e-3)));
+
+ ColorSpaceTransform transform_from_400(GetJxlCms());
+ ASSERT_TRUE(
+ transform_from_400.Init(ColorEncoding::LinearSRGB(), p3_hlg, 400, 1, 1));
+ linear_srgb_values[0] = linear_srgb_values[1] = linear_srgb_values[2] = 0.250;
+ ASSERT_TRUE(transform_from_400.Run(0, linear_srgb_values, p3_hlg_values));
+ EXPECT_THAT(p3_hlg_values,
+ ElementsAre(FloatNear(0.75, 1e-3), FloatNear(0.75, 1e-3),
+ FloatNear(0.75, 1e-3)));
+
+ ColorEncoding grayscale_hlg;
+ grayscale_hlg.SetColorSpace(ColorSpace::kGray);
+ grayscale_hlg.white_point = WhitePoint::kD65;
+ grayscale_hlg.tf.SetTransferFunction(TransferFunction::kHLG);
+ ASSERT_TRUE(grayscale_hlg.CreateICC());
+
+ ColorSpaceTransform grayscale_transform(GetJxlCms());
+ ASSERT_TRUE(grayscale_transform.Init(
+ grayscale_hlg, ColorEncoding::LinearSRGB(/*is_gray=*/true), 1000, 1, 1));
+ const float grayscale_hlg_value = 0.75;
+ float linear_grayscale_value;
+ ASSERT_TRUE(grayscale_transform.Run(0, &grayscale_hlg_value,
+ &linear_grayscale_value));
+ EXPECT_THAT(linear_grayscale_value, FloatNear(0.203, 1e-3));
+}
+
+TEST_F(ColorManagementTest, XYBProfile) {
+ ColorEncoding c_xyb;
+ c_xyb.SetColorSpace(ColorSpace::kXYB);
+ c_xyb.rendering_intent = RenderingIntent::kPerceptual;
+ ASSERT_TRUE(c_xyb.CreateICC());
+ ColorEncoding c_native = ColorEncoding::LinearSRGB(false);
+
+ static const size_t kGridDim = 17;
+ static const size_t kNumColors = kGridDim * kGridDim * kGridDim;
+ const JxlCmsInterface& cms = GetJxlCms();
+ ColorSpaceTransform xform(cms);
+ ASSERT_TRUE(
+ xform.Init(c_xyb, c_native, kDefaultIntensityTarget, kNumColors, 1));
+
+ ImageMetadata metadata;
+ metadata.color_encoding = c_native;
+ ImageBundle ib(&metadata);
+ Image3F native(kNumColors, 1);
+ float mul = 1.0f / (kGridDim - 1);
+ for (size_t ir = 0, x = 0; ir < kGridDim; ++ir) {
+ for (size_t ig = 0; ig < kGridDim; ++ig) {
+ for (size_t ib = 0; ib < kGridDim; ++ib, ++x) {
+ native.PlaneRow(0, 0)[x] = ir * mul;
+ native.PlaneRow(1, 0)[x] = ig * mul;
+ native.PlaneRow(2, 0)[x] = ib * mul;
+ }
+ }
+ }
+ ib.SetFromImage(std::move(native), c_native);
+ const Image3F& in = *ib.color();
+ Image3F opsin(kNumColors, 1);
+ ToXYB(ib, nullptr, &opsin, cms, nullptr);
+
+ Image3F opsin2 = CopyImage(opsin);
+ ScaleXYB(&opsin2);
+
+ float* src = xform.BufSrc(0);
+ for (size_t i = 0; i < kNumColors; ++i) {
+ for (size_t c = 0; c < 3; ++c) {
+ src[3 * i + c] = opsin2.PlaneRow(c, 0)[i];
+ }
+ }
+
+ float* dst = xform.BufDst(0);
+ ASSERT_TRUE(xform.Run(0, src, dst));
+
+ Image3F out(kNumColors, 1);
+ for (size_t i = 0; i < kNumColors; ++i) {
+ for (size_t c = 0; c < 3; ++c) {
+ out.PlaneRow(c, 0)[i] = dst[3 * i + c];
+ }
+ }
+
+ auto debug_print_color = [&](size_t i) {
+ printf(
+ "(%f, %f, %f) -> (%9.6f, %f, %f) -> (%f, %f, %f) -> "
+ "(%9.6f, %9.6f, %9.6f)",
+ in.PlaneRow(0, 0)[i], in.PlaneRow(1, 0)[i], in.PlaneRow(2, 0)[i],
+ opsin.PlaneRow(0, 0)[i], opsin.PlaneRow(1, 0)[i],
+ opsin.PlaneRow(2, 0)[i], opsin2.PlaneRow(0, 0)[i],
+ opsin2.PlaneRow(1, 0)[i], opsin2.PlaneRow(2, 0)[i],
+ out.PlaneRow(0, 0)[i], out.PlaneRow(1, 0)[i], out.PlaneRow(2, 0)[i]);
+ };
+
+ float max_err[3] = {};
+ size_t max_err_i[3] = {};
+ for (size_t i = 0; i < kNumColors; ++i) {
+ for (size_t c = 0; c < 3; ++c) {
+ // debug_print_color(i); printf("\n");
+ float err = std::abs(in.PlaneRow(c, 0)[i] - out.PlaneRow(c, 0)[i]);
+ if (err > max_err[c]) {
+ max_err[c] = err;
+ max_err_i[c] = i;
+ }
+ }
+ }
+ static float kMaxError[3] = {9e-4, 4e-4, 5e-4};
+ printf("Maximum errors:\n");
+ for (size_t c = 0; c < 3; ++c) {
+ debug_print_color(max_err_i[c]);
+ printf(" %f\n", max_err[c]);
+ EXPECT_LT(max_err[c], kMaxError[c]);
+ }
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/common.h b/third_party/jpeg-xl/lib/jxl/common.h
new file mode 100644
index 0000000000..c2ebe029a8
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/common.h
@@ -0,0 +1,245 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COMMON_H_
+#define LIB_JXL_COMMON_H_
+
+// Shared constants and helper functions.
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include <limits> // numeric_limits
+#include <memory> // unique_ptr
+#include <string>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+
+#ifndef JXL_HIGH_PRECISION
+#define JXL_HIGH_PRECISION 1
+#endif
+
+// Macro that defines whether support for decoding JXL files to JPEG is enabled.
+#ifndef JPEGXL_ENABLE_TRANSCODE_JPEG
+#define JPEGXL_ENABLE_TRANSCODE_JPEG 1
+#endif // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+// Macro that defines whether support for decoding boxes is enabled.
+#ifndef JPEGXL_ENABLE_BOXES
+#define JPEGXL_ENABLE_BOXES 1
+#endif // JPEGXL_ENABLE_BOXES
+
+namespace jxl {
+// Some enums and typedefs used by more than one header file.
+
+constexpr size_t kBitsPerByte = 8; // more clear than CHAR_BIT
+
+constexpr inline size_t RoundUpBitsToByteMultiple(size_t bits) {
+ return (bits + 7) & ~size_t(7);
+}
+
+constexpr inline size_t RoundUpToBlockDim(size_t dim) {
+ return (dim + 7) & ~size_t(7);
+}
+
+static inline bool JXL_MAYBE_UNUSED SafeAdd(const uint64_t a, const uint64_t b,
+ uint64_t& sum) {
+ sum = a + b;
+ return sum >= a; // no need to check b - either sum >= both or < both.
+}
+
+template <typename T1, typename T2>
+constexpr inline T1 DivCeil(T1 a, T2 b) {
+ return (a + b - 1) / b;
+}
+
+// Works for any `align`; if a power of two, compiler emits ADD+AND.
+constexpr inline size_t RoundUpTo(size_t what, size_t align) {
+ return DivCeil(what, align) * align;
+}
+
+constexpr double kPi = 3.14159265358979323846264338327950288;
+
+// Reasonable default for sRGB, matches common monitors. We map white to this
+// many nits (cd/m^2) by default. Butteraugli was tuned for 250 nits, which is
+// very close.
+static constexpr float kDefaultIntensityTarget = 255;
+
+template <typename T>
+constexpr T Pi(T multiplier) {
+ return static_cast<T>(multiplier * kPi);
+}
+
+// Block is the square grid of pixels to which an "energy compaction"
+// transformation (e.g. DCT) is applied. Each block has its own AC quantizer.
+constexpr size_t kBlockDim = 8;
+
+constexpr size_t kDCTBlockSize = kBlockDim * kBlockDim;
+
+constexpr size_t kGroupDim = 256;
+static_assert(kGroupDim % kBlockDim == 0,
+ "Group dim should be divisible by block dim");
+constexpr size_t kGroupDimInBlocks = kGroupDim / kBlockDim;
+
+// Maximum number of passes in an image.
+constexpr size_t kMaxNumPasses = 11;
+
+// Maximum number of reference frames.
+constexpr size_t kMaxNumReferenceFrames = 4;
+
+// Dimensions of a frame, in pixels, and other derived dimensions.
+// Computed from FrameHeader.
+// TODO(veluca): add extra channels.
+struct FrameDimensions {
+ void Set(size_t xsize, size_t ysize, size_t group_size_shift,
+ size_t max_hshift, size_t max_vshift, bool modular_mode,
+ size_t upsampling) {
+ group_dim = (kGroupDim >> 1) << group_size_shift;
+ dc_group_dim = group_dim * kBlockDim;
+ xsize_upsampled = xsize;
+ ysize_upsampled = ysize;
+ this->xsize = DivCeil(xsize, upsampling);
+ this->ysize = DivCeil(ysize, upsampling);
+ xsize_blocks = DivCeil(this->xsize, kBlockDim << max_hshift) << max_hshift;
+ ysize_blocks = DivCeil(this->ysize, kBlockDim << max_vshift) << max_vshift;
+ xsize_padded = xsize_blocks * kBlockDim;
+ ysize_padded = ysize_blocks * kBlockDim;
+ if (modular_mode) {
+ // Modular mode doesn't have any padding.
+ xsize_padded = this->xsize;
+ ysize_padded = this->ysize;
+ }
+ xsize_upsampled_padded = xsize_padded * upsampling;
+ ysize_upsampled_padded = ysize_padded * upsampling;
+ xsize_groups = DivCeil(this->xsize, group_dim);
+ ysize_groups = DivCeil(this->ysize, group_dim);
+ xsize_dc_groups = DivCeil(xsize_blocks, group_dim);
+ ysize_dc_groups = DivCeil(ysize_blocks, group_dim);
+ num_groups = xsize_groups * ysize_groups;
+ num_dc_groups = xsize_dc_groups * ysize_dc_groups;
+ }
+
+ // Image size without any upsampling, i.e. original_size / upsampling.
+ size_t xsize;
+ size_t ysize;
+ // Original image size.
+ size_t xsize_upsampled;
+ size_t ysize_upsampled;
+ // Image size after upsampling the padded image.
+ size_t xsize_upsampled_padded;
+ size_t ysize_upsampled_padded;
+ // Image size after padding to a multiple of kBlockDim (if VarDCT mode).
+ size_t xsize_padded;
+ size_t ysize_padded;
+ // Image size in kBlockDim blocks.
+ size_t xsize_blocks;
+ size_t ysize_blocks;
+ // Image size in number of groups.
+ size_t xsize_groups;
+ size_t ysize_groups;
+ // Image size in number of DC groups.
+ size_t xsize_dc_groups;
+ size_t ysize_dc_groups;
+ // Number of AC or DC groups.
+ size_t num_groups;
+ size_t num_dc_groups;
+ // Size of a group.
+ size_t group_dim;
+ size_t dc_group_dim;
+};
+
+// Prior to C++14 (i.e. C++11): provide our own make_unique
+#if __cplusplus < 201402L
+template <typename T, typename... Args>
+std::unique_ptr<T> make_unique(Args&&... args) {
+ return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
+}
+#else
+using std::make_unique;
+#endif
+
+template <typename T>
+JXL_INLINE T Clamp1(T val, T low, T hi) {
+ return val < low ? low : val > hi ? hi : val;
+}
+
+// Encodes non-negative (X) into (2 * X), negative (-X) into (2 * X - 1)
+constexpr uint32_t PackSigned(int32_t value)
+ JXL_NO_SANITIZE("unsigned-integer-overflow") {
+ return (static_cast<uint32_t>(value) << 1) ^
+ ((static_cast<uint32_t>(~value) >> 31) - 1);
+}
+
+// Reverse to PackSigned, i.e. UnpackSigned(PackSigned(X)) == X.
+// (((~value) & 1) - 1) is either 0 or 0xFF...FF and it will have an expected
+// unsigned-integer-overflow.
+constexpr intptr_t UnpackSigned(size_t value)
+ JXL_NO_SANITIZE("unsigned-integer-overflow") {
+ return static_cast<intptr_t>((value >> 1) ^ (((~value) & 1) - 1));
+}
+
+// conversion from integer to string.
+template <typename T>
+std::string ToString(T n) {
+ char data[32] = {};
+ if (T(0.1) != T(0)) {
+ // float
+ snprintf(data, sizeof(data), "%g", static_cast<double>(n));
+ } else if (T(-1) > T(0)) {
+ // unsigned
+ snprintf(data, sizeof(data), "%llu", static_cast<unsigned long long>(n));
+ } else {
+ // signed
+ snprintf(data, sizeof(data), "%lld", static_cast<long long>(n));
+ }
+ return data;
+}
+
+static inline JXL_MAYBE_UNUSED uint64_t DecodeVarInt(const uint8_t* input,
+ size_t inputSize,
+ size_t* pos) {
+ size_t i;
+ uint64_t ret = 0;
+ for (i = 0; *pos + i < inputSize && i < 10; ++i) {
+ ret |= uint64_t(input[*pos + i] & 127) << uint64_t(7 * i);
+ // If the next-byte flag is not set, stop
+ if ((input[*pos + i] & 128) == 0) break;
+ }
+ // TODO: Return a decoding error if i == 10.
+ *pos += i + 1;
+ return ret;
+}
+
+static inline JXL_MAYBE_UNUSED bool EncodeVarInt(uint64_t value,
+ size_t output_size,
+ size_t* output_pos,
+ uint8_t* output) {
+ // While more than 7 bits of data are left,
+ // store 7 bits and set the next byte flag
+ while (value > 127) {
+ if (*output_pos > output_size) return false;
+ // |128: Set the next byte flag
+ output[(*output_pos)++] = ((uint8_t)(value & 127)) | 128;
+ // Remove the seven bits we just wrote
+ value >>= 7;
+ }
+ if (*output_pos > output_size) return false;
+ output[(*output_pos)++] = ((uint8_t)value) & 127;
+ return true;
+}
+
+static inline JXL_MAYBE_UNUSED void EncodeVarInt(uint64_t value,
+ PaddedBytes* data) {
+ size_t pos = data->size();
+ data->resize(data->size() + 9);
+ JXL_CHECK(EncodeVarInt(value, data->size(), &pos, data->data()));
+ data->resize(pos);
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_COMMON_H_
diff --git a/third_party/jpeg-xl/lib/jxl/compressed_dc.cc b/third_party/jpeg-xl/lib/jxl/compressed_dc.cc
new file mode 100644
index 0000000000..f9a8f149dd
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/compressed_dc.cc
@@ -0,0 +1,318 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/compressed_dc.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/compressed_dc.cc"
+#include <hwy/aligned_allocator.h>
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/image.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+using D = HWY_FULL(float);
+using DScalar = HWY_CAPPED(float, 1);
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Div;
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::Vec;
+using hwy::HWY_NAMESPACE::ZeroIfNegative;
+
+// TODO(veluca): optimize constants.
+const float w1 = 0.20345139757231578f;
+const float w2 = 0.0334829185968739f;
+const float w0 = 1.0f - 4.0f * (w1 + w2);
+
+template <class V>
+V MaxWorkaround(V a, V b) {
+#if (HWY_TARGET == HWY_AVX3) && HWY_COMPILER_CLANG <= 800
+ // Prevents "Do not know how to split the result of this operator" error
+ return IfThenElse(a > b, a, b);
+#else
+ return Max(a, b);
+#endif
+}
+
+template <typename D>
+JXL_INLINE void ComputePixelChannel(const D d, const float dc_factor,
+ const float* JXL_RESTRICT row_top,
+ const float* JXL_RESTRICT row,
+ const float* JXL_RESTRICT row_bottom,
+ Vec<D>* JXL_RESTRICT mc,
+ Vec<D>* JXL_RESTRICT sm,
+ Vec<D>* JXL_RESTRICT gap, size_t x) {
+ const auto tl = LoadU(d, row_top + x - 1);
+ const auto tc = Load(d, row_top + x);
+ const auto tr = LoadU(d, row_top + x + 1);
+
+ const auto ml = LoadU(d, row + x - 1);
+ *mc = Load(d, row + x);
+ const auto mr = LoadU(d, row + x + 1);
+
+ const auto bl = LoadU(d, row_bottom + x - 1);
+ const auto bc = Load(d, row_bottom + x);
+ const auto br = LoadU(d, row_bottom + x + 1);
+
+ const auto w_center = Set(d, w0);
+ const auto w_side = Set(d, w1);
+ const auto w_corner = Set(d, w2);
+
+ const auto corner = Add(Add(tl, tr), Add(bl, br));
+ const auto side = Add(Add(ml, mr), Add(tc, bc));
+ *sm = MulAdd(corner, w_corner, MulAdd(side, w_side, Mul(*mc, w_center)));
+
+ const auto dc_quant = Set(d, dc_factor);
+ *gap = MaxWorkaround(*gap, Abs(Div(Sub(*mc, *sm), dc_quant)));
+}
+
+template <typename D>
+JXL_INLINE void ComputePixel(
+ const float* JXL_RESTRICT dc_factors,
+ const float* JXL_RESTRICT* JXL_RESTRICT rows_top,
+ const float* JXL_RESTRICT* JXL_RESTRICT rows,
+ const float* JXL_RESTRICT* JXL_RESTRICT rows_bottom,
+ float* JXL_RESTRICT* JXL_RESTRICT out_rows, size_t x) {
+ const D d;
+ auto mc_x = Undefined(d);
+ auto mc_y = Undefined(d);
+ auto mc_b = Undefined(d);
+ auto sm_x = Undefined(d);
+ auto sm_y = Undefined(d);
+ auto sm_b = Undefined(d);
+ auto gap = Set(d, 0.5f);
+ ComputePixelChannel(d, dc_factors[0], rows_top[0], rows[0], rows_bottom[0],
+ &mc_x, &sm_x, &gap, x);
+ ComputePixelChannel(d, dc_factors[1], rows_top[1], rows[1], rows_bottom[1],
+ &mc_y, &sm_y, &gap, x);
+ ComputePixelChannel(d, dc_factors[2], rows_top[2], rows[2], rows_bottom[2],
+ &mc_b, &sm_b, &gap, x);
+ auto factor = MulAdd(Set(d, -4.0f), gap, Set(d, 3.0f));
+ factor = ZeroIfNegative(factor);
+
+ auto out = MulAdd(Sub(sm_x, mc_x), factor, mc_x);
+ Store(out, d, out_rows[0] + x);
+ out = MulAdd(Sub(sm_y, mc_y), factor, mc_y);
+ Store(out, d, out_rows[1] + x);
+ out = MulAdd(Sub(sm_b, mc_b), factor, mc_b);
+ Store(out, d, out_rows[2] + x);
+}
+
+void AdaptiveDCSmoothing(const float* dc_factors, Image3F* dc,
+ ThreadPool* pool) {
+ const size_t xsize = dc->xsize();
+ const size_t ysize = dc->ysize();
+ if (ysize <= 2 || xsize <= 2) return;
+
+ // TODO(veluca): use tile-based processing?
+ // TODO(veluca): decide if changes to the y channel should be propagated to
+ // the x and b channels through color correlation.
+ JXL_ASSERT(w1 + w2 < 0.25f);
+
+ PROFILER_FUNC;
+
+ Image3F smoothed(xsize, ysize);
+ // Fill in borders that the loop below will not. First and last are unused.
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t y : {size_t(0), ysize - 1}) {
+ memcpy(smoothed.PlaneRow(c, y), dc->PlaneRow(c, y),
+ xsize * sizeof(float));
+ }
+ }
+ auto process_row = [&](const uint32_t y, size_t /*thread*/) {
+ const float* JXL_RESTRICT rows_top[3]{
+ dc->ConstPlaneRow(0, y - 1),
+ dc->ConstPlaneRow(1, y - 1),
+ dc->ConstPlaneRow(2, y - 1),
+ };
+ const float* JXL_RESTRICT rows[3] = {
+ dc->ConstPlaneRow(0, y),
+ dc->ConstPlaneRow(1, y),
+ dc->ConstPlaneRow(2, y),
+ };
+ const float* JXL_RESTRICT rows_bottom[3] = {
+ dc->ConstPlaneRow(0, y + 1),
+ dc->ConstPlaneRow(1, y + 1),
+ dc->ConstPlaneRow(2, y + 1),
+ };
+ float* JXL_RESTRICT rows_out[3] = {
+ smoothed.PlaneRow(0, y),
+ smoothed.PlaneRow(1, y),
+ smoothed.PlaneRow(2, y),
+ };
+ for (size_t x : {size_t(0), xsize - 1}) {
+ for (size_t c = 0; c < 3; c++) {
+ rows_out[c][x] = rows[c][x];
+ }
+ }
+
+ size_t x = 1;
+ // First pixels
+ const size_t N = Lanes(D());
+ for (; x < std::min(N, xsize - 1); x++) {
+ ComputePixel<DScalar>(dc_factors, rows_top, rows, rows_bottom, rows_out,
+ x);
+ }
+ // Full vectors.
+ for (; x + N <= xsize - 1; x += N) {
+ ComputePixel<D>(dc_factors, rows_top, rows, rows_bottom, rows_out, x);
+ }
+ // Last pixels.
+ for (; x < xsize - 1; x++) {
+ ComputePixel<DScalar>(dc_factors, rows_top, rows, rows_bottom, rows_out,
+ x);
+ }
+ };
+ JXL_CHECK(RunOnPool(pool, 1, ysize - 1, ThreadPool::NoInit, process_row,
+ "DCSmoothingRow"));
+ dc->Swap(smoothed);
+}
+
+// DC dequantization.
+void DequantDC(const Rect& r, Image3F* dc, ImageB* quant_dc, const Image& in,
+ const float* dc_factors, float mul, const float* cfl_factors,
+ YCbCrChromaSubsampling chroma_subsampling,
+ const BlockCtxMap& bctx) {
+ const HWY_FULL(float) df;
+ const Rebind<pixel_type, HWY_FULL(float)> di; // assumes pixel_type <= float
+ if (chroma_subsampling.Is444()) {
+ const auto fac_x = Set(df, dc_factors[0] * mul);
+ const auto fac_y = Set(df, dc_factors[1] * mul);
+ const auto fac_b = Set(df, dc_factors[2] * mul);
+ const auto cfl_fac_x = Set(df, cfl_factors[0]);
+ const auto cfl_fac_b = Set(df, cfl_factors[2]);
+ for (size_t y = 0; y < r.ysize(); y++) {
+ float* dec_row_x = r.PlaneRow(dc, 0, y);
+ float* dec_row_y = r.PlaneRow(dc, 1, y);
+ float* dec_row_b = r.PlaneRow(dc, 2, y);
+ const int32_t* quant_row_x = in.channel[1].plane.Row(y);
+ const int32_t* quant_row_y = in.channel[0].plane.Row(y);
+ const int32_t* quant_row_b = in.channel[2].plane.Row(y);
+ for (size_t x = 0; x < r.xsize(); x += Lanes(di)) {
+ const auto in_q_x = Load(di, quant_row_x + x);
+ const auto in_q_y = Load(di, quant_row_y + x);
+ const auto in_q_b = Load(di, quant_row_b + x);
+ const auto in_x = Mul(ConvertTo(df, in_q_x), fac_x);
+ const auto in_y = Mul(ConvertTo(df, in_q_y), fac_y);
+ const auto in_b = Mul(ConvertTo(df, in_q_b), fac_b);
+ Store(in_y, df, dec_row_y + x);
+ Store(MulAdd(in_y, cfl_fac_x, in_x), df, dec_row_x + x);
+ Store(MulAdd(in_y, cfl_fac_b, in_b), df, dec_row_b + x);
+ }
+ }
+ } else {
+ for (size_t c : {1, 0, 2}) {
+ Rect rect(r.x0() >> chroma_subsampling.HShift(c),
+ r.y0() >> chroma_subsampling.VShift(c),
+ r.xsize() >> chroma_subsampling.HShift(c),
+ r.ysize() >> chroma_subsampling.VShift(c));
+ const auto fac = Set(df, dc_factors[c] * mul);
+ const Channel& ch = in.channel[c < 2 ? c ^ 1 : c];
+ for (size_t y = 0; y < rect.ysize(); y++) {
+ const int32_t* quant_row = ch.plane.Row(y);
+ float* row = rect.PlaneRow(dc, c, y);
+ for (size_t x = 0; x < rect.xsize(); x += Lanes(di)) {
+ const auto in_q = Load(di, quant_row + x);
+ const auto in = Mul(ConvertTo(df, in_q), fac);
+ Store(in, df, row + x);
+ }
+ }
+ }
+ }
+ if (bctx.num_dc_ctxs <= 1) {
+ for (size_t y = 0; y < r.ysize(); y++) {
+ uint8_t* qdc_row = r.Row(quant_dc, y);
+ memset(qdc_row, 0, sizeof(*qdc_row) * r.xsize());
+ }
+ } else {
+ for (size_t y = 0; y < r.ysize(); y++) {
+ uint8_t* qdc_row_val = r.Row(quant_dc, y);
+ const int32_t* quant_row_x =
+ in.channel[1].plane.Row(y >> chroma_subsampling.VShift(0));
+ const int32_t* quant_row_y =
+ in.channel[0].plane.Row(y >> chroma_subsampling.VShift(1));
+ const int32_t* quant_row_b =
+ in.channel[2].plane.Row(y >> chroma_subsampling.VShift(2));
+ for (size_t x = 0; x < r.xsize(); x++) {
+ int bucket_x = 0, bucket_y = 0, bucket_b = 0;
+ for (int t : bctx.dc_thresholds[0]) {
+ if (quant_row_x[x >> chroma_subsampling.HShift(0)] > t) bucket_x++;
+ }
+ for (int t : bctx.dc_thresholds[1]) {
+ if (quant_row_y[x >> chroma_subsampling.HShift(1)] > t) bucket_y++;
+ }
+ for (int t : bctx.dc_thresholds[2]) {
+ if (quant_row_b[x >> chroma_subsampling.HShift(2)] > t) bucket_b++;
+ }
+ int bucket = bucket_x;
+ bucket *= bctx.dc_thresholds[2].size() + 1;
+ bucket += bucket_b;
+ bucket *= bctx.dc_thresholds[1].size() + 1;
+ bucket += bucket_y;
+ qdc_row_val[x] = bucket;
+ }
+ }
+ }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(DequantDC);
+HWY_EXPORT(AdaptiveDCSmoothing);
+void AdaptiveDCSmoothing(const float* dc_factors, Image3F* dc,
+ ThreadPool* pool) {
+ return HWY_DYNAMIC_DISPATCH(AdaptiveDCSmoothing)(dc_factors, dc, pool);
+}
+
+void DequantDC(const Rect& r, Image3F* dc, ImageB* quant_dc, const Image& in,
+ const float* dc_factors, float mul, const float* cfl_factors,
+ YCbCrChromaSubsampling chroma_subsampling,
+ const BlockCtxMap& bctx) {
+ return HWY_DYNAMIC_DISPATCH(DequantDC)(r, dc, quant_dc, in, dc_factors, mul,
+ cfl_factors, chroma_subsampling, bctx);
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/compressed_dc.h b/third_party/jpeg-xl/lib/jxl/compressed_dc.h
new file mode 100644
index 0000000000..b06e5931f0
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/compressed_dc.h
@@ -0,0 +1,34 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COMPRESSED_DC_H_
+#define LIB_JXL_COMPRESSED_DC_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/modular/modular_image.h"
+
+// DC handling functions: encoding and decoding of DC to and from bitstream, and
+// related function to initialize the per-group decoder cache.
+
+namespace jxl {
+
+// Smooth DC in already-smooth areas, to counteract banding.
+void AdaptiveDCSmoothing(const float* dc_factors, Image3F* dc,
+ ThreadPool* pool);
+
+void DequantDC(const Rect& r, Image3F* dc, ImageB* quant_dc, const Image& in,
+ const float* dc_factors, float mul, const float* cfl_factors,
+ YCbCrChromaSubsampling chroma_subsampling,
+ const BlockCtxMap& bctx);
+
+} // namespace jxl
+
+#endif // LIB_JXL_COMPRESSED_DC_H_
diff --git a/third_party/jpeg-xl/lib/jxl/convolve-inl.h b/third_party/jpeg-xl/lib/jxl/convolve-inl.h
new file mode 100644
index 0000000000..054c9c6f0d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/convolve-inl.h
@@ -0,0 +1,297 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JXL_CONVOLVE_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_CONVOLVE_INL_H_
+#undef LIB_JXL_CONVOLVE_INL_H_
+#else
+#define LIB_JXL_CONVOLVE_INL_H_
+#endif
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/image_ops.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Broadcast;
+#if HWY_TARGET != HWY_SCALAR
+using hwy::HWY_NAMESPACE::CombineShiftRightBytes;
+#endif
+using hwy::HWY_NAMESPACE::TableLookupLanes;
+using hwy::HWY_NAMESPACE::Vec;
+
+// Synthesizes left/right neighbors from a vector of center pixels.
+class Neighbors {
+ public:
+ using D = HWY_CAPPED(float, 16);
+ using V = Vec<D>;
+
+ // Returns l[i] == c[Mirror(i - 1)].
+ HWY_INLINE HWY_MAYBE_UNUSED static V FirstL1(const V c) {
+#if HWY_CAP_GE256
+ const D d;
+ HWY_ALIGN constexpr int32_t lanes[16] = {0, 0, 1, 2, 3, 4, 5, 6,
+ 7, 8, 9, 10, 11, 12, 13, 14};
+ const auto indices = SetTableIndices(d, lanes);
+ // c = PONM'LKJI
+ return TableLookupLanes(c, indices); // ONML'KJII
+#elif HWY_TARGET == HWY_SCALAR
+ return c; // Same (the first mirrored value is the last valid one)
+#else // 128 bit
+ // c = LKJI
+#if HWY_TARGET <= (1 << HWY_HIGHEST_TARGET_BIT_X86)
+ return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(2, 1, 0, 0))}; // KJII
+#else
+ const D d;
+ // TODO(deymo): Figure out if this can be optimized using a single vsri
+ // instruction to convert LKJI to KJII.
+ HWY_ALIGN constexpr int lanes[4] = {0, 0, 1, 2}; // KJII
+ const auto indices = SetTableIndices(d, lanes);
+ return TableLookupLanes(c, indices);
+#endif
+#endif
+ }
+
+ // Returns l[i] == c[Mirror(i - 2)].
+ HWY_INLINE HWY_MAYBE_UNUSED static V FirstL2(const V c) {
+#if HWY_CAP_GE256
+ const D d;
+ HWY_ALIGN constexpr int32_t lanes[16] = {1, 0, 0, 1, 2, 3, 4, 5,
+ 6, 7, 8, 9, 10, 11, 12, 13};
+ const auto indices = SetTableIndices(d, lanes);
+ // c = PONM'LKJI
+ return TableLookupLanes(c, indices); // NMLK'JIIJ
+#elif HWY_TARGET == HWY_SCALAR
+ const D d;
+ JXL_ASSERT(false); // unsupported, avoid calling this.
+ return Zero(d);
+#else // 128 bit
+ // c = LKJI
+#if HWY_TARGET <= (1 << HWY_HIGHEST_TARGET_BIT_X86)
+ return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(1, 0, 0, 1))}; // JIIJ
+#else
+ const D d;
+ HWY_ALIGN constexpr int lanes[4] = {1, 0, 0, 1}; // JIIJ
+ const auto indices = SetTableIndices(d, lanes);
+ return TableLookupLanes(c, indices);
+#endif
+#endif
+ }
+
+ // Returns l[i] == c[Mirror(i - 3)].
+ HWY_INLINE HWY_MAYBE_UNUSED static V FirstL3(const V c) {
+#if HWY_CAP_GE256
+ const D d;
+ HWY_ALIGN constexpr int32_t lanes[16] = {2, 1, 0, 0, 1, 2, 3, 4,
+ 5, 6, 7, 8, 9, 10, 11, 12};
+ const auto indices = SetTableIndices(d, lanes);
+ // c = PONM'LKJI
+ return TableLookupLanes(c, indices); // MLKJ'IIJK
+#elif HWY_TARGET == HWY_SCALAR
+ const D d;
+ JXL_ASSERT(false); // unsupported, avoid calling this.
+ return Zero(d);
+#else // 128 bit
+ // c = LKJI
+#if HWY_TARGET <= (1 << HWY_HIGHEST_TARGET_BIT_X86)
+ return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(0, 0, 1, 2))}; // IIJK
+#else
+ const D d;
+ HWY_ALIGN constexpr int lanes[4] = {2, 1, 0, 0}; // IIJK
+ const auto indices = SetTableIndices(d, lanes);
+ return TableLookupLanes(c, indices);
+#endif
+#endif
+ }
+};
+
+#if HWY_TARGET != HWY_SCALAR
+
+// Returns indices for SetTableIndices such that TableLookupLanes on the
+// rightmost unaligned vector (rightmost sample in its most-significant lane)
+// returns the mirrored values, with the mirror outside the last valid sample.
+static inline const int32_t* MirrorLanes(const size_t mod) {
+ const HWY_CAPPED(float, 16) d;
+ constexpr size_t kN = MaxLanes(d);
+
+ // For mod = `image width mod 16` 0..15:
+ // last full vec mirrored (mem order) loadedVec mirrorVec idxVec
+ // 0123456789abcdef| fedcba9876543210 fed..210 012..def 012..def
+ // 0123456789abcdef|0 0fedcba98765432 0fe..321 234..f00 123..eff
+ // 0123456789abcdef|01 10fedcba987654 10f..432 456..110 234..ffe
+ // 0123456789abcdef|012 210fedcba9876 210..543 67..2210 34..ffed
+ // 0123456789abcdef|0123 3210fedcba98 321..654 8..33210 4..ffedc
+ // 0123456789abcdef|01234 43210fedcba
+ // 0123456789abcdef|012345 543210fedc
+ // 0123456789abcdef|0123456 6543210fe
+ // 0123456789abcdef|01234567 76543210
+ // 0123456789abcdef|012345678 8765432
+ // 0123456789abcdef|0123456789 987654
+ // 0123456789abcdef|0123456789A A9876
+ // 0123456789abcdef|0123456789AB BA98
+ // 0123456789abcdef|0123456789ABC CBA
+ // 0123456789abcdef|0123456789ABCD DC
+ // 0123456789abcdef|0123456789ABCDE E EDC..10f EED..210 ffe..321
+#if HWY_CAP_GE512
+ HWY_ALIGN static constexpr int32_t idx_lanes[2 * kN - 1] = {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, //
+ 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
+#elif HWY_CAP_GE256
+ HWY_ALIGN static constexpr int32_t idx_lanes[2 * kN - 1] = {
+ 1, 2, 3, 4, 5, 6, 7, 7, //
+ 6, 5, 4, 3, 2, 1, 0};
+#else // 128-bit
+ HWY_ALIGN static constexpr int32_t idx_lanes[2 * kN - 1] = {1, 2, 3, 3, //
+ 2, 1, 0};
+#endif
+ return idx_lanes + kN - 1 - mod;
+}
+
+#endif // HWY_TARGET != HWY_SCALAR
+
+// Single entry point for convolution.
+// "Strategy" (Direct*/Separable*) decides kernel size and how to evaluate it.
+template <class Strategy>
+class ConvolveT {
+ static constexpr int64_t kRadius = Strategy::kRadius;
+ using Simd = HWY_CAPPED(float, 16);
+
+ public:
+ static size_t MinWidth() {
+#if HWY_TARGET == HWY_SCALAR
+ // First/Last use mirrored loads of up to +/- kRadius.
+ return 2 * kRadius;
+#else
+ return Lanes(Simd()) + kRadius;
+#endif
+ }
+
+ // "Image" is ImageF or Image3F.
+ template <class Image, class Weights>
+ static void Run(const Image& in, const Rect& rect, const Weights& weights,
+ ThreadPool* pool, Image* out) {
+ PROFILER_ZONE("ConvolveT::Run");
+ JXL_CHECK(SameSize(rect, *out));
+ JXL_CHECK(rect.xsize() >= MinWidth());
+
+ static_assert(int64_t(kRadius) <= 3,
+ "Must handle [0, kRadius) and >= kRadius");
+ switch (rect.xsize() % Lanes(Simd())) {
+ case 0:
+ return RunRows<0>(in, rect, weights, pool, out);
+ case 1:
+ return RunRows<1>(in, rect, weights, pool, out);
+ case 2:
+ return RunRows<2>(in, rect, weights, pool, out);
+ default:
+ return RunRows<3>(in, rect, weights, pool, out);
+ }
+ }
+
+ private:
+ template <size_t kSizeModN, class WrapRow, class Weights>
+ static JXL_INLINE void RunRow(const float* JXL_RESTRICT in,
+ const size_t xsize, const int64_t stride,
+ const WrapRow& wrap_row, const Weights& weights,
+ float* JXL_RESTRICT out) {
+ Strategy::template ConvolveRow<kSizeModN>(in, xsize, stride, wrap_row,
+ weights, out);
+ }
+
+ template <size_t kSizeModN, class Weights>
+ static JXL_INLINE void RunBorderRows(const ImageF& in, const Rect& rect,
+ const int64_t ybegin, const int64_t yend,
+ const Weights& weights, ImageF* out) {
+ const int64_t stride = in.PixelsPerRow();
+ const WrapRowMirror wrap_row(in, rect.ysize());
+ for (int64_t y = ybegin; y < yend; ++y) {
+ RunRow<kSizeModN>(rect.ConstRow(in, y), rect.xsize(), stride, wrap_row,
+ weights, out->Row(y));
+ }
+ }
+
+ // Image3F.
+ template <size_t kSizeModN, class Weights>
+ static JXL_INLINE void RunBorderRows(const Image3F& in, const Rect& rect,
+ const int64_t ybegin, const int64_t yend,
+ const Weights& weights, Image3F* out) {
+ const int64_t stride = in.PixelsPerRow();
+ for (int64_t y = ybegin; y < yend; ++y) {
+ for (size_t c = 0; c < 3; ++c) {
+ const WrapRowMirror wrap_row(in.Plane(c), rect.ysize());
+ RunRow<kSizeModN>(rect.ConstPlaneRow(in, c, y), rect.xsize(), stride,
+ wrap_row, weights, out->PlaneRow(c, y));
+ }
+ }
+ }
+
+ template <size_t kSizeModN, class Weights>
+ static JXL_INLINE void RunInteriorRows(const ImageF& in, const Rect& rect,
+ const int64_t ybegin,
+ const int64_t yend,
+ const Weights& weights,
+ ThreadPool* pool, ImageF* out) {
+ const int64_t stride = in.PixelsPerRow();
+ JXL_CHECK(RunOnPool(
+ pool, ybegin, yend, ThreadPool::NoInit,
+ [&](const uint32_t y, size_t /*thread*/) HWY_ATTR {
+ RunRow<kSizeModN>(rect.ConstRow(in, y), rect.xsize(), stride,
+ WrapRowUnchanged(), weights, out->Row(y));
+ },
+ "Convolve"));
+ }
+
+ // Image3F.
+ template <size_t kSizeModN, class Weights>
+ static JXL_INLINE void RunInteriorRows(const Image3F& in, const Rect& rect,
+ const int64_t ybegin,
+ const int64_t yend,
+ const Weights& weights,
+ ThreadPool* pool, Image3F* out) {
+ const int64_t stride = in.PixelsPerRow();
+ JXL_CHECK(RunOnPool(
+ pool, ybegin, yend, ThreadPool::NoInit,
+ [&](const uint32_t y, size_t /*thread*/) HWY_ATTR {
+ for (size_t c = 0; c < 3; ++c) {
+ RunRow<kSizeModN>(rect.ConstPlaneRow(in, c, y), rect.xsize(),
+ stride, WrapRowUnchanged(), weights,
+ out->PlaneRow(c, y));
+ }
+ },
+ "Convolve3"));
+ }
+
+ template <size_t kSizeModN, class Image, class Weights>
+ static JXL_INLINE void RunRows(const Image& in, const Rect& rect,
+ const Weights& weights, ThreadPool* pool,
+ Image* out) {
+ const int64_t ysize = rect.ysize();
+ RunBorderRows<kSizeModN>(in, rect, 0, std::min(int64_t(kRadius), ysize),
+ weights, out);
+ if (ysize > 2 * int64_t(kRadius)) {
+ RunInteriorRows<kSizeModN>(in, rect, int64_t(kRadius),
+ ysize - int64_t(kRadius), weights, pool, out);
+ }
+ if (ysize > int64_t(kRadius)) {
+ RunBorderRows<kSizeModN>(in, rect, ysize - int64_t(kRadius), ysize,
+ weights, out);
+ }
+ }
+};
+
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif // LIB_JXL_CONVOLVE_INL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/convolve.h b/third_party/jpeg-xl/lib/jxl/convolve.h
new file mode 100644
index 0000000000..2fcd2d0980
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/convolve.h
@@ -0,0 +1,105 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_CONVOLVE_H_
+#define LIB_JXL_CONVOLVE_H_
+
+// 2D convolution.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// No valid values outside [0, xsize), but the strategy may still safely load
+// the preceding vector, and/or round xsize up to the vector lane count. This
+// avoids needing PadImage.
+// Requires xsize >= kConvolveLanes + kConvolveMaxRadius.
+static constexpr size_t kConvolveMaxRadius = 3;
+
+// Weights must already be normalized.
+
+struct WeightsSymmetric3 {
+ // d r d (each replicated 4x)
+ // r c r
+ // d r d
+ float c[4];
+ float r[4];
+ float d[4];
+};
+
+struct WeightsSymmetric5 {
+ // The lower-right quadrant is: c r R (each replicated 4x)
+ // r d L
+ // R L D
+ float c[4];
+ float r[4];
+ float R[4];
+ float d[4];
+ float D[4];
+ float L[4];
+};
+
+// Weights for separable 5x5 filters (typically but not necessarily the same
+// values for horizontal and vertical directions). The kernel must already be
+// normalized, but note that values for negative offsets are omitted, so the
+// given values do not sum to 1.
+struct WeightsSeparable5 {
+ // Horizontal 1D, distances 0..2 (each replicated 4x)
+ float horz[3 * 4];
+ float vert[3 * 4];
+};
+
+// Weights for separable 7x7 filters (typically but not necessarily the same
+// values for horizontal and vertical directions). The kernel must already be
+// normalized, but note that values for negative offsets are omitted, so the
+// given values do not sum to 1.
+//
+// NOTE: for >= 7x7 Gaussian kernels, it is faster to use FastGaussian instead,
+// at least when images exceed the L1 cache size.
+struct WeightsSeparable7 {
+ // Horizontal 1D, distances 0..3 (each replicated 4x)
+ float horz[4 * 4];
+ float vert[4 * 4];
+};
+
+const WeightsSymmetric3& WeightsSymmetric3Lowpass();
+const WeightsSeparable5& WeightsSeparable5Lowpass();
+const WeightsSymmetric5& WeightsSymmetric5Lowpass();
+
+void SlowSymmetric3(const ImageF& in, const Rect& rect,
+ const WeightsSymmetric3& weights, ThreadPool* pool,
+ ImageF* JXL_RESTRICT out);
+
+void SlowSeparable5(const ImageF& in, const Rect& rect,
+ const WeightsSeparable5& weights, ThreadPool* pool,
+ ImageF* out);
+
+void SlowSeparable7(const ImageF& in, const Rect& rect,
+ const WeightsSeparable7& weights, ThreadPool* pool,
+ ImageF* out);
+
+void Symmetric3(const ImageF& in, const Rect& rect,
+ const WeightsSymmetric3& weights, ThreadPool* pool,
+ ImageF* out);
+
+void Symmetric5(const ImageF& in, const Rect& rect,
+ const WeightsSymmetric5& weights, ThreadPool* pool,
+ ImageF* JXL_RESTRICT out);
+
+void Separable5(const ImageF& in, const Rect& rect,
+ const WeightsSeparable5& weights, ThreadPool* pool,
+ ImageF* out);
+
+void Separable7(const ImageF& in, const Rect& rect,
+ const WeightsSeparable7& weights, ThreadPool* pool,
+ ImageF* out);
+
+} // namespace jxl
+
+#endif // LIB_JXL_CONVOLVE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/convolve_separable5.cc b/third_party/jpeg-xl/lib/jxl/convolve_separable5.cc
new file mode 100644
index 0000000000..b26ff54bbc
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/convolve_separable5.cc
@@ -0,0 +1,261 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/convolve.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/convolve_separable5.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/convolve-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Vec;
+
+// 5x5 convolution by separable kernel with a single scan through the input.
+// This is more cache-efficient than separate horizontal/vertical passes, and
+// possibly faster (given enough registers) than tiling and/or transposing.
+//
+// Overview: imagine a 5x5 window around a central pixel. First convolve the
+// rows by multiplying the pixels with the corresponding weights from
+// WeightsSeparable5.horz[abs(x_offset) * 4]. Then multiply each of these
+// intermediate results by the corresponding vertical weight, i.e.
+// vert[abs(y_offset) * 4]. Finally, store the sum of these values as the
+// convolution result at the position of the central pixel in the output.
+//
+// Each of these operations uses SIMD vectors. The central pixel and most
+// importantly the output are aligned, so neighnoring pixels (e.g. x_offset=1)
+// require unaligned loads. Because weights are supplied in identical groups of
+// 4, we can use LoadDup128 to load them (slightly faster).
+//
+// Uses mirrored boundary handling. Until x >= kRadius, the horizontal
+// convolution uses Neighbors class to shuffle vectors as if each of its lanes
+// had been loaded from the mirrored offset. Similarly, the last full vector to
+// write uses mirroring. In the case of scalar vectors, Neighbors is not usable
+// and the value is loaded directly. Otherwise, the number of valid pixels
+// modulo the vector size enables a small optimization: for smaller offsets,
+// a non-mirrored load is sufficient.
+class Separable5Strategy {
+ using D = HWY_CAPPED(float, 16);
+ using V = Vec<D>;
+
+ public:
+ static constexpr int64_t kRadius = 2;
+
+ template <size_t kSizeModN, class WrapRow>
+ static JXL_MAYBE_INLINE void ConvolveRow(
+ const float* const JXL_RESTRICT row_m, const size_t xsize,
+ const int64_t stride, const WrapRow& wrap_row,
+ const WeightsSeparable5& weights, float* const JXL_RESTRICT row_out) {
+ const D d;
+ const int64_t neg_stride = -stride; // allows LEA addressing.
+ const float* const JXL_RESTRICT row_t2 =
+ wrap_row(row_m + 2 * neg_stride, stride);
+ const float* const JXL_RESTRICT row_t1 =
+ wrap_row(row_m + 1 * neg_stride, stride);
+ const float* const JXL_RESTRICT row_b1 =
+ wrap_row(row_m + 1 * stride, stride);
+ const float* const JXL_RESTRICT row_b2 =
+ wrap_row(row_m + 2 * stride, stride);
+
+ const V wh0 = LoadDup128(d, weights.horz + 0 * 4);
+ const V wh1 = LoadDup128(d, weights.horz + 1 * 4);
+ const V wh2 = LoadDup128(d, weights.horz + 2 * 4);
+ const V wv0 = LoadDup128(d, weights.vert + 0 * 4);
+ const V wv1 = LoadDup128(d, weights.vert + 1 * 4);
+ const V wv2 = LoadDup128(d, weights.vert + 2 * 4);
+
+ size_t x = 0;
+
+ // More than one iteration for scalars.
+ for (; x < kRadius; x += Lanes(d)) {
+ const V conv0 =
+ Mul(HorzConvolveFirst(row_m, x, xsize, wh0, wh1, wh2), wv0);
+
+ const V conv1t = HorzConvolveFirst(row_t1, x, xsize, wh0, wh1, wh2);
+ const V conv1b = HorzConvolveFirst(row_b1, x, xsize, wh0, wh1, wh2);
+ const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0);
+
+ const V conv2t = HorzConvolveFirst(row_t2, x, xsize, wh0, wh1, wh2);
+ const V conv2b = HorzConvolveFirst(row_b2, x, xsize, wh0, wh1, wh2);
+ const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1);
+ Store(conv2, d, row_out + x);
+ }
+
+ // Main loop: load inputs without padding
+ for (; x + Lanes(d) + kRadius <= xsize; x += Lanes(d)) {
+ const V conv0 = Mul(HorzConvolve(row_m + x, wh0, wh1, wh2), wv0);
+
+ const V conv1t = HorzConvolve(row_t1 + x, wh0, wh1, wh2);
+ const V conv1b = HorzConvolve(row_b1 + x, wh0, wh1, wh2);
+ const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0);
+
+ const V conv2t = HorzConvolve(row_t2 + x, wh0, wh1, wh2);
+ const V conv2b = HorzConvolve(row_b2 + x, wh0, wh1, wh2);
+ const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1);
+ Store(conv2, d, row_out + x);
+ }
+
+ // Last full vector to write (the above loop handled mod >= kRadius)
+#if HWY_TARGET == HWY_SCALAR
+ while (x < xsize) {
+#else
+ if (kSizeModN < kRadius) {
+#endif
+ const V conv0 =
+ Mul(HorzConvolveLast<kSizeModN>(row_m, x, xsize, wh0, wh1, wh2), wv0);
+
+ const V conv1t =
+ HorzConvolveLast<kSizeModN>(row_t1, x, xsize, wh0, wh1, wh2);
+ const V conv1b =
+ HorzConvolveLast<kSizeModN>(row_b1, x, xsize, wh0, wh1, wh2);
+ const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0);
+
+ const V conv2t =
+ HorzConvolveLast<kSizeModN>(row_t2, x, xsize, wh0, wh1, wh2);
+ const V conv2b =
+ HorzConvolveLast<kSizeModN>(row_b2, x, xsize, wh0, wh1, wh2);
+ const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1);
+ Store(conv2, d, row_out + x);
+ x += Lanes(d);
+ }
+
+ // If mod = 0, the above vector was the last.
+ if (kSizeModN != 0) {
+ for (; x < xsize; ++x) {
+ float mul = 0.0f;
+ for (int64_t dy = -kRadius; dy <= kRadius; ++dy) {
+ const float wy = weights.vert[std::abs(dy) * 4];
+ const float* clamped_row = wrap_row(row_m + dy * stride, stride);
+ for (int64_t dx = -kRadius; dx <= kRadius; ++dx) {
+ const float wx = weights.horz[std::abs(dx) * 4];
+ const int64_t clamped_x = Mirror(x + dx, xsize);
+ mul += clamped_row[clamped_x] * wx * wy;
+ }
+ }
+ row_out[x] = mul;
+ }
+ }
+ }
+
+ private:
+ // Same as HorzConvolve for the first/last vector in a row.
+ static JXL_MAYBE_INLINE V HorzConvolveFirst(
+ const float* const JXL_RESTRICT row, const int64_t x, const int64_t xsize,
+ const V wh0, const V wh1, const V wh2) {
+ const D d;
+ const V c = LoadU(d, row + x);
+ const V mul0 = Mul(c, wh0);
+
+#if HWY_TARGET == HWY_SCALAR
+ const V l1 = LoadU(d, row + Mirror(x - 1, xsize));
+ const V l2 = LoadU(d, row + Mirror(x - 2, xsize));
+#else
+ (void)xsize;
+ const V l1 = Neighbors::FirstL1(c);
+ const V l2 = Neighbors::FirstL2(c);
+#endif
+
+ const V r1 = LoadU(d, row + x + 1);
+ const V r2 = LoadU(d, row + x + 2);
+
+ const V mul1 = MulAdd(Add(l1, r1), wh1, mul0);
+ const V mul2 = MulAdd(Add(l2, r2), wh2, mul1);
+ return mul2;
+ }
+
+ template <size_t kSizeModN>
+ static JXL_MAYBE_INLINE V
+ HorzConvolveLast(const float* const JXL_RESTRICT row, const int64_t x,
+ const int64_t xsize, const V wh0, const V wh1, const V wh2) {
+ const D d;
+ const V c = LoadU(d, row + x);
+ const V mul0 = Mul(c, wh0);
+
+ const V l1 = LoadU(d, row + x - 1);
+ const V l2 = LoadU(d, row + x - 2);
+
+ V r1, r2;
+#if HWY_TARGET == HWY_SCALAR
+ r1 = LoadU(d, row + Mirror(x + 1, xsize));
+ r2 = LoadU(d, row + Mirror(x + 2, xsize));
+#else
+ const size_t N = Lanes(d);
+ if (kSizeModN == 0) {
+ r2 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 2)));
+ r1 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 1)));
+ } else { // == 1
+ const auto last = LoadU(d, row + xsize - N);
+ r2 = TableLookupLanes(last, SetTableIndices(d, MirrorLanes(N - 1)));
+ r1 = last;
+ }
+#endif
+
+ // Sum of pixels with Manhattan distance i, multiplied by weights[i].
+ const V sum1 = Add(l1, r1);
+ const V mul1 = MulAdd(sum1, wh1, mul0);
+ const V sum2 = Add(l2, r2);
+ const V mul2 = MulAdd(sum2, wh2, mul1);
+ return mul2;
+ }
+
+ // Requires kRadius valid pixels before/after pos.
+ static JXL_MAYBE_INLINE V HorzConvolve(const float* const JXL_RESTRICT pos,
+ const V wh0, const V wh1,
+ const V wh2) {
+ const D d;
+ const V c = LoadU(d, pos);
+ const V mul0 = Mul(c, wh0);
+
+ // Loading anew is faster than combining vectors.
+ const V l1 = LoadU(d, pos - 1);
+ const V r1 = LoadU(d, pos + 1);
+ const V l2 = LoadU(d, pos - 2);
+ const V r2 = LoadU(d, pos + 2);
+ // Sum of pixels with Manhattan distance i, multiplied by weights[i].
+ const V sum1 = Add(l1, r1);
+ const V mul1 = MulAdd(sum1, wh1, mul0);
+ const V sum2 = Add(l2, r2);
+ const V mul2 = MulAdd(sum2, wh2, mul1);
+ return mul2;
+ }
+};
+
+void Separable5(const ImageF& in, const Rect& rect,
+ const WeightsSeparable5& weights, ThreadPool* pool,
+ ImageF* out) {
+ using Conv = ConvolveT<Separable5Strategy>;
+ if (rect.xsize() >= Conv::MinWidth()) {
+ return Conv::Run(in, rect, weights, pool, out);
+ }
+
+ return SlowSeparable5(in, rect, weights, pool, out);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(Separable5);
+void Separable5(const ImageF& in, const Rect& rect,
+ const WeightsSeparable5& weights, ThreadPool* pool,
+ ImageF* out) {
+ return HWY_DYNAMIC_DISPATCH(Separable5)(in, rect, weights, pool, out);
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/convolve_separable7.cc b/third_party/jpeg-xl/lib/jxl/convolve_separable7.cc
new file mode 100644
index 0000000000..086dfd22b5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/convolve_separable7.cc
@@ -0,0 +1,285 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/convolve.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/convolve_separable7.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/convolve-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Vec;
+
+// 7x7 convolution by separable kernel with a single scan through the input.
+// Extended version of Separable5, see documentation there.
+class Separable7Strategy {
+ using D = HWY_CAPPED(float, 16);
+ using V = Vec<D>;
+
+ public:
+ static constexpr int64_t kRadius = 3;
+
+ template <size_t kSizeModN, class WrapRow>
+ static JXL_MAYBE_INLINE void ConvolveRow(
+ const float* const JXL_RESTRICT row_m, const size_t xsize,
+ const int64_t stride, const WrapRow& wrap_row,
+ const WeightsSeparable7& weights, float* const JXL_RESTRICT row_out) {
+ const D d;
+ const int64_t neg_stride = -stride; // allows LEA addressing.
+ const float* const JXL_RESTRICT row_t3 =
+ wrap_row(row_m + 3 * neg_stride, stride);
+ const float* const JXL_RESTRICT row_t2 =
+ wrap_row(row_m + 2 * neg_stride, stride);
+ const float* const JXL_RESTRICT row_t1 =
+ wrap_row(row_m + 1 * neg_stride, stride);
+ const float* const JXL_RESTRICT row_b1 =
+ wrap_row(row_m + 1 * stride, stride);
+ const float* const JXL_RESTRICT row_b2 =
+ wrap_row(row_m + 2 * stride, stride);
+ const float* const JXL_RESTRICT row_b3 =
+ wrap_row(row_m + 3 * stride, stride);
+
+ const V wh0 = LoadDup128(d, weights.horz + 0 * 4);
+ const V wh1 = LoadDup128(d, weights.horz + 1 * 4);
+ const V wh2 = LoadDup128(d, weights.horz + 2 * 4);
+ const V wh3 = LoadDup128(d, weights.horz + 3 * 4);
+ const V wv0 = LoadDup128(d, weights.vert + 0 * 4);
+ const V wv1 = LoadDup128(d, weights.vert + 1 * 4);
+ const V wv2 = LoadDup128(d, weights.vert + 2 * 4);
+ const V wv3 = LoadDup128(d, weights.vert + 3 * 4);
+
+ size_t x = 0;
+
+ // More than one iteration for scalars.
+ for (; x < kRadius; x += Lanes(d)) {
+ const V conv0 =
+ Mul(HorzConvolveFirst(row_m, x, xsize, wh0, wh1, wh2, wh3), wv0);
+
+ const V conv1t = HorzConvolveFirst(row_t1, x, xsize, wh0, wh1, wh2, wh3);
+ const V conv1b = HorzConvolveFirst(row_b1, x, xsize, wh0, wh1, wh2, wh3);
+ const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0);
+
+ const V conv2t = HorzConvolveFirst(row_t2, x, xsize, wh0, wh1, wh2, wh3);
+ const V conv2b = HorzConvolveFirst(row_b2, x, xsize, wh0, wh1, wh2, wh3);
+ const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1);
+
+ const V conv3t = HorzConvolveFirst(row_t3, x, xsize, wh0, wh1, wh2, wh3);
+ const V conv3b = HorzConvolveFirst(row_b3, x, xsize, wh0, wh1, wh2, wh3);
+ const V conv3 = MulAdd(Add(conv3t, conv3b), wv3, conv2);
+
+ Store(conv3, d, row_out + x);
+ }
+
+ // Main loop: load inputs without padding
+ for (; x + Lanes(d) + kRadius <= xsize; x += Lanes(d)) {
+ const V conv0 = Mul(HorzConvolve(row_m + x, wh0, wh1, wh2, wh3), wv0);
+
+ const V conv1t = HorzConvolve(row_t1 + x, wh0, wh1, wh2, wh3);
+ const V conv1b = HorzConvolve(row_b1 + x, wh0, wh1, wh2, wh3);
+ const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0);
+
+ const V conv2t = HorzConvolve(row_t2 + x, wh0, wh1, wh2, wh3);
+ const V conv2b = HorzConvolve(row_b2 + x, wh0, wh1, wh2, wh3);
+ const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1);
+
+ const V conv3t = HorzConvolve(row_t3 + x, wh0, wh1, wh2, wh3);
+ const V conv3b = HorzConvolve(row_b3 + x, wh0, wh1, wh2, wh3);
+ const V conv3 = MulAdd(Add(conv3t, conv3b), wv3, conv2);
+
+ Store(conv3, d, row_out + x);
+ }
+
+ // Last full vector to write (the above loop handled mod >= kRadius)
+#if HWY_TARGET == HWY_SCALAR
+ while (x < xsize) {
+#else
+ if (kSizeModN < kRadius) {
+#endif
+ const V conv0 =
+ Mul(HorzConvolveLast<kSizeModN>(row_m, x, xsize, wh0, wh1, wh2, wh3),
+ wv0);
+
+ const V conv1t =
+ HorzConvolveLast<kSizeModN>(row_t1, x, xsize, wh0, wh1, wh2, wh3);
+ const V conv1b =
+ HorzConvolveLast<kSizeModN>(row_b1, x, xsize, wh0, wh1, wh2, wh3);
+ const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0);
+
+ const V conv2t =
+ HorzConvolveLast<kSizeModN>(row_t2, x, xsize, wh0, wh1, wh2, wh3);
+ const V conv2b =
+ HorzConvolveLast<kSizeModN>(row_b2, x, xsize, wh0, wh1, wh2, wh3);
+ const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1);
+
+ const V conv3t =
+ HorzConvolveLast<kSizeModN>(row_t3, x, xsize, wh0, wh1, wh2, wh3);
+ const V conv3b =
+ HorzConvolveLast<kSizeModN>(row_b3, x, xsize, wh0, wh1, wh2, wh3);
+ const V conv3 = MulAdd(Add(conv3t, conv3b), wv3, conv2);
+
+ Store(conv3, d, row_out + x);
+ x += Lanes(d);
+ }
+
+ // If mod = 0, the above vector was the last.
+ if (kSizeModN != 0) {
+ for (; x < xsize; ++x) {
+ float mul = 0.0f;
+ for (int64_t dy = -kRadius; dy <= kRadius; ++dy) {
+ const float wy = weights.vert[std::abs(dy) * 4];
+ const float* clamped_row = wrap_row(row_m + dy * stride, stride);
+ for (int64_t dx = -kRadius; dx <= kRadius; ++dx) {
+ const float wx = weights.horz[std::abs(dx) * 4];
+ const int64_t clamped_x = Mirror(x + dx, xsize);
+ mul += clamped_row[clamped_x] * wx * wy;
+ }
+ }
+ row_out[x] = mul;
+ }
+ }
+ }
+
+ private:
+ // Same as HorzConvolve for the first/last vector in a row.
+ static JXL_MAYBE_INLINE V HorzConvolveFirst(
+ const float* const JXL_RESTRICT row, const int64_t x, const int64_t xsize,
+ const V wh0, const V wh1, const V wh2, const V wh3) {
+ const D d;
+ const V c = LoadU(d, row + x);
+ const V mul0 = Mul(c, wh0);
+
+#if HWY_TARGET == HWY_SCALAR
+ const V l1 = LoadU(d, row + Mirror(x - 1, xsize));
+ const V l2 = LoadU(d, row + Mirror(x - 2, xsize));
+ const V l3 = LoadU(d, row + Mirror(x - 3, xsize));
+#else
+ (void)xsize;
+ const V l1 = Neighbors::FirstL1(c);
+ const V l2 = Neighbors::FirstL2(c);
+ const V l3 = Neighbors::FirstL3(c);
+#endif
+
+ const V r1 = LoadU(d, row + x + 1);
+ const V r2 = LoadU(d, row + x + 2);
+ const V r3 = LoadU(d, row + x + 3);
+
+ const V mul1 = MulAdd(Add(l1, r1), wh1, mul0);
+ const V mul2 = MulAdd(Add(l2, r2), wh2, mul1);
+ const V mul3 = MulAdd(Add(l3, r3), wh3, mul2);
+ return mul3;
+ }
+
+ template <size_t kSizeModN>
+ static JXL_MAYBE_INLINE V HorzConvolveLast(
+ const float* const JXL_RESTRICT row, const int64_t x, const int64_t xsize,
+ const V wh0, const V wh1, const V wh2, const V wh3) {
+ const D d;
+ const V c = LoadU(d, row + x);
+ const V mul0 = Mul(c, wh0);
+
+ const V l1 = LoadU(d, row + x - 1);
+ const V l2 = LoadU(d, row + x - 2);
+ const V l3 = LoadU(d, row + x - 3);
+
+ V r1, r2, r3;
+#if HWY_TARGET == HWY_SCALAR
+ r1 = LoadU(d, row + Mirror(x + 1, xsize));
+ r2 = LoadU(d, row + Mirror(x + 2, xsize));
+ r3 = LoadU(d, row + Mirror(x + 3, xsize));
+#else
+ const size_t N = Lanes(d);
+ if (kSizeModN == 0) {
+ r3 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 3)));
+ r2 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 2)));
+ r1 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 1)));
+ } else if (kSizeModN == 1) {
+ const auto last = LoadU(d, row + xsize - N);
+ r3 = TableLookupLanes(last, SetTableIndices(d, MirrorLanes(N - 2)));
+ r2 = TableLookupLanes(last, SetTableIndices(d, MirrorLanes(N - 1)));
+ r1 = last;
+ } else /* kSizeModN >= 2 */ {
+ const auto last = LoadU(d, row + xsize - N);
+ r3 = TableLookupLanes(last, SetTableIndices(d, MirrorLanes(N - 1)));
+ r2 = last;
+ r1 = LoadU(d, row + x + 1);
+ }
+#endif
+
+ // Sum of pixels with Manhattan distance i, multiplied by weights[i].
+ const V sum1 = Add(l1, r1);
+ const V mul1 = MulAdd(sum1, wh1, mul0);
+ const V sum2 = Add(l2, r2);
+ const V mul2 = MulAdd(sum2, wh2, mul1);
+ const V sum3 = Add(l3, r3);
+ const V mul3 = MulAdd(sum3, wh3, mul2);
+ return mul3;
+ }
+
+ // Returns one vector of horizontal convolution results; lane i is the result
+ // for pixel pos + i. This is the fast path for interior pixels, i.e. kRadius
+ // valid pixels before/after pos.
+ static JXL_MAYBE_INLINE V HorzConvolve(const float* const JXL_RESTRICT pos,
+ const V wh0, const V wh1, const V wh2,
+ const V wh3) {
+ const D d;
+ const V c = LoadU(d, pos);
+ const V mul0 = Mul(c, wh0);
+
+ // TODO(janwas): better to Combine
+ const V l1 = LoadU(d, pos - 1);
+ const V r1 = LoadU(d, pos + 1);
+ const V l2 = LoadU(d, pos - 2);
+ const V r2 = LoadU(d, pos + 2);
+ const V l3 = LoadU(d, pos - 3);
+ const V r3 = LoadU(d, pos + 3);
+ // Sum of pixels with Manhattan distance i, multiplied by weights[i].
+ const V sum1 = Add(l1, r1);
+ const V mul1 = MulAdd(sum1, wh1, mul0);
+ const V sum2 = Add(l2, r2);
+ const V mul2 = MulAdd(sum2, wh2, mul1);
+ const V sum3 = Add(l3, r3);
+ const V mul3 = MulAdd(sum3, wh3, mul2);
+ return mul3;
+ }
+};
+
+void Separable7(const ImageF& in, const Rect& rect,
+ const WeightsSeparable7& weights, ThreadPool* pool,
+ ImageF* out) {
+ using Conv = ConvolveT<Separable7Strategy>;
+ if (rect.xsize() >= Conv::MinWidth()) {
+ return Conv::Run(in, rect, weights, pool, out);
+ }
+
+ return SlowSeparable7(in, rect, weights, pool, out);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(Separable7);
+void Separable7(const ImageF& in, const Rect& rect,
+ const WeightsSeparable7& weights, ThreadPool* pool,
+ ImageF* out) {
+ return HWY_DYNAMIC_DISPATCH(Separable7)(in, rect, weights, pool, out);
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/convolve_slow.cc b/third_party/jpeg-xl/lib/jxl/convolve_slow.cc
new file mode 100644
index 0000000000..fffe5f74c8
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/convolve_slow.cc
@@ -0,0 +1,212 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/convolve.h"
+
+#include "lib/jxl/convolve-inl.h"
+
+namespace jxl {
+
+//------------------------------------------------------------------------------
+// Kernels
+
+// 4 instances of a given literal value, useful as input to LoadDup128.
+#define JXL_REP4(literal) literal, literal, literal, literal
+
+// Concentrates energy in low-frequency components (e.g. for antialiasing).
+const WeightsSymmetric3& WeightsSymmetric3Lowpass() {
+ // Computed by research/convolve_weights.py's cubic spline approximations of
+ // prolate spheroidal wave functions.
+ constexpr float w0 = 0.36208932f;
+ constexpr float w1 = 0.12820096f;
+ constexpr float w2 = 0.03127668f;
+ static constexpr WeightsSymmetric3 weights = {
+ {JXL_REP4(w0)}, {JXL_REP4(w1)}, {JXL_REP4(w2)}};
+ return weights;
+}
+
+const WeightsSeparable5& WeightsSeparable5Lowpass() {
+ constexpr float w0 = 0.41714928f;
+ constexpr float w1 = 0.25539268f;
+ constexpr float w2 = 0.03603267f;
+ static constexpr WeightsSeparable5 weights = {
+ {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)},
+ {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}};
+ return weights;
+}
+
+const WeightsSymmetric5& WeightsSymmetric5Lowpass() {
+ static constexpr WeightsSymmetric5 weights = {
+ {JXL_REP4(0.1740135f)}, {JXL_REP4(0.1065369f)}, {JXL_REP4(0.0150310f)},
+ {JXL_REP4(0.0652254f)}, {JXL_REP4(0.0012984f)}, {JXL_REP4(0.0092025f)}};
+ return weights;
+}
+
+const WeightsSeparable5& WeightsSeparable5Gaussian1() {
+ constexpr float w0 = 0.38774f;
+ constexpr float w1 = 0.24477f;
+ constexpr float w2 = 0.06136f;
+ static constexpr WeightsSeparable5 weights = {
+ {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)},
+ {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}};
+ return weights;
+}
+
+const WeightsSeparable5& WeightsSeparable5Gaussian2() {
+ constexpr float w0 = 0.250301f;
+ constexpr float w1 = 0.221461f;
+ constexpr float w2 = 0.153388f;
+ static constexpr WeightsSeparable5 weights = {
+ {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)},
+ {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}};
+ return weights;
+}
+
+#undef JXL_REP4
+
+//------------------------------------------------------------------------------
+// Slow
+
+namespace {
+
+template <class WrapX, class WrapY>
+float SlowSymmetric3Pixel(const ImageF& in, const int64_t ix, const int64_t iy,
+ const int64_t xsize, const int64_t ysize,
+ const WeightsSymmetric3& weights) {
+ float sum = 0.0f;
+
+ // ix: image; kx: kernel
+ for (int64_t ky = -1; ky <= 1; ky++) {
+ const int64_t y = WrapY()(iy + ky, ysize);
+ const float* JXL_RESTRICT row_in = in.ConstRow(static_cast<size_t>(y));
+
+ const float wc = ky == 0 ? weights.c[0] : weights.r[0];
+ const float wlr = ky == 0 ? weights.r[0] : weights.d[0];
+
+ const int64_t xm1 = WrapX()(ix - 1, xsize);
+ const int64_t xp1 = WrapX()(ix + 1, xsize);
+ sum += row_in[ix] * wc + (row_in[xm1] + row_in[xp1]) * wlr;
+ }
+ return sum;
+}
+
+template <class WrapY>
+void SlowSymmetric3Row(const ImageF& in, const int64_t iy, const int64_t xsize,
+ const int64_t ysize, const WeightsSymmetric3& weights,
+ float* JXL_RESTRICT row_out) {
+ row_out[0] =
+ SlowSymmetric3Pixel<WrapMirror, WrapY>(in, 0, iy, xsize, ysize, weights);
+ for (int64_t ix = 1; ix < xsize - 1; ix++) {
+ row_out[ix] = SlowSymmetric3Pixel<WrapUnchanged, WrapY>(in, ix, iy, xsize,
+ ysize, weights);
+ }
+ {
+ const int64_t ix = xsize - 1;
+ row_out[ix] = SlowSymmetric3Pixel<WrapMirror, WrapY>(in, ix, iy, xsize,
+ ysize, weights);
+ }
+}
+
+} // namespace
+
+void SlowSymmetric3(const ImageF& in, const Rect& rect,
+ const WeightsSymmetric3& weights, ThreadPool* pool,
+ ImageF* JXL_RESTRICT out) {
+ PROFILER_FUNC;
+
+ const int64_t xsize = static_cast<int64_t>(rect.xsize());
+ const int64_t ysize = static_cast<int64_t>(rect.ysize());
+ const int64_t kRadius = 1;
+
+ JXL_CHECK(RunOnPool(
+ pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /*thread*/) {
+ const int64_t iy = task;
+ float* JXL_RESTRICT out_row = out->Row(static_cast<size_t>(iy));
+
+ if (iy < kRadius || iy >= ysize - kRadius) {
+ SlowSymmetric3Row<WrapMirror>(in, iy, xsize, ysize, weights, out_row);
+ } else {
+ SlowSymmetric3Row<WrapUnchanged>(in, iy, xsize, ysize, weights,
+ out_row);
+ }
+ },
+ "SlowSymmetric3"));
+}
+
+namespace {
+
+// Separable kernels, any radius.
+float SlowSeparablePixel(const ImageF& in, const Rect& rect, const int64_t x,
+ const int64_t y, const int64_t radius,
+ const float* JXL_RESTRICT horz_weights,
+ const float* JXL_RESTRICT vert_weights) {
+ const size_t xsize = rect.xsize();
+ const size_t ysize = rect.ysize();
+ const WrapMirror wrap;
+
+ float mul = 0.0f;
+ for (int dy = -radius; dy <= radius; ++dy) {
+ const float wy = vert_weights[std::abs(dy) * 4];
+ const size_t sy = wrap(y + dy, ysize);
+ JXL_CHECK(sy < ysize);
+ const float* const JXL_RESTRICT row = rect.ConstRow(in, sy);
+ for (int dx = -radius; dx <= radius; ++dx) {
+ const float wx = horz_weights[std::abs(dx) * 4];
+ const size_t sx = wrap(x + dx, xsize);
+ JXL_CHECK(sx < xsize);
+ mul += row[sx] * wx * wy;
+ }
+ }
+ return mul;
+}
+
+} // namespace
+
+void SlowSeparable5(const ImageF& in, const Rect& rect,
+ const WeightsSeparable5& weights, ThreadPool* pool,
+ ImageF* out) {
+ PROFILER_FUNC;
+ const float* horz_weights = &weights.horz[0];
+ const float* vert_weights = &weights.vert[0];
+
+ const size_t ysize = rect.ysize();
+ JXL_CHECK(RunOnPool(
+ pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /*thread*/) {
+ const int64_t y = task;
+
+ float* const JXL_RESTRICT row_out = out->Row(y);
+ for (size_t x = 0; x < rect.xsize(); ++x) {
+ row_out[x] = SlowSeparablePixel(in, rect, x, y, /*radius=*/2,
+ horz_weights, vert_weights);
+ }
+ },
+ "SlowSeparable5"));
+}
+
+void SlowSeparable7(const ImageF& in, const Rect& rect,
+ const WeightsSeparable7& weights, ThreadPool* pool,
+ ImageF* out) {
+ PROFILER_FUNC;
+ const float* horz_weights = &weights.horz[0];
+ const float* vert_weights = &weights.vert[0];
+
+ const size_t ysize = rect.ysize();
+ JXL_CHECK(RunOnPool(
+ pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /*thread*/) {
+ const int64_t y = task;
+
+ float* const JXL_RESTRICT row_out = out->Row(y);
+ for (size_t x = 0; x < rect.xsize(); ++x) {
+ row_out[x] = SlowSeparablePixel(in, rect, x, y, /*radius=*/3,
+ horz_weights, vert_weights);
+ }
+ },
+ "SlowSeparable7"));
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/convolve_symmetric3.cc b/third_party/jpeg-xl/lib/jxl/convolve_symmetric3.cc
new file mode 100644
index 0000000000..06b59dfb60
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/convolve_symmetric3.cc
@@ -0,0 +1,194 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/convolve.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/convolve_symmetric3.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/convolve-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Vec;
+
+template <class WrapY, class V>
+static V WeightedSum(const ImageF& in, const WrapY wrap_y, const size_t ix,
+ const int64_t iy, const size_t ysize, const V wx0,
+ const V wx1, const V wx2) {
+ const HWY_FULL(float) d;
+ const float* JXL_RESTRICT center = in.ConstRow(wrap_y(iy, ysize)) + ix;
+ const auto in_m2 = LoadU(d, center - 2);
+ const auto in_p2 = LoadU(d, center + 2);
+ const auto in_m1 = LoadU(d, center - 1);
+ const auto in_p1 = LoadU(d, center + 1);
+ const auto in_00 = Load(d, center);
+ const auto sum_2 = Mul(wx2, Add(in_m2, in_p2));
+ const auto sum_1 = Mul(wx1, Add(in_m1, in_p1));
+ const auto sum_0 = Mul(wx0, in_00);
+ return Add(sum_2, Add(sum_1, sum_0));
+}
+
+// 3x3 convolution by symmetric kernel with a single scan through the input.
+class Symmetric3Strategy {
+ using D = HWY_CAPPED(float, 16);
+ using V = Vec<D>;
+
+ public:
+ static constexpr int64_t kRadius = 1;
+
+ // Only accesses pixels in [0, xsize).
+ template <size_t kSizeModN, class WrapRow>
+ static JXL_MAYBE_INLINE void ConvolveRow(
+ const float* const JXL_RESTRICT row_m, const size_t xsize,
+ const int64_t stride, const WrapRow& wrap_row,
+ const WeightsSymmetric3& weights, float* const JXL_RESTRICT row_out) {
+ const D d;
+ // t, m, b = top, middle, bottom row;
+ const float* const JXL_RESTRICT row_t = wrap_row(row_m - stride, stride);
+ const float* const JXL_RESTRICT row_b = wrap_row(row_m + stride, stride);
+
+ // Must load in advance - compiler doesn't understand LoadDup128 and
+ // schedules them too late.
+ const V w0 = LoadDup128(d, weights.c);
+ const V w1 = LoadDup128(d, weights.r);
+ const V w2 = LoadDup128(d, weights.d);
+
+ // l, c, r = left, center, right. Leftmost vector: need FirstL1.
+ {
+ const V tc = LoadU(d, row_t + 0);
+ const V mc = LoadU(d, row_m + 0);
+ const V bc = LoadU(d, row_b + 0);
+ const V tl = Neighbors::FirstL1(tc);
+ const V tr = LoadU(d, row_t + 0 + 1);
+ const V ml = Neighbors::FirstL1(mc);
+ const V mr = LoadU(d, row_m + 0 + 1);
+ const V bl = Neighbors::FirstL1(bc);
+ const V br = LoadU(d, row_b + 0 + 1);
+ const V conv =
+ WeightedSum(tl, tc, tr, ml, mc, mr, bl, bc, br, w0, w1, w2);
+ Store(conv, d, row_out + 0);
+ }
+
+ // Loop as long as we can load enough new values:
+ const size_t N = Lanes(d);
+ size_t x = N;
+ for (; x + N + kRadius <= xsize; x += N) {
+ const auto conv = ConvolveValid(row_t, row_m, row_b, x, w0, w1, w2);
+ Store(conv, d, row_out + x);
+ }
+
+ // For final (partial) vector:
+ const V tc = LoadU(d, row_t + x);
+ const V mc = LoadU(d, row_m + x);
+ const V bc = LoadU(d, row_b + x);
+
+ V tr, mr, br;
+#if HWY_TARGET == HWY_SCALAR
+ tr = tc; // Single-lane => mirrored right neighbor = center value.
+ mr = mc;
+ br = bc;
+#else
+ if (kSizeModN == 0) {
+ // The above loop didn't handle the last vector because it needs an
+ // additional right neighbor (generated via mirroring).
+ auto mirror = SetTableIndices(d, MirrorLanes(N - 1));
+ tr = TableLookupLanes(tc, mirror);
+ mr = TableLookupLanes(mc, mirror);
+ br = TableLookupLanes(bc, mirror);
+ } else {
+ auto mirror = SetTableIndices(d, MirrorLanes((xsize % N) - 1));
+ // Loads last valid value into uppermost lane and mirrors.
+ tr = TableLookupLanes(LoadU(d, row_t + xsize - N), mirror);
+ mr = TableLookupLanes(LoadU(d, row_m + xsize - N), mirror);
+ br = TableLookupLanes(LoadU(d, row_b + xsize - N), mirror);
+ }
+#endif
+
+ const V tl = LoadU(d, row_t + x - 1);
+ const V ml = LoadU(d, row_m + x - 1);
+ const V bl = LoadU(d, row_b + x - 1);
+ const V conv = WeightedSum(tl, tc, tr, ml, mc, mr, bl, bc, br, w0, w1, w2);
+ Store(conv, d, row_out + x);
+ }
+
+ private:
+ // Returns sum{x_i * w_i}.
+ template <class V>
+ static JXL_MAYBE_INLINE V WeightedSum(const V tl, const V tc, const V tr,
+ const V ml, const V mc, const V mr,
+ const V bl, const V bc, const V br,
+ const V w0, const V w1, const V w2) {
+ const V sum_tb = Add(tc, bc);
+
+ // Faster than 5 mul + 4 FMA.
+ const V mul0 = Mul(mc, w0);
+ const V sum_lr = Add(ml, mr);
+
+ const V x1 = Add(sum_tb, sum_lr);
+ const V mul1 = MulAdd(x1, w1, mul0);
+
+ const V sum_t2 = Add(tl, tr);
+ const V sum_b2 = Add(bl, br);
+ const V x2 = Add(sum_t2, sum_b2);
+ const V mul2 = MulAdd(x2, w2, mul1);
+ return mul2;
+ }
+
+ static JXL_MAYBE_INLINE V ConvolveValid(const float* JXL_RESTRICT row_t,
+ const float* JXL_RESTRICT row_m,
+ const float* JXL_RESTRICT row_b,
+ const int64_t x, const V w0,
+ const V w1, const V w2) {
+ const D d;
+ const V tc = LoadU(d, row_t + x);
+ const V mc = LoadU(d, row_m + x);
+ const V bc = LoadU(d, row_b + x);
+ const V tl = LoadU(d, row_t + x - 1);
+ const V tr = LoadU(d, row_t + x + 1);
+ const V ml = LoadU(d, row_m + x - 1);
+ const V mr = LoadU(d, row_m + x + 1);
+ const V bl = LoadU(d, row_b + x - 1);
+ const V br = LoadU(d, row_b + x + 1);
+ return WeightedSum(tl, tc, tr, ml, mc, mr, bl, bc, br, w0, w1, w2);
+ }
+};
+
+void Symmetric3(const ImageF& in, const Rect& rect,
+ const WeightsSymmetric3& weights, ThreadPool* pool,
+ ImageF* out) {
+ using Conv = ConvolveT<Symmetric3Strategy>;
+ if (rect.xsize() >= Conv::MinWidth()) {
+ return Conv::Run(in, rect, weights, pool, out);
+ }
+
+ return SlowSymmetric3(in, rect, weights, pool, out);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(Symmetric3);
+void Symmetric3(const ImageF& in, const Rect& rect,
+ const WeightsSymmetric3& weights, ThreadPool* pool,
+ ImageF* out) {
+ return HWY_DYNAMIC_DISPATCH(Symmetric3)(in, rect, weights, pool, out);
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/convolve_symmetric5.cc b/third_party/jpeg-xl/lib/jxl/convolve_symmetric5.cc
new file mode 100644
index 0000000000..55a16899c3
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/convolve_symmetric5.cc
@@ -0,0 +1,185 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/convolve.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/convolve_symmetric5.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/common.h" // RoundUpTo
+#include "lib/jxl/convolve-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::Vec;
+
+// Weighted sum of 1x5 pixels around ix, iy with [wx2 wx1 wx0 wx1 wx2].
+template <class WrapY>
+static float WeightedSumBorder(const ImageF& in, const WrapY wrap_y,
+ const int64_t ix, const int64_t iy,
+ const size_t xsize, const size_t ysize,
+ const float wx0, const float wx1,
+ const float wx2) {
+ const WrapMirror wrap_x;
+ const float* JXL_RESTRICT row = in.ConstRow(wrap_y(iy, ysize));
+ const float in_m2 = row[wrap_x(ix - 2, xsize)];
+ const float in_p2 = row[wrap_x(ix + 2, xsize)];
+ const float in_m1 = row[wrap_x(ix - 1, xsize)];
+ const float in_p1 = row[wrap_x(ix + 1, xsize)];
+ const float in_00 = row[ix];
+ const float sum_2 = wx2 * (in_m2 + in_p2);
+ const float sum_1 = wx1 * (in_m1 + in_p1);
+ const float sum_0 = wx0 * in_00;
+ return sum_2 + sum_1 + sum_0;
+}
+
+template <class WrapY, class V>
+static V WeightedSum(const ImageF& in, const WrapY wrap_y, const size_t ix,
+ const int64_t iy, const size_t ysize, const V wx0,
+ const V wx1, const V wx2) {
+ const HWY_FULL(float) d;
+ const float* JXL_RESTRICT center = in.ConstRow(wrap_y(iy, ysize)) + ix;
+ const auto in_m2 = LoadU(d, center - 2);
+ const auto in_p2 = LoadU(d, center + 2);
+ const auto in_m1 = LoadU(d, center - 1);
+ const auto in_p1 = LoadU(d, center + 1);
+ const auto in_00 = Load(d, center);
+ const auto sum_2 = Mul(wx2, Add(in_m2, in_p2));
+ const auto sum_1 = Mul(wx1, Add(in_m1, in_p1));
+ const auto sum_0 = Mul(wx0, in_00);
+ return Add(sum_2, Add(sum_1, sum_0));
+}
+
+// Produces result for one pixel
+template <class WrapY>
+float Symmetric5Border(const ImageF& in, const Rect& rect, const int64_t ix,
+ const int64_t iy, const WeightsSymmetric5& weights) {
+ const float w0 = weights.c[0];
+ const float w1 = weights.r[0];
+ const float w2 = weights.R[0];
+ const float w4 = weights.d[0];
+ const float w5 = weights.L[0];
+ const float w8 = weights.D[0];
+
+ const size_t xsize = rect.xsize();
+ const size_t ysize = rect.ysize();
+ const WrapY wrap_y;
+ // Unrolled loop over all 5 rows of the kernel.
+ float sum0 = WeightedSumBorder(in, wrap_y, ix, iy, xsize, ysize, w0, w1, w2);
+
+ sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 2, xsize, ysize, w2, w5, w8);
+ float sum1 =
+ WeightedSumBorder(in, wrap_y, ix, iy + 2, xsize, ysize, w2, w5, w8);
+
+ sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 1, xsize, ysize, w1, w4, w5);
+ sum1 += WeightedSumBorder(in, wrap_y, ix, iy + 1, xsize, ysize, w1, w4, w5);
+
+ return sum0 + sum1;
+}
+
+// Produces result for one vector's worth of pixels
+template <class WrapY>
+static void Symmetric5Interior(const ImageF& in, const Rect& rect,
+ const int64_t ix, const int64_t iy,
+ const WeightsSymmetric5& weights,
+ float* JXL_RESTRICT row_out) {
+ const HWY_FULL(float) d;
+
+ const auto w0 = LoadDup128(d, weights.c);
+ const auto w1 = LoadDup128(d, weights.r);
+ const auto w2 = LoadDup128(d, weights.R);
+ const auto w4 = LoadDup128(d, weights.d);
+ const auto w5 = LoadDup128(d, weights.L);
+ const auto w8 = LoadDup128(d, weights.D);
+
+ const size_t ysize = rect.ysize();
+ const WrapY wrap_y;
+ // Unrolled loop over all 5 rows of the kernel.
+ auto sum0 = WeightedSum(in, wrap_y, ix, iy, ysize, w0, w1, w2);
+
+ sum0 = Add(sum0, WeightedSum(in, wrap_y, ix, iy - 2, ysize, w2, w5, w8));
+ auto sum1 = WeightedSum(in, wrap_y, ix, iy + 2, ysize, w2, w5, w8);
+
+ sum0 = Add(sum0, WeightedSum(in, wrap_y, ix, iy - 1, ysize, w1, w4, w5));
+ sum1 = Add(sum1, WeightedSum(in, wrap_y, ix, iy + 1, ysize, w1, w4, w5));
+
+ Store(Add(sum0, sum1), d, row_out + ix);
+}
+
+template <class WrapY>
+static void Symmetric5Row(const ImageF& in, const Rect& rect, const int64_t iy,
+ const WeightsSymmetric5& weights,
+ float* JXL_RESTRICT row_out) {
+ const int64_t kRadius = 2;
+ const size_t xsize = rect.xsize();
+
+ size_t ix = 0;
+ const HWY_FULL(float) d;
+ const size_t N = Lanes(d);
+ const size_t aligned_x = RoundUpTo(kRadius, N);
+ for (; ix < std::min(aligned_x, xsize); ++ix) {
+ row_out[ix] = Symmetric5Border<WrapY>(in, rect, ix, iy, weights);
+ }
+ for (; ix + N + kRadius <= xsize; ix += N) {
+ Symmetric5Interior<WrapY>(in, rect, ix, iy, weights, row_out);
+ }
+ for (; ix < xsize; ++ix) {
+ row_out[ix] = Symmetric5Border<WrapY>(in, rect, ix, iy, weights);
+ }
+}
+
+static JXL_NOINLINE void Symmetric5BorderRow(const ImageF& in, const Rect& rect,
+ const int64_t iy,
+ const WeightsSymmetric5& weights,
+ float* JXL_RESTRICT row_out) {
+ return Symmetric5Row<WrapMirror>(in, rect, iy, weights, row_out);
+}
+
+// Semi-vectorized (interior pixels Fonly); called directly like slow::, unlike
+// the fully vectorized strategies below.
+void Symmetric5(const ImageF& in, const Rect& rect,
+ const WeightsSymmetric5& weights, ThreadPool* pool,
+ ImageF* JXL_RESTRICT out) {
+ PROFILER_FUNC;
+
+ const size_t ysize = rect.ysize();
+ JXL_CHECK(RunOnPool(
+ pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /*thread*/) {
+ const int64_t iy = task;
+
+ if (iy < 2 || iy >= static_cast<ssize_t>(ysize) - 2) {
+ Symmetric5BorderRow(in, rect, iy, weights, out->Row(iy));
+ } else {
+ Symmetric5Row<WrapUnchanged>(in, rect, iy, weights, out->Row(iy));
+ }
+ },
+ "Symmetric5x5Convolution"));
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(Symmetric5);
+void Symmetric5(const ImageF& in, const Rect& rect,
+ const WeightsSymmetric5& weights, ThreadPool* pool,
+ ImageF* JXL_RESTRICT out) {
+ return HWY_DYNAMIC_DISPATCH(Symmetric5)(in, rect, weights, pool, out);
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/convolve_test.cc b/third_party/jpeg-xl/lib/jxl/convolve_test.cc
new file mode 100644
index 0000000000..e86d637114
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/convolve_test.cc
@@ -0,0 +1,252 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/convolve.h"
+
+#include <time.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/convolve_test.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+#include <hwy/nanobenchmark.h>
+#include <hwy/tests/test_util-inl.h>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+#ifndef JXL_DEBUG_CONVOLVE
+#define JXL_DEBUG_CONVOLVE 0
+#endif
+
+#include "lib/jxl/convolve-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+void TestNeighbors() {
+ const Neighbors::D d;
+ const Neighbors::V v = Iota(d, 0);
+ HWY_ALIGN float actual[hwy::kTestMaxVectorSize / sizeof(float)] = {0};
+
+ HWY_ALIGN float first_l1[hwy::kTestMaxVectorSize / sizeof(float)] = {
+ 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14};
+ Store(Neighbors::FirstL1(v), d, actual);
+ const size_t N = Lanes(d);
+ EXPECT_EQ(std::vector<float>(first_l1, first_l1 + N),
+ std::vector<float>(actual, actual + N));
+
+#if HWY_TARGET != HWY_SCALAR
+ HWY_ALIGN float first_l2[hwy::kTestMaxVectorSize / sizeof(float)] = {
+ 1, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13};
+ Store(Neighbors::FirstL2(v), d, actual);
+ EXPECT_EQ(std::vector<float>(first_l2, first_l2 + N),
+ std::vector<float>(actual, actual + N));
+
+ HWY_ALIGN float first_l3[hwy::kTestMaxVectorSize / sizeof(float)] = {
+ 2, 1, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+ Store(Neighbors::FirstL3(v), d, actual);
+ EXPECT_EQ(std::vector<float>(first_l3, first_l3 + N),
+ std::vector<float>(actual, actual + N));
+#endif // HWY_TARGET != HWY_SCALAR
+}
+
+void VerifySymmetric3(const size_t xsize, const size_t ysize, ThreadPool* pool,
+ Rng* rng) {
+ const Rect rect(0, 0, xsize, ysize);
+
+ ImageF in(xsize, ysize);
+ GenerateImage(*rng, &in, 0.0f, 1.0f);
+
+ ImageF out_expected(xsize, ysize);
+ ImageF out_actual(xsize, ysize);
+
+ const WeightsSymmetric3& weights = WeightsSymmetric3Lowpass();
+ Symmetric3(in, rect, weights, pool, &out_expected);
+ SlowSymmetric3(in, rect, weights, pool, &out_actual);
+
+ JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
+}
+
+// Ensures Symmetric and Separable give the same result.
+void VerifySymmetric5(const size_t xsize, const size_t ysize, ThreadPool* pool,
+ Rng* rng) {
+ const Rect rect(0, 0, xsize, ysize);
+
+ ImageF in(xsize, ysize);
+ GenerateImage(*rng, &in, 0.0f, 1.0f);
+
+ ImageF out_expected(xsize, ysize);
+ ImageF out_actual(xsize, ysize);
+
+ Separable5(in, Rect(in), WeightsSeparable5Lowpass(), pool, &out_expected);
+ Symmetric5(in, rect, WeightsSymmetric5Lowpass(), pool, &out_actual);
+
+ JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
+}
+
+void VerifySeparable5(const size_t xsize, const size_t ysize, ThreadPool* pool,
+ Rng* rng) {
+ const Rect rect(0, 0, xsize, ysize);
+
+ ImageF in(xsize, ysize);
+ GenerateImage(*rng, &in, 0.0f, 1.0f);
+
+ ImageF out_expected(xsize, ysize);
+ ImageF out_actual(xsize, ysize);
+
+ const WeightsSeparable5& weights = WeightsSeparable5Lowpass();
+ Separable5(in, Rect(in), weights, pool, &out_expected);
+ SlowSeparable5(in, rect, weights, pool, &out_actual);
+
+ JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
+}
+
+void VerifySeparable7(const size_t xsize, const size_t ysize, ThreadPool* pool,
+ Rng* rng) {
+ const Rect rect(0, 0, xsize, ysize);
+
+ ImageF in(xsize, ysize);
+ GenerateImage(*rng, &in, 0.0f, 1.0f);
+
+ ImageF out_expected(xsize, ysize);
+ ImageF out_actual(xsize, ysize);
+
+ // Gaussian sigma 1.0
+ const WeightsSeparable7 weights = {{HWY_REP4(0.383103f), HWY_REP4(0.241843f),
+ HWY_REP4(0.060626f), HWY_REP4(0.00598f)},
+ {HWY_REP4(0.383103f), HWY_REP4(0.241843f),
+ HWY_REP4(0.060626f), HWY_REP4(0.00598f)}};
+
+ SlowSeparable7(in, rect, weights, pool, &out_expected);
+ Separable7(in, Rect(in), weights, pool, &out_actual);
+
+ JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _));
+}
+
+// For all xsize/ysize and kernels:
+void TestConvolve() {
+ TestNeighbors();
+
+ test::ThreadPoolForTests pool(4);
+ EXPECT_EQ(true,
+ RunOnPool(
+ &pool, kConvolveMaxRadius, 40, ThreadPool::NoInit,
+ [](const uint32_t task, size_t /*thread*/) {
+ const size_t xsize = task;
+ Rng rng(129 + 13 * xsize);
+
+ ThreadPool* null_pool = nullptr;
+ test::ThreadPoolForTests pool3(3);
+ for (size_t ysize = kConvolveMaxRadius; ysize < 16; ++ysize) {
+ JXL_DEBUG(JXL_DEBUG_CONVOLVE,
+ "%" PRIuS " x %" PRIuS " (target %" PRIx64
+ ")===============================",
+ xsize, ysize, static_cast<int64_t>(HWY_TARGET));
+
+ JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sym3------------------");
+ VerifySymmetric3(xsize, ysize, null_pool, &rng);
+ VerifySymmetric3(xsize, ysize, &pool3, &rng);
+
+ JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sym5------------------");
+ VerifySymmetric5(xsize, ysize, null_pool, &rng);
+ VerifySymmetric5(xsize, ysize, &pool3, &rng);
+
+ JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sep5------------------");
+ VerifySeparable5(xsize, ysize, null_pool, &rng);
+ VerifySeparable5(xsize, ysize, &pool3, &rng);
+
+ JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sep7------------------");
+ VerifySeparable7(xsize, ysize, null_pool, &rng);
+ VerifySeparable7(xsize, ysize, &pool3, &rng);
+ }
+ },
+ "TestConvolve"));
+}
+
+// Measures durations, verifies results, prints timings. `unpredictable1`
+// must have value 1 (unknown to the compiler to prevent elision).
+template <class Conv>
+void BenchmarkConv(const char* caption, const Conv& conv,
+ const hwy::FuncInput unpredictable1) {
+ const size_t kNumInputs = 1;
+ const hwy::FuncInput inputs[kNumInputs] = {unpredictable1};
+ hwy::Result results[kNumInputs];
+
+ const size_t kDim = 160; // in+out fit in L2
+ ImageF in(kDim, kDim);
+ ZeroFillImage(&in);
+ in.Row(kDim / 2)[kDim / 2] = unpredictable1;
+ ImageF out(kDim, kDim);
+
+ hwy::Params p;
+ p.verbose = false;
+ p.max_evals = 7;
+ p.target_rel_mad = 0.002;
+ const size_t num_results = MeasureClosure(
+ [&in, &conv, &out](const hwy::FuncInput input) {
+ conv(in, &out);
+ return out.Row(input)[0];
+ },
+ inputs, kNumInputs, results, p);
+ if (num_results != kNumInputs) {
+ fprintf(stderr, "MeasureClosure failed.\n");
+ }
+ for (size_t i = 0; i < num_results; ++i) {
+ const double seconds = static_cast<double>(results[i].ticks) /
+ hwy::platform::InvariantTicksPerSecond();
+ printf("%12s: %7.2f MP/s (MAD=%4.2f%%)\n", caption,
+ kDim * kDim * 1E-6 / seconds,
+ static_cast<double>(results[i].variability) * 100.0);
+ }
+}
+
+struct ConvSymmetric3 {
+ void operator()(const ImageF& in, ImageF* JXL_RESTRICT out) const {
+ ThreadPool* null_pool = nullptr;
+ Symmetric3(in, Rect(in), WeightsSymmetric3Lowpass(), null_pool, out);
+ }
+};
+
+struct ConvSeparable5 {
+ void operator()(const ImageF& in, ImageF* JXL_RESTRICT out) const {
+ ThreadPool* null_pool = nullptr;
+ Separable5(in, Rect(in), WeightsSeparable5Lowpass(), null_pool, out);
+ }
+};
+
+void BenchmarkAll() {
+#if 0 // disabled to avoid test timeouts, run manually on demand
+ const hwy::FuncInput unpredictable1 = time(nullptr) != 1234;
+ BenchmarkConv("Symmetric3", ConvSymmetric3(), unpredictable1);
+ BenchmarkConv("Separable5", ConvSeparable5(), unpredictable1);
+#endif
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+class ConvolveTest : public hwy::TestWithParamTarget {};
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(ConvolveTest);
+
+HWY_EXPORT_AND_TEST_P(ConvolveTest, TestConvolve);
+
+HWY_EXPORT_AND_TEST_P(ConvolveTest, BenchmarkAll);
+
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/data_parallel_test.cc b/third_party/jpeg-xl/lib/jxl/data_parallel_test.cc
new file mode 100644
index 0000000000..ee2a97f93a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/data_parallel_test.cc
@@ -0,0 +1,87 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/data_parallel.h"
+
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+class DataParallelTest : public ::testing::Test {
+ protected:
+ // A fake class to verify that DataParallel is properly calling the
+ // client-provided runner functions.
+ static int FakeRunner(void* runner_opaque, void* jpegxl_opaque,
+ JxlParallelRunInit init, JxlParallelRunFunction func,
+ uint32_t start_range, uint32_t end_range) {
+ DataParallelTest* self = static_cast<DataParallelTest*>(runner_opaque);
+ self->runner_called_++;
+ self->jpegxl_opaque_ = jpegxl_opaque;
+ self->init_ = init;
+ self->func_ = func;
+ self->start_range_ = start_range;
+ self->end_range_ = end_range;
+ return self->runner_return_;
+ }
+
+ ThreadPool pool_{&DataParallelTest::FakeRunner, this};
+
+ // Number of times FakeRunner() was called.
+ int runner_called_ = 0;
+
+ // Parameters passed to FakeRunner.
+ void* jpegxl_opaque_ = nullptr;
+ JxlParallelRunInit init_ = nullptr;
+ JxlParallelRunFunction func_ = nullptr;
+ uint32_t start_range_ = -1;
+ uint32_t end_range_ = -1;
+
+ // Return value that FakeRunner will return.
+ int runner_return_ = 0;
+};
+
+// JxlParallelRunInit interface.
+typedef int (*JxlParallelRunInit)();
+
+} // namespace
+
+TEST_F(DataParallelTest, RunnerCalledParameters) {
+ EXPECT_TRUE(pool_.Run(
+ 1234, 5678, [](size_t /* num_threads */) { return true; },
+ [](uint32_t /* task */, size_t /* thread */) { return; }));
+ EXPECT_EQ(1, runner_called_);
+ EXPECT_NE(nullptr, init_);
+ EXPECT_NE(nullptr, func_);
+ EXPECT_NE(nullptr, jpegxl_opaque_);
+ EXPECT_EQ(1234u, start_range_);
+ EXPECT_EQ(5678u, end_range_);
+}
+
+TEST_F(DataParallelTest, RunnerFailurePropagates) {
+ runner_return_ = -1; // FakeRunner return value.
+ EXPECT_FALSE(pool_.Run(
+ 1234, 5678, [](size_t /* num_threads */) { return false; },
+ [](uint32_t /* task */, size_t /* thread */) { return; }));
+ EXPECT_FALSE(RunOnPool(
+ nullptr, 1234, 5678, [](size_t /* num_threads */) { return false; },
+ [](uint32_t /* task */, size_t /* thread */) { return; }, "Test"));
+}
+
+TEST_F(DataParallelTest, RunnerNotCalledOnEmptyRange) {
+ runner_return_ = -1; // FakeRunner return value.
+ EXPECT_TRUE(pool_.Run(
+ 123, 123, [](size_t /* num_threads */) { return false; },
+ [](uint32_t /* task */, size_t /* thread */) { return; }));
+ EXPECT_TRUE(RunOnPool(
+ nullptr, 123, 123, [](size_t /* num_threads */) { return false; },
+ [](uint32_t /* task */, size_t /* thread */) { return; }, "Test"));
+ // We don't call the external runner when the range is empty. We don't even
+ // need to call the init function.
+ EXPECT_EQ(0, runner_called_);
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/dct-inl.h b/third_party/jpeg-xl/lib/jxl/dct-inl.h
new file mode 100644
index 0000000000..532606075e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dct-inl.h
@@ -0,0 +1,334 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Fast SIMD floating-point (I)DCT, any power of two.
+
+#if defined(LIB_JXL_DCT_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_DCT_INL_H_
+#undef LIB_JXL_DCT_INL_H_
+#else
+#define LIB_JXL_DCT_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/dct_block-inl.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/transpose-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::NegMulAdd;
+using hwy::HWY_NAMESPACE::Sub;
+
+template <size_t SZ>
+struct FVImpl {
+ using type = HWY_CAPPED(float, SZ);
+};
+
+template <>
+struct FVImpl<0> {
+ using type = HWY_FULL(float);
+};
+
+template <size_t SZ>
+using FV = typename FVImpl<SZ>::type;
+
+// Implementation of Lowest Complexity Self Recursive Radix-2 DCT II/III
+// Algorithms, by Siriani M. Perera and Jianhua Liu.
+
+template <size_t N, size_t SZ>
+struct CoeffBundle {
+ static void AddReverse(const float* JXL_RESTRICT ain1,
+ const float* JXL_RESTRICT ain2,
+ float* JXL_RESTRICT aout) {
+ for (size_t i = 0; i < N; i++) {
+ auto in1 = Load(FV<SZ>(), ain1 + i * SZ);
+ auto in2 = Load(FV<SZ>(), ain2 + (N - i - 1) * SZ);
+ Store(Add(in1, in2), FV<SZ>(), aout + i * SZ);
+ }
+ }
+ static void SubReverse(const float* JXL_RESTRICT ain1,
+ const float* JXL_RESTRICT ain2,
+ float* JXL_RESTRICT aout) {
+ for (size_t i = 0; i < N; i++) {
+ auto in1 = Load(FV<SZ>(), ain1 + i * SZ);
+ auto in2 = Load(FV<SZ>(), ain2 + (N - i - 1) * SZ);
+ Store(Sub(in1, in2), FV<SZ>(), aout + i * SZ);
+ }
+ }
+ static void B(float* JXL_RESTRICT coeff) {
+ auto sqrt2 = Set(FV<SZ>(), kSqrt2);
+ auto in1 = Load(FV<SZ>(), coeff);
+ auto in2 = Load(FV<SZ>(), coeff + SZ);
+ Store(MulAdd(in1, sqrt2, in2), FV<SZ>(), coeff);
+ for (size_t i = 1; i + 1 < N; i++) {
+ auto in1 = Load(FV<SZ>(), coeff + i * SZ);
+ auto in2 = Load(FV<SZ>(), coeff + (i + 1) * SZ);
+ Store(Add(in1, in2), FV<SZ>(), coeff + i * SZ);
+ }
+ }
+ static void BTranspose(float* JXL_RESTRICT coeff) {
+ for (size_t i = N - 1; i > 0; i--) {
+ auto in1 = Load(FV<SZ>(), coeff + i * SZ);
+ auto in2 = Load(FV<SZ>(), coeff + (i - 1) * SZ);
+ Store(Add(in1, in2), FV<SZ>(), coeff + i * SZ);
+ }
+ auto sqrt2 = Set(FV<SZ>(), kSqrt2);
+ auto in1 = Load(FV<SZ>(), coeff);
+ Store(Mul(in1, sqrt2), FV<SZ>(), coeff);
+ }
+ // Ideally optimized away by compiler (except the multiply).
+ static void InverseEvenOdd(const float* JXL_RESTRICT ain,
+ float* JXL_RESTRICT aout) {
+ for (size_t i = 0; i < N / 2; i++) {
+ auto in1 = Load(FV<SZ>(), ain + i * SZ);
+ Store(in1, FV<SZ>(), aout + 2 * i * SZ);
+ }
+ for (size_t i = N / 2; i < N; i++) {
+ auto in1 = Load(FV<SZ>(), ain + i * SZ);
+ Store(in1, FV<SZ>(), aout + (2 * (i - N / 2) + 1) * SZ);
+ }
+ }
+ // Ideally optimized away by compiler.
+ static void ForwardEvenOdd(const float* JXL_RESTRICT ain, size_t ain_stride,
+ float* JXL_RESTRICT aout) {
+ for (size_t i = 0; i < N / 2; i++) {
+ auto in1 = LoadU(FV<SZ>(), ain + 2 * i * ain_stride);
+ Store(in1, FV<SZ>(), aout + i * SZ);
+ }
+ for (size_t i = N / 2; i < N; i++) {
+ auto in1 = LoadU(FV<SZ>(), ain + (2 * (i - N / 2) + 1) * ain_stride);
+ Store(in1, FV<SZ>(), aout + i * SZ);
+ }
+ }
+ // Invoked on full vector.
+ static void Multiply(float* JXL_RESTRICT coeff) {
+ for (size_t i = 0; i < N / 2; i++) {
+ auto in1 = Load(FV<SZ>(), coeff + (N / 2 + i) * SZ);
+ auto mul = Set(FV<SZ>(), WcMultipliers<N>::kMultipliers[i]);
+ Store(Mul(in1, mul), FV<SZ>(), coeff + (N / 2 + i) * SZ);
+ }
+ }
+ static void MultiplyAndAdd(const float* JXL_RESTRICT coeff,
+ float* JXL_RESTRICT out, size_t out_stride) {
+ for (size_t i = 0; i < N / 2; i++) {
+ auto mul = Set(FV<SZ>(), WcMultipliers<N>::kMultipliers[i]);
+ auto in1 = Load(FV<SZ>(), coeff + i * SZ);
+ auto in2 = Load(FV<SZ>(), coeff + (N / 2 + i) * SZ);
+ auto out1 = MulAdd(mul, in2, in1);
+ auto out2 = NegMulAdd(mul, in2, in1);
+ StoreU(out1, FV<SZ>(), out + i * out_stride);
+ StoreU(out2, FV<SZ>(), out + (N - i - 1) * out_stride);
+ }
+ }
+ template <typename Block>
+ static void LoadFromBlock(const Block& in, size_t off,
+ float* JXL_RESTRICT coeff) {
+ for (size_t i = 0; i < N; i++) {
+ Store(in.LoadPart(FV<SZ>(), i, off), FV<SZ>(), coeff + i * SZ);
+ }
+ }
+ template <typename Block>
+ static void StoreToBlockAndScale(const float* JXL_RESTRICT coeff,
+ const Block& out, size_t off) {
+ auto mul = Set(FV<SZ>(), 1.0f / N);
+ for (size_t i = 0; i < N; i++) {
+ out.StorePart(FV<SZ>(), Mul(mul, Load(FV<SZ>(), coeff + i * SZ)), i, off);
+ }
+ }
+};
+
+template <size_t N, size_t SZ>
+struct DCT1DImpl;
+
+template <size_t SZ>
+struct DCT1DImpl<1, SZ> {
+ JXL_INLINE void operator()(float* JXL_RESTRICT mem) {}
+};
+
+template <size_t SZ>
+struct DCT1DImpl<2, SZ> {
+ JXL_INLINE void operator()(float* JXL_RESTRICT mem) {
+ auto in1 = Load(FV<SZ>(), mem);
+ auto in2 = Load(FV<SZ>(), mem + SZ);
+ Store(Add(in1, in2), FV<SZ>(), mem);
+ Store(Sub(in1, in2), FV<SZ>(), mem + SZ);
+ }
+};
+
+template <size_t N, size_t SZ>
+struct DCT1DImpl {
+ void operator()(float* JXL_RESTRICT mem) {
+ // This is relatively small (4kB with 64-DCT and AVX-512)
+ HWY_ALIGN float tmp[N * SZ];
+ CoeffBundle<N / 2, SZ>::AddReverse(mem, mem + N / 2 * SZ, tmp);
+ DCT1DImpl<N / 2, SZ>()(tmp);
+ CoeffBundle<N / 2, SZ>::SubReverse(mem, mem + N / 2 * SZ, tmp + N / 2 * SZ);
+ CoeffBundle<N, SZ>::Multiply(tmp);
+ DCT1DImpl<N / 2, SZ>()(tmp + N / 2 * SZ);
+ CoeffBundle<N / 2, SZ>::B(tmp + N / 2 * SZ);
+ CoeffBundle<N, SZ>::InverseEvenOdd(tmp, mem);
+ }
+};
+
+template <size_t N, size_t SZ>
+struct IDCT1DImpl;
+
+template <size_t SZ>
+struct IDCT1DImpl<1, SZ> {
+ JXL_INLINE void operator()(const float* from, size_t from_stride, float* to,
+ size_t to_stride) {
+ StoreU(LoadU(FV<SZ>(), from), FV<SZ>(), to);
+ }
+};
+
+template <size_t SZ>
+struct IDCT1DImpl<2, SZ> {
+ JXL_INLINE void operator()(const float* from, size_t from_stride, float* to,
+ size_t to_stride) {
+ JXL_DASSERT(from_stride >= SZ);
+ JXL_DASSERT(to_stride >= SZ);
+ auto in1 = LoadU(FV<SZ>(), from);
+ auto in2 = LoadU(FV<SZ>(), from + from_stride);
+ StoreU(Add(in1, in2), FV<SZ>(), to);
+ StoreU(Sub(in1, in2), FV<SZ>(), to + to_stride);
+ }
+};
+
+template <size_t N, size_t SZ>
+struct IDCT1DImpl {
+ void operator()(const float* from, size_t from_stride, float* to,
+ size_t to_stride) {
+ JXL_DASSERT(from_stride >= SZ);
+ JXL_DASSERT(to_stride >= SZ);
+ // This is relatively small (4kB with 64-DCT and AVX-512)
+ HWY_ALIGN float tmp[N * SZ];
+ CoeffBundle<N, SZ>::ForwardEvenOdd(from, from_stride, tmp);
+ IDCT1DImpl<N / 2, SZ>()(tmp, SZ, tmp, SZ);
+ CoeffBundle<N / 2, SZ>::BTranspose(tmp + N / 2 * SZ);
+ IDCT1DImpl<N / 2, SZ>()(tmp + N / 2 * SZ, SZ, tmp + N / 2 * SZ, SZ);
+ CoeffBundle<N, SZ>::MultiplyAndAdd(tmp, to, to_stride);
+ }
+};
+
+template <size_t N, size_t M_or_0, typename FromBlock, typename ToBlock>
+void DCT1DWrapper(const FromBlock& from, const ToBlock& to, size_t Mp) {
+ size_t M = M_or_0 != 0 ? M_or_0 : Mp;
+ constexpr size_t SZ = MaxLanes(FV<M_or_0>());
+ HWY_ALIGN float tmp[N * SZ];
+ for (size_t i = 0; i < M; i += Lanes(FV<M_or_0>())) {
+ // TODO(veluca): consider removing the temporary memory here (as is done in
+ // IDCT), if it turns out that some compilers don't optimize away the loads
+ // and this is performance-critical.
+ CoeffBundle<N, SZ>::LoadFromBlock(from, i, tmp);
+ DCT1DImpl<N, SZ>()(tmp);
+ CoeffBundle<N, SZ>::StoreToBlockAndScale(tmp, to, i);
+ }
+}
+
+template <size_t N, size_t M_or_0, typename FromBlock, typename ToBlock>
+void IDCT1DWrapper(const FromBlock& from, const ToBlock& to, size_t Mp) {
+ size_t M = M_or_0 != 0 ? M_or_0 : Mp;
+ constexpr size_t SZ = MaxLanes(FV<M_or_0>());
+ for (size_t i = 0; i < M; i += Lanes(FV<M_or_0>())) {
+ IDCT1DImpl<N, SZ>()(from.Address(0, i), from.Stride(), to.Address(0, i),
+ to.Stride());
+ }
+}
+
+template <size_t N, size_t M, typename = void>
+struct DCT1D {
+ template <typename FromBlock, typename ToBlock>
+ void operator()(const FromBlock& from, const ToBlock& to) {
+ return DCT1DWrapper<N, M>(from, to, M);
+ }
+};
+
+template <size_t N, size_t M>
+struct DCT1D<N, M, typename std::enable_if<(M > MaxLanes(FV<0>()))>::type> {
+ template <typename FromBlock, typename ToBlock>
+ void operator()(const FromBlock& from, const ToBlock& to) {
+ return NoInlineWrapper(DCT1DWrapper<N, 0, FromBlock, ToBlock>, from, to, M);
+ }
+};
+
+template <size_t N, size_t M, typename = void>
+struct IDCT1D {
+ template <typename FromBlock, typename ToBlock>
+ void operator()(const FromBlock& from, const ToBlock& to) {
+ return IDCT1DWrapper<N, M>(from, to, M);
+ }
+};
+
+template <size_t N, size_t M>
+struct IDCT1D<N, M, typename std::enable_if<(M > MaxLanes(FV<0>()))>::type> {
+ template <typename FromBlock, typename ToBlock>
+ void operator()(const FromBlock& from, const ToBlock& to) {
+ return NoInlineWrapper(IDCT1DWrapper<N, 0, FromBlock, ToBlock>, from, to,
+ M);
+ }
+};
+
+// Computes the maybe-transposed, scaled DCT of a block, that needs to be
+// HWY_ALIGN'ed.
+template <size_t ROWS, size_t COLS>
+struct ComputeScaledDCT {
+ // scratch_space must be aligned, and should have space for ROWS*COLS
+ // floats.
+ template <class From>
+ HWY_MAYBE_UNUSED void operator()(const From& from, float* to,
+ float* JXL_RESTRICT scratch_space) {
+ float* JXL_RESTRICT block = scratch_space;
+ if (ROWS < COLS) {
+ DCT1D<ROWS, COLS>()(from, DCTTo(block, COLS));
+ Transpose<ROWS, COLS>::Run(DCTFrom(block, COLS), DCTTo(to, ROWS));
+ DCT1D<COLS, ROWS>()(DCTFrom(to, ROWS), DCTTo(block, ROWS));
+ Transpose<COLS, ROWS>::Run(DCTFrom(block, ROWS), DCTTo(to, COLS));
+ } else {
+ DCT1D<ROWS, COLS>()(from, DCTTo(to, COLS));
+ Transpose<ROWS, COLS>::Run(DCTFrom(to, COLS), DCTTo(block, ROWS));
+ DCT1D<COLS, ROWS>()(DCTFrom(block, ROWS), DCTTo(to, ROWS));
+ }
+ }
+};
+// Computes the maybe-transposed, scaled IDCT of a block, that needs to be
+// HWY_ALIGN'ed.
+template <size_t ROWS, size_t COLS>
+struct ComputeScaledIDCT {
+ // scratch_space must be aligned, and should have space for ROWS*COLS
+ // floats.
+ template <class To>
+ HWY_MAYBE_UNUSED void operator()(float* JXL_RESTRICT from, const To& to,
+ float* JXL_RESTRICT scratch_space) {
+ float* JXL_RESTRICT block = scratch_space;
+ // Reverse the steps done in ComputeScaledDCT.
+ if (ROWS < COLS) {
+ Transpose<ROWS, COLS>::Run(DCTFrom(from, COLS), DCTTo(block, ROWS));
+ IDCT1D<COLS, ROWS>()(DCTFrom(block, ROWS), DCTTo(from, ROWS));
+ Transpose<COLS, ROWS>::Run(DCTFrom(from, ROWS), DCTTo(block, COLS));
+ IDCT1D<ROWS, COLS>()(DCTFrom(block, COLS), to);
+ } else {
+ IDCT1D<COLS, ROWS>()(DCTFrom(from, ROWS), DCTTo(block, ROWS));
+ Transpose<COLS, ROWS>::Run(DCTFrom(block, ROWS), DCTTo(from, COLS));
+ IDCT1D<ROWS, COLS>()(DCTFrom(from, COLS), to);
+ }
+ }
+};
+
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+#endif // LIB_JXL_DCT_INL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dct_block-inl.h b/third_party/jpeg-xl/lib/jxl/dct_block-inl.h
new file mode 100644
index 0000000000..50646a737f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dct_block-inl.h
@@ -0,0 +1,108 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Adapters for DCT input/output: from/to contiguous blocks or image rows.
+
+#if defined(LIB_JXL_DCT_BLOCK_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_DCT_BLOCK_INL_H_
+#undef LIB_JXL_DCT_BLOCK_INL_H_
+#else
+#define LIB_JXL_DCT_BLOCK_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/status.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Vec;
+
+// Block: (x, y) <-> (N * y + x)
+// Lines: (x, y) <-> (stride * y + x)
+//
+// I.e. Block is a specialization of Lines with fixed stride.
+//
+// FromXXX should implement Read and Load (Read vector).
+// ToXXX should implement Write and Store (Write vector).
+
+template <size_t N>
+using BlockDesc = HWY_CAPPED(float, N);
+
+// Here and in the following, the SZ template parameter specifies the number of
+// values to load/store. Needed because we want to handle 4x4 sub-blocks of
+// 16x16 blocks.
+class DCTFrom {
+ public:
+ DCTFrom(const float* data, size_t stride) : stride_(stride), data_(data) {}
+
+ template <typename D>
+ HWY_INLINE Vec<D> LoadPart(D, const size_t row, size_t i) const {
+ JXL_DASSERT(Lanes(D()) <= stride_);
+ // Since these functions are used also for DC, no alignment at all is
+ // guaranteed in the case of floating blocks.
+ // TODO(veluca): consider using a different class for DC-to-LF and
+ // DC-from-LF, or copying DC values to/from a temporary aligned location.
+ return LoadU(D(), Address(row, i));
+ }
+
+ HWY_INLINE float Read(const size_t row, const size_t i) const {
+ return *Address(row, i);
+ }
+
+ constexpr HWY_INLINE const float* Address(const size_t row,
+ const size_t i) const {
+ return data_ + row * stride_ + i;
+ }
+
+ size_t Stride() const { return stride_; }
+
+ private:
+ size_t stride_;
+ const float* JXL_RESTRICT data_;
+};
+
+class DCTTo {
+ public:
+ DCTTo(float* data, size_t stride) : stride_(stride), data_(data) {}
+
+ template <typename D>
+ HWY_INLINE void StorePart(D, const Vec<D>& v, const size_t row,
+ size_t i) const {
+ JXL_DASSERT(Lanes(D()) <= stride_);
+ // Since these functions are used also for DC, no alignment at all is
+ // guaranteed in the case of floating blocks.
+ // TODO(veluca): consider using a different class for DC-to-LF and
+ // DC-from-LF, or copying DC values to/from a temporary aligned location.
+ StoreU(v, D(), Address(row, i));
+ }
+
+ HWY_INLINE void Write(float v, const size_t row, const size_t i) const {
+ *Address(row, i) = v;
+ }
+
+ constexpr HWY_INLINE float* Address(const size_t row, const size_t i) const {
+ return data_ + row * stride_ + i;
+ }
+
+ size_t Stride() const { return stride_; }
+
+ private:
+ size_t stride_;
+ float* JXL_RESTRICT data_;
+};
+
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif // LIB_JXL_DCT_BLOCK_INL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dct_for_test.h b/third_party/jpeg-xl/lib/jxl/dct_for_test.h
new file mode 100644
index 0000000000..8e32aa7eff
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dct_for_test.h
@@ -0,0 +1,99 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DCT_FOR_TEST_H_
+#define LIB_JXL_DCT_FOR_TEST_H_
+
+// Unoptimized DCT only for use in tests.
+
+#include <string.h> // memcpy
+
+#include <cmath>
+#include <vector>
+
+#include "lib/jxl/common.h" // Pi
+
+namespace jxl {
+
+namespace test {
+static inline double alpha(int u) { return u == 0 ? 0.7071067811865475 : 1.0; }
+
+// N-DCT on M columns, divided by sqrt(N). Matches the definition in the spec.
+template <size_t N, size_t M>
+void DCT1D(double block[N * M], double out[N * M]) {
+ std::vector<double> matrix(N * N);
+ const double scale = std::sqrt(2.0) / N;
+ for (size_t y = 0; y < N; y++) {
+ for (size_t u = 0; u < N; u++) {
+ matrix[N * u + y] = alpha(u) * cos((y + 0.5) * u * Pi(1.0 / N)) * scale;
+ }
+ }
+ for (size_t x = 0; x < M; x++) {
+ for (size_t u = 0; u < N; u++) {
+ out[M * u + x] = 0;
+ for (size_t y = 0; y < N; y++) {
+ out[M * u + x] += matrix[N * u + y] * block[M * y + x];
+ }
+ }
+ }
+}
+
+// N-IDCT on M columns, multiplied by sqrt(N). Matches the definition in the
+// spec.
+template <size_t N, size_t M>
+void IDCT1D(double block[N * M], double out[N * M]) {
+ std::vector<double> matrix(N * N);
+ const double scale = std::sqrt(2.0);
+ for (size_t y = 0; y < N; y++) {
+ for (size_t u = 0; u < N; u++) {
+ // Transpose of DCT matrix.
+ matrix[N * y + u] = alpha(u) * cos((y + 0.5) * u * Pi(1.0 / N)) * scale;
+ }
+ }
+ for (size_t x = 0; x < M; x++) {
+ for (size_t u = 0; u < N; u++) {
+ out[M * u + x] = 0;
+ for (size_t y = 0; y < N; y++) {
+ out[M * u + x] += matrix[N * u + y] * block[M * y + x];
+ }
+ }
+ }
+}
+
+template <size_t N, size_t M>
+void TransposeBlock(double in[N * M], double out[M * N]) {
+ for (size_t x = 0; x < N; x++) {
+ for (size_t y = 0; y < M; y++) {
+ out[y * N + x] = in[x * M + y];
+ }
+ }
+}
+} // namespace test
+
+// Untransposed DCT.
+template <size_t N>
+void DCTSlow(double block[N * N]) {
+ constexpr size_t kBlockSize = N * N;
+ std::vector<double> g(kBlockSize);
+ test::DCT1D<N, N>(block, g.data());
+ test::TransposeBlock<N, N>(g.data(), block);
+ test::DCT1D<N, N>(block, g.data());
+ test::TransposeBlock<N, N>(g.data(), block);
+}
+
+// Untransposed IDCT.
+template <size_t N>
+void IDCTSlow(double block[N * N]) {
+ constexpr size_t kBlockSize = N * N;
+ std::vector<double> g(kBlockSize);
+ test::IDCT1D<N, N>(block, g.data());
+ test::TransposeBlock<N, N>(g.data(), block);
+ test::IDCT1D<N, N>(block, g.data());
+ test::TransposeBlock<N, N>(g.data(), block);
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_DCT_FOR_TEST_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dct_scales.cc b/third_party/jpeg-xl/lib/jxl/dct_scales.cc
new file mode 100644
index 0000000000..f9e89a6014
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dct_scales.cc
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dct_scales.h"
+
+namespace jxl {
+
+// Definition of constexpr arrays.
+constexpr float DCTResampleScales<1, 8>::kScales[];
+constexpr float DCTResampleScales<2, 16>::kScales[];
+constexpr float DCTResampleScales<4, 32>::kScales[];
+constexpr float DCTResampleScales<8, 64>::kScales[];
+constexpr float DCTResampleScales<16, 128>::kScales[];
+constexpr float DCTResampleScales<32, 256>::kScales[];
+constexpr float DCTResampleScales<8, 1>::kScales[];
+constexpr float DCTResampleScales<16, 2>::kScales[];
+constexpr float DCTResampleScales<32, 4>::kScales[];
+constexpr float DCTResampleScales<64, 8>::kScales[];
+constexpr float DCTResampleScales<128, 16>::kScales[];
+constexpr float DCTResampleScales<256, 32>::kScales[];
+constexpr float WcMultipliers<4>::kMultipliers[];
+constexpr float WcMultipliers<8>::kMultipliers[];
+constexpr float WcMultipliers<16>::kMultipliers[];
+constexpr float WcMultipliers<32>::kMultipliers[];
+constexpr float WcMultipliers<64>::kMultipliers[];
+constexpr float WcMultipliers<128>::kMultipliers[];
+constexpr float WcMultipliers<256>::kMultipliers[];
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/dct_scales.h b/third_party/jpeg-xl/lib/jxl/dct_scales.h
new file mode 100644
index 0000000000..23af03d60f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dct_scales.h
@@ -0,0 +1,379 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DCT_SCALES_H_
+#define LIB_JXL_DCT_SCALES_H_
+
+// Scaling factors.
+
+#include <stddef.h>
+
+namespace jxl {
+
+static constexpr float kSqrt2 = 1.41421356237f;
+static constexpr float kSqrt0_5 = 0.70710678118f;
+
+// For n != 0, the n-th basis function of a N-DCT, evaluated in pixel k, has a
+// value of cos((k+1/2) n/(2N) pi). When downsampling by 2x, we average
+// the values for pixel k and k+1 to get the value for pixel (k/2), thus we get
+//
+// [cos((k+1/2) n/N pi) + cos((k+3/2) n/N pi)]/2 =
+// cos(n/(2N) pi) cos((k+1) n/N pi) =
+// cos(n/(2N) pi) cos(((k/2)+1/2) n/(N/2) pi)
+//
+// which is exactly the same as the value of pixel k/2 of a N/2-sized DCT,
+// except for the cos(n/(2N) pi) scaling factor (which does *not*
+// depend on the pixel). Thus, when using the lower-frequency coefficients of a
+// DCT-N to compute a DCT-(N/2), they should be scaled by this constant. Scaling
+// factors for a DCT-(N/4) etc can then be obtained by successive
+// multiplications. The structs below contain the above-mentioned scaling
+// factors.
+//
+// Python code for the tables below:
+//
+// for i in range(N // 8):
+// v = math.cos(i / (2 * N) * math.pi)
+// v *= math.cos(i / (N) * math.pi)
+// v *= math.cos(i / (N / 2) * math.pi)
+// print(v, end=", ")
+
+template <size_t FROM, size_t TO>
+struct DCTResampleScales;
+
+template <>
+struct DCTResampleScales<8, 1> {
+ static constexpr float kScales[] = {
+ 1.000000000000000000,
+ };
+};
+
+template <>
+struct DCTResampleScales<16, 2> {
+ static constexpr float kScales[] = {
+ 1.000000000000000000,
+ 0.901764195028874394,
+ };
+};
+
+template <>
+struct DCTResampleScales<32, 4> {
+ static constexpr float kScales[] = {
+ 1.000000000000000000,
+ 0.974886821136879522,
+ 0.901764195028874394,
+ 0.787054918159101335,
+ };
+};
+
+template <>
+struct DCTResampleScales<64, 8> {
+ static constexpr float kScales[] = {
+ 1.0000000000000000, 0.9936866130906366, 0.9748868211368796,
+ 0.9440180941651672, 0.9017641950288744, 0.8490574973847023,
+ 0.7870549181591013, 0.7171081282466044,
+ };
+};
+
+template <>
+struct DCTResampleScales<128, 16> {
+ static constexpr float kScales[] = {
+ 1.0,
+ 0.9984194528776054,
+ 0.9936866130906366,
+ 0.9858278282666936,
+ 0.9748868211368796,
+ 0.9609244059440204,
+ 0.9440180941651672,
+ 0.9242615922757944,
+ 0.9017641950288744,
+ 0.8766500784429904,
+ 0.8490574973847023,
+ 0.8191378932865928,
+ 0.7870549181591013,
+ 0.7529833816270532,
+ 0.7171081282466044,
+ 0.6796228528314651,
+ };
+};
+
+template <>
+struct DCTResampleScales<256, 32> {
+ static constexpr float kScales[] = {
+ 1.0,
+ 0.9996047255830407,
+ 0.9984194528776054,
+ 0.9964458326264695,
+ 0.9936866130906366,
+ 0.9901456355893141,
+ 0.9858278282666936,
+ 0.9807391980963174,
+ 0.9748868211368796,
+ 0.9682788310563117,
+ 0.9609244059440204,
+ 0.9528337534340876,
+ 0.9440180941651672,
+ 0.9344896436056892,
+ 0.9242615922757944,
+ 0.913348084400198,
+ 0.9017641950288744,
+ 0.8895259056651056,
+ 0.8766500784429904,
+ 0.8631544288990163,
+ 0.8490574973847023,
+ 0.8343786191696513,
+ 0.8191378932865928,
+ 0.8033561501721485,
+ 0.7870549181591013,
+ 0.7702563888779096,
+ 0.7529833816270532,
+ 0.7352593067735488,
+ 0.7171081282466044,
+ 0.6985543251889097,
+ 0.6796228528314651,
+ 0.6603391026591464,
+ };
+};
+
+// Inverses of the above.
+template <>
+struct DCTResampleScales<1, 8> {
+ static constexpr float kScales[] = {
+ 1.000000000000000000,
+ };
+};
+
+template <>
+struct DCTResampleScales<2, 16> {
+ static constexpr float kScales[] = {
+ 1.000000000000000000,
+ 1.108937353592731823,
+ };
+};
+
+template <>
+struct DCTResampleScales<4, 32> {
+ static constexpr float kScales[] = {
+ 1.000000000000000000,
+ 1.025760096781116015,
+ 1.108937353592731823,
+ 1.270559368765487251,
+ };
+};
+
+template <>
+struct DCTResampleScales<8, 64> {
+ static constexpr float kScales[] = {
+ 1.0000000000000000, 1.0063534990068217, 1.0257600967811158,
+ 1.0593017296817173, 1.1089373535927318, 1.1777765381970435,
+ 1.2705593687654873, 1.3944898413647777,
+ };
+};
+
+template <>
+struct DCTResampleScales<16, 128> {
+ static constexpr float kScales[] = {
+ 1.0,
+ 1.0015830492062623,
+ 1.0063534990068217,
+ 1.0143759095928793,
+ 1.0257600967811158,
+ 1.0406645869480142,
+ 1.0593017296817173,
+ 1.0819447744633812,
+ 1.1089373535927318,
+ 1.1407059950032632,
+ 1.1777765381970435,
+ 1.2207956782315876,
+ 1.2705593687654873,
+ 1.3280505578213306,
+ 1.3944898413647777,
+ 1.4714043176061107,
+ };
+};
+
+template <>
+struct DCTResampleScales<32, 256> {
+ static constexpr float kScales[] = {
+ 1.0,
+ 1.0003954307206069,
+ 1.0015830492062623,
+ 1.0035668445360069,
+ 1.0063534990068217,
+ 1.009952439375063,
+ 1.0143759095928793,
+ 1.0196390660647288,
+ 1.0257600967811158,
+ 1.0327603660498115,
+ 1.0406645869480142,
+ 1.049501024072585,
+ 1.0593017296817173,
+ 1.0701028169146336,
+ 1.0819447744633812,
+ 1.0948728278734026,
+ 1.1089373535927318,
+ 1.124194353004584,
+ 1.1407059950032632,
+ 1.158541237256391,
+ 1.1777765381970435,
+ 1.1984966740820495,
+ 1.2207956782315876,
+ 1.244777922949508,
+ 1.2705593687654873,
+ 1.2982690107339132,
+ 1.3280505578213306,
+ 1.3600643892400104,
+ 1.3944898413647777,
+ 1.4315278911623237,
+ 1.4714043176061107,
+ 1.5143734423314616,
+ };
+};
+
+// Constants for DCT implementation. Generated by the following snippet:
+// for i in range(N // 2):
+// print(1.0 / (2 * math.cos((i + 0.5) * math.pi / N)), end=", ")
+template <size_t N>
+struct WcMultipliers;
+
+template <>
+struct WcMultipliers<4> {
+ static constexpr float kMultipliers[] = {
+ 0.541196100146197,
+ 1.3065629648763764,
+ };
+};
+
+template <>
+struct WcMultipliers<8> {
+ static constexpr float kMultipliers[] = {
+ 0.5097955791041592,
+ 0.6013448869350453,
+ 0.8999762231364156,
+ 2.5629154477415055,
+ };
+};
+
+template <>
+struct WcMultipliers<16> {
+ static constexpr float kMultipliers[] = {
+ 0.5024192861881557, 0.5224986149396889, 0.5669440348163577,
+ 0.6468217833599901, 0.7881546234512502, 1.060677685990347,
+ 1.7224470982383342, 5.101148618689155,
+ };
+};
+
+template <>
+struct WcMultipliers<32> {
+ static constexpr float kMultipliers[] = {
+ 0.5006029982351963, 0.5054709598975436, 0.5154473099226246,
+ 0.5310425910897841, 0.5531038960344445, 0.5829349682061339,
+ 0.6225041230356648, 0.6748083414550057, 0.7445362710022986,
+ 0.8393496454155268, 0.9725682378619608, 1.1694399334328847,
+ 1.4841646163141662, 2.057781009953411, 3.407608418468719,
+ 10.190008123548033,
+ };
+};
+template <>
+struct WcMultipliers<64> {
+ static constexpr float kMultipliers[] = {
+ 0.500150636020651, 0.5013584524464084, 0.5037887256810443,
+ 0.5074711720725553, 0.5124514794082247, 0.5187927131053328,
+ 0.52657731515427, 0.535909816907992, 0.5469204379855088,
+ 0.5597698129470802, 0.57465518403266, 0.5918185358574165,
+ 0.6115573478825099, 0.6342389366884031, 0.6603198078137061,
+ 0.6903721282002123, 0.7251205223771985, 0.7654941649730891,
+ 0.8127020908144905, 0.8683447152233481, 0.9345835970364075,
+ 1.0144082649970547, 1.1120716205797176, 1.233832737976571,
+ 1.3892939586328277, 1.5939722833856311, 1.8746759800084078,
+ 2.282050068005162, 2.924628428158216, 4.084611078129248,
+ 6.796750711673633, 20.373878167231453,
+ };
+};
+template <>
+struct WcMultipliers<128> {
+ static constexpr float kMultipliers[] = {
+ 0.5000376519155477, 0.5003390374428216, 0.5009427176380873,
+ 0.5018505174842379, 0.5030651913013697, 0.5045904432216454,
+ 0.5064309549285542, 0.5085924210498143, 0.5110815927066812,
+ 0.5139063298475396, 0.5170756631334912, 0.5205998663018917,
+ 0.524490540114724, 0.5287607092074876, 0.5334249333971333,
+ 0.538499435291984, 0.5440022463817783, 0.549953374183236,
+ 0.5563749934898856, 0.5632916653417023, 0.5707305880121454,
+ 0.5787218851348208, 0.5872989370937893, 0.5964987630244563,
+ 0.606362462272146, 0.6169357260050706, 0.6282694319707711,
+ 0.6404203382416639, 0.6534518953751283, 0.6674352009263413,
+ 0.6824501259764195, 0.6985866506472291, 0.7159464549705746,
+ 0.7346448236478627, 0.7548129391165311, 0.776600658233963,
+ 0.8001798956216941, 0.8257487738627852, 0.8535367510066064,
+ 0.8838110045596234, 0.9168844461846523, 0.9531258743921193,
+ 0.9929729612675466, 1.036949040910389, 1.0856850642580145,
+ 1.1399486751015042, 1.2006832557294167, 1.2690611716991191,
+ 1.346557628206286, 1.4350550884414341, 1.5369941008524954,
+ 1.6555965242641195, 1.7952052190778898, 1.961817848571166,
+ 2.163957818751979, 2.4141600002500763, 2.7316450287739396,
+ 3.147462191781909, 3.7152427383269746, 4.5362909369693565,
+ 5.827688377844654, 8.153848602466814, 13.58429025728446,
+ 40.744688103351834,
+ };
+};
+
+template <>
+struct WcMultipliers<256> {
+ static constexpr float kMultipliers[128] = {
+ 0.5000094125358878, 0.500084723455784, 0.5002354020255269,
+ 0.5004615618093246, 0.5007633734146156, 0.5011410648064231,
+ 0.5015949217281668, 0.502125288230386, 0.5027325673091954,
+ 0.5034172216566842, 0.5041797745258774, 0.5050208107132756,
+ 0.5059409776624396, 0.5069409866925212, 0.5080216143561264,
+ 0.509183703931388, 0.5104281670536573, 0.5117559854927805,
+ 0.5131682130825206, 0.5146659778093218, 0.516250484068288,
+ 0.5179230150949777, 0.5196849355823947, 0.5215376944933958,
+ 0.5234828280796439, 0.52552196311921, 0.5276568203859896,
+ 0.5298892183652453, 0.5322210772308335, 0.5346544231010253,
+ 0.537191392591309, 0.5398342376841637, 0.5425853309375497,
+ 0.545447171055775, 0.5484223888484947, 0.551513753605893,
+ 0.554724179920619, 0.5580567349898085, 0.5615146464335654,
+ 0.5651013106696203, 0.5688203018875696, 0.5726753816701664,
+ 0.5766705093136241, 0.5808098529038624, 0.5850978012111273,
+ 0.58953897647151, 0.5941382481306648, 0.5989007476325463,
+ 0.6038318843443582, 0.6089373627182432, 0.614223200800649,
+ 0.6196957502119484, 0.6253617177319102, 0.6312281886412079,
+ 0.6373026519855411, 0.6435930279473415, 0.6501076975307724,
+ 0.6568555347890955, 0.6638459418498757, 0.6710888870233562,
+ 0.6785949463131795, 0.6863753486870501, 0.6944420255086364,
+ 0.7028076645818034, 0.7114857693151208, 0.7204907235796304,
+ 0.7298378629074134, 0.7395435527641373, 0.749625274727372,
+ 0.7601017215162176, 0.7709929019493761, 0.7823202570613161,
+ 0.7941067887834509, 0.8063772028037925, 0.8191580674598145,
+ 0.83247799080191, 0.8463678182968619, 0.860860854031955,
+ 0.8759931087426972, 0.8918035785352535, 0.9083345588266809,
+ 0.9256319988042384, 0.9437459026371479, 0.962730784794803,
+ 0.9826461881778968, 1.0035572754078206, 1.0255355056139732,
+ 1.048659411496106, 1.0730154944316674, 1.0986992590905857,
+ 1.1258164135986009, 1.1544842669978943, 1.184833362908442,
+ 1.217009397314603, 1.2511754798461228, 1.287514812536712,
+ 1.326233878832723, 1.3675662599582539, 1.411777227500661,
+ 1.459169302866857, 1.5100890297227016, 1.5649352798258847,
+ 1.6241695131835794, 1.6883285509131505, 1.7580406092704062,
+ 1.8340456094306077, 1.9172211551275689, 2.0086161135167564,
+ 2.1094945286246385, 2.22139377701127, 2.346202662531156,
+ 2.486267909203593, 2.644541877144861, 2.824791402350551,
+ 3.0318994541759925, 3.2723115884254845, 3.5547153325075804,
+ 3.891107790700307, 4.298537526449054, 4.802076008665048,
+ 5.440166215091329, 6.274908408039339, 7.413566756422303,
+ 9.058751453879703, 11.644627325175037, 16.300023088031555,
+ 27.163977662448232, 81.48784219222516,
+ };
+};
+
+// Apply the DCT algorithm-intrinsic constants to DCTResampleScale.
+template <size_t FROM, size_t TO>
+constexpr float DCTTotalResampleScale(size_t x) {
+ return DCTResampleScales<FROM, TO>::kScales[x];
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_DCT_SCALES_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dct_test.cc b/third_party/jpeg-xl/lib/jxl/dct_test.cc
new file mode 100644
index 0000000000..9f5eff41e9
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dct_test.cc
@@ -0,0 +1,389 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <string.h>
+
+#include <cmath>
+#include <numeric>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dct_test.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+#include <hwy/tests/test_util-inl.h>
+
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct-inl.h"
+#include "lib/jxl/dct_for_test.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/test_utils.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// Computes the in-place NxN DCT of block.
+// Requires that block is HWY_ALIGN'ed.
+//
+// Performs ComputeTransposedScaledDCT and then transposes and scales it to
+// obtain "vanilla" DCT.
+template <size_t N>
+void ComputeDCT(float block[N * N]) {
+ HWY_ALIGN float tmp_block[N * N];
+ HWY_ALIGN float scratch_space[N * N];
+ ComputeScaledDCT<N, N>()(DCTFrom(block, N), tmp_block, scratch_space);
+
+ // Untranspose.
+ Transpose<N, N>::Run(DCTFrom(tmp_block, N), DCTTo(block, N));
+}
+
+// Computes the in-place 8x8 iDCT of block.
+// Requires that block is HWY_ALIGN'ed.
+template <int N>
+void ComputeIDCT(float block[N * N]) {
+ HWY_ALIGN float tmp_block[N * N];
+ HWY_ALIGN float scratch_space[N * N];
+ // Untranspose.
+ Transpose<N, N>::Run(DCTFrom(block, N), DCTTo(tmp_block, N));
+
+ ComputeScaledIDCT<N, N>()(tmp_block, DCTTo(block, N), scratch_space);
+}
+
+template <size_t N>
+void TransposeTestT(float accuracy) {
+ constexpr size_t kBlockSize = N * N;
+ HWY_ALIGN float src[kBlockSize];
+ DCTTo to_src(src, N);
+ for (size_t y = 0; y < N; ++y) {
+ for (size_t x = 0; x < N; ++x) {
+ to_src.Write(y * N + x, y, x);
+ }
+ }
+ HWY_ALIGN float dst[kBlockSize];
+ Transpose<N, N>::Run(DCTFrom(src, N), DCTTo(dst, N));
+ DCTFrom from_dst(dst, N);
+ for (size_t y = 0; y < N; ++y) {
+ for (size_t x = 0; x < N; ++x) {
+ float expected = x * N + y;
+ float actual = from_dst.Read(y, x);
+ EXPECT_NEAR(expected, actual, accuracy) << "x = " << x << ", y = " << y;
+ }
+ }
+}
+
+void TransposeTest() {
+ TransposeTestT<8>(1e-7f);
+ TransposeTestT<16>(1e-7f);
+ TransposeTestT<32>(1e-7f);
+}
+
+template <size_t N>
+void ColumnDctRoundtripT(float accuracy) {
+ constexpr size_t kBlockSize = N * N;
+ // Though we are only interested in single column result, dct.h has built-in
+ // limit on minimal number of columns processed. So, to be safe, we do
+ // regular 8x8 block transformation. On the bright side - we could check all
+ // 8 basis vectors at once.
+ HWY_ALIGN float block[kBlockSize];
+ DCTTo to(block, N);
+ DCTFrom from(block, N);
+ for (size_t i = 0; i < N; ++i) {
+ for (size_t j = 0; j < N; ++j) {
+ to.Write((i == j) ? 1.0f : 0.0f, i, j);
+ }
+ }
+
+ // Running (I)DCT on the same memory block seems to trigger a compiler bug on
+ // ARMv7 with clang6.
+ HWY_ALIGN float tmp[kBlockSize];
+ DCTTo to_tmp(tmp, N);
+ DCTFrom from_tmp(tmp, N);
+
+ DCT1D<N, N>()(from, to_tmp);
+ IDCT1D<N, N>()(from_tmp, to);
+
+ for (size_t i = 0; i < N; ++i) {
+ for (size_t j = 0; j < N; ++j) {
+ float expected = (i == j) ? 1.0f : 0.0f;
+ float actual = from.Read(i, j);
+ EXPECT_NEAR(expected, actual, accuracy) << " i=" << i << ", j=" << j;
+ }
+ }
+}
+
+void ColumnDctRoundtrip() {
+ ColumnDctRoundtripT<8>(1e-6f);
+ ColumnDctRoundtripT<16>(1e-6f);
+ ColumnDctRoundtripT<32>(1e-6f);
+}
+
+template <size_t N>
+void TestDctAccuracy(float accuracy, size_t start = 0, size_t end = N * N) {
+ constexpr size_t kBlockSize = N * N;
+ for (size_t i = start; i < end; i++) {
+ HWY_ALIGN float fast[kBlockSize] = {0.0f};
+ double slow[kBlockSize] = {0.0};
+ fast[i] = 1.0;
+ slow[i] = 1.0;
+ DCTSlow<N>(slow);
+ ComputeDCT<N>(fast);
+ for (size_t k = 0; k < kBlockSize; ++k) {
+ EXPECT_NEAR(fast[k], slow[k], accuracy / N)
+ << "i = " << i << ", k = " << k << ", N = " << N;
+ }
+ }
+}
+
+template <size_t N>
+void TestIdctAccuracy(float accuracy, size_t start = 0, size_t end = N * N) {
+ constexpr size_t kBlockSize = N * N;
+ for (size_t i = start; i < end; i++) {
+ HWY_ALIGN float fast[kBlockSize] = {0.0f};
+ double slow[kBlockSize] = {0.0};
+ fast[i] = 1.0;
+ slow[i] = 1.0;
+ IDCTSlow<N>(slow);
+ ComputeIDCT<N>(fast);
+ for (size_t k = 0; k < kBlockSize; ++k) {
+ EXPECT_NEAR(fast[k], slow[k], accuracy * N)
+ << "i = " << i << ", k = " << k << ", N = " << N;
+ }
+ }
+}
+
+template <size_t N>
+void TestInverseT(float accuracy) {
+ test::ThreadPoolForTests pool(N < 32 ? 0 : 8);
+ enum { kBlockSize = N * N };
+ EXPECT_TRUE(RunOnPool(
+ &pool, 0, kBlockSize, ThreadPool::NoInit,
+ [accuracy](const uint32_t task, size_t /*thread*/) {
+ const size_t i = static_cast<size_t>(task);
+ HWY_ALIGN float x[kBlockSize] = {0.0f};
+ x[i] = 1.0;
+
+ ComputeIDCT<N>(x);
+ ComputeDCT<N>(x);
+
+ for (size_t k = 0; k < kBlockSize; ++k) {
+ EXPECT_NEAR(x[k], (k == i) ? 1.0f : 0.0f, accuracy)
+ << "i = " << i << ", k = " << k;
+ }
+ },
+ "TestInverse"));
+}
+
+void InverseTest() {
+ TestInverseT<8>(1e-6f);
+ TestInverseT<16>(1e-6f);
+ TestInverseT<32>(3e-6f);
+}
+
+template <size_t N>
+void TestDctTranspose(float accuracy, size_t start = 0, size_t end = N * N) {
+ constexpr size_t kBlockSize = N * N;
+ for (size_t i = start; i < end; i++) {
+ for (size_t j = 0; j < kBlockSize; ++j) {
+ // We check that <e_i, Me_j> = <M^\dagger{}e_i, e_j>.
+ // That means (Me_j)_i = (M^\dagger{}e_i)_j
+
+ // x := Me_j
+ HWY_ALIGN float x[kBlockSize] = {0.0f};
+ x[j] = 1.0;
+ ComputeIDCT<N>(x);
+ // y := M^\dagger{}e_i
+ HWY_ALIGN float y[kBlockSize] = {0.0f};
+ y[i] = 1.0;
+ ComputeDCT<N>(y);
+
+ EXPECT_NEAR(x[i] / N, y[j] * N, accuracy) << "i = " << i << ", j = " << j;
+ }
+ }
+}
+
+template <size_t N>
+void TestSlowInverse(float accuracy, size_t start = 0, size_t end = N * N) {
+ constexpr size_t kBlockSize = N * N;
+ for (size_t i = start; i < end; i++) {
+ double x[kBlockSize] = {0.0f};
+ x[i] = 1.0;
+
+ DCTSlow<N>(x);
+ IDCTSlow<N>(x);
+
+ for (size_t k = 0; k < kBlockSize; ++k) {
+ EXPECT_NEAR(x[k], (k == i) ? 1.0f : 0.0f, accuracy)
+ << "i = " << i << ", k = " << k;
+ }
+ }
+}
+
+template <size_t ROWS, size_t COLS>
+void TestRectInverseT(float accuracy) {
+ constexpr size_t kBlockSize = ROWS * COLS;
+ for (size_t i = 0; i < kBlockSize; ++i) {
+ HWY_ALIGN float x[kBlockSize] = {0.0f};
+ HWY_ALIGN float out[kBlockSize] = {0.0f};
+ x[i] = 1.0;
+ HWY_ALIGN float coeffs[kBlockSize] = {0.0f};
+ HWY_ALIGN float scratch_space[kBlockSize * 2];
+
+ ComputeScaledDCT<ROWS, COLS>()(DCTFrom(x, COLS), coeffs, scratch_space);
+ ComputeScaledIDCT<ROWS, COLS>()(coeffs, DCTTo(out, COLS), scratch_space);
+
+ for (size_t k = 0; k < kBlockSize; ++k) {
+ EXPECT_NEAR(out[k], (k == i) ? 1.0f : 0.0f, accuracy)
+ << "i = " << i << ", k = " << k << " ROWS = " << ROWS
+ << " COLS = " << COLS;
+ }
+ }
+}
+
+void TestRectInverse() {
+ TestRectInverseT<16, 32>(1e-6f);
+ TestRectInverseT<8, 32>(1e-6f);
+ TestRectInverseT<8, 16>(1e-6f);
+ TestRectInverseT<4, 8>(1e-6f);
+ TestRectInverseT<2, 4>(1e-6f);
+ TestRectInverseT<1, 4>(1e-6f);
+ TestRectInverseT<1, 2>(1e-6f);
+
+ TestRectInverseT<32, 16>(1e-6f);
+ TestRectInverseT<32, 8>(1e-6f);
+ TestRectInverseT<16, 8>(1e-6f);
+ TestRectInverseT<8, 4>(1e-6f);
+ TestRectInverseT<4, 2>(1e-6f);
+ TestRectInverseT<4, 1>(1e-6f);
+ TestRectInverseT<2, 1>(1e-6f);
+}
+
+template <size_t ROWS, size_t COLS>
+void TestRectTransposeT(float accuracy) {
+ constexpr size_t kBlockSize = ROWS * COLS;
+ HWY_ALIGN float scratch_space[kBlockSize * 2];
+ for (size_t px = 0; px < COLS; ++px) {
+ for (size_t py = 0; py < ROWS; ++py) {
+ HWY_ALIGN float x1[kBlockSize] = {0.0f};
+ HWY_ALIGN float x2[kBlockSize] = {0.0f};
+ HWY_ALIGN float coeffs1[kBlockSize] = {0.0f};
+ HWY_ALIGN float coeffs2[kBlockSize] = {0.0f};
+ x1[py * COLS + px] = 1;
+ x2[px * ROWS + py] = 1;
+
+ constexpr size_t OUT_ROWS = ROWS < COLS ? ROWS : COLS;
+ constexpr size_t OUT_COLS = ROWS < COLS ? COLS : ROWS;
+
+ ComputeScaledDCT<ROWS, COLS>()(DCTFrom(x1, COLS), coeffs1, scratch_space);
+ ComputeScaledDCT<COLS, ROWS>()(DCTFrom(x2, ROWS), coeffs2, scratch_space);
+
+ for (size_t x = 0; x < OUT_COLS; ++x) {
+ for (size_t y = 0; y < OUT_ROWS; ++y) {
+ EXPECT_NEAR(coeffs1[y * OUT_COLS + x], coeffs2[y * OUT_COLS + x],
+ accuracy)
+ << " px = " << px << ", py = " << py << ", x = " << x
+ << ", y = " << y;
+ }
+ }
+ }
+ }
+}
+
+void TestRectTranspose() {
+ TestRectTransposeT<16, 32>(1e-6f);
+ TestRectTransposeT<8, 32>(1e-6f);
+ TestRectTransposeT<8, 16>(1e-6f);
+ TestRectTransposeT<4, 8>(1e-6f);
+ TestRectTransposeT<2, 4>(1e-6f);
+ TestRectTransposeT<1, 4>(1e-6f);
+ TestRectTransposeT<1, 2>(1e-6f);
+
+ // Identical to 8, 16
+ // TestRectTranspose<16, 8>(1e-6f);
+}
+
+void TestDctAccuracyShard(size_t shard) {
+ if (shard == 0) {
+ TestDctAccuracy<1>(1.1E-7f);
+ TestDctAccuracy<2>(1.1E-7f);
+ TestDctAccuracy<4>(1.1E-7f);
+ TestDctAccuracy<8>(1.1E-7f);
+ TestDctAccuracy<16>(1.3E-7f);
+ }
+ TestDctAccuracy<32>(1.1E-7f, 32 * shard, 32 * (shard + 1));
+}
+
+void TestIdctAccuracyShard(size_t shard) {
+ if (shard == 0) {
+ TestIdctAccuracy<1>(1E-7f);
+ TestIdctAccuracy<2>(1E-7f);
+ TestIdctAccuracy<4>(1E-7f);
+ TestIdctAccuracy<8>(1E-7f);
+ TestIdctAccuracy<16>(1E-7f);
+ }
+ TestIdctAccuracy<32>(1E-7f, 32 * shard, 32 * (shard + 1));
+}
+
+void TestDctTransposeShard(size_t shard) {
+ if (shard == 0) {
+ TestDctTranspose<8>(1E-6f);
+ TestDctTranspose<16>(1E-6f);
+ }
+ TestDctTranspose<32>(3E-6f, 32 * shard, 32 * (shard + 1));
+}
+
+void TestSlowInverseShard(size_t shard) {
+ if (shard == 0) {
+ TestSlowInverse<1>(1E-5f);
+ TestSlowInverse<2>(1E-5f);
+ TestSlowInverse<4>(1E-5f);
+ TestSlowInverse<8>(1E-5f);
+ TestSlowInverse<16>(1E-5f);
+ }
+ TestSlowInverse<32>(1E-5f, 32 * shard, 32 * (shard + 1));
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+class TransposeTest : public hwy::TestWithParamTarget {};
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(TransposeTest);
+
+HWY_EXPORT_AND_TEST_P(TransposeTest, TransposeTest);
+HWY_EXPORT_AND_TEST_P(TransposeTest, InverseTest);
+HWY_EXPORT_AND_TEST_P(TransposeTest, ColumnDctRoundtrip);
+HWY_EXPORT_AND_TEST_P(TransposeTest, TestRectInverse);
+HWY_EXPORT_AND_TEST_P(TransposeTest, TestRectTranspose);
+
+// Tests in the DctShardedTest class are sharded for N=32.
+class DctShardedTest : public ::hwy::TestWithParamTargetAndT<uint32_t> {};
+
+std::vector<uint32_t> ShardRange(uint32_t n) {
+#ifdef JXL_DISABLE_SLOW_TESTS
+ JXL_ASSERT(n > 6);
+ std::vector<uint32_t> ret = {0, 1, 3, 5, n - 1};
+#else
+ std::vector<uint32_t> ret(n);
+ std::iota(ret.begin(), ret.end(), 0);
+#endif // JXL_DISABLE_SLOW_TESTS
+ return ret;
+}
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(DctShardedTest,
+ ::testing::ValuesIn(ShardRange(32)));
+
+HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestDctAccuracyShard);
+HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestIdctAccuracyShard);
+HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestDctTransposeShard);
+HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestSlowInverseShard);
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/dct_util.h b/third_party/jpeg-xl/lib/jxl/dct_util.h
new file mode 100644
index 0000000000..fb6ce3b971
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dct_util.h
@@ -0,0 +1,86 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DCT_UTIL_H_
+#define LIB_JXL_DCT_UTIL_H_
+
+#include <stddef.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+union ACPtr {
+ int32_t* ptr32;
+ int16_t* ptr16;
+ ACPtr() = default;
+ explicit ACPtr(int16_t* p) : ptr16(p) {}
+ explicit ACPtr(int32_t* p) : ptr32(p) {}
+};
+
+union ConstACPtr {
+ const int32_t* ptr32;
+ const int16_t* ptr16;
+ ConstACPtr() = default;
+ explicit ConstACPtr(const int16_t* p) : ptr16(p) {}
+ explicit ConstACPtr(const int32_t* p) : ptr32(p) {}
+};
+
+enum class ACType { k16 = 0, k32 = 1 };
+
+class ACImage {
+ public:
+ virtual ~ACImage() = default;
+ virtual ACType Type() const = 0;
+ virtual ACPtr PlaneRow(size_t c, size_t y, size_t xbase) = 0;
+ virtual ConstACPtr PlaneRow(size_t c, size_t y, size_t xbase) const = 0;
+ virtual size_t PixelsPerRow() const = 0;
+ virtual void ZeroFill() = 0;
+ virtual void ZeroFillPlane(size_t c) = 0;
+ virtual bool IsEmpty() const = 0;
+};
+
+template <typename T>
+class ACImageT final : public ACImage {
+ public:
+ ACImageT() = default;
+ ACImageT(size_t xsize, size_t ysize) {
+ static_assert(
+ std::is_same<T, int16_t>::value || std::is_same<T, int32_t>::value,
+ "ACImage must be either 32- or 16- bit");
+ img_ = Image3<T>(xsize, ysize);
+ }
+ ACType Type() const override {
+ return sizeof(T) == 2 ? ACType::k16 : ACType::k32;
+ }
+ ACPtr PlaneRow(size_t c, size_t y, size_t xbase) override {
+ return ACPtr(img_.PlaneRow(c, y) + xbase);
+ }
+ ConstACPtr PlaneRow(size_t c, size_t y, size_t xbase) const override {
+ return ConstACPtr(img_.PlaneRow(c, y) + xbase);
+ }
+
+ size_t PixelsPerRow() const override { return img_.PixelsPerRow(); }
+
+ void ZeroFill() override { ZeroFillImage(&img_); }
+
+ void ZeroFillPlane(size_t c) override { ZeroFillImage(&img_.Plane(c)); }
+
+ bool IsEmpty() const override {
+ return img_.xsize() == 0 || img_.ysize() == 0;
+ }
+
+ private:
+ Image3<T> img_;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_DCT_UTIL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dec_ans.cc b/third_party/jpeg-xl/lib/jxl/dec_ans.cc
new file mode 100644
index 0000000000..c9145472e0
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_ans.cc
@@ -0,0 +1,374 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_ans.h"
+
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/ans_common.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_context_map.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+namespace {
+
+// Decodes a number in the range [0..255], by reading 1 - 11 bits.
+inline int DecodeVarLenUint8(BitReader* input) {
+ if (input->ReadFixedBits<1>()) {
+ int nbits = static_cast<int>(input->ReadFixedBits<3>());
+ if (nbits == 0) {
+ return 1;
+ } else {
+ return static_cast<int>(input->ReadBits(nbits)) + (1 << nbits);
+ }
+ }
+ return 0;
+}
+
+// Decodes a number in the range [0..65535], by reading 1 - 21 bits.
+inline int DecodeVarLenUint16(BitReader* input) {
+ if (input->ReadFixedBits<1>()) {
+ int nbits = static_cast<int>(input->ReadFixedBits<4>());
+ if (nbits == 0) {
+ return 1;
+ } else {
+ return static_cast<int>(input->ReadBits(nbits)) + (1 << nbits);
+ }
+ }
+ return 0;
+}
+
+Status ReadHistogram(int precision_bits, std::vector<int32_t>* counts,
+ BitReader* input) {
+ int simple_code = input->ReadBits(1);
+ if (simple_code == 1) {
+ int i;
+ int symbols[2] = {0};
+ int max_symbol = 0;
+ const int num_symbols = input->ReadBits(1) + 1;
+ for (i = 0; i < num_symbols; ++i) {
+ symbols[i] = DecodeVarLenUint8(input);
+ if (symbols[i] > max_symbol) max_symbol = symbols[i];
+ }
+ counts->resize(max_symbol + 1);
+ if (num_symbols == 1) {
+ (*counts)[symbols[0]] = 1 << precision_bits;
+ } else {
+ if (symbols[0] == symbols[1]) { // corrupt data
+ return false;
+ }
+ (*counts)[symbols[0]] = input->ReadBits(precision_bits);
+ (*counts)[symbols[1]] = (1 << precision_bits) - (*counts)[symbols[0]];
+ }
+ } else {
+ int is_flat = input->ReadBits(1);
+ if (is_flat == 1) {
+ int alphabet_size = DecodeVarLenUint8(input) + 1;
+ *counts = CreateFlatHistogram(alphabet_size, 1 << precision_bits);
+ return true;
+ }
+
+ uint32_t shift;
+ {
+ // TODO(veluca): speed up reading with table lookups.
+ int upper_bound_log = FloorLog2Nonzero(ANS_LOG_TAB_SIZE + 1);
+ int log = 0;
+ for (; log < upper_bound_log; log++) {
+ if (input->ReadFixedBits<1>() == 0) break;
+ }
+ shift = (input->ReadBits(log) | (1 << log)) - 1;
+ if (shift > ANS_LOG_TAB_SIZE + 1) {
+ return JXL_FAILURE("Invalid shift value");
+ }
+ }
+
+ int length = DecodeVarLenUint8(input) + 3;
+ counts->resize(length);
+ int total_count = 0;
+
+ static const uint8_t huff[128][2] = {
+ {3, 10}, {7, 12}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+ {3, 10}, {4, 4}, {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+ {3, 10}, {5, 0}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+ {3, 10}, {4, 4}, {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+ {3, 10}, {6, 11}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+ {3, 10}, {4, 4}, {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+ {3, 10}, {5, 0}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+ {3, 10}, {4, 4}, {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+ {3, 10}, {7, 13}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+ {3, 10}, {4, 4}, {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+ {3, 10}, {5, 0}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+ {3, 10}, {4, 4}, {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+ {3, 10}, {6, 11}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+ {3, 10}, {4, 4}, {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+ {3, 10}, {5, 0}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5},
+ {3, 10}, {4, 4}, {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2},
+ };
+
+ std::vector<int> logcounts(counts->size());
+ int omit_log = -1;
+ int omit_pos = -1;
+ // This array remembers which symbols have an RLE length.
+ std::vector<int> same(counts->size(), 0);
+ for (size_t i = 0; i < logcounts.size(); ++i) {
+ input->Refill(); // for PeekFixedBits + Advance
+ int idx = input->PeekFixedBits<7>();
+ input->Consume(huff[idx][0]);
+ logcounts[i] = huff[idx][1];
+ // The RLE symbol.
+ if (logcounts[i] == ANS_LOG_TAB_SIZE + 1) {
+ int rle_length = DecodeVarLenUint8(input);
+ same[i] = rle_length + 5;
+ i += rle_length + 3;
+ continue;
+ }
+ if (logcounts[i] > omit_log) {
+ omit_log = logcounts[i];
+ omit_pos = i;
+ }
+ }
+ // Invalid input, e.g. due to invalid usage of RLE.
+ if (omit_pos < 0) return JXL_FAILURE("Invalid histogram.");
+ if (static_cast<size_t>(omit_pos) + 1 < logcounts.size() &&
+ logcounts[omit_pos + 1] == ANS_TAB_SIZE + 1) {
+ return JXL_FAILURE("Invalid histogram.");
+ }
+ int prev = 0;
+ int numsame = 0;
+ for (size_t i = 0; i < logcounts.size(); ++i) {
+ if (same[i]) {
+ // RLE sequence, let this loop output the same count for the next
+ // iterations.
+ numsame = same[i] - 1;
+ prev = i > 0 ? (*counts)[i - 1] : 0;
+ }
+ if (numsame > 0) {
+ (*counts)[i] = prev;
+ numsame--;
+ } else {
+ int code = logcounts[i];
+ // omit_pos may not be negative at this point (checked before).
+ if (i == static_cast<size_t>(omit_pos)) {
+ continue;
+ } else if (code == 0) {
+ continue;
+ } else if (code == 1) {
+ (*counts)[i] = 1;
+ } else {
+ int bitcount = GetPopulationCountPrecision(code - 1, shift);
+ (*counts)[i] = (1 << (code - 1)) +
+ (input->ReadBits(bitcount) << (code - 1 - bitcount));
+ }
+ }
+ total_count += (*counts)[i];
+ }
+ (*counts)[omit_pos] = (1 << precision_bits) - total_count;
+ if ((*counts)[omit_pos] <= 0) {
+ // The histogram we've read sums to more than total_count (including at
+ // least 1 for the omitted value).
+ return JXL_FAILURE("Invalid histogram count.");
+ }
+ }
+ return true;
+}
+
+} // namespace
+
+Status DecodeANSCodes(const size_t num_histograms,
+ const size_t max_alphabet_size, BitReader* in,
+ ANSCode* result) {
+ result->degenerate_symbols.resize(num_histograms, -1);
+ if (result->use_prefix_code) {
+ JXL_ASSERT(max_alphabet_size <= 1 << PREFIX_MAX_BITS);
+ result->huffman_data.resize(num_histograms);
+ std::vector<uint16_t> alphabet_sizes(num_histograms);
+ for (size_t c = 0; c < num_histograms; c++) {
+ alphabet_sizes[c] = DecodeVarLenUint16(in) + 1;
+ if (alphabet_sizes[c] > max_alphabet_size) {
+ return JXL_FAILURE("Alphabet size is too long: %u", alphabet_sizes[c]);
+ }
+ }
+ for (size_t c = 0; c < num_histograms; c++) {
+ if (alphabet_sizes[c] > 1) {
+ if (!result->huffman_data[c].ReadFromBitStream(alphabet_sizes[c], in)) {
+ if (!in->AllReadsWithinBounds()) {
+ return JXL_STATUS(StatusCode::kNotEnoughBytes,
+ "Not enough bytes for huffman code");
+ }
+ return JXL_FAILURE("Invalid huffman tree number %" PRIuS
+ ", alphabet size %u",
+ c, alphabet_sizes[c]);
+ }
+ } else {
+ // 0-bit codes does not require extension tables.
+ result->huffman_data[c].table_.clear();
+ result->huffman_data[c].table_.resize(1u << kHuffmanTableBits);
+ }
+ for (const auto& h : result->huffman_data[c].table_) {
+ if (h.bits <= kHuffmanTableBits) {
+ result->UpdateMaxNumBits(c, h.value);
+ }
+ }
+ }
+ } else {
+ JXL_ASSERT(max_alphabet_size <= ANS_MAX_ALPHABET_SIZE);
+ result->alias_tables =
+ AllocateArray(num_histograms * (1 << result->log_alpha_size) *
+ sizeof(AliasTable::Entry));
+ AliasTable::Entry* alias_tables =
+ reinterpret_cast<AliasTable::Entry*>(result->alias_tables.get());
+ for (size_t c = 0; c < num_histograms; ++c) {
+ std::vector<int32_t> counts;
+ if (!ReadHistogram(ANS_LOG_TAB_SIZE, &counts, in)) {
+ return JXL_FAILURE("Invalid histogram bitstream.");
+ }
+ if (counts.size() > max_alphabet_size) {
+ return JXL_FAILURE("Alphabet size is too long: %" PRIuS, counts.size());
+ }
+ while (!counts.empty() && counts.back() == 0) {
+ counts.pop_back();
+ }
+ for (size_t s = 0; s < counts.size(); s++) {
+ if (counts[s] != 0) {
+ result->UpdateMaxNumBits(c, s);
+ }
+ }
+ // InitAliasTable "fixes" empty counts to contain degenerate "0" symbol.
+ int degenerate_symbol = counts.empty() ? 0 : (counts.size() - 1);
+ for (int s = 0; s < degenerate_symbol; ++s) {
+ if (counts[s] != 0) {
+ degenerate_symbol = -1;
+ break;
+ }
+ }
+ result->degenerate_symbols[c] = degenerate_symbol;
+ InitAliasTable(counts, ANS_TAB_SIZE, result->log_alpha_size,
+ alias_tables + c * (1 << result->log_alpha_size));
+ }
+ }
+ return true;
+}
+Status DecodeUintConfig(size_t log_alpha_size, HybridUintConfig* uint_config,
+ BitReader* br) {
+ br->Refill();
+ size_t split_exponent = br->ReadBits(CeilLog2Nonzero(log_alpha_size + 1));
+ size_t msb_in_token = 0, lsb_in_token = 0;
+ if (split_exponent != log_alpha_size) {
+ // otherwise, msb/lsb don't matter.
+ size_t nbits = CeilLog2Nonzero(split_exponent + 1);
+ msb_in_token = br->ReadBits(nbits);
+ if (msb_in_token > split_exponent) {
+ // This could be invalid here already and we need to check this before
+ // we use its value to read more bits.
+ return JXL_FAILURE("Invalid HybridUintConfig");
+ }
+ nbits = CeilLog2Nonzero(split_exponent - msb_in_token + 1);
+ lsb_in_token = br->ReadBits(nbits);
+ }
+ if (lsb_in_token + msb_in_token > split_exponent) {
+ return JXL_FAILURE("Invalid HybridUintConfig");
+ }
+ *uint_config = HybridUintConfig(split_exponent, msb_in_token, lsb_in_token);
+ return true;
+}
+
+Status DecodeUintConfigs(size_t log_alpha_size,
+ std::vector<HybridUintConfig>* uint_config,
+ BitReader* br) {
+ // TODO(veluca): RLE?
+ for (size_t i = 0; i < uint_config->size(); i++) {
+ JXL_RETURN_IF_ERROR(
+ DecodeUintConfig(log_alpha_size, &(*uint_config)[i], br));
+ }
+ return true;
+}
+
+LZ77Params::LZ77Params() { Bundle::Init(this); }
+Status LZ77Params::VisitFields(Visitor* JXL_RESTRICT visitor) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &enabled));
+ if (!visitor->Conditional(enabled)) return true;
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(224), Val(512), Val(4096),
+ BitsOffset(15, 8), 224, &min_symbol));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(3), Val(4), BitsOffset(2, 5),
+ BitsOffset(8, 9), 3, &min_length));
+ return true;
+}
+
+void ANSCode::UpdateMaxNumBits(size_t ctx, size_t symbol) {
+ HybridUintConfig* cfg = &uint_config[ctx];
+ // LZ77 symbols use a different uint config.
+ if (lz77.enabled && lz77.nonserialized_distance_context != ctx &&
+ symbol >= lz77.min_symbol) {
+ symbol -= lz77.min_symbol;
+ cfg = &lz77.length_uint_config;
+ }
+ size_t split_token = cfg->split_token;
+ size_t msb_in_token = cfg->msb_in_token;
+ size_t lsb_in_token = cfg->lsb_in_token;
+ size_t split_exponent = cfg->split_exponent;
+ if (symbol < split_token) {
+ max_num_bits = std::max(max_num_bits, split_exponent);
+ return;
+ }
+ uint32_t n_extra_bits =
+ split_exponent - (msb_in_token + lsb_in_token) +
+ ((symbol - split_token) >> (msb_in_token + lsb_in_token));
+ size_t total_bits = msb_in_token + lsb_in_token + n_extra_bits + 1;
+ max_num_bits = std::max(max_num_bits, total_bits);
+}
+
+Status DecodeHistograms(BitReader* br, size_t num_contexts, ANSCode* code,
+ std::vector<uint8_t>* context_map, bool disallow_lz77) {
+ PROFILER_FUNC;
+ JXL_RETURN_IF_ERROR(Bundle::Read(br, &code->lz77));
+ if (code->lz77.enabled) {
+ num_contexts++;
+ JXL_RETURN_IF_ERROR(DecodeUintConfig(/*log_alpha_size=*/8,
+ &code->lz77.length_uint_config, br));
+ }
+ if (code->lz77.enabled && disallow_lz77) {
+ return JXL_FAILURE("Using LZ77 when explicitly disallowed");
+ }
+ size_t num_histograms = 1;
+ context_map->resize(num_contexts);
+ if (num_contexts > 1) {
+ JXL_RETURN_IF_ERROR(DecodeContextMap(context_map, &num_histograms, br));
+ }
+ code->lz77.nonserialized_distance_context = context_map->back();
+ code->use_prefix_code = br->ReadFixedBits<1>();
+ if (code->use_prefix_code) {
+ code->log_alpha_size = PREFIX_MAX_BITS;
+ } else {
+ code->log_alpha_size = br->ReadFixedBits<2>() + 5;
+ }
+ code->uint_config.resize(num_histograms);
+ JXL_RETURN_IF_ERROR(
+ DecodeUintConfigs(code->log_alpha_size, &code->uint_config, br));
+ const size_t max_alphabet_size = 1 << code->log_alpha_size;
+ JXL_RETURN_IF_ERROR(
+ DecodeANSCodes(num_histograms, max_alphabet_size, br, code));
+ // When using LZ77, flat codes might result in valid codestreams with
+ // histograms that potentially allow very large bit counts.
+ // TODO(veluca): in principle, a valid codestream might contain a histogram
+ // that could allow very large numbers of bits that is never used during ANS
+ // decoding. There's no benefit to doing that, though.
+ if (!code->lz77.enabled && code->max_num_bits > 32) {
+ // Just emit a warning as there are many opportunities for false positives.
+ JXL_WARNING("Histogram can represent numbers that are too large: %" PRIuS
+ "\n",
+ code->max_num_bits);
+ }
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/dec_ans.h b/third_party/jpeg-xl/lib/jxl/dec_ans.h
new file mode 100644
index 0000000000..0f4406745a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_ans.h
@@ -0,0 +1,462 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_ANS_H_
+#define LIB_JXL_DEC_ANS_H_
+
+// Library to decode the ANS population counts from the bit-stream and build a
+// decoding table from them.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <cstring>
+#include <vector>
+
+#include "lib/jxl/ans_common.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/cache_aligned.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_huffman.h"
+#include "lib/jxl/field_encodings.h"
+
+namespace jxl {
+
+class ANSSymbolReader;
+
+// Experiments show that best performance is typically achieved for a
+// split-exponent of 3 or 4. Trend seems to be that '4' is better
+// for large-ish pictures, and '3' better for rather small-ish pictures.
+// This is plausible - the more special symbols we have, the better
+// statistics we need to get a benefit out of them.
+
+// Our hybrid-encoding scheme has dedicated tokens for the smallest
+// (1 << split_exponents) numbers, and for the rest
+// encodes (number of bits) + (msb_in_token sub-leading binary digits) +
+// (lsb_in_token lowest binary digits) in the token, with the remaining bits
+// then being encoded as data.
+//
+// Example with split_exponent = 4, msb_in_token = 2, lsb_in_token = 0.
+//
+// Numbers N in [0 .. 15]:
+// These get represented as (token=N, bits='').
+// Numbers N >= 16:
+// If n is such that 2**n <= N < 2**(n+1),
+// and m = N - 2**n is the 'mantissa',
+// these get represented as:
+// (token=split_token +
+// ((n - split_exponent) * 4) +
+// (m >> (n - msb_in_token)),
+// bits=m & (1 << (n - msb_in_token)) - 1)
+// Specifically, we would get:
+// N = 0 - 15: (token=N, nbits=0, bits='')
+// N = 16 (10000): (token=16, nbits=2, bits='00')
+// N = 17 (10001): (token=16, nbits=2, bits='01')
+// N = 20 (10100): (token=17, nbits=2, bits='00')
+// N = 24 (11000): (token=18, nbits=2, bits='00')
+// N = 28 (11100): (token=19, nbits=2, bits='00')
+// N = 32 (100000): (token=20, nbits=3, bits='000')
+// N = 65535: (token=63, nbits=13, bits='1111111111111')
+struct HybridUintConfig {
+ uint32_t split_exponent;
+ uint32_t split_token;
+ uint32_t msb_in_token;
+ uint32_t lsb_in_token;
+ JXL_INLINE void Encode(uint32_t value, uint32_t* JXL_RESTRICT token,
+ uint32_t* JXL_RESTRICT nbits,
+ uint32_t* JXL_RESTRICT bits) const {
+ if (value < split_token) {
+ *token = value;
+ *nbits = 0;
+ *bits = 0;
+ } else {
+ uint32_t n = FloorLog2Nonzero(value);
+ uint32_t m = value - (1 << n);
+ *token = split_token +
+ ((n - split_exponent) << (msb_in_token + lsb_in_token)) +
+ ((m >> (n - msb_in_token)) << lsb_in_token) +
+ (m & ((1 << lsb_in_token) - 1));
+ *nbits = n - msb_in_token - lsb_in_token;
+ *bits = (value >> lsb_in_token) & ((1UL << *nbits) - 1);
+ }
+ }
+
+ explicit HybridUintConfig(uint32_t split_exponent = 4,
+ uint32_t msb_in_token = 2,
+ uint32_t lsb_in_token = 0)
+ : split_exponent(split_exponent),
+ split_token(1 << split_exponent),
+ msb_in_token(msb_in_token),
+ lsb_in_token(lsb_in_token) {
+ JXL_DASSERT(split_exponent >= msb_in_token + lsb_in_token);
+ }
+};
+
+struct LZ77Params : public Fields {
+ LZ77Params();
+ JXL_FIELDS_NAME(LZ77Params)
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+ bool enabled;
+
+ // Symbols above min_symbol use a special hybrid uint encoding and
+ // represent a length, to be added to min_length.
+ uint32_t min_symbol;
+ uint32_t min_length;
+
+ // Not serialized by VisitFields.
+ HybridUintConfig length_uint_config{0, 0, 0};
+
+ size_t nonserialized_distance_context;
+};
+
+static constexpr size_t kWindowSize = 1 << 20;
+static constexpr size_t kNumSpecialDistances = 120;
+// Table of special distance codes from WebP lossless.
+static constexpr int8_t kSpecialDistances[kNumSpecialDistances][2] = {
+ {0, 1}, {1, 0}, {1, 1}, {-1, 1}, {0, 2}, {2, 0}, {1, 2}, {-1, 2},
+ {2, 1}, {-2, 1}, {2, 2}, {-2, 2}, {0, 3}, {3, 0}, {1, 3}, {-1, 3},
+ {3, 1}, {-3, 1}, {2, 3}, {-2, 3}, {3, 2}, {-3, 2}, {0, 4}, {4, 0},
+ {1, 4}, {-1, 4}, {4, 1}, {-4, 1}, {3, 3}, {-3, 3}, {2, 4}, {-2, 4},
+ {4, 2}, {-4, 2}, {0, 5}, {3, 4}, {-3, 4}, {4, 3}, {-4, 3}, {5, 0},
+ {1, 5}, {-1, 5}, {5, 1}, {-5, 1}, {2, 5}, {-2, 5}, {5, 2}, {-5, 2},
+ {4, 4}, {-4, 4}, {3, 5}, {-3, 5}, {5, 3}, {-5, 3}, {0, 6}, {6, 0},
+ {1, 6}, {-1, 6}, {6, 1}, {-6, 1}, {2, 6}, {-2, 6}, {6, 2}, {-6, 2},
+ {4, 5}, {-4, 5}, {5, 4}, {-5, 4}, {3, 6}, {-3, 6}, {6, 3}, {-6, 3},
+ {0, 7}, {7, 0}, {1, 7}, {-1, 7}, {5, 5}, {-5, 5}, {7, 1}, {-7, 1},
+ {4, 6}, {-4, 6}, {6, 4}, {-6, 4}, {2, 7}, {-2, 7}, {7, 2}, {-7, 2},
+ {3, 7}, {-3, 7}, {7, 3}, {-7, 3}, {5, 6}, {-5, 6}, {6, 5}, {-6, 5},
+ {8, 0}, {4, 7}, {-4, 7}, {7, 4}, {-7, 4}, {8, 1}, {8, 2}, {6, 6},
+ {-6, 6}, {8, 3}, {5, 7}, {-5, 7}, {7, 5}, {-7, 5}, {8, 4}, {6, 7},
+ {-6, 7}, {7, 6}, {-7, 6}, {8, 5}, {7, 7}, {-7, 7}, {8, 6}, {8, 7}};
+
+struct ANSCode {
+ CacheAlignedUniquePtr alias_tables;
+ std::vector<HuffmanDecodingData> huffman_data;
+ std::vector<HybridUintConfig> uint_config;
+ std::vector<int> degenerate_symbols;
+ bool use_prefix_code;
+ uint8_t log_alpha_size; // for ANS.
+ LZ77Params lz77;
+ // Maximum number of bits necessary to represent the result of a
+ // ReadHybridUint call done with this ANSCode.
+ size_t max_num_bits = 0;
+ void UpdateMaxNumBits(size_t ctx, size_t symbol);
+};
+
+class ANSSymbolReader {
+ public:
+ // Invalid symbol reader, to be overwritten.
+ ANSSymbolReader() = default;
+ ANSSymbolReader(const ANSCode* code, BitReader* JXL_RESTRICT br,
+ size_t distance_multiplier = 0)
+ : alias_tables_(
+ reinterpret_cast<AliasTable::Entry*>(code->alias_tables.get())),
+ huffman_data_(code->huffman_data.data()),
+ use_prefix_code_(code->use_prefix_code),
+ configs(code->uint_config.data()) {
+ if (!use_prefix_code_) {
+ state_ = static_cast<uint32_t>(br->ReadFixedBits<32>());
+ log_alpha_size_ = code->log_alpha_size;
+ log_entry_size_ = ANS_LOG_TAB_SIZE - code->log_alpha_size;
+ entry_size_minus_1_ = (1 << log_entry_size_) - 1;
+ } else {
+ state_ = (ANS_SIGNATURE << 16u);
+ }
+ if (!code->lz77.enabled) return;
+ // a std::vector incurs unacceptable decoding speed loss because of
+ // initialization.
+ lz77_window_storage_ = AllocateArray(kWindowSize * sizeof(uint32_t));
+ lz77_window_ = reinterpret_cast<uint32_t*>(lz77_window_storage_.get());
+ lz77_ctx_ = code->lz77.nonserialized_distance_context;
+ lz77_length_uint_ = code->lz77.length_uint_config;
+ lz77_threshold_ = code->lz77.min_symbol;
+ lz77_min_length_ = code->lz77.min_length;
+ num_special_distances_ =
+ distance_multiplier == 0 ? 0 : kNumSpecialDistances;
+ for (size_t i = 0; i < num_special_distances_; i++) {
+ int dist = kSpecialDistances[i][0];
+ dist += static_cast<int>(distance_multiplier) * kSpecialDistances[i][1];
+ if (dist < 1) dist = 1;
+ special_distances_[i] = dist;
+ }
+ }
+
+ JXL_INLINE size_t ReadSymbolANSWithoutRefill(const size_t histo_idx,
+ BitReader* JXL_RESTRICT br) {
+ const uint32_t res = state_ & (ANS_TAB_SIZE - 1u);
+
+ const AliasTable::Entry* table =
+ &alias_tables_[histo_idx << log_alpha_size_];
+ const AliasTable::Symbol symbol =
+ AliasTable::Lookup(table, res, log_entry_size_, entry_size_minus_1_);
+ state_ = symbol.freq * (state_ >> ANS_LOG_TAB_SIZE) + symbol.offset;
+
+#if 1
+ // Branchless version is about equally fast on SKX.
+ const uint32_t new_state =
+ (state_ << 16u) | static_cast<uint32_t>(br->PeekFixedBits<16>());
+ const bool normalize = state_ < (1u << 16u);
+ state_ = normalize ? new_state : state_;
+ br->Consume(normalize ? 16 : 0);
+#else
+ if (JXL_UNLIKELY(state_ < (1u << 16u))) {
+ state_ = (state_ << 16u) | br->PeekFixedBits<16>();
+ br->Consume(16);
+ }
+#endif
+ const uint32_t next_res = state_ & (ANS_TAB_SIZE - 1u);
+ AliasTable::Prefetch(table, next_res, log_entry_size_);
+
+ return symbol.value;
+ }
+
+ JXL_INLINE size_t ReadSymbolHuffWithoutRefill(const size_t histo_idx,
+ BitReader* JXL_RESTRICT br) {
+ return huffman_data_[histo_idx].ReadSymbol(br);
+ }
+
+ JXL_INLINE size_t ReadSymbolWithoutRefill(const size_t histo_idx,
+ BitReader* JXL_RESTRICT br) {
+ // TODO(veluca): hoist if in hotter loops.
+ if (JXL_UNLIKELY(use_prefix_code_)) {
+ return ReadSymbolHuffWithoutRefill(histo_idx, br);
+ }
+ return ReadSymbolANSWithoutRefill(histo_idx, br);
+ }
+
+ JXL_INLINE size_t ReadSymbol(const size_t histo_idx,
+ BitReader* JXL_RESTRICT br) {
+ br->Refill();
+ return ReadSymbolWithoutRefill(histo_idx, br);
+ }
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ bool CheckANSFinalState() const { return true; }
+#else
+ bool CheckANSFinalState() const { return state_ == (ANS_SIGNATURE << 16u); }
+#endif
+
+ template <typename BitReader>
+ static JXL_INLINE uint32_t ReadHybridUintConfig(
+ const HybridUintConfig& config, size_t token, BitReader* br) {
+ size_t split_token = config.split_token;
+ size_t msb_in_token = config.msb_in_token;
+ size_t lsb_in_token = config.lsb_in_token;
+ size_t split_exponent = config.split_exponent;
+ // Fast-track version of hybrid integer decoding.
+ if (token < split_token) return token;
+ uint32_t nbits = split_exponent - (msb_in_token + lsb_in_token) +
+ ((token - split_token) >> (msb_in_token + lsb_in_token));
+ // Max amount of bits for ReadBits is 32 and max valid left shift is 29
+ // bits. However, for speed no error is propagated here, instead limit the
+ // nbits size. If nbits > 29, the code stream is invalid, but no error is
+ // returned.
+ // Note that in most cases we will emit an error if the histogram allows
+ // representing numbers that would cause invalid shifts, but we need to
+ // keep this check as when LZ77 is enabled it might make sense to have an
+ // histogram that could in principle cause invalid shifts.
+ nbits &= 31u;
+ uint32_t low = token & ((1 << lsb_in_token) - 1);
+ token >>= lsb_in_token;
+ const size_t bits = br->PeekBits(nbits);
+ br->Consume(nbits);
+ size_t ret = (((((1 << msb_in_token) | (token & ((1 << msb_in_token) - 1)))
+ << nbits) |
+ bits)
+ << lsb_in_token) |
+ low;
+ // TODO(eustas): mark BitReader as unhealthy if nbits > 29 or ret does not
+ // fit uint32_t
+ return static_cast<uint32_t>(ret);
+ }
+
+ // Takes a *clustered* idx. Can only use if HuffRleOnly() is true.
+ void ReadHybridUintClusteredHuffRleOnly(size_t ctx,
+ BitReader* JXL_RESTRICT br,
+ uint32_t* value, uint32_t* run) {
+ JXL_DASSERT(HuffRleOnly());
+ br->Refill(); // covers ReadSymbolWithoutRefill + PeekBits
+ size_t token = ReadSymbolHuffWithoutRefill(ctx, br);
+ if (JXL_UNLIKELY(token >= lz77_threshold_)) {
+ *run =
+ ReadHybridUintConfig(lz77_length_uint_, token - lz77_threshold_, br) +
+ lz77_min_length_ - 1;
+ return;
+ }
+ *value = ReadHybridUintConfig(configs[ctx], token, br);
+ }
+ bool HuffRleOnly() {
+ if (lz77_window_ == nullptr) return false;
+ if (!use_prefix_code_) return false;
+ for (size_t i = 0; i < kHuffmanTableBits; i++) {
+ if (huffman_data_[lz77_ctx_].table_[i].bits) return false;
+ if (huffman_data_[lz77_ctx_].table_[i].value != 1) return false;
+ }
+ if (configs[lz77_ctx_].split_token > 1) return false;
+ return true;
+ }
+
+ // Takes a *clustered* idx.
+ size_t ReadHybridUintClustered(size_t ctx, BitReader* JXL_RESTRICT br) {
+ if (JXL_UNLIKELY(num_to_copy_ > 0)) {
+ size_t ret = lz77_window_[(copy_pos_++) & kWindowMask];
+ num_to_copy_--;
+ lz77_window_[(num_decoded_++) & kWindowMask] = ret;
+ return ret;
+ }
+ br->Refill(); // covers ReadSymbolWithoutRefill + PeekBits
+ size_t token = ReadSymbolWithoutRefill(ctx, br);
+ if (JXL_UNLIKELY(token >= lz77_threshold_)) {
+ num_to_copy_ =
+ ReadHybridUintConfig(lz77_length_uint_, token - lz77_threshold_, br) +
+ lz77_min_length_;
+ br->Refill(); // covers ReadSymbolWithoutRefill + PeekBits
+ // Distance code.
+ size_t token = ReadSymbolWithoutRefill(lz77_ctx_, br);
+ size_t distance = ReadHybridUintConfig(configs[lz77_ctx_], token, br);
+ if (JXL_LIKELY(distance < num_special_distances_)) {
+ distance = special_distances_[distance];
+ } else {
+ distance = distance + 1 - num_special_distances_;
+ }
+ if (JXL_UNLIKELY(distance > num_decoded_)) {
+ distance = num_decoded_;
+ }
+ if (JXL_UNLIKELY(distance > kWindowSize)) {
+ distance = kWindowSize;
+ }
+ copy_pos_ = num_decoded_ - distance;
+ if (JXL_UNLIKELY(distance == 0)) {
+ JXL_DASSERT(lz77_window_ != nullptr);
+ // distance 0 -> num_decoded_ == copy_pos_ == 0
+ size_t to_fill = std::min<size_t>(num_to_copy_, kWindowSize);
+ memset(lz77_window_, 0, to_fill * sizeof(lz77_window_[0]));
+ }
+ // TODO(eustas): overflow; mark BitReader as unhealthy
+ if (num_to_copy_ < lz77_min_length_) return 0;
+ return ReadHybridUintClustered(ctx, br); // will trigger a copy.
+ }
+ size_t ret = ReadHybridUintConfig(configs[ctx], token, br);
+ if (lz77_window_) lz77_window_[(num_decoded_++) & kWindowMask] = ret;
+ return ret;
+ }
+
+ JXL_INLINE size_t ReadHybridUint(size_t ctx, BitReader* JXL_RESTRICT br,
+ const std::vector<uint8_t>& context_map) {
+ return ReadHybridUintClustered(context_map[ctx], br);
+ }
+
+ // ctx is a *clustered* context!
+ // This function will modify the ANS state as if `count` symbols have been
+ // decoded.
+ bool IsSingleValueAndAdvance(size_t ctx, uint32_t* value, size_t count) {
+ // TODO(veluca): No optimization for Huffman mode yet.
+ if (use_prefix_code_) return false;
+ // TODO(eustas): propagate "degenerate_symbol" to simplify this method.
+ const uint32_t res = state_ & (ANS_TAB_SIZE - 1u);
+ const AliasTable::Entry* table = &alias_tables_[ctx << log_alpha_size_];
+ AliasTable::Symbol symbol =
+ AliasTable::Lookup(table, res, log_entry_size_, entry_size_minus_1_);
+ if (symbol.freq != ANS_TAB_SIZE) return false;
+ if (configs[ctx].split_token <= symbol.value) return false;
+ if (symbol.value >= lz77_threshold_) return false;
+ *value = symbol.value;
+ if (lz77_window_) {
+ for (size_t i = 0; i < count; i++) {
+ lz77_window_[(num_decoded_++) & kWindowMask] = symbol.value;
+ }
+ }
+ return true;
+ }
+
+ static constexpr size_t kMaxCheckpointInterval = 512;
+ struct Checkpoint {
+ uint32_t state;
+ uint32_t num_to_copy;
+ uint32_t copy_pos;
+ uint32_t num_decoded;
+ uint32_t lz77_window[kMaxCheckpointInterval];
+ };
+ void Save(Checkpoint* checkpoint) {
+ checkpoint->state = state_;
+ checkpoint->num_decoded = num_decoded_;
+ checkpoint->num_to_copy = num_to_copy_;
+ checkpoint->copy_pos = copy_pos_;
+ if (lz77_window_) {
+ size_t win_start = num_decoded_ & kWindowMask;
+ size_t win_end = (num_decoded_ + kMaxCheckpointInterval) & kWindowMask;
+ if (win_end > win_start) {
+ memcpy(checkpoint->lz77_window, lz77_window_ + win_start,
+ (win_end - win_start) * sizeof(*lz77_window_));
+ } else {
+ memcpy(checkpoint->lz77_window, lz77_window_ + win_start,
+ (kWindowSize - win_start) * sizeof(*lz77_window_));
+ memcpy(checkpoint->lz77_window + (kWindowSize - win_start),
+ lz77_window_, win_end * sizeof(*lz77_window_));
+ }
+ }
+ }
+ void Restore(const Checkpoint& checkpoint) {
+ state_ = checkpoint.state;
+ JXL_DASSERT(num_decoded_ <=
+ checkpoint.num_decoded + kMaxCheckpointInterval);
+ num_decoded_ = checkpoint.num_decoded;
+ num_to_copy_ = checkpoint.num_to_copy;
+ copy_pos_ = checkpoint.copy_pos;
+ if (lz77_window_) {
+ size_t win_start = num_decoded_ & kWindowMask;
+ size_t win_end = (num_decoded_ + kMaxCheckpointInterval) & kWindowMask;
+ if (win_end > win_start) {
+ memcpy(lz77_window_ + win_start, checkpoint.lz77_window,
+ (win_end - win_start) * sizeof(*lz77_window_));
+ } else {
+ memcpy(lz77_window_ + win_start, checkpoint.lz77_window,
+ (kWindowSize - win_start) * sizeof(*lz77_window_));
+ memcpy(lz77_window_, checkpoint.lz77_window + (kWindowSize - win_start),
+ win_end * sizeof(*lz77_window_));
+ }
+ }
+ }
+
+ private:
+ const AliasTable::Entry* JXL_RESTRICT alias_tables_; // not owned
+ const HuffmanDecodingData* huffman_data_;
+ bool use_prefix_code_;
+ uint32_t state_ = ANS_SIGNATURE << 16u;
+ const HybridUintConfig* JXL_RESTRICT configs;
+ uint32_t log_alpha_size_{};
+ uint32_t log_entry_size_{};
+ uint32_t entry_size_minus_1_{};
+
+ // LZ77 structures and constants.
+ static constexpr size_t kWindowMask = kWindowSize - 1;
+ CacheAlignedUniquePtr lz77_window_storage_;
+ uint32_t* lz77_window_ = nullptr;
+ uint32_t num_decoded_ = 0;
+ uint32_t num_to_copy_ = 0;
+ uint32_t copy_pos_ = 0;
+ uint32_t lz77_ctx_ = 0;
+ uint32_t lz77_min_length_ = 0;
+ uint32_t lz77_threshold_ = 1 << 20; // bigger than any symbol.
+ HybridUintConfig lz77_length_uint_;
+ uint32_t special_distances_[kNumSpecialDistances]{};
+ uint32_t num_special_distances_{};
+};
+
+Status DecodeHistograms(BitReader* br, size_t num_contexts, ANSCode* code,
+ std::vector<uint8_t>* context_map,
+ bool disallow_lz77 = false);
+
+// Exposed for tests.
+Status DecodeUintConfigs(size_t log_alpha_size,
+ std::vector<HybridUintConfig>* uint_config,
+ BitReader* br);
+
+} // namespace jxl
+
+#endif // LIB_JXL_DEC_ANS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dec_bit_reader.h b/third_party/jpeg-xl/lib/jxl/dec_bit_reader.h
new file mode 100644
index 0000000000..df70284e3b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_bit_reader.h
@@ -0,0 +1,354 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_BIT_READER_H_
+#define LIB_JXL_DEC_BIT_READER_H_
+
+// Bounds-checked bit reader; 64-bit buffer with support for deferred refills
+// and switching to reading byte-aligned words.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h> // memcpy
+
+#ifdef __BMI2__
+#include <immintrin.h>
+#endif
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+
+namespace jxl {
+
+// Reads bits previously written to memory by BitWriter. Uses unaligned 8-byte
+// little-endian loads.
+class BitReader {
+ public:
+ static constexpr size_t kMaxBitsPerCall = 56;
+
+ // Constructs an invalid BitReader, to be overwritten before usage.
+ BitReader()
+ : buf_(0),
+ bits_in_buf_(0),
+ next_byte_{nullptr},
+ end_minus_8_{nullptr},
+ first_byte_(nullptr) {}
+ BitReader(const BitReader&) = delete;
+
+ // bytes need not be aligned nor padded!
+ template <class ArrayLike>
+ explicit BitReader(const ArrayLike& bytes)
+ : buf_(0),
+ bits_in_buf_(0),
+ next_byte_(bytes.data()),
+ // Assumes first_byte_ >= 8.
+ end_minus_8_(bytes.data() - 8 + bytes.size()),
+ first_byte_(bytes.data()) {
+ Refill();
+ }
+ ~BitReader() {
+ // Close() must be called before destroying an initialized bit reader.
+ // Invalid bit readers will have a nullptr in first_byte_.
+ JXL_ASSERT(close_called_ || !first_byte_);
+ }
+
+ // Move operator needs to invalidate the other BitReader such that it is
+ // irrelevant if we call Close() on it or not.
+ BitReader& operator=(BitReader&& other) noexcept {
+ // Ensure the current instance was already closed, before we overwrite it
+ // with other.
+ JXL_ASSERT(close_called_ || !first_byte_);
+
+ JXL_DASSERT(!other.close_called_);
+ buf_ = other.buf_;
+ bits_in_buf_ = other.bits_in_buf_;
+ next_byte_ = other.next_byte_;
+ end_minus_8_ = other.end_minus_8_;
+ first_byte_ = other.first_byte_;
+ overread_bytes_ = other.overread_bytes_;
+ close_called_ = other.close_called_;
+
+ other.first_byte_ = nullptr;
+ other.next_byte_ = nullptr;
+ return *this;
+ }
+ BitReader& operator=(const BitReader& other) = delete;
+
+ // For time-critical reads, refills can be shared by multiple reads.
+ // Based on variant 4 (plus bounds-checking), see
+ // fgiesen.wordpress.com/2018/02/20/reading-bits-in-far-too-many-ways-part-2/
+ JXL_INLINE void Refill() {
+ if (JXL_UNLIKELY(next_byte_ > end_minus_8_)) {
+ BoundsCheckedRefill();
+ } else {
+ // It's safe to load 64 bits; insert valid (possibly nonzero) bits above
+ // bits_in_buf_. The shift requires bits_in_buf_ < 64.
+ buf_ |= LoadLE64(next_byte_) << bits_in_buf_;
+
+ // Advance by bytes fully absorbed into the buffer.
+ next_byte_ += (63 - bits_in_buf_) >> 3;
+
+ // We absorbed a multiple of 8 bits, so the lower 3 bits of bits_in_buf_
+ // must remain unchanged, otherwise the next refill's shifted bits will
+ // not align with buf_. Set the three upper bits so the result >= 56.
+ bits_in_buf_ |= 56;
+ JXL_DASSERT(56 <= bits_in_buf_ && bits_in_buf_ < 64);
+ }
+ }
+
+ // Returns the bits that would be returned by Read without calling Advance().
+ // It is legal to PEEK at more bits than present in the bitstream (required
+ // by Huffman), and those bits will be zero.
+ template <size_t N>
+ JXL_INLINE uint64_t PeekFixedBits() const {
+ static_assert(N <= kMaxBitsPerCall, "Reading too many bits in one call.");
+ JXL_DASSERT(!close_called_);
+ return buf_ & ((1ULL << N) - 1);
+ }
+
+ JXL_INLINE uint64_t PeekBits(size_t nbits) const {
+ JXL_DASSERT(nbits <= kMaxBitsPerCall);
+ JXL_DASSERT(!close_called_);
+
+ // Slightly faster but requires BMI2. It is infeasible to make the many
+ // callers reside between begin/end_target, especially because only the
+ // callers in dec_ans are time-critical. Therefore only enabled if the
+ // entire binary is compiled for (and thus requires) BMI2.
+#if defined(__BMI2__) && defined(__x86_64__)
+ return _bzhi_u64(buf_, nbits);
+#else
+ const uint64_t mask = (1ULL << nbits) - 1;
+ return buf_ & mask;
+#endif
+ }
+
+ // Removes bits from the buffer. Need not match the previous Peek size, but
+ // the buffer must contain at least num_bits (this prevents consuming more
+ // than the total number of bits).
+ JXL_INLINE void Consume(size_t num_bits) {
+ JXL_DASSERT(!close_called_);
+ JXL_DASSERT(bits_in_buf_ >= num_bits);
+#ifdef JXL_CRASH_ON_ERROR
+ // When JXL_CRASH_ON_ERROR is defined, it is a fatal error to read more bits
+ // than available in the stream. A non-zero overread_bytes_ implies that
+ // next_byte_ is already at the end of the stream, so we don't need to
+ // check that.
+ JXL_ASSERT(bits_in_buf_ >= num_bits + overread_bytes_ * kBitsPerByte);
+#endif
+ bits_in_buf_ -= num_bits;
+ buf_ >>= num_bits;
+ }
+
+ JXL_INLINE uint64_t ReadBits(size_t nbits) {
+ JXL_DASSERT(!close_called_);
+ Refill();
+ const uint64_t bits = PeekBits(nbits);
+ Consume(nbits);
+ return bits;
+ }
+
+ template <size_t N>
+ JXL_INLINE uint64_t ReadFixedBits() {
+ JXL_DASSERT(!close_called_);
+ Refill();
+ const uint64_t bits = PeekFixedBits<N>();
+ Consume(N);
+ return bits;
+ }
+
+ // Equivalent to calling ReadFixedBits(1) `skip` times, but much faster.
+ // `skip` is typically large.
+ void SkipBits(size_t skip) {
+ JXL_DASSERT(!close_called_);
+ // Buffer is large enough - don't zero buf_ below.
+ if (JXL_UNLIKELY(skip <= bits_in_buf_)) {
+ Consume(skip);
+ return;
+ }
+
+ // First deduct what we can satisfy from the buffer
+ skip -= bits_in_buf_;
+ bits_in_buf_ = 0;
+ // Not enough to call Advance - that may leave some bits in the buffer
+ // which were previously ABOVE bits_in_buf.
+ buf_ = 0;
+
+ // Skip whole bytes
+ const size_t whole_bytes = skip / kBitsPerByte;
+ skip %= kBitsPerByte;
+ if (JXL_UNLIKELY(whole_bytes >
+ static_cast<size_t>(end_minus_8_ + 8 - next_byte_))) {
+ // This is already an overflow condition (skipping past the end of the bit
+ // stream). However if we increase next_byte_ too much we risk overflowing
+ // that value and potentially making it valid again (next_byte_ < end).
+ // This will set next_byte_ to the end of the stream and still consume
+ // some bits in overread_bytes_, however the TotalBitsConsumed() will be
+ // incorrect (still larger than the TotalBytes()).
+ next_byte_ = end_minus_8_ + 8;
+ skip += kBitsPerByte;
+ } else {
+ next_byte_ += whole_bytes;
+ }
+
+ Refill();
+ Consume(skip);
+ }
+
+ size_t TotalBitsConsumed() const {
+ const size_t bytes_read = static_cast<size_t>(next_byte_ - first_byte_);
+ return (bytes_read + overread_bytes_) * kBitsPerByte - bits_in_buf_;
+ }
+
+ Status JumpToByteBoundary() {
+ const size_t remainder = TotalBitsConsumed() % kBitsPerByte;
+ if (remainder == 0) return true;
+ if (JXL_UNLIKELY(ReadBits(kBitsPerByte - remainder) != 0)) {
+ return JXL_FAILURE("Non-zero padding bits");
+ }
+ return true;
+ }
+
+ // For interoperability with other bitreaders (for resuming at
+ // non-byte-aligned positions).
+ const uint8_t* FirstByte() const { return first_byte_; }
+ size_t TotalBytes() const {
+ return static_cast<size_t>(end_minus_8_ + 8 - first_byte_);
+ }
+
+ // Returns span of the remaining (unconsumed) bytes, e.g. for passing to
+ // external decoders such as Brotli.
+ Span<const uint8_t> GetSpan() const {
+ JXL_DASSERT(first_byte_ != nullptr);
+ JXL_ASSERT(TotalBitsConsumed() % kBitsPerByte == 0);
+ const size_t offset = TotalBitsConsumed() / kBitsPerByte; // no remainder
+ JXL_ASSERT(offset <= TotalBytes());
+ return Span<const uint8_t>(first_byte_ + offset, TotalBytes() - offset);
+ }
+
+ // Returns whether all the bits read so far have been within the input bounds.
+ // When reading past the EOF, the Read*() and Consume() functions return zeros
+ // but flag a failure when calling Close() without checking this function.
+ Status AllReadsWithinBounds() {
+ // Mark up to which point the user checked the out of bounds condition. If
+ // the user handles the condition at higher level (e.g. fetch more bytes
+ // from network, return a custom JXL_FAILURE, ...), Close() should not
+ // output a debug error (which would break tests with JXL_CRASH_ON_ERROR
+ // even when legitimately handling the situation at higher level). This is
+ // used by Bundle::CanRead.
+ checked_out_of_bounds_bits_ = TotalBitsConsumed();
+ if (TotalBitsConsumed() > TotalBytes() * kBitsPerByte) {
+ return false;
+ }
+ return true;
+ }
+
+ // Close the bit reader and return whether all the previous reads were
+ // successful. Close must be called once.
+ Status Close() {
+ JXL_DASSERT(!close_called_);
+ close_called_ = true;
+ if (!first_byte_) return true;
+ if (TotalBitsConsumed() > checked_out_of_bounds_bits_ &&
+ TotalBitsConsumed() > TotalBytes() * kBitsPerByte) {
+ return JXL_FAILURE("Read more bits than available in the bit_reader");
+ }
+ return true;
+ }
+
+ private:
+ // Separate function avoids inlining this relatively cold code into callers.
+ JXL_NOINLINE void BoundsCheckedRefill() {
+ PROFILER_FUNC;
+ const uint8_t* end = end_minus_8_ + 8;
+
+ // Read whole bytes until we have [56, 64) bits (same as LoadLE64)
+ for (; bits_in_buf_ < 64 - kBitsPerByte; bits_in_buf_ += kBitsPerByte) {
+ if (next_byte_ >= end) break;
+ buf_ |= static_cast<uint64_t>(*next_byte_++) << bits_in_buf_;
+ }
+ JXL_DASSERT(bits_in_buf_ < 64);
+
+ // Add extra bytes as 0 at the end of the stream in the bit_buffer_. If
+ // these bits are read, Close() will return a failure.
+ size_t extra_bytes = (63 - bits_in_buf_) / kBitsPerByte;
+ overread_bytes_ += extra_bytes;
+ bits_in_buf_ += extra_bytes * kBitsPerByte;
+
+ JXL_DASSERT(bits_in_buf_ < 64);
+ JXL_DASSERT(bits_in_buf_ >= 56);
+ }
+
+ JXL_NOINLINE uint32_t BoundsCheckedReadByteAlignedWord() {
+ if (next_byte_ + 1 < end_minus_8_ + 8) {
+ uint32_t ret = LoadLE16(next_byte_);
+ next_byte_ += 2;
+ return ret;
+ }
+ overread_bytes_ += 2;
+ return 0;
+ }
+
+ uint64_t buf_;
+ size_t bits_in_buf_; // [0, 64)
+ const uint8_t* JXL_RESTRICT next_byte_;
+ const uint8_t* end_minus_8_; // for refill bounds check
+ const uint8_t* first_byte_; // for GetSpan
+
+ // Number of bytes past the end that were loaded into the buf_. These bytes
+ // are not read from memory, but instead assumed 0. It is an error (likely due
+ // to an invalid stream) to Consume() more bits than specified in the range
+ // passed to the constructor.
+ uint64_t overread_bytes_{0};
+ bool close_called_{false};
+
+ uint64_t checked_out_of_bounds_bits_{0};
+};
+
+// Closes a BitReader when the BitReaderScopedCloser goes out of scope. When
+// closing the bit reader, if the status result was failure it sets this failure
+// to the passed variable pointer. Typical usage.
+//
+// Status ret = true;
+// {
+// BitReader reader(...);
+// BitReaderScopedCloser reader_closer(&reader, &ret);
+//
+// // ... code that can return errors here ...
+// }
+// // ... more code that doesn't use the BitReader.
+// return ret;
+
+class BitReaderScopedCloser {
+ public:
+ BitReaderScopedCloser(BitReader* reader, Status* status)
+ : reader_(reader), status_(status) {
+ JXL_DASSERT(reader_ != nullptr);
+ JXL_DASSERT(status_ != nullptr);
+ }
+ ~BitReaderScopedCloser() {
+ if (reader_ != nullptr) {
+ Status close_ret = reader_->Close();
+ if (!close_ret) *status_ = close_ret;
+ }
+ }
+ void CloseAndSuppressError() {
+ JXL_ASSERT(reader_ != nullptr);
+ (void)reader_->Close();
+ reader_ = nullptr;
+ }
+ BitReaderScopedCloser(const BitReaderScopedCloser&) = delete;
+
+ private:
+ BitReader* reader_;
+ Status* status_;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_DEC_BIT_READER_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dec_cache.cc b/third_party/jpeg-xl/lib/jxl/dec_cache.cc
new file mode 100644
index 0000000000..4db6f1d9a5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_cache.cc
@@ -0,0 +1,229 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_cache.h"
+
+#include "lib/jxl/blending.h"
+#include "lib/jxl/render_pipeline/stage_blending.h"
+#include "lib/jxl/render_pipeline/stage_chroma_upsampling.h"
+#include "lib/jxl/render_pipeline/stage_epf.h"
+#include "lib/jxl/render_pipeline/stage_from_linear.h"
+#include "lib/jxl/render_pipeline/stage_gaborish.h"
+#include "lib/jxl/render_pipeline/stage_noise.h"
+#include "lib/jxl/render_pipeline/stage_patches.h"
+#include "lib/jxl/render_pipeline/stage_splines.h"
+#include "lib/jxl/render_pipeline/stage_spot.h"
+#include "lib/jxl/render_pipeline/stage_to_linear.h"
+#include "lib/jxl/render_pipeline/stage_tone_mapping.h"
+#include "lib/jxl/render_pipeline/stage_upsampling.h"
+#include "lib/jxl/render_pipeline/stage_write.h"
+#include "lib/jxl/render_pipeline/stage_xyb.h"
+#include "lib/jxl/render_pipeline/stage_ycbcr.h"
+
+namespace jxl {
+
+Status PassesDecoderState::PreparePipeline(ImageBundle* decoded,
+ PipelineOptions options) {
+ const FrameHeader& frame_header = shared->frame_header;
+ size_t num_c = 3 + frame_header.nonserialized_metadata->m.num_extra_channels;
+ if ((frame_header.flags & FrameHeader::kNoise) != 0) {
+ num_c += 3;
+ }
+
+ if (frame_header.CanBeReferenced()) {
+ // Necessary so that SetInputSizes() can allocate output buffers as needed.
+ frame_storage_for_referencing = ImageBundle(decoded->metadata());
+ }
+
+ RenderPipeline::Builder builder(num_c);
+
+ if (options.use_slow_render_pipeline) {
+ builder.UseSimpleImplementation();
+ }
+
+ if (!frame_header.chroma_subsampling.Is444()) {
+ for (size_t c = 0; c < 3; c++) {
+ if (frame_header.chroma_subsampling.HShift(c) != 0) {
+ builder.AddStage(GetChromaUpsamplingStage(c, /*horizontal=*/true));
+ }
+ if (frame_header.chroma_subsampling.VShift(c) != 0) {
+ builder.AddStage(GetChromaUpsamplingStage(c, /*horizontal=*/false));
+ }
+ }
+ }
+
+ if (frame_header.loop_filter.gab) {
+ builder.AddStage(GetGaborishStage(frame_header.loop_filter));
+ }
+
+ {
+ const LoopFilter& lf = frame_header.loop_filter;
+ if (lf.epf_iters >= 3) {
+ builder.AddStage(GetEPFStage(lf, sigma, 0));
+ }
+ if (lf.epf_iters >= 1) {
+ builder.AddStage(GetEPFStage(lf, sigma, 1));
+ }
+ if (lf.epf_iters >= 2) {
+ builder.AddStage(GetEPFStage(lf, sigma, 2));
+ }
+ }
+
+ bool late_ec_upsample = frame_header.upsampling != 1;
+ for (auto ecups : frame_header.extra_channel_upsampling) {
+ if (ecups != frame_header.upsampling) {
+ // If patches are applied, either frame_header.upsampling == 1 or
+ // late_ec_upsample is true.
+ late_ec_upsample = false;
+ }
+ }
+
+ if (!late_ec_upsample) {
+ for (size_t ec = 0; ec < frame_header.extra_channel_upsampling.size();
+ ec++) {
+ if (frame_header.extra_channel_upsampling[ec] != 1) {
+ builder.AddStage(GetUpsamplingStage(
+ frame_header.nonserialized_metadata->transform_data, 3 + ec,
+ CeilLog2Nonzero(frame_header.extra_channel_upsampling[ec])));
+ }
+ }
+ }
+
+ if ((frame_header.flags & FrameHeader::kPatches) != 0) {
+ builder.AddStage(
+ GetPatchesStage(&shared->image_features.patches,
+ 3 + shared->metadata->m.num_extra_channels));
+ }
+ if ((frame_header.flags & FrameHeader::kSplines) != 0) {
+ builder.AddStage(GetSplineStage(&shared->image_features.splines));
+ }
+
+ if (frame_header.upsampling != 1) {
+ size_t nb_channels =
+ 3 +
+ (late_ec_upsample ? frame_header.extra_channel_upsampling.size() : 0);
+ for (size_t c = 0; c < nb_channels; c++) {
+ builder.AddStage(GetUpsamplingStage(
+ frame_header.nonserialized_metadata->transform_data, c,
+ CeilLog2Nonzero(frame_header.upsampling)));
+ }
+ }
+
+ if ((frame_header.flags & FrameHeader::kNoise) != 0) {
+ builder.AddStage(GetConvolveNoiseStage(num_c - 3));
+ builder.AddStage(GetAddNoiseStage(shared->image_features.noise_params,
+ shared->cmap, num_c - 3));
+ }
+ if (frame_header.dc_level != 0) {
+ builder.AddStage(GetWriteToImage3FStage(
+ &shared_storage.dc_frames[frame_header.dc_level - 1]));
+ }
+
+ if (frame_header.CanBeReferenced() &&
+ frame_header.save_before_color_transform) {
+ builder.AddStage(GetWriteToImageBundleStage(
+ &frame_storage_for_referencing, output_encoding_info.color_encoding));
+ }
+
+ bool has_alpha = false;
+ size_t alpha_c = 0;
+ for (size_t i = 0; i < decoded->metadata()->extra_channel_info.size(); i++) {
+ if (decoded->metadata()->extra_channel_info[i].type ==
+ ExtraChannel::kAlpha) {
+ has_alpha = true;
+ alpha_c = 3 + i;
+ break;
+ }
+ }
+
+ if (fast_xyb_srgb8_conversion) {
+ JXL_ASSERT(!NeedsBlending(this));
+ JXL_ASSERT(!frame_header.CanBeReferenced() ||
+ frame_header.save_before_color_transform);
+ JXL_ASSERT(!options.render_spotcolors ||
+ !decoded->metadata()->Find(ExtraChannel::kSpotColor));
+ bool is_rgba = (main_output.format.num_channels == 4);
+ uint8_t* rgb_output = reinterpret_cast<uint8_t*>(main_output.buffer);
+ builder.AddStage(GetFastXYBTosRGB8Stage(rgb_output, main_output.stride,
+ width, height, is_rgba, has_alpha,
+ alpha_c));
+ } else {
+ bool linear = false;
+ if (frame_header.color_transform == ColorTransform::kYCbCr) {
+ builder.AddStage(GetYCbCrStage());
+ } else if (frame_header.color_transform == ColorTransform::kXYB) {
+ builder.AddStage(GetXYBStage(output_encoding_info));
+ if (output_encoding_info.color_encoding.GetColorSpace() !=
+ ColorSpace::kXYB) {
+ linear = true;
+ }
+ } // Nothing to do for kNone.
+
+ if (options.coalescing && NeedsBlending(this)) {
+ if (linear) {
+ builder.AddStage(GetFromLinearStage(output_encoding_info));
+ linear = false;
+ }
+ builder.AddStage(
+ GetBlendingStage(this, output_encoding_info.color_encoding));
+ }
+
+ if (options.coalescing && frame_header.CanBeReferenced() &&
+ !frame_header.save_before_color_transform) {
+ if (linear) {
+ builder.AddStage(GetFromLinearStage(output_encoding_info));
+ linear = false;
+ }
+ builder.AddStage(GetWriteToImageBundleStage(
+ &frame_storage_for_referencing, output_encoding_info.color_encoding));
+ }
+
+ if (options.render_spotcolors &&
+ frame_header.nonserialized_metadata->m.Find(ExtraChannel::kSpotColor)) {
+ for (size_t i = 0; i < decoded->metadata()->extra_channel_info.size();
+ i++) {
+ // Don't use Find() because there may be multiple spot color channels.
+ const ExtraChannelInfo& eci =
+ decoded->metadata()->extra_channel_info[i];
+ if (eci.type == ExtraChannel::kSpotColor) {
+ builder.AddStage(GetSpotColorStage(3 + i, eci.spot_color));
+ }
+ }
+ }
+
+ auto tone_mapping_stage = GetToneMappingStage(output_encoding_info);
+ if (tone_mapping_stage) {
+ if (!linear) {
+ auto to_linear_stage = GetToLinearStage(output_encoding_info);
+ if (!to_linear_stage) {
+ return JXL_FAILURE(
+ "attempting to perform tone mapping on colorspace not "
+ "convertible to linear");
+ }
+ builder.AddStage(std::move(to_linear_stage));
+ linear = true;
+ }
+ builder.AddStage(std::move(tone_mapping_stage));
+ }
+
+ if (linear) {
+ builder.AddStage(GetFromLinearStage(output_encoding_info));
+ linear = false;
+ }
+
+ if (main_output.callback.IsPresent() || main_output.buffer) {
+ builder.AddStage(GetWriteToOutputStage(main_output, width, height,
+ has_alpha, unpremul_alpha, alpha_c,
+ undo_orientation, extra_output));
+ } else {
+ builder.AddStage(GetWriteToImageBundleStage(
+ decoded, output_encoding_info.color_encoding));
+ }
+ }
+ render_pipeline = std::move(builder).Finalize(shared->frame_dim);
+ return render_pipeline->IsInitialized();
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/dec_cache.h b/third_party/jpeg-xl/lib/jxl/dec_cache.h
new file mode 100644
index 0000000000..7c9fe9a6c3
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_cache.h
@@ -0,0 +1,261 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_CACHE_H_
+#define LIB_JXL_DEC_CACHE_H_
+
+#include <jxl/decode.h>
+#include <stdint.h>
+
+#include <atomic>
+#include <hwy/base.h> // HWY_ALIGN_MAX
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/dec_group_border.h"
+#include "lib/jxl/dec_noise.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/passes_state.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/render_pipeline/render_pipeline.h"
+#include "lib/jxl/render_pipeline/stage_upsampling.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+
+constexpr size_t kSigmaBorder = 1;
+constexpr size_t kSigmaPadding = 2;
+
+struct PixelCallback {
+ PixelCallback() = default;
+ PixelCallback(JxlImageOutInitCallback init, JxlImageOutRunCallback run,
+ JxlImageOutDestroyCallback destroy, void* init_opaque)
+ : init(init), run(run), destroy(destroy), init_opaque(init_opaque) {
+#if JXL_ENABLE_ASSERT
+ const bool has_init = init != nullptr;
+ const bool has_run = run != nullptr;
+ const bool has_destroy = destroy != nullptr;
+ JXL_ASSERT(has_init == has_run && has_run == has_destroy);
+#endif
+ }
+
+ bool IsPresent() const { return run != nullptr; }
+
+ void* Init(size_t num_threads, size_t num_pixels) const {
+ return init(init_opaque, num_threads, num_pixels);
+ }
+
+ JxlImageOutInitCallback init = nullptr;
+ JxlImageOutRunCallback run = nullptr;
+ JxlImageOutDestroyCallback destroy = nullptr;
+ void* init_opaque = nullptr;
+};
+
+struct ImageOutput {
+ // Pixel format of the output pixels, used for buffer and callback output.
+ JxlPixelFormat format;
+ // Output bit depth for unsigned data types, used for float to int conversion.
+ size_t bits_per_sample;
+ // Callback for line-by-line output.
+ PixelCallback callback;
+ // Pixel buffer for image output.
+ void* buffer;
+ size_t buffer_size;
+ // Length of a row of image_buffer in bytes (based on oriented width).
+ size_t stride;
+};
+
+// Per-frame decoder state. All the images here should be accessed through a
+// group rect (either with block units or pixel units).
+struct PassesDecoderState {
+ PassesSharedState shared_storage;
+ // Allows avoiding copies for encoder loop.
+ const PassesSharedState* JXL_RESTRICT shared = &shared_storage;
+
+ // 8x upsampling stage for DC.
+ std::unique_ptr<RenderPipelineStage> upsampler8x;
+
+ // For ANS decoding.
+ std::vector<ANSCode> code;
+ std::vector<std::vector<uint8_t>> context_map;
+
+ // Multiplier to be applied to the quant matrices of the x channel.
+ float x_dm_multiplier;
+ float b_dm_multiplier;
+
+ // Sigma values for EPF.
+ ImageF sigma;
+
+ // Image dimensions before applying undo_orientation.
+ size_t width;
+ size_t height;
+ ImageOutput main_output;
+ std::vector<ImageOutput> extra_output;
+
+ // Whether to use int16 float-XYB-to-uint8-srgb conversion.
+ bool fast_xyb_srgb8_conversion;
+
+ // If true, the RGBA output will be unpremultiplied before writing to the
+ // output.
+ bool unpremul_alpha;
+
+ // The render pipeline will apply this orientation to bring the image to the
+ // intended display orientation.
+ Orientation undo_orientation;
+
+ // Used for seeding noise.
+ size_t visible_frame_index = 0;
+ size_t nonvisible_frame_index = 0;
+
+ // Keep track of the transform types used.
+ std::atomic<uint32_t> used_acs{0};
+
+ // Storage for coefficients if in "accumulate" mode.
+ std::unique_ptr<ACImage> coefficients = make_unique<ACImageT<int32_t>>(0, 0);
+
+ // Rendering pipeline.
+ std::unique_ptr<RenderPipeline> render_pipeline;
+
+ // Storage for the current frame if it can be referenced by future frames.
+ ImageBundle frame_storage_for_referencing;
+
+ struct PipelineOptions {
+ bool use_slow_render_pipeline;
+ bool coalescing;
+ bool render_spotcolors;
+ };
+
+ Status PreparePipeline(ImageBundle* decoded, PipelineOptions options);
+
+ // Information for colour conversions.
+ OutputEncodingInfo output_encoding_info;
+
+ // Initializes decoder-specific structures using information from *shared.
+ Status Init() {
+ x_dm_multiplier =
+ std::pow(1 / (1.25f), shared->frame_header.x_qm_scale - 2.0f);
+ b_dm_multiplier =
+ std::pow(1 / (1.25f), shared->frame_header.b_qm_scale - 2.0f);
+
+ main_output.callback = PixelCallback();
+ main_output.buffer = nullptr;
+ extra_output.clear();
+
+ fast_xyb_srgb8_conversion = false;
+ unpremul_alpha = false;
+ undo_orientation = Orientation::kIdentity;
+
+ used_acs = 0;
+
+ upsampler8x = GetUpsamplingStage(shared->metadata->transform_data, 0, 3);
+ if (shared->frame_header.loop_filter.epf_iters > 0) {
+ sigma = ImageF(shared->frame_dim.xsize_blocks + 2 * kSigmaPadding,
+ shared->frame_dim.ysize_blocks + 2 * kSigmaPadding);
+ }
+ return true;
+ }
+
+ // Initialize the decoder state after all of DC is decoded.
+ Status InitForAC(ThreadPool* pool) {
+ shared_storage.coeff_order_size = 0;
+ for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+ if (((1 << o) & used_acs) == 0) continue;
+ uint8_t ord = kStrategyOrder[o];
+ shared_storage.coeff_order_size =
+ std::max(kCoeffOrderOffset[3 * (ord + 1)] * kDCTBlockSize,
+ shared_storage.coeff_order_size);
+ }
+ size_t sz = shared_storage.frame_header.passes.num_passes *
+ shared_storage.coeff_order_size;
+ if (sz > shared_storage.coeff_orders.size()) {
+ shared_storage.coeff_orders.resize(sz);
+ }
+ return true;
+ }
+
+ // Fills the `state->filter_weights.sigma` image with the precomputed sigma
+ // values in the area inside `block_rect`. Accesses the AC strategy, quant
+ // field and epf_sharpness fields in the corresponding positions.
+ void ComputeSigma(const Rect& block_rect, PassesDecoderState* state);
+};
+
+// Temp images required for decoding a single group. Reduces memory allocations
+// for large images because we only initialize min(#threads, #groups) instances.
+struct GroupDecCache {
+ void InitOnce(size_t num_passes, size_t used_acs) {
+ PROFILER_FUNC;
+
+ for (size_t i = 0; i < num_passes; i++) {
+ if (num_nzeroes[i].xsize() == 0) {
+ // Allocate enough for a whole group - partial groups on the
+ // right/bottom border just use a subset. The valid size is passed via
+ // Rect.
+
+ num_nzeroes[i] = Image3I(kGroupDimInBlocks, kGroupDimInBlocks);
+ }
+ }
+ size_t max_block_area = 0;
+
+ for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+ AcStrategy acs = AcStrategy::FromRawStrategy(o);
+ if ((used_acs & (1 << o)) == 0) continue;
+ size_t area =
+ acs.covered_blocks_x() * acs.covered_blocks_y() * kDCTBlockSize;
+ max_block_area = std::max(area, max_block_area);
+ }
+
+ if (max_block_area > max_block_area_) {
+ max_block_area_ = max_block_area;
+ // We need 3x float blocks for dequantized coefficients and 1x for scratch
+ // space for transforms.
+ float_memory_ = hwy::AllocateAligned<float>(max_block_area_ * 4);
+ // We need 3x int32 or int16 blocks for quantized coefficients.
+ int32_memory_ = hwy::AllocateAligned<int32_t>(max_block_area_ * 3);
+ int16_memory_ = hwy::AllocateAligned<int16_t>(max_block_area_ * 3);
+ }
+
+ dec_group_block = float_memory_.get();
+ scratch_space = dec_group_block + max_block_area_ * 3;
+ dec_group_qblock = int32_memory_.get();
+ dec_group_qblock16 = int16_memory_.get();
+ }
+
+ void InitDCBufferOnce() {
+ if (dc_buffer.xsize() == 0) {
+ dc_buffer = ImageF(kGroupDimInBlocks + kRenderPipelineXOffset * 2,
+ kGroupDimInBlocks + 4);
+ }
+ }
+
+ // Scratch space used by DecGroupImpl().
+ float* dec_group_block;
+ int32_t* dec_group_qblock;
+ int16_t* dec_group_qblock16;
+
+ // For TransformToPixels.
+ float* scratch_space;
+ // Note that scratch_space is never used at the same time as dec_group_qblock.
+ // Moreover, only one of dec_group_qblock16 is ever used.
+ // TODO(veluca): figure out if we can save allocations.
+
+ // AC decoding
+ Image3I num_nzeroes[kMaxNumPasses];
+
+ // Buffer for DC upsampling.
+ ImageF dc_buffer;
+
+ private:
+ hwy::AlignedFreeUniquePtr<float[]> float_memory_;
+ hwy::AlignedFreeUniquePtr<int32_t[]> int32_memory_;
+ hwy::AlignedFreeUniquePtr<int16_t[]> int16_memory_;
+ size_t max_block_area_ = 0;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_DEC_CACHE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dec_context_map.cc b/third_party/jpeg-xl/lib/jxl/dec_context_map.cc
new file mode 100644
index 0000000000..1b291650d7
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_context_map.cc
@@ -0,0 +1,86 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_context_map.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/inverse_mtf-inl.h"
+
+namespace jxl {
+
+namespace {
+
+Status VerifyContextMap(const std::vector<uint8_t>& context_map,
+ const size_t num_htrees) {
+ std::vector<bool> have_htree(num_htrees);
+ size_t num_found = 0;
+ for (const uint8_t htree : context_map) {
+ if (htree >= num_htrees) {
+ return JXL_FAILURE("Invalid histogram index in context map.");
+ }
+ if (!have_htree[htree]) {
+ have_htree[htree] = true;
+ ++num_found;
+ }
+ }
+ if (num_found != num_htrees) {
+ return JXL_FAILURE("Incomplete context map.");
+ }
+ return true;
+}
+
+} // namespace
+
+Status DecodeContextMap(std::vector<uint8_t>* context_map, size_t* num_htrees,
+ BitReader* input) {
+ bool is_simple = input->ReadFixedBits<1>();
+ if (is_simple) {
+ int bits_per_entry = input->ReadFixedBits<2>();
+ if (bits_per_entry != 0) {
+ for (size_t i = 0; i < context_map->size(); i++) {
+ (*context_map)[i] = input->ReadBits(bits_per_entry);
+ }
+ } else {
+ std::fill(context_map->begin(), context_map->end(), 0);
+ }
+ } else {
+ bool use_mtf = input->ReadFixedBits<1>();
+ ANSCode code;
+ std::vector<uint8_t> dummy_ctx_map;
+ // Usage of LZ77 is disallowed if decoding only two symbols. This doesn't
+ // make sense in non-malicious bitstreams, and could cause a stack overflow
+ // in malicious bitstreams by making every context map require its own
+ // context map.
+ JXL_RETURN_IF_ERROR(
+ DecodeHistograms(input, 1, &code, &dummy_ctx_map,
+ /*disallow_lz77=*/context_map->size() <= 2));
+ ANSSymbolReader reader(&code, input);
+ size_t i = 0;
+ while (i < context_map->size()) {
+ uint32_t sym = reader.ReadHybridUint(0, input, dummy_ctx_map);
+ if (sym >= kMaxClusters) {
+ return JXL_FAILURE("Invalid cluster ID");
+ }
+ (*context_map)[i] = sym;
+ i++;
+ }
+ if (!reader.CheckANSFinalState()) {
+ return JXL_FAILURE("Invalid context map");
+ }
+ if (use_mtf) {
+ InverseMoveToFrontTransform(context_map->data(), context_map->size());
+ }
+ }
+ *num_htrees = *std::max_element(context_map->begin(), context_map->end()) + 1;
+ return VerifyContextMap(*context_map, *num_htrees);
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/dec_context_map.h b/third_party/jpeg-xl/lib/jxl/dec_context_map.h
new file mode 100644
index 0000000000..95b8a0ca92
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_context_map.h
@@ -0,0 +1,30 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_CONTEXT_MAP_H_
+#define LIB_JXL_DEC_CONTEXT_MAP_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/dec_bit_reader.h"
+
+namespace jxl {
+
+// Context map uses uint8_t.
+constexpr size_t kMaxClusters = 256;
+
+// Reads the context map from the bit stream. On calling this function,
+// context_map->size() must be the number of possible context ids.
+// Sets *num_htrees to the number of different histogram ids in
+// *context_map.
+Status DecodeContextMap(std::vector<uint8_t>* context_map, size_t* num_htrees,
+ BitReader* input);
+
+} // namespace jxl
+
+#endif // LIB_JXL_DEC_CONTEXT_MAP_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dec_external_image.cc b/third_party/jpeg-xl/lib/jxl/dec_external_image.cc
new file mode 100644
index 0000000000..bbf457ba91
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_external_image.cc
@@ -0,0 +1,493 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_external_image.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_external_image.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/cache_aligned.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Clamp;
+using hwy::HWY_NAMESPACE::NearestInt;
+
+// TODO(jon): check if this can be replaced by a FloatToU16 function
+void FloatToU32(const float* in, uint32_t* out, size_t num, float mul,
+ size_t bits_per_sample) {
+ const HWY_FULL(float) d;
+ const hwy::HWY_NAMESPACE::Rebind<uint32_t, decltype(d)> du;
+
+ // Unpoison accessing partially-uninitialized vectors with memory sanitizer.
+ // This is because we run NearestInt() on the vector, which triggers msan even
+ // it it safe to do so since the values are not mixed between lanes.
+ const size_t num_round_up = RoundUpTo(num, Lanes(d));
+ msan::UnpoisonMemory(in + num, sizeof(in[0]) * (num_round_up - num));
+
+ const auto one = Set(d, 1.0f);
+ const auto scale = Set(d, mul);
+ for (size_t x = 0; x < num; x += Lanes(d)) {
+ auto v = Load(d, in + x);
+ // Clamp turns NaN to 'min'.
+ v = Clamp(v, Zero(d), one);
+ auto i = NearestInt(Mul(v, scale));
+ Store(BitCast(du, i), du, out + x);
+ }
+
+ // Poison back the output.
+ msan::PoisonMemory(out + num, sizeof(out[0]) * (num_round_up - num));
+}
+
+void FloatToF16(const float* in, hwy::float16_t* out, size_t num) {
+ const HWY_FULL(float) d;
+ const hwy::HWY_NAMESPACE::Rebind<hwy::float16_t, decltype(d)> du;
+
+ // Unpoison accessing partially-uninitialized vectors with memory sanitizer.
+ // This is because we run DemoteTo() on the vector which triggers msan.
+ const size_t num_round_up = RoundUpTo(num, Lanes(d));
+ msan::UnpoisonMemory(in + num, sizeof(in[0]) * (num_round_up - num));
+
+ for (size_t x = 0; x < num; x += Lanes(d)) {
+ auto v = Load(d, in + x);
+ auto v16 = DemoteTo(du, v);
+ Store(v16, du, out + x);
+ }
+
+ // Poison back the output.
+ msan::PoisonMemory(out + num, sizeof(out[0]) * (num_round_up - num));
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace jxl {
+namespace {
+
+// Stores a float in big endian
+void StoreBEFloat(float value, uint8_t* p) {
+ uint32_t u;
+ memcpy(&u, &value, 4);
+ StoreBE32(u, p);
+}
+
+// Stores a float in little endian
+void StoreLEFloat(float value, uint8_t* p) {
+ uint32_t u;
+ memcpy(&u, &value, 4);
+ StoreLE32(u, p);
+}
+
+// The orientation may not be identity.
+// TODO(lode): SIMDify where possible
+template <typename T>
+Status UndoOrientation(jxl::Orientation undo_orientation, const Plane<T>& image,
+ Plane<T>& out, jxl::ThreadPool* pool) {
+ const size_t xsize = image.xsize();
+ const size_t ysize = image.ysize();
+
+ if (undo_orientation == Orientation::kFlipHorizontal) {
+ out = Plane<T>(xsize, ysize);
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /*thread*/) {
+ const int64_t y = task;
+ const T* JXL_RESTRICT row_in = image.Row(y);
+ T* JXL_RESTRICT row_out = out.Row(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ row_out[xsize - x - 1] = row_in[x];
+ }
+ },
+ "UndoOrientation"));
+ } else if (undo_orientation == Orientation::kRotate180) {
+ out = Plane<T>(xsize, ysize);
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /*thread*/) {
+ const int64_t y = task;
+ const T* JXL_RESTRICT row_in = image.Row(y);
+ T* JXL_RESTRICT row_out = out.Row(ysize - y - 1);
+ for (size_t x = 0; x < xsize; ++x) {
+ row_out[xsize - x - 1] = row_in[x];
+ }
+ },
+ "UndoOrientation"));
+ } else if (undo_orientation == Orientation::kFlipVertical) {
+ out = Plane<T>(xsize, ysize);
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /*thread*/) {
+ const int64_t y = task;
+ const T* JXL_RESTRICT row_in = image.Row(y);
+ T* JXL_RESTRICT row_out = out.Row(ysize - y - 1);
+ for (size_t x = 0; x < xsize; ++x) {
+ row_out[x] = row_in[x];
+ }
+ },
+ "UndoOrientation"));
+ } else if (undo_orientation == Orientation::kTranspose) {
+ out = Plane<T>(ysize, xsize);
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /*thread*/) {
+ const int64_t y = task;
+ const T* JXL_RESTRICT row_in = image.Row(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ out.Row(x)[y] = row_in[x];
+ }
+ },
+ "UndoOrientation"));
+ } else if (undo_orientation == Orientation::kRotate90) {
+ out = Plane<T>(ysize, xsize);
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /*thread*/) {
+ const int64_t y = task;
+ const T* JXL_RESTRICT row_in = image.Row(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ out.Row(x)[ysize - y - 1] = row_in[x];
+ }
+ },
+ "UndoOrientation"));
+ } else if (undo_orientation == Orientation::kAntiTranspose) {
+ out = Plane<T>(ysize, xsize);
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /*thread*/) {
+ const int64_t y = task;
+ const T* JXL_RESTRICT row_in = image.Row(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ out.Row(xsize - x - 1)[ysize - y - 1] = row_in[x];
+ }
+ },
+ "UndoOrientation"));
+ } else if (undo_orientation == Orientation::kRotate270) {
+ out = Plane<T>(ysize, xsize);
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, static_cast<uint32_t>(ysize), ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /*thread*/) {
+ const int64_t y = task;
+ const T* JXL_RESTRICT row_in = image.Row(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ out.Row(xsize - x - 1)[y] = row_in[x];
+ }
+ },
+ "UndoOrientation"));
+ }
+ return true;
+}
+} // namespace
+
+HWY_EXPORT(FloatToU32);
+HWY_EXPORT(FloatToF16);
+
+namespace {
+
+using StoreFuncType = void(uint32_t value, uint8_t* dest);
+template <StoreFuncType StoreFunc>
+void StoreUintRow(uint32_t* JXL_RESTRICT* rows_u32, size_t num_channels,
+ size_t xsize, size_t bytes_per_sample,
+ uint8_t* JXL_RESTRICT out) {
+ for (size_t x = 0; x < xsize; ++x) {
+ for (size_t c = 0; c < num_channels; c++) {
+ StoreFunc(rows_u32[c][x],
+ out + (num_channels * x + c) * bytes_per_sample);
+ }
+ }
+}
+
+template <void(StoreFunc)(float, uint8_t*)>
+void StoreFloatRow(const float* JXL_RESTRICT* rows_in, size_t num_channels,
+ size_t xsize, uint8_t* JXL_RESTRICT out) {
+ for (size_t x = 0; x < xsize; ++x) {
+ for (size_t c = 0; c < num_channels; c++) {
+ StoreFunc(rows_in[c][x], out + (num_channels * x + c) * sizeof(float));
+ }
+ }
+}
+
+void JXL_INLINE Store8(uint32_t value, uint8_t* dest) { *dest = value & 0xff; }
+
+// Maximum number of channels for the ConvertChannelsToExternal function.
+const size_t kConvertMaxChannels = 4;
+
+// Converts a list of channels to an interleaved image, applying transformations
+// when needed.
+// The input channels are given as a (non-const!) array of channel pointers and
+// interleaved in that order.
+//
+// Note: if a pointer in channels[] is nullptr, a 1.0 value will be used
+// instead. This is useful for handling when a user requests an alpha channel
+// from an image that doesn't have one. The first channel in the list may not
+// be nullptr, since it is used to determine the image size.
+Status ConvertChannelsToExternal(const ImageF* channels[], size_t num_channels,
+ size_t bits_per_sample, bool float_out,
+ JxlEndianness endianness, size_t stride,
+ jxl::ThreadPool* pool, void* out_image,
+ size_t out_size,
+ const PixelCallback& out_callback,
+ jxl::Orientation undo_orientation) {
+ JXL_DASSERT(num_channels != 0 && num_channels <= kConvertMaxChannels);
+ JXL_DASSERT(channels[0] != nullptr);
+ JXL_CHECK(float_out ? bits_per_sample == 16 || bits_per_sample == 32
+ : bits_per_sample > 0 && bits_per_sample <= 16);
+ if (!!out_image == out_callback.IsPresent()) {
+ return JXL_FAILURE(
+ "Must provide either an out_image or an out_callback, but not both.");
+ }
+
+ const size_t bytes_per_channel = DivCeil(bits_per_sample, jxl::kBitsPerByte);
+ const size_t bytes_per_pixel = num_channels * bytes_per_channel;
+
+ std::vector<std::vector<uint8_t>> row_out_callback;
+ const auto FreeCallbackOpaque = [&out_callback](void* p) {
+ out_callback.destroy(p);
+ };
+ std::unique_ptr<void, decltype(FreeCallbackOpaque)> out_run_opaque(
+ nullptr, FreeCallbackOpaque);
+ auto InitOutCallback = [&](size_t num_threads) -> Status {
+ if (out_callback.IsPresent()) {
+ out_run_opaque.reset(out_callback.Init(num_threads, stride));
+ JXL_RETURN_IF_ERROR(out_run_opaque != nullptr);
+ row_out_callback.resize(num_threads);
+ for (size_t i = 0; i < num_threads; ++i) {
+ row_out_callback[i].resize(stride);
+ }
+ }
+ return true;
+ };
+
+ // Channels used to store the transformed original channels if needed.
+ ImageF temp_channels[kConvertMaxChannels];
+ if (undo_orientation != Orientation::kIdentity) {
+ for (size_t c = 0; c < num_channels; ++c) {
+ if (channels[c]) {
+ JXL_RETURN_IF_ERROR(UndoOrientation(undo_orientation, *channels[c],
+ temp_channels[c], pool));
+ channels[c] = &(temp_channels[c]);
+ }
+ }
+ }
+
+ // First channel may not be nullptr.
+ size_t xsize = channels[0]->xsize();
+ size_t ysize = channels[0]->ysize();
+ if (stride < bytes_per_pixel * xsize) {
+ return JXL_FAILURE("stride is smaller than scanline width in bytes: %" PRIuS
+ " vs %" PRIuS,
+ stride, bytes_per_pixel * xsize);
+ }
+ if (!out_callback.IsPresent() &&
+ out_size < (ysize - 1) * stride + bytes_per_pixel * xsize) {
+ return JXL_FAILURE("out_size is too small to store image");
+ }
+
+ const bool little_endian =
+ endianness == JXL_LITTLE_ENDIAN ||
+ (endianness == JXL_NATIVE_ENDIAN && IsLittleEndian());
+
+ // Handle the case where a channel is nullptr by creating a single row with
+ // ones to use instead.
+ ImageF ones;
+ for (size_t c = 0; c < num_channels; ++c) {
+ if (!channels[c]) {
+ ones = ImageF(xsize, 1);
+ FillImage(1.0f, &ones);
+ break;
+ }
+ }
+
+ if (float_out) {
+ if (bits_per_sample == 16) {
+ bool swap_endianness = little_endian != IsLittleEndian();
+ Plane<hwy::float16_t> f16_cache;
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, static_cast<uint32_t>(ysize),
+ [&](size_t num_threads) {
+ f16_cache =
+ Plane<hwy::float16_t>(xsize, num_channels * num_threads);
+ return InitOutCallback(num_threads);
+ },
+ [&](const uint32_t task, const size_t thread) {
+ const int64_t y = task;
+ const float* JXL_RESTRICT row_in[kConvertMaxChannels];
+ for (size_t c = 0; c < num_channels; c++) {
+ row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0);
+ }
+ hwy::float16_t* JXL_RESTRICT row_f16[kConvertMaxChannels];
+ for (size_t c = 0; c < num_channels; c++) {
+ row_f16[c] = f16_cache.Row(c + thread * num_channels);
+ HWY_DYNAMIC_DISPATCH(FloatToF16)
+ (row_in[c], row_f16[c], xsize);
+ }
+ uint8_t* row_out =
+ out_callback.IsPresent()
+ ? row_out_callback[thread].data()
+ : &(reinterpret_cast<uint8_t*>(out_image))[stride * y];
+ // interleave the one scanline
+ hwy::float16_t* row_f16_out =
+ reinterpret_cast<hwy::float16_t*>(row_out);
+ for (size_t x = 0; x < xsize; x++) {
+ for (size_t c = 0; c < num_channels; c++) {
+ row_f16_out[x * num_channels + c] = row_f16[c][x];
+ }
+ }
+ if (swap_endianness) {
+ size_t size = xsize * num_channels * 2;
+ for (size_t i = 0; i < size; i += 2) {
+ std::swap(row_out[i + 0], row_out[i + 1]);
+ }
+ }
+ if (out_callback.IsPresent()) {
+ out_callback.run(out_run_opaque.get(), thread, 0, y, xsize,
+ row_out);
+ }
+ },
+ "ConvertF16"));
+ } else if (bits_per_sample == 32) {
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, static_cast<uint32_t>(ysize),
+ [&](size_t num_threads) { return InitOutCallback(num_threads); },
+ [&](const uint32_t task, const size_t thread) {
+ const int64_t y = task;
+ uint8_t* row_out =
+ out_callback.IsPresent()
+ ? row_out_callback[thread].data()
+ : &(reinterpret_cast<uint8_t*>(out_image))[stride * y];
+ const float* JXL_RESTRICT row_in[kConvertMaxChannels];
+ for (size_t c = 0; c < num_channels; c++) {
+ row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0);
+ }
+ if (little_endian) {
+ StoreFloatRow<StoreLEFloat>(row_in, num_channels, xsize, row_out);
+ } else {
+ StoreFloatRow<StoreBEFloat>(row_in, num_channels, xsize, row_out);
+ }
+ if (out_callback.IsPresent()) {
+ out_callback.run(out_run_opaque.get(), thread, 0, y, xsize,
+ row_out);
+ }
+ },
+ "ConvertFloat"));
+ } else {
+ return JXL_FAILURE("float other than 16-bit and 32-bit not supported");
+ }
+ } else {
+ // Multiplier to convert from floating point 0-1 range to the integer
+ // range.
+ float mul = (1ull << bits_per_sample) - 1;
+ Plane<uint32_t> u32_cache;
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, static_cast<uint32_t>(ysize),
+ [&](size_t num_threads) {
+ u32_cache = Plane<uint32_t>(xsize, num_channels * num_threads);
+ return InitOutCallback(num_threads);
+ },
+ [&](const uint32_t task, const size_t thread) {
+ const int64_t y = task;
+ uint8_t* row_out =
+ out_callback.IsPresent()
+ ? row_out_callback[thread].data()
+ : &(reinterpret_cast<uint8_t*>(out_image))[stride * y];
+ const float* JXL_RESTRICT row_in[kConvertMaxChannels];
+ for (size_t c = 0; c < num_channels; c++) {
+ row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0);
+ }
+ uint32_t* JXL_RESTRICT row_u32[kConvertMaxChannels];
+ for (size_t c = 0; c < num_channels; c++) {
+ row_u32[c] = u32_cache.Row(c + thread * num_channels);
+ // row_u32[] is a per-thread temporary row storage, this isn't
+ // intended to be initialized on a previous run.
+ msan::PoisonMemory(row_u32[c], xsize * sizeof(row_u32[c][0]));
+ HWY_DYNAMIC_DISPATCH(FloatToU32)
+ (row_in[c], row_u32[c], xsize, mul, bits_per_sample);
+ }
+ if (bits_per_sample <= 8) {
+ StoreUintRow<Store8>(row_u32, num_channels, xsize, 1, row_out);
+ } else {
+ if (little_endian) {
+ StoreUintRow<StoreLE16>(row_u32, num_channels, xsize, 2, row_out);
+ } else {
+ StoreUintRow<StoreBE16>(row_u32, num_channels, xsize, 2, row_out);
+ }
+ }
+ if (out_callback.IsPresent()) {
+ out_callback.run(out_run_opaque.get(), thread, 0, y, xsize,
+ row_out);
+ }
+ },
+ "ConvertUint"));
+ }
+ return true;
+}
+
+} // namespace
+
+Status ConvertToExternal(const jxl::ImageBundle& ib, size_t bits_per_sample,
+ bool float_out, size_t num_channels,
+ JxlEndianness endianness, size_t stride,
+ jxl::ThreadPool* pool, void* out_image,
+ size_t out_size, const PixelCallback& out_callback,
+ jxl::Orientation undo_orientation,
+ bool unpremul_alpha) {
+ bool want_alpha = num_channels == 2 || num_channels == 4;
+ size_t color_channels = num_channels <= 2 ? 1 : 3;
+
+ const Image3F* color = &ib.color();
+ // Undo premultiplied alpha.
+ Image3F unpremul;
+ if (ib.AlphaIsPremultiplied() && ib.HasAlpha() && unpremul_alpha) {
+ unpremul = Image3F(color->xsize(), color->ysize());
+ CopyImageTo(*color, &unpremul);
+ for (size_t y = 0; y < unpremul.ysize(); y++) {
+ UnpremultiplyAlpha(unpremul.PlaneRow(0, y), unpremul.PlaneRow(1, y),
+ unpremul.PlaneRow(2, y), ib.alpha().Row(y),
+ unpremul.xsize());
+ }
+ color = &unpremul;
+ }
+
+ const ImageF* channels[kConvertMaxChannels];
+ size_t c = 0;
+ for (; c < color_channels; c++) {
+ channels[c] = &color->Plane(c);
+ }
+ if (want_alpha) {
+ channels[c++] = ib.HasAlpha() ? &ib.alpha() : nullptr;
+ }
+ JXL_ASSERT(num_channels == c);
+
+ return ConvertChannelsToExternal(
+ channels, num_channels, bits_per_sample, float_out, endianness, stride,
+ pool, out_image, out_size, out_callback, undo_orientation);
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/dec_external_image.h b/third_party/jpeg-xl/lib/jxl/dec_external_image.h
new file mode 100644
index 0000000000..6ca7abff62
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_external_image.h
@@ -0,0 +1,46 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_EXTERNAL_IMAGE_H_
+#define LIB_JXL_DEC_EXTERNAL_IMAGE_H_
+
+// Interleaved image for color transforms and Codec.
+
+#include <jxl/decode.h>
+#include <jxl/types.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+// Converts ib to interleaved void* pixel buffer with the given format.
+// bits_per_sample: must be 16 or 32 if float_out is true, and at most 16
+// if it is false. No bit packing is done.
+// num_channels: must be 1, 2, 3 or 4 for gray, gray+alpha, RGB, RGB+alpha.
+// This supports the features needed for the C API and does not perform
+// color space conversion.
+// TODO(lode): support rectangle crop.
+// stride_out is output scanline size in bytes, must be >=
+// output_xsize * output_bytes_per_pixel.
+// undo_orientation is an EXIF orientation to undo. Depending on the
+// orientation, the output xsize and ysize are swapped compared to input
+// xsize and ysize.
+Status ConvertToExternal(const jxl::ImageBundle& ib, size_t bits_per_sample,
+ bool float_out, size_t num_channels,
+ JxlEndianness endianness, size_t stride_out,
+ jxl::ThreadPool* thread_pool, void* out_image,
+ size_t out_size, const PixelCallback& out_callback,
+ jxl::Orientation undo_orientation,
+ bool unpremul_alpha = false);
+
+} // namespace jxl
+
+#endif // LIB_JXL_DEC_EXTERNAL_IMAGE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dec_external_image_gbench.cc b/third_party/jpeg-xl/lib/jxl/dec_external_image_gbench.cc
new file mode 100644
index 0000000000..c87a4d5f36
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_external_image_gbench.cc
@@ -0,0 +1,56 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "benchmark/benchmark.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+namespace {
+
+// Decoder case, interleaves an internal float image.
+void BM_DecExternalImage_ConvertImageRGBA(benchmark::State& state) {
+ const size_t kNumIter = 5;
+ size_t xsize = state.range();
+ size_t ysize = state.range();
+ size_t num_channels = 4;
+
+ ImageMetadata im;
+ im.SetAlphaBits(8);
+ ImageBundle ib(&im);
+ Image3F color(xsize, ysize);
+ ZeroFillImage(&color);
+ ib.SetFromImage(std::move(color), ColorEncoding::SRGB());
+ ImageF alpha(xsize, ysize);
+ ZeroFillImage(&alpha);
+ ib.SetAlpha(std::move(alpha));
+
+ const size_t bytes_per_row = xsize * num_channels;
+ std::vector<uint8_t> interleaved(bytes_per_row * ysize);
+
+ for (auto _ : state) {
+ for (size_t i = 0; i < kNumIter; ++i) {
+ JXL_CHECK(ConvertToExternal(
+ ib,
+ /*bits_per_sample=*/8,
+ /*float_out=*/false, num_channels, JXL_NATIVE_ENDIAN,
+ /*stride*/ bytes_per_row,
+ /*thread_pool=*/nullptr, interleaved.data(), interleaved.size(),
+ /*out_callback=*/{},
+ /*undo_orientation=*/jxl::Orientation::kIdentity));
+ }
+ }
+
+ // Pixels per second.
+ state.SetItemsProcessed(kNumIter * state.iterations() * xsize * ysize);
+ state.SetBytesProcessed(kNumIter * state.iterations() * interleaved.size());
+}
+
+BENCHMARK(BM_DecExternalImage_ConvertImageRGBA)
+ ->RangeMultiplier(2)
+ ->Range(256, 2048);
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/dec_frame.cc b/third_party/jpeg-xl/lib/jxl/dec_frame.cc
new file mode 100644
index 0000000000..98508e431b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_frame.cc
@@ -0,0 +1,878 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_frame.h"
+
+#include <jxl/types.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <atomic>
+#include <hwy/aligned_allocator.h>
+#include <numeric>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_group.h"
+#include "lib/jxl/dec_modular.h"
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/passes_state.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/splines.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+
+namespace {
+Status DecodeGlobalDCInfo(BitReader* reader, bool is_jpeg,
+ PassesDecoderState* state, ThreadPool* pool) {
+ PROFILER_FUNC;
+ JXL_RETURN_IF_ERROR(state->shared_storage.quantizer.Decode(reader));
+
+ JXL_RETURN_IF_ERROR(
+ DecodeBlockCtxMap(reader, &state->shared_storage.block_ctx_map));
+
+ JXL_RETURN_IF_ERROR(state->shared_storage.cmap.DecodeDC(reader));
+
+ // Pre-compute info for decoding a group.
+ if (is_jpeg) {
+ state->shared_storage.quantizer.ClearDCMul(); // Don't dequant DC
+ }
+
+ state->shared_storage.ac_strategy.FillInvalid();
+ return true;
+}
+} // namespace
+
+Status DecodeFrame(PassesDecoderState* dec_state, ThreadPool* JXL_RESTRICT pool,
+ const uint8_t* next_in, size_t avail_in,
+ ImageBundle* decoded, const CodecMetadata& metadata,
+ bool use_slow_rendering_pipeline) {
+ FrameDecoder frame_decoder(dec_state, metadata, pool,
+ use_slow_rendering_pipeline);
+
+ BitReader reader(Span<const uint8_t>(next_in, avail_in));
+ JXL_RETURN_IF_ERROR(frame_decoder.InitFrame(&reader, decoded,
+ /*is_preview=*/false));
+ JXL_RETURN_IF_ERROR(frame_decoder.InitFrameOutput());
+
+ JXL_RETURN_IF_ERROR(reader.AllReadsWithinBounds());
+ size_t header_bytes = reader.TotalBitsConsumed() / kBitsPerByte;
+ JXL_RETURN_IF_ERROR(reader.Close());
+
+ size_t processed_bytes = header_bytes;
+ Status close_ok = true;
+ std::vector<std::unique_ptr<BitReader>> section_readers;
+ {
+ std::vector<std::unique_ptr<BitReaderScopedCloser>> section_closers;
+ std::vector<FrameDecoder::SectionInfo> section_info;
+ std::vector<FrameDecoder::SectionStatus> section_status;
+ size_t pos = header_bytes;
+ size_t index = 0;
+ for (auto toc_entry : frame_decoder.Toc()) {
+ JXL_RETURN_IF_ERROR(pos + toc_entry.size <= avail_in);
+ auto br = make_unique<BitReader>(
+ Span<const uint8_t>(next_in + pos, toc_entry.size));
+ section_info.emplace_back(
+ FrameDecoder::SectionInfo{br.get(), toc_entry.id, index++});
+ section_closers.emplace_back(
+ make_unique<BitReaderScopedCloser>(br.get(), &close_ok));
+ section_readers.emplace_back(std::move(br));
+ pos += toc_entry.size;
+ }
+ section_status.resize(section_info.size());
+ JXL_RETURN_IF_ERROR(frame_decoder.ProcessSections(
+ section_info.data(), section_info.size(), section_status.data()));
+ for (size_t i = 0; i < section_status.size(); i++) {
+ JXL_RETURN_IF_ERROR(section_status[i] == FrameDecoder::kDone);
+ processed_bytes += frame_decoder.Toc()[i].size;
+ }
+ }
+ JXL_RETURN_IF_ERROR(close_ok);
+ JXL_RETURN_IF_ERROR(frame_decoder.FinalizeFrame());
+ decoded->SetDecodedBytes(processed_bytes);
+ return true;
+}
+
+Status FrameDecoder::InitFrame(BitReader* JXL_RESTRICT br, ImageBundle* decoded,
+ bool is_preview) {
+ PROFILER_FUNC;
+ decoded_ = decoded;
+ JXL_ASSERT(is_finalized_);
+
+ // Reset the dequantization matrices to their default values.
+ dec_state_->shared_storage.matrices = DequantMatrices();
+
+ frame_header_.nonserialized_is_preview = is_preview;
+ JXL_ASSERT(frame_header_.nonserialized_metadata != nullptr);
+ JXL_RETURN_IF_ERROR(ReadFrameHeader(br, &frame_header_));
+ frame_dim_ = frame_header_.ToFrameDimensions();
+ JXL_DEBUG_V(2, "FrameHeader: %s", frame_header_.DebugString().c_str());
+
+ const size_t num_passes = frame_header_.passes.num_passes;
+ const size_t num_groups = frame_dim_.num_groups;
+
+ // If the previous frame was not a kRegularFrame, `decoded` may have different
+ // dimensions; must reset to avoid errors.
+ decoded->RemoveColor();
+ decoded->ClearExtraChannels();
+
+ decoded->duration = frame_header_.animation_frame.duration;
+
+ if (!frame_header_.nonserialized_is_preview &&
+ (frame_header_.is_last || frame_header_.animation_frame.duration > 0) &&
+ (frame_header_.frame_type == kRegularFrame ||
+ frame_header_.frame_type == kSkipProgressive)) {
+ ++dec_state_->visible_frame_index;
+ dec_state_->nonvisible_frame_index = 0;
+ } else {
+ ++dec_state_->nonvisible_frame_index;
+ }
+
+ // Read TOC.
+ const bool has_ac_global = true;
+ const size_t toc_entries = NumTocEntries(num_groups, frame_dim_.num_dc_groups,
+ num_passes, has_ac_global);
+ std::vector<uint32_t> sizes;
+ std::vector<coeff_order_t> permutation;
+ JXL_RETURN_IF_ERROR(ReadToc(toc_entries, br, &sizes, &permutation));
+ bool have_permutation = !permutation.empty();
+ toc_.resize(toc_entries);
+ section_sizes_sum_ = 0;
+ for (size_t i = 0; i < toc_entries; ++i) {
+ toc_[i].size = sizes[i];
+ size_t index = have_permutation ? permutation[i] : i;
+ toc_[index].id = i;
+ if (section_sizes_sum_ + toc_[i].size < section_sizes_sum_) {
+ return JXL_FAILURE("group offset overflow");
+ }
+ section_sizes_sum_ += toc_[i].size;
+ }
+
+ JXL_DASSERT((br->TotalBitsConsumed() % kBitsPerByte) == 0);
+ const size_t group_codes_begin = br->TotalBitsConsumed() / kBitsPerByte;
+ JXL_DASSERT(!toc_.empty());
+
+ // Overflow check.
+ if (group_codes_begin + section_sizes_sum_ < group_codes_begin) {
+ return JXL_FAILURE("Invalid group codes");
+ }
+
+ if (!frame_header_.chroma_subsampling.Is444() &&
+ !(frame_header_.flags & FrameHeader::kSkipAdaptiveDCSmoothing) &&
+ frame_header_.encoding == FrameEncoding::kVarDCT) {
+ return JXL_FAILURE(
+ "Non-444 chroma subsampling is not allowed when adaptive DC "
+ "smoothing is enabled");
+ }
+ return true;
+}
+
+Status FrameDecoder::InitFrameOutput() {
+ JXL_RETURN_IF_ERROR(
+ InitializePassesSharedState(frame_header_, &dec_state_->shared_storage));
+ JXL_RETURN_IF_ERROR(dec_state_->Init());
+ modular_frame_decoder_.Init(frame_dim_);
+
+ if (decoded_->IsJPEG()) {
+ if (frame_header_.encoding == FrameEncoding::kModular) {
+ return JXL_FAILURE("Cannot output JPEG from Modular");
+ }
+ jpeg::JPEGData* jpeg_data = decoded_->jpeg_data.get();
+ size_t num_components = jpeg_data->components.size();
+ if (num_components != 1 && num_components != 3) {
+ return JXL_FAILURE("Invalid number of components");
+ }
+ if (frame_header_.nonserialized_metadata->m.xyb_encoded) {
+ return JXL_FAILURE("Cannot decode to JPEG an XYB image");
+ }
+ auto jpeg_c_map = JpegOrder(ColorTransform::kYCbCr, num_components == 1);
+ decoded_->jpeg_data->width = frame_dim_.xsize;
+ decoded_->jpeg_data->height = frame_dim_.ysize;
+ for (size_t c = 0; c < num_components; c++) {
+ auto& component = jpeg_data->components[jpeg_c_map[c]];
+ component.width_in_blocks =
+ frame_dim_.xsize_blocks >> frame_header_.chroma_subsampling.HShift(c);
+ component.height_in_blocks =
+ frame_dim_.ysize_blocks >> frame_header_.chroma_subsampling.VShift(c);
+ component.h_samp_factor =
+ 1 << frame_header_.chroma_subsampling.RawHShift(c);
+ component.v_samp_factor =
+ 1 << frame_header_.chroma_subsampling.RawVShift(c);
+ component.coeffs.resize(component.width_in_blocks *
+ component.height_in_blocks * jxl::kDCTBlockSize);
+ }
+ }
+
+ // Clear the state.
+ decoded_dc_global_ = false;
+ decoded_ac_global_ = false;
+ is_finalized_ = false;
+ finalized_dc_ = false;
+ num_sections_done_ = 0;
+ decoded_dc_groups_.clear();
+ decoded_dc_groups_.resize(frame_dim_.num_dc_groups);
+ decoded_passes_per_ac_group_.clear();
+ decoded_passes_per_ac_group_.resize(frame_dim_.num_groups, 0);
+ processed_section_.clear();
+ processed_section_.resize(toc_.size());
+ allocated_ = false;
+ return true;
+}
+
+Status FrameDecoder::ProcessDCGlobal(BitReader* br) {
+ PROFILER_FUNC;
+ PassesSharedState& shared = dec_state_->shared_storage;
+ if (shared.frame_header.flags & FrameHeader::kPatches) {
+ bool uses_extra_channels = false;
+ JXL_RETURN_IF_ERROR(shared.image_features.patches.Decode(
+ br, frame_dim_.xsize_padded, frame_dim_.ysize_padded,
+ &uses_extra_channels));
+ if (uses_extra_channels && frame_header_.upsampling != 1) {
+ for (size_t ecups : frame_header_.extra_channel_upsampling) {
+ if (ecups != frame_header_.upsampling) {
+ return JXL_FAILURE(
+ "Cannot use extra channels in patches if color channels are "
+ "subsampled differently from extra channels");
+ }
+ }
+ }
+ } else {
+ shared.image_features.patches.Clear();
+ }
+ shared.image_features.splines.Clear();
+ if (shared.frame_header.flags & FrameHeader::kSplines) {
+ JXL_RETURN_IF_ERROR(shared.image_features.splines.Decode(
+ br, frame_dim_.xsize * frame_dim_.ysize));
+ }
+ if (shared.frame_header.flags & FrameHeader::kNoise) {
+ JXL_RETURN_IF_ERROR(DecodeNoise(br, &shared.image_features.noise_params));
+ }
+ JXL_RETURN_IF_ERROR(dec_state_->shared_storage.matrices.DecodeDC(br));
+
+ if (frame_header_.encoding == FrameEncoding::kVarDCT) {
+ JXL_RETURN_IF_ERROR(
+ jxl::DecodeGlobalDCInfo(br, decoded_->IsJPEG(), dec_state_, pool_));
+ }
+ // Splines' draw cache uses the color correlation map.
+ if (shared.frame_header.flags & FrameHeader::kSplines) {
+ JXL_RETURN_IF_ERROR(shared.image_features.splines.InitializeDrawCache(
+ frame_dim_.xsize_upsampled, frame_dim_.ysize_upsampled,
+ dec_state_->shared->cmap));
+ }
+ Status dec_status = modular_frame_decoder_.DecodeGlobalInfo(
+ br, frame_header_, /*allow_truncated_group=*/false);
+ if (dec_status.IsFatalError()) return dec_status;
+ if (dec_status) {
+ decoded_dc_global_ = true;
+ }
+ return dec_status;
+}
+
+Status FrameDecoder::ProcessDCGroup(size_t dc_group_id, BitReader* br) {
+ PROFILER_FUNC;
+ const size_t gx = dc_group_id % frame_dim_.xsize_dc_groups;
+ const size_t gy = dc_group_id / frame_dim_.xsize_dc_groups;
+ const LoopFilter& lf = dec_state_->shared->frame_header.loop_filter;
+ if (frame_header_.encoding == FrameEncoding::kVarDCT &&
+ !(frame_header_.flags & FrameHeader::kUseDcFrame)) {
+ JXL_RETURN_IF_ERROR(
+ modular_frame_decoder_.DecodeVarDCTDC(dc_group_id, br, dec_state_));
+ }
+ const Rect mrect(gx * frame_dim_.dc_group_dim, gy * frame_dim_.dc_group_dim,
+ frame_dim_.dc_group_dim, frame_dim_.dc_group_dim);
+ JXL_RETURN_IF_ERROR(modular_frame_decoder_.DecodeGroup(
+ mrect, br, 3, 1000, ModularStreamId::ModularDC(dc_group_id),
+ /*zerofill=*/false, nullptr, nullptr,
+ /*allow_truncated=*/false));
+ if (frame_header_.encoding == FrameEncoding::kVarDCT) {
+ JXL_RETURN_IF_ERROR(
+ modular_frame_decoder_.DecodeAcMetadata(dc_group_id, br, dec_state_));
+ } else if (lf.epf_iters > 0) {
+ FillImage(kInvSigmaNum / lf.epf_sigma_for_modular, &dec_state_->sigma);
+ }
+ decoded_dc_groups_[dc_group_id] = uint8_t{true};
+ return true;
+}
+
+void FrameDecoder::FinalizeDC() {
+ // Do Adaptive DC smoothing if enabled. This *must* happen between all the
+ // ProcessDCGroup and ProcessACGroup.
+ if (frame_header_.encoding == FrameEncoding::kVarDCT &&
+ !(frame_header_.flags & FrameHeader::kSkipAdaptiveDCSmoothing) &&
+ !(frame_header_.flags & FrameHeader::kUseDcFrame)) {
+ AdaptiveDCSmoothing(dec_state_->shared->quantizer.MulDC(),
+ &dec_state_->shared_storage.dc_storage, pool_);
+ }
+
+ finalized_dc_ = true;
+}
+
+Status FrameDecoder::AllocateOutput() {
+ if (allocated_) return true;
+ modular_frame_decoder_.MaybeDropFullImage();
+ decoded_->origin = dec_state_->shared->frame_header.frame_origin;
+ JXL_RETURN_IF_ERROR(dec_state_->InitForAC(nullptr));
+ allocated_ = true;
+ return true;
+}
+
+Status FrameDecoder::ProcessACGlobal(BitReader* br) {
+ JXL_CHECK(finalized_dc_);
+
+ // Decode AC group.
+ if (frame_header_.encoding == FrameEncoding::kVarDCT) {
+ JXL_RETURN_IF_ERROR(dec_state_->shared_storage.matrices.Decode(
+ br, &modular_frame_decoder_));
+ JXL_RETURN_IF_ERROR(dec_state_->shared_storage.matrices.EnsureComputed(
+ dec_state_->used_acs));
+
+ size_t num_histo_bits =
+ CeilLog2Nonzero(dec_state_->shared->frame_dim.num_groups);
+ dec_state_->shared_storage.num_histograms =
+ 1 + br->ReadBits(num_histo_bits);
+
+ dec_state_->code.resize(kMaxNumPasses);
+ dec_state_->context_map.resize(kMaxNumPasses);
+ // Read coefficient orders and histograms.
+ size_t max_num_bits_ac = 0;
+ for (size_t i = 0;
+ i < dec_state_->shared_storage.frame_header.passes.num_passes; i++) {
+ uint16_t used_orders = U32Coder::Read(kOrderEnc, br);
+ JXL_RETURN_IF_ERROR(DecodeCoeffOrders(
+ used_orders, dec_state_->used_acs,
+ &dec_state_->shared_storage
+ .coeff_orders[i * dec_state_->shared_storage.coeff_order_size],
+ br));
+ size_t num_contexts =
+ dec_state_->shared->num_histograms *
+ dec_state_->shared_storage.block_ctx_map.NumACContexts();
+ JXL_RETURN_IF_ERROR(DecodeHistograms(
+ br, num_contexts, &dec_state_->code[i], &dec_state_->context_map[i]));
+ // Add extra values to enable the cheat in hot loop of DecodeACVarBlock.
+ dec_state_->context_map[i].resize(
+ num_contexts + kZeroDensityContextLimit - kZeroDensityContextCount);
+ max_num_bits_ac =
+ std::max(max_num_bits_ac, dec_state_->code[i].max_num_bits);
+ }
+ max_num_bits_ac += CeilLog2Nonzero(
+ dec_state_->shared_storage.frame_header.passes.num_passes);
+ // 16-bit buffer for decoding to JPEG are not implemented.
+ // TODO(veluca): figure out the exact limit - 16 should still work with
+ // 16-bit buffers, but we are excluding it for safety.
+ bool use_16_bit = max_num_bits_ac < 16 && !decoded_->IsJPEG();
+ bool store = frame_header_.passes.num_passes > 1;
+ size_t xs = store ? kGroupDim * kGroupDim : 0;
+ size_t ys = store ? frame_dim_.num_groups : 0;
+ if (use_16_bit) {
+ dec_state_->coefficients = make_unique<ACImageT<int16_t>>(xs, ys);
+ } else {
+ dec_state_->coefficients = make_unique<ACImageT<int32_t>>(xs, ys);
+ }
+ if (store) {
+ dec_state_->coefficients->ZeroFill();
+ }
+ }
+
+ // Set JPEG decoding data.
+ if (decoded_->IsJPEG()) {
+ decoded_->color_transform = frame_header_.color_transform;
+ decoded_->chroma_subsampling = frame_header_.chroma_subsampling;
+ const std::vector<QuantEncoding>& qe =
+ dec_state_->shared_storage.matrices.encodings();
+ if (qe.empty() || qe[0].mode != QuantEncoding::Mode::kQuantModeRAW ||
+ std::abs(qe[0].qraw.qtable_den - 1.f / (8 * 255)) > 1e-8f) {
+ return JXL_FAILURE(
+ "Quantization table is not a JPEG quantization table.");
+ }
+ jpeg::JPEGData* jpeg_data = decoded_->jpeg_data.get();
+ size_t num_components = jpeg_data->components.size();
+ bool is_gray = (num_components == 1);
+ auto jpeg_c_map = JpegOrder(frame_header_.color_transform, is_gray);
+ size_t qt_set = 0;
+ for (size_t c = 0; c < num_components; c++) {
+ // TODO(eustas): why 1-st quant table for gray?
+ size_t quant_c = is_gray ? 1 : c;
+ size_t qpos = jpeg_data->components[jpeg_c_map[c]].quant_idx;
+ JXL_CHECK(qpos != jpeg_data->quant.size());
+ qt_set |= 1 << qpos;
+ for (size_t x = 0; x < 8; x++) {
+ for (size_t y = 0; y < 8; y++) {
+ jpeg_data->quant[qpos].values[x * 8 + y] =
+ (*qe[0].qraw.qtable)[quant_c * 64 + y * 8 + x];
+ }
+ }
+ }
+ for (size_t i = 0; i < jpeg_data->quant.size(); i++) {
+ if (qt_set & (1 << i)) continue;
+ if (i == 0) return JXL_FAILURE("First quant table unused.");
+ // Unused quant table is set to copy of previous quant table
+ for (size_t j = 0; j < 64; j++) {
+ jpeg_data->quant[i].values[j] = jpeg_data->quant[i - 1].values[j];
+ }
+ }
+ }
+ decoded_ac_global_ = true;
+ return true;
+}
+
+Status FrameDecoder::ProcessACGroup(size_t ac_group_id,
+ BitReader* JXL_RESTRICT* br,
+ size_t num_passes, size_t thread,
+ bool force_draw, bool dc_only) {
+ PROFILER_ZONE("process_group");
+ size_t group_dim = frame_dim_.group_dim;
+ const size_t gx = ac_group_id % frame_dim_.xsize_groups;
+ const size_t gy = ac_group_id / frame_dim_.xsize_groups;
+ const size_t x = gx * group_dim;
+ const size_t y = gy * group_dim;
+ JXL_DEBUG_V(3,
+ "Processing AC group %" PRIuS "(%" PRIuS ",%" PRIuS
+ ") group_dim: %" PRIuS " decoded passes: %u new passes: %" PRIuS,
+ ac_group_id, gx, gy, group_dim,
+ decoded_passes_per_ac_group_[ac_group_id], num_passes);
+
+ RenderPipelineInput render_pipeline_input =
+ dec_state_->render_pipeline->GetInputBuffers(ac_group_id, thread);
+
+ bool should_run_pipeline = true;
+
+ if (frame_header_.encoding == FrameEncoding::kVarDCT) {
+ group_dec_caches_[thread].InitOnce(frame_header_.passes.num_passes,
+ dec_state_->used_acs);
+ JXL_RETURN_IF_ERROR(DecodeGroup(br, num_passes, ac_group_id, dec_state_,
+ &group_dec_caches_[thread], thread,
+ render_pipeline_input, decoded_,
+ decoded_passes_per_ac_group_[ac_group_id],
+ force_draw, dc_only, &should_run_pipeline));
+ }
+
+ // don't limit to image dimensions here (is done in DecodeGroup)
+ const Rect mrect(x, y, group_dim, group_dim);
+ bool modular_ready = false;
+ size_t pass0 = decoded_passes_per_ac_group_[ac_group_id];
+ size_t pass1 =
+ force_draw ? frame_header_.passes.num_passes : pass0 + num_passes;
+ for (size_t i = pass0; i < pass1; ++i) {
+ int minShift, maxShift;
+ frame_header_.passes.GetDownsamplingBracket(i, minShift, maxShift);
+ bool modular_pass_ready = true;
+ if (i < pass0 + num_passes) {
+ JXL_RETURN_IF_ERROR(modular_frame_decoder_.DecodeGroup(
+ mrect, br[i - pass0], minShift, maxShift,
+ ModularStreamId::ModularAC(ac_group_id, i),
+ /*zerofill=*/false, dec_state_, &render_pipeline_input,
+ /*allow_truncated=*/false, &modular_pass_ready));
+ } else {
+ JXL_RETURN_IF_ERROR(modular_frame_decoder_.DecodeGroup(
+ mrect, nullptr, minShift, maxShift,
+ ModularStreamId::ModularAC(ac_group_id, i), /*zerofill=*/true,
+ dec_state_, &render_pipeline_input,
+ /*allow_truncated=*/false, &modular_pass_ready));
+ }
+ if (modular_pass_ready) modular_ready = true;
+ }
+ decoded_passes_per_ac_group_[ac_group_id] += num_passes;
+
+ if ((frame_header_.flags & FrameHeader::kNoise) != 0) {
+ PROFILER_ZONE("GenerateNoise");
+ size_t noise_c_start =
+ 3 + frame_header_.nonserialized_metadata->m.num_extra_channels;
+ // When the color channels are downsampled, we need to generate more noise
+ // input for the current group than just the group dimensions.
+ std::pair<ImageF*, Rect> rects[3];
+ for (size_t iy = 0; iy < frame_header_.upsampling; iy++) {
+ for (size_t ix = 0; ix < frame_header_.upsampling; ix++) {
+ for (size_t c = 0; c < 3; c++) {
+ auto r = render_pipeline_input.GetBuffer(noise_c_start + c);
+ rects[c].first = r.first;
+ size_t x1 = r.second.x0() + r.second.xsize();
+ size_t y1 = r.second.y0() + r.second.ysize();
+ rects[c].second = Rect(r.second.x0() + ix * group_dim,
+ r.second.y0() + iy * group_dim, group_dim,
+ group_dim, x1, y1);
+ }
+ Random3Planes(dec_state_->visible_frame_index,
+ dec_state_->nonvisible_frame_index,
+ (gx * frame_header_.upsampling + ix) * group_dim,
+ (gy * frame_header_.upsampling + iy) * group_dim,
+ rects[0], rects[1], rects[2]);
+ }
+ }
+ }
+
+ if (!modular_frame_decoder_.UsesFullImage() && !decoded_->IsJPEG()) {
+ if (should_run_pipeline && modular_ready) {
+ render_pipeline_input.Done();
+ } else if (force_draw) {
+ return JXL_FAILURE("Modular group decoding failed.");
+ }
+ }
+ return true;
+}
+
+void FrameDecoder::MarkSections(const SectionInfo* sections, size_t num,
+ SectionStatus* section_status) {
+ num_sections_done_ += num;
+ for (size_t i = 0; i < num; i++) {
+ if (section_status[i] != SectionStatus::kDone) {
+ processed_section_[sections[i].id] = false;
+ num_sections_done_--;
+ }
+ }
+}
+
+Status FrameDecoder::ProcessSections(const SectionInfo* sections, size_t num,
+ SectionStatus* section_status) {
+ if (num == 0) return true; // Nothing to process
+ std::fill(section_status, section_status + num, SectionStatus::kSkipped);
+ size_t dc_global_sec = num;
+ size_t ac_global_sec = num;
+ std::vector<size_t> dc_group_sec(frame_dim_.num_dc_groups, num);
+ std::vector<std::vector<size_t>> ac_group_sec(
+ frame_dim_.num_groups,
+ std::vector<size_t>(frame_header_.passes.num_passes, num));
+ // This keeps track of the number of ac passes we want to process during this
+ // call of ProcessSections.
+ std::vector<size_t> desired_num_ac_passes(frame_dim_.num_groups);
+ bool single_section =
+ frame_dim_.num_groups == 1 && frame_header_.passes.num_passes == 1;
+ if (single_section) {
+ JXL_ASSERT(num == 1);
+ JXL_ASSERT(sections[0].id == 0);
+ if (processed_section_[0] == false) {
+ processed_section_[0] = true;
+ ac_group_sec[0].resize(1);
+ dc_global_sec = ac_global_sec = dc_group_sec[0] = ac_group_sec[0][0] = 0;
+ desired_num_ac_passes[0] = 1;
+ } else {
+ section_status[0] = SectionStatus::kDuplicate;
+ }
+ } else {
+ size_t ac_global_index = frame_dim_.num_dc_groups + 1;
+ for (size_t i = 0; i < num; i++) {
+ JXL_ASSERT(sections[i].id < processed_section_.size());
+ if (processed_section_[sections[i].id]) {
+ section_status[i] = SectionStatus::kDuplicate;
+ continue;
+ }
+ if (sections[i].id == 0) {
+ dc_global_sec = i;
+ } else if (sections[i].id < ac_global_index) {
+ dc_group_sec[sections[i].id - 1] = i;
+ } else if (sections[i].id == ac_global_index) {
+ ac_global_sec = i;
+ } else {
+ size_t ac_idx = sections[i].id - ac_global_index - 1;
+ size_t acg = ac_idx % frame_dim_.num_groups;
+ size_t acp = ac_idx / frame_dim_.num_groups;
+ if (acp >= frame_header_.passes.num_passes) {
+ return JXL_FAILURE("Invalid section ID");
+ }
+ ac_group_sec[acg][acp] = i;
+ }
+ processed_section_[sections[i].id] = true;
+ }
+ // Count number of new passes per group.
+ for (size_t g = 0; g < ac_group_sec.size(); g++) {
+ size_t j = 0;
+ for (; j + decoded_passes_per_ac_group_[g] <
+ frame_header_.passes.num_passes;
+ j++) {
+ if (ac_group_sec[g][j + decoded_passes_per_ac_group_[g]] == num) {
+ break;
+ }
+ }
+ desired_num_ac_passes[g] = j;
+ }
+ }
+ if (dc_global_sec != num) {
+ Status dc_global_status = ProcessDCGlobal(sections[dc_global_sec].br);
+ if (dc_global_status.IsFatalError()) return dc_global_status;
+ if (dc_global_status) {
+ section_status[dc_global_sec] = SectionStatus::kDone;
+ } else {
+ section_status[dc_global_sec] = SectionStatus::kPartial;
+ }
+ }
+
+ std::atomic<bool> has_error{false};
+ if (decoded_dc_global_) {
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool_, 0, dc_group_sec.size(), ThreadPool::NoInit,
+ [this, &dc_group_sec, &num, &sections, &section_status, &has_error](
+ size_t i, size_t thread) {
+ if (dc_group_sec[i] != num) {
+ if (!ProcessDCGroup(i, sections[dc_group_sec[i]].br)) {
+ has_error = true;
+ } else {
+ section_status[dc_group_sec[i]] = SectionStatus::kDone;
+ }
+ }
+ },
+ "DecodeDCGroup"));
+ }
+ if (has_error) return JXL_FAILURE("Error in DC group");
+
+ if (*std::min_element(decoded_dc_groups_.begin(), decoded_dc_groups_.end()) &&
+ !finalized_dc_) {
+ PassesDecoderState::PipelineOptions pipeline_options;
+ pipeline_options.use_slow_render_pipeline = use_slow_rendering_pipeline_;
+ pipeline_options.coalescing = coalescing_;
+ pipeline_options.render_spotcolors = render_spotcolors_;
+ JXL_RETURN_IF_ERROR(
+ dec_state_->PreparePipeline(decoded_, pipeline_options));
+ FinalizeDC();
+ JXL_RETURN_IF_ERROR(AllocateOutput());
+ if (progressive_detail_ >= JxlProgressiveDetail::kDC) {
+ MarkSections(sections, num, section_status);
+ return true;
+ }
+ }
+
+ if (finalized_dc_ && ac_global_sec != num && !decoded_ac_global_) {
+ JXL_RETURN_IF_ERROR(ProcessACGlobal(sections[ac_global_sec].br));
+ section_status[ac_global_sec] = SectionStatus::kDone;
+ }
+
+ if (progressive_detail_ >= JxlProgressiveDetail::kLastPasses) {
+ // Mark that we only want the next progression pass.
+ size_t target_complete_passes = NextNumPassesToPause();
+ for (size_t i = 0; i < ac_group_sec.size(); i++) {
+ desired_num_ac_passes[i] =
+ std::min(desired_num_ac_passes[i],
+ target_complete_passes - decoded_passes_per_ac_group_[i]);
+ }
+ }
+
+ if (decoded_ac_global_) {
+ // Mark all the AC groups that we received as not complete yet.
+ for (size_t i = 0; i < ac_group_sec.size(); i++) {
+ if (desired_num_ac_passes[i] != 0) {
+ dec_state_->render_pipeline->ClearDone(i);
+ }
+ }
+
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool_, 0, ac_group_sec.size(),
+ [this](size_t num_threads) {
+ return PrepareStorage(num_threads,
+ decoded_passes_per_ac_group_.size());
+ },
+ [this, &ac_group_sec, &desired_num_ac_passes, &num, &sections,
+ &section_status, &has_error](size_t g, size_t thread) {
+ if (desired_num_ac_passes[g] == 0) {
+ // no new AC pass, nothing to do
+ return;
+ }
+ (void)num;
+ size_t first_pass = decoded_passes_per_ac_group_[g];
+ BitReader* JXL_RESTRICT readers[kMaxNumPasses];
+ for (size_t i = 0; i < desired_num_ac_passes[g]; i++) {
+ JXL_ASSERT(ac_group_sec[g][first_pass + i] != num);
+ readers[i] = sections[ac_group_sec[g][first_pass + i]].br;
+ }
+ if (!ProcessACGroup(g, readers, desired_num_ac_passes[g],
+ GetStorageLocation(thread, g),
+ /*force_draw=*/false, /*dc_only=*/false)) {
+ has_error = true;
+ } else {
+ for (size_t i = 0; i < desired_num_ac_passes[g]; i++) {
+ section_status[ac_group_sec[g][first_pass + i]] =
+ SectionStatus::kDone;
+ }
+ }
+ },
+ "DecodeGroup"));
+ }
+ if (has_error) return JXL_FAILURE("Error in AC group");
+
+ MarkSections(sections, num, section_status);
+ return true;
+}
+
+Status FrameDecoder::Flush() {
+ bool has_blending = frame_header_.blending_info.mode != BlendMode::kReplace ||
+ frame_header_.custom_size_or_origin;
+ for (const auto& blending_info_ec :
+ frame_header_.extra_channel_blending_info) {
+ if (blending_info_ec.mode != BlendMode::kReplace) has_blending = true;
+ }
+ // No early Flush() if blending is enabled.
+ if (has_blending && !is_finalized_) {
+ return false;
+ }
+ // No early Flush() - nothing to do - if the frame is a kSkipProgressive
+ // frame.
+ if (frame_header_.frame_type == FrameType::kSkipProgressive &&
+ !is_finalized_) {
+ return true;
+ }
+ if (decoded_->IsJPEG()) {
+ // Nothing to do.
+ return true;
+ }
+ JXL_RETURN_IF_ERROR(AllocateOutput());
+
+ uint32_t completely_decoded_ac_pass = *std::min_element(
+ decoded_passes_per_ac_group_.begin(), decoded_passes_per_ac_group_.end());
+ if (completely_decoded_ac_pass < frame_header_.passes.num_passes) {
+ // We don't have all AC yet: force a draw of all the missing areas.
+ // Mark all sections as not complete.
+ for (size_t i = 0; i < decoded_passes_per_ac_group_.size(); i++) {
+ if (decoded_passes_per_ac_group_[i] < frame_header_.passes.num_passes) {
+ dec_state_->render_pipeline->ClearDone(i);
+ }
+ }
+ std::atomic<bool> has_error{false};
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool_, 0, decoded_passes_per_ac_group_.size(),
+ [this](const size_t num_threads) {
+ return PrepareStorage(num_threads,
+ decoded_passes_per_ac_group_.size());
+ },
+ [this, &has_error](const uint32_t g, size_t thread) {
+ if (decoded_passes_per_ac_group_[g] ==
+ frame_header_.passes.num_passes) {
+ // This group was drawn already, nothing to do.
+ return;
+ }
+ BitReader* JXL_RESTRICT readers[kMaxNumPasses] = {};
+ bool ok = ProcessACGroup(
+ g, readers, /*num_passes=*/0, GetStorageLocation(thread, g),
+ /*force_draw=*/true, /*dc_only=*/!decoded_ac_global_);
+ if (!ok) has_error = true;
+ },
+ "ForceDrawGroup"));
+ if (has_error) {
+ return JXL_FAILURE("Drawing groups failed");
+ }
+ }
+
+ // undo global modular transforms and copy int pixel buffers to float ones
+ JXL_RETURN_IF_ERROR(modular_frame_decoder_.FinalizeDecoding(dec_state_, pool_,
+ is_finalized_));
+
+ return true;
+}
+
+int FrameDecoder::SavedAs(const FrameHeader& header) {
+ if (header.frame_type == FrameType::kDCFrame) {
+ // bits 16, 32, 64, 128 for DC level
+ return 16 << (header.dc_level - 1);
+ } else if (header.CanBeReferenced()) {
+ // bits 1, 2, 4 and 8 for the references
+ return 1 << header.save_as_reference;
+ }
+
+ return 0;
+}
+
+bool FrameDecoder::HasEverything() const {
+ if (!decoded_dc_global_) return false;
+ if (!decoded_ac_global_) return false;
+ for (auto& have_dc_group : decoded_dc_groups_) {
+ if (!have_dc_group) return false;
+ }
+ for (auto& nb_passes : decoded_passes_per_ac_group_) {
+ if (nb_passes < frame_header_.passes.num_passes) return false;
+ }
+ return true;
+}
+
+int FrameDecoder::References() const {
+ if (is_finalized_) {
+ return 0;
+ }
+ if (!HasEverything()) return 0;
+
+ int result = 0;
+
+ // Blending
+ if (frame_header_.frame_type == FrameType::kRegularFrame ||
+ frame_header_.frame_type == FrameType::kSkipProgressive) {
+ bool cropped = frame_header_.custom_size_or_origin;
+ if (cropped || frame_header_.blending_info.mode != BlendMode::kReplace) {
+ result |= (1 << frame_header_.blending_info.source);
+ }
+ const auto& extra = frame_header_.extra_channel_blending_info;
+ for (size_t i = 0; i < extra.size(); ++i) {
+ if (cropped || extra[i].mode != BlendMode::kReplace) {
+ result |= (1 << extra[i].source);
+ }
+ }
+ }
+
+ // Patches
+ if (frame_header_.flags & FrameHeader::kPatches) {
+ result |= dec_state_->shared->image_features.patches.GetReferences();
+ }
+
+ // DC Level
+ if (frame_header_.flags & FrameHeader::kUseDcFrame) {
+ // Reads from the next dc level
+ int dc_level = frame_header_.dc_level + 1;
+ // bits 16, 32, 64, 128 for DC level
+ result |= (16 << (dc_level - 1));
+ }
+
+ return result;
+}
+
+Status FrameDecoder::FinalizeFrame() {
+ if (is_finalized_) {
+ return JXL_FAILURE("FinalizeFrame called multiple times");
+ }
+ is_finalized_ = true;
+ if (decoded_->IsJPEG()) {
+ // Nothing to do.
+ return true;
+ }
+
+ // undo global modular transforms and copy int pixel buffers to float ones
+ JXL_RETURN_IF_ERROR(
+ modular_frame_decoder_.FinalizeDecoding(dec_state_, pool_,
+ /*inplace=*/true));
+
+ if (frame_header_.CanBeReferenced()) {
+ auto& info = dec_state_->shared_storage
+ .reference_frames[frame_header_.save_as_reference];
+ info.frame = std::move(dec_state_->frame_storage_for_referencing);
+ info.ib_is_in_xyb = frame_header_.save_before_color_transform;
+ }
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/dec_frame.h b/third_party/jpeg-xl/lib/jxl/dec_frame.h
new file mode 100644
index 0000000000..6b54ac631f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_frame.h
@@ -0,0 +1,329 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_FRAME_H_
+#define LIB_JXL_DEC_FRAME_H_
+
+#include <jxl/decode.h>
+#include <jxl/types.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/blending.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_modular.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+// Decodes a frame. Groups may be processed in parallel by `pool`.
+// `metadata` is the metadata that applies to all frames of the codestream
+// `decoded->metadata` must already be set and must match metadata.m.
+// Used in the encoder to model decoder behaviour, and in tests.
+Status DecodeFrame(PassesDecoderState* dec_state, ThreadPool* JXL_RESTRICT pool,
+ const uint8_t* next_in, size_t avail_in,
+ ImageBundle* decoded, const CodecMetadata& metadata,
+ bool use_slow_rendering_pipeline = false);
+
+// TODO(veluca): implement "forced drawing".
+class FrameDecoder {
+ public:
+ // All parameters must outlive the FrameDecoder.
+ FrameDecoder(PassesDecoderState* dec_state, const CodecMetadata& metadata,
+ ThreadPool* pool, bool use_slow_rendering_pipeline)
+ : dec_state_(dec_state),
+ pool_(pool),
+ frame_header_(&metadata),
+ use_slow_rendering_pipeline_(use_slow_rendering_pipeline) {}
+
+ void SetRenderSpotcolors(bool rsc) { render_spotcolors_ = rsc; }
+ void SetCoalescing(bool c) { coalescing_ = c; }
+
+ // Read FrameHeader and table of contents from the given BitReader.
+ Status InitFrame(BitReader* JXL_RESTRICT br, ImageBundle* decoded,
+ bool is_preview);
+
+ // Checks frame dimensions for their limits, and sets the output
+ // image buffer.
+ Status InitFrameOutput();
+
+ struct SectionInfo {
+ BitReader* JXL_RESTRICT br;
+ // Logical index of the section, regardless of any permutation that may be
+ // applied in the table of contents or of the physical position in the file.
+ size_t id;
+ // Index of the section in the order of the bytes inside the frame.
+ size_t index;
+ };
+
+ struct TocEntry {
+ size_t size;
+ size_t id;
+ };
+
+ enum SectionStatus {
+ // Processed correctly.
+ kDone = 0,
+ // Skipped because other required sections were not yet processed.
+ kSkipped = 1,
+ // Skipped because the section was already processed.
+ kDuplicate = 2,
+ // Only partially decoded: the section will need to be processed again.
+ kPartial = 3,
+ };
+
+ // Processes `num` sections; each SectionInfo contains the index
+ // of the section and a BitReader that only contains the data of the section.
+ // `section_status` should point to `num` elements, and will be filled with
+ // information about whether each section was processed or not.
+ // A section is a part of the encoded file that is indexed by the TOC.
+ Status ProcessSections(const SectionInfo* sections, size_t num,
+ SectionStatus* section_status);
+
+ // Flushes all the data decoded so far to pixels.
+ Status Flush();
+
+ // Runs final operations once a frame data is decoded.
+ // Must be called exactly once per frame, after all calls to ProcessSections.
+ Status FinalizeFrame();
+
+ // Returns dependencies of this frame on reference ids as a bit mask: bits 0-3
+ // indicate reference frame 0-3 for patches and blending, bits 4-7 indicate DC
+ // frames this frame depends on. Only returns a valid result after all calls
+ // to ProcessSections are finished and before FinalizeFrame.
+ int References() const;
+
+ // Returns reference id of storage location where this frame is stored as a
+ // bit flag, or 0 if not stored.
+ // Matches the bit mask used for GetReferences: bits 0-3 indicate it is stored
+ // for patching or blending, bits 4-7 indicate DC frame.
+ // Unlike References, can be ran at any time as
+ // soon as the frame header is known.
+ static int SavedAs(const FrameHeader& header);
+
+ uint64_t SumSectionSizes() const { return section_sizes_sum_; }
+ const std::vector<TocEntry>& Toc() const { return toc_; }
+
+ const FrameHeader& GetFrameHeader() const { return frame_header_; }
+
+ // Returns whether a DC image has been decoded, accessible at low resolution
+ // at passes.shared_storage.dc_storage
+ bool HasDecodedDC() const { return finalized_dc_; }
+ bool HasDecodedAll() const { return toc_.size() == num_sections_done_; }
+
+ size_t NumCompletePasses() const {
+ return *std::min_element(decoded_passes_per_ac_group_.begin(),
+ decoded_passes_per_ac_group_.end());
+ }
+
+ // If enabled, ProcessSections will stop and return true when the DC
+ // sections have been processed, instead of starting the AC sections. This
+ // will only occur if supported (that is, flushing will produce a valid
+ // 1/8th*1/8th resolution image). The return value of true then does not mean
+ // all sections have been processed, use HasDecodedDC and HasDecodedAll
+ // to check the true finished state.
+ // Returns the progressive detail that will be effective for the frame.
+ JxlProgressiveDetail SetPauseAtProgressive(JxlProgressiveDetail prog_detail) {
+ bool single_section =
+ frame_dim_.num_groups == 1 && frame_header_.passes.num_passes == 1;
+ if (frame_header_.frame_type != kSkipProgressive &&
+ // If there's only one group and one pass, there is no separate section
+ // for DC and the entire full resolution image is available at once.
+ !single_section &&
+ // If extra channels are encoded with modular without squeeze, they
+ // don't support DC. If the are encoded with squeeze, DC works in theory
+ // but the implementation may not yet correctly support this for Flush.
+ // Therefore, can't correctly pause for a progressive step if there is
+ // an extra channel (including alpha channel)
+ // TODO(firsching): Check if this is still the case.
+ decoded_->metadata()->extra_channel_info.empty() &&
+ // DC is not guaranteed to be available in modular mode and may be a
+ // black image. If squeeze is used, it may be available depending on the
+ // current implementation.
+ // TODO(lode): do return DC if it's known that flushing at this point
+ // will produce a valid 1/8th downscaled image with modular encoding.
+ frame_header_.encoding == FrameEncoding::kVarDCT) {
+ progressive_detail_ = prog_detail;
+ } else {
+ progressive_detail_ = JxlProgressiveDetail::kFrames;
+ }
+ if (progressive_detail_ >= JxlProgressiveDetail::kPasses) {
+ for (size_t i = 1; i < frame_header_.passes.num_passes; ++i) {
+ passes_to_pause_.push_back(i);
+ }
+ } else if (progressive_detail_ >= JxlProgressiveDetail::kLastPasses) {
+ for (size_t i = 0; i < frame_header_.passes.num_downsample; ++i) {
+ passes_to_pause_.push_back(frame_header_.passes.last_pass[i] + 1);
+ }
+ // The format does not guarantee that these values are sorted.
+ std::sort(passes_to_pause_.begin(), passes_to_pause_.end());
+ }
+ return progressive_detail_;
+ }
+
+ size_t NextNumPassesToPause() const {
+ auto it = std::upper_bound(passes_to_pause_.begin(), passes_to_pause_.end(),
+ NumCompletePasses());
+ return (it != passes_to_pause_.end() ? *it
+ : std::numeric_limits<size_t>::max());
+ }
+
+ // Sets the pixel callback or image buffer where the pixels will be decoded.
+ //
+ // @param undo_orientation: if true, indicates the frame decoder should apply
+ // the exif orientation to bring the image to the intended display
+ // orientation.
+ void SetImageOutput(const PixelCallback& pixel_callback, void* image_buffer,
+ size_t image_buffer_size, size_t xsize, size_t ysize,
+ JxlPixelFormat format, size_t bits_per_sample,
+ bool unpremul_alpha, bool undo_orientation) const {
+ dec_state_->width = xsize;
+ dec_state_->height = ysize;
+ dec_state_->main_output.format = format;
+ dec_state_->main_output.bits_per_sample = bits_per_sample;
+ dec_state_->main_output.callback = pixel_callback;
+ dec_state_->main_output.buffer = image_buffer;
+ dec_state_->main_output.buffer_size = image_buffer_size;
+ dec_state_->main_output.stride = GetStride(xsize, format);
+ const jxl::ExtraChannelInfo* alpha =
+ decoded_->metadata()->Find(jxl::ExtraChannel::kAlpha);
+ if (alpha && alpha->alpha_associated && unpremul_alpha) {
+ dec_state_->unpremul_alpha = true;
+ }
+ if (undo_orientation) {
+ dec_state_->undo_orientation = decoded_->metadata()->GetOrientation();
+ if (static_cast<int>(dec_state_->undo_orientation) > 4) {
+ std::swap(dec_state_->width, dec_state_->height);
+ }
+ }
+ dec_state_->extra_output.clear();
+#if !JXL_HIGH_PRECISION
+ if (dec_state_->main_output.buffer &&
+ (format.data_type == JXL_TYPE_UINT8) && (format.num_channels >= 3) &&
+ !dec_state_->unpremul_alpha &&
+ (dec_state_->undo_orientation == Orientation::kIdentity) &&
+ decoded_->metadata()->xyb_encoded &&
+ dec_state_->output_encoding_info.color_encoding.IsSRGB() &&
+ dec_state_->output_encoding_info.all_default_opsin &&
+ (dec_state_->output_encoding_info.desired_intensity_target ==
+ dec_state_->output_encoding_info.orig_intensity_target) &&
+ HasFastXYBTosRGB8() && frame_header_.needs_color_transform()) {
+ dec_state_->fast_xyb_srgb8_conversion = true;
+ }
+#endif
+ }
+
+ void AddExtraChannelOutput(void* buffer, size_t buffer_size, size_t xsize,
+ JxlPixelFormat format, size_t bits_per_sample) {
+ ImageOutput out;
+ out.format = format;
+ out.bits_per_sample = bits_per_sample;
+ out.buffer = buffer;
+ out.buffer_size = buffer_size;
+ out.stride = GetStride(xsize, format);
+ dec_state_->extra_output.push_back(out);
+ }
+
+ private:
+ Status ProcessDCGlobal(BitReader* br);
+ Status ProcessDCGroup(size_t dc_group_id, BitReader* br);
+ void FinalizeDC();
+ Status AllocateOutput();
+ Status ProcessACGlobal(BitReader* br);
+ Status ProcessACGroup(size_t ac_group_id, BitReader* JXL_RESTRICT* br,
+ size_t num_passes, size_t thread, bool force_draw,
+ bool dc_only);
+ void MarkSections(const SectionInfo* sections, size_t num,
+ SectionStatus* section_status);
+
+ // Allocates storage for parallel decoding using up to `num_threads` threads
+ // of up to `num_tasks` tasks. The value of `thread` passed to
+ // `GetStorageLocation` must be smaller than the `num_threads` value passed
+ // here. The value of `task` passed to `GetStorageLocation` must be smaller
+ // than the value of `num_tasks` passed here.
+ Status PrepareStorage(size_t num_threads, size_t num_tasks) {
+ size_t storage_size = std::min(num_threads, num_tasks);
+ if (storage_size > group_dec_caches_.size()) {
+ group_dec_caches_.resize(storage_size);
+ }
+ use_task_id_ = num_threads > num_tasks;
+ bool use_group_ids = (modular_frame_decoder_.UsesFullImage() &&
+ (frame_header_.encoding == FrameEncoding::kVarDCT ||
+ (frame_header_.flags & FrameHeader::kNoise)));
+ if (dec_state_->render_pipeline) {
+ JXL_RETURN_IF_ERROR(dec_state_->render_pipeline->PrepareForThreads(
+ storage_size, use_group_ids));
+ }
+ return true;
+ }
+
+ size_t GetStorageLocation(size_t thread, size_t task) {
+ if (use_task_id_) return task;
+ return thread;
+ }
+
+ static size_t BytesPerChannel(JxlDataType data_type) {
+ return (data_type == JXL_TYPE_UINT8 ? 1u
+ : data_type == JXL_TYPE_FLOAT ? 4u
+ : 2u);
+ }
+
+ static size_t GetStride(const size_t xsize, JxlPixelFormat format) {
+ size_t stride =
+ (xsize * BytesPerChannel(format.data_type) * format.num_channels);
+ if (format.align > 1) {
+ stride = (jxl::DivCeil(stride, format.align) * format.align);
+ }
+ return stride;
+ }
+
+ PassesDecoderState* dec_state_;
+ ThreadPool* pool_;
+ std::vector<TocEntry> toc_;
+ uint64_t section_sizes_sum_;
+ // TODO(veluca): figure out the duplication between these and dec_state_.
+ FrameHeader frame_header_;
+ FrameDimensions frame_dim_;
+ ImageBundle* decoded_;
+ ModularFrameDecoder modular_frame_decoder_;
+ bool render_spotcolors_ = true;
+ bool coalescing_ = true;
+
+ std::vector<uint8_t> processed_section_;
+ std::vector<uint8_t> decoded_passes_per_ac_group_;
+ std::vector<uint8_t> decoded_dc_groups_;
+ bool decoded_dc_global_;
+ bool decoded_ac_global_;
+ bool HasEverything() const;
+ bool finalized_dc_ = true;
+ size_t num_sections_done_ = 0;
+ bool is_finalized_ = true;
+ bool allocated_ = false;
+
+ std::vector<GroupDecCache> group_dec_caches_;
+
+ // Whether or not the task id should be used for storage indexing, instead of
+ // the thread id.
+ bool use_task_id_ = false;
+
+ // Testing setting: whether or not to use the slow rendering pipeline.
+ bool use_slow_rendering_pipeline_;
+
+ JxlProgressiveDetail progressive_detail_ = kFrames;
+ // Number of completed passes where section decoding should pause.
+ // Used for progressive details at least kLastPasses.
+ std::vector<int> passes_to_pause_;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_DEC_FRAME_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dec_group.cc b/third_party/jpeg-xl/lib/jxl/dec_group.cc
new file mode 100644
index 0000000000..be8df9b062
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_group.cc
@@ -0,0 +1,801 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_group.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <memory>
+#include <utility>
+
+#include "lib/jxl/frame_header.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_group.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_transforms-inl.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer-inl.h"
+#include "lib/jxl/quantizer.h"
+
+#ifndef LIB_JXL_DEC_GROUP_CC
+#define LIB_JXL_DEC_GROUP_CC
+namespace jxl {
+
+struct AuxOut;
+
+// Interface for reading groups for DecodeGroupImpl.
+class GetBlock {
+ public:
+ virtual void StartRow(size_t by) = 0;
+ virtual Status LoadBlock(size_t bx, size_t by, const AcStrategy& acs,
+ size_t size, size_t log2_covered_blocks,
+ ACPtr block[3], ACType ac_type) = 0;
+ virtual ~GetBlock() {}
+};
+
+// Controls whether DecodeGroupImpl renders to pixels or not.
+enum DrawMode {
+ // Render to pixels.
+ kDraw = 0,
+ // Don't render to pixels.
+ kDontDraw = 1,
+};
+
+} // namespace jxl
+#endif // LIB_JXL_DEC_GROUP_CC
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::ShiftRight;
+
+using D = HWY_FULL(float);
+using DU = HWY_FULL(uint32_t);
+using DI = HWY_FULL(int32_t);
+using DI16 = Rebind<int16_t, DI>;
+constexpr D d;
+constexpr DI di;
+constexpr DI16 di16;
+
+// TODO(veluca): consider SIMDfying.
+void Transpose8x8InPlace(int32_t* JXL_RESTRICT block) {
+ for (size_t x = 0; x < 8; x++) {
+ for (size_t y = x + 1; y < 8; y++) {
+ std::swap(block[y * 8 + x], block[x * 8 + y]);
+ }
+ }
+}
+
+template <ACType ac_type>
+void DequantLane(Vec<D> scaled_dequant_x, Vec<D> scaled_dequant_y,
+ Vec<D> scaled_dequant_b,
+ const float* JXL_RESTRICT dequant_matrices, size_t size,
+ size_t k, Vec<D> x_cc_mul, Vec<D> b_cc_mul,
+ const float* JXL_RESTRICT biases, ACPtr qblock[3],
+ float* JXL_RESTRICT block) {
+ const auto x_mul = Mul(Load(d, dequant_matrices + k), scaled_dequant_x);
+ const auto y_mul =
+ Mul(Load(d, dequant_matrices + size + k), scaled_dequant_y);
+ const auto b_mul =
+ Mul(Load(d, dequant_matrices + 2 * size + k), scaled_dequant_b);
+
+ Vec<DI> quantized_x_int;
+ Vec<DI> quantized_y_int;
+ Vec<DI> quantized_b_int;
+ if (ac_type == ACType::k16) {
+ Rebind<int16_t, DI> di16;
+ quantized_x_int = PromoteTo(di, Load(di16, qblock[0].ptr16 + k));
+ quantized_y_int = PromoteTo(di, Load(di16, qblock[1].ptr16 + k));
+ quantized_b_int = PromoteTo(di, Load(di16, qblock[2].ptr16 + k));
+ } else {
+ quantized_x_int = Load(di, qblock[0].ptr32 + k);
+ quantized_y_int = Load(di, qblock[1].ptr32 + k);
+ quantized_b_int = Load(di, qblock[2].ptr32 + k);
+ }
+
+ const auto dequant_x_cc =
+ Mul(AdjustQuantBias(di, 0, quantized_x_int, biases), x_mul);
+ const auto dequant_y =
+ Mul(AdjustQuantBias(di, 1, quantized_y_int, biases), y_mul);
+ const auto dequant_b_cc =
+ Mul(AdjustQuantBias(di, 2, quantized_b_int, biases), b_mul);
+
+ const auto dequant_x = MulAdd(x_cc_mul, dequant_y, dequant_x_cc);
+ const auto dequant_b = MulAdd(b_cc_mul, dequant_y, dequant_b_cc);
+ Store(dequant_x, d, block + k);
+ Store(dequant_y, d, block + size + k);
+ Store(dequant_b, d, block + 2 * size + k);
+}
+
+template <ACType ac_type>
+void DequantBlock(const AcStrategy& acs, float inv_global_scale, int quant,
+ float x_dm_multiplier, float b_dm_multiplier, Vec<D> x_cc_mul,
+ Vec<D> b_cc_mul, size_t kind, size_t size,
+ const Quantizer& quantizer, size_t covered_blocks,
+ const size_t* sbx,
+ const float* JXL_RESTRICT* JXL_RESTRICT dc_row,
+ size_t dc_stride, const float* JXL_RESTRICT biases,
+ ACPtr qblock[3], float* JXL_RESTRICT block) {
+ PROFILER_FUNC;
+
+ const auto scaled_dequant_s = inv_global_scale / quant;
+
+ const auto scaled_dequant_x = Set(d, scaled_dequant_s * x_dm_multiplier);
+ const auto scaled_dequant_y = Set(d, scaled_dequant_s);
+ const auto scaled_dequant_b = Set(d, scaled_dequant_s * b_dm_multiplier);
+
+ const float* dequant_matrices = quantizer.DequantMatrix(kind, 0);
+
+ for (size_t k = 0; k < covered_blocks * kDCTBlockSize; k += Lanes(d)) {
+ DequantLane<ac_type>(scaled_dequant_x, scaled_dequant_y, scaled_dequant_b,
+ dequant_matrices, size, k, x_cc_mul, b_cc_mul, biases,
+ qblock, block);
+ }
+ for (size_t c = 0; c < 3; c++) {
+ LowestFrequenciesFromDC(acs.Strategy(), dc_row[c] + sbx[c], dc_stride,
+ block + c * size);
+ }
+}
+
+Status DecodeGroupImpl(GetBlock* JXL_RESTRICT get_block,
+ GroupDecCache* JXL_RESTRICT group_dec_cache,
+ PassesDecoderState* JXL_RESTRICT dec_state,
+ size_t thread, size_t group_idx,
+ RenderPipelineInput& render_pipeline_input,
+ ImageBundle* decoded, DrawMode draw) {
+ // TODO(veluca): investigate cache usage in this function.
+ PROFILER_FUNC;
+ const Rect block_rect = dec_state->shared->BlockGroupRect(group_idx);
+ const AcStrategyImage& ac_strategy = dec_state->shared->ac_strategy;
+
+ const size_t xsize_blocks = block_rect.xsize();
+ const size_t ysize_blocks = block_rect.ysize();
+
+ const size_t dc_stride = dec_state->shared->dc->PixelsPerRow();
+
+ const float inv_global_scale = dec_state->shared->quantizer.InvGlobalScale();
+
+ const YCbCrChromaSubsampling& cs =
+ dec_state->shared->frame_header.chroma_subsampling;
+
+ size_t idct_stride[3];
+ for (size_t c = 0; c < 3; c++) {
+ idct_stride[c] = render_pipeline_input.GetBuffer(c).first->PixelsPerRow();
+ }
+
+ HWY_ALIGN int32_t scaled_qtable[64 * 3];
+
+ ACType ac_type = dec_state->coefficients->Type();
+ auto dequant_block = ac_type == ACType::k16 ? DequantBlock<ACType::k16>
+ : DequantBlock<ACType::k32>;
+ // Whether or not coefficients should be stored for future usage, and/or read
+ // from past usage.
+ bool accumulate = !dec_state->coefficients->IsEmpty();
+ // Offset of the current block in the group.
+ size_t offset = 0;
+
+ std::array<int, 3> jpeg_c_map;
+ bool jpeg_is_gray = false;
+ std::array<int, 3> dcoff = {};
+
+ // TODO(veluca): all of this should be done only once per image.
+ if (decoded->IsJPEG()) {
+ if (!dec_state->shared->cmap.IsJPEGCompatible()) {
+ return JXL_FAILURE("The CfL map is not JPEG-compatible");
+ }
+ jpeg_is_gray = (decoded->jpeg_data->components.size() == 1);
+ jpeg_c_map = JpegOrder(dec_state->shared->frame_header.color_transform,
+ jpeg_is_gray);
+ const std::vector<QuantEncoding>& qe =
+ dec_state->shared->matrices.encodings();
+ if (qe.empty() || qe[0].mode != QuantEncoding::Mode::kQuantModeRAW ||
+ std::abs(qe[0].qraw.qtable_den - 1.f / (8 * 255)) > 1e-8f) {
+ return JXL_FAILURE(
+ "Quantization table is not a JPEG quantization table.");
+ }
+ for (size_t c = 0; c < 3; c++) {
+ if (dec_state->shared->frame_header.color_transform ==
+ ColorTransform::kNone) {
+ dcoff[c] = 1024 / (*qe[0].qraw.qtable)[64 * c];
+ }
+ for (size_t i = 0; i < 64; i++) {
+ // Transpose the matrix, as it will be used on the transposed block.
+ int n = qe[0].qraw.qtable->at(64 + i);
+ int d = qe[0].qraw.qtable->at(64 * c + i);
+ if (n <= 0 || d <= 0 || n >= 65536 || d >= 65536) {
+ return JXL_FAILURE("Invalid JPEG quantization table");
+ }
+ scaled_qtable[64 * c + (i % 8) * 8 + (i / 8)] =
+ (1 << kCFLFixedPointPrecision) * n / d;
+ }
+ }
+ }
+
+ size_t hshift[3] = {cs.HShift(0), cs.HShift(1), cs.HShift(2)};
+ size_t vshift[3] = {cs.VShift(0), cs.VShift(1), cs.VShift(2)};
+ Rect r[3];
+ for (size_t i = 0; i < 3; i++) {
+ r[i] =
+ Rect(block_rect.x0() >> hshift[i], block_rect.y0() >> vshift[i],
+ block_rect.xsize() >> hshift[i], block_rect.ysize() >> vshift[i]);
+ if (!r[i].IsInside({0, 0, dec_state->shared->dc->Plane(i).xsize(),
+ dec_state->shared->dc->Plane(i).ysize()})) {
+ return JXL_FAILURE("Frame dimensions are too big for the image.");
+ }
+ }
+
+ for (size_t by = 0; by < ysize_blocks; ++by) {
+ get_block->StartRow(by);
+ size_t sby[3] = {by >> vshift[0], by >> vshift[1], by >> vshift[2]};
+
+ const int32_t* JXL_RESTRICT row_quant =
+ block_rect.ConstRow(dec_state->shared->raw_quant_field, by);
+
+ const float* JXL_RESTRICT dc_rows[3] = {
+ r[0].ConstPlaneRow(*dec_state->shared->dc, 0, sby[0]),
+ r[1].ConstPlaneRow(*dec_state->shared->dc, 1, sby[1]),
+ r[2].ConstPlaneRow(*dec_state->shared->dc, 2, sby[2]),
+ };
+
+ const size_t ty = (block_rect.y0() + by) / kColorTileDimInBlocks;
+ AcStrategyRow acs_row = ac_strategy.ConstRow(block_rect, by);
+
+ const int8_t* JXL_RESTRICT row_cmap[3] = {
+ dec_state->shared->cmap.ytox_map.ConstRow(ty),
+ nullptr,
+ dec_state->shared->cmap.ytob_map.ConstRow(ty),
+ };
+
+ float* JXL_RESTRICT idct_row[3];
+ int16_t* JXL_RESTRICT jpeg_row[3];
+ for (size_t c = 0; c < 3; c++) {
+ idct_row[c] = render_pipeline_input.GetBuffer(c).second.Row(
+ render_pipeline_input.GetBuffer(c).first, sby[c] * kBlockDim);
+ if (decoded->IsJPEG()) {
+ auto& component = decoded->jpeg_data->components[jpeg_c_map[c]];
+ jpeg_row[c] =
+ component.coeffs.data() +
+ (component.width_in_blocks * (r[c].y0() + sby[c]) + r[c].x0()) *
+ kDCTBlockSize;
+ }
+ }
+
+ size_t bx = 0;
+ for (size_t tx = 0; tx < DivCeil(xsize_blocks, kColorTileDimInBlocks);
+ tx++) {
+ size_t abs_tx = tx + block_rect.x0() / kColorTileDimInBlocks;
+ auto x_cc_mul =
+ Set(d, dec_state->shared->cmap.YtoXRatio(row_cmap[0][abs_tx]));
+ auto b_cc_mul =
+ Set(d, dec_state->shared->cmap.YtoBRatio(row_cmap[2][abs_tx]));
+ // Increment bx by llf_x because those iterations would otherwise
+ // immediately continue (!IsFirstBlock). Reduces mispredictions.
+ for (; bx < xsize_blocks && bx < (tx + 1) * kColorTileDimInBlocks;) {
+ size_t sbx[3] = {bx >> hshift[0], bx >> hshift[1], bx >> hshift[2]};
+ AcStrategy acs = acs_row[bx];
+ const size_t llf_x = acs.covered_blocks_x();
+
+ // Can only happen in the second or lower rows of a varblock.
+ if (JXL_UNLIKELY(!acs.IsFirstBlock())) {
+ bx += llf_x;
+ continue;
+ }
+ PROFILER_ZONE("DecodeGroupImpl inner");
+ const size_t log2_covered_blocks = acs.log2_covered_blocks();
+
+ const size_t covered_blocks = 1 << log2_covered_blocks;
+ const size_t size = covered_blocks * kDCTBlockSize;
+
+ ACPtr qblock[3];
+ if (accumulate) {
+ for (size_t c = 0; c < 3; c++) {
+ qblock[c] = dec_state->coefficients->PlaneRow(c, group_idx, offset);
+ }
+ } else {
+ // No point in reading from bitstream without accumulating and not
+ // drawing.
+ JXL_ASSERT(draw == kDraw);
+ if (ac_type == ACType::k16) {
+ memset(group_dec_cache->dec_group_qblock16, 0,
+ size * 3 * sizeof(int16_t));
+ for (size_t c = 0; c < 3; c++) {
+ qblock[c].ptr16 = group_dec_cache->dec_group_qblock16 + c * size;
+ }
+ } else {
+ memset(group_dec_cache->dec_group_qblock, 0,
+ size * 3 * sizeof(int32_t));
+ for (size_t c = 0; c < 3; c++) {
+ qblock[c].ptr32 = group_dec_cache->dec_group_qblock + c * size;
+ }
+ }
+ }
+ JXL_RETURN_IF_ERROR(get_block->LoadBlock(
+ bx, by, acs, size, log2_covered_blocks, qblock, ac_type));
+ offset += size;
+ if (draw == kDontDraw) {
+ bx += llf_x;
+ continue;
+ }
+
+ if (JXL_UNLIKELY(decoded->IsJPEG())) {
+ if (acs.Strategy() != AcStrategy::Type::DCT) {
+ return JXL_FAILURE(
+ "Can only decode to JPEG if only DCT-8 is used.");
+ }
+
+ HWY_ALIGN int32_t transposed_dct_y[64];
+ for (size_t c : {1, 0, 2}) {
+ // Propagate only Y for grayscale.
+ if (jpeg_is_gray && c != 1) {
+ continue;
+ }
+ if ((sbx[c] << hshift[c] != bx) || (sby[c] << vshift[c] != by)) {
+ continue;
+ }
+ int16_t* JXL_RESTRICT jpeg_pos =
+ jpeg_row[c] + sbx[c] * kDCTBlockSize;
+ // JPEG XL is transposed, JPEG is not.
+ auto transposed_dct = qblock[c].ptr32;
+ Transpose8x8InPlace(transposed_dct);
+ // No CfL - no need to store the y block converted to integers.
+ if (!cs.Is444() ||
+ (row_cmap[0][abs_tx] == 0 && row_cmap[2][abs_tx] == 0)) {
+ for (size_t i = 0; i < 64; i += Lanes(d)) {
+ const auto ini = Load(di, transposed_dct + i);
+ const auto ini16 = DemoteTo(di16, ini);
+ StoreU(ini16, di16, jpeg_pos + i);
+ }
+ } else if (c == 1) {
+ // Y channel: save for restoring X/B, but nothing else to do.
+ for (size_t i = 0; i < 64; i += Lanes(d)) {
+ const auto ini = Load(di, transposed_dct + i);
+ Store(ini, di, transposed_dct_y + i);
+ const auto ini16 = DemoteTo(di16, ini);
+ StoreU(ini16, di16, jpeg_pos + i);
+ }
+ } else {
+ // transposed_dct_y contains the y channel block, transposed.
+ const auto scale = Set(
+ di, dec_state->shared->cmap.RatioJPEG(row_cmap[c][abs_tx]));
+ const auto round = Set(di, 1 << (kCFLFixedPointPrecision - 1));
+ for (int i = 0; i < 64; i += Lanes(d)) {
+ auto in = Load(di, transposed_dct + i);
+ auto in_y = Load(di, transposed_dct_y + i);
+ auto qt = Load(di, scaled_qtable + c * size + i);
+ auto coeff_scale = ShiftRight<kCFLFixedPointPrecision>(
+ Add(Mul(qt, scale), round));
+ auto cfl_factor = ShiftRight<kCFLFixedPointPrecision>(
+ Add(Mul(in_y, coeff_scale), round));
+ StoreU(DemoteTo(di16, Add(in, cfl_factor)), di16, jpeg_pos + i);
+ }
+ }
+ jpeg_pos[0] =
+ Clamp1<float>(dc_rows[c][sbx[c]] - dcoff[c], -2047, 2047);
+ }
+ } else {
+ HWY_ALIGN float* const block = group_dec_cache->dec_group_block;
+ // Dequantize and add predictions.
+ dequant_block(
+ acs, inv_global_scale, row_quant[bx], dec_state->x_dm_multiplier,
+ dec_state->b_dm_multiplier, x_cc_mul, b_cc_mul, acs.RawStrategy(),
+ size, dec_state->shared->quantizer,
+ acs.covered_blocks_y() * acs.covered_blocks_x(), sbx, dc_rows,
+ dc_stride,
+ dec_state->output_encoding_info.opsin_params.quant_biases, qblock,
+ block);
+
+ for (size_t c : {1, 0, 2}) {
+ if ((sbx[c] << hshift[c] != bx) || (sby[c] << vshift[c] != by)) {
+ continue;
+ }
+ // IDCT
+ float* JXL_RESTRICT idct_pos = idct_row[c] + sbx[c] * kBlockDim;
+ TransformToPixels(acs.Strategy(), block + c * size, idct_pos,
+ idct_stride[c], group_dec_cache->scratch_space);
+ }
+ }
+ bx += llf_x;
+ }
+ }
+ }
+ if (draw == kDontDraw) {
+ return true;
+ }
+ return true;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+namespace {
+// Decode quantized AC coefficients of DCT blocks.
+// LLF components in the output block will not be modified.
+template <ACType ac_type>
+Status DecodeACVarBlock(size_t ctx_offset, size_t log2_covered_blocks,
+ int32_t* JXL_RESTRICT row_nzeros,
+ const int32_t* JXL_RESTRICT row_nzeros_top,
+ size_t nzeros_stride, size_t c, size_t bx, size_t by,
+ size_t lbx, AcStrategy acs,
+ const coeff_order_t* JXL_RESTRICT coeff_order,
+ BitReader* JXL_RESTRICT br,
+ ANSSymbolReader* JXL_RESTRICT decoder,
+ const std::vector<uint8_t>& context_map,
+ const uint8_t* qdc_row, const int32_t* qf_row,
+ const BlockCtxMap& block_ctx_map, ACPtr block,
+ size_t shift = 0) {
+ PROFILER_FUNC;
+ // Equal to number of LLF coefficients.
+ const size_t covered_blocks = 1 << log2_covered_blocks;
+ const size_t size = covered_blocks * kDCTBlockSize;
+ int32_t predicted_nzeros =
+ PredictFromTopAndLeft(row_nzeros_top, row_nzeros, bx, 32);
+
+ size_t ord = kStrategyOrder[acs.RawStrategy()];
+ const coeff_order_t* JXL_RESTRICT order =
+ &coeff_order[CoeffOrderOffset(ord, c)];
+
+ size_t block_ctx = block_ctx_map.Context(qdc_row[lbx], qf_row[bx], ord, c);
+ const int32_t nzero_ctx =
+ block_ctx_map.NonZeroContext(predicted_nzeros, block_ctx) + ctx_offset;
+
+ size_t nzeros = decoder->ReadHybridUint(nzero_ctx, br, context_map);
+ if (nzeros + covered_blocks > size) {
+ return JXL_FAILURE("Invalid AC: nzeros too large");
+ }
+ for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
+ for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
+ row_nzeros[bx + x + y * nzeros_stride] =
+ (nzeros + covered_blocks - 1) >> log2_covered_blocks;
+ }
+ }
+
+ const size_t histo_offset =
+ ctx_offset + block_ctx_map.ZeroDensityContextsOffset(block_ctx);
+
+ // Skip LLF
+ {
+ PROFILER_ZONE("AcDecSkipLLF, reader");
+ size_t prev = (nzeros > size / 16 ? 0 : 1);
+ for (size_t k = covered_blocks; k < size && nzeros != 0; ++k) {
+ const size_t ctx =
+ histo_offset + ZeroDensityContext(nzeros, k, covered_blocks,
+ log2_covered_blocks, prev);
+ const size_t u_coeff = decoder->ReadHybridUint(ctx, br, context_map);
+ // Hand-rolled version of UnpackSigned, shifting before the conversion to
+ // signed integer to avoid undefined behavior of shifting negative
+ // numbers.
+ const size_t magnitude = u_coeff >> 1;
+ const size_t neg_sign = (~u_coeff) & 1;
+ const intptr_t coeff =
+ static_cast<intptr_t>((magnitude ^ (neg_sign - 1)) << shift);
+ if (ac_type == ACType::k16) {
+ block.ptr16[order[k]] += coeff;
+ } else {
+ block.ptr32[order[k]] += coeff;
+ }
+ prev = static_cast<size_t>(u_coeff != 0);
+ nzeros -= prev;
+ }
+ if (JXL_UNLIKELY(nzeros != 0)) {
+ return JXL_FAILURE("Invalid AC: nzeros not 0. Block (%" PRIuS ", %" PRIuS
+ "), channel %" PRIuS,
+ bx, by, c);
+ }
+ }
+ return true;
+}
+
+// Structs used by DecodeGroupImpl to get a quantized block.
+// GetBlockFromBitstream uses ANS decoding (and thus keeps track of row
+// pointers in row_nzeros), GetBlockFromEncoder simply reads the coefficient
+// image provided by the encoder.
+
+struct GetBlockFromBitstream : public GetBlock {
+ void StartRow(size_t by) override {
+ qf_row = rect.ConstRow(*qf, by);
+ for (size_t c = 0; c < 3; c++) {
+ size_t sby = by >> vshift[c];
+ quant_dc_row = quant_dc->ConstRow(rect.y0() + by) + rect.x0();
+ for (size_t i = 0; i < num_passes; i++) {
+ row_nzeros[i][c] = group_dec_cache->num_nzeroes[i].PlaneRow(c, sby);
+ row_nzeros_top[i][c] =
+ sby == 0
+ ? nullptr
+ : group_dec_cache->num_nzeroes[i].ConstPlaneRow(c, sby - 1);
+ }
+ }
+ }
+
+ Status LoadBlock(size_t bx, size_t by, const AcStrategy& acs, size_t size,
+ size_t log2_covered_blocks, ACPtr block[3],
+ ACType ac_type) override {
+ auto decode_ac_varblock = ac_type == ACType::k16
+ ? DecodeACVarBlock<ACType::k16>
+ : DecodeACVarBlock<ACType::k32>;
+ for (size_t c : {1, 0, 2}) {
+ size_t sbx = bx >> hshift[c];
+ size_t sby = by >> vshift[c];
+ if (JXL_UNLIKELY((sbx << hshift[c] != bx) || (sby << vshift[c] != by))) {
+ continue;
+ }
+
+ for (size_t pass = 0; JXL_UNLIKELY(pass < num_passes); pass++) {
+ JXL_RETURN_IF_ERROR(decode_ac_varblock(
+ ctx_offset[pass], log2_covered_blocks, row_nzeros[pass][c],
+ row_nzeros_top[pass][c], nzeros_stride, c, sbx, sby, bx, acs,
+ &coeff_orders[pass * coeff_order_size], readers[pass],
+ &decoders[pass], context_map[pass], quant_dc_row, qf_row,
+ *block_ctx_map, block[c], shift_for_pass[pass]));
+ }
+ }
+ return true;
+ }
+
+ Status Init(BitReader* JXL_RESTRICT* JXL_RESTRICT readers, size_t num_passes,
+ size_t group_idx, size_t histo_selector_bits, const Rect& rect,
+ GroupDecCache* JXL_RESTRICT group_dec_cache,
+ PassesDecoderState* dec_state, size_t first_pass) {
+ for (size_t i = 0; i < 3; i++) {
+ hshift[i] = dec_state->shared->frame_header.chroma_subsampling.HShift(i);
+ vshift[i] = dec_state->shared->frame_header.chroma_subsampling.VShift(i);
+ }
+ this->coeff_order_size = dec_state->shared->coeff_order_size;
+ this->coeff_orders =
+ dec_state->shared->coeff_orders.data() + first_pass * coeff_order_size;
+ this->context_map = dec_state->context_map.data() + first_pass;
+ this->readers = readers;
+ this->num_passes = num_passes;
+ this->shift_for_pass =
+ dec_state->shared->frame_header.passes.shift + first_pass;
+ this->group_dec_cache = group_dec_cache;
+ this->rect = rect;
+ block_ctx_map = &dec_state->shared->block_ctx_map;
+ qf = &dec_state->shared->raw_quant_field;
+ quant_dc = &dec_state->shared->quant_dc;
+
+ for (size_t pass = 0; pass < num_passes; pass++) {
+ // Select which histogram set to use among those of the current pass.
+ size_t cur_histogram = 0;
+ if (histo_selector_bits != 0) {
+ cur_histogram = readers[pass]->ReadBits(histo_selector_bits);
+ }
+ if (cur_histogram >= dec_state->shared->num_histograms) {
+ return JXL_FAILURE("Invalid histogram selector");
+ }
+ ctx_offset[pass] = cur_histogram * block_ctx_map->NumACContexts();
+
+ decoders[pass] =
+ ANSSymbolReader(&dec_state->code[pass + first_pass], readers[pass]);
+ }
+ nzeros_stride = group_dec_cache->num_nzeroes[0].PixelsPerRow();
+ for (size_t i = 0; i < num_passes; i++) {
+ JXL_ASSERT(
+ nzeros_stride ==
+ static_cast<size_t>(group_dec_cache->num_nzeroes[i].PixelsPerRow()));
+ }
+ return true;
+ }
+
+ const uint32_t* shift_for_pass = nullptr; // not owned
+ const coeff_order_t* JXL_RESTRICT coeff_orders;
+ size_t coeff_order_size;
+ const std::vector<uint8_t>* JXL_RESTRICT context_map;
+ ANSSymbolReader decoders[kMaxNumPasses];
+ BitReader* JXL_RESTRICT* JXL_RESTRICT readers;
+ size_t num_passes;
+ size_t ctx_offset[kMaxNumPasses];
+ size_t nzeros_stride;
+ int32_t* JXL_RESTRICT row_nzeros[kMaxNumPasses][3];
+ const int32_t* JXL_RESTRICT row_nzeros_top[kMaxNumPasses][3];
+ GroupDecCache* JXL_RESTRICT group_dec_cache;
+ const BlockCtxMap* block_ctx_map;
+ const ImageI* qf;
+ const ImageB* quant_dc;
+ const int32_t* qf_row;
+ const uint8_t* quant_dc_row;
+ Rect rect;
+ size_t hshift[3], vshift[3];
+};
+
+struct GetBlockFromEncoder : public GetBlock {
+ void StartRow(size_t by) override {}
+
+ Status LoadBlock(size_t bx, size_t by, const AcStrategy& acs, size_t size,
+ size_t log2_covered_blocks, ACPtr block[3],
+ ACType ac_type) override {
+ JXL_DASSERT(ac_type == ACType::k32);
+ for (size_t c = 0; c < 3; c++) {
+ // for each pass
+ for (size_t i = 0; i < quantized_ac->size(); i++) {
+ for (size_t k = 0; k < size; k++) {
+ // TODO(veluca): SIMD.
+ block[c].ptr32[k] +=
+ rows[i][c][offset + k] * (1 << shift_for_pass[i]);
+ }
+ }
+ }
+ offset += size;
+ return true;
+ }
+
+ GetBlockFromEncoder(const std::vector<std::unique_ptr<ACImage>>& ac,
+ size_t group_idx, const uint32_t* shift_for_pass)
+ : quantized_ac(&ac), shift_for_pass(shift_for_pass) {
+ // TODO(veluca): not supported with chroma subsampling.
+ for (size_t i = 0; i < quantized_ac->size(); i++) {
+ JXL_CHECK((*quantized_ac)[i]->Type() == ACType::k32);
+ for (size_t c = 0; c < 3; c++) {
+ rows[i][c] = (*quantized_ac)[i]->PlaneRow(c, group_idx, 0).ptr32;
+ }
+ }
+ }
+
+ const std::vector<std::unique_ptr<ACImage>>* JXL_RESTRICT quantized_ac;
+ size_t offset = 0;
+ const int32_t* JXL_RESTRICT rows[kMaxNumPasses][3];
+ const uint32_t* shift_for_pass = nullptr; // not owned
+};
+
+HWY_EXPORT(DecodeGroupImpl);
+
+} // namespace
+
+Status DecodeGroup(BitReader* JXL_RESTRICT* JXL_RESTRICT readers,
+ size_t num_passes, size_t group_idx,
+ PassesDecoderState* JXL_RESTRICT dec_state,
+ GroupDecCache* JXL_RESTRICT group_dec_cache, size_t thread,
+ RenderPipelineInput& render_pipeline_input,
+ ImageBundle* JXL_RESTRICT decoded, size_t first_pass,
+ bool force_draw, bool dc_only, bool* should_run_pipeline) {
+ PROFILER_FUNC;
+
+ DrawMode draw = (num_passes + first_pass ==
+ dec_state->shared->frame_header.passes.num_passes) ||
+ force_draw
+ ? kDraw
+ : kDontDraw;
+
+ if (should_run_pipeline) {
+ *should_run_pipeline = draw != kDontDraw;
+ }
+
+ if (draw == kDraw && num_passes == 0 && first_pass == 0) {
+ group_dec_cache->InitDCBufferOnce();
+ const YCbCrChromaSubsampling& cs =
+ dec_state->shared->frame_header.chroma_subsampling;
+ for (size_t c : {0, 1, 2}) {
+ size_t hs = cs.HShift(c);
+ size_t vs = cs.VShift(c);
+ // We reuse filter_input_storage here as it is not currently in use.
+ const Rect src_rect_precs = dec_state->shared->BlockGroupRect(group_idx);
+ const Rect src_rect =
+ Rect(src_rect_precs.x0() >> hs, src_rect_precs.y0() >> vs,
+ src_rect_precs.xsize() >> hs, src_rect_precs.ysize() >> vs);
+ const Rect copy_rect(kRenderPipelineXOffset, 2, src_rect.xsize(),
+ src_rect.ysize());
+ CopyImageToWithPadding(src_rect, dec_state->shared->dc->Plane(c), 2,
+ copy_rect, &group_dec_cache->dc_buffer);
+ // Mirrorpad. Interleaving left and right padding ensures that padding
+ // works out correctly even for images with DC size of 1.
+ for (size_t y = 0; y < src_rect.ysize() + 4; y++) {
+ size_t xend = kRenderPipelineXOffset +
+ (dec_state->shared->dc->Plane(c).xsize() >> hs) -
+ src_rect.x0();
+ for (size_t ix = 0; ix < 2; ix++) {
+ if (src_rect.x0() == 0) {
+ group_dec_cache->dc_buffer.Row(y)[kRenderPipelineXOffset - ix - 1] =
+ group_dec_cache->dc_buffer.Row(y)[kRenderPipelineXOffset + ix];
+ }
+ if (src_rect.x0() + src_rect.xsize() + 2 >=
+ (dec_state->shared->dc->xsize() >> hs)) {
+ group_dec_cache->dc_buffer.Row(y)[xend + ix] =
+ group_dec_cache->dc_buffer.Row(y)[xend - ix - 1];
+ }
+ }
+ }
+ Rect dst_rect = render_pipeline_input.GetBuffer(c).second;
+ ImageF* upsampling_dst = render_pipeline_input.GetBuffer(c).first;
+ JXL_ASSERT(dst_rect.IsInside(*upsampling_dst));
+
+ RenderPipelineStage::RowInfo input_rows(1, std::vector<float*>(5));
+ RenderPipelineStage::RowInfo output_rows(1, std::vector<float*>(8));
+ for (size_t y = src_rect.y0(); y < src_rect.y0() + src_rect.ysize();
+ y++) {
+ for (ssize_t iy = 0; iy < 5; iy++) {
+ input_rows[0][iy] = group_dec_cache->dc_buffer.Row(
+ Mirror(ssize_t(y) + iy - 2,
+ dec_state->shared->dc->Plane(c).ysize() >> vs) +
+ 2 - src_rect.y0());
+ }
+ for (size_t iy = 0; iy < 8; iy++) {
+ output_rows[0][iy] =
+ dst_rect.Row(upsampling_dst, ((y - src_rect.y0()) << 3) + iy) -
+ kRenderPipelineXOffset;
+ }
+ // Arguments set to 0/nullptr are not used.
+ dec_state->upsampler8x->ProcessRow(input_rows, output_rows,
+ /*xextra=*/0, src_rect.xsize(), 0, 0,
+ thread);
+ }
+ }
+ return true;
+ }
+
+ size_t histo_selector_bits = 0;
+ if (dc_only) {
+ JXL_ASSERT(num_passes == 0);
+ } else {
+ JXL_ASSERT(dec_state->shared->num_histograms > 0);
+ histo_selector_bits = CeilLog2Nonzero(dec_state->shared->num_histograms);
+ }
+
+ GetBlockFromBitstream get_block;
+ JXL_RETURN_IF_ERROR(
+ get_block.Init(readers, num_passes, group_idx, histo_selector_bits,
+ dec_state->shared->BlockGroupRect(group_idx),
+ group_dec_cache, dec_state, first_pass));
+
+ JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(DecodeGroupImpl)(
+ &get_block, group_dec_cache, dec_state, thread, group_idx,
+ render_pipeline_input, decoded, draw));
+
+ for (size_t pass = 0; pass < num_passes; pass++) {
+ if (!get_block.decoders[pass].CheckANSFinalState()) {
+ return JXL_FAILURE("ANS checksum failure.");
+ }
+ }
+ return true;
+}
+
+Status DecodeGroupForRoundtrip(const std::vector<std::unique_ptr<ACImage>>& ac,
+ size_t group_idx,
+ PassesDecoderState* JXL_RESTRICT dec_state,
+ GroupDecCache* JXL_RESTRICT group_dec_cache,
+ size_t thread,
+ RenderPipelineInput& render_pipeline_input,
+ ImageBundle* JXL_RESTRICT decoded,
+ AuxOut* aux_out) {
+ PROFILER_FUNC;
+
+ GetBlockFromEncoder get_block(ac, group_idx,
+ dec_state->shared->frame_header.passes.shift);
+ group_dec_cache->InitOnce(
+ /*num_passes=*/0,
+ /*used_acs=*/(1u << AcStrategy::kNumValidStrategies) - 1);
+
+ return HWY_DYNAMIC_DISPATCH(DecodeGroupImpl)(
+ &get_block, group_dec_cache, dec_state, thread, group_idx,
+ render_pipeline_input, decoded, kDraw);
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/dec_group.h b/third_party/jpeg-xl/lib/jxl/dec_group.h
new file mode 100644
index 0000000000..e32ea67b5f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_group.h
@@ -0,0 +1,49 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_GROUP_H_
+#define LIB_JXL_DEC_GROUP_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/quantizer.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+Status DecodeGroup(BitReader* JXL_RESTRICT* JXL_RESTRICT readers,
+ size_t num_passes, size_t group_idx,
+ PassesDecoderState* JXL_RESTRICT dec_state,
+ GroupDecCache* JXL_RESTRICT group_dec_cache, size_t thread,
+ RenderPipelineInput& render_pipeline_input,
+ ImageBundle* JXL_RESTRICT decoded, size_t first_pass,
+ bool force_draw, bool dc_only, bool* should_run_pipeline);
+
+Status DecodeGroupForRoundtrip(const std::vector<std::unique_ptr<ACImage>>& ac,
+ size_t group_idx,
+ PassesDecoderState* JXL_RESTRICT dec_state,
+ GroupDecCache* JXL_RESTRICT group_dec_cache,
+ size_t thread,
+ RenderPipelineInput& render_pipeline_input,
+ ImageBundle* JXL_RESTRICT decoded,
+ AuxOut* aux_out);
+
+} // namespace jxl
+
+#endif // LIB_JXL_DEC_GROUP_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dec_group_border.cc b/third_party/jpeg-xl/lib/jxl/dec_group_border.cc
new file mode 100644
index 0000000000..4bee3ae6ef
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_group_border.cc
@@ -0,0 +1,184 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_group_border.h"
+
+#include <atomic>
+
+namespace jxl {
+
+void GroupBorderAssigner::Init(const FrameDimensions& frame_dim) {
+ frame_dim_ = frame_dim;
+ size_t num_corners =
+ (frame_dim_.xsize_groups + 1) * (frame_dim_.ysize_groups + 1);
+ counters_.reset(new std::atomic<uint8_t>[num_corners]);
+ // Initialize counters.
+ for (size_t y = 0; y < frame_dim_.ysize_groups + 1; y++) {
+ for (size_t x = 0; x < frame_dim_.xsize_groups + 1; x++) {
+ // Counters at image borders don't have anything on the other side, we
+ // pre-fill their value to have more uniform handling afterwards.
+ uint8_t init_value = 0;
+ if (x == 0) {
+ init_value |= kTopLeft | kBottomLeft;
+ }
+ if (x == frame_dim_.xsize_groups) {
+ init_value |= kTopRight | kBottomRight;
+ }
+ if (y == 0) {
+ init_value |= kTopLeft | kTopRight;
+ }
+ if (y == frame_dim_.ysize_groups) {
+ init_value |= kBottomLeft | kBottomRight;
+ }
+ counters_[y * (frame_dim_.xsize_groups + 1) + x] = init_value;
+ }
+ }
+}
+
+void GroupBorderAssigner::ClearDone(size_t group_id) {
+ size_t x = group_id % frame_dim_.xsize_groups;
+ size_t y = group_id / frame_dim_.xsize_groups;
+ size_t top_left_idx = y * (frame_dim_.xsize_groups + 1) + x;
+ size_t top_right_idx = y * (frame_dim_.xsize_groups + 1) + x + 1;
+ size_t bottom_right_idx = (y + 1) * (frame_dim_.xsize_groups + 1) + x + 1;
+ size_t bottom_left_idx = (y + 1) * (frame_dim_.xsize_groups + 1) + x;
+ counters_[top_left_idx].fetch_and(~kBottomRight);
+ counters_[top_right_idx].fetch_and(~kBottomLeft);
+ counters_[bottom_left_idx].fetch_and(~kTopRight);
+ counters_[bottom_right_idx].fetch_and(~kTopLeft);
+}
+
+// Looking at each corner between groups, we can guarantee that the four
+// involved groups will agree between each other regarding the order in which
+// each of the four groups terminated. Thus, the last of the four groups
+// gets the responsibility of handling the corner. For borders, every border
+// is assigned to its top corner (for vertical borders) or to its left corner
+// (for horizontal borders): the order as seen on those corners will decide who
+// handles that border.
+
+void GroupBorderAssigner::GroupDone(size_t group_id, size_t padx, size_t pady,
+ Rect* rects_to_finalize,
+ size_t* num_to_finalize) {
+ size_t x = group_id % frame_dim_.xsize_groups;
+ size_t y = group_id / frame_dim_.xsize_groups;
+ Rect block_rect(x * frame_dim_.group_dim / kBlockDim,
+ y * frame_dim_.group_dim / kBlockDim,
+ frame_dim_.group_dim / kBlockDim,
+ frame_dim_.group_dim / kBlockDim, frame_dim_.xsize_blocks,
+ frame_dim_.ysize_blocks);
+
+ size_t top_left_idx = y * (frame_dim_.xsize_groups + 1) + x;
+ size_t top_right_idx = y * (frame_dim_.xsize_groups + 1) + x + 1;
+ size_t bottom_right_idx = (y + 1) * (frame_dim_.xsize_groups + 1) + x + 1;
+ size_t bottom_left_idx = (y + 1) * (frame_dim_.xsize_groups + 1) + x;
+
+ auto fetch_status = [this](size_t idx, uint8_t bit) {
+ // Note that the acq-rel semantics of this fetch are actually needed to
+ // ensure that the pixel data of the group is already written to memory.
+ size_t status = counters_[idx].fetch_or(bit);
+ JXL_DASSERT((bit & status) == 0);
+ return bit | status;
+ };
+
+ size_t top_left_status = fetch_status(top_left_idx, kBottomRight);
+ size_t top_right_status = fetch_status(top_right_idx, kBottomLeft);
+ size_t bottom_right_status = fetch_status(bottom_right_idx, kTopLeft);
+ size_t bottom_left_status = fetch_status(bottom_left_idx, kTopRight);
+
+ size_t x1 = block_rect.x0() + block_rect.xsize();
+ size_t y1 = block_rect.y0() + block_rect.ysize();
+
+ bool is_last_group_x = frame_dim_.xsize_groups == x + 1;
+ bool is_last_group_y = frame_dim_.ysize_groups == y + 1;
+
+ // Start of border of neighbouring group, end of border of this group, start
+ // of border of this group (on the other side), end of border of next group.
+ size_t xpos[4] = {
+ block_rect.x0() == 0 ? 0 : block_rect.x0() * kBlockDim - padx,
+ block_rect.x0() == 0
+ ? 0
+ : std::min(frame_dim_.xsize, block_rect.x0() * kBlockDim + padx),
+ is_last_group_x ? frame_dim_.xsize : x1 * kBlockDim - padx,
+ std::min(frame_dim_.xsize, x1 * kBlockDim + padx)};
+ size_t ypos[4] = {
+ block_rect.y0() == 0 ? 0 : block_rect.y0() * kBlockDim - pady,
+ block_rect.y0() == 0
+ ? 0
+ : std::min(frame_dim_.ysize, block_rect.y0() * kBlockDim + pady),
+ is_last_group_y ? frame_dim_.ysize : y1 * kBlockDim - pady,
+ std::min(frame_dim_.ysize, y1 * kBlockDim + pady)};
+
+ *num_to_finalize = 0;
+ auto append_rect = [&](size_t x0, size_t x1, size_t y0, size_t y1) {
+ Rect rect(xpos[x0], ypos[y0], xpos[x1] - xpos[x0], ypos[y1] - ypos[y0]);
+ if (rect.xsize() == 0 || rect.ysize() == 0) return;
+ JXL_DASSERT(*num_to_finalize < kMaxToFinalize);
+ rects_to_finalize[(*num_to_finalize)++] = rect;
+ };
+
+ // Because of how group borders are assigned, it is impossible that we need to
+ // process the left and right side of some area but not the center area. Thus,
+ // we compute the first/last part to process in every horizontal strip and
+ // merge them together. We first collect a mask of what parts should be
+ // processed.
+ // We do this horizontally rather than vertically because horizontal borders
+ // are larger.
+ bool available_parts_mask[3][3] = {}; // [x][y]
+ // Center
+ available_parts_mask[1][1] = true;
+ // Corners
+ if (top_left_status == 0xF) available_parts_mask[0][0] = true;
+ if (top_right_status == 0xF) available_parts_mask[2][0] = true;
+ if (bottom_right_status == 0xF) available_parts_mask[2][2] = true;
+ if (bottom_left_status == 0xF) available_parts_mask[0][2] = true;
+ // Other borders
+ if (top_left_status & kTopRight) available_parts_mask[1][0] = true;
+ if (top_left_status & kBottomLeft) available_parts_mask[0][1] = true;
+ if (top_right_status & kBottomRight) available_parts_mask[2][1] = true;
+ if (bottom_left_status & kBottomRight) available_parts_mask[1][2] = true;
+
+ // Collect horizontal ranges.
+ constexpr size_t kNoSegment = 3;
+ std::pair<size_t, size_t> horizontal_segments[3] = {{kNoSegment, kNoSegment},
+ {kNoSegment, kNoSegment},
+ {kNoSegment, kNoSegment}};
+ for (size_t y = 0; y < 3; y++) {
+ for (size_t x = 0; x < 3; x++) {
+ if (!available_parts_mask[x][y]) continue;
+ JXL_DASSERT(horizontal_segments[y].second == kNoSegment ||
+ horizontal_segments[y].second == x);
+ JXL_DASSERT((horizontal_segments[y].first == kNoSegment) ==
+ (horizontal_segments[y].second == kNoSegment));
+ if (horizontal_segments[y].first == kNoSegment) {
+ horizontal_segments[y].first = x;
+ }
+ horizontal_segments[y].second = x + 1;
+ }
+ }
+ if (horizontal_segments[0] == horizontal_segments[1] &&
+ horizontal_segments[0] == horizontal_segments[2]) {
+ append_rect(horizontal_segments[0].first, horizontal_segments[0].second, 0,
+ 3);
+ } else if (horizontal_segments[0] == horizontal_segments[1]) {
+ append_rect(horizontal_segments[0].first, horizontal_segments[0].second, 0,
+ 2);
+ append_rect(horizontal_segments[2].first, horizontal_segments[2].second, 2,
+ 3);
+ } else if (horizontal_segments[1] == horizontal_segments[2]) {
+ append_rect(horizontal_segments[0].first, horizontal_segments[0].second, 0,
+ 1);
+ append_rect(horizontal_segments[1].first, horizontal_segments[1].second, 1,
+ 3);
+ } else {
+ append_rect(horizontal_segments[0].first, horizontal_segments[0].second, 0,
+ 1);
+ append_rect(horizontal_segments[1].first, horizontal_segments[1].second, 1,
+ 2);
+ append_rect(horizontal_segments[2].first, horizontal_segments[2].second, 2,
+ 3);
+ }
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/dec_group_border.h b/third_party/jpeg-xl/lib/jxl/dec_group_border.h
new file mode 100644
index 0000000000..2d974c9987
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_group_border.h
@@ -0,0 +1,47 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_GROUP_BORDER_H_
+#define LIB_JXL_DEC_GROUP_BORDER_H_
+
+#include <stddef.h>
+
+#include <atomic>
+
+#include "lib/jxl/base/arch_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+class GroupBorderAssigner {
+ public:
+ // Prepare the GroupBorderAssigner to handle a given frame.
+ void Init(const FrameDimensions& frame_dim);
+ // Marks a group as done, and returns the (at most 3) rects to run
+ // FinalizeImageRect on. `block_rect` must be the rect corresponding
+ // to the given `group_id`, measured in blocks.
+ void GroupDone(size_t group_id, size_t padx, size_t pady,
+ Rect* rects_to_finalize, size_t* num_to_finalize);
+ // Marks a group as not-done, for running re-paints.
+ void ClearDone(size_t group_id);
+
+ static constexpr size_t kMaxToFinalize = 3;
+
+ private:
+ FrameDimensions frame_dim_;
+ std::unique_ptr<std::atomic<uint8_t>[]> counters_;
+
+ // Constants to identify group positions relative to the corners.
+ static constexpr uint8_t kTopLeft = 0x01;
+ static constexpr uint8_t kTopRight = 0x02;
+ static constexpr uint8_t kBottomRight = 0x04;
+ static constexpr uint8_t kBottomLeft = 0x08;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_DEC_GROUP_BORDER_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dec_huffman.cc b/third_party/jpeg-xl/lib/jxl/dec_huffman.cc
new file mode 100644
index 0000000000..05b275773a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_huffman.cc
@@ -0,0 +1,255 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_huffman.h"
+
+#include <string.h> /* for memset */
+
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/huffman_table.h"
+
+namespace jxl {
+
+static const int kCodeLengthCodes = 18;
+static const uint8_t kCodeLengthCodeOrder[kCodeLengthCodes] = {
+ 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+};
+static const uint8_t kDefaultCodeLength = 8;
+static const uint8_t kCodeLengthRepeatCode = 16;
+
+int ReadHuffmanCodeLengths(const uint8_t* code_length_code_lengths,
+ int num_symbols, uint8_t* code_lengths,
+ BitReader* br) {
+ int symbol = 0;
+ uint8_t prev_code_len = kDefaultCodeLength;
+ int repeat = 0;
+ uint8_t repeat_code_len = 0;
+ int space = 32768;
+ HuffmanCode table[32];
+
+ uint16_t counts[16] = {0};
+ for (int i = 0; i < kCodeLengthCodes; ++i) {
+ ++counts[code_length_code_lengths[i]];
+ }
+ if (!BuildHuffmanTable(table, 5, code_length_code_lengths, kCodeLengthCodes,
+ &counts[0])) {
+ return 0;
+ }
+
+ while (symbol < num_symbols && space > 0) {
+ const HuffmanCode* p = table;
+ uint8_t code_len;
+ br->Refill();
+ p += br->PeekFixedBits<5>();
+ br->Consume(p->bits);
+ code_len = (uint8_t)p->value;
+ if (code_len < kCodeLengthRepeatCode) {
+ repeat = 0;
+ code_lengths[symbol++] = code_len;
+ if (code_len != 0) {
+ prev_code_len = code_len;
+ space -= 32768u >> code_len;
+ }
+ } else {
+ const int extra_bits = code_len - 14;
+ int old_repeat;
+ int repeat_delta;
+ uint8_t new_len = 0;
+ if (code_len == kCodeLengthRepeatCode) {
+ new_len = prev_code_len;
+ }
+ if (repeat_code_len != new_len) {
+ repeat = 0;
+ repeat_code_len = new_len;
+ }
+ old_repeat = repeat;
+ if (repeat > 0) {
+ repeat -= 2;
+ repeat <<= extra_bits;
+ }
+ repeat += (int)br->ReadBits(extra_bits) + 3;
+ repeat_delta = repeat - old_repeat;
+ if (symbol + repeat_delta > num_symbols) {
+ return 0;
+ }
+ memset(&code_lengths[symbol], repeat_code_len, (size_t)repeat_delta);
+ symbol += repeat_delta;
+ if (repeat_code_len != 0) {
+ space -= repeat_delta << (15 - repeat_code_len);
+ }
+ }
+ }
+ if (space != 0) {
+ return 0;
+ }
+ memset(&code_lengths[symbol], 0, (size_t)(num_symbols - symbol));
+ return true;
+}
+
+static JXL_INLINE bool ReadSimpleCode(size_t alphabet_size, BitReader* br,
+ HuffmanCode* table) {
+ size_t max_bits =
+ (alphabet_size > 1u) ? FloorLog2Nonzero(alphabet_size - 1u) + 1 : 0;
+
+ size_t num_symbols = br->ReadFixedBits<2>() + 1;
+
+ uint16_t symbols[4] = {0};
+ for (size_t i = 0; i < num_symbols; ++i) {
+ uint16_t symbol = br->ReadBits(max_bits);
+ if (symbol >= alphabet_size) {
+ return false;
+ }
+ symbols[i] = symbol;
+ }
+
+ for (size_t i = 0; i < num_symbols - 1; ++i) {
+ for (size_t j = i + 1; j < num_symbols; ++j) {
+ if (symbols[i] == symbols[j]) return false;
+ }
+ }
+
+ // 4 symbols have to option to encode.
+ if (num_symbols == 4) num_symbols += br->ReadFixedBits<1>();
+
+ const auto swap_symbols = [&symbols](size_t i, size_t j) {
+ uint16_t t = symbols[j];
+ symbols[j] = symbols[i];
+ symbols[i] = t;
+ };
+
+ size_t table_size = 1;
+ switch (num_symbols) {
+ case 1:
+ table[0] = {0, symbols[0]};
+ break;
+ case 2:
+ if (symbols[0] > symbols[1]) swap_symbols(0, 1);
+ table[0] = {1, symbols[0]};
+ table[1] = {1, symbols[1]};
+ table_size = 2;
+ break;
+ case 3:
+ if (symbols[1] > symbols[2]) swap_symbols(1, 2);
+ table[0] = {1, symbols[0]};
+ table[2] = {1, symbols[0]};
+ table[1] = {2, symbols[1]};
+ table[3] = {2, symbols[2]};
+ table_size = 4;
+ break;
+ case 4: {
+ for (size_t i = 0; i < 3; ++i) {
+ for (size_t j = i + 1; j < 4; ++j) {
+ if (symbols[i] > symbols[j]) swap_symbols(i, j);
+ }
+ }
+ table[0] = {2, symbols[0]};
+ table[2] = {2, symbols[1]};
+ table[1] = {2, symbols[2]};
+ table[3] = {2, symbols[3]};
+ table_size = 4;
+ break;
+ }
+ case 5: {
+ if (symbols[2] > symbols[3]) swap_symbols(2, 3);
+ table[0] = {1, symbols[0]};
+ table[1] = {2, symbols[1]};
+ table[2] = {1, symbols[0]};
+ table[3] = {3, symbols[2]};
+ table[4] = {1, symbols[0]};
+ table[5] = {2, symbols[1]};
+ table[6] = {1, symbols[0]};
+ table[7] = {3, symbols[3]};
+ table_size = 8;
+ break;
+ }
+ default: {
+ // Unreachable.
+ return false;
+ }
+ }
+
+ const uint32_t goal_size = 1u << kHuffmanTableBits;
+ while (table_size != goal_size) {
+ memcpy(&table[table_size], &table[0],
+ (size_t)table_size * sizeof(table[0]));
+ table_size <<= 1;
+ }
+
+ return true;
+}
+
+bool HuffmanDecodingData::ReadFromBitStream(size_t alphabet_size,
+ BitReader* br) {
+ if (alphabet_size > (1 << PREFIX_MAX_BITS)) return false;
+
+ /* simple_code_or_skip is used as follows:
+ 1 for simple code;
+ 0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */
+ uint32_t simple_code_or_skip = br->ReadFixedBits<2>();
+ if (simple_code_or_skip == 1u) {
+ table_.resize(1u << kHuffmanTableBits);
+ return ReadSimpleCode(alphabet_size, br, table_.data());
+ }
+
+ std::vector<uint8_t> code_lengths(alphabet_size, 0);
+ uint8_t code_length_code_lengths[kCodeLengthCodes] = {0};
+ int space = 32;
+ int num_codes = 0;
+ /* Static Huffman code for the code length code lengths */
+ static const HuffmanCode huff[16] = {
+ {2, 0}, {2, 4}, {2, 3}, {3, 2}, {2, 0}, {2, 4}, {2, 3}, {4, 1},
+ {2, 0}, {2, 4}, {2, 3}, {3, 2}, {2, 0}, {2, 4}, {2, 3}, {4, 5},
+ };
+ for (size_t i = simple_code_or_skip; i < kCodeLengthCodes && space > 0; ++i) {
+ const int code_len_idx = kCodeLengthCodeOrder[i];
+ const HuffmanCode* p = huff;
+ uint8_t v;
+ br->Refill();
+ p += br->PeekFixedBits<4>();
+ br->Consume(p->bits);
+ v = (uint8_t)p->value;
+ code_length_code_lengths[code_len_idx] = v;
+ if (v != 0) {
+ space -= (32u >> v);
+ ++num_codes;
+ }
+ }
+ bool ok = (num_codes == 1 || space == 0) &&
+ ReadHuffmanCodeLengths(code_length_code_lengths, alphabet_size,
+ &code_lengths[0], br);
+
+ if (!ok) return false;
+ uint16_t counts[16] = {0};
+ for (size_t i = 0; i < alphabet_size; ++i) {
+ ++counts[code_lengths[i]];
+ }
+ table_.resize(alphabet_size + 376);
+ uint32_t table_size =
+ BuildHuffmanTable(table_.data(), kHuffmanTableBits, &code_lengths[0],
+ alphabet_size, &counts[0]);
+ table_.resize(table_size);
+ return (table_size > 0);
+}
+
+// Decodes the next Huffman coded symbol from the bit-stream.
+uint16_t HuffmanDecodingData::ReadSymbol(BitReader* br) const {
+ size_t n_bits;
+ const HuffmanCode* table = table_.data();
+ table += br->PeekBits(kHuffmanTableBits);
+ n_bits = table->bits;
+ if (n_bits > kHuffmanTableBits) {
+ br->Consume(kHuffmanTableBits);
+ n_bits -= kHuffmanTableBits;
+ table += table->value;
+ table += br->PeekBits(n_bits);
+ }
+ br->Consume(table->bits);
+ return table->value;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/dec_huffman.h b/third_party/jpeg-xl/lib/jxl/dec_huffman.h
new file mode 100644
index 0000000000..162c3e309c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_huffman.h
@@ -0,0 +1,32 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_HUFFMAN_H_
+#define LIB_JXL_DEC_HUFFMAN_H_
+
+#include <memory>
+#include <vector>
+
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/huffman_table.h"
+
+namespace jxl {
+
+static constexpr size_t kHuffmanTableBits = 8u;
+
+struct HuffmanDecodingData {
+ // Decodes the Huffman code lengths from the bit-stream and fills in the
+ // pre-allocated table with the corresponding 2-level Huffman decoding table.
+ // Returns false if the Huffman code lengths can not de decoded.
+ bool ReadFromBitStream(size_t alphabet_size, BitReader* br);
+
+ uint16_t ReadSymbol(BitReader* br) const;
+
+ std::vector<HuffmanCode> table_;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_DEC_HUFFMAN_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dec_modular.cc b/third_party/jpeg-xl/lib/jxl/dec_modular.cc
new file mode 100644
index 0000000000..bf85eaa05c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_modular.cc
@@ -0,0 +1,774 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_modular.h"
+
+#include <stdint.h>
+
+#include <atomic>
+#include <sstream>
+#include <vector>
+
+#include "lib/jxl/frame_header.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_modular.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::Rebind;
+
+void MultiplySum(const size_t xsize,
+ const pixel_type* const JXL_RESTRICT row_in,
+ const pixel_type* const JXL_RESTRICT row_in_Y,
+ const float factor, float* const JXL_RESTRICT row_out) {
+ const HWY_FULL(float) df;
+ const Rebind<pixel_type, HWY_FULL(float)> di; // assumes pixel_type <= float
+ const auto factor_v = Set(df, factor);
+ for (size_t x = 0; x < xsize; x += Lanes(di)) {
+ const auto in = Add(Load(di, row_in + x), Load(di, row_in_Y + x));
+ const auto out = Mul(ConvertTo(df, in), factor_v);
+ Store(out, df, row_out + x);
+ }
+}
+
+void RgbFromSingle(const size_t xsize,
+ const pixel_type* const JXL_RESTRICT row_in,
+ const float factor, float* out_r, float* out_g,
+ float* out_b) {
+ const HWY_FULL(float) df;
+ const Rebind<pixel_type, HWY_FULL(float)> di; // assumes pixel_type <= float
+
+ const auto factor_v = Set(df, factor);
+ for (size_t x = 0; x < xsize; x += Lanes(di)) {
+ const auto in = Load(di, row_in + x);
+ const auto out = Mul(ConvertTo(df, in), factor_v);
+ Store(out, df, out_r + x);
+ Store(out, df, out_g + x);
+ Store(out, df, out_b + x);
+ }
+}
+
+void SingleFromSingle(const size_t xsize,
+ const pixel_type* const JXL_RESTRICT row_in,
+ const float factor, float* row_out) {
+ const HWY_FULL(float) df;
+ const Rebind<pixel_type, HWY_FULL(float)> di; // assumes pixel_type <= float
+
+ const auto factor_v = Set(df, factor);
+ for (size_t x = 0; x < xsize; x += Lanes(di)) {
+ const auto in = Load(di, row_in + x);
+ const auto out = Mul(ConvertTo(df, in), factor_v);
+ Store(out, df, row_out + x);
+ }
+}
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(MultiplySum); // Local function
+HWY_EXPORT(RgbFromSingle); // Local function
+HWY_EXPORT(SingleFromSingle); // Local function
+
+// Slow conversion using double precision multiplication, only
+// needed when the bit depth is too high for single precision
+void SingleFromSingleAccurate(const size_t xsize,
+ const pixel_type* const JXL_RESTRICT row_in,
+ const double factor, float* row_out) {
+ for (size_t x = 0; x < xsize; x++) {
+ row_out[x] = row_in[x] * factor;
+ }
+}
+
+// convert custom [bits]-bit float (with [exp_bits] exponent bits) stored as int
+// back to binary32 float
+void int_to_float(const pixel_type* const JXL_RESTRICT row_in,
+ float* const JXL_RESTRICT row_out, const size_t xsize,
+ const int bits, const int exp_bits) {
+ if (bits == 32) {
+ JXL_ASSERT(sizeof(pixel_type) == sizeof(float));
+ JXL_ASSERT(exp_bits == 8);
+ memcpy(row_out, row_in, xsize * sizeof(float));
+ return;
+ }
+ int exp_bias = (1 << (exp_bits - 1)) - 1;
+ int sign_shift = bits - 1;
+ int mant_bits = bits - exp_bits - 1;
+ int mant_shift = 23 - mant_bits;
+ for (size_t x = 0; x < xsize; ++x) {
+ uint32_t f;
+ memcpy(&f, &row_in[x], 4);
+ int signbit = (f >> sign_shift);
+ f &= (1 << sign_shift) - 1;
+ if (f == 0) {
+ row_out[x] = (signbit ? -0.f : 0.f);
+ continue;
+ }
+ int exp = (f >> mant_bits);
+ int mantissa = (f & ((1 << mant_bits) - 1));
+ mantissa <<= mant_shift;
+ // Try to normalize only if there is space for maneuver.
+ if (exp == 0 && exp_bits < 8) {
+ // subnormal number
+ while ((mantissa & 0x800000) == 0) {
+ mantissa <<= 1;
+ exp--;
+ }
+ exp++;
+ // remove leading 1 because it is implicit now
+ mantissa &= 0x7fffff;
+ }
+ exp -= exp_bias;
+ // broke up the arbitrary float into its parts, now reassemble into
+ // binary32
+ exp += 127;
+ JXL_ASSERT(exp >= 0);
+ f = (signbit ? 0x80000000 : 0);
+ f |= (exp << 23);
+ f |= mantissa;
+ memcpy(&row_out[x], &f, 4);
+ }
+}
+
+std::string ModularStreamId::DebugString() const {
+ std::ostringstream os;
+ os << (kind == kGlobalData ? "ModularGlobal"
+ : kind == kVarDCTDC ? "VarDCTDC"
+ : kind == kModularDC ? "ModularDC"
+ : kind == kACMetadata ? "ACMeta"
+ : kind == kQuantTable ? "QuantTable"
+ : kind == kModularAC ? "ModularAC"
+ : "");
+ if (kind == kVarDCTDC || kind == kModularDC || kind == kACMetadata ||
+ kind == kModularAC) {
+ os << " group " << group_id;
+ }
+ if (kind == kModularAC) {
+ os << " pass " << pass_id;
+ }
+ if (kind == kQuantTable) {
+ os << " " << quant_table_id;
+ }
+ return os.str();
+}
+
+Status ModularFrameDecoder::DecodeGlobalInfo(BitReader* reader,
+ const FrameHeader& frame_header,
+ bool allow_truncated_group) {
+ bool decode_color = frame_header.encoding == FrameEncoding::kModular;
+ const auto& metadata = frame_header.nonserialized_metadata->m;
+ bool is_gray = metadata.color_encoding.IsGray();
+ size_t nb_chans = 3;
+ if (is_gray && frame_header.color_transform == ColorTransform::kNone) {
+ nb_chans = 1;
+ }
+ do_color = decode_color;
+ size_t nb_extra = metadata.extra_channel_info.size();
+ bool has_tree = reader->ReadBits(1);
+ if (!allow_truncated_group ||
+ reader->TotalBitsConsumed() < reader->TotalBytes() * kBitsPerByte) {
+ if (has_tree) {
+ size_t tree_size_limit =
+ std::min(static_cast<size_t>(1 << 22),
+ 1024 + frame_dim.xsize * frame_dim.ysize *
+ (nb_chans + nb_extra) / 16);
+ JXL_RETURN_IF_ERROR(DecodeTree(reader, &tree, tree_size_limit));
+ JXL_RETURN_IF_ERROR(
+ DecodeHistograms(reader, (tree.size() + 1) / 2, &code, &context_map));
+ }
+ }
+ if (!do_color) nb_chans = 0;
+
+ bool fp = metadata.bit_depth.floating_point_sample;
+
+ // bits_per_sample is just metadata for XYB images.
+ if (metadata.bit_depth.bits_per_sample >= 32 && do_color &&
+ frame_header.color_transform != ColorTransform::kXYB) {
+ if (metadata.bit_depth.bits_per_sample == 32 && fp == false) {
+ return JXL_FAILURE("uint32_t not supported in dec_modular");
+ } else if (metadata.bit_depth.bits_per_sample > 32) {
+ return JXL_FAILURE("bits_per_sample > 32 not supported");
+ }
+ }
+
+ Image gi(frame_dim.xsize, frame_dim.ysize, metadata.bit_depth.bits_per_sample,
+ nb_chans + nb_extra);
+
+ all_same_shift = true;
+ if (frame_header.color_transform == ColorTransform::kYCbCr) {
+ for (size_t c = 0; c < nb_chans; c++) {
+ gi.channel[c].hshift = frame_header.chroma_subsampling.HShift(c);
+ gi.channel[c].vshift = frame_header.chroma_subsampling.VShift(c);
+ size_t xsize_shifted =
+ DivCeil(frame_dim.xsize, 1 << gi.channel[c].hshift);
+ size_t ysize_shifted =
+ DivCeil(frame_dim.ysize, 1 << gi.channel[c].vshift);
+ gi.channel[c].shrink(xsize_shifted, ysize_shifted);
+ if (gi.channel[c].hshift != gi.channel[0].hshift ||
+ gi.channel[c].vshift != gi.channel[0].vshift)
+ all_same_shift = false;
+ }
+ }
+
+ for (size_t ec = 0, c = nb_chans; ec < nb_extra; ec++, c++) {
+ size_t ecups = frame_header.extra_channel_upsampling[ec];
+ gi.channel[c].shrink(DivCeil(frame_dim.xsize_upsampled, ecups),
+ DivCeil(frame_dim.ysize_upsampled, ecups));
+ gi.channel[c].hshift = gi.channel[c].vshift =
+ CeilLog2Nonzero(ecups) - CeilLog2Nonzero(frame_header.upsampling);
+ if (gi.channel[c].hshift != gi.channel[0].hshift ||
+ gi.channel[c].vshift != gi.channel[0].vshift)
+ all_same_shift = false;
+ }
+
+ JXL_DEBUG_V(6, "DecodeGlobalInfo: full_image (w/o transforms) %s",
+ gi.DebugString().c_str());
+ ModularOptions options;
+ options.max_chan_size = frame_dim.group_dim;
+ options.group_dim = frame_dim.group_dim;
+ Status dec_status = ModularGenericDecompress(
+ reader, gi, &global_header, ModularStreamId::Global().ID(frame_dim),
+ &options,
+ /*undo_transforms=*/false, &tree, &code, &context_map,
+ allow_truncated_group);
+ if (!allow_truncated_group) JXL_RETURN_IF_ERROR(dec_status);
+ if (dec_status.IsFatalError()) {
+ return JXL_FAILURE("Failed to decode global modular info");
+ }
+
+ // TODO(eustas): are we sure this can be done after partial decode?
+ have_something = false;
+ for (size_t c = 0; c < gi.channel.size(); c++) {
+ Channel& gic = gi.channel[c];
+ if (c >= gi.nb_meta_channels && gic.w <= frame_dim.group_dim &&
+ gic.h <= frame_dim.group_dim)
+ have_something = true;
+ }
+ // move global transforms to groups if possible
+ if (!have_something && all_same_shift) {
+ if (gi.transform.size() == 1 && gi.transform[0].id == TransformId::kRCT) {
+ global_transform = gi.transform;
+ gi.transform.clear();
+ // TODO(jon): also move no-delta-palette out (trickier though)
+ }
+ }
+ full_image = std::move(gi);
+ JXL_DEBUG_V(6, "DecodeGlobalInfo: full_image (with transforms) %s",
+ full_image.DebugString().c_str());
+ return dec_status;
+}
+
+void ModularFrameDecoder::MaybeDropFullImage() {
+ if (full_image.transform.empty() && !have_something && all_same_shift) {
+ use_full_image = false;
+ JXL_DEBUG_V(6, "Dropping full image");
+ for (auto& ch : full_image.channel) {
+ // keep metadata on channels around, but dealloc their planes
+ ch.plane = Plane<pixel_type>();
+ }
+ }
+}
+
+Status ModularFrameDecoder::DecodeGroup(
+ const Rect& rect, BitReader* reader, int minShift, int maxShift,
+ const ModularStreamId& stream, bool zerofill, PassesDecoderState* dec_state,
+ RenderPipelineInput* render_pipeline_input, bool allow_truncated,
+ bool* should_run_pipeline) {
+ JXL_DEBUG_V(6, "Decoding %s with rect %s and shift bracket %d..%d %s",
+ stream.DebugString().c_str(), Description(rect).c_str(), minShift,
+ maxShift, zerofill ? "using zerofill" : "");
+ JXL_DASSERT(stream.kind == ModularStreamId::kModularDC ||
+ stream.kind == ModularStreamId::kModularAC);
+ const size_t xsize = rect.xsize();
+ const size_t ysize = rect.ysize();
+ Image gi(xsize, ysize, full_image.bitdepth, 0);
+ // start at the first bigger-than-groupsize non-metachannel
+ size_t c = full_image.nb_meta_channels;
+ for (; c < full_image.channel.size(); c++) {
+ Channel& fc = full_image.channel[c];
+ if (fc.w > frame_dim.group_dim || fc.h > frame_dim.group_dim) break;
+ }
+ size_t beginc = c;
+ for (; c < full_image.channel.size(); c++) {
+ Channel& fc = full_image.channel[c];
+ int shift = std::min(fc.hshift, fc.vshift);
+ if (shift > maxShift) continue;
+ if (shift < minShift) continue;
+ Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift,
+ rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h);
+ if (r.xsize() == 0 || r.ysize() == 0) continue;
+ if (zerofill && use_full_image) {
+ for (size_t y = 0; y < r.ysize(); ++y) {
+ pixel_type* const JXL_RESTRICT row_out = r.Row(&fc.plane, y);
+ memset(row_out, 0, r.xsize() * sizeof(*row_out));
+ }
+ } else {
+ Channel gc(r.xsize(), r.ysize());
+ if (zerofill) ZeroFillImage(&gc.plane);
+ gc.hshift = fc.hshift;
+ gc.vshift = fc.vshift;
+ gi.channel.emplace_back(std::move(gc));
+ }
+ }
+ if (zerofill && use_full_image) return true;
+ // Return early if there's nothing to decode. Otherwise there might be
+ // problems later (in ModularImageToDecodedRect).
+ if (gi.channel.empty()) {
+ if (dec_state && should_run_pipeline) {
+ const auto& frame_header = dec_state->shared->frame_header;
+ const auto* metadata = frame_header.nonserialized_metadata;
+ if (do_color || metadata->m.num_extra_channels > 0) {
+ // Signal to FrameDecoder that we do not have some of the required input
+ // for the render pipeline.
+ *should_run_pipeline = false;
+ }
+ }
+ JXL_DEBUG_V(6, "Nothing to decode, returning early.");
+ return true;
+ }
+ ModularOptions options;
+ if (!zerofill) {
+ auto status = ModularGenericDecompress(
+ reader, gi, /*header=*/nullptr, stream.ID(frame_dim), &options,
+ /*undo_transforms=*/true, &tree, &code, &context_map, allow_truncated);
+ if (!allow_truncated) JXL_RETURN_IF_ERROR(status);
+ if (status.IsFatalError()) return status;
+ }
+ // Undo global transforms that have been pushed to the group level
+ if (!use_full_image) {
+ JXL_ASSERT(render_pipeline_input);
+ for (auto t : global_transform) {
+ JXL_RETURN_IF_ERROR(t.Inverse(gi, global_header.wp_header));
+ }
+ JXL_RETURN_IF_ERROR(ModularImageToDecodedRect(gi, dec_state, nullptr,
+ *render_pipeline_input,
+ Rect(0, 0, gi.w, gi.h)));
+ return true;
+ }
+ int gic = 0;
+ for (c = beginc; c < full_image.channel.size(); c++) {
+ Channel& fc = full_image.channel[c];
+ int shift = std::min(fc.hshift, fc.vshift);
+ if (shift > maxShift) continue;
+ if (shift < minShift) continue;
+ Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift,
+ rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h);
+ if (r.xsize() == 0 || r.ysize() == 0) continue;
+ JXL_ASSERT(use_full_image);
+ CopyImageTo(/*rect_from=*/Rect(0, 0, r.xsize(), r.ysize()),
+ /*from=*/gi.channel[gic].plane,
+ /*rect_to=*/r, /*to=*/&fc.plane);
+ gic++;
+ }
+ return true;
+}
+
+Status ModularFrameDecoder::DecodeVarDCTDC(size_t group_id, BitReader* reader,
+ PassesDecoderState* dec_state) {
+ const Rect r = dec_state->shared->DCGroupRect(group_id);
+ // TODO(eustas): investigate if we could reduce the impact of
+ // EvalRationalPolynomial; generally speaking, the limit is
+ // 2**(128/(3*magic)), where 128 comes from IEEE 754 exponent,
+ // 3 comes from XybToRgb that cubes the values, and "magic" is
+ // the sum of all other contributions. 2**18 is known to lead
+ // to NaN on input found by fuzzing (see commit message).
+ Image image(r.xsize(), r.ysize(), full_image.bitdepth, 3);
+ size_t stream_id = ModularStreamId::VarDCTDC(group_id).ID(frame_dim);
+ reader->Refill();
+ size_t extra_precision = reader->ReadFixedBits<2>();
+ float mul = 1.0f / (1 << extra_precision);
+ ModularOptions options;
+ for (size_t c = 0; c < 3; c++) {
+ Channel& ch = image.channel[c < 2 ? c ^ 1 : c];
+ ch.w >>= dec_state->shared->frame_header.chroma_subsampling.HShift(c);
+ ch.h >>= dec_state->shared->frame_header.chroma_subsampling.VShift(c);
+ ch.shrink();
+ }
+ if (!ModularGenericDecompress(
+ reader, image, /*header=*/nullptr, stream_id, &options,
+ /*undo_transforms=*/true, &tree, &code, &context_map)) {
+ return JXL_FAILURE("Failed to decode modular DC group");
+ }
+ DequantDC(r, &dec_state->shared_storage.dc_storage,
+ &dec_state->shared_storage.quant_dc, image,
+ dec_state->shared->quantizer.MulDC(), mul,
+ dec_state->shared->cmap.DCFactors(),
+ dec_state->shared->frame_header.chroma_subsampling,
+ dec_state->shared->block_ctx_map);
+ return true;
+}
+
+Status ModularFrameDecoder::DecodeAcMetadata(size_t group_id, BitReader* reader,
+ PassesDecoderState* dec_state) {
+ const Rect r = dec_state->shared->DCGroupRect(group_id);
+ size_t upper_bound = r.xsize() * r.ysize();
+ reader->Refill();
+ size_t count = reader->ReadBits(CeilLog2Nonzero(upper_bound)) + 1;
+ size_t stream_id = ModularStreamId::ACMetadata(group_id).ID(frame_dim);
+ // YToX, YToB, ACS + QF, EPF
+ Image image(r.xsize(), r.ysize(), full_image.bitdepth, 4);
+ static_assert(kColorTileDimInBlocks == 8, "Color tile size changed");
+ Rect cr(r.x0() >> 3, r.y0() >> 3, (r.xsize() + 7) >> 3, (r.ysize() + 7) >> 3);
+ image.channel[0] = Channel(cr.xsize(), cr.ysize(), 3, 3);
+ image.channel[1] = Channel(cr.xsize(), cr.ysize(), 3, 3);
+ image.channel[2] = Channel(count, 2, 0, 0);
+ ModularOptions options;
+ if (!ModularGenericDecompress(
+ reader, image, /*header=*/nullptr, stream_id, &options,
+ /*undo_transforms=*/true, &tree, &code, &context_map)) {
+ return JXL_FAILURE("Failed to decode AC metadata");
+ }
+ ConvertPlaneAndClamp(Rect(image.channel[0].plane), image.channel[0].plane, cr,
+ &dec_state->shared_storage.cmap.ytox_map);
+ ConvertPlaneAndClamp(Rect(image.channel[1].plane), image.channel[1].plane, cr,
+ &dec_state->shared_storage.cmap.ytob_map);
+ size_t num = 0;
+ bool is444 = dec_state->shared->frame_header.chroma_subsampling.Is444();
+ auto& ac_strategy = dec_state->shared_storage.ac_strategy;
+ size_t xlim = std::min(ac_strategy.xsize(), r.x0() + r.xsize());
+ size_t ylim = std::min(ac_strategy.ysize(), r.y0() + r.ysize());
+ uint32_t local_used_acs = 0;
+ for (size_t iy = 0; iy < r.ysize(); iy++) {
+ size_t y = r.y0() + iy;
+ int32_t* row_qf = r.Row(&dec_state->shared_storage.raw_quant_field, iy);
+ uint8_t* row_epf = r.Row(&dec_state->shared_storage.epf_sharpness, iy);
+ int32_t* row_in_1 = image.channel[2].plane.Row(0);
+ int32_t* row_in_2 = image.channel[2].plane.Row(1);
+ int32_t* row_in_3 = image.channel[3].plane.Row(iy);
+ for (size_t ix = 0; ix < r.xsize(); ix++) {
+ size_t x = r.x0() + ix;
+ int sharpness = row_in_3[ix];
+ if (sharpness < 0 || sharpness >= LoopFilter::kEpfSharpEntries) {
+ return JXL_FAILURE("Corrupted sharpness field");
+ }
+ row_epf[ix] = sharpness;
+ if (ac_strategy.IsValid(x, y)) {
+ continue;
+ }
+
+ if (num >= count) return JXL_FAILURE("Corrupted stream");
+
+ if (!AcStrategy::IsRawStrategyValid(row_in_1[num])) {
+ return JXL_FAILURE("Invalid AC strategy");
+ }
+ local_used_acs |= 1u << row_in_1[num];
+ AcStrategy acs = AcStrategy::FromRawStrategy(row_in_1[num]);
+ if ((acs.covered_blocks_x() > 1 || acs.covered_blocks_y() > 1) &&
+ !is444) {
+ return JXL_FAILURE(
+ "AC strategy not compatible with chroma subsampling");
+ }
+ // Ensure that blocks do not overflow *AC* groups.
+ size_t next_x_ac_block = (x / kGroupDimInBlocks + 1) * kGroupDimInBlocks;
+ size_t next_y_ac_block = (y / kGroupDimInBlocks + 1) * kGroupDimInBlocks;
+ size_t next_x_dct_block = x + acs.covered_blocks_x();
+ size_t next_y_dct_block = y + acs.covered_blocks_y();
+ if (next_x_dct_block > next_x_ac_block || next_x_dct_block > xlim) {
+ return JXL_FAILURE("Invalid AC strategy, x overflow");
+ }
+ if (next_y_dct_block > next_y_ac_block || next_y_dct_block > ylim) {
+ return JXL_FAILURE("Invalid AC strategy, y overflow");
+ }
+ JXL_RETURN_IF_ERROR(
+ ac_strategy.SetNoBoundsCheck(x, y, AcStrategy::Type(row_in_1[num])));
+ row_qf[ix] = 1 + std::max<int32_t>(0, std::min(Quantizer::kQuantMax - 1,
+ row_in_2[num]));
+ num++;
+ }
+ }
+ dec_state->used_acs |= local_used_acs;
+ if (dec_state->shared->frame_header.loop_filter.epf_iters > 0) {
+ ComputeSigma(r, dec_state);
+ }
+ return true;
+}
+
+Status ModularFrameDecoder::ModularImageToDecodedRect(
+ Image& gi, PassesDecoderState* dec_state, jxl::ThreadPool* pool,
+ RenderPipelineInput& render_pipeline_input, Rect modular_rect) {
+ const auto& frame_header = dec_state->shared->frame_header;
+ const auto* metadata = frame_header.nonserialized_metadata;
+ JXL_CHECK(gi.transform.empty());
+
+ auto get_row = [&](size_t c, size_t y) {
+ const auto& buffer = render_pipeline_input.GetBuffer(c);
+ return buffer.second.Row(buffer.first, y);
+ };
+
+ size_t c = 0;
+ if (do_color) {
+ const bool rgb_from_gray =
+ metadata->m.color_encoding.IsGray() &&
+ frame_header.color_transform == ColorTransform::kNone;
+ const bool fp = metadata->m.bit_depth.floating_point_sample &&
+ frame_header.color_transform != ColorTransform::kXYB;
+ for (; c < 3; c++) {
+ double factor = full_image.bitdepth < 32
+ ? 1.0 / ((1u << full_image.bitdepth) - 1)
+ : 0;
+ size_t c_in = c;
+ if (frame_header.color_transform == ColorTransform::kXYB) {
+ factor = dec_state->shared->matrices.DCQuants()[c];
+ // XYB is encoded as YX(B-Y)
+ if (c < 2) c_in = 1 - c;
+ } else if (rgb_from_gray) {
+ c_in = 0;
+ }
+ JXL_ASSERT(c_in < gi.channel.size());
+ Channel& ch_in = gi.channel[c_in];
+ // TODO(eustas): could we detect it on earlier stage?
+ if (ch_in.w == 0 || ch_in.h == 0) {
+ return JXL_FAILURE("Empty image");
+ }
+ JXL_CHECK(ch_in.hshift <= 3 && ch_in.vshift <= 3);
+ Rect r = render_pipeline_input.GetBuffer(c).second;
+ Rect mr(modular_rect.x0() >> ch_in.hshift,
+ modular_rect.y0() >> ch_in.vshift,
+ DivCeil(modular_rect.xsize(), 1 << ch_in.hshift),
+ DivCeil(modular_rect.ysize(), 1 << ch_in.vshift));
+ mr = mr.Crop(ch_in.plane);
+ size_t xsize_shifted = r.xsize();
+ size_t ysize_shifted = r.ysize();
+ if (r.ysize() != mr.ysize() || r.xsize() != mr.xsize()) {
+ return JXL_FAILURE("Dimension mismatch: trying to fit a %" PRIuS
+ "x%" PRIuS
+ " modular channel into "
+ "a %" PRIuS "x%" PRIuS " rect",
+ mr.xsize(), mr.ysize(), r.xsize(), r.ysize());
+ }
+ if (frame_header.color_transform == ColorTransform::kXYB && c == 2) {
+ JXL_ASSERT(!fp);
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, ysize_shifted, ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /* thread */) {
+ const size_t y = task;
+ const pixel_type* const JXL_RESTRICT row_in =
+ mr.Row(&ch_in.plane, y);
+ const pixel_type* const JXL_RESTRICT row_in_Y =
+ mr.Row(&gi.channel[0].plane, y);
+ float* const JXL_RESTRICT row_out = get_row(c, y);
+ HWY_DYNAMIC_DISPATCH(MultiplySum)
+ (xsize_shifted, row_in, row_in_Y, factor, row_out);
+ },
+ "ModularIntToFloat"));
+ } else if (fp) {
+ int bits = metadata->m.bit_depth.bits_per_sample;
+ int exp_bits = metadata->m.bit_depth.exponent_bits_per_sample;
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, ysize_shifted, ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /* thread */) {
+ const size_t y = task;
+ const pixel_type* const JXL_RESTRICT row_in =
+ mr.Row(&ch_in.plane, y);
+ if (rgb_from_gray) {
+ for (size_t cc = 0; cc < 3; cc++) {
+ float* const JXL_RESTRICT row_out = get_row(cc, y);
+ int_to_float(row_in, row_out, xsize_shifted, bits, exp_bits);
+ }
+ } else {
+ float* const JXL_RESTRICT row_out = get_row(c, y);
+ int_to_float(row_in, row_out, xsize_shifted, bits, exp_bits);
+ }
+ },
+ "ModularIntToFloat_losslessfloat"));
+ } else {
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, ysize_shifted, ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /* thread */) {
+ const size_t y = task;
+ const pixel_type* const JXL_RESTRICT row_in =
+ mr.Row(&ch_in.plane, y);
+ if (rgb_from_gray) {
+ if (full_image.bitdepth < 23) {
+ HWY_DYNAMIC_DISPATCH(RgbFromSingle)
+ (xsize_shifted, row_in, factor, get_row(0, y), get_row(1, y),
+ get_row(2, y));
+ } else {
+ SingleFromSingleAccurate(xsize_shifted, row_in, factor,
+ get_row(0, y));
+ SingleFromSingleAccurate(xsize_shifted, row_in, factor,
+ get_row(1, y));
+ SingleFromSingleAccurate(xsize_shifted, row_in, factor,
+ get_row(2, y));
+ }
+ } else {
+ float* const JXL_RESTRICT row_out = get_row(c, y);
+ if (full_image.bitdepth < 23) {
+ HWY_DYNAMIC_DISPATCH(SingleFromSingle)
+ (xsize_shifted, row_in, factor, row_out);
+ } else {
+ SingleFromSingleAccurate(xsize_shifted, row_in, factor,
+ row_out);
+ }
+ }
+ },
+ "ModularIntToFloat"));
+ }
+ if (rgb_from_gray) {
+ break;
+ }
+ }
+ if (rgb_from_gray) {
+ c = 1;
+ }
+ }
+ size_t num_extra_channels = metadata->m.num_extra_channels;
+ for (size_t ec = 0; ec < num_extra_channels; ec++, c++) {
+ const ExtraChannelInfo& eci = metadata->m.extra_channel_info[ec];
+ int bits = eci.bit_depth.bits_per_sample;
+ int exp_bits = eci.bit_depth.exponent_bits_per_sample;
+ bool fp = eci.bit_depth.floating_point_sample;
+ JXL_ASSERT(fp || bits < 32);
+ const double factor = fp ? 0 : (1.0 / ((1u << bits) - 1));
+ JXL_ASSERT(c < gi.channel.size());
+ Channel& ch_in = gi.channel[c];
+ Rect r = render_pipeline_input.GetBuffer(3 + ec).second;
+ Rect mr(modular_rect.x0() >> ch_in.hshift,
+ modular_rect.y0() >> ch_in.vshift,
+ DivCeil(modular_rect.xsize(), 1 << ch_in.hshift),
+ DivCeil(modular_rect.ysize(), 1 << ch_in.vshift));
+ mr = mr.Crop(ch_in.plane);
+ if (r.ysize() != mr.ysize() || r.xsize() != mr.xsize()) {
+ return JXL_FAILURE("Dimension mismatch: trying to fit a %" PRIuS
+ "x%" PRIuS
+ " modular channel into "
+ "a %" PRIuS "x%" PRIuS " rect",
+ mr.xsize(), mr.ysize(), r.xsize(), r.ysize());
+ }
+ for (size_t y = 0; y < r.ysize(); ++y) {
+ float* const JXL_RESTRICT row_out =
+ r.Row(render_pipeline_input.GetBuffer(3 + ec).first, y);
+ const pixel_type* const JXL_RESTRICT row_in = mr.Row(&ch_in.plane, y);
+ if (fp) {
+ int_to_float(row_in, row_out, r.xsize(), bits, exp_bits);
+ } else {
+ if (full_image.bitdepth < 23) {
+ HWY_DYNAMIC_DISPATCH(SingleFromSingle)
+ (r.xsize(), row_in, factor, row_out);
+ } else {
+ SingleFromSingleAccurate(r.xsize(), row_in, factor, row_out);
+ }
+ }
+ }
+ }
+ return true;
+}
+
+Status ModularFrameDecoder::FinalizeDecoding(PassesDecoderState* dec_state,
+ jxl::ThreadPool* pool,
+ bool inplace) {
+ if (!use_full_image) return true;
+ Image gi = (inplace ? std::move(full_image) : full_image.clone());
+ size_t xsize = gi.w;
+ size_t ysize = gi.h;
+
+ JXL_DEBUG_V(3, "Finalizing decoding for modular image: %s",
+ gi.DebugString().c_str());
+
+ // Don't use threads if total image size is smaller than a group
+ if (xsize * ysize < frame_dim.group_dim * frame_dim.group_dim) pool = nullptr;
+
+ // Undo the global transforms
+ gi.undo_transforms(global_header.wp_header, pool);
+ JXL_DASSERT(global_transform.empty());
+ if (gi.error) return JXL_FAILURE("Undoing transforms failed");
+
+ for (size_t i = 0; i < dec_state->shared->frame_dim.num_groups; i++) {
+ dec_state->render_pipeline->ClearDone(i);
+ }
+ std::atomic<bool> has_error{false};
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, dec_state->shared->frame_dim.num_groups,
+ [&](size_t num_threads) {
+ const auto& frame_header = dec_state->shared->frame_header;
+ bool use_group_ids = (frame_header.encoding == FrameEncoding::kVarDCT ||
+ (frame_header.flags & FrameHeader::kNoise));
+ return dec_state->render_pipeline->PrepareForThreads(num_threads,
+ use_group_ids);
+ },
+ [&](const uint32_t group, size_t thread_id) {
+ RenderPipelineInput input =
+ dec_state->render_pipeline->GetInputBuffers(group, thread_id);
+ if (!ModularImageToDecodedRect(gi, dec_state, nullptr, input,
+ dec_state->shared->GroupRect(group))) {
+ has_error = true;
+ return;
+ }
+ input.Done();
+ },
+ "ModularToRect"));
+ if (has_error) {
+ return JXL_FAILURE("Error producing input to render pipeline");
+ }
+ return true;
+}
+
+static constexpr const float kAlmostZero = 1e-8f;
+
+Status ModularFrameDecoder::DecodeQuantTable(
+ size_t required_size_x, size_t required_size_y, BitReader* br,
+ QuantEncoding* encoding, size_t idx,
+ ModularFrameDecoder* modular_frame_decoder) {
+ JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->qraw.qtable_den));
+ if (encoding->qraw.qtable_den < kAlmostZero) {
+ // qtable[] values are already checked for <= 0 so the denominator may not
+ // be negative.
+ return JXL_FAILURE("Invalid qtable_den: value too small");
+ }
+ Image image(required_size_x, required_size_y, 8, 3);
+ ModularOptions options;
+ if (modular_frame_decoder) {
+ JXL_RETURN_IF_ERROR(ModularGenericDecompress(
+ br, image, /*header=*/nullptr,
+ ModularStreamId::QuantTable(idx).ID(modular_frame_decoder->frame_dim),
+ &options, /*undo_transforms=*/true, &modular_frame_decoder->tree,
+ &modular_frame_decoder->code, &modular_frame_decoder->context_map));
+ } else {
+ JXL_RETURN_IF_ERROR(ModularGenericDecompress(br, image, /*header=*/nullptr,
+ 0, &options,
+ /*undo_transforms=*/true));
+ }
+ if (!encoding->qraw.qtable) {
+ encoding->qraw.qtable = new std::vector<int>();
+ }
+ encoding->qraw.qtable->resize(required_size_x * required_size_y * 3);
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t y = 0; y < required_size_y; y++) {
+ int32_t* JXL_RESTRICT row = image.channel[c].Row(y);
+ for (size_t x = 0; x < required_size_x; x++) {
+ (*encoding->qraw.qtable)[c * required_size_x * required_size_y +
+ y * required_size_x + x] = row[x];
+ if (row[x] <= 0) {
+ return JXL_FAILURE("Invalid raw quantization table");
+ }
+ }
+ }
+ }
+ return true;
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/dec_modular.h b/third_party/jpeg-xl/lib/jxl/dec_modular.h
new file mode 100644
index 0000000000..aae643cf1f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_modular.h
@@ -0,0 +1,140 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_MODULAR_H_
+#define LIB_JXL_DEC_MODULAR_H_
+
+#include <stddef.h>
+
+#include <string>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+struct ModularStreamId {
+ enum Kind {
+ kGlobalData,
+ kVarDCTDC,
+ kModularDC,
+ kACMetadata,
+ kQuantTable,
+ kModularAC
+ };
+ Kind kind;
+ size_t quant_table_id;
+ size_t group_id; // DC or AC group id.
+ size_t pass_id; // Only for kModularAC.
+ size_t ID(const FrameDimensions& frame_dim) const {
+ size_t id = 0;
+ switch (kind) {
+ case kGlobalData:
+ id = 0;
+ break;
+ case kVarDCTDC:
+ id = 1 + group_id;
+ break;
+ case kModularDC:
+ id = 1 + frame_dim.num_dc_groups + group_id;
+ break;
+ case kACMetadata:
+ id = 1 + 2 * frame_dim.num_dc_groups + group_id;
+ break;
+ case kQuantTable:
+ id = 1 + 3 * frame_dim.num_dc_groups + quant_table_id;
+ break;
+ case kModularAC:
+ id = 1 + 3 * frame_dim.num_dc_groups + DequantMatrices::kNum +
+ frame_dim.num_groups * pass_id + group_id;
+ break;
+ };
+ return id;
+ }
+ static ModularStreamId Global() {
+ return ModularStreamId{kGlobalData, 0, 0, 0};
+ }
+ static ModularStreamId VarDCTDC(size_t group_id) {
+ return ModularStreamId{kVarDCTDC, 0, group_id, 0};
+ }
+ static ModularStreamId ModularDC(size_t group_id) {
+ return ModularStreamId{kModularDC, 0, group_id, 0};
+ }
+ static ModularStreamId ACMetadata(size_t group_id) {
+ return ModularStreamId{kACMetadata, 0, group_id, 0};
+ }
+ static ModularStreamId QuantTable(size_t quant_table_id) {
+ JXL_ASSERT(quant_table_id < DequantMatrices::kNum);
+ return ModularStreamId{kQuantTable, quant_table_id, 0, 0};
+ }
+ static ModularStreamId ModularAC(size_t group_id, size_t pass_id) {
+ return ModularStreamId{kModularAC, 0, group_id, pass_id};
+ }
+ static size_t Num(const FrameDimensions& frame_dim, size_t passes) {
+ return ModularAC(0, passes).ID(frame_dim);
+ }
+ std::string DebugString() const;
+};
+
+class ModularFrameDecoder {
+ public:
+ void Init(const FrameDimensions& frame_dim) { this->frame_dim = frame_dim; }
+ Status DecodeGlobalInfo(BitReader* reader, const FrameHeader& frame_header,
+ bool allow_truncated_group);
+ Status DecodeGroup(const Rect& rect, BitReader* reader, int minShift,
+ int maxShift, const ModularStreamId& stream, bool zerofill,
+ PassesDecoderState* dec_state,
+ RenderPipelineInput* render_pipeline_input,
+ bool allow_truncated, bool* should_run_pipeline = nullptr);
+ // Decodes a VarDCT DC group (`group_id`) from the given `reader`.
+ Status DecodeVarDCTDC(size_t group_id, BitReader* reader,
+ PassesDecoderState* dec_state);
+ // Decodes a VarDCT AC Metadata group (`group_id`) from the given `reader`.
+ Status DecodeAcMetadata(size_t group_id, BitReader* reader,
+ PassesDecoderState* dec_state);
+ // Decodes a RAW quant table from `br` into the given `encoding`, of size
+ // `required_size_x x required_size_y`. If `modular_frame_decoder` is passed,
+ // its global tree is used, otherwise no global tree is used.
+ static Status DecodeQuantTable(size_t required_size_x, size_t required_size_y,
+ BitReader* br, QuantEncoding* encoding,
+ size_t idx,
+ ModularFrameDecoder* modular_frame_decoder);
+ // if inplace is true, this can only be called once
+ // if it is false, it can be called multiple times (e.g. for progressive
+ // steps)
+ Status FinalizeDecoding(PassesDecoderState* dec_state, jxl::ThreadPool* pool,
+ bool inplace);
+ bool have_dc() const { return have_something; }
+ void MaybeDropFullImage();
+ bool UsesFullImage() const { return use_full_image; }
+
+ private:
+ Status ModularImageToDecodedRect(Image& gi, PassesDecoderState* dec_state,
+ jxl::ThreadPool* pool,
+ RenderPipelineInput& render_pipeline_input,
+ Rect modular_rect);
+
+ Image full_image;
+ std::vector<Transform> global_transform;
+ FrameDimensions frame_dim;
+ bool do_color;
+ bool have_something;
+ bool use_full_image = true;
+ bool all_same_shift;
+ Tree tree;
+ ANSCode code;
+ std::vector<uint8_t> context_map;
+ GroupHeader global_header;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_DEC_MODULAR_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dec_noise.cc b/third_party/jpeg-xl/lib/jxl/dec_noise.cc
new file mode 100644
index 0000000000..275a6d0b21
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_noise.cc
@@ -0,0 +1,131 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_noise.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <numeric>
+#include <utility>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_noise.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/xorshift128plus-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Or;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Vec;
+
+using D = HWY_CAPPED(float, kBlockDim);
+using DI = hwy::HWY_NAMESPACE::Rebind<int, D>;
+using DI8 = hwy::HWY_NAMESPACE::Repartition<uint8_t, D>;
+
+// Converts one vector's worth of random bits to floats in [1, 2).
+// NOTE: as the convolution kernel sums to 0, it doesn't matter if inputs are in
+// [0, 1) or in [1, 2).
+void BitsToFloat(const uint32_t* JXL_RESTRICT random_bits,
+ float* JXL_RESTRICT floats) {
+ const HWY_FULL(float) df;
+ const HWY_FULL(uint32_t) du;
+
+ const auto bits = Load(du, random_bits);
+ // 1.0 + 23 random mantissa bits = [1, 2)
+ const auto rand12 = BitCast(df, Or(ShiftRight<9>(bits), Set(du, 0x3F800000)));
+ Store(rand12, df, floats);
+}
+
+void RandomImage(Xorshift128Plus* rng, const Rect& rect,
+ ImageF* JXL_RESTRICT noise) {
+ const size_t xsize = rect.xsize();
+ const size_t ysize = rect.ysize();
+
+ // May exceed the vector size, hence we have two loops over x below.
+ constexpr size_t kFloatsPerBatch =
+ Xorshift128Plus::N * sizeof(uint64_t) / sizeof(float);
+ HWY_ALIGN uint64_t batch[Xorshift128Plus::N] = {};
+
+ const HWY_FULL(float) df;
+ const size_t N = Lanes(df);
+
+ for (size_t y = 0; y < ysize; ++y) {
+ float* JXL_RESTRICT row = rect.Row(noise, y);
+
+ size_t x = 0;
+ // Only entire batches (avoids exceeding the image padding).
+ for (; x + kFloatsPerBatch < xsize; x += kFloatsPerBatch) {
+ rng->Fill(batch);
+ for (size_t i = 0; i < kFloatsPerBatch; i += Lanes(df)) {
+ BitsToFloat(reinterpret_cast<const uint32_t*>(batch) + i, row + x + i);
+ }
+ }
+
+ // Any remaining pixels, rounded up to vectors (safe due to padding).
+ rng->Fill(batch);
+ size_t batch_pos = 0; // < kFloatsPerBatch
+ for (; x < xsize; x += N) {
+ BitsToFloat(reinterpret_cast<const uint32_t*>(batch) + batch_pos,
+ row + x);
+ batch_pos += N;
+ }
+ }
+}
+void Random3Planes(size_t visible_frame_index, size_t nonvisible_frame_index,
+ size_t x0, size_t y0, const std::pair<ImageF*, Rect>& plane0,
+ const std::pair<ImageF*, Rect>& plane1,
+ const std::pair<ImageF*, Rect>& plane2) {
+ HWY_ALIGN Xorshift128Plus rng(visible_frame_index, nonvisible_frame_index, x0,
+ y0);
+ RandomImage(&rng, plane0.second, plane0.first);
+ RandomImage(&rng, plane1.second, plane1.first);
+ RandomImage(&rng, plane2.second, plane2.first);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(Random3Planes);
+void Random3Planes(size_t visible_frame_index, size_t nonvisible_frame_index,
+ size_t x0, size_t y0, const std::pair<ImageF*, Rect>& plane0,
+ const std::pair<ImageF*, Rect>& plane1,
+ const std::pair<ImageF*, Rect>& plane2) {
+ return HWY_DYNAMIC_DISPATCH(Random3Planes)(visible_frame_index,
+ nonvisible_frame_index, x0, y0,
+ plane0, plane1, plane2);
+}
+
+void DecodeFloatParam(float precision, float* val, BitReader* br) {
+ const int absval_quant = br->ReadFixedBits<10>();
+ *val = absval_quant / precision;
+}
+
+Status DecodeNoise(BitReader* br, NoiseParams* noise_params) {
+ for (float& i : noise_params->lut) {
+ DecodeFloatParam(kNoisePrecision, &i, br);
+ }
+ return true;
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/dec_noise.h b/third_party/jpeg-xl/lib/jxl/dec_noise.h
new file mode 100644
index 0000000000..ac05866470
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_noise.h
@@ -0,0 +1,32 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_NOISE_H_
+#define LIB_JXL_DEC_NOISE_H_
+
+// Noise synthesis. Currently disabled.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/noise.h"
+
+namespace jxl {
+
+void Random3Planes(size_t visible_frame_index, size_t nonvisible_frame_index,
+ size_t x0, size_t y0, const std::pair<ImageF*, Rect>& plane0,
+ const std::pair<ImageF*, Rect>& plane1,
+ const std::pair<ImageF*, Rect>& plane2);
+
+// Must only call if FrameHeader.flags.kNoise.
+Status DecodeNoise(BitReader* br, NoiseParams* noise_params);
+
+} // namespace jxl
+
+#endif // LIB_JXL_DEC_NOISE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dec_patch_dictionary.cc b/third_party/jpeg-xl/lib/jxl/dec_patch_dictionary.cc
new file mode 100644
index 0000000000..85e5de3c8d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_patch_dictionary.cc
@@ -0,0 +1,347 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_patch_dictionary.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/blending.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_frame.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/patch_dictionary_internal.h"
+
+namespace jxl {
+
+Status PatchDictionary::Decode(BitReader* br, size_t xsize, size_t ysize,
+ bool* uses_extra_channels) {
+ positions_.clear();
+ std::vector<uint8_t> context_map;
+ ANSCode code;
+ JXL_RETURN_IF_ERROR(
+ DecodeHistograms(br, kNumPatchDictionaryContexts, &code, &context_map));
+ ANSSymbolReader decoder(&code, br);
+
+ auto read_num = [&](size_t context) {
+ size_t r = decoder.ReadHybridUint(context, br, context_map);
+ return r;
+ };
+
+ size_t num_ref_patch = read_num(kNumRefPatchContext);
+ // Limit max memory usage of patches to about 66 bytes per pixel (assuming 8
+ // bytes per size_t)
+ const size_t num_pixels = xsize * ysize;
+ const size_t max_ref_patches = 1024 + num_pixels / 4;
+ const size_t max_patches = max_ref_patches * 4;
+ const size_t max_blending_infos = max_patches * 4;
+ if (num_ref_patch > max_ref_patches) {
+ return JXL_FAILURE("Too many patches in dictionary");
+ }
+ size_t num_ec = shared_->metadata->m.num_extra_channels;
+
+ size_t total_patches = 0;
+ size_t next_size = 1;
+
+ for (size_t id = 0; id < num_ref_patch; id++) {
+ PatchReferencePosition ref_pos;
+ ref_pos.ref = read_num(kReferenceFrameContext);
+ if (ref_pos.ref >= kMaxNumReferenceFrames ||
+ shared_->reference_frames[ref_pos.ref].frame.xsize() == 0) {
+ return JXL_FAILURE("Invalid reference frame ID");
+ }
+ if (!shared_->reference_frames[ref_pos.ref].ib_is_in_xyb) {
+ return JXL_FAILURE(
+ "Patches cannot use frames saved post color transforms");
+ }
+ const ImageBundle& ib = shared_->reference_frames[ref_pos.ref].frame;
+ ref_pos.x0 = read_num(kPatchReferencePositionContext);
+ ref_pos.y0 = read_num(kPatchReferencePositionContext);
+ ref_pos.xsize = read_num(kPatchSizeContext) + 1;
+ ref_pos.ysize = read_num(kPatchSizeContext) + 1;
+ if (ref_pos.x0 + ref_pos.xsize > ib.xsize()) {
+ return JXL_FAILURE("Invalid position specified in reference frame");
+ }
+ if (ref_pos.y0 + ref_pos.ysize > ib.ysize()) {
+ return JXL_FAILURE("Invalid position specified in reference frame");
+ }
+ size_t id_count = read_num(kPatchCountContext) + 1;
+ total_patches += id_count;
+ if (total_patches > max_patches) {
+ return JXL_FAILURE("Too many patches in dictionary");
+ }
+ if (next_size < total_patches) {
+ next_size *= 2;
+ next_size = std::min<size_t>(next_size, max_patches);
+ }
+ if (next_size * (num_ec + 1) > max_blending_infos) {
+ return JXL_FAILURE("Too many patches in dictionary");
+ }
+ positions_.reserve(next_size);
+ blendings_.reserve(next_size * (num_ec + 1));
+ for (size_t i = 0; i < id_count; i++) {
+ PatchPosition pos;
+ pos.ref_pos_idx = ref_positions_.size();
+ if (i == 0) {
+ pos.x = read_num(kPatchPositionContext);
+ pos.y = read_num(kPatchPositionContext);
+ } else {
+ pos.x =
+ positions_.back().x + UnpackSigned(read_num(kPatchOffsetContext));
+ pos.y =
+ positions_.back().y + UnpackSigned(read_num(kPatchOffsetContext));
+ }
+ if (pos.x + ref_pos.xsize > xsize) {
+ return JXL_FAILURE("Invalid patch x: at %" PRIuS " + %" PRIuS
+ " > %" PRIuS,
+ pos.x, ref_pos.xsize, xsize);
+ }
+ if (pos.y + ref_pos.ysize > ysize) {
+ return JXL_FAILURE("Invalid patch y: at %" PRIuS " + %" PRIuS
+ " > %" PRIuS,
+ pos.y, ref_pos.ysize, ysize);
+ }
+ for (size_t j = 0; j < num_ec + 1; j++) {
+ uint32_t blend_mode = read_num(kPatchBlendModeContext);
+ if (blend_mode >= uint32_t(PatchBlendMode::kNumBlendModes)) {
+ return JXL_FAILURE("Invalid patch blend mode: %u", blend_mode);
+ }
+ PatchBlending info;
+ info.mode = static_cast<PatchBlendMode>(blend_mode);
+ if (UsesAlpha(info.mode)) {
+ *uses_extra_channels = true;
+ }
+ if (info.mode != PatchBlendMode::kNone && j > 0) {
+ *uses_extra_channels = true;
+ }
+ if (UsesAlpha(info.mode) &&
+ shared_->metadata->m.extra_channel_info.size() > 1) {
+ info.alpha_channel = read_num(kPatchAlphaChannelContext);
+ if (info.alpha_channel >=
+ shared_->metadata->m.extra_channel_info.size()) {
+ return JXL_FAILURE(
+ "Invalid alpha channel for blending: %u out of %u\n",
+ info.alpha_channel,
+ (uint32_t)shared_->metadata->m.extra_channel_info.size());
+ }
+ } else {
+ info.alpha_channel = 0;
+ }
+ if (UsesClamp(info.mode)) {
+ info.clamp = read_num(kPatchClampContext);
+ } else {
+ info.clamp = false;
+ }
+ blendings_.push_back(info);
+ }
+ positions_.push_back(std::move(pos));
+ }
+ ref_positions_.emplace_back(std::move(ref_pos));
+ }
+ positions_.shrink_to_fit();
+
+ if (!decoder.CheckANSFinalState()) {
+ return JXL_FAILURE("ANS checksum failure.");
+ }
+
+ ComputePatchTree();
+ return true;
+}
+
+int PatchDictionary::GetReferences() const {
+ int result = 0;
+ for (size_t i = 0; i < ref_positions_.size(); ++i) {
+ result |= (1 << static_cast<int>(ref_positions_[i].ref));
+ }
+ return result;
+}
+
+namespace {
+struct PatchInterval {
+ size_t idx;
+ size_t y0, y1;
+};
+} // namespace
+
+void PatchDictionary::ComputePatchTree() {
+ patch_tree_.clear();
+ num_patches_.clear();
+ sorted_patches_y0_.clear();
+ sorted_patches_y1_.clear();
+ if (positions_.empty()) {
+ return;
+ }
+ // Create a y-interval for each patch.
+ std::vector<PatchInterval> intervals(positions_.size());
+ for (size_t i = 0; i < positions_.size(); ++i) {
+ const auto& pos = positions_[i];
+ intervals[i].idx = i;
+ intervals[i].y0 = pos.y;
+ intervals[i].y1 = pos.y + ref_positions_[pos.ref_pos_idx].ysize;
+ }
+ auto sort_by_y0 = [&intervals](size_t start, size_t end) {
+ std::sort(intervals.data() + start, intervals.data() + end,
+ [](const PatchInterval& i0, const PatchInterval& i1) {
+ return i0.y0 < i1.y0;
+ });
+ };
+ auto sort_by_y1 = [&intervals](size_t start, size_t end) {
+ std::sort(intervals.data() + start, intervals.data() + end,
+ [](const PatchInterval& i0, const PatchInterval& i1) {
+ return i0.y1 < i1.y1;
+ });
+ };
+ // Count the number of patches for each row.
+ sort_by_y1(0, intervals.size());
+ num_patches_.resize(intervals.back().y1);
+ for (auto iv : intervals) {
+ for (size_t y = iv.y0; y < iv.y1; ++y) num_patches_[y]++;
+ }
+ PatchTreeNode root;
+ root.start = 0;
+ root.num = intervals.size();
+ patch_tree_.push_back(root);
+ size_t next = 0;
+ while (next < patch_tree_.size()) {
+ auto& node = patch_tree_[next];
+ size_t start = node.start;
+ size_t end = node.start + node.num;
+ // Choose the y_center for this node to be the median of interval starts.
+ sort_by_y0(start, end);
+ size_t middle_idx = start + node.num / 2;
+ node.y_center = intervals[middle_idx].y0;
+ // Divide the intervals in [start, end) into three groups:
+ // * those completely to the right of y_center: [right_start, end)
+ // * those overlapping y_center: [left_end, right_start)
+ // * those completely to the left of y_center: [start, left_end)
+ size_t right_start = middle_idx;
+ while (right_start < end && intervals[right_start].y0 == node.y_center) {
+ ++right_start;
+ }
+ sort_by_y1(start, right_start);
+ size_t left_end = right_start;
+ while (left_end > start && intervals[left_end - 1].y1 > node.y_center) {
+ --left_end;
+ }
+ // Fill in sorted_patches_y0_ and sorted_patches_y1_ for the current node.
+ node.num = right_start - left_end;
+ node.start = sorted_patches_y0_.size();
+ for (ssize_t i = static_cast<ssize_t>(right_start) - 1;
+ i >= static_cast<ssize_t>(left_end); --i) {
+ sorted_patches_y1_.push_back({intervals[i].y1, intervals[i].idx});
+ }
+ sort_by_y0(left_end, right_start);
+ for (size_t i = left_end; i < right_start; ++i) {
+ sorted_patches_y0_.push_back({intervals[i].y0, intervals[i].idx});
+ }
+ // Create the left and right nodes (if not empty).
+ node.left_child = node.right_child = -1;
+ if (left_end > start) {
+ PatchTreeNode left;
+ left.start = start;
+ left.num = left_end - left.start;
+ patch_tree_[next].left_child = patch_tree_.size();
+ patch_tree_.push_back(left);
+ }
+ if (right_start < end) {
+ PatchTreeNode right;
+ right.start = right_start;
+ right.num = end - right.start;
+ patch_tree_[next].right_child = patch_tree_.size();
+ patch_tree_.push_back(right);
+ }
+ ++next;
+ }
+}
+
+std::vector<size_t> PatchDictionary::GetPatchesForRow(size_t y) const {
+ std::vector<size_t> result;
+ if (y < num_patches_.size() && num_patches_[y] > 0) {
+ result.reserve(num_patches_[y]);
+ for (ssize_t tree_idx = 0; tree_idx != -1;) {
+ JXL_DASSERT(tree_idx < (ssize_t)patch_tree_.size());
+ const auto& node = patch_tree_[tree_idx];
+ if (y <= node.y_center) {
+ for (size_t i = 0; i < node.num; ++i) {
+ const auto& p = sorted_patches_y0_[node.start + i];
+ if (y < p.first) break;
+ result.push_back(p.second);
+ }
+ tree_idx = y < node.y_center ? node.left_child : -1;
+ } else {
+ for (size_t i = 0; i < node.num; ++i) {
+ const auto& p = sorted_patches_y1_[node.start + i];
+ if (y >= p.first) break;
+ result.push_back(p.second);
+ }
+ tree_idx = node.right_child;
+ }
+ }
+ // Ensure that he relative order of patches that affect the same pixels is
+ // preserved. This is important for patches that have a blend mode
+ // different from kAdd.
+ std::sort(result.begin(), result.end());
+ }
+ return result;
+}
+
+// Adds patches to a segment of `xsize` pixels, starting at `inout`, assumed
+// to be located at position (x0, y) in the frame.
+void PatchDictionary::AddOneRow(float* const* inout, size_t y, size_t x0,
+ size_t xsize) const {
+ size_t num_ec = shared_->metadata->m.num_extra_channels;
+ std::vector<const float*> fg_ptrs(3 + num_ec);
+ for (size_t pos_idx : GetPatchesForRow(y)) {
+ const size_t blending_idx = pos_idx * (num_ec + 1);
+ const PatchPosition& pos = positions_[pos_idx];
+ const PatchReferencePosition& ref_pos = ref_positions_[pos.ref_pos_idx];
+ size_t by = pos.y;
+ size_t bx = pos.x;
+ size_t patch_xsize = ref_pos.xsize;
+ JXL_DASSERT(y >= by);
+ JXL_DASSERT(y < by + ref_pos.ysize);
+ size_t iy = y - by;
+ size_t ref = ref_pos.ref;
+ if (bx >= x0 + xsize) continue;
+ if (bx + patch_xsize < x0) continue;
+ size_t patch_x0 = std::max(bx, x0);
+ size_t patch_x1 = std::min(bx + patch_xsize, x0 + xsize);
+ for (size_t c = 0; c < 3; c++) {
+ fg_ptrs[c] = shared_->reference_frames[ref].frame.color().ConstPlaneRow(
+ c, ref_pos.y0 + iy) +
+ ref_pos.x0 + x0 - bx;
+ }
+ for (size_t i = 0; i < num_ec; i++) {
+ fg_ptrs[3 + i] =
+ shared_->reference_frames[ref].frame.extra_channels()[i].ConstRow(
+ ref_pos.y0 + iy) +
+ ref_pos.x0 + x0 - bx;
+ }
+ PerformBlending(inout, fg_ptrs.data(), inout, patch_x0 - x0,
+ patch_x1 - patch_x0, blendings_[blending_idx],
+ blendings_.data() + blending_idx + 1,
+ shared_->metadata->m.extra_channel_info);
+ }
+}
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/dec_patch_dictionary.h b/third_party/jpeg-xl/lib/jxl/dec_patch_dictionary.h
new file mode 100644
index 0000000000..a950e83e85
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_patch_dictionary.h
@@ -0,0 +1,151 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_PATCH_DICTIONARY_H_
+#define LIB_JXL_DEC_PATCH_DICTIONARY_H_
+
+// Chooses reference patches, and avoids encoding them once per occurrence.
+
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include <tuple>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/opsin_params.h"
+
+namespace jxl {
+
+enum class PatchBlendMode : uint8_t {
+ // The new values are the old ones. Useful to skip some channels.
+ kNone = 0,
+ // The new values (in the crop) replace the old ones: sample = new
+ kReplace = 1,
+ // The new values (in the crop) get added to the old ones: sample = old + new
+ kAdd = 2,
+ // The new values (in the crop) get multiplied by the old ones:
+ // sample = old * new
+ // This blend mode is only supported if BlendColorSpace is kEncoded. The
+ // range of the new value matters for multiplication purposes, and its
+ // nominal range of 0..1 is computed the same way as this is done for the
+ // alpha values in kBlend and kAlphaWeightedAdd.
+ kMul = 3,
+ // The new values (in the crop) replace the old ones if alpha>0:
+ // For first alpha channel:
+ // alpha = old + new * (1 - old)
+ // For other channels if !alpha_associated:
+ // sample = ((1 - new_alpha) * old * old_alpha + new_alpha * new) / alpha
+ // For other channels if alpha_associated:
+ // sample = (1 - new_alpha) * old + new
+ // The alpha formula applies to the alpha used for the division in the other
+ // channels formula, and applies to the alpha channel itself if its
+ // blend_channel value matches itself.
+ // If using kBlendAbove, new is the patch and old is the original image; if
+ // using kBlendBelow, the meaning is inverted.
+ kBlendAbove = 4,
+ kBlendBelow = 5,
+ // The new values (in the crop) are added to the old ones if alpha>0:
+ // For first alpha channel: sample = sample = old + new * (1 - old)
+ // For other channels: sample = old + alpha * new
+ kAlphaWeightedAddAbove = 6,
+ kAlphaWeightedAddBelow = 7,
+ kNumBlendModes,
+};
+
+inline bool UsesAlpha(PatchBlendMode mode) {
+ return mode == PatchBlendMode::kBlendAbove ||
+ mode == PatchBlendMode::kBlendBelow ||
+ mode == PatchBlendMode::kAlphaWeightedAddAbove ||
+ mode == PatchBlendMode::kAlphaWeightedAddBelow;
+}
+inline bool UsesClamp(PatchBlendMode mode) {
+ return UsesAlpha(mode) || mode == PatchBlendMode::kMul;
+}
+
+struct PatchBlending {
+ PatchBlendMode mode;
+ uint32_t alpha_channel;
+ bool clamp;
+};
+
+// Position and size of the patch in the reference frame.
+struct PatchReferencePosition {
+ size_t ref, x0, y0, xsize, ysize;
+};
+
+struct PatchPosition {
+ // Position of top-left corner of the patch in the image.
+ size_t x, y;
+ size_t ref_pos_idx;
+};
+
+struct PassesSharedState;
+
+// Encoder-side helper class to encode the PatchesDictionary.
+class PatchDictionaryEncoder;
+
+class PatchDictionary {
+ public:
+ PatchDictionary() = default;
+
+ void SetPassesSharedState(const PassesSharedState* shared) {
+ shared_ = shared;
+ }
+
+ bool HasAny() const { return !positions_.empty(); }
+
+ Status Decode(BitReader* br, size_t xsize, size_t ysize,
+ bool* uses_extra_channels);
+
+ void Clear() {
+ positions_.clear();
+ ComputePatchTree();
+ }
+
+ // Adds patches to a segment of `xsize` pixels, starting at `inout`, assumed
+ // to be located at position (x0, y) in the frame.
+ void AddOneRow(float* const* inout, size_t y, size_t x0, size_t xsize) const;
+
+ // Returns dependencies of this patch dictionary on reference frame ids as a
+ // bit mask: bits 0-3 indicate reference frame 0-3.
+ int GetReferences() const;
+
+ std::vector<size_t> GetPatchesForRow(size_t y) const;
+
+ private:
+ friend class PatchDictionaryEncoder;
+
+ const PassesSharedState* shared_;
+ std::vector<PatchPosition> positions_;
+ std::vector<PatchReferencePosition> ref_positions_;
+ std::vector<PatchBlending> blendings_;
+
+ // Interval tree on the y coordinates of the patches.
+ struct PatchTreeNode {
+ ssize_t left_child;
+ ssize_t right_child;
+ size_t y_center;
+ // Range of patches in sorted_patches_y0_ and sorted_patches_y1_ that
+ // contain the row y_center.
+ size_t start;
+ size_t num;
+ };
+ std::vector<PatchTreeNode> patch_tree_;
+ // Number of patches for each row.
+ std::vector<size_t> num_patches_;
+ std::vector<std::pair<size_t, size_t>> sorted_patches_y0_;
+ std::vector<std::pair<size_t, size_t>> sorted_patches_y1_;
+
+ void ComputePatchTree();
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_DEC_PATCH_DICTIONARY_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dec_tone_mapping-inl.h b/third_party/jpeg-xl/lib/jxl/dec_tone_mapping-inl.h
new file mode 100644
index 0000000000..26bf643152
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_tone_mapping-inl.h
@@ -0,0 +1,234 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JXL_DEC_TONE_MAPPING_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_DEC_TONE_MAPPING_INL_H_
+#undef LIB_JXL_DEC_TONE_MAPPING_INL_H_
+#else
+#define LIB_JXL_DEC_TONE_MAPPING_INL_H_
+#endif
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Clamp;
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::ZeroIfNegative;
+
+template <typename D>
+class Rec2408ToneMapper {
+ private:
+ using V = hwy::HWY_NAMESPACE::Vec<D>;
+
+ public:
+ explicit Rec2408ToneMapper(std::pair<float, float> source_range,
+ std::pair<float, float> target_range,
+ const float primaries_luminances[3])
+ : source_range_(source_range),
+ target_range_(target_range),
+ red_Y_(primaries_luminances[0]),
+ green_Y_(primaries_luminances[1]),
+ blue_Y_(primaries_luminances[2]) {}
+
+ void ToneMap(V* red, V* green, V* blue) const {
+ const V luminance = Mul(Set(df_, source_range_.second),
+ (MulAdd(Set(df_, red_Y_), *red,
+ MulAdd(Set(df_, green_Y_), *green,
+ Mul(Set(df_, blue_Y_), *blue)))));
+ const V pq_mastering_min = Set(df_, pq_mastering_min_);
+ const V inv_pq_mastering_range = Set(df_, inv_pq_mastering_range_);
+ const V normalized_pq = Min(
+ Set(df_, 1.f),
+ Mul(Sub(InvEOTF(luminance), pq_mastering_min), inv_pq_mastering_range));
+ const V ks = Set(df_, ks_);
+ const V e2 =
+ IfThenElse(Lt(normalized_pq, ks), normalized_pq, P(normalized_pq));
+ const V one_minus_e2 = Sub(Set(df_, 1), e2);
+ const V one_minus_e2_2 = Mul(one_minus_e2, one_minus_e2);
+ const V one_minus_e2_4 = Mul(one_minus_e2_2, one_minus_e2_2);
+ const V b = Set(df_, min_lum_);
+ const V e3 = MulAdd(b, one_minus_e2_4, e2);
+ const V pq_mastering_range = Set(df_, pq_mastering_range_);
+ const V e4 = MulAdd(e3, pq_mastering_range, pq_mastering_min);
+ const V new_luminance =
+ Min(Set(df_, target_range_.second),
+ ZeroIfNegative(
+ Mul(Set(df_, 10000), TF_PQ().DisplayFromEncoded(df_, e4))));
+
+ const V ratio = Div(new_luminance, luminance);
+ const V inv_target_peak = Set(df_, inv_target_peak_);
+ const V normalizer = Set(df_, normalizer_);
+ const V multiplier = Mul(ratio, normalizer);
+ for (V* const val : {red, green, blue}) {
+ *val = IfThenElse(Le(luminance, Set(df_, 1e-6f)),
+ Mul(new_luminance, inv_target_peak),
+ Mul(*val, multiplier));
+ }
+ }
+
+ private:
+ V InvEOTF(const V luminance) const {
+ return TF_PQ().EncodedFromDisplay(df_,
+ Mul(luminance, Set(df_, 1. / 10000)));
+ }
+ float InvEOTF(const float luminance) const {
+ return TF_PQ().EncodedFromDisplay(luminance / 10000.0f);
+ }
+ V T(const V a) const {
+ const V ks = Set(df_, ks_);
+ const V inv_one_minus_ks = Set(df_, inv_one_minus_ks_);
+ return Mul(Sub(a, ks), inv_one_minus_ks);
+ }
+ V P(const V b) const {
+ const V t_b = T(b);
+ const V t_b_2 = Mul(t_b, t_b);
+ const V t_b_3 = Mul(t_b_2, t_b);
+ const V ks = Set(df_, ks_);
+ const V max_lum = Set(df_, max_lum_);
+ return MulAdd(
+ MulAdd(Set(df_, 2), t_b_3, MulAdd(Set(df_, -3), t_b_2, Set(df_, 1))),
+ ks,
+ MulAdd(Add(t_b_3, MulAdd(Set(df_, -2), t_b_2, t_b)),
+ Sub(Set(df_, 1), ks),
+ Mul(MulAdd(Set(df_, -2), t_b_3, Mul(Set(df_, 3), t_b_2)),
+ max_lum)));
+ }
+
+ D df_;
+ const std::pair<float, float> source_range_;
+ const std::pair<float, float> target_range_;
+ const float red_Y_;
+ const float green_Y_;
+ const float blue_Y_;
+
+ const float pq_mastering_min_ = InvEOTF(source_range_.first);
+ const float pq_mastering_max_ = InvEOTF(source_range_.second);
+ const float pq_mastering_range_ = pq_mastering_max_ - pq_mastering_min_;
+ const float inv_pq_mastering_range_ = 1.0f / pq_mastering_range_;
+ // TODO(eustas): divide instead of inverse-multiply?
+ const float min_lum_ = (InvEOTF(target_range_.first) - pq_mastering_min_) *
+ inv_pq_mastering_range_;
+ // TODO(eustas): divide instead of inverse-multiply?
+ const float max_lum_ = (InvEOTF(target_range_.second) - pq_mastering_min_) *
+ inv_pq_mastering_range_;
+ const float ks_ = 1.5f * max_lum_ - 0.5f;
+ const float b_ = min_lum_;
+
+ const float inv_one_minus_ks_ = 1.0f / std::max(1e-6f, 1.0f - ks_);
+
+ const float normalizer_ = source_range_.second / target_range_.second;
+ const float inv_target_peak_ = 1.f / target_range_.second;
+};
+
+class HlgOOTF {
+ public:
+ explicit HlgOOTF(float source_luminance, float target_luminance,
+ const float primaries_luminances[3])
+ : HlgOOTF(/*gamma=*/std::pow(
+ 1.111f, std::log2(target_luminance / source_luminance)),
+ primaries_luminances) {}
+
+ static HlgOOTF FromSceneLight(float display_luminance,
+ const float primaries_luminances[3]) {
+ return HlgOOTF(/*gamma=*/1.2f *
+ std::pow(1.111f, std::log2(display_luminance / 1000.f)),
+ primaries_luminances);
+ }
+
+ static HlgOOTF ToSceneLight(float display_luminance,
+ const float primaries_luminances[3]) {
+ return HlgOOTF(
+ /*gamma=*/(1 / 1.2f) *
+ std::pow(1.111f, -std::log2(display_luminance / 1000.f)),
+ primaries_luminances);
+ }
+
+ template <typename V>
+ void Apply(V* red, V* green, V* blue) const {
+ hwy::HWY_NAMESPACE::DFromV<V> df;
+ if (!apply_ootf_) return;
+ const V luminance =
+ MulAdd(Set(df, red_Y_), *red,
+ MulAdd(Set(df, green_Y_), *green, Mul(Set(df, blue_Y_), *blue)));
+ const V ratio =
+ Min(FastPowf(df, luminance, Set(df, exponent_)), Set(df, 1e9));
+ *red = Mul(*red, ratio);
+ *green = Mul(*green, ratio);
+ *blue = Mul(*blue, ratio);
+ }
+
+ bool WarrantsGamutMapping() const { return apply_ootf_ && exponent_ < 0; }
+
+ private:
+ explicit HlgOOTF(float gamma, const float luminances[3])
+ : exponent_(gamma - 1),
+ red_Y_(luminances[0]),
+ green_Y_(luminances[1]),
+ blue_Y_(luminances[2]) {}
+ const float exponent_;
+ const bool apply_ootf_ = exponent_ < -0.01f || 0.01f < exponent_;
+ const float red_Y_;
+ const float green_Y_;
+ const float blue_Y_;
+};
+
+template <typename V>
+void GamutMap(V* red, V* green, V* blue, const float primaries_luminances[3],
+ float preserve_saturation = 0.1f) {
+ hwy::HWY_NAMESPACE::DFromV<V> df;
+ const V luminance =
+ MulAdd(Set(df, primaries_luminances[0]), *red,
+ MulAdd(Set(df, primaries_luminances[1]), *green,
+ Mul(Set(df, primaries_luminances[2]), *blue)));
+
+ // Desaturate out-of-gamut pixels. This is done by mixing each pixel
+ // with just enough gray of the target luminance to make all
+ // components non-negative.
+ // - For saturation preservation, if a component is still larger than
+ // 1 then the pixel is normalized to have a maximum component of 1.
+ // That will reduce its luminance.
+ // - For luminance preservation, getting all components below 1 is
+ // done by mixing in yet more gray. That will desaturate it further.
+ V gray_mix_saturation = Zero(df);
+ V gray_mix_luminance = Zero(df);
+ for (const V* ch : {red, green, blue}) {
+ const V& val = *ch;
+ const V inv_val_minus_gray = Div(Set(df, 1), (Sub(val, luminance)));
+ gray_mix_saturation =
+ IfThenElse(Ge(val, luminance), gray_mix_saturation,
+ Max(gray_mix_saturation, Mul(val, inv_val_minus_gray)));
+ gray_mix_luminance =
+ Max(gray_mix_luminance,
+ IfThenElse(Le(val, luminance), gray_mix_saturation,
+ Mul(Sub(val, Set(df, 1)), inv_val_minus_gray)));
+ }
+ const V gray_mix = Clamp(
+ MulAdd(Set(df, preserve_saturation),
+ Sub(gray_mix_saturation, gray_mix_luminance), gray_mix_luminance),
+ Zero(df), Set(df, 1));
+ for (V* const val : {red, green, blue}) {
+ *val = MulAdd(gray_mix, Sub(luminance, *val), *val);
+ }
+ const V normalizer =
+ Div(Set(df, 1), Max(Set(df, 1), Max(*red, Max(*green, *blue))));
+ for (V* const val : {red, green, blue}) {
+ *val = Mul(*val, normalizer);
+ }
+}
+
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif // LIB_JXL_DEC_TONE_MAPPING_INL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dec_transforms-inl.h b/third_party/jpeg-xl/lib/jxl/dec_transforms-inl.h
new file mode 100644
index 0000000000..075619b3b9
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_transforms-inl.h
@@ -0,0 +1,853 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_
+#undef LIB_JXL_DEC_TRANSFORMS_INL_H_
+#else
+#define LIB_JXL_DEC_TRANSFORMS_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dct-inl.h"
+#include "lib/jxl/dct_scales.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::MulAdd;
+
+// Computes the lowest-frequency LF_ROWSxLF_COLS-sized square in output, which
+// is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the
+// input block.
+template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
+ size_t ROWS, size_t COLS>
+JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride,
+ float* output, const size_t output_stride) {
+ static_assert(LF_ROWS == ROWS,
+ "ReinterpretingDCT should only be called with LF == N");
+ static_assert(LF_COLS == COLS,
+ "ReinterpretingDCT should only be called with LF == N");
+ HWY_ALIGN float block[ROWS * COLS];
+
+ // ROWS, COLS <= 8, so we can put scratch space on the stack.
+ HWY_ALIGN float scratch_space[ROWS * COLS];
+ ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
+ scratch_space);
+ if (ROWS < COLS) {
+ for (size_t y = 0; y < LF_ROWS; y++) {
+ for (size_t x = 0; x < LF_COLS; x++) {
+ output[y * output_stride + x] =
+ block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
+ DCTTotalResampleScale<COLS, DCT_COLS>(x);
+ }
+ }
+ } else {
+ for (size_t y = 0; y < LF_COLS; y++) {
+ for (size_t x = 0; x < LF_ROWS; x++) {
+ output[y * output_stride + x] =
+ block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
+ DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
+ }
+ }
+ }
+}
+
+template <size_t S>
+void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
+ static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
+ static_assert(S % 2 == 0, "S should be even");
+ float temp[kDCTBlockSize];
+ constexpr size_t num_2x2 = S / 2;
+ for (size_t y = 0; y < num_2x2; y++) {
+ for (size_t x = 0; x < num_2x2; x++) {
+ float c00 = block[y * kBlockDim + x];
+ float c01 = block[y * kBlockDim + num_2x2 + x];
+ float c10 = block[(y + num_2x2) * kBlockDim + x];
+ float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
+ float r00 = c00 + c01 + c10 + c11;
+ float r01 = c00 + c01 - c10 - c11;
+ float r10 = c00 - c01 + c10 - c11;
+ float r11 = c00 - c01 - c10 + c11;
+ temp[y * 2 * kBlockDim + x * 2] = r00;
+ temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
+ temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
+ temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
+ }
+ }
+ for (size_t y = 0; y < S; y++) {
+ for (size_t x = 0; x < S; x++) {
+ out[y * stride_out + x] = temp[y * kBlockDim + x];
+ }
+ }
+}
+
+void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
+ HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
+ {
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ 0.25,
+ },
+ {
+ 0.876902929799142f,
+ 0.2206518106944235f,
+ -0.10140050393753763f,
+ -0.1014005039375375f,
+ 0.2206518106944236f,
+ -0.10140050393753777f,
+ -0.10140050393753772f,
+ -0.10140050393753763f,
+ -0.10140050393753758f,
+ -0.10140050393753769f,
+ -0.1014005039375375f,
+ -0.10140050393753768f,
+ -0.10140050393753768f,
+ -0.10140050393753759f,
+ -0.10140050393753763f,
+ -0.10140050393753741f,
+ },
+ {
+ 0.0,
+ 0.0,
+ 0.40670075830260755f,
+ 0.44444816619734445f,
+ 0.0,
+ 0.0,
+ 0.19574399372042936f,
+ 0.2929100136981264f,
+ -0.40670075830260716f,
+ -0.19574399372042872f,
+ 0.0,
+ 0.11379074460448091f,
+ -0.44444816619734384f,
+ -0.29291001369812636f,
+ -0.1137907446044814f,
+ 0.0,
+ },
+ {
+ 0.0,
+ 0.0,
+ -0.21255748058288748f,
+ 0.3085497062849767f,
+ 0.0,
+ 0.4706702258572536f,
+ -0.1621205195722993f,
+ 0.0,
+ -0.21255748058287047f,
+ -0.16212051957228327f,
+ -0.47067022585725277f,
+ -0.1464291867126764f,
+ 0.3085497062849487f,
+ 0.0,
+ -0.14642918671266536f,
+ 0.4251149611657548f,
+ },
+ {
+ 0.0,
+ -0.7071067811865474f,
+ 0.0,
+ 0.0,
+ 0.7071067811865476f,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ },
+ {
+ -0.4105377591765233f,
+ 0.6235485373547691f,
+ -0.06435071657946274f,
+ -0.06435071657946266f,
+ 0.6235485373547694f,
+ -0.06435071657946284f,
+ -0.0643507165794628f,
+ -0.06435071657946274f,
+ -0.06435071657946272f,
+ -0.06435071657946279f,
+ -0.06435071657946266f,
+ -0.06435071657946277f,
+ -0.06435071657946277f,
+ -0.06435071657946273f,
+ -0.06435071657946274f,
+ -0.0643507165794626f,
+ },
+ {
+ 0.0,
+ 0.0,
+ -0.4517556589999482f,
+ 0.15854503551840063f,
+ 0.0,
+ -0.04038515160822202f,
+ 0.0074182263792423875f,
+ 0.39351034269210167f,
+ -0.45175565899994635f,
+ 0.007418226379244351f,
+ 0.1107416575309343f,
+ 0.08298163094882051f,
+ 0.15854503551839705f,
+ 0.3935103426921022f,
+ 0.0829816309488214f,
+ -0.45175565899994796f,
+ },
+ {
+ 0.0,
+ 0.0,
+ -0.304684750724869f,
+ 0.5112616136591823f,
+ 0.0,
+ 0.0,
+ -0.290480129728998f,
+ -0.06578701549142804f,
+ 0.304684750724884f,
+ 0.2904801297290076f,
+ 0.0,
+ -0.23889773523344604f,
+ -0.5112616136592012f,
+ 0.06578701549142545f,
+ 0.23889773523345467f,
+ 0.0,
+ },
+ {
+ 0.0,
+ 0.0,
+ 0.3017929516615495f,
+ 0.25792362796341184f,
+ 0.0,
+ 0.16272340142866204f,
+ 0.09520022653475037f,
+ 0.0,
+ 0.3017929516615503f,
+ 0.09520022653475055f,
+ -0.16272340142866173f,
+ -0.35312385449816297f,
+ 0.25792362796341295f,
+ 0.0,
+ -0.3531238544981624f,
+ -0.6035859033230976f,
+ },
+ {
+ 0.0,
+ 0.0,
+ 0.40824829046386274f,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ -0.4082482904638628f,
+ -0.4082482904638635f,
+ 0.0,
+ 0.0,
+ -0.40824829046386296f,
+ 0.0,
+ 0.4082482904638634f,
+ 0.408248290463863f,
+ 0.0,
+ },
+ {
+ 0.0,
+ 0.0,
+ 0.1747866975480809f,
+ 0.0812611176717539f,
+ 0.0,
+ 0.0,
+ -0.3675398009862027f,
+ -0.307882213957909f,
+ -0.17478669754808135f,
+ 0.3675398009862011f,
+ 0.0,
+ 0.4826689115059883f,
+ -0.08126111767175039f,
+ 0.30788221395790305f,
+ -0.48266891150598584f,
+ 0.0,
+ },
+ {
+ 0.0,
+ 0.0,
+ -0.21105601049335784f,
+ 0.18567180916109802f,
+ 0.0,
+ 0.0,
+ 0.49215859013738733f,
+ -0.38525013709251915f,
+ 0.21105601049335806f,
+ -0.49215859013738905f,
+ 0.0,
+ 0.17419412659916217f,
+ -0.18567180916109904f,
+ 0.3852501370925211f,
+ -0.1741941265991621f,
+ 0.0,
+ },
+ {
+ 0.0,
+ 0.0,
+ -0.14266084808807264f,
+ -0.3416446842253372f,
+ 0.0,
+ 0.7367497537172237f,
+ 0.24627107722075148f,
+ -0.08574019035519306f,
+ -0.14266084808807344f,
+ 0.24627107722075137f,
+ 0.14883399227113567f,
+ -0.04768680350229251f,
+ -0.3416446842253373f,
+ -0.08574019035519267f,
+ -0.047686803502292804f,
+ -0.14266084808807242f,
+ },
+ {
+ 0.0,
+ 0.0,
+ -0.13813540350758585f,
+ 0.3302282550303788f,
+ 0.0,
+ 0.08755115000587084f,
+ -0.07946706605909573f,
+ -0.4613374887461511f,
+ -0.13813540350758294f,
+ -0.07946706605910261f,
+ 0.49724647109535086f,
+ 0.12538059448563663f,
+ 0.3302282550303805f,
+ -0.4613374887461554f,
+ 0.12538059448564315f,
+ -0.13813540350758452f,
+ },
+ {
+ 0.0,
+ 0.0,
+ -0.17437602599651067f,
+ 0.0702790691196284f,
+ 0.0,
+ -0.2921026642334881f,
+ 0.3623817333531167f,
+ 0.0,
+ -0.1743760259965108f,
+ 0.36238173335311646f,
+ 0.29210266423348785f,
+ -0.4326608024727445f,
+ 0.07027906911962818f,
+ 0.0,
+ -0.4326608024727457f,
+ 0.34875205199302267f,
+ },
+ {
+ 0.0,
+ 0.0,
+ 0.11354987314994337f,
+ -0.07417504595810355f,
+ 0.0,
+ 0.19402893032594343f,
+ -0.435190496523228f,
+ 0.21918684838857466f,
+ 0.11354987314994257f,
+ -0.4351904965232251f,
+ 0.5550443808910661f,
+ -0.25468277124066463f,
+ -0.07417504595810233f,
+ 0.2191868483885728f,
+ -0.25468277124066413f,
+ 0.1135498731499429f,
+ },
+ };
+
+ const HWY_CAPPED(float, 16) d;
+ for (size_t i = 0; i < 16; i += Lanes(d)) {
+ auto pixel = Zero(d);
+ for (size_t j = 0; j < 16; j++) {
+ auto cf = Set(d, coeffs[j]);
+ auto basis = Load(d, k4x4AFVBasis[j] + i);
+ pixel = MulAdd(cf, basis, pixel);
+ }
+ Store(pixel, d, pixels + i);
+ }
+}
+
+template <size_t afv_kind>
+void AFVTransformToPixels(const float* JXL_RESTRICT coefficients,
+ float* JXL_RESTRICT pixels, size_t pixels_stride) {
+ HWY_ALIGN float scratch_space[4 * 8];
+ size_t afv_x = afv_kind & 1;
+ size_t afv_y = afv_kind / 2;
+ float dcs[3] = {};
+ float block00 = coefficients[0];
+ float block01 = coefficients[1];
+ float block10 = coefficients[8];
+ dcs[0] = (block00 + block10 + block01) * 4.0f;
+ dcs[1] = (block00 + block10 - block01);
+ dcs[2] = block00 - block10;
+ // IAFV: (even, even) positions.
+ HWY_ALIGN float coeff[4 * 4];
+ coeff[0] = dcs[0];
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 4; ix++) {
+ if (ix == 0 && iy == 0) continue;
+ coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
+ }
+ }
+ HWY_ALIGN float block[4 * 8];
+ AFVIDCT4x4(coeff, block);
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 4; ix++) {
+ pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
+ block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
+ }
+ }
+ // IDCT4x4 in (odd, even) positions.
+ block[0] = dcs[1];
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 4; ix++) {
+ if (ix == 0 && iy == 0) continue;
+ block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
+ }
+ }
+ ComputeScaledIDCT<4, 4>()(
+ block,
+ DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
+ pixels_stride),
+ scratch_space);
+ // IDCT4x8.
+ block[0] = dcs[2];
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 8; ix++) {
+ if (ix == 0 && iy == 0) continue;
+ block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
+ }
+ }
+ ComputeScaledIDCT<4, 8>()(
+ block,
+ DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
+ scratch_space);
+}
+
+HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategy::Type strategy,
+ float* JXL_RESTRICT coefficients,
+ float* JXL_RESTRICT pixels,
+ size_t pixels_stride,
+ float* scratch_space) {
+ using Type = AcStrategy::Type;
+ switch (strategy) {
+ case Type::IDENTITY: {
+ PROFILER_ZONE("IDCT Identity");
+ float dcs[4] = {};
+ float block00 = coefficients[0];
+ float block01 = coefficients[1];
+ float block10 = coefficients[8];
+ float block11 = coefficients[9];
+ dcs[0] = block00 + block01 + block10 + block11;
+ dcs[1] = block00 + block01 - block10 - block11;
+ dcs[2] = block00 - block01 + block10 - block11;
+ dcs[3] = block00 - block01 - block10 + block11;
+ for (size_t y = 0; y < 2; y++) {
+ for (size_t x = 0; x < 2; x++) {
+ float block_dc = dcs[y * 2 + x];
+ float residual_sum = 0;
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 4; ix++) {
+ if (ix == 0 && iy == 0) continue;
+ residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
+ }
+ }
+ pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
+ block_dc - residual_sum * (1.0f / 16);
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 4; ix++) {
+ if (ix == 1 && iy == 1) continue;
+ pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
+ coefficients[(y + iy * 2) * 8 + x + ix * 2] +
+ pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
+ }
+ }
+ pixels[y * 4 * pixels_stride + x * 4] =
+ coefficients[(y + 2) * 8 + x + 2] +
+ pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
+ }
+ }
+ break;
+ }
+ case Type::DCT8X4: {
+ PROFILER_ZONE("IDCT 8x4");
+ float dcs[2] = {};
+ float block0 = coefficients[0];
+ float block1 = coefficients[8];
+ dcs[0] = block0 + block1;
+ dcs[1] = block0 - block1;
+ for (size_t x = 0; x < 2; x++) {
+ HWY_ALIGN float block[4 * 8];
+ block[0] = dcs[x];
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 8; ix++) {
+ if (ix == 0 && iy == 0) continue;
+ block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
+ }
+ }
+ ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
+ scratch_space);
+ }
+ break;
+ }
+ case Type::DCT4X8: {
+ PROFILER_ZONE("IDCT 4x8");
+ float dcs[2] = {};
+ float block0 = coefficients[0];
+ float block1 = coefficients[8];
+ dcs[0] = block0 + block1;
+ dcs[1] = block0 - block1;
+ for (size_t y = 0; y < 2; y++) {
+ HWY_ALIGN float block[4 * 8];
+ block[0] = dcs[y];
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 8; ix++) {
+ if (ix == 0 && iy == 0) continue;
+ block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
+ }
+ }
+ ComputeScaledIDCT<4, 8>()(
+ block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
+ scratch_space);
+ }
+ break;
+ }
+ case Type::DCT4X4: {
+ PROFILER_ZONE("IDCT 4");
+ float dcs[4] = {};
+ float block00 = coefficients[0];
+ float block01 = coefficients[1];
+ float block10 = coefficients[8];
+ float block11 = coefficients[9];
+ dcs[0] = block00 + block01 + block10 + block11;
+ dcs[1] = block00 + block01 - block10 - block11;
+ dcs[2] = block00 - block01 + block10 - block11;
+ dcs[3] = block00 - block01 - block10 + block11;
+ for (size_t y = 0; y < 2; y++) {
+ for (size_t x = 0; x < 2; x++) {
+ HWY_ALIGN float block[4 * 4];
+ block[0] = dcs[y * 2 + x];
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 4; ix++) {
+ if (ix == 0 && iy == 0) continue;
+ block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
+ }
+ }
+ ComputeScaledIDCT<4, 4>()(
+ block,
+ DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
+ scratch_space);
+ }
+ }
+ break;
+ }
+ case Type::DCT2X2: {
+ PROFILER_ZONE("IDCT 2");
+ HWY_ALIGN float coeffs[kDCTBlockSize];
+ memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
+ IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
+ IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
+ IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
+ for (size_t y = 0; y < kBlockDim; y++) {
+ for (size_t x = 0; x < kBlockDim; x++) {
+ pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
+ }
+ }
+ break;
+ }
+ case Type::DCT16X16: {
+ PROFILER_ZONE("IDCT 16");
+ ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
+ scratch_space);
+ break;
+ }
+ case Type::DCT16X8: {
+ PROFILER_ZONE("IDCT 16x8");
+ ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
+ scratch_space);
+ break;
+ }
+ case Type::DCT8X16: {
+ PROFILER_ZONE("IDCT 8x16");
+ ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
+ scratch_space);
+ break;
+ }
+ case Type::DCT32X8: {
+ PROFILER_ZONE("IDCT 32x8");
+ ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
+ scratch_space);
+ break;
+ }
+ case Type::DCT8X32: {
+ PROFILER_ZONE("IDCT 8x32");
+ ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
+ scratch_space);
+ break;
+ }
+ case Type::DCT32X16: {
+ PROFILER_ZONE("IDCT 32x16");
+ ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
+ scratch_space);
+ break;
+ }
+ case Type::DCT16X32: {
+ PROFILER_ZONE("IDCT 16x32");
+ ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
+ scratch_space);
+ break;
+ }
+ case Type::DCT32X32: {
+ PROFILER_ZONE("IDCT 32");
+ ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
+ scratch_space);
+ break;
+ }
+ case Type::DCT: {
+ PROFILER_ZONE("IDCT 8");
+ ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
+ scratch_space);
+ break;
+ }
+ case Type::AFV0: {
+ PROFILER_ZONE("IAFV0");
+ AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
+ break;
+ }
+ case Type::AFV1: {
+ PROFILER_ZONE("IAFV1");
+ AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
+ break;
+ }
+ case Type::AFV2: {
+ PROFILER_ZONE("IAFV2");
+ AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
+ break;
+ }
+ case Type::AFV3: {
+ PROFILER_ZONE("IAFV3");
+ AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
+ break;
+ }
+ case Type::DCT64X32: {
+ PROFILER_ZONE("IDCT 64x32");
+ ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
+ scratch_space);
+ break;
+ }
+ case Type::DCT32X64: {
+ PROFILER_ZONE("IDCT 32x64");
+ ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
+ scratch_space);
+ break;
+ }
+ case Type::DCT64X64: {
+ PROFILER_ZONE("IDCT 64");
+ ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
+ scratch_space);
+ break;
+ }
+ case Type::DCT128X64: {
+ PROFILER_ZONE("IDCT 128x64");
+ ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
+ scratch_space);
+ break;
+ }
+ case Type::DCT64X128: {
+ PROFILER_ZONE("IDCT 64x128");
+ ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
+ scratch_space);
+ break;
+ }
+ case Type::DCT128X128: {
+ PROFILER_ZONE("IDCT 128");
+ ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
+ scratch_space);
+ break;
+ }
+ case Type::DCT256X128: {
+ PROFILER_ZONE("IDCT 256x128");
+ ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
+ scratch_space);
+ break;
+ }
+ case Type::DCT128X256: {
+ PROFILER_ZONE("IDCT 128x256");
+ ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
+ scratch_space);
+ break;
+ }
+ case Type::DCT256X256: {
+ PROFILER_ZONE("IDCT 256");
+ ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
+ scratch_space);
+ break;
+ }
+ case Type::kNumValidStrategies:
+ JXL_ABORT("Invalid strategy");
+ }
+}
+
+HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategy::Type strategy,
+ const float* dc, size_t dc_stride,
+ float* llf) {
+ using Type = AcStrategy::Type;
+ switch (strategy) {
+ case Type::DCT16X8: {
+ ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
+ /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
+ dc, dc_stride, llf, 2 * kBlockDim);
+ break;
+ }
+ case Type::DCT8X16: {
+ ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
+ /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
+ dc, dc_stride, llf, 2 * kBlockDim);
+ break;
+ }
+ case Type::DCT16X16: {
+ ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
+ /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
+ dc, dc_stride, llf, 2 * kBlockDim);
+ break;
+ }
+ case Type::DCT32X8: {
+ ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
+ /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
+ dc, dc_stride, llf, 4 * kBlockDim);
+ break;
+ }
+ case Type::DCT8X32: {
+ ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+ /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
+ dc, dc_stride, llf, 4 * kBlockDim);
+ break;
+ }
+ case Type::DCT32X16: {
+ ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
+ /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
+ dc, dc_stride, llf, 4 * kBlockDim);
+ break;
+ }
+ case Type::DCT16X32: {
+ ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+ /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
+ dc, dc_stride, llf, 4 * kBlockDim);
+ break;
+ }
+ case Type::DCT32X32: {
+ ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+ /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
+ dc, dc_stride, llf, 4 * kBlockDim);
+ break;
+ }
+ case Type::DCT64X32: {
+ ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+ /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
+ dc, dc_stride, llf, 8 * kBlockDim);
+ break;
+ }
+ case Type::DCT32X64: {
+ ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
+ /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
+ dc, dc_stride, llf, 8 * kBlockDim);
+ break;
+ }
+ case Type::DCT64X64: {
+ ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
+ /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
+ dc, dc_stride, llf, 8 * kBlockDim);
+ break;
+ }
+ case Type::DCT128X64: {
+ ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
+ /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
+ dc, dc_stride, llf, 16 * kBlockDim);
+ break;
+ }
+ case Type::DCT64X128: {
+ ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
+ /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
+ dc, dc_stride, llf, 16 * kBlockDim);
+ break;
+ }
+ case Type::DCT128X128: {
+ ReinterpretingDCT<
+ /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
+ /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
+ dc, dc_stride, llf, 16 * kBlockDim);
+ break;
+ }
+ case Type::DCT256X128: {
+ ReinterpretingDCT<
+ /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
+ /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
+ dc, dc_stride, llf, 32 * kBlockDim);
+ break;
+ }
+ case Type::DCT128X256: {
+ ReinterpretingDCT<
+ /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
+ /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
+ dc, dc_stride, llf, 32 * kBlockDim);
+ break;
+ }
+ case Type::DCT256X256: {
+ ReinterpretingDCT<
+ /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
+ /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
+ dc, dc_stride, llf, 32 * kBlockDim);
+ break;
+ }
+ case Type::DCT:
+ case Type::DCT2X2:
+ case Type::DCT4X4:
+ case Type::DCT4X8:
+ case Type::DCT8X4:
+ case Type::AFV0:
+ case Type::AFV1:
+ case Type::AFV2:
+ case Type::AFV3:
+ case Type::IDENTITY:
+ llf[0] = dc[0];
+ break;
+ case Type::kNumValidStrategies:
+ JXL_ABORT("Invalid strategy");
+ };
+}
+
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif // LIB_JXL_DEC_TRANSFORMS_INL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dec_transforms_testonly.cc b/third_party/jpeg-xl/lib/jxl/dec_transforms_testonly.cc
new file mode 100644
index 0000000000..9ee80c59dc
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_transforms_testonly.cc
@@ -0,0 +1,41 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_transforms_testonly.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_transforms_testonly.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/dec_transforms-inl.h"
+
+namespace jxl {
+
+#if HWY_ONCE
+HWY_EXPORT(TransformToPixels);
+void TransformToPixels(AcStrategy::Type strategy,
+ float* JXL_RESTRICT coefficients,
+ float* JXL_RESTRICT pixels, size_t pixels_stride,
+ float* scratch_space) {
+ return HWY_DYNAMIC_DISPATCH(TransformToPixels)(strategy, coefficients, pixels,
+ pixels_stride, scratch_space);
+}
+
+HWY_EXPORT(LowestFrequenciesFromDC);
+void LowestFrequenciesFromDC(const jxl::AcStrategy::Type strategy,
+ const float* dc, size_t dc_stride, float* llf) {
+ return HWY_DYNAMIC_DISPATCH(LowestFrequenciesFromDC)(strategy, dc, dc_stride,
+ llf);
+}
+
+HWY_EXPORT(AFVIDCT4x4);
+void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
+ return HWY_DYNAMIC_DISPATCH(AFVIDCT4x4)(coeffs, pixels);
+}
+#endif // HWY_ONCE
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/dec_transforms_testonly.h b/third_party/jpeg-xl/lib/jxl/dec_transforms_testonly.h
new file mode 100644
index 0000000000..97c4ca543d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_transforms_testonly.h
@@ -0,0 +1,32 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_TRANSFORMS_TESTONLY_H_
+#define LIB_JXL_DEC_TRANSFORMS_TESTONLY_H_
+
+// Facade for (non-inlined) inverse integral transforms.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+void TransformToPixels(AcStrategy::Type strategy,
+ float* JXL_RESTRICT coefficients,
+ float* JXL_RESTRICT pixels, size_t pixels_stride,
+ float* JXL_RESTRICT scratch_space);
+
+// Equivalent of the above for DC image.
+void LowestFrequenciesFromDC(const jxl::AcStrategy::Type strategy,
+ const float* dc, size_t dc_stride, float* llf);
+
+void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels);
+
+} // namespace jxl
+
+#endif // LIB_JXL_DEC_TRANSFORMS_TESTONLY_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dec_xyb-inl.h b/third_party/jpeg-xl/lib/jxl/dec_xyb-inl.h
new file mode 100644
index 0000000000..a4f24cd123
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_xyb-inl.h
@@ -0,0 +1,346 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// XYB -> linear sRGB helper function.
+
+#if defined(LIB_JXL_DEC_XYB_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_DEC_XYB_INL_H_
+#undef LIB_JXL_DEC_XYB_INL_H_
+#else
+#define LIB_JXL_DEC_XYB_INL_H_
+#endif
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/dec_xyb.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Broadcast;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Sub;
+
+// Inverts the pixel-wise RGB->XYB conversion in OpsinDynamicsImage() (including
+// the gamma mixing and simple gamma). Avoids clamping to [0, 1] - out of (sRGB)
+// gamut values may be in-gamut after transforming to a wider space.
+// "inverse_matrix" points to 9 broadcasted vectors, which are the 3x3 entries
+// of the (row-major) opsin absorbance matrix inverse. Pre-multiplying its
+// entries by c is equivalent to multiplying linear_* by c afterwards.
+template <class D, class V>
+HWY_INLINE HWY_MAYBE_UNUSED void XybToRgb(D d, const V opsin_x, const V opsin_y,
+ const V opsin_b,
+ const OpsinParams& opsin_params,
+ V* const HWY_RESTRICT linear_r,
+ V* const HWY_RESTRICT linear_g,
+ V* const HWY_RESTRICT linear_b) {
+#if HWY_TARGET == HWY_SCALAR
+ const auto neg_bias_r = Set(d, opsin_params.opsin_biases[0]);
+ const auto neg_bias_g = Set(d, opsin_params.opsin_biases[1]);
+ const auto neg_bias_b = Set(d, opsin_params.opsin_biases[2]);
+#else
+ const auto neg_bias_rgb = LoadDup128(d, opsin_params.opsin_biases);
+ const auto neg_bias_r = Broadcast<0>(neg_bias_rgb);
+ const auto neg_bias_g = Broadcast<1>(neg_bias_rgb);
+ const auto neg_bias_b = Broadcast<2>(neg_bias_rgb);
+#endif
+
+ // Color space: XYB -> RGB
+ auto gamma_r = Add(opsin_y, opsin_x);
+ auto gamma_g = Sub(opsin_y, opsin_x);
+ auto gamma_b = opsin_b;
+
+ gamma_r = Sub(gamma_r, Set(d, opsin_params.opsin_biases_cbrt[0]));
+ gamma_g = Sub(gamma_g, Set(d, opsin_params.opsin_biases_cbrt[1]));
+ gamma_b = Sub(gamma_b, Set(d, opsin_params.opsin_biases_cbrt[2]));
+
+ // Undo gamma compression: linear = gamma^3 for efficiency.
+ const auto gamma_r2 = Mul(gamma_r, gamma_r);
+ const auto gamma_g2 = Mul(gamma_g, gamma_g);
+ const auto gamma_b2 = Mul(gamma_b, gamma_b);
+ const auto mixed_r = MulAdd(gamma_r2, gamma_r, neg_bias_r);
+ const auto mixed_g = MulAdd(gamma_g2, gamma_g, neg_bias_g);
+ const auto mixed_b = MulAdd(gamma_b2, gamma_b, neg_bias_b);
+
+ const float* HWY_RESTRICT inverse_matrix = opsin_params.inverse_opsin_matrix;
+
+ // Unmix (multiply by 3x3 inverse_matrix)
+ // TODO(eustas): ref would be more readable than pointer
+ *linear_r = Mul(LoadDup128(d, &inverse_matrix[0 * 4]), mixed_r);
+ *linear_g = Mul(LoadDup128(d, &inverse_matrix[3 * 4]), mixed_r);
+ *linear_b = Mul(LoadDup128(d, &inverse_matrix[6 * 4]), mixed_r);
+ *linear_r = MulAdd(LoadDup128(d, &inverse_matrix[1 * 4]), mixed_g, *linear_r);
+ *linear_g = MulAdd(LoadDup128(d, &inverse_matrix[4 * 4]), mixed_g, *linear_g);
+ *linear_b = MulAdd(LoadDup128(d, &inverse_matrix[7 * 4]), mixed_g, *linear_b);
+ *linear_r = MulAdd(LoadDup128(d, &inverse_matrix[2 * 4]), mixed_b, *linear_r);
+ *linear_g = MulAdd(LoadDup128(d, &inverse_matrix[5 * 4]), mixed_b, *linear_g);
+ *linear_b = MulAdd(LoadDup128(d, &inverse_matrix[8 * 4]), mixed_b, *linear_b);
+}
+
+static inline HWY_MAYBE_UNUSED bool HasFastXYBTosRGB8() {
+#if HWY_TARGET == HWY_NEON
+ return true;
+#else
+ return false;
+#endif
+}
+
+static inline HWY_MAYBE_UNUSED void FastXYBTosRGB8(const float* input[4],
+ uint8_t* output,
+ bool is_rgba, size_t xsize) {
+ // This function is very NEON-specific. As such, it uses intrinsics directly.
+#if HWY_TARGET == HWY_NEON
+ // WARNING: doing fixed point arithmetic correctly is very complicated.
+ // Changes to this function should be thoroughly tested.
+
+ // Note that the input is assumed to have 13 bits of mantissa, and the output
+ // will have 14 bits.
+ auto srgb_tf = [&](int16x8_t v16) {
+ int16x8_t clz = vclzq_s16(v16);
+ // Convert to [0.25, 0.5) range.
+ int16x8_t v025_05_16 = vqshlq_s16(v16, vqsubq_s16(clz, vdupq_n_s16(2)));
+
+ // third degree polynomial approximation between 0.25 and 0.5
+ // of 1.055/2^(7/2.4) * x^(1/2.4) / 32.
+ // poly ~ ((0.95x-1.75)*x+1.72)*x+0.29
+ // We actually compute ~ ((0.47x-0.87)*x+0.86)*(2x)+0.29 as 1.75 and 1.72
+ // overflow our fixed point representation.
+
+ int16x8_t twov = vqaddq_s16(v025_05_16, v025_05_16);
+
+ // 0.47 * x
+ int16x8_t step1 = vqrdmulhq_n_s16(v025_05_16, 15706);
+ // - 0.87
+ int16x8_t step2 = vsubq_s16(step1, vdupq_n_s16(28546));
+ // * x
+ int16x8_t step3 = vqrdmulhq_s16(step2, v025_05_16);
+ // + 0.86
+ int16x8_t step4 = vaddq_s16(step3, vdupq_n_s16(28302));
+ // * 2x
+ int16x8_t step5 = vqrdmulhq_s16(step4, twov);
+ // + 0.29
+ int16x8_t mul16 = vaddq_s16(step5, vdupq_n_s16(9485));
+
+ int16x8_t exp16 = vsubq_s16(vdupq_n_s16(11), clz);
+ // Compute 2**(1/2.4*exp16)/32. Values of exp16 that would overflow are
+ // capped to 1.
+ // Generated with the following Python script:
+ // a = []
+ // b = []
+ //
+ // for i in range(0, 16):
+ // v = 2**(5/12.*i)
+ // v /= 16
+ // v *= 256 * 128
+ // v = int(v)
+ // a.append(v // 256)
+ // b.append(v % 256)
+ //
+ // print(", ".join("0x%02x" % x for x in a))
+ //
+ // print(", ".join("0x%02x" % x for x in b))
+
+ HWY_ALIGN constexpr uint8_t k2to512powersm1div32_high[16] = {
+ 0x08, 0x0a, 0x0e, 0x13, 0x19, 0x21, 0x2d, 0x3c,
+ 0x50, 0x6b, 0x8f, 0x8f, 0x8f, 0x8f, 0x8f, 0x8f,
+ };
+ HWY_ALIGN constexpr uint8_t k2to512powersm1div32_low[16] = {
+ 0x00, 0xad, 0x41, 0x06, 0x65, 0xe7, 0x41, 0x68,
+ 0xa2, 0xa2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ };
+ // Using the highway implementation here since vqtbl1q is aarch64-only.
+ using hwy::HWY_NAMESPACE::Vec128;
+ uint8x16_t pow_low =
+ TableLookupBytes(
+ Vec128<uint8_t, 16>(vld1q_u8(k2to512powersm1div32_low)),
+ Vec128<uint8_t, 16>(vreinterpretq_u8_s16(exp16)))
+ .raw;
+ uint8x16_t pow_high =
+ TableLookupBytes(
+ Vec128<uint8_t, 16>(vld1q_u8(k2to512powersm1div32_high)),
+ Vec128<uint8_t, 16>(vreinterpretq_u8_s16(exp16)))
+ .raw;
+ int16x8_t pow16 = vreinterpretq_s16_u16(vsliq_n_u16(
+ vreinterpretq_u16_u8(pow_low), vreinterpretq_u16_u8(pow_high), 8));
+
+ // approximation of v * 12.92, divided by 2
+ // Note that our input is using 13 mantissa bits instead of 15.
+ int16x8_t v16_linear = vrshrq_n_s16(vmulq_n_s16(v16, 826), 5);
+ // 1.055*pow(v, 1/2.4) - 0.055, divided by 2
+ auto v16_pow = vsubq_s16(vqrdmulhq_s16(mul16, pow16), vdupq_n_s16(901));
+ // > 0.0031308f (note that v16 has 13 mantissa bits)
+ return vbslq_s16(vcgeq_s16(v16, vdupq_n_s16(26)), v16_pow, v16_linear);
+ };
+
+ const float* JXL_RESTRICT row_in_x = input[0];
+ const float* JXL_RESTRICT row_in_y = input[1];
+ const float* JXL_RESTRICT row_in_b = input[2];
+ const float* JXL_RESTRICT row_in_a = input[3];
+ for (size_t x = 0; x < xsize; x += 8) {
+ // Normal ranges for xyb for in-gamut sRGB colors:
+ // x: -0.015386 0.028100
+ // y: 0.000000 0.845308
+ // b: 0.000000 0.845308
+
+ // We actually want x * 8 to have some extra precision.
+ // TODO(veluca): consider different approaches here, like vld1q_f32_x2.
+ float32x4_t opsin_x_left = vld1q_f32(row_in_x + x);
+ int16x4_t opsin_x16_times8_left =
+ vqmovn_s32(vcvtq_n_s32_f32(opsin_x_left, 18));
+ float32x4_t opsin_x_right =
+ vld1q_f32(row_in_x + x + (x + 4 < xsize ? 4 : 0));
+ int16x4_t opsin_x16_times8_right =
+ vqmovn_s32(vcvtq_n_s32_f32(opsin_x_right, 18));
+ int16x8_t opsin_x16_times8 =
+ vcombine_s16(opsin_x16_times8_left, opsin_x16_times8_right);
+
+ float32x4_t opsin_y_left = vld1q_f32(row_in_y + x);
+ int16x4_t opsin_y16_left = vqmovn_s32(vcvtq_n_s32_f32(opsin_y_left, 15));
+ float32x4_t opsin_y_right =
+ vld1q_f32(row_in_y + x + (x + 4 < xsize ? 4 : 0));
+ int16x4_t opsin_y16_right = vqmovn_s32(vcvtq_n_s32_f32(opsin_y_right, 15));
+ int16x8_t opsin_y16 = vcombine_s16(opsin_y16_left, opsin_y16_right);
+
+ float32x4_t opsin_b_left = vld1q_f32(row_in_b + x);
+ int16x4_t opsin_b16_left = vqmovn_s32(vcvtq_n_s32_f32(opsin_b_left, 15));
+ float32x4_t opsin_b_right =
+ vld1q_f32(row_in_b + x + (x + 4 < xsize ? 4 : 0));
+ int16x4_t opsin_b16_right = vqmovn_s32(vcvtq_n_s32_f32(opsin_b_right, 15));
+ int16x8_t opsin_b16 = vcombine_s16(opsin_b16_left, opsin_b16_right);
+
+ int16x8_t neg_bias16 = vdupq_n_s16(-124); // -0.0037930732552754493
+ int16x8_t neg_bias_cbrt16 = vdupq_n_s16(-5110); // -0.155954201
+ int16x8_t neg_bias_half16 = vdupq_n_s16(-62);
+
+ // Color space: XYB -> RGB
+ // Compute ((y+x-bias_cbrt)^3-(y-x-bias_cbrt)^3)/2,
+ // ((y+x-bias_cbrt)^3+(y-x-bias_cbrt)^3)/2+bias, (b-bias_cbrt)^3+bias.
+ // Note that ignoring x2 in the formulas below (as x << y) results in
+ // errors of at least 3 in the final sRGB values.
+ int16x8_t opsin_yp16 = vqsubq_s16(opsin_y16, neg_bias_cbrt16);
+ int16x8_t ysq16 = vqrdmulhq_s16(opsin_yp16, opsin_yp16);
+ int16x8_t twentyfourx16 = vmulq_n_s16(opsin_x16_times8, 3);
+ int16x8_t twentyfourxy16 = vqrdmulhq_s16(opsin_yp16, twentyfourx16);
+ int16x8_t threexsq16 =
+ vrshrq_n_s16(vqrdmulhq_s16(opsin_x16_times8, twentyfourx16), 6);
+
+ // We can ignore x^3 here. Note that this is multiplied by 8.
+ int16x8_t mixed_rmg16 = vqrdmulhq_s16(twentyfourxy16, opsin_yp16);
+
+ int16x8_t mixed_rpg_sos_half = vhaddq_s16(ysq16, threexsq16);
+ int16x8_t mixed_rpg16 = vhaddq_s16(
+ vqrdmulhq_s16(opsin_yp16, mixed_rpg_sos_half), neg_bias_half16);
+
+ int16x8_t gamma_b16 = vqsubq_s16(opsin_b16, neg_bias_cbrt16);
+ int16x8_t gamma_bsq16 = vqrdmulhq_s16(gamma_b16, gamma_b16);
+ int16x8_t gamma_bcb16 = vqrdmulhq_s16(gamma_bsq16, gamma_b16);
+ int16x8_t mixed_b16 = vqaddq_s16(gamma_bcb16, neg_bias16);
+ // mixed_rpg and mixed_b are in 0-1 range.
+ // mixed_rmg has a smaller range (-0.035 to 0.035 for valid sRGB). Note
+ // that at this point it is already multiplied by 8.
+
+ // We multiply all the mixed values by 1/4 (i.e. shift them to 13-bit
+ // fixed point) to ensure intermediate quantities are in range. Note that
+ // r-g is not shifted, and was x8 before here; this corresponds to a x32
+ // overall multiplicative factor and ensures that all the matrix constants
+ // are in 0-1 range.
+ // Similarly, mixed_rpg16 is already multiplied by 1/4 because of the two
+ // vhadd + using neg_bias_half.
+ mixed_b16 = vshrq_n_s16(mixed_b16, 2);
+
+ // Unmix (multiply by 3x3 inverse_matrix)
+ // For increased precision, we use a matrix for converting from
+ // ((mixed_r - mixed_g)/2, (mixed_r + mixed_g)/2, mixed_b) to rgb. This
+ // avoids cancellation effects when computing (y+x)^3-(y-x)^3.
+ // We compute mixed_rpg - mixed_b because the (1+c)*mixed_rpg - c *
+ // mixed_b pattern is repeated frequently in the code below. This allows
+ // us to save a multiply per channel, and removes the presence of
+ // some constants above 1. Moreover, mixed_rmg - mixed_b is in (-1, 1)
+ // range, so the subtraction is safe.
+ // All the magic-looking constants here are derived by computing the
+ // inverse opsin matrix for the transformation modified as described
+ // above.
+
+ // Precomputation common to multiple color values.
+ int16x8_t mixed_rpgmb16 = vqsubq_s16(mixed_rpg16, mixed_b16);
+ int16x8_t mixed_rpgmb_times_016 = vqrdmulhq_n_s16(mixed_rpgmb16, 5394);
+ int16x8_t mixed_rg16 = vqaddq_s16(mixed_rpgmb_times_016, mixed_rpg16);
+
+ // R
+ int16x8_t linear_r16 =
+ vqaddq_s16(mixed_rg16, vqrdmulhq_n_s16(mixed_rmg16, 21400));
+
+ // G
+ int16x8_t linear_g16 =
+ vqaddq_s16(mixed_rg16, vqrdmulhq_n_s16(mixed_rmg16, -7857));
+
+ // B
+ int16x8_t linear_b16 = vqrdmulhq_n_s16(mixed_rpgmb16, -30996);
+ linear_b16 = vqaddq_s16(linear_b16, mixed_b16);
+ linear_b16 = vqaddq_s16(linear_b16, vqrdmulhq_n_s16(mixed_rmg16, -6525));
+
+ // Apply SRGB transfer function.
+ int16x8_t r = srgb_tf(linear_r16);
+ int16x8_t g = srgb_tf(linear_g16);
+ int16x8_t b = srgb_tf(linear_b16);
+
+ uint8x8_t r8 =
+ vqmovun_s16(vrshrq_n_s16(vsubq_s16(r, vshrq_n_s16(r, 8)), 6));
+ uint8x8_t g8 =
+ vqmovun_s16(vrshrq_n_s16(vsubq_s16(g, vshrq_n_s16(g, 8)), 6));
+ uint8x8_t b8 =
+ vqmovun_s16(vrshrq_n_s16(vsubq_s16(b, vshrq_n_s16(b, 8)), 6));
+
+ size_t n = xsize - x;
+ if (is_rgba) {
+ float32x4_t a_f32_left =
+ row_in_a ? vld1q_f32(row_in_a + x) : vdupq_n_f32(1.0f);
+ float32x4_t a_f32_right =
+ row_in_a ? vld1q_f32(row_in_a + x + (x + 4 < xsize ? 4 : 0))
+ : vdupq_n_f32(1.0f);
+ int16x4_t a16_left = vqmovn_s32(vcvtq_n_s32_f32(a_f32_left, 8));
+ int16x4_t a16_right = vqmovn_s32(vcvtq_n_s32_f32(a_f32_right, 8));
+ uint8x8_t a8 = vqmovun_s16(vcombine_s16(a16_left, a16_right));
+ uint8_t* buf = output + 4 * x;
+ uint8x8x4_t data = {r8, g8, b8, a8};
+ if (n >= 8) {
+ vst4_u8(buf, data);
+ } else {
+ uint8_t tmp[8 * 4];
+ vst4_u8(tmp, data);
+ memcpy(buf, tmp, n * 4);
+ }
+ } else {
+ uint8_t* buf = output + 3 * x;
+ uint8x8x3_t data = {r8, g8, b8};
+ if (n >= 8) {
+ vst3_u8(buf, data);
+ } else {
+ uint8_t tmp[8 * 3];
+ vst3_u8(tmp, data);
+ memcpy(buf, tmp, n * 3);
+ }
+ }
+ }
+#else
+ (void)input;
+ (void)output;
+ (void)is_rgba;
+ (void)xsize;
+ JXL_ABORT("Unreachable");
+#endif
+}
+
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif // LIB_JXL_DEC_XYB_INL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/dec_xyb.cc b/third_party/jpeg-xl/lib/jxl/dec_xyb.cc
new file mode 100644
index 0000000000..46fc63c49e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_xyb.cc
@@ -0,0 +1,329 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/dec_xyb.h"
+
+#include <string.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/dec_xyb.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_group_border.h"
+#include "lib/jxl/dec_xyb-inl.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/matrix_ops.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/sanitizers.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::MulAdd;
+
+void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool,
+ const OpsinParams& opsin_params) {
+ PROFILER_FUNC;
+ JXL_CHECK_IMAGE_INITIALIZED(*inout, Rect(*inout));
+
+ const size_t xsize = inout->xsize(); // not padded
+ JXL_CHECK(RunOnPool(
+ pool, 0, inout->ysize(), ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /* thread */) {
+ const size_t y = task;
+
+ // Faster than adding via ByteOffset at end of loop.
+ float* JXL_RESTRICT row0 = inout->PlaneRow(0, y);
+ float* JXL_RESTRICT row1 = inout->PlaneRow(1, y);
+ float* JXL_RESTRICT row2 = inout->PlaneRow(2, y);
+
+ const HWY_FULL(float) d;
+
+ for (size_t x = 0; x < xsize; x += Lanes(d)) {
+ const auto in_opsin_x = Load(d, row0 + x);
+ const auto in_opsin_y = Load(d, row1 + x);
+ const auto in_opsin_b = Load(d, row2 + x);
+ auto linear_r = Undefined(d);
+ auto linear_g = Undefined(d);
+ auto linear_b = Undefined(d);
+ XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, opsin_params,
+ &linear_r, &linear_g, &linear_b);
+
+ Store(linear_r, d, row0 + x);
+ Store(linear_g, d, row1 + x);
+ Store(linear_b, d, row2 + x);
+ }
+ },
+ "OpsinToLinear"));
+}
+
+// Same, but not in-place.
+void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool,
+ Image3F* JXL_RESTRICT linear,
+ const OpsinParams& opsin_params) {
+ PROFILER_FUNC;
+
+ JXL_ASSERT(SameSize(rect, *linear));
+ JXL_CHECK_IMAGE_INITIALIZED(opsin, rect);
+
+ JXL_CHECK(RunOnPool(
+ pool, 0, static_cast<int>(rect.ysize()), ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /*thread*/) {
+ const size_t y = static_cast<size_t>(task);
+
+ // Faster than adding via ByteOffset at end of loop.
+ const float* JXL_RESTRICT row_opsin_0 = rect.ConstPlaneRow(opsin, 0, y);
+ const float* JXL_RESTRICT row_opsin_1 = rect.ConstPlaneRow(opsin, 1, y);
+ const float* JXL_RESTRICT row_opsin_2 = rect.ConstPlaneRow(opsin, 2, y);
+ float* JXL_RESTRICT row_linear_0 = linear->PlaneRow(0, y);
+ float* JXL_RESTRICT row_linear_1 = linear->PlaneRow(1, y);
+ float* JXL_RESTRICT row_linear_2 = linear->PlaneRow(2, y);
+
+ const HWY_FULL(float) d;
+
+ for (size_t x = 0; x < rect.xsize(); x += Lanes(d)) {
+ const auto in_opsin_x = Load(d, row_opsin_0 + x);
+ const auto in_opsin_y = Load(d, row_opsin_1 + x);
+ const auto in_opsin_b = Load(d, row_opsin_2 + x);
+ auto linear_r = Undefined(d);
+ auto linear_g = Undefined(d);
+ auto linear_b = Undefined(d);
+ XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, opsin_params,
+ &linear_r, &linear_g, &linear_b);
+
+ Store(linear_r, d, row_linear_0 + x);
+ Store(linear_g, d, row_linear_1 + x);
+ Store(linear_b, d, row_linear_2 + x);
+ }
+ },
+ "OpsinToLinear(Rect)"));
+ JXL_CHECK_IMAGE_INITIALIZED(*linear, rect);
+}
+
+// Transform YCbCr to RGB.
+// Could be performed in-place (i.e. Y, Cb and Cr could alias R, B and B).
+void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect) {
+ JXL_CHECK_IMAGE_INITIALIZED(ycbcr, rect);
+ const HWY_CAPPED(float, kBlockDim) df;
+ const size_t S = Lanes(df); // Step.
+
+ const size_t xsize = rect.xsize();
+ const size_t ysize = rect.ysize();
+ if ((xsize == 0) || (ysize == 0)) return;
+
+ // Full-range BT.601 as defined by JFIF Clause 7:
+ // https://www.itu.int/rec/T-REC-T.871-201105-I/en
+ const auto c128 = Set(df, 128.0f / 255);
+ const auto crcr = Set(df, 1.402f);
+ const auto cgcb = Set(df, -0.114f * 1.772f / 0.587f);
+ const auto cgcr = Set(df, -0.299f * 1.402f / 0.587f);
+ const auto cbcb = Set(df, 1.772f);
+
+ for (size_t y = 0; y < ysize; y++) {
+ const float* y_row = rect.ConstPlaneRow(ycbcr, 1, y);
+ const float* cb_row = rect.ConstPlaneRow(ycbcr, 0, y);
+ const float* cr_row = rect.ConstPlaneRow(ycbcr, 2, y);
+ float* r_row = rect.PlaneRow(rgb, 0, y);
+ float* g_row = rect.PlaneRow(rgb, 1, y);
+ float* b_row = rect.PlaneRow(rgb, 2, y);
+ for (size_t x = 0; x < xsize; x += S) {
+ const auto y_vec = Add(Load(df, y_row + x), c128);
+ const auto cb_vec = Load(df, cb_row + x);
+ const auto cr_vec = Load(df, cr_row + x);
+ const auto r_vec = MulAdd(crcr, cr_vec, y_vec);
+ const auto g_vec = MulAdd(cgcr, cr_vec, MulAdd(cgcb, cb_vec, y_vec));
+ const auto b_vec = MulAdd(cbcb, cb_vec, y_vec);
+ Store(r_vec, df, r_row + x);
+ Store(g_vec, df, g_row + x);
+ Store(b_vec, df, b_row + x);
+ }
+ }
+ JXL_CHECK_IMAGE_INITIALIZED(*rgb, rect);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(OpsinToLinearInplace);
+void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool,
+ const OpsinParams& opsin_params) {
+ return HWY_DYNAMIC_DISPATCH(OpsinToLinearInplace)(inout, pool, opsin_params);
+}
+
+HWY_EXPORT(OpsinToLinear);
+void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool,
+ Image3F* JXL_RESTRICT linear,
+ const OpsinParams& opsin_params) {
+ return HWY_DYNAMIC_DISPATCH(OpsinToLinear)(opsin, rect, pool, linear,
+ opsin_params);
+}
+
+HWY_EXPORT(YcbcrToRgb);
+void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect) {
+ return HWY_DYNAMIC_DISPATCH(YcbcrToRgb)(ycbcr, rgb, rect);
+}
+
+HWY_EXPORT(HasFastXYBTosRGB8);
+bool HasFastXYBTosRGB8() { return HWY_DYNAMIC_DISPATCH(HasFastXYBTosRGB8)(); }
+
+HWY_EXPORT(FastXYBTosRGB8);
+void FastXYBTosRGB8(const float* input[4], uint8_t* output, bool is_rgba,
+ size_t xsize) {
+ return HWY_DYNAMIC_DISPATCH(FastXYBTosRGB8)(input, output, is_rgba, xsize);
+}
+
+void OpsinParams::Init(float intensity_target) {
+ InitSIMDInverseMatrix(GetOpsinAbsorbanceInverseMatrix(), inverse_opsin_matrix,
+ intensity_target);
+ memcpy(opsin_biases, kNegOpsinAbsorbanceBiasRGB,
+ sizeof(kNegOpsinAbsorbanceBiasRGB));
+ memcpy(quant_biases, kDefaultQuantBias, sizeof(kDefaultQuantBias));
+ for (size_t c = 0; c < 4; c++) {
+ opsin_biases_cbrt[c] = cbrtf(opsin_biases[c]);
+ }
+}
+
+bool CanOutputToColorEncoding(const ColorEncoding& c_desired) {
+ if (!c_desired.HaveFields()) {
+ return false;
+ }
+ // TODO(veluca): keep in sync with dec_reconstruct.cc
+ if (!c_desired.tf.IsPQ() && !c_desired.tf.IsSRGB() &&
+ !c_desired.tf.IsGamma() && !c_desired.tf.IsLinear() &&
+ !c_desired.tf.IsHLG() && !c_desired.tf.IsDCI() && !c_desired.tf.Is709()) {
+ return false;
+ }
+ if (c_desired.IsGray() && c_desired.white_point != WhitePoint::kD65) {
+ // TODO(veluca): figure out what should happen here.
+ return false;
+ }
+ return true;
+}
+
+Status OutputEncodingInfo::SetFromMetadata(const CodecMetadata& metadata) {
+ orig_color_encoding = metadata.m.color_encoding;
+ orig_intensity_target = metadata.m.IntensityTarget();
+ desired_intensity_target = orig_intensity_target;
+ const auto& im = metadata.transform_data.opsin_inverse_matrix;
+ memcpy(orig_inverse_matrix, im.inverse_matrix, sizeof(orig_inverse_matrix));
+ default_transform = im.all_default;
+ xyb_encoded = metadata.m.xyb_encoded;
+ std::copy(std::begin(im.opsin_biases), std::end(im.opsin_biases),
+ opsin_params.opsin_biases);
+ for (int i = 0; i < 3; ++i) {
+ opsin_params.opsin_biases_cbrt[i] = cbrtf(opsin_params.opsin_biases[i]);
+ }
+ opsin_params.opsin_biases_cbrt[3] = opsin_params.opsin_biases[3] = 1;
+ std::copy(std::begin(im.quant_biases), std::end(im.quant_biases),
+ opsin_params.quant_biases);
+ bool orig_ok = CanOutputToColorEncoding(orig_color_encoding);
+ bool orig_grey = orig_color_encoding.IsGray();
+ return SetColorEncoding(!xyb_encoded || orig_ok
+ ? orig_color_encoding
+ : ColorEncoding::LinearSRGB(orig_grey));
+}
+
+Status OutputEncodingInfo::MaybeSetColorEncoding(
+ const ColorEncoding& c_desired) {
+ if (c_desired.GetColorSpace() == ColorSpace::kXYB &&
+ ((color_encoding.GetColorSpace() == ColorSpace::kRGB &&
+ color_encoding.primaries != Primaries::kSRGB) ||
+ color_encoding.tf.IsPQ())) {
+ return false;
+ }
+ if (!xyb_encoded && !CanOutputToColorEncoding(c_desired)) {
+ return false;
+ }
+ return SetColorEncoding(c_desired);
+}
+
+Status OutputEncodingInfo::SetColorEncoding(const ColorEncoding& c_desired) {
+ color_encoding = c_desired;
+ color_encoding_is_original = orig_color_encoding.SameColorEncoding(c_desired);
+
+ // Compute the opsin inverse matrix and luminances based on primaries and
+ // white point.
+ float inverse_matrix[9];
+ bool inverse_matrix_is_default = default_transform;
+ memcpy(inverse_matrix, orig_inverse_matrix, sizeof(inverse_matrix));
+ constexpr float kSRGBLuminances[3] = {0.2126, 0.7152, 0.0722};
+ memcpy(luminances, kSRGBLuminances, sizeof(luminances));
+ if ((c_desired.primaries != Primaries::kSRGB ||
+ c_desired.white_point != WhitePoint::kD65) &&
+ !c_desired.IsGray()) {
+ float srgb_to_xyzd50[9];
+ const auto& srgb = ColorEncoding::SRGB(/*is_gray=*/false);
+ JXL_CHECK(PrimariesToXYZD50(
+ srgb.GetPrimaries().r.x, srgb.GetPrimaries().r.y,
+ srgb.GetPrimaries().g.x, srgb.GetPrimaries().g.y,
+ srgb.GetPrimaries().b.x, srgb.GetPrimaries().b.y,
+ srgb.GetWhitePoint().x, srgb.GetWhitePoint().y, srgb_to_xyzd50));
+ float original_to_xyz[3][3];
+ JXL_RETURN_IF_ERROR(PrimariesToXYZ(
+ c_desired.GetPrimaries().r.x, c_desired.GetPrimaries().r.y,
+ c_desired.GetPrimaries().g.x, c_desired.GetPrimaries().g.y,
+ c_desired.GetPrimaries().b.x, c_desired.GetPrimaries().b.y,
+ c_desired.GetWhitePoint().x, c_desired.GetWhitePoint().y,
+ &original_to_xyz[0][0]));
+ memcpy(luminances, original_to_xyz[1], sizeof luminances);
+ if (xyb_encoded) {
+ float adapt_to_d50[9];
+ JXL_RETURN_IF_ERROR(AdaptToXYZD50(c_desired.GetWhitePoint().x,
+ c_desired.GetWhitePoint().y,
+ adapt_to_d50));
+ float xyzd50_to_original[9];
+ Mul3x3Matrix(adapt_to_d50, &original_to_xyz[0][0], xyzd50_to_original);
+ JXL_RETURN_IF_ERROR(Inv3x3Matrix(xyzd50_to_original));
+ float srgb_to_original[9];
+ Mul3x3Matrix(xyzd50_to_original, srgb_to_xyzd50, srgb_to_original);
+ Mul3x3Matrix(srgb_to_original, orig_inverse_matrix, inverse_matrix);
+ inverse_matrix_is_default = false;
+ }
+ }
+
+ if (c_desired.IsGray()) {
+ float tmp_inv_matrix[9];
+ memcpy(tmp_inv_matrix, inverse_matrix, sizeof(inverse_matrix));
+ float srgb_to_luma[9];
+ memcpy(&srgb_to_luma[0], luminances, sizeof(luminances));
+ memcpy(&srgb_to_luma[3], luminances, sizeof(luminances));
+ memcpy(&srgb_to_luma[6], luminances, sizeof(luminances));
+ Mul3x3Matrix(srgb_to_luma, tmp_inv_matrix, inverse_matrix);
+ }
+
+ // The internal XYB color space uses absolute luminance, so we scale back the
+ // opsin inverse matrix to relative luminance where 1.0 corresponds to the
+ // original intensity target, or to absolute luminance for PQ, where 1.0
+ // corresponds to 10000 nits.
+ if (xyb_encoded) {
+ float intensity_target =
+ (c_desired.tf.IsPQ() ? 10000 : orig_intensity_target);
+ InitSIMDInverseMatrix(inverse_matrix, opsin_params.inverse_opsin_matrix,
+ intensity_target);
+ all_default_opsin = (std::abs(intensity_target - 255.0) <= 0.1f &&
+ inverse_matrix_is_default);
+ }
+
+ // Set the inverse gamma based on color space transfer function.
+ inverse_gamma = (c_desired.tf.IsGamma() ? c_desired.tf.GetGamma()
+ : c_desired.tf.IsDCI() ? 1.0f / 2.6f
+ : 1.0);
+ return true;
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/dec_xyb.h b/third_party/jpeg-xl/lib/jxl/dec_xyb.h
new file mode 100644
index 0000000000..ebaae9a176
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/dec_xyb.h
@@ -0,0 +1,89 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_XYB_H_
+#define LIB_JXL_DEC_XYB_H_
+
+// XYB -> linear sRGB.
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_metadata.h"
+#include "lib/jxl/opsin_params.h"
+
+namespace jxl {
+
+// Parameters for XYB->sRGB conversion.
+struct OpsinParams {
+ float inverse_opsin_matrix[9 * 4];
+ float opsin_biases[4];
+ float opsin_biases_cbrt[4];
+ float quant_biases[4];
+ void Init(float intensity_target);
+};
+
+struct OutputEncodingInfo {
+ //
+ // Fields depending only on image metadata
+ //
+ ColorEncoding orig_color_encoding;
+ // Used for the HLG OOTF and PQ tone mapping.
+ float orig_intensity_target;
+ // Opsin inverse matrix taken from the metadata.
+ float orig_inverse_matrix[9];
+ bool default_transform;
+ bool xyb_encoded;
+ //
+ // Fields depending on output color encoding
+ //
+ ColorEncoding color_encoding;
+ bool color_encoding_is_original;
+ // Contains an opsin matrix that converts to the primaries of the output
+ // encoding.
+ OpsinParams opsin_params;
+ bool all_default_opsin;
+ // Used for Gamma and DCI transfer functions.
+ float inverse_gamma;
+ // Luminances of color_encoding's primaries, used for the HLG inverse OOTF and
+ // for PQ tone mapping.
+ // Default to sRGB's.
+ float luminances[3];
+ // Used for the HLG inverse OOTF and PQ tone mapping.
+ float desired_intensity_target;
+
+ Status SetFromMetadata(const CodecMetadata& metadata);
+ Status MaybeSetColorEncoding(const ColorEncoding& c_desired);
+
+ private:
+ Status SetColorEncoding(const ColorEncoding& c_desired);
+};
+
+// Converts `inout` (not padded) from opsin to linear sRGB in-place. Called from
+// per-pass postprocessing, hence parallelized.
+void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool,
+ const OpsinParams& opsin_params);
+
+// Converts `opsin:rect` (opsin may be padded, rect.x0 must be vector-aligned)
+// to linear sRGB. Called from whole-frame encoder, hence parallelized.
+void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool,
+ Image3F* JXL_RESTRICT linear,
+ const OpsinParams& opsin_params);
+
+// Bt.601 to match JPEG/JFIF. Inputs are _signed_ YCbCr values suitable for DCT,
+// see F.1.1.3 of T.81 (because our data type is float, there is no need to add
+// a bias to make the values unsigned).
+void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect);
+
+bool HasFastXYBTosRGB8();
+void FastXYBTosRGB8(const float* input[4], uint8_t* output, bool is_rgba,
+ size_t xsize);
+
+} // namespace jxl
+
+#endif // LIB_JXL_DEC_XYB_H_
diff --git a/third_party/jpeg-xl/lib/jxl/decode.cc b/third_party/jpeg-xl/lib/jxl/decode.cc
new file mode 100644
index 0000000000..5476f686f6
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/decode.cc
@@ -0,0 +1,2809 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/decode.h>
+#include <jxl/types.h>
+
+#include <algorithm>
+#include <array>
+#include <functional>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#if JPEGXL_ENABLE_BOXES || JPEGXL_ENABLE_TRANSCODE_JPEG
+#include "lib/jxl/box_content_decoder.h"
+#endif
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/dec_frame.h"
+#include "lib/jxl/dec_modular.h"
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+#include "lib/jxl/decode_to_jpeg.h"
+#endif
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/icc_codec.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/memory_manager_internal.h"
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/toc.h"
+
+namespace {
+
+// Checks if a + b > size, taking possible integer overflow into account.
+bool OutOfBounds(size_t a, size_t b, size_t size) {
+ size_t pos = a + b;
+ if (pos > size) return true;
+ if (pos < a) return true; // overflow happened
+ return false;
+}
+
+bool SumOverflows(size_t a, size_t b, size_t c) {
+ size_t sum = a + b;
+ if (sum < b) return true;
+ sum += c;
+ if (sum < c) return true;
+ return false;
+}
+
+JXL_INLINE size_t InitialBasicInfoSizeHint() {
+ // Amount of bytes before the start of the codestream in the container format,
+ // assuming that the codestream is the first box after the signature and
+ // filetype boxes. 12 bytes signature box + 20 bytes filetype box + 16 bytes
+ // codestream box length + name + optional XLBox length.
+ const size_t container_header_size = 48;
+
+ // Worst-case amount of bytes for basic info of the JPEG XL codestream header,
+ // that is all information up to and including extra_channel_bits. Up to
+ // around 2 bytes signature + 8 bytes SizeHeader + 31 bytes ColorEncoding + 4
+ // bytes rest of ImageMetadata + 5 bytes part of ImageMetadata2.
+ // TODO(lode): recompute and update this value when alpha_bits is moved to
+ // extra channels info.
+ const size_t max_codestream_basic_info_size = 50;
+
+ return container_header_size + max_codestream_basic_info_size;
+}
+
+// Debug-printing failure macro similar to JXL_FAILURE, but for the status code
+// JXL_DEC_ERROR
+#ifdef JXL_CRASH_ON_ERROR
+#define JXL_API_ERROR(format, ...) \
+ (::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__), \
+ ::jxl::Abort(), JXL_DEC_ERROR)
+#else // JXL_CRASH_ON_ERROR
+#define JXL_API_ERROR(format, ...) \
+ (((JXL_DEBUG_ON_ERROR) && \
+ ::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__)), \
+ JXL_DEC_ERROR)
+#endif // JXL_CRASH_ON_ERROR
+
+JxlDecoderStatus ConvertStatus(JxlDecoderStatus status) { return status; }
+
+JxlDecoderStatus ConvertStatus(jxl::Status status) {
+ return status ? JXL_DEC_SUCCESS : JXL_DEC_ERROR;
+}
+
+JxlSignature ReadSignature(const uint8_t* buf, size_t len, size_t* pos) {
+ if (*pos >= len) return JXL_SIG_NOT_ENOUGH_BYTES;
+
+ buf += *pos;
+ len -= *pos;
+
+ // JPEG XL codestream: 0xff 0x0a
+ if (len >= 1 && buf[0] == 0xff) {
+ if (len < 2) {
+ return JXL_SIG_NOT_ENOUGH_BYTES;
+ } else if (buf[1] == jxl::kCodestreamMarker) {
+ *pos += 2;
+ return JXL_SIG_CODESTREAM;
+ } else {
+ return JXL_SIG_INVALID;
+ }
+ }
+
+ // JPEG XL container
+ if (len >= 1 && buf[0] == 0) {
+ if (len < 12) {
+ return JXL_SIG_NOT_ENOUGH_BYTES;
+ } else if (buf[1] == 0 && buf[2] == 0 && buf[3] == 0xC && buf[4] == 'J' &&
+ buf[5] == 'X' && buf[6] == 'L' && buf[7] == ' ' &&
+ buf[8] == 0xD && buf[9] == 0xA && buf[10] == 0x87 &&
+ buf[11] == 0xA) {
+ *pos += 12;
+ return JXL_SIG_CONTAINER;
+ } else {
+ return JXL_SIG_INVALID;
+ }
+ }
+
+ return JXL_SIG_INVALID;
+}
+
+} // namespace
+
+uint32_t JxlDecoderVersion(void) {
+ return JPEGXL_MAJOR_VERSION * 1000000 + JPEGXL_MINOR_VERSION * 1000 +
+ JPEGXL_PATCH_VERSION;
+}
+
+JxlSignature JxlSignatureCheck(const uint8_t* buf, size_t len) {
+ size_t pos = 0;
+ return ReadSignature(buf, len, &pos);
+}
+
+namespace {
+
+size_t BitsPerChannel(JxlDataType data_type) {
+ switch (data_type) {
+ case JXL_TYPE_UINT8:
+ return 8;
+ case JXL_TYPE_UINT16:
+ return 16;
+ case JXL_TYPE_FLOAT:
+ return 32;
+ case JXL_TYPE_FLOAT16:
+ return 16;
+ default:
+ return 0; // signals unhandled JxlDataType
+ }
+}
+
+template <typename T>
+uint32_t GetBitDepth(JxlBitDepth bit_depth, const T& metadata,
+ JxlPixelFormat format) {
+ if (bit_depth.type == JXL_BIT_DEPTH_FROM_PIXEL_FORMAT) {
+ return BitsPerChannel(format.data_type);
+ } else if (bit_depth.type == JXL_BIT_DEPTH_FROM_CODESTREAM) {
+ return metadata.bit_depth.bits_per_sample;
+ } else if (bit_depth.type == JXL_BIT_DEPTH_CUSTOM) {
+ return bit_depth.bits_per_sample;
+ } else {
+ return 0;
+ }
+}
+
+enum class DecoderStage : uint32_t {
+ kInited, // Decoder created, no JxlDecoderProcessInput called yet
+ kStarted, // Running JxlDecoderProcessInput calls
+ kCodestreamFinished, // Codestream done, but other boxes could still occur.
+ // This stage can also occur before having seen the
+ // entire codestream if the user didn't subscribe to any
+ // codestream events at all, e.g. only to box events,
+ // or, the user only subscribed to basic info, and only
+ // the header of the codestream was parsed.
+ kError, // Error occurred, decoder object no longer usable
+};
+
+enum class FrameStage : uint32_t {
+ kHeader, // Must parse frame header.
+ kTOC, // Must parse TOC
+ kFull, // Must parse full pixels
+};
+
+enum class BoxStage : uint32_t {
+ kHeader, // Parsing box header of the next box, or start of non-container
+ // stream
+ kFtyp, // The ftyp box
+ kSkip, // Box whose contents are skipped
+ kCodestream, // Handling codestream box contents, or non-container stream
+ kPartialCodestream, // Handling the extra header of partial codestream box
+ kJpegRecon, // Handling jpeg reconstruction box
+};
+
+enum class JpegReconStage : uint32_t {
+ kNone, // Not outputting
+ kSettingMetadata, // Ready to output, must set metadata to the jpeg_data
+ kOutputting, // Currently outputting the JPEG bytes
+ kFinished, // JPEG reconstruction fully handled
+};
+
+/*
+Given list of frame references to storage slots, and storage slots in which this
+frame is saved, computes which frames are required to decode the frame at the
+given index and any frames after it. The frames on which this depends are
+returned as a vector of their indices, in no particular order. The given index
+must be smaller than saved_as.size(), and references.size() must equal
+saved_as.size(). Any frames beyond saved_as and references are considered
+unknown future frames and must be treated as if something depends on them.
+*/
+std::vector<size_t> GetFrameDependencies(size_t index,
+ const std::vector<int>& saved_as,
+ const std::vector<int>& references) {
+ JXL_ASSERT(references.size() == saved_as.size());
+ JXL_ASSERT(index < references.size());
+
+ std::vector<size_t> result;
+
+ constexpr size_t kNumStorage = 8;
+
+ // value which indicates nothing is stored in this storage slot
+ const size_t invalid = references.size();
+ // for each of the 8 storage slots, a vector that translates frame index to
+ // frame stored in this storage slot at this point, that is, the last
+ // frame that was stored in this slot before or at this index.
+ std::array<std::vector<size_t>, kNumStorage> storage;
+ for (size_t s = 0; s < kNumStorage; ++s) {
+ storage[s].resize(saved_as.size());
+ int mask = 1 << s;
+ size_t id = invalid;
+ for (size_t i = 0; i < saved_as.size(); ++i) {
+ if (saved_as[i] & mask) {
+ id = i;
+ }
+ storage[s][i] = id;
+ }
+ }
+
+ std::vector<char> seen(index + 1, 0);
+ std::vector<size_t> stack;
+ stack.push_back(index);
+ seen[index] = 1;
+
+ // For frames after index, assume they can depend on any of the 8 storage
+ // slots, so push the frame for each stored reference to the stack and result.
+ // All frames after index are treated as having unknown references and with
+ // the possibility that there are more frames after the last known.
+ // TODO(lode): take values of saved_as and references after index, and a
+ // input flag indicating if they are all frames of the image, to further
+ // optimize this.
+ for (size_t s = 0; s < kNumStorage; ++s) {
+ size_t frame_ref = storage[s][index];
+ if (frame_ref == invalid) continue;
+ if (seen[frame_ref]) continue;
+ stack.push_back(frame_ref);
+ seen[frame_ref] = 1;
+ result.push_back(frame_ref);
+ }
+
+ while (!stack.empty()) {
+ size_t frame_index = stack.back();
+ stack.pop_back();
+ if (frame_index == 0) continue; // first frame cannot have references
+ for (size_t s = 0; s < kNumStorage; ++s) {
+ int mask = 1 << s;
+ if (!(references[frame_index] & mask)) continue;
+ size_t frame_ref = storage[s][frame_index - 1];
+ if (frame_ref == invalid) continue;
+ if (seen[frame_ref]) continue;
+ stack.push_back(frame_ref);
+ seen[frame_ref] = 1;
+ result.push_back(frame_ref);
+ }
+ }
+
+ return result;
+}
+
+// Parameters for user-requested extra channel output.
+struct ExtraChannelOutput {
+ JxlPixelFormat format;
+ void* buffer;
+ size_t buffer_size;
+};
+
+} // namespace
+
+namespace jxl {
+
+typedef struct JxlDecoderFrameIndexBoxEntryStruct {
+ // OFFi: offset of start byte of this frame compared to start
+ // byte of previous frame from this index in the JPEG XL codestream. For the
+ // first frame, this is the offset from the first byte of the JPEG XL
+ // codestream.
+ uint64_t OFFi;
+ // Ti: duration in ticks between the start of this frame and
+ // the start of the next frame in the index. If this is the last frame in the
+ // index, this is the duration in ticks between the start of this frame and
+ // the end of the stream. A tick lasts TNUM / TDEN seconds.
+ uint32_t Ti;
+ // Fi: amount of frames the next frame in the index occurs
+ // after this frame. If this is the last frame in the index, this is the
+ // amount of frames after this frame in the remainder of the stream. Only
+ // frames that are presented by the decoder are counted for this purpose, this
+ // excludes frames that are not intended for display but for compositing with
+ // other frames, such as frames that aren't the last frame with a duration of
+ // 0 ticks.
+ uint32_t Fi;
+} JxlDecoderFrameIndexBoxEntry;
+
+typedef struct JxlDecoderFrameIndexBoxStruct {
+ int64_t NF() const { return entries.size(); }
+ int32_t TNUM = 1;
+ int32_t TDEN = 1000;
+
+ std::vector<JxlDecoderFrameIndexBoxEntry> entries;
+
+ // That way we can ensure that every index box will have the first frame.
+ // If the API user decides to mark it as an indexed frame, we call
+ // the AddFrame again, this time with requested.
+ void AddFrame(uint64_t OFFi, uint32_t Ti, uint32_t Fi) {
+ JxlDecoderFrameIndexBoxEntry e;
+ e.OFFi = OFFi;
+ e.Ti = Ti;
+ e.Fi = Fi;
+ entries.push_back(e);
+ }
+} JxlDecoderFrameIndexBox;
+
+} // namespace jxl
+
+// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
+struct JxlDecoderStruct {
+ JxlDecoderStruct() = default;
+
+ JxlMemoryManager memory_manager;
+ std::unique_ptr<jxl::ThreadPool> thread_pool;
+
+ DecoderStage stage;
+
+ // Status of progression, internal.
+ bool got_signature;
+ // Indicates we know that we've seen the last codestream box: either this
+ // was a jxlc box, or a jxlp box that has its index indicated as last by
+ // having its most significant bit set, or no boxes are used at all. This
+ // does not indicate the full codestream has already been seen, only the
+ // last box of it has been initiated.
+ bool last_codestream_seen;
+ bool got_codestream_signature;
+ bool got_basic_info;
+ bool got_transform_data; // To skip everything before ICC.
+ bool got_all_headers; // Codestream metadata headers.
+ bool post_headers; // Already decoding pixels.
+ jxl::ICCReader icc_reader;
+ jxl::JxlDecoderFrameIndexBox frame_index_box;
+ // This means either we actually got the preview image, or determined we
+ // cannot get it or there is none.
+ bool got_preview_image;
+ bool preview_frame;
+
+ // Position of next_in in the original file including box format if present
+ // (as opposed to position in the codestream)
+ size_t file_pos;
+
+ size_t box_contents_begin;
+ size_t box_contents_end;
+ size_t box_contents_size;
+ size_t box_size;
+ size_t header_size;
+ // Either a final box that runs until EOF, or the case of no container format
+ // at all.
+ bool box_contents_unbounded;
+
+ JxlBoxType box_type;
+ JxlBoxType box_decoded_type; // Underlying type for brob boxes
+ // Set to true right after a JXL_DEC_BOX event only.
+ bool box_event;
+ bool decompress_boxes;
+
+ bool box_out_buffer_set;
+ // Whether the out buffer is set for the current box, if the user did not yet
+ // release the buffer while the next box is encountered, this will be set to
+ // false. If this is false, no JXL_DEC_NEED_MORE_INPUT is emitted
+ // (irrespective of the value of box_out_buffer_set), because not setting
+ // output indicates the user does not wish the data of this box.
+ bool box_out_buffer_set_current_box;
+ uint8_t* box_out_buffer;
+ size_t box_out_buffer_size;
+ // which byte of the full box content the start of the out buffer points to
+ size_t box_out_buffer_begin;
+ // which byte of box_out_buffer to write to next
+ size_t box_out_buffer_pos;
+
+ // Settings
+ bool keep_orientation;
+ bool unpremul_alpha;
+ bool render_spotcolors;
+ bool coalescing;
+ float desired_intensity_target;
+
+ // Bitfield, for which informative events (JXL_DEC_BASIC_INFO, etc...) the
+ // decoder returns a status. By default, do not return for any of the events,
+ // only return when the decoder cannot continue because it needs more input or
+ // output data.
+ int events_wanted;
+ int orig_events_wanted;
+
+ // Fields for reading the basic info from the header.
+ size_t basic_info_size_hint;
+ bool have_container;
+ size_t box_count;
+
+ // The level of progressive detail in frame decoding.
+ JxlProgressiveDetail prog_detail = kDC;
+ // The progressive detail of the current frame.
+ JxlProgressiveDetail frame_prog_detail;
+ // The intended downsampling ratio for the current progression step.
+ size_t downsampling_target;
+
+ // Set to true if either an image out buffer or an image out callback was set.
+ bool image_out_buffer_set;
+
+ // Owned by the caller, buffer for preview or full resolution image.
+ void* image_out_buffer;
+ JxlImageOutInitCallback image_out_init_callback;
+ JxlImageOutRunCallback image_out_run_callback;
+ JxlImageOutDestroyCallback image_out_destroy_callback;
+ void* image_out_init_opaque;
+ struct SimpleImageOutCallback {
+ JxlImageOutCallback callback;
+ void* opaque;
+ };
+ SimpleImageOutCallback simple_image_out_callback;
+
+ size_t image_out_size;
+
+ JxlPixelFormat image_out_format;
+ JxlBitDepth image_out_bit_depth;
+
+ // For extra channels. Empty if no extra channels are requested, and they are
+ // reset each frame
+ std::vector<ExtraChannelOutput> extra_channel_output;
+
+ jxl::CodecMetadata metadata;
+ // Same as metadata.m, except for the color_encoding, which is set to the
+ // output encoding.
+ jxl::ImageMetadata image_metadata;
+ std::unique_ptr<jxl::ImageBundle> ib;
+
+ std::unique_ptr<jxl::PassesDecoderState> passes_state;
+ std::unique_ptr<jxl::FrameDecoder> frame_dec;
+ size_t next_section;
+ std::vector<char> section_processed;
+
+ // headers and TOC for the current frame. When got_toc is true, this is
+ // always the frame header of the last frame of the current still series,
+ // that is, the displayed frame.
+ std::unique_ptr<jxl::FrameHeader> frame_header;
+
+ size_t remaining_frame_size;
+ FrameStage frame_stage;
+ bool dc_frame_progression_done;
+ // The currently processed frame is the last of the current composite still,
+ // and so must be returned as pixels
+ bool is_last_of_still;
+ // The currently processed frame is the last of the codestream
+ bool is_last_total;
+ // How many frames to skip.
+ size_t skip_frames;
+ // Skipping the current frame. May be false if skip_frames was just set to
+ // a positive value while already processing a current frame, then
+ // skipping_frame will be enabled only for the next frame.
+ bool skipping_frame;
+
+ // Amount of internal frames and external frames started. External frames are
+ // user-visible frames, internal frames includes all external frames and
+ // also invisible frames such as patches, blending-only and dc_level frames.
+ size_t internal_frames;
+ size_t external_frames;
+
+ // For each internal frame, which storage locations it references, and which
+ // storage locations it is stored in, using the bit mask as defined in
+ // FrameDecoder::References and FrameDecoder::SaveAs.
+ std::vector<int> frame_references;
+ std::vector<int> frame_saved_as;
+
+ // Translates external frame index to internal frame index. The external
+ // index is the index of user-visible frames. The internal index can be larger
+ // since non-visible frames (such as frames with patches, ...) are included.
+ std::vector<size_t> frame_external_to_internal;
+
+ // Whether the frame with internal index is required to decode the frame
+ // being skipped to or any frames after that. If no skipping is active,
+ // this vector is ignored. If the current internal frame index is beyond this
+ // vector, it must be treated as a required frame.
+ std::vector<char> frame_required;
+
+ // Codestream input data is copied here temporarily when the decoder needs
+ // more input bytes to process the next part of the stream. We copy the input
+ // data in order to be able to release it all through the API it when
+ // returning JXL_DEC_NEED_MORE_INPUT.
+ std::vector<uint8_t> codestream_copy;
+ // Number of bytes at the end of codestream_copy that were not yet consumed
+ // by calling AdvanceInput().
+ size_t codestream_unconsumed;
+ // Position in the codestream_copy vector that the decoder already finished
+ // processing. It can be greater than the current size of codestream_copy in
+ // case where the decoder skips some parts of the frame that were not yet
+ // provided.
+ size_t codestream_pos;
+ // Number of bits after codestream_pos that were already processed.
+ size_t codestream_bits_ahead;
+
+ BoxStage box_stage;
+
+#if JPEGXL_ENABLE_BOXES
+ jxl::JxlBoxContentDecoder box_content_decoder;
+#endif
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ jxl::JxlToJpegDecoder jpeg_decoder;
+ // Decodes Exif or XMP metadata for JPEG reconstruction
+ jxl::JxlBoxContentDecoder metadata_decoder;
+ std::vector<uint8_t> exif_metadata;
+ std::vector<uint8_t> xmp_metadata;
+ // must store JPEG reconstruction metadata from the current box
+ // 0 = not stored, 1 = currently storing, 2 = finished
+ int store_exif;
+ int store_xmp;
+ size_t recon_out_buffer_pos;
+ size_t recon_exif_size; // Expected exif size as read from the jbrd box
+ size_t recon_xmp_size; // Expected exif size as read from the jbrd box
+ JpegReconStage recon_output_jpeg;
+
+ bool JbrdNeedMoreBoxes() const {
+ // jbrd box wants exif but exif box not yet seen
+ if (store_exif < 2 && recon_exif_size > 0) return true;
+ // jbrd box wants xmp but xmp box not yet seen
+ if (store_xmp < 2 && recon_xmp_size > 0) return true;
+ return false;
+ }
+#endif
+
+ const uint8_t* next_in;
+ size_t avail_in;
+ bool input_closed;
+
+ void AdvanceInput(size_t size) {
+ JXL_DASSERT(avail_in >= size);
+ next_in += size;
+ avail_in -= size;
+ file_pos += size;
+ }
+
+ size_t AvailableCodestream() const {
+ size_t avail_codestream = avail_in;
+ if (!box_contents_unbounded) {
+ avail_codestream =
+ std::min<size_t>(avail_codestream, box_contents_end - file_pos);
+ }
+ return avail_codestream;
+ }
+
+ void AdvanceCodestream(size_t size) {
+ size_t avail_codestream = AvailableCodestream();
+ if (codestream_copy.empty()) {
+ if (size <= avail_codestream) {
+ AdvanceInput(size);
+ } else {
+ codestream_pos = size - avail_codestream;
+ AdvanceInput(avail_codestream);
+ }
+ } else {
+ codestream_pos += size;
+ if (codestream_pos + codestream_unconsumed >= codestream_copy.size()) {
+ size_t advance = std::min(
+ codestream_unconsumed,
+ codestream_unconsumed + codestream_pos - codestream_copy.size());
+ AdvanceInput(advance);
+ codestream_pos -= std::min(codestream_pos, codestream_copy.size());
+ codestream_unconsumed = 0;
+ codestream_copy.clear();
+ }
+ }
+ }
+
+ JxlDecoderStatus RequestMoreInput() {
+ if (codestream_copy.empty()) {
+ size_t avail_codestream = AvailableCodestream();
+ codestream_copy.insert(codestream_copy.end(), next_in,
+ next_in + avail_codestream);
+ AdvanceInput(avail_codestream);
+ } else {
+ AdvanceInput(codestream_unconsumed);
+ codestream_unconsumed = 0;
+ }
+ return JXL_DEC_NEED_MORE_INPUT;
+ }
+
+ JxlDecoderStatus GetCodestreamInput(jxl::Span<const uint8_t>* span) {
+ if (codestream_copy.empty() && codestream_pos > 0) {
+ size_t avail_codestream = AvailableCodestream();
+ size_t skip = std::min<size_t>(codestream_pos, avail_codestream);
+ AdvanceInput(skip);
+ codestream_pos -= skip;
+ if (codestream_pos > 0) {
+ return RequestMoreInput();
+ }
+ }
+ JXL_ASSERT(codestream_pos <= codestream_copy.size());
+ JXL_ASSERT(codestream_unconsumed <= codestream_copy.size());
+ size_t avail_codestream = AvailableCodestream();
+ if (codestream_copy.empty()) {
+ if (avail_codestream == 0) {
+ return RequestMoreInput();
+ }
+ *span = jxl::Span<const uint8_t>(next_in, avail_codestream);
+ return JXL_DEC_SUCCESS;
+ } else {
+ codestream_copy.insert(codestream_copy.end(),
+ next_in + codestream_unconsumed,
+ next_in + avail_codestream);
+ codestream_unconsumed = avail_codestream;
+ *span = jxl::Span<const uint8_t>(codestream_copy.data() + codestream_pos,
+ codestream_copy.size() - codestream_pos);
+ return JXL_DEC_SUCCESS;
+ }
+ }
+
+ // Whether the decoder can use more codestream input for a purpose it needs.
+ // This returns false if the user didn't subscribe to any events that
+ // require the codestream (e.g. only subscribed to metadata boxes), or all
+ // parts of the codestream that are subscribed to (e.g. only basic info) have
+ // already occurred.
+ bool CanUseMoreCodestreamInput() const {
+ // The decoder can set this to finished early if all relevant events were
+ // processed, so this check works.
+ return stage != DecoderStage::kCodestreamFinished;
+ }
+
+ // If set then some operations will fail, if those would require
+ // allocating large objects. Actual memory usage might be two orders of
+ // magnitude bigger.
+ // TODO(eustas): remove once there is working API for memory / CPU limit.
+ size_t memory_limit_base = 0;
+ size_t cpu_limit_base = 0;
+ size_t used_cpu_base = 0;
+};
+
+namespace {
+
+bool CheckSizeLimit(JxlDecoder* dec, size_t xsize, size_t ysize) {
+ if (!dec->memory_limit_base) return true;
+ if (xsize == 0 || ysize == 0) return true;
+ if (xsize >= dec->memory_limit_base || ysize >= dec->memory_limit_base) {
+ return false;
+ }
+ // Rough estimate of real row length.
+ xsize = jxl::DivCeil(xsize, 32) * 32;
+ size_t num_pixels = xsize * ysize;
+ if (num_pixels / xsize != ysize) return false; // overflow
+ if (num_pixels > dec->memory_limit_base) return false;
+ return true;
+}
+
+} // namespace
+
+// TODO(zond): Make this depend on the data loaded into the decoder.
+JxlDecoderStatus JxlDecoderDefaultPixelFormat(const JxlDecoder* dec,
+ JxlPixelFormat* format) {
+ if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT;
+ *format = {4, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+ return JXL_DEC_SUCCESS;
+}
+
+// Resets the state that must be reset for both Rewind and Reset
+void JxlDecoderRewindDecodingState(JxlDecoder* dec) {
+ dec->stage = DecoderStage::kInited;
+ dec->got_signature = false;
+ dec->last_codestream_seen = false;
+ dec->got_codestream_signature = false;
+ dec->got_basic_info = false;
+ dec->got_transform_data = false;
+ dec->got_all_headers = false;
+ dec->post_headers = false;
+ dec->icc_reader.Reset();
+ dec->got_preview_image = false;
+ dec->preview_frame = false;
+ dec->file_pos = 0;
+ dec->box_contents_begin = 0;
+ dec->box_contents_end = 0;
+ dec->box_contents_size = 0;
+ dec->box_size = 0;
+ dec->header_size = 0;
+ dec->box_contents_unbounded = false;
+ memset(dec->box_type, 0, sizeof(dec->box_type));
+ memset(dec->box_decoded_type, 0, sizeof(dec->box_decoded_type));
+ dec->box_event = false;
+ dec->box_stage = BoxStage::kHeader;
+ dec->box_out_buffer_set = false;
+ dec->box_out_buffer_set_current_box = false;
+ dec->box_out_buffer = nullptr;
+ dec->box_out_buffer_size = 0;
+ dec->box_out_buffer_begin = 0;
+ dec->box_out_buffer_pos = 0;
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ dec->exif_metadata.clear();
+ dec->xmp_metadata.clear();
+ dec->store_exif = 0;
+ dec->store_xmp = 0;
+ dec->recon_out_buffer_pos = 0;
+ dec->recon_exif_size = 0;
+ dec->recon_xmp_size = 0;
+ dec->recon_output_jpeg = JpegReconStage::kNone;
+#endif
+
+ dec->events_wanted = dec->orig_events_wanted;
+ dec->basic_info_size_hint = InitialBasicInfoSizeHint();
+ dec->have_container = 0;
+ dec->box_count = 0;
+ dec->downsampling_target = 8;
+ dec->image_out_buffer_set = false;
+ dec->image_out_buffer = nullptr;
+ dec->image_out_init_callback = nullptr;
+ dec->image_out_run_callback = nullptr;
+ dec->image_out_destroy_callback = nullptr;
+ dec->image_out_init_opaque = nullptr;
+ dec->image_out_size = 0;
+ dec->image_out_bit_depth.type = JXL_BIT_DEPTH_FROM_PIXEL_FORMAT;
+ dec->extra_channel_output.clear();
+ dec->next_in = 0;
+ dec->avail_in = 0;
+ dec->input_closed = false;
+
+ dec->passes_state.reset(nullptr);
+ dec->frame_dec.reset(nullptr);
+ dec->next_section = 0;
+ dec->section_processed.clear();
+
+ dec->ib.reset();
+ dec->metadata = jxl::CodecMetadata();
+ dec->image_metadata = dec->metadata.m;
+ dec->frame_header.reset(new jxl::FrameHeader(&dec->metadata));
+
+ dec->codestream_copy.clear();
+ dec->codestream_unconsumed = 0;
+ dec->codestream_pos = 0;
+ dec->codestream_bits_ahead = 0;
+
+ dec->frame_stage = FrameStage::kHeader;
+ dec->remaining_frame_size = 0;
+ dec->is_last_of_still = false;
+ dec->is_last_total = false;
+ dec->skip_frames = 0;
+ dec->skipping_frame = false;
+ dec->internal_frames = 0;
+ dec->external_frames = 0;
+}
+
+void JxlDecoderReset(JxlDecoder* dec) {
+ JxlDecoderRewindDecodingState(dec);
+
+ dec->thread_pool.reset();
+ dec->keep_orientation = false;
+ dec->unpremul_alpha = false;
+ dec->render_spotcolors = true;
+ dec->coalescing = true;
+ dec->desired_intensity_target = 0;
+ dec->orig_events_wanted = 0;
+ dec->events_wanted = 0;
+ dec->frame_references.clear();
+ dec->frame_saved_as.clear();
+ dec->frame_external_to_internal.clear();
+ dec->frame_required.clear();
+ dec->decompress_boxes = false;
+}
+
+JxlDecoder* JxlDecoderCreate(const JxlMemoryManager* memory_manager) {
+ JxlMemoryManager local_memory_manager;
+ if (!jxl::MemoryManagerInit(&local_memory_manager, memory_manager))
+ return nullptr;
+
+ void* alloc =
+ jxl::MemoryManagerAlloc(&local_memory_manager, sizeof(JxlDecoder));
+ if (!alloc) return nullptr;
+ // Placement new constructor on allocated memory
+ JxlDecoder* dec = new (alloc) JxlDecoder();
+ dec->memory_manager = local_memory_manager;
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+ if (!memory_manager) {
+ dec->memory_limit_base = 53 << 16;
+ // Allow 5 x max_image_size processing units; every frame is accounted
+ // as W x H CPU processing units, so there could be numerous small frames
+ // or few larger ones.
+ dec->cpu_limit_base = 5 * dec->memory_limit_base;
+ }
+#endif
+
+ JxlDecoderReset(dec);
+
+ return dec;
+}
+
+void JxlDecoderDestroy(JxlDecoder* dec) {
+ if (dec) {
+ JxlMemoryManager local_memory_manager = dec->memory_manager;
+ // Call destructor directly since custom free function is used.
+ dec->~JxlDecoder();
+ jxl::MemoryManagerFree(&local_memory_manager, dec);
+ }
+}
+
+void JxlDecoderRewind(JxlDecoder* dec) { JxlDecoderRewindDecodingState(dec); }
+
+void JxlDecoderSkipFrames(JxlDecoder* dec, size_t amount) {
+ // Increment amount, rather than set it: making the amount smaller is
+ // impossible because the decoder may already have skipped frames required to
+ // decode earlier frames, and making the amount larger compared to an existing
+ // amount is impossible because if JxlDecoderSkipFrames is called in the
+ // middle of already skipping frames, the user cannot know how many frames
+ // have already been skipped internally so far so an absolute value cannot
+ // be defined.
+ dec->skip_frames += amount;
+
+ dec->frame_required.clear();
+ size_t next_frame = dec->external_frames + dec->skip_frames;
+
+ // A frame that has been seen before a rewind
+ if (next_frame < dec->frame_external_to_internal.size()) {
+ size_t internal_index = dec->frame_external_to_internal[next_frame];
+ if (internal_index < dec->frame_saved_as.size()) {
+ std::vector<size_t> deps = GetFrameDependencies(
+ internal_index, dec->frame_saved_as, dec->frame_references);
+
+ dec->frame_required.resize(internal_index + 1, 0);
+ for (size_t i = 0; i < deps.size(); i++) {
+ JXL_ASSERT(deps[i] < dec->frame_required.size());
+ dec->frame_required[deps[i]] = 1;
+ }
+ }
+ }
+}
+
+JxlDecoderStatus JxlDecoderSkipCurrentFrame(JxlDecoder* dec) {
+ if (dec->frame_stage != FrameStage::kFull) {
+ return JXL_DEC_ERROR;
+ }
+ JXL_DASSERT(dec->frame_dec);
+ dec->frame_stage = FrameStage::kHeader;
+ dec->AdvanceCodestream(dec->remaining_frame_size);
+ if (dec->is_last_of_still) {
+ dec->image_out_buffer_set = false;
+ }
+ return JXL_DEC_SUCCESS;
+}
+
+JXL_EXPORT JxlDecoderStatus
+JxlDecoderSetParallelRunner(JxlDecoder* dec, JxlParallelRunner parallel_runner,
+ void* parallel_runner_opaque) {
+ if (dec->stage != DecoderStage::kInited) {
+ return JXL_API_ERROR("parallel_runner must be set before starting");
+ }
+ dec->thread_pool.reset(
+ new jxl::ThreadPool(parallel_runner, parallel_runner_opaque));
+ return JXL_DEC_SUCCESS;
+}
+
+size_t JxlDecoderSizeHintBasicInfo(const JxlDecoder* dec) {
+ if (dec->got_basic_info) return 0;
+ return dec->basic_info_size_hint;
+}
+
+JxlDecoderStatus JxlDecoderSubscribeEvents(JxlDecoder* dec, int events_wanted) {
+ if (dec->stage != DecoderStage::kInited) {
+ return JXL_DEC_ERROR; // Cannot subscribe to events after having started.
+ }
+ if (events_wanted & 63) {
+ return JXL_DEC_ERROR; // Can only subscribe to informative events.
+ }
+ dec->events_wanted = events_wanted;
+ dec->orig_events_wanted = events_wanted;
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderSetKeepOrientation(JxlDecoder* dec,
+ JXL_BOOL skip_reorientation) {
+ if (dec->stage != DecoderStage::kInited) {
+ return JXL_API_ERROR("Must set keep_orientation option before starting");
+ }
+ dec->keep_orientation = !!skip_reorientation;
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderSetUnpremultiplyAlpha(JxlDecoder* dec,
+ JXL_BOOL unpremul_alpha) {
+ if (dec->stage != DecoderStage::kInited) {
+ return JXL_API_ERROR("Must set unpremul_alpha option before starting");
+ }
+ dec->unpremul_alpha = !!unpremul_alpha;
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderSetRenderSpotcolors(JxlDecoder* dec,
+ JXL_BOOL render_spotcolors) {
+ if (dec->stage != DecoderStage::kInited) {
+ return JXL_API_ERROR("Must set render_spotcolors option before starting");
+ }
+ dec->render_spotcolors = !!render_spotcolors;
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderSetCoalescing(JxlDecoder* dec, JXL_BOOL coalescing) {
+ if (dec->stage != DecoderStage::kInited) {
+ return JXL_API_ERROR("Must set coalescing option before starting");
+ }
+ dec->coalescing = !!coalescing;
+ return JXL_DEC_SUCCESS;
+}
+
+namespace {
+// helper function to get the dimensions of the current image buffer
+void GetCurrentDimensions(const JxlDecoder* dec, size_t& xsize, size_t& ysize) {
+ if (dec->frame_header->nonserialized_is_preview) {
+ xsize = dec->metadata.oriented_preview_xsize(dec->keep_orientation);
+ ysize = dec->metadata.oriented_preview_ysize(dec->keep_orientation);
+ return;
+ }
+ xsize = dec->metadata.oriented_xsize(dec->keep_orientation);
+ ysize = dec->metadata.oriented_ysize(dec->keep_orientation);
+ if (!dec->coalescing) {
+ const auto frame_dim = dec->frame_header->ToFrameDimensions();
+ xsize = frame_dim.xsize_upsampled;
+ ysize = frame_dim.ysize_upsampled;
+ if (!dec->keep_orientation &&
+ static_cast<int>(dec->metadata.m.GetOrientation()) > 4) {
+ std::swap(xsize, ysize);
+ }
+ }
+}
+} // namespace
+
+namespace jxl {
+namespace {
+
+template <class T>
+bool CanRead(Span<const uint8_t> data, BitReader* reader, T* JXL_RESTRICT t) {
+ // Use a copy of the bit reader because CanRead advances bits.
+ BitReader reader2(data);
+ reader2.SkipBits(reader->TotalBitsConsumed());
+ bool result = Bundle::CanRead(&reader2, t);
+ JXL_ASSERT(reader2.Close());
+ return result;
+}
+
+// Returns JXL_DEC_SUCCESS if the full bundle was successfully read, status
+// indicating either error or need more input otherwise.
+template <class T>
+JxlDecoderStatus ReadBundle(JxlDecoder* dec, Span<const uint8_t> data,
+ BitReader* reader, T* JXL_RESTRICT t) {
+ if (!CanRead(data, reader, t)) {
+ return dec->RequestMoreInput();
+ }
+ if (!Bundle::Read(reader, t)) {
+ return JXL_DEC_ERROR;
+ }
+ return JXL_DEC_SUCCESS;
+}
+
+#define JXL_API_RETURN_IF_ERROR(expr) \
+ { \
+ JxlDecoderStatus status_ = ConvertStatus(expr); \
+ if (status_ != JXL_DEC_SUCCESS) return status_; \
+ }
+
+std::unique_ptr<BitReader, std::function<void(BitReader*)>> GetBitReader(
+ Span<const uint8_t> span) {
+ BitReader* reader = new BitReader(span);
+ return std::unique_ptr<BitReader, std::function<void(BitReader*)>>(
+ reader, [](BitReader* reader) {
+ // We can't allow Close to abort the program if the reader is out of
+ // bounds, or all return paths in the code, even those that already
+ // return failure, would have to manually call AllReadsWithinBounds().
+ // Invalid JXL codestream should not cause program to quit.
+ (void)reader->AllReadsWithinBounds();
+ (void)reader->Close();
+ delete reader;
+ });
+}
+
+JxlDecoderStatus JxlDecoderReadBasicInfo(JxlDecoder* dec) {
+ if (!dec->got_codestream_signature) {
+ // Check and skip the codestream signature
+ Span<const uint8_t> span;
+ JXL_API_RETURN_IF_ERROR(dec->GetCodestreamInput(&span));
+ if (span.size() < 2) {
+ return dec->RequestMoreInput();
+ }
+ if (span.data()[0] != 0xff || span.data()[1] != jxl::kCodestreamMarker) {
+ return JXL_API_ERROR("invalid signature");
+ }
+ dec->got_codestream_signature = true;
+ dec->AdvanceCodestream(2);
+ }
+
+ Span<const uint8_t> span;
+ JXL_API_RETURN_IF_ERROR(dec->GetCodestreamInput(&span));
+ auto reader = GetBitReader(span);
+ JXL_API_RETURN_IF_ERROR(
+ ReadBundle(dec, span, reader.get(), &dec->metadata.size));
+ JXL_API_RETURN_IF_ERROR(
+ ReadBundle(dec, span, reader.get(), &dec->metadata.m));
+ size_t total_bits = reader->TotalBitsConsumed();
+ dec->AdvanceCodestream(total_bits / jxl::kBitsPerByte);
+ dec->codestream_bits_ahead = total_bits % jxl::kBitsPerByte;
+ dec->got_basic_info = true;
+ dec->basic_info_size_hint = 0;
+ dec->image_metadata = dec->metadata.m;
+ JXL_DEBUG_V(2, "Decoded BasicInfo: %s", dec->metadata.DebugString().c_str());
+
+ if (!CheckSizeLimit(dec, dec->metadata.size.xsize(),
+ dec->metadata.size.ysize())) {
+ return JXL_API_ERROR("image is too large");
+ }
+
+ return JXL_DEC_SUCCESS;
+}
+
+// Reads all codestream headers (but not frame headers)
+JxlDecoderStatus JxlDecoderReadAllHeaders(JxlDecoder* dec) {
+ if (!dec->got_transform_data) {
+ Span<const uint8_t> span;
+ JXL_API_RETURN_IF_ERROR(dec->GetCodestreamInput(&span));
+ auto reader = GetBitReader(span);
+ reader->SkipBits(dec->codestream_bits_ahead);
+ dec->metadata.transform_data.nonserialized_xyb_encoded =
+ dec->metadata.m.xyb_encoded;
+ JXL_API_RETURN_IF_ERROR(
+ ReadBundle(dec, span, reader.get(), &dec->metadata.transform_data));
+ size_t total_bits = reader->TotalBitsConsumed();
+ dec->AdvanceCodestream(total_bits / jxl::kBitsPerByte);
+ dec->codestream_bits_ahead = total_bits % jxl::kBitsPerByte;
+ dec->got_transform_data = true;
+ }
+
+ Span<const uint8_t> span;
+ JXL_API_RETURN_IF_ERROR(dec->GetCodestreamInput(&span));
+ auto reader = GetBitReader(span);
+ reader->SkipBits(dec->codestream_bits_ahead);
+
+ if (dec->metadata.m.color_encoding.WantICC()) {
+ jxl::Status status =
+ dec->icc_reader.Init(reader.get(), dec->memory_limit_base);
+ // Always check AllReadsWithinBounds, not all the C++ decoder implementation
+ // handles reader out of bounds correctly yet (e.g. context map). Not
+ // checking AllReadsWithinBounds can cause reader->Close() to trigger an
+ // assert, but we don't want library to quit program for invalid codestream.
+ if (!reader->AllReadsWithinBounds() ||
+ status.code() == StatusCode::kNotEnoughBytes) {
+ return dec->RequestMoreInput();
+ }
+ if (!status) {
+ // Other non-successful status is an error
+ return JXL_DEC_ERROR;
+ }
+ PaddedBytes icc;
+ status = dec->icc_reader.Process(reader.get(), &icc);
+ if (status.code() == StatusCode::kNotEnoughBytes) {
+ return dec->RequestMoreInput();
+ }
+ if (!status) {
+ // Other non-successful status is an error
+ return JXL_DEC_ERROR;
+ }
+ if (!dec->metadata.m.color_encoding.SetICCRaw(std::move(icc))) {
+ return JXL_DEC_ERROR;
+ }
+ }
+
+ dec->got_all_headers = true;
+ JXL_API_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+
+ dec->AdvanceCodestream(reader->TotalBitsConsumed() / jxl::kBitsPerByte);
+ dec->codestream_bits_ahead = 0;
+
+ if (!dec->passes_state) {
+ dec->passes_state.reset(new jxl::PassesDecoderState());
+ }
+
+ JXL_API_RETURN_IF_ERROR(
+ dec->passes_state->output_encoding_info.SetFromMetadata(dec->metadata));
+ if (dec->desired_intensity_target > 0) {
+ dec->passes_state->output_encoding_info.desired_intensity_target =
+ dec->desired_intensity_target;
+ }
+ dec->image_metadata = dec->metadata.m;
+
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderProcessSections(JxlDecoder* dec) {
+ Span<const uint8_t> span;
+ JXL_API_RETURN_IF_ERROR(dec->GetCodestreamInput(&span));
+ const auto& toc = dec->frame_dec->Toc();
+ size_t pos = 0;
+ std::vector<jxl::FrameDecoder::SectionInfo> section_info;
+ std::vector<jxl::FrameDecoder::SectionStatus> section_status;
+ for (size_t i = dec->next_section; i < toc.size(); ++i) {
+ if (dec->section_processed[i]) {
+ pos += toc[i].size;
+ continue;
+ }
+ size_t id = toc[i].id;
+ size_t size = toc[i].size;
+ if (OutOfBounds(pos, size, span.size())) {
+ break;
+ }
+ auto br =
+ new jxl::BitReader(jxl::Span<const uint8_t>(span.data() + pos, size));
+ section_info.emplace_back(jxl::FrameDecoder::SectionInfo{br, id, i});
+ section_status.emplace_back();
+ pos += size;
+ }
+ jxl::Status status = dec->frame_dec->ProcessSections(
+ section_info.data(), section_info.size(), section_status.data());
+ bool out_of_bounds = false;
+ for (const auto& info : section_info) {
+ if (!info.br->AllReadsWithinBounds()) {
+ // Mark out of bounds section, but keep closing and deleting the next
+ // ones as well.
+ out_of_bounds = true;
+ }
+ JXL_ASSERT(info.br->Close());
+ delete info.br;
+ }
+ if (out_of_bounds) {
+ // If any bit reader indicates out of bounds, it's an error, not just
+ // needing more input, since we ensure only bit readers containing
+ // a complete section are provided to the FrameDecoder.
+ return JXL_API_ERROR("frame out of bounds");
+ }
+ if (!status) {
+ return JXL_API_ERROR("frame processing failed");
+ }
+ for (size_t i = 0; i < section_status.size(); ++i) {
+ auto status = section_status[i];
+ if (status == jxl::FrameDecoder::kDone) {
+ dec->section_processed[section_info[i].index] = 1;
+ } else if (status != jxl::FrameDecoder::kSkipped) {
+ return JXL_API_ERROR("unexpected section status");
+ }
+ }
+ size_t completed_prefix_bytes = 0;
+ while (dec->next_section < dec->section_processed.size() &&
+ dec->section_processed[dec->next_section] == 1) {
+ completed_prefix_bytes += toc[dec->next_section].size;
+ ++dec->next_section;
+ }
+ dec->remaining_frame_size -= completed_prefix_bytes;
+ dec->AdvanceCodestream(completed_prefix_bytes);
+ return JXL_DEC_SUCCESS;
+}
+
+// TODO(eustas): no CodecInOut -> no image size reinforcement -> possible OOM.
+JxlDecoderStatus JxlDecoderProcessCodestream(JxlDecoder* dec) {
+ // If no parallel runner is set, use the default
+ // TODO(lode): move this initialization to an appropriate location once the
+ // runner is used to decode pixels.
+ if (!dec->thread_pool) {
+ dec->thread_pool.reset(new jxl::ThreadPool(nullptr, nullptr));
+ }
+
+ // No matter what events are wanted, the basic info is always required.
+ if (!dec->got_basic_info) {
+ JxlDecoderStatus status = JxlDecoderReadBasicInfo(dec);
+ if (status != JXL_DEC_SUCCESS) return status;
+ }
+
+ if (dec->events_wanted & JXL_DEC_BASIC_INFO) {
+ dec->events_wanted &= ~JXL_DEC_BASIC_INFO;
+ return JXL_DEC_BASIC_INFO;
+ }
+
+ if (!dec->events_wanted) {
+ dec->stage = DecoderStage::kCodestreamFinished;
+ return JXL_DEC_SUCCESS;
+ }
+
+ if (!dec->got_all_headers) {
+ JxlDecoderStatus status = JxlDecoderReadAllHeaders(dec);
+ if (status != JXL_DEC_SUCCESS) return status;
+ }
+
+ if (dec->events_wanted & JXL_DEC_COLOR_ENCODING) {
+ dec->events_wanted &= ~JXL_DEC_COLOR_ENCODING;
+ return JXL_DEC_COLOR_ENCODING;
+ }
+
+ if (!dec->events_wanted) {
+ dec->stage = DecoderStage::kCodestreamFinished;
+ return JXL_DEC_SUCCESS;
+ }
+
+ dec->post_headers = true;
+
+ if (!dec->got_preview_image && dec->metadata.m.have_preview) {
+ dec->preview_frame = true;
+ }
+
+ // Handle frames
+ for (;;) {
+ bool parse_frames =
+ (dec->events_wanted &
+ (JXL_DEC_PREVIEW_IMAGE | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+ if (!parse_frames) {
+ break;
+ }
+ if (dec->frame_stage == FrameStage::kHeader && dec->is_last_total) {
+ break;
+ }
+ if (dec->frame_stage == FrameStage::kHeader) {
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ if (dec->recon_output_jpeg == JpegReconStage::kSettingMetadata ||
+ dec->recon_output_jpeg == JpegReconStage::kOutputting) {
+ // The image bundle contains the JPEG reconstruction frame, but the
+ // decoder is still waiting to decode an EXIF or XMP box. It's not
+ // implemented to decode additional frames during this, and a JPEG
+ // reconstruction image should have only one frame.
+ return JXL_API_ERROR(
+ "cannot decode a next frame after JPEG reconstruction frame");
+ }
+#endif
+ if (!dec->ib) {
+ dec->ib.reset(new jxl::ImageBundle(&dec->image_metadata));
+ }
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ // If JPEG reconstruction is wanted and possible, set the jpeg_data of
+ // the ImageBundle.
+ if (!dec->jpeg_decoder.SetImageBundleJpegData(dec->ib.get()))
+ return JXL_DEC_ERROR;
+#endif
+ dec->frame_dec.reset(new FrameDecoder(
+ dec->passes_state.get(), dec->metadata, dec->thread_pool.get(),
+ /*use_slow_rendering_pipeline=*/false));
+ dec->frame_header.reset(new FrameHeader(&dec->metadata));
+ Span<const uint8_t> span;
+ JXL_API_RETURN_IF_ERROR(dec->GetCodestreamInput(&span));
+ auto reader = GetBitReader(span);
+ jxl::Status status = dec->frame_dec->InitFrame(
+ reader.get(), dec->ib.get(), dec->preview_frame);
+ if (!reader->AllReadsWithinBounds() ||
+ status.code() == StatusCode::kNotEnoughBytes) {
+ return dec->RequestMoreInput();
+ } else if (!status) {
+ return JXL_API_ERROR("invalid frame header");
+ }
+ dec->AdvanceCodestream(reader->TotalBitsConsumed() / kBitsPerByte);
+ *dec->frame_header = dec->frame_dec->GetFrameHeader();
+ jxl::FrameDimensions frame_dim = dec->frame_header->ToFrameDimensions();
+ if (!CheckSizeLimit(dec, frame_dim.xsize_upsampled_padded,
+ frame_dim.ysize_upsampled_padded)) {
+ return JXL_API_ERROR("frame is too large");
+ }
+ bool output_needed =
+ (dec->preview_frame ? (dec->events_wanted & JXL_DEC_PREVIEW_IMAGE)
+ : (dec->events_wanted & JXL_DEC_FULL_IMAGE));
+ if (output_needed) {
+ JXL_API_RETURN_IF_ERROR(dec->frame_dec->InitFrameOutput());
+ }
+ if (dec->cpu_limit_base != 0) {
+ // No overflow, checked in CheckSizeLimit.
+ size_t num_pixels = frame_dim.xsize * frame_dim.ysize;
+ if (dec->used_cpu_base + num_pixels < dec->used_cpu_base) {
+ return JXL_API_ERROR("used too much CPU");
+ }
+ dec->used_cpu_base += num_pixels;
+ if (dec->used_cpu_base > dec->cpu_limit_base) {
+ return JXL_API_ERROR("used too much CPU");
+ }
+ }
+ dec->remaining_frame_size = dec->frame_dec->SumSectionSizes();
+
+ dec->frame_stage = FrameStage::kTOC;
+ if (dec->preview_frame) {
+ if (!(dec->events_wanted & JXL_DEC_PREVIEW_IMAGE)) {
+ dec->frame_stage = FrameStage::kHeader;
+ dec->AdvanceCodestream(dec->remaining_frame_size);
+ dec->got_preview_image = true;
+ dec->preview_frame = false;
+ }
+ continue;
+ }
+
+ int saved_as = FrameDecoder::SavedAs(*dec->frame_header);
+ // is last in entire codestream
+ dec->is_last_total = dec->frame_header->is_last;
+ // is last of current still
+ dec->is_last_of_still =
+ dec->is_last_total || dec->frame_header->animation_frame.duration > 0;
+ // is kRegularFrame and coalescing is disabled
+ dec->is_last_of_still |=
+ (!dec->coalescing &&
+ dec->frame_header->frame_type == FrameType::kRegularFrame);
+ const size_t internal_frame_index = dec->internal_frames;
+ const size_t external_frame_index = dec->external_frames;
+ if (dec->is_last_of_still) dec->external_frames++;
+ dec->internal_frames++;
+
+ if (dec->skip_frames > 0) {
+ dec->skipping_frame = true;
+ if (dec->is_last_of_still) {
+ dec->skip_frames--;
+ }
+ } else {
+ dec->skipping_frame = false;
+ }
+
+ if (external_frame_index >= dec->frame_external_to_internal.size()) {
+ dec->frame_external_to_internal.push_back(internal_frame_index);
+ JXL_ASSERT(dec->frame_external_to_internal.size() ==
+ external_frame_index + 1);
+ }
+
+ if (internal_frame_index >= dec->frame_saved_as.size()) {
+ dec->frame_saved_as.push_back(saved_as);
+ JXL_ASSERT(dec->frame_saved_as.size() == internal_frame_index + 1);
+
+ // add the value 0xff (which means all references) to new slots: we only
+ // know the references of the frame at FinalizeFrame, and fill in the
+ // correct values there. As long as this information is not known, the
+ // worst case where the frame depends on all storage slots is assumed.
+ dec->frame_references.push_back(0xff);
+ JXL_ASSERT(dec->frame_references.size() == internal_frame_index + 1);
+ }
+
+ if (dec->skipping_frame) {
+ // Whether this frame could be referenced by any future frame: either
+ // because it's a frame saved for blending or patches, or because it's
+ // a DC frame.
+ bool referenceable =
+ dec->frame_header->CanBeReferenced() ||
+ dec->frame_header->frame_type == FrameType::kDCFrame;
+ if (internal_frame_index < dec->frame_required.size() &&
+ !dec->frame_required[internal_frame_index]) {
+ referenceable = false;
+ }
+ if (!referenceable) {
+ // Skip all decoding for this frame, since the user is skipping this
+ // frame and no future frames can reference it.
+ dec->frame_stage = FrameStage::kHeader;
+ dec->AdvanceCodestream(dec->remaining_frame_size);
+ continue;
+ }
+ }
+
+ if ((dec->events_wanted & JXL_DEC_FRAME) && dec->is_last_of_still) {
+ // Only return this for the last of a series of stills: patches frames
+ // etc... before this one do not contain the correct information such
+ // as animation timing, ...
+ if (!dec->skipping_frame) {
+ return JXL_DEC_FRAME;
+ }
+ }
+ }
+
+ if (dec->frame_stage == FrameStage::kTOC) {
+ dec->frame_dec->SetRenderSpotcolors(dec->render_spotcolors);
+ dec->frame_dec->SetCoalescing(dec->coalescing);
+
+ if (!dec->preview_frame &&
+ (dec->events_wanted & JXL_DEC_FRAME_PROGRESSION)) {
+ dec->frame_prog_detail =
+ dec->frame_dec->SetPauseAtProgressive(dec->prog_detail);
+ } else {
+ dec->frame_prog_detail = JxlProgressiveDetail::kFrames;
+ }
+ dec->dc_frame_progression_done = 0;
+
+ dec->next_section = 0;
+ dec->section_processed.clear();
+ dec->section_processed.resize(dec->frame_dec->Toc().size(), 0);
+
+ // If we don't need pixels, we can skip actually decoding the frames.
+ if (dec->preview_frame || (dec->events_wanted & JXL_DEC_FULL_IMAGE)) {
+ dec->frame_stage = FrameStage::kFull;
+ } else if (!dec->is_last_total) {
+ dec->frame_stage = FrameStage::kHeader;
+ dec->AdvanceCodestream(dec->remaining_frame_size);
+ continue;
+ } else {
+ break;
+ }
+ }
+
+ if (dec->frame_stage == FrameStage::kFull) {
+ if (!dec->image_out_buffer_set) {
+ if (dec->preview_frame) {
+ return JXL_DEC_NEED_PREVIEW_OUT_BUFFER;
+ }
+ if (
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ (!dec->jpeg_decoder.IsOutputSet() ||
+ dec->ib->jpeg_data == nullptr) &&
+#endif
+ dec->is_last_of_still && !dec->skipping_frame) {
+ // TODO(lode): remove the dec->is_last_of_still condition if the
+ // frame decoder needs the image buffer as working space for decoding
+ // non-visible or blending frames too
+ return JXL_DEC_NEED_IMAGE_OUT_BUFFER;
+ }
+ }
+
+ if (dec->image_out_buffer_set) {
+ size_t xsize, ysize;
+ GetCurrentDimensions(dec, xsize, ysize);
+ size_t bits_per_sample = GetBitDepth(
+ dec->image_out_bit_depth, dec->metadata.m, dec->image_out_format);
+ dec->frame_dec->SetImageOutput(
+ PixelCallback{
+ dec->image_out_init_callback, dec->image_out_run_callback,
+ dec->image_out_destroy_callback, dec->image_out_init_opaque},
+ reinterpret_cast<uint8_t*>(dec->image_out_buffer),
+ dec->image_out_size, xsize, ysize, dec->image_out_format,
+ bits_per_sample, dec->unpremul_alpha, !dec->keep_orientation);
+ for (size_t i = 0; i < dec->extra_channel_output.size(); ++i) {
+ const auto& extra = dec->extra_channel_output[i];
+ size_t ec_bits_per_sample =
+ GetBitDepth(dec->image_out_bit_depth,
+ dec->metadata.m.extra_channel_info[i], extra.format);
+ dec->frame_dec->AddExtraChannelOutput(extra.buffer, extra.buffer_size,
+ xsize, extra.format,
+ ec_bits_per_sample);
+ }
+ }
+
+ size_t next_num_passes_to_pause = dec->frame_dec->NextNumPassesToPause();
+
+ JXL_API_RETURN_IF_ERROR(JxlDecoderProcessSections(dec));
+
+ bool all_sections_done = dec->frame_dec->HasDecodedAll();
+ bool got_dc_only = !all_sections_done && dec->frame_dec->HasDecodedDC();
+
+ if (dec->frame_prog_detail >= JxlProgressiveDetail::kDC &&
+ !dec->dc_frame_progression_done && got_dc_only) {
+ dec->dc_frame_progression_done = true;
+ dec->downsampling_target = 8;
+ return JXL_DEC_FRAME_PROGRESSION;
+ }
+
+ bool new_progression_step_done =
+ dec->frame_dec->NumCompletePasses() >= next_num_passes_to_pause;
+
+ if (!all_sections_done &&
+ dec->frame_prog_detail >= JxlProgressiveDetail::kLastPasses &&
+ new_progression_step_done) {
+ dec->downsampling_target =
+ dec->frame_header->passes.GetDownsamplingTargetForCompletedPasses(
+ dec->frame_dec->NumCompletePasses());
+ return JXL_DEC_FRAME_PROGRESSION;
+ }
+
+ if (!all_sections_done) {
+ // Not all sections have been processed yet
+ return dec->RequestMoreInput();
+ }
+
+ if (!dec->preview_frame) {
+ size_t internal_index = dec->internal_frames - 1;
+ JXL_ASSERT(dec->frame_references.size() > internal_index);
+ // Always fill this in, even if it was already written, it could be that
+ // this frame was skipped before and set to 255, while only now we know
+ // the true value.
+ dec->frame_references[internal_index] = dec->frame_dec->References();
+ }
+
+ if (!dec->frame_dec->FinalizeFrame()) {
+ return JXL_API_ERROR("decoding frame failed");
+ }
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ // If jpeg output was requested, we merely return the JXL_DEC_FULL_IMAGE
+ // status without outputting pixels.
+ if (dec->jpeg_decoder.IsOutputSet() && dec->ib->jpeg_data != nullptr) {
+ dec->frame_stage = FrameStage::kHeader;
+ dec->recon_output_jpeg = JpegReconStage::kSettingMetadata;
+ return JXL_DEC_FULL_IMAGE;
+ }
+#endif
+ if (dec->preview_frame || dec->is_last_of_still) {
+ dec->image_out_buffer_set = false;
+ dec->extra_channel_output.clear();
+ }
+ }
+
+ dec->frame_stage = FrameStage::kHeader;
+
+ // The pixels have been output or are not needed, do not keep them in
+ // memory here.
+ dec->ib.reset();
+ if (dec->preview_frame) {
+ dec->got_preview_image = true;
+ dec->preview_frame = false;
+ dec->events_wanted &= ~JXL_DEC_PREVIEW_IMAGE;
+ return JXL_DEC_PREVIEW_IMAGE;
+ } else if (dec->is_last_of_still &&
+ (dec->events_wanted & JXL_DEC_FULL_IMAGE) &&
+ !dec->skipping_frame) {
+ return JXL_DEC_FULL_IMAGE;
+ }
+ }
+
+ dec->stage = DecoderStage::kCodestreamFinished;
+ // Return success, this means there is nothing more to do.
+ return JXL_DEC_SUCCESS;
+}
+
+} // namespace
+} // namespace jxl
+
+JxlDecoderStatus JxlDecoderSetInput(JxlDecoder* dec, const uint8_t* data,
+ size_t size) {
+ if (dec->next_in) {
+ return JXL_API_ERROR("already set input, use JxlDecoderReleaseInput first");
+ }
+ if (dec->input_closed) {
+ return JXL_API_ERROR("input already closed");
+ }
+
+ dec->next_in = data;
+ dec->avail_in = size;
+ return JXL_DEC_SUCCESS;
+}
+
+size_t JxlDecoderReleaseInput(JxlDecoder* dec) {
+ size_t result = dec->avail_in;
+ dec->next_in = nullptr;
+ dec->avail_in = 0;
+ return result;
+}
+
+void JxlDecoderCloseInput(JxlDecoder* dec) { dec->input_closed = true; }
+
+JxlDecoderStatus JxlDecoderSetJPEGBuffer(JxlDecoder* dec, uint8_t* data,
+ size_t size) {
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ // JPEG reconstruction buffer can only set and updated before or during the
+ // first frame, the reconstruction box refers to the first frame and in
+ // theory multi-frame images should not be used with a jbrd box.
+ if (dec->internal_frames > 1) {
+ return JXL_API_ERROR("JPEG reconstruction only works for the first frame");
+ }
+ if (dec->jpeg_decoder.IsOutputSet()) {
+ return JXL_API_ERROR("Already set JPEG buffer");
+ }
+ return dec->jpeg_decoder.SetOutputBuffer(data, size);
+#else
+ return JXL_API_ERROR("JPEG reconstruction is not supported.");
+#endif
+}
+
+size_t JxlDecoderReleaseJPEGBuffer(JxlDecoder* dec) {
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ return dec->jpeg_decoder.ReleaseOutputBuffer();
+#else
+ return JXL_API_ERROR("JPEG reconstruction is not supported.");
+#endif
+}
+
+// Parses the header of the box, outputting the 4-character type and the box
+// size, including header size, as stored in the box header.
+// @param in current input bytes.
+// @param size available input size.
+// @param pos position in the input, must begin at the header of the box.
+// @param file_pos position of pos since the start of the JXL file, rather than
+// the current input, used for integer overflow checking.
+// @param type the output box type.
+// @param box_size output the total box size, including header, in bytes, or 0
+// if it's a final unbounded box.
+// @param header_size output size of the box header.
+// @return JXL_DEC_SUCCESS if the box header was fully parsed. In that case the
+// parsing position must be incremented by header_size bytes.
+// JXL_DEC_NEED_MORE_INPUT if not enough input bytes available, in that case
+// header_size indicates a lower bound for the known size the header has to be
+// at least. JXL_DEC_ERROR if the box header is invalid.
+static JxlDecoderStatus ParseBoxHeader(const uint8_t* in, size_t size,
+ size_t pos, size_t file_pos,
+ JxlBoxType type, uint64_t* box_size,
+ uint64_t* header_size) {
+ if (OutOfBounds(pos, 8, size)) {
+ *header_size = 8;
+ return JXL_DEC_NEED_MORE_INPUT;
+ }
+ size_t box_start = pos;
+ // Box size, including this header itself.
+ *box_size = LoadBE32(in + pos);
+ pos += 4;
+ if (*box_size == 1) {
+ *header_size = 16;
+ if (OutOfBounds(pos, 12, size)) return JXL_DEC_NEED_MORE_INPUT;
+ *box_size = LoadBE64(in + pos);
+ pos += 8;
+ }
+ memcpy(type, in + pos, 4);
+ pos += 4;
+ *header_size = pos - box_start;
+ if (*box_size > 0 && *box_size < *header_size) {
+ return JXL_API_ERROR("invalid box size");
+ }
+ if (SumOverflows(file_pos, pos, *box_size)) {
+ return JXL_API_ERROR("Box size overflow");
+ }
+ return JXL_DEC_SUCCESS;
+}
+
+// This includes handling the codestream if it is not a box-based jxl file.
+static JxlDecoderStatus HandleBoxes(JxlDecoder* dec) {
+ // Box handling loop
+ for (;;) {
+ if (dec->box_stage != BoxStage::kHeader) {
+ dec->AdvanceInput(dec->header_size);
+ dec->header_size = 0;
+#if JPEGXL_ENABLE_BOXES
+ if ((dec->events_wanted & JXL_DEC_BOX) &&
+ dec->box_out_buffer_set_current_box) {
+ uint8_t* next_out = dec->box_out_buffer + dec->box_out_buffer_pos;
+ size_t avail_out = dec->box_out_buffer_size - dec->box_out_buffer_pos;
+
+ JxlDecoderStatus box_result = dec->box_content_decoder.Process(
+ dec->next_in, dec->avail_in,
+ dec->file_pos - dec->box_contents_begin, &next_out, &avail_out);
+ size_t produced =
+ next_out - (dec->box_out_buffer + dec->box_out_buffer_pos);
+ dec->box_out_buffer_pos += produced;
+
+ // Don't return JXL_DEC_NEED_MORE_INPUT: the box stages below, instead,
+ // handle the input progression, and the above only outputs the part of
+ // the box seen so far.
+ if (box_result != JXL_DEC_SUCCESS &&
+ box_result != JXL_DEC_NEED_MORE_INPUT) {
+ return box_result;
+ }
+ }
+#endif
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ if (dec->store_exif == 1 || dec->store_xmp == 1) {
+ std::vector<uint8_t>& metadata =
+ (dec->store_exif == 1) ? dec->exif_metadata : dec->xmp_metadata;
+ for (;;) {
+ if (metadata.empty()) metadata.resize(64);
+ uint8_t* orig_next_out = metadata.data() + dec->recon_out_buffer_pos;
+ uint8_t* next_out = orig_next_out;
+ size_t avail_out = metadata.size() - dec->recon_out_buffer_pos;
+ JxlDecoderStatus box_result = dec->metadata_decoder.Process(
+ dec->next_in, dec->avail_in,
+ dec->file_pos - dec->box_contents_begin, &next_out, &avail_out);
+ size_t produced = next_out - orig_next_out;
+ dec->recon_out_buffer_pos += produced;
+ if (box_result == JXL_DEC_BOX_NEED_MORE_OUTPUT) {
+ metadata.resize(metadata.size() * 2);
+ } else if (box_result == JXL_DEC_NEED_MORE_INPUT) {
+ break; // box stage handling below will handle this instead
+ } else if (box_result == JXL_DEC_SUCCESS) {
+ size_t needed_size = (dec->store_exif == 1) ? dec->recon_exif_size
+ : dec->recon_xmp_size;
+ if (dec->box_contents_unbounded &&
+ dec->recon_out_buffer_pos < needed_size) {
+ // Unbounded box, but we know the expected size due to the jbrd
+ // box's data. Treat this as the JXL_DEC_NEED_MORE_INPUT case.
+ break;
+ } else {
+ metadata.resize(dec->recon_out_buffer_pos);
+ if (dec->store_exif == 1) dec->store_exif = 2;
+ if (dec->store_xmp == 1) dec->store_xmp = 2;
+ break;
+ }
+ } else {
+ // error
+ return box_result;
+ }
+ }
+ }
+#endif
+ }
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ if (dec->recon_output_jpeg == JpegReconStage::kSettingMetadata &&
+ !dec->JbrdNeedMoreBoxes()) {
+ jxl::jpeg::JPEGData* jpeg_data = dec->ib->jpeg_data.get();
+ if (dec->recon_exif_size) {
+ JxlDecoderStatus status = jxl::JxlToJpegDecoder::SetExif(
+ dec->exif_metadata.data(), dec->exif_metadata.size(), jpeg_data);
+ if (status != JXL_DEC_SUCCESS) return status;
+ }
+ if (dec->recon_xmp_size) {
+ JxlDecoderStatus status = jxl::JxlToJpegDecoder::SetXmp(
+ dec->xmp_metadata.data(), dec->xmp_metadata.size(), jpeg_data);
+ if (status != JXL_DEC_SUCCESS) return status;
+ }
+ dec->recon_output_jpeg = JpegReconStage::kOutputting;
+ }
+
+ if (dec->recon_output_jpeg == JpegReconStage::kOutputting &&
+ !dec->JbrdNeedMoreBoxes()) {
+ JxlDecoderStatus status =
+ dec->jpeg_decoder.WriteOutput(*dec->ib->jpeg_data);
+ if (status != JXL_DEC_SUCCESS) return status;
+ dec->recon_output_jpeg = JpegReconStage::kFinished;
+ dec->ib.reset();
+ if (dec->events_wanted & JXL_DEC_FULL_IMAGE) {
+ // Return the full image event here now, this may be delayed if this
+ // could only be done after decoding an exif or xmp box after the
+ // codestream.
+ return JXL_DEC_FULL_IMAGE;
+ }
+ }
+#endif
+
+ if (dec->box_stage == BoxStage::kHeader) {
+ if (!dec->have_container) {
+ if (dec->stage == DecoderStage::kCodestreamFinished)
+ return JXL_DEC_SUCCESS;
+ dec->box_stage = BoxStage::kCodestream;
+ dec->box_contents_unbounded = true;
+ continue;
+ }
+ if (dec->avail_in == 0) {
+ if (dec->stage != DecoderStage::kCodestreamFinished) {
+ // Not yet seen (all) codestream boxes.
+ return JXL_DEC_NEED_MORE_INPUT;
+ }
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ if (dec->JbrdNeedMoreBoxes()) {
+ return JXL_DEC_NEED_MORE_INPUT;
+ }
+#endif
+ if (dec->input_closed) {
+ return JXL_DEC_SUCCESS;
+ }
+ if (!(dec->events_wanted & JXL_DEC_BOX)) {
+ // All codestream and jbrd metadata boxes finished, and no individual
+ // boxes requested by user, so no need to request any more input.
+ // This returns success for backwards compatibility, when
+ // JxlDecoderCloseInput and JXL_DEC_BOX did not exist, as well
+ // as for efficiency.
+ return JXL_DEC_SUCCESS;
+ }
+ // Even though we are exactly at a box end, there still may be more
+ // boxes. The user may call JxlDecoderCloseInput to indicate the input
+ // is finished and get success instead.
+ return JXL_DEC_NEED_MORE_INPUT;
+ }
+
+ bool boxed_codestream_done =
+ ((dec->events_wanted & JXL_DEC_BOX) &&
+ dec->stage == DecoderStage::kCodestreamFinished &&
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ !dec->JbrdNeedMoreBoxes() &&
+#endif
+ dec->last_codestream_seen);
+ if (boxed_codestream_done && dec->avail_in >= 2 &&
+ dec->next_in[0] == 0xff &&
+ dec->next_in[1] == jxl::kCodestreamMarker) {
+ // We detected the start of the next naked codestream, so we can return
+ // success here.
+ return JXL_DEC_SUCCESS;
+ }
+
+ uint64_t box_size, header_size;
+ JxlDecoderStatus status =
+ ParseBoxHeader(dec->next_in, dec->avail_in, 0, dec->file_pos,
+ dec->box_type, &box_size, &header_size);
+ if (status != JXL_DEC_SUCCESS) {
+ if (status == JXL_DEC_NEED_MORE_INPUT) {
+ dec->basic_info_size_hint =
+ InitialBasicInfoSizeHint() + header_size - dec->file_pos;
+ }
+ return status;
+ }
+ if (memcmp(dec->box_type, "brob", 4) == 0) {
+ if (dec->avail_in < header_size + 4) {
+ return JXL_DEC_NEED_MORE_INPUT;
+ }
+ memcpy(dec->box_decoded_type, dec->next_in + header_size,
+ sizeof(dec->box_decoded_type));
+ } else {
+ memcpy(dec->box_decoded_type, dec->box_type,
+ sizeof(dec->box_decoded_type));
+ }
+
+ // Box order validity checks
+ // The signature box at box_count == 1 is not checked here since that's
+ // already done at the beginning.
+ dec->box_count++;
+ if (boxed_codestream_done && memcmp(dec->box_type, "JXL ", 4) == 0) {
+ // We detected the start of the next boxed stream, so we can return
+ // success here.
+ return JXL_DEC_SUCCESS;
+ }
+ if (dec->box_count == 2 && memcmp(dec->box_type, "ftyp", 4) != 0) {
+ return JXL_API_ERROR("the second box must be the ftyp box");
+ }
+ if (memcmp(dec->box_type, "ftyp", 4) == 0 && dec->box_count != 2) {
+ return JXL_API_ERROR("the ftyp box must come second");
+ }
+
+ dec->box_contents_unbounded = (box_size == 0);
+ dec->box_contents_begin = dec->file_pos + header_size;
+ dec->box_contents_end =
+ dec->box_contents_unbounded ? 0 : (dec->file_pos + box_size);
+ dec->box_contents_size =
+ dec->box_contents_unbounded ? 0 : (box_size - header_size);
+ dec->box_size = box_size;
+ dec->header_size = header_size;
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ if (dec->orig_events_wanted & JXL_DEC_JPEG_RECONSTRUCTION) {
+ // Initiate storing of Exif or XMP data for JPEG reconstruction
+ if (dec->store_exif == 0 &&
+ memcmp(dec->box_decoded_type, "Exif", 4) == 0) {
+ dec->store_exif = 1;
+ dec->recon_out_buffer_pos = 0;
+ }
+ if (dec->store_xmp == 0 &&
+ memcmp(dec->box_decoded_type, "xml ", 4) == 0) {
+ dec->store_xmp = 1;
+ dec->recon_out_buffer_pos = 0;
+ }
+ }
+#endif
+#if JPEGXL_ENABLE_BOXES
+ if (dec->events_wanted & JXL_DEC_BOX) {
+ bool decompress =
+ dec->decompress_boxes && memcmp(dec->box_type, "brob", 4) == 0;
+ dec->box_content_decoder.StartBox(
+ decompress, dec->box_contents_unbounded, dec->box_contents_size);
+ }
+#endif
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ if (dec->store_exif == 1 || dec->store_xmp == 1) {
+ bool brob = memcmp(dec->box_type, "brob", 4) == 0;
+ dec->metadata_decoder.StartBox(brob, dec->box_contents_unbounded,
+ dec->box_contents_size);
+ }
+#endif
+ if (memcmp(dec->box_type, "ftyp", 4) == 0) {
+ dec->box_stage = BoxStage::kFtyp;
+ } else if (memcmp(dec->box_type, "jxlc", 4) == 0) {
+ if (dec->last_codestream_seen) {
+ return JXL_API_ERROR("there can only be one jxlc box");
+ }
+ dec->last_codestream_seen = true;
+ dec->box_stage = BoxStage::kCodestream;
+ } else if (memcmp(dec->box_type, "jxlp", 4) == 0) {
+ dec->box_stage = BoxStage::kPartialCodestream;
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ } else if ((dec->orig_events_wanted & JXL_DEC_JPEG_RECONSTRUCTION) &&
+ memcmp(dec->box_type, "jbrd", 4) == 0) {
+ if (!(dec->events_wanted & JXL_DEC_JPEG_RECONSTRUCTION)) {
+ return JXL_API_ERROR(
+ "multiple JPEG reconstruction boxes not supported");
+ }
+ dec->box_stage = BoxStage::kJpegRecon;
+#endif
+ } else {
+ dec->box_stage = BoxStage::kSkip;
+ }
+
+ if (dec->events_wanted & JXL_DEC_BOX) {
+ dec->box_event = true;
+ dec->box_out_buffer_set_current_box = false;
+ return JXL_DEC_BOX;
+ }
+ } else if (dec->box_stage == BoxStage::kFtyp) {
+ if (dec->box_contents_size < 12) {
+ return JXL_API_ERROR("file type box too small");
+ }
+ if (dec->avail_in < 4) return JXL_DEC_NEED_MORE_INPUT;
+ if (memcmp(dec->next_in, "jxl ", 4) != 0) {
+ return JXL_API_ERROR("file type box major brand must be \"jxl \"");
+ }
+ dec->AdvanceInput(4);
+ dec->box_stage = BoxStage::kSkip;
+ } else if (dec->box_stage == BoxStage::kPartialCodestream) {
+ if (dec->last_codestream_seen) {
+ return JXL_API_ERROR("cannot have jxlp box after last jxlp box");
+ }
+ // TODO(lode): error if box is unbounded but last bit not set
+ if (dec->avail_in < 4) return JXL_DEC_NEED_MORE_INPUT;
+ if (!dec->box_contents_unbounded && dec->box_contents_size < 4) {
+ return JXL_API_ERROR("jxlp box too small to contain index");
+ }
+ size_t jxlp_index = LoadBE32(dec->next_in);
+ // The high bit of jxlp_index indicates whether this is the last
+ // jxlp box.
+ if (jxlp_index & 0x80000000) {
+ dec->last_codestream_seen = true;
+ }
+ dec->AdvanceInput(4);
+ dec->box_stage = BoxStage::kCodestream;
+ } else if (dec->box_stage == BoxStage::kCodestream) {
+ JxlDecoderStatus status = jxl::JxlDecoderProcessCodestream(dec);
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ if (status == JXL_DEC_FULL_IMAGE) {
+ if (dec->recon_output_jpeg != JpegReconStage::kNone) {
+ continue;
+ }
+ }
+#endif
+ if (status == JXL_DEC_NEED_MORE_INPUT) {
+ if (dec->file_pos == dec->box_contents_end &&
+ !dec->box_contents_unbounded) {
+ dec->box_stage = BoxStage::kHeader;
+ continue;
+ }
+ }
+
+ if (status == JXL_DEC_SUCCESS) {
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ if (dec->JbrdNeedMoreBoxes()) {
+ dec->box_stage = BoxStage::kSkip;
+ continue;
+ }
+#endif
+ if (dec->box_contents_unbounded) {
+ // Last box reached and codestream done, nothing more to do.
+ break;
+ }
+ if (dec->events_wanted & JXL_DEC_BOX) {
+ // Codestream done, but there may be more other boxes.
+ dec->box_stage = BoxStage::kSkip;
+ continue;
+ }
+ }
+ return status;
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ } else if (dec->box_stage == BoxStage::kJpegRecon) {
+ if (!dec->jpeg_decoder.IsParsingBox()) {
+ // This is a new JPEG reconstruction metadata box.
+ dec->jpeg_decoder.StartBox(dec->box_contents_unbounded,
+ dec->box_contents_size);
+ }
+ const uint8_t* next_in = dec->next_in;
+ size_t avail_in = dec->avail_in;
+ JxlDecoderStatus recon_result =
+ dec->jpeg_decoder.Process(&next_in, &avail_in);
+ size_t consumed = next_in - dec->next_in;
+ dec->AdvanceInput(consumed);
+ if (recon_result == JXL_DEC_JPEG_RECONSTRUCTION) {
+ jxl::jpeg::JPEGData* jpeg_data = dec->jpeg_decoder.GetJpegData();
+ size_t num_exif = jxl::JxlToJpegDecoder::NumExifMarkers(*jpeg_data);
+ size_t num_xmp = jxl::JxlToJpegDecoder::NumXmpMarkers(*jpeg_data);
+ if (num_exif) {
+ if (num_exif > 1) {
+ return JXL_API_ERROR(
+ "multiple exif markers for JPEG reconstruction not supported");
+ }
+ if (JXL_DEC_SUCCESS != jxl::JxlToJpegDecoder::ExifBoxContentSize(
+ *jpeg_data, &dec->recon_exif_size)) {
+ return JXL_API_ERROR("invalid jbrd exif size");
+ }
+ }
+ if (num_xmp) {
+ if (num_xmp > 1) {
+ return JXL_API_ERROR(
+ "multiple XMP markers for JPEG reconstruction not supported");
+ }
+ if (JXL_DEC_SUCCESS != jxl::JxlToJpegDecoder::XmlBoxContentSize(
+ *jpeg_data, &dec->recon_xmp_size)) {
+ return JXL_API_ERROR("invalid jbrd XMP size");
+ }
+ }
+
+ dec->box_stage = BoxStage::kHeader;
+ // If successful JPEG reconstruction, return the success if the user
+ // cares about it, otherwise continue.
+ if (dec->events_wanted & recon_result) {
+ dec->events_wanted &= ~recon_result;
+ return recon_result;
+ }
+ } else {
+ // If anything else, return the result.
+ return recon_result;
+ }
+#endif
+ } else if (dec->box_stage == BoxStage::kSkip) {
+ if (dec->box_contents_unbounded) {
+ if (dec->input_closed) {
+ return JXL_DEC_SUCCESS;
+ }
+ if (!(dec->box_out_buffer_set)) {
+ // An unbounded box is always the last box. Not requesting box data,
+ // so return success even if JxlDecoderCloseInput was not called for
+ // backwards compatibility as well as efficiency since this box is
+ // being skipped.
+ return JXL_DEC_SUCCESS;
+ }
+ // Arbitrarily more bytes may follow, only JxlDecoderCloseInput can
+ // mark the end.
+ dec->AdvanceInput(dec->avail_in);
+ return JXL_DEC_NEED_MORE_INPUT;
+ }
+ // Amount of remaining bytes in the box that is being skipped.
+ size_t remaining = dec->box_contents_end - dec->file_pos;
+ if (dec->avail_in < remaining) {
+ // Indicate how many more bytes needed starting from next_in.
+ dec->basic_info_size_hint =
+ InitialBasicInfoSizeHint() + dec->box_contents_end - dec->file_pos;
+ // Don't have the full box yet, skip all we have so far
+ dec->AdvanceInput(dec->avail_in);
+ return JXL_DEC_NEED_MORE_INPUT;
+ } else {
+ // Full box available, skip all its remaining bytes
+ dec->AdvanceInput(remaining);
+ dec->box_stage = BoxStage::kHeader;
+ }
+ } else {
+ JXL_DASSERT(false); // unknown box stage
+ }
+ }
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderProcessInput(JxlDecoder* dec) {
+ if (dec->stage == DecoderStage::kInited) {
+ dec->stage = DecoderStage::kStarted;
+ }
+ if (dec->stage == DecoderStage::kError) {
+ return JXL_API_ERROR(
+ "Cannot keep using decoder after it encountered an error, use "
+ "JxlDecoderReset to reset it");
+ }
+
+ if (!dec->got_signature) {
+ JxlSignature sig = JxlSignatureCheck(dec->next_in, dec->avail_in);
+ if (sig == JXL_SIG_INVALID) return JXL_API_ERROR("invalid signature");
+ if (sig == JXL_SIG_NOT_ENOUGH_BYTES) {
+ if (dec->input_closed) {
+ return JXL_API_ERROR("file too small for signature");
+ }
+ return JXL_DEC_NEED_MORE_INPUT;
+ }
+
+ dec->got_signature = true;
+
+ if (sig == JXL_SIG_CONTAINER) {
+ dec->have_container = 1;
+ } else {
+ dec->last_codestream_seen = true;
+ }
+ }
+
+ JxlDecoderStatus status = HandleBoxes(dec);
+
+ if (status == JXL_DEC_NEED_MORE_INPUT && dec->input_closed) {
+ return JXL_API_ERROR("missing input");
+ }
+
+ // Even if the box handling returns success, certain types of
+ // data may be missing.
+ if (status == JXL_DEC_SUCCESS) {
+ if (dec->CanUseMoreCodestreamInput()) {
+ return JXL_API_ERROR("codestream never finished");
+ }
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ if (dec->JbrdNeedMoreBoxes()) {
+ return JXL_API_ERROR("missing metadata boxes for jpeg reconstruction");
+ }
+#endif
+ }
+
+ return status;
+}
+
+// To ensure ABI forward-compatibility, this struct has a constant size.
+static_assert(sizeof(JxlBasicInfo) == 204,
+ "JxlBasicInfo struct size should remain constant");
+
+JxlDecoderStatus JxlDecoderGetBasicInfo(const JxlDecoder* dec,
+ JxlBasicInfo* info) {
+ if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT;
+
+ if (info) {
+ memset(info, 0, sizeof(*info));
+
+ const jxl::ImageMetadata& meta = dec->metadata.m;
+
+ info->have_container = dec->have_container;
+ info->xsize = dec->metadata.size.xsize();
+ info->ysize = dec->metadata.size.ysize();
+ info->uses_original_profile = !meta.xyb_encoded;
+
+ info->bits_per_sample = meta.bit_depth.bits_per_sample;
+ info->exponent_bits_per_sample = meta.bit_depth.exponent_bits_per_sample;
+
+ info->have_preview = meta.have_preview;
+ info->have_animation = meta.have_animation;
+ info->orientation = static_cast<JxlOrientation>(meta.orientation);
+
+ if (!dec->keep_orientation) {
+ if (info->orientation >= JXL_ORIENT_TRANSPOSE) {
+ std::swap(info->xsize, info->ysize);
+ }
+ info->orientation = JXL_ORIENT_IDENTITY;
+ }
+
+ info->intensity_target = meta.IntensityTarget();
+ if (dec->desired_intensity_target > 0) {
+ info->intensity_target = dec->desired_intensity_target;
+ }
+ info->min_nits = meta.tone_mapping.min_nits;
+ info->relative_to_max_display = meta.tone_mapping.relative_to_max_display;
+ info->linear_below = meta.tone_mapping.linear_below;
+
+ const jxl::ExtraChannelInfo* alpha = meta.Find(jxl::ExtraChannel::kAlpha);
+ if (alpha != nullptr) {
+ info->alpha_bits = alpha->bit_depth.bits_per_sample;
+ info->alpha_exponent_bits = alpha->bit_depth.exponent_bits_per_sample;
+ info->alpha_premultiplied = alpha->alpha_associated;
+ } else {
+ info->alpha_bits = 0;
+ info->alpha_exponent_bits = 0;
+ info->alpha_premultiplied = 0;
+ }
+
+ info->num_color_channels =
+ meta.color_encoding.GetColorSpace() == jxl::ColorSpace::kGray ? 1 : 3;
+
+ info->num_extra_channels = meta.num_extra_channels;
+
+ if (info->have_preview) {
+ info->preview.xsize = dec->metadata.m.preview_size.xsize();
+ info->preview.ysize = dec->metadata.m.preview_size.ysize();
+ }
+
+ if (info->have_animation) {
+ info->animation.tps_numerator = dec->metadata.m.animation.tps_numerator;
+ info->animation.tps_denominator =
+ dec->metadata.m.animation.tps_denominator;
+ info->animation.num_loops = dec->metadata.m.animation.num_loops;
+ info->animation.have_timecodes = dec->metadata.m.animation.have_timecodes;
+ }
+
+ if (meta.have_intrinsic_size) {
+ info->intrinsic_xsize = dec->metadata.m.intrinsic_size.xsize();
+ info->intrinsic_ysize = dec->metadata.m.intrinsic_size.ysize();
+ } else {
+ info->intrinsic_xsize = info->xsize;
+ info->intrinsic_ysize = info->ysize;
+ }
+ }
+
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderGetExtraChannelInfo(const JxlDecoder* dec,
+ size_t index,
+ JxlExtraChannelInfo* info) {
+ if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT;
+
+ const std::vector<jxl::ExtraChannelInfo>& channels =
+ dec->metadata.m.extra_channel_info;
+
+ if (index >= channels.size()) return JXL_DEC_ERROR; // out of bounds
+ const jxl::ExtraChannelInfo& channel = channels[index];
+
+ info->type = static_cast<JxlExtraChannelType>(channel.type);
+ info->bits_per_sample = channel.bit_depth.bits_per_sample;
+ info->exponent_bits_per_sample =
+ channel.bit_depth.floating_point_sample
+ ? channel.bit_depth.exponent_bits_per_sample
+ : 0;
+ info->dim_shift = channel.dim_shift;
+ info->name_length = channel.name.size();
+ info->alpha_premultiplied = channel.alpha_associated;
+ info->spot_color[0] = channel.spot_color[0];
+ info->spot_color[1] = channel.spot_color[1];
+ info->spot_color[2] = channel.spot_color[2];
+ info->spot_color[3] = channel.spot_color[3];
+ info->cfa_channel = channel.cfa_channel;
+
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderGetExtraChannelName(const JxlDecoder* dec,
+ size_t index, char* name,
+ size_t size) {
+ if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT;
+
+ const std::vector<jxl::ExtraChannelInfo>& channels =
+ dec->metadata.m.extra_channel_info;
+
+ if (index >= channels.size()) return JXL_DEC_ERROR; // out of bounds
+ const jxl::ExtraChannelInfo& channel = channels[index];
+
+ // Also need null-termination character
+ if (channel.name.size() + 1 > size) return JXL_DEC_ERROR;
+
+ memcpy(name, channel.name.c_str(), channel.name.size() + 1);
+
+ return JXL_DEC_SUCCESS;
+}
+
+namespace {
+
+// Gets the jxl::ColorEncoding for the desired target, and checks errors.
+// Returns the object regardless of whether the actual color space is in ICC,
+// but ensures that if the color encoding is not the encoding from the
+// codestream header metadata, it cannot require ICC profile.
+JxlDecoderStatus GetColorEncodingForTarget(
+ const JxlDecoder* dec, JxlColorProfileTarget target,
+ const jxl::ColorEncoding** encoding) {
+ if (!dec->got_all_headers) return JXL_DEC_NEED_MORE_INPUT;
+ *encoding = nullptr;
+ if (target == JXL_COLOR_PROFILE_TARGET_DATA && dec->metadata.m.xyb_encoded) {
+ *encoding = &dec->passes_state->output_encoding_info.color_encoding;
+ } else {
+ *encoding = &dec->metadata.m.color_encoding;
+ }
+ return JXL_DEC_SUCCESS;
+}
+} // namespace
+
+JxlDecoderStatus JxlDecoderGetColorAsEncodedProfile(
+ const JxlDecoder* dec, const JxlPixelFormat* unused_format,
+ JxlColorProfileTarget target, JxlColorEncoding* color_encoding) {
+ const jxl::ColorEncoding* jxl_color_encoding = nullptr;
+ JxlDecoderStatus status =
+ GetColorEncodingForTarget(dec, target, &jxl_color_encoding);
+ if (status) return status;
+
+ if (jxl_color_encoding->WantICC())
+ return JXL_DEC_ERROR; // Indicate no encoded profile available.
+
+ if (color_encoding) {
+ ConvertInternalToExternalColorEncoding(*jxl_color_encoding, color_encoding);
+ }
+
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderGetICCProfileSize(
+ const JxlDecoder* dec, const JxlPixelFormat* unused_format,
+ JxlColorProfileTarget target, size_t* size) {
+ const jxl::ColorEncoding* jxl_color_encoding = nullptr;
+ JxlDecoderStatus status =
+ GetColorEncodingForTarget(dec, target, &jxl_color_encoding);
+ if (status != JXL_DEC_SUCCESS) return status;
+
+ if (jxl_color_encoding->WantICC()) {
+ jxl::ColorSpace color_space =
+ dec->metadata.m.color_encoding.GetColorSpace();
+ if (color_space == jxl::ColorSpace::kUnknown ||
+ color_space == jxl::ColorSpace::kXYB) {
+ // This indicates there's no ICC profile available
+ // TODO(lode): for the XYB case, do we want to craft an ICC profile that
+ // represents XYB as an RGB profile? It may be possible, but not with
+ // only 1D transfer functions.
+ return JXL_DEC_ERROR;
+ }
+ }
+
+ if (size) {
+ *size = jxl_color_encoding->ICC().size();
+ }
+
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderGetColorAsICCProfile(
+ const JxlDecoder* dec, const JxlPixelFormat* unused_format,
+ JxlColorProfileTarget target, uint8_t* icc_profile, size_t size) {
+ size_t wanted_size;
+ // This also checks the NEED_MORE_INPUT and the unknown/xyb cases
+ JxlDecoderStatus status =
+ JxlDecoderGetICCProfileSize(dec, nullptr, target, &wanted_size);
+ if (status != JXL_DEC_SUCCESS) return status;
+ if (size < wanted_size) return JXL_API_ERROR("ICC profile output too small");
+
+ const jxl::ColorEncoding* jxl_color_encoding = nullptr;
+ status = GetColorEncodingForTarget(dec, target, &jxl_color_encoding);
+ if (status != JXL_DEC_SUCCESS) return status;
+
+ memcpy(icc_profile, jxl_color_encoding->ICC().data(),
+ jxl_color_encoding->ICC().size());
+
+ return JXL_DEC_SUCCESS;
+}
+
+namespace {
+
+// Returns the amount of bits needed for getting memory buffer size, and does
+// all error checking required for size checking and format validity.
+JxlDecoderStatus PrepareSizeCheck(const JxlDecoder* dec,
+ const JxlPixelFormat* format, size_t* bits) {
+ if (!dec->got_basic_info) {
+ // Don't know image dimensions yet, cannot check for valid size.
+ return JXL_DEC_NEED_MORE_INPUT;
+ }
+ if (!dec->coalescing &&
+ (!dec->frame_header || dec->frame_stage == FrameStage::kHeader)) {
+ return JXL_API_ERROR("Don't know frame dimensions yet");
+ }
+ if (format->num_channels > 4) {
+ return JXL_API_ERROR("More than 4 channels not supported");
+ }
+
+ *bits = BitsPerChannel(format->data_type);
+
+ if (*bits == 0) {
+ return JXL_API_ERROR("Invalid/unsupported data type");
+ }
+
+ return JXL_DEC_SUCCESS;
+}
+
+} // namespace
+
+size_t JxlDecoderGetIntendedDownsamplingRatio(JxlDecoder* dec) {
+ return dec->downsampling_target;
+}
+
+JxlDecoderStatus JxlDecoderFlushImage(JxlDecoder* dec) {
+ if (!dec->image_out_buffer_set) return JXL_DEC_ERROR;
+ if (dec->frame_stage != FrameStage::kFull) {
+ return JXL_DEC_ERROR;
+ }
+ JXL_DASSERT(dec->frame_dec);
+ if (!dec->frame_dec->HasDecodedDC()) {
+ // FrameDecoder::Flush currently requires DC to have been decoded already
+ // to work correctly.
+ return JXL_DEC_ERROR;
+ }
+
+ if (!dec->frame_dec->Flush()) {
+ return JXL_DEC_ERROR;
+ }
+
+ return JXL_DEC_SUCCESS;
+}
+
+JXL_EXPORT JxlDecoderStatus JxlDecoderPreviewOutBufferSize(
+ const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size) {
+ size_t bits;
+ JxlDecoderStatus status = PrepareSizeCheck(dec, format, &bits);
+ if (status != JXL_DEC_SUCCESS) return status;
+ if (format->num_channels < 3 &&
+ !dec->image_metadata.color_encoding.IsGray()) {
+ return JXL_API_ERROR("Number of channels is too low for color output");
+ }
+
+ size_t xsize = dec->metadata.oriented_preview_xsize(dec->keep_orientation);
+ size_t ysize = dec->metadata.oriented_preview_ysize(dec->keep_orientation);
+
+ size_t row_size =
+ jxl::DivCeil(xsize * format->num_channels * bits, jxl::kBitsPerByte);
+ size_t last_row_size = row_size;
+ if (format->align > 1) {
+ row_size = jxl::DivCeil(row_size, format->align) * format->align;
+ }
+ *size = row_size * (ysize - 1) + last_row_size;
+ return JXL_DEC_SUCCESS;
+}
+
+JXL_EXPORT JxlDecoderStatus JxlDecoderSetPreviewOutBuffer(
+ JxlDecoder* dec, const JxlPixelFormat* format, void* buffer, size_t size) {
+ if (!dec->got_basic_info || !dec->metadata.m.have_preview ||
+ !(dec->orig_events_wanted & JXL_DEC_PREVIEW_IMAGE)) {
+ return JXL_API_ERROR("No preview out buffer needed at this time");
+ }
+ if (format->num_channels < 3 &&
+ !dec->image_metadata.color_encoding.IsGray()) {
+ return JXL_API_ERROR("Number of channels is too low for color output");
+ }
+
+ size_t min_size;
+ // This also checks whether the format is valid and supported and basic info
+ // is available.
+ JxlDecoderStatus status =
+ JxlDecoderPreviewOutBufferSize(dec, format, &min_size);
+ if (status != JXL_DEC_SUCCESS) return status;
+
+ if (size < min_size) return JXL_DEC_ERROR;
+
+ dec->image_out_buffer_set = true;
+ dec->image_out_buffer = buffer;
+ dec->image_out_size = size;
+ dec->image_out_format = *format;
+
+ return JXL_DEC_SUCCESS;
+}
+
+JXL_EXPORT JxlDecoderStatus JxlDecoderImageOutBufferSize(
+ const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size) {
+ size_t bits;
+ JxlDecoderStatus status = PrepareSizeCheck(dec, format, &bits);
+ if (status != JXL_DEC_SUCCESS) return status;
+ if (format->num_channels < 3 &&
+ !dec->image_metadata.color_encoding.IsGray()) {
+ return JXL_API_ERROR("Number of channels is too low for color output");
+ }
+ size_t xsize, ysize;
+ GetCurrentDimensions(dec, xsize, ysize);
+ size_t row_size =
+ jxl::DivCeil(xsize * format->num_channels * bits, jxl::kBitsPerByte);
+ if (format->align > 1) {
+ row_size = jxl::DivCeil(row_size, format->align) * format->align;
+ }
+ *size = row_size * ysize;
+
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderSetImageOutBuffer(JxlDecoder* dec,
+ const JxlPixelFormat* format,
+ void* buffer, size_t size) {
+ if (!dec->got_basic_info || !(dec->orig_events_wanted & JXL_DEC_FULL_IMAGE)) {
+ return JXL_API_ERROR("No image out buffer needed at this time");
+ }
+ if (dec->image_out_buffer_set && !!dec->image_out_run_callback) {
+ return JXL_API_ERROR(
+ "Cannot change from image out callback to image out buffer");
+ }
+ if (format->num_channels < 3 &&
+ !dec->image_metadata.color_encoding.IsGray()) {
+ return JXL_API_ERROR("Number of channels is too low for color output");
+ }
+ size_t min_size;
+ // This also checks whether the format is valid and supported and basic info
+ // is available.
+ JxlDecoderStatus status =
+ JxlDecoderImageOutBufferSize(dec, format, &min_size);
+ if (status != JXL_DEC_SUCCESS) return status;
+
+ if (size < min_size) return JXL_DEC_ERROR;
+
+ dec->image_out_buffer_set = true;
+ dec->image_out_buffer = buffer;
+ dec->image_out_size = size;
+ dec->image_out_format = *format;
+
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderExtraChannelBufferSize(const JxlDecoder* dec,
+ const JxlPixelFormat* format,
+ size_t* size,
+ uint32_t index) {
+ if (!dec->got_basic_info || !(dec->orig_events_wanted & JXL_DEC_FULL_IMAGE)) {
+ return JXL_API_ERROR("No extra channel buffer needed at this time");
+ }
+
+ if (index >= dec->metadata.m.num_extra_channels) {
+ return JXL_API_ERROR("Invalid extra channel index");
+ }
+
+ size_t num_channels = 1; // Do not use format's num_channels
+
+ size_t bits;
+ JxlDecoderStatus status = PrepareSizeCheck(dec, format, &bits);
+ if (status != JXL_DEC_SUCCESS) return status;
+
+ size_t xsize, ysize;
+ GetCurrentDimensions(dec, xsize, ysize);
+ size_t row_size =
+ jxl::DivCeil(xsize * num_channels * bits, jxl::kBitsPerByte);
+ if (format->align > 1) {
+ row_size = jxl::DivCeil(row_size, format->align) * format->align;
+ }
+ *size = row_size * ysize;
+
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderSetExtraChannelBuffer(JxlDecoder* dec,
+ const JxlPixelFormat* format,
+ void* buffer, size_t size,
+ uint32_t index) {
+ size_t min_size;
+ // This also checks whether the format and index are valid and supported and
+ // basic info is available.
+ JxlDecoderStatus status =
+ JxlDecoderExtraChannelBufferSize(dec, format, &min_size, index);
+ if (status != JXL_DEC_SUCCESS) return status;
+
+ if (size < min_size) return JXL_DEC_ERROR;
+
+ if (dec->extra_channel_output.size() <= index) {
+ dec->extra_channel_output.resize(dec->metadata.m.num_extra_channels,
+ {{}, nullptr, 0});
+ }
+ // Guaranteed correct thanks to check in JxlDecoderExtraChannelBufferSize.
+ JXL_ASSERT(index < dec->extra_channel_output.size());
+
+ dec->extra_channel_output[index].format = *format;
+ dec->extra_channel_output[index].format.num_channels = 1;
+ dec->extra_channel_output[index].buffer = buffer;
+ dec->extra_channel_output[index].buffer_size = size;
+
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderSetImageOutCallback(JxlDecoder* dec,
+ const JxlPixelFormat* format,
+ JxlImageOutCallback callback,
+ void* opaque) {
+ dec->simple_image_out_callback.callback = callback;
+ dec->simple_image_out_callback.opaque = opaque;
+ const auto init_callback =
+ +[](void* init_opaque, size_t num_threads, size_t num_pixels_per_thread) {
+ // No initialization to do, just reuse init_opaque as run_opaque.
+ return init_opaque;
+ };
+ const auto run_callback =
+ +[](void* run_opaque, size_t thread_id, size_t x, size_t y,
+ size_t num_pixels, const void* pixels) {
+ const auto* const simple_callback =
+ static_cast<const JxlDecoder::SimpleImageOutCallback*>(run_opaque);
+ simple_callback->callback(simple_callback->opaque, x, y, num_pixels,
+ pixels);
+ };
+ const auto destroy_callback = +[](void* run_opaque) {};
+ return JxlDecoderSetMultithreadedImageOutCallback(
+ dec, format, init_callback, run_callback,
+ /*destroy_callback=*/destroy_callback, &dec->simple_image_out_callback);
+}
+
+JxlDecoderStatus JxlDecoderSetMultithreadedImageOutCallback(
+ JxlDecoder* dec, const JxlPixelFormat* format,
+ JxlImageOutInitCallback init_callback, JxlImageOutRunCallback run_callback,
+ JxlImageOutDestroyCallback destroy_callback, void* init_opaque) {
+ if (dec->image_out_buffer_set && !!dec->image_out_buffer) {
+ return JXL_API_ERROR(
+ "Cannot change from image out buffer to image out callback");
+ }
+
+ if (init_callback == nullptr || run_callback == nullptr ||
+ destroy_callback == nullptr) {
+ return JXL_API_ERROR("All callbacks are required");
+ }
+
+ // Perform error checking for invalid format.
+ size_t bits_dummy;
+ JxlDecoderStatus status = PrepareSizeCheck(dec, format, &bits_dummy);
+ if (status != JXL_DEC_SUCCESS) return status;
+
+ dec->image_out_buffer_set = true;
+ dec->image_out_init_callback = init_callback;
+ dec->image_out_run_callback = run_callback;
+ dec->image_out_destroy_callback = destroy_callback;
+ dec->image_out_init_opaque = init_opaque;
+ dec->image_out_format = *format;
+
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderGetFrameHeader(const JxlDecoder* dec,
+ JxlFrameHeader* header) {
+ if (!dec->frame_header || dec->frame_stage == FrameStage::kHeader) {
+ return JXL_API_ERROR("no frame header available");
+ }
+ const auto& metadata = dec->metadata.m;
+ memset(header, 0, sizeof(*header));
+ if (metadata.have_animation) {
+ header->duration = dec->frame_header->animation_frame.duration;
+ if (metadata.animation.have_timecodes) {
+ header->timecode = dec->frame_header->animation_frame.timecode;
+ }
+ }
+ header->name_length = dec->frame_header->name.size();
+ header->is_last = dec->frame_header->is_last;
+ size_t xsize, ysize;
+ GetCurrentDimensions(dec, xsize, ysize);
+ header->layer_info.xsize = xsize;
+ header->layer_info.ysize = ysize;
+ if (!dec->coalescing && dec->frame_header->custom_size_or_origin) {
+ header->layer_info.crop_x0 = dec->frame_header->frame_origin.x0;
+ header->layer_info.crop_y0 = dec->frame_header->frame_origin.y0;
+ header->layer_info.have_crop = JXL_TRUE;
+ } else {
+ header->layer_info.crop_x0 = 0;
+ header->layer_info.crop_y0 = 0;
+ header->layer_info.have_crop = JXL_FALSE;
+ }
+ if (!dec->keep_orientation && !dec->coalescing) {
+ // orient the crop offset
+ size_t W = dec->metadata.oriented_xsize(false);
+ size_t H = dec->metadata.oriented_ysize(false);
+ if (metadata.orientation > 4) {
+ std::swap(header->layer_info.crop_x0, header->layer_info.crop_y0);
+ }
+ size_t o = (metadata.orientation - 1) & 3;
+ if (o > 0 && o < 3) {
+ header->layer_info.crop_x0 = W - xsize - header->layer_info.crop_x0;
+ }
+ if (o > 1) {
+ header->layer_info.crop_y0 = H - ysize - header->layer_info.crop_y0;
+ }
+ }
+ if (dec->coalescing) {
+ header->layer_info.blend_info.blendmode = JXL_BLEND_REPLACE;
+ header->layer_info.blend_info.source = 0;
+ header->layer_info.blend_info.alpha = 0;
+ header->layer_info.blend_info.clamp = JXL_FALSE;
+ header->layer_info.save_as_reference = 0;
+ } else {
+ header->layer_info.blend_info.blendmode =
+ static_cast<JxlBlendMode>(dec->frame_header->blending_info.mode);
+ header->layer_info.blend_info.source =
+ dec->frame_header->blending_info.source;
+ header->layer_info.blend_info.alpha =
+ dec->frame_header->blending_info.alpha_channel;
+ header->layer_info.blend_info.clamp =
+ dec->frame_header->blending_info.clamp;
+ header->layer_info.save_as_reference = dec->frame_header->save_as_reference;
+ }
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderGetExtraChannelBlendInfo(const JxlDecoder* dec,
+ size_t index,
+ JxlBlendInfo* blend_info) {
+ if (!dec->frame_header || dec->frame_stage == FrameStage::kHeader) {
+ return JXL_API_ERROR("no frame header available");
+ }
+ const auto& metadata = dec->metadata.m;
+ if (index >= metadata.num_extra_channels) {
+ return JXL_API_ERROR("Invalid extra channel index");
+ }
+ blend_info->blendmode = static_cast<JxlBlendMode>(
+ dec->frame_header->extra_channel_blending_info[index].mode);
+ blend_info->source =
+ dec->frame_header->extra_channel_blending_info[index].source;
+ blend_info->alpha =
+ dec->frame_header->extra_channel_blending_info[index].alpha_channel;
+ blend_info->clamp =
+ dec->frame_header->extra_channel_blending_info[index].clamp;
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderGetFrameName(const JxlDecoder* dec, char* name,
+ size_t size) {
+ if (!dec->frame_header || dec->frame_stage == FrameStage::kHeader) {
+ return JXL_API_ERROR("no frame header available");
+ }
+ if (size < dec->frame_header->name.size() + 1) {
+ return JXL_API_ERROR("too small frame name output buffer");
+ }
+ memcpy(name, dec->frame_header->name.c_str(),
+ dec->frame_header->name.size() + 1);
+
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderSetPreferredColorProfile(
+ JxlDecoder* dec, const JxlColorEncoding* color_encoding) {
+ if (!dec->got_all_headers) {
+ return JXL_API_ERROR("color info not yet available");
+ }
+ if (dec->post_headers) {
+ return JXL_API_ERROR("too late to set the color encoding");
+ }
+ if (dec->image_metadata.color_encoding.IsGray() &&
+ color_encoding->color_space != JXL_COLOR_SPACE_GRAY &&
+ dec->image_out_buffer_set && dec->image_out_format.num_channels < 3) {
+ return JXL_API_ERROR("Number of channels is too low for color output");
+ }
+ if (color_encoding->color_space == JXL_COLOR_SPACE_UNKNOWN) {
+ return JXL_API_ERROR("Unknown output colorspace");
+ }
+ jxl::ColorEncoding c_out;
+ JXL_API_RETURN_IF_ERROR(
+ ConvertExternalToInternalColorEncoding(*color_encoding, &c_out));
+ JXL_API_RETURN_IF_ERROR(!c_out.ICC().empty());
+ auto& output_encoding = dec->passes_state->output_encoding_info;
+ if (!c_out.SameColorEncoding(output_encoding.color_encoding)) {
+ JXL_API_RETURN_IF_ERROR(output_encoding.MaybeSetColorEncoding(c_out));
+ dec->image_metadata.color_encoding = output_encoding.color_encoding;
+ }
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderSetDesiredIntensityTarget(
+ JxlDecoder* dec, float desired_intensity_target) {
+ if (desired_intensity_target < 0) {
+ return JXL_API_ERROR("negative intensity target requested");
+ }
+ dec->desired_intensity_target = desired_intensity_target;
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderSetBoxBuffer(JxlDecoder* dec, uint8_t* data,
+ size_t size) {
+ if (dec->box_out_buffer_set) {
+ return JXL_API_ERROR("must release box buffer before setting it again");
+ }
+ if (!dec->box_event) {
+ return JXL_API_ERROR("can only set box buffer after box event");
+ }
+
+ dec->box_out_buffer_set = true;
+ dec->box_out_buffer_set_current_box = true;
+ dec->box_out_buffer = data;
+ dec->box_out_buffer_size = size;
+ dec->box_out_buffer_pos = 0;
+ return JXL_DEC_SUCCESS;
+}
+
+size_t JxlDecoderReleaseBoxBuffer(JxlDecoder* dec) {
+ if (!dec->box_out_buffer_set) {
+ return 0;
+ }
+ size_t result = dec->box_out_buffer_size - dec->box_out_buffer_pos;
+ dec->box_out_buffer_set = false;
+ dec->box_out_buffer = nullptr;
+ dec->box_out_buffer_size = 0;
+ if (!dec->box_out_buffer_set_current_box) {
+ dec->box_out_buffer_begin = 0;
+ } else {
+ dec->box_out_buffer_begin += dec->box_out_buffer_pos;
+ }
+ dec->box_out_buffer_set_current_box = false;
+ return result;
+}
+
+JxlDecoderStatus JxlDecoderSetDecompressBoxes(JxlDecoder* dec,
+ JXL_BOOL decompress) {
+ // TODO(lode): return error if libbrotli is not compiled in the jxl decoding
+ // library
+ dec->decompress_boxes = decompress;
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderGetBoxType(JxlDecoder* dec, JxlBoxType type,
+ JXL_BOOL decompressed) {
+ if (!dec->box_event) {
+ return JXL_API_ERROR("can only get box info after JXL_DEC_BOX event");
+ }
+ if (decompressed) {
+ memcpy(type, dec->box_decoded_type, sizeof(dec->box_decoded_type));
+ } else {
+ memcpy(type, dec->box_type, sizeof(dec->box_type));
+ }
+
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderGetBoxSizeRaw(const JxlDecoder* dec,
+ uint64_t* size) {
+ if (!dec->box_event) {
+ return JXL_API_ERROR("can only get box info after JXL_DEC_BOX event");
+ }
+ if (size) {
+ *size = dec->box_size;
+ }
+ return JXL_DEC_SUCCESS;
+}
+
+JxlDecoderStatus JxlDecoderSetProgressiveDetail(JxlDecoder* dec,
+ JxlProgressiveDetail detail) {
+ if (detail != kDC && detail != kLastPasses && detail != kPasses) {
+ return JXL_API_ERROR(
+ "Values other than kDC (%d), kLastPasses (%d) and kPasses (%d), "
+ "like %d are not implemented.",
+ kDC, kLastPasses, kPasses, detail);
+ }
+ dec->prog_detail = detail;
+ return JXL_DEC_SUCCESS;
+}
+
+namespace {
+
+template <typename T>
+JxlDecoderStatus VerifyOutputBitDepth(JxlBitDepth bit_depth, const T& metadata,
+ JxlPixelFormat format) {
+ if ((format.data_type == JXL_TYPE_FLOAT ||
+ format.data_type == JXL_TYPE_FLOAT16) &&
+ bit_depth.type != JXL_BIT_DEPTH_FROM_PIXEL_FORMAT) {
+ return JXL_API_ERROR(
+ "Only JXL_BIT_DEPTH_FROM_PIXEL_FORMAT is implemented "
+ "for float types.");
+ }
+ uint32_t bits_per_sample = GetBitDepth(bit_depth, metadata, format);
+ if (format.data_type == JXL_TYPE_UINT8 &&
+ (bits_per_sample == 0 || bits_per_sample > 8)) {
+ return JXL_API_ERROR("Inavlid bit depth %u for uint8 output",
+ bits_per_sample);
+ } else if (format.data_type == JXL_TYPE_UINT16 &&
+ (bits_per_sample == 0 || bits_per_sample > 16)) {
+ return JXL_API_ERROR("Inavlid bit depth %u for uint16 output",
+ bits_per_sample);
+ }
+ return JXL_DEC_SUCCESS;
+}
+
+} // namespace
+
+JxlDecoderStatus JxlDecoderSetImageOutBitDepth(JxlDecoder* dec,
+ const JxlBitDepth* bit_depth) {
+ if (!dec->image_out_buffer_set) {
+ return JXL_API_ERROR("No image out buffer was set.");
+ }
+ JXL_API_RETURN_IF_ERROR(
+ VerifyOutputBitDepth(*bit_depth, dec->metadata.m, dec->image_out_format));
+ dec->image_out_bit_depth = *bit_depth;
+ return JXL_DEC_SUCCESS;
+}
diff --git a/third_party/jpeg-xl/lib/jxl/decode_test.cc b/third_party/jpeg-xl/lib/jxl/decode_test.cc
new file mode 100644
index 0000000000..30f6b61183
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/decode_test.cc
@@ -0,0 +1,5507 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+#include <jxl/resizable_parallel_runner_cxx.h>
+#include <jxl/thread_parallel_runner_cxx.h>
+#include <jxl/types.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/dec/color_description.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/file_io.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_icc_codec.h"
+#include "lib/jxl/enc_progressive_split.h"
+#include "lib/jxl/encode_internal.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/icc_codec.h"
+#include "lib/jxl/image_metadata.h"
+#include "lib/jxl/jpeg/enc_jpeg_data.h"
+#include "lib/jxl/test_image.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+#include "lib/jxl/toc.h"
+
+////////////////////////////////////////////////////////////////////////////////
+
+namespace {
+void AppendU32BE(uint32_t u32, jxl::PaddedBytes* bytes) {
+ bytes->push_back(u32 >> 24);
+ bytes->push_back(u32 >> 16);
+ bytes->push_back(u32 >> 8);
+ bytes->push_back(u32 >> 0);
+}
+
+// What type of codestream format in the boxes to use for testing
+enum CodeStreamBoxFormat {
+ // Do not use box format at all, only pure codestream
+ kCSBF_None,
+ // Have a single codestream box, with its actual size given in the box
+ kCSBF_Single,
+ // Have a single codestream box, with box size 0 (final box running to end)
+ kCSBF_Single_Zero_Terminated,
+ // Single codestream box, with another unknown box behind it
+ kCSBF_Single_Other,
+ // Have multiple partial codestream boxes
+ kCSBF_Multi,
+ // Have multiple partial codestream boxes, with final box size 0 (running
+ // to end)
+ kCSBF_Multi_Zero_Terminated,
+ // Have multiple partial codestream boxes, terminated by non-codestream box
+ kCSBF_Multi_Other_Terminated,
+ // Have multiple partial codestream boxes, terminated by non-codestream box
+ // that has its size set to 0 (running to end)
+ kCSBF_Multi_Other_Zero_Terminated,
+ // Have multiple partial codestream boxes, and the first one has a content
+ // of zero length
+ kCSBF_Multi_First_Empty,
+ // Have multiple partial codestream boxes, and the last one has a content
+ // of zero length and there is an unknown empty box at the end
+ kCSBF_Multi_Last_Empty_Other,
+ // Have a compressed exif box before a regular codestream box
+ kCSBF_Brob_Exif,
+ // Not a value but used for counting amount of enum entries
+ kCSBF_NUM_ENTRIES,
+};
+
+// Unknown boxes for testing
+static const char* unk1_box_type = "unk1";
+static const char* unk1_box_contents = "abcdefghijklmnopqrstuvwxyz";
+static const size_t unk1_box_size = strlen(unk1_box_contents);
+static const char* unk2_box_type = "unk2";
+static const char* unk2_box_contents = "0123456789";
+static const size_t unk2_box_size = strlen(unk2_box_contents);
+static const char* unk3_box_type = "unk3";
+static const char* unk3_box_contents = "ABCDEF123456";
+static const size_t unk3_box_size = strlen(unk3_box_contents);
+// Box with brob-compressed exif, including header
+static const uint8_t* box_brob_exif = reinterpret_cast<const uint8_t*>(
+ "\0\0\0@brobExif\241\350\2\300\177\244v\2525\304\360\27=?\267{"
+ "\33\37\314\332\214QX17PT\"\256\0\0\202s\214\313t\333\310\320k\20\276\30"
+ "\204\277l$\326c#\1\b");
+size_t box_brob_exif_size = 64;
+// The uncompressed Exif data from the brob box
+static const uint8_t* exif_uncompressed = reinterpret_cast<const uint8_t*>(
+ "\0\0\0\0MM\0*"
+ "\0\0\0\b\0\5\1\22\0\3\0\0\0\1\0\5\0\0\1\32\0\5\0\0\0\1\0\0\0J\1\33\0\5\0\0"
+ "\0\1\0\0\0R\1("
+ "\0\3\0\0\0\1\0\1\0\0\2\23\0\3\0\0\0\1\0\1\0\0\0\0\0\0\0\0\0\1\0\0\0\1\0\0"
+ "\0\1\0\0\0\1");
+size_t exif_uncompressed_size = 94;
+
+// Returns an ICC profile output by the JPEG XL decoder for RGB_D65_SRG_Rel_Lin,
+// but with, on purpose, rXYZ, bXYZ and gXYZ (the RGB primaries) switched to a
+// different order to ensure the profile does not match any known profile, so
+// the encoder cannot encode it in a compact struct instead.
+jxl::PaddedBytes GetIccTestProfile() {
+ const uint8_t* profile = reinterpret_cast<const uint8_t*>(
+ "\0\0\3\200lcms\0040\0\0mntrRGB XYZ "
+ "\a\344\0\a\0\27\0\21\0$"
+ "\0\37acspAPPL\0\0\0\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\0\0\366"
+ "\326\0\1\0\0\0\0\323-lcms\372c\207\36\227\200{"
+ "\2\232s\255\327\340\0\n\26\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0\rdesc\0\0\1 "
+ "\0\0\0Bcprt\0\0\1d\0\0\1\0wtpt\0\0\2d\0\0\0\24chad\0\0\2x\0\0\0,"
+ "bXYZ\0\0\2\244\0\0\0\24gXYZ\0\0\2\270\0\0\0\24rXYZ\0\0\2\314\0\0\0\24rTR"
+ "C\0\0\2\340\0\0\0 gTRC\0\0\2\340\0\0\0 bTRC\0\0\2\340\0\0\0 "
+ "chrm\0\0\3\0\0\0\0$dmnd\0\0\3$\0\0\0("
+ "dmdd\0\0\3L\0\0\0002mluc\0\0\0\0\0\0\0\1\0\0\0\fenUS\0\0\0&"
+ "\0\0\0\34\0R\0G\0B\0_\0D\0006\0005\0_\0S\0R\0G\0_\0R\0e\0l\0_"
+ "\0L\0i\0n\0\0mluc\0\0\0\0\0\0\0\1\0\0\0\fenUS\0\0\0\344\0\0\0\34\0C\0o\0"
+ "p\0y\0r\0i\0g\0h\0t\0 \0002\0000\0001\08\0 \0G\0o\0o\0g\0l\0e\0 "
+ "\0L\0L\0C\0,\0 \0C\0C\0-\0B\0Y\0-\0S\0A\0 \0003\0.\0000\0 "
+ "\0U\0n\0p\0o\0r\0t\0e\0d\0 "
+ "\0l\0i\0c\0e\0n\0s\0e\0(\0h\0t\0t\0p\0s\0:\0/\0/"
+ "\0c\0r\0e\0a\0t\0i\0v\0e\0c\0o\0m\0m\0o\0n\0s\0.\0o\0r\0g\0/"
+ "\0l\0i\0c\0e\0n\0s\0e\0s\0/\0b\0y\0-\0s\0a\0/\0003\0.\0000\0/"
+ "\0l\0e\0g\0a\0l\0c\0o\0d\0e\0)XYZ "
+ "\0\0\0\0\0\0\366\326\0\1\0\0\0\0\323-"
+ "sf32\0\0\0\0\0\1\fB\0\0\5\336\377\377\363%"
+ "\0\0\a\223\0\0\375\220\377\377\373\241\377\377\375\242\0\0\3\334\0\0\300"
+ "nXYZ \0\0\0\0\0\0o\240\0\08\365\0\0\3\220XYZ "
+ "\0\0\0\0\0\0$\237\0\0\17\204\0\0\266\304XYZ "
+ "\0\0\0\0\0\0b\227\0\0\267\207\0\0\30\331para\0\0\0\0\0\3\0\0\0\1\0\0\0\1"
+ "\0\0\0\0\0\0\0\1\0\0\0\0\0\0chrm\0\0\0\0\0\3\0\0\0\0\243\327\0\0T|"
+ "\0\0L\315\0\0\231\232\0\0&"
+ "g\0\0\17\\mluc\0\0\0\0\0\0\0\1\0\0\0\fenUS\0\0\0\f\0\0\0\34\0G\0o\0o\0g"
+ "\0l\0emluc\0\0\0\0\0\0\0\1\0\0\0\fenUS\0\0\0\26\0\0\0\34\0I\0m\0a\0g\0e"
+ "\0 \0c\0o\0d\0e\0c\0\0");
+ size_t profile_size = 896;
+ jxl::PaddedBytes icc_profile;
+ icc_profile.assign(profile, profile + profile_size);
+ return icc_profile;
+}
+
+} // namespace
+
+namespace jxl {
+namespace {
+
+void AppendTestBox(const char* type, const char* contents, size_t contents_size,
+ bool unbounded, PaddedBytes* bytes) {
+ AppendU32BE(contents_size + 8, bytes);
+ bytes->push_back(type[0]);
+ bytes->push_back(type[1]);
+ bytes->push_back(type[2]);
+ bytes->push_back(type[3]);
+ const uint8_t* contents_u = reinterpret_cast<const uint8_t*>(contents);
+ bytes->append(contents_u, contents_u + contents_size);
+}
+
+enum PreviewMode {
+ kNoPreview,
+ kSmallPreview,
+ kBigPreview,
+ kNumPreviewModes,
+};
+
+void GeneratePreview(PreviewMode preview_mode, ImageBundle* ib) {
+ if (preview_mode == kSmallPreview) {
+ ib->ShrinkTo(ib->xsize() / 7, ib->ysize() / 7);
+ } else if (preview_mode == kBigPreview) {
+ auto upsample7 = [&](const ImageF& in, ImageF* out) {
+ for (size_t y = 0; y < out->ysize(); ++y) {
+ for (size_t x = 0; x < out->xsize(); ++x) {
+ out->Row(y)[x] = in.ConstRow(y / 7)[x / 7];
+ }
+ }
+ };
+ Image3F preview(ib->xsize() * 7, ib->ysize() * 7);
+ for (size_t c = 0; c < 3; ++c) {
+ upsample7(ib->color()->Plane(c), &preview.Plane(c));
+ }
+ std::vector<ImageF> extra_channels;
+ for (size_t i = 0; i < ib->extra_channels().size(); ++i) {
+ ImageF ec(ib->xsize() * 7, ib->ysize() * 7);
+ upsample7(ib->extra_channels()[i], &ec);
+ extra_channels.emplace_back(std::move(ec));
+ }
+ ib->RemoveColor();
+ ib->ClearExtraChannels();
+ ib->SetFromImage(std::move(preview), ib->c_current());
+ ib->SetExtraChannels(std::move(extra_channels));
+ }
+}
+
+struct TestCodestreamParams {
+ CompressParams cparams;
+ CodeStreamBoxFormat box_format = kCSBF_None;
+ JxlOrientation orientation = JXL_ORIENT_IDENTITY;
+ PreviewMode preview_mode = kNoPreview;
+ bool add_intrinsic_size = false;
+ bool add_icc_profile = false;
+ float intensity_target = 0.0;
+ std::string color_space;
+ PaddedBytes* jpeg_codestream = nullptr;
+ const ProgressiveMode* progressive_mode = nullptr;
+};
+
+// Input pixels always given as 16-bit RGBA, 8 bytes per pixel.
+// include_alpha determines if the encoded image should contain the alpha
+// channel.
+// add_icc_profile: if false, encodes the image as sRGB using the JXL fields,
+// for grayscale or RGB images. If true, encodes the image using the ICC profile
+// returned by GetIccTestProfile, without the JXL fields, this requires the
+// image is RGB, not grayscale.
+// Providing jpeg_codestream will populate the jpeg_codestream with compressed
+// JPEG bytes, and make it possible to reconstruct those exact JPEG bytes using
+// the return value _if_ add_container indicates a box format.
+PaddedBytes CreateTestJXLCodestream(Span<const uint8_t> pixels, size_t xsize,
+ size_t ysize, size_t num_channels,
+ const TestCodestreamParams& params) {
+ // Compress the pixels with JPEG XL.
+ bool grayscale = (num_channels <= 2);
+ bool include_alpha = !(num_channels & 1) && params.jpeg_codestream == nullptr;
+ size_t bitdepth = params.jpeg_codestream == nullptr ? 16 : 8;
+ CodecInOut io;
+ io.SetSize(xsize, ysize);
+ ColorEncoding color_encoding;
+ if (params.add_icc_profile) {
+ // the hardcoded ICC profile we attach requires RGB.
+ EXPECT_EQ(false, grayscale);
+ EXPECT_TRUE(params.color_space.empty());
+ EXPECT_TRUE(color_encoding.SetICC(GetIccTestProfile()));
+ } else if (!params.color_space.empty()) {
+ JxlColorEncoding c;
+ EXPECT_TRUE(jxl::ParseDescription(params.color_space, &c));
+ EXPECT_TRUE(ConvertExternalToInternalColorEncoding(c, &color_encoding));
+ EXPECT_EQ(color_encoding.IsGray(), grayscale);
+ } else {
+ color_encoding = jxl::ColorEncoding::SRGB(/*is_gray=*/grayscale);
+ }
+ ThreadPool pool(nullptr, nullptr);
+ io.metadata.m.SetUintSamples(bitdepth);
+ if (include_alpha) {
+ io.metadata.m.SetAlphaBits(bitdepth);
+ }
+ if (params.intensity_target != 0) {
+ io.metadata.m.SetIntensityTarget(params.intensity_target);
+ }
+ JxlPixelFormat format = {static_cast<uint32_t>(num_channels), JXL_TYPE_UINT16,
+ JXL_BIG_ENDIAN, 0};
+ // Make the grayscale-ness of the io metadata color_encoding and the packed
+ // image match.
+ io.metadata.m.color_encoding = color_encoding;
+ EXPECT_TRUE(ConvertFromExternal(pixels, xsize, ysize, color_encoding,
+ /*bits_per_sample=*/16, format, &pool,
+ &io.Main()));
+ jxl::PaddedBytes jpeg_data;
+ if (params.jpeg_codestream != nullptr) {
+#if JPEGXL_ENABLE_JPEG
+ std::vector<uint8_t> jpeg_bytes;
+ io.jpeg_quality = 70;
+ EXPECT_TRUE(Encode(io, extras::Codec::kJPG, io.metadata.m.color_encoding,
+ /*bits_per_sample=*/8, &jpeg_bytes, &pool));
+ params.jpeg_codestream->append(jpeg_bytes.data(),
+ jpeg_bytes.data() + jpeg_bytes.size());
+ EXPECT_TRUE(jxl::jpeg::DecodeImageJPG(
+ jxl::Span<const uint8_t>(jpeg_bytes.data(), jpeg_bytes.size()), &io));
+ EXPECT_TRUE(
+ EncodeJPEGData(*io.Main().jpeg_data, &jpeg_data, params.cparams));
+ io.metadata.m.xyb_encoded = false;
+#else // JPEGXL_ENABLE_JPEG
+ JXL_ABORT(
+ "unable to create reconstructible JPEG without JPEG support enabled");
+#endif // JPEGXL_ENABLE_JPEG
+ }
+ if (params.preview_mode) {
+ io.preview_frame = io.Main().Copy();
+ GeneratePreview(params.preview_mode, &io.preview_frame);
+ io.metadata.m.have_preview = true;
+ EXPECT_TRUE(io.metadata.m.preview_size.Set(io.preview_frame.xsize(),
+ io.preview_frame.ysize()));
+ }
+ if (params.add_intrinsic_size) {
+ EXPECT_TRUE(io.metadata.m.intrinsic_size.Set(xsize / 3, ysize / 3));
+ }
+ io.metadata.m.orientation = params.orientation;
+ AuxOut aux_out;
+ PaddedBytes compressed;
+ PassesEncoderState enc_state;
+ if (params.progressive_mode) {
+ enc_state.progressive_splitter.SetProgressiveMode(*params.progressive_mode);
+ }
+ EXPECT_TRUE(EncodeFile(params.cparams, &io, &enc_state, &compressed,
+ GetJxlCms(), &aux_out, &pool));
+ CodeStreamBoxFormat add_container = params.box_format;
+ if (add_container != kCSBF_None) {
+ // Header with signature box and ftyp box.
+ const uint8_t header[] = {0, 0, 0, 0xc, 0x4a, 0x58, 0x4c, 0x20,
+ 0xd, 0xa, 0x87, 0xa, 0, 0, 0, 0x14,
+ 0x66, 0x74, 0x79, 0x70, 0x6a, 0x78, 0x6c, 0x20,
+ 0, 0, 0, 0, 0x6a, 0x78, 0x6c, 0x20};
+
+ bool is_multi = add_container == kCSBF_Multi ||
+ add_container == kCSBF_Multi_Zero_Terminated ||
+ add_container == kCSBF_Multi_Other_Terminated ||
+ add_container == kCSBF_Multi_Other_Zero_Terminated ||
+ add_container == kCSBF_Multi_First_Empty ||
+ add_container == kCSBF_Multi_Last_Empty_Other;
+
+ if (is_multi) {
+ size_t third = compressed.size() / 3;
+ std::vector<uint8_t> compressed0(compressed.data(),
+ compressed.data() + third);
+ std::vector<uint8_t> compressed1(compressed.data() + third,
+ compressed.data() + 2 * third);
+ std::vector<uint8_t> compressed2(compressed.data() + 2 * third,
+ compressed.data() + compressed.size());
+
+ PaddedBytes c;
+ c.append(header, header + sizeof(header));
+ if (params.jpeg_codestream != nullptr) {
+ jxl::AppendBoxHeader(jxl::MakeBoxType("jbrd"), jpeg_data.size(), false,
+ &c);
+ c.append(jpeg_data.data(), jpeg_data.data() + jpeg_data.size());
+ }
+ uint32_t jxlp_index = 0;
+ if (add_container == kCSBF_Multi_First_Empty) {
+ // Dummy (empty) codestream part
+ AppendU32BE(12, &c);
+ c.push_back('j');
+ c.push_back('x');
+ c.push_back('l');
+ c.push_back('p');
+ AppendU32BE(jxlp_index++, &c);
+ }
+ // First codestream part
+ AppendU32BE(compressed0.size() + 12, &c);
+ c.push_back('j');
+ c.push_back('x');
+ c.push_back('l');
+ c.push_back('p');
+ AppendU32BE(jxlp_index++, &c);
+ c.append(compressed0.data(), compressed0.data() + compressed0.size());
+ // A few non-codestream boxes in between
+ AppendTestBox(unk1_box_type, unk1_box_contents, unk1_box_size, false, &c);
+ AppendTestBox(unk2_box_type, unk2_box_contents, unk2_box_size, false, &c);
+ // Dummy (empty) codestream part
+ AppendU32BE(12, &c);
+ c.push_back('j');
+ c.push_back('x');
+ c.push_back('l');
+ c.push_back('p');
+ AppendU32BE(jxlp_index++, &c);
+ // Second codestream part
+ AppendU32BE(compressed1.size() + 12, &c);
+ c.push_back('j');
+ c.push_back('x');
+ c.push_back('l');
+ c.push_back('p');
+ AppendU32BE(jxlp_index++, &c);
+ c.append(compressed1.data(), compressed1.data() + compressed1.size());
+ // Third (last) codestream part
+ AppendU32BE(add_container == kCSBF_Multi_Zero_Terminated
+ ? 0
+ : (compressed2.size() + 12),
+ &c);
+ c.push_back('j');
+ c.push_back('x');
+ c.push_back('l');
+ c.push_back('p');
+ if (add_container != kCSBF_Multi_Last_Empty_Other) {
+ AppendU32BE(jxlp_index++ | 0x80000000, &c);
+ } else {
+ AppendU32BE(jxlp_index++, &c);
+ }
+ c.append(compressed2.data(), compressed2.data() + compressed2.size());
+ if (add_container == kCSBF_Multi_Last_Empty_Other) {
+ // Dummy (empty) codestream part
+ AppendU32BE(12, &c);
+ c.push_back('j');
+ c.push_back('x');
+ c.push_back('l');
+ c.push_back('p');
+ AppendU32BE(jxlp_index++ | 0x80000000, &c);
+ AppendTestBox(unk3_box_type, unk3_box_contents, unk3_box_size, false,
+ &c);
+ }
+ if (add_container == kCSBF_Multi_Other_Terminated) {
+ AppendTestBox(unk3_box_type, unk3_box_contents, unk3_box_size, false,
+ &c);
+ }
+ if (add_container == kCSBF_Multi_Other_Zero_Terminated) {
+ AppendTestBox(unk3_box_type, unk3_box_contents, unk3_box_size, true,
+ &c);
+ }
+ compressed.swap(c);
+ } else {
+ PaddedBytes c;
+ c.append(header, header + sizeof(header));
+ if (params.jpeg_codestream != nullptr) {
+ jxl::AppendBoxHeader(jxl::MakeBoxType("jbrd"), jpeg_data.size(), false,
+ &c);
+ c.append(jpeg_data.data(), jpeg_data.data() + jpeg_data.size());
+ }
+ if (add_container == kCSBF_Brob_Exif) {
+ c.append(box_brob_exif, box_brob_exif + box_brob_exif_size);
+ }
+ AppendU32BE(add_container == kCSBF_Single_Zero_Terminated
+ ? 0
+ : (compressed.size() + 8),
+ &c);
+ c.push_back('j');
+ c.push_back('x');
+ c.push_back('l');
+ c.push_back('c');
+ c.append(compressed.data(), compressed.data() + compressed.size());
+ if (add_container == kCSBF_Single_Other) {
+ AppendTestBox(unk1_box_type, unk1_box_contents, unk1_box_size, false,
+ &c);
+ }
+ compressed.swap(c);
+ }
+ }
+
+ return compressed;
+}
+
+JxlDecoderStatus ProcessInputIgnoreBoxes(JxlDecoder* dec) {
+ JxlDecoderStatus status = JXL_DEC_BOX;
+ while (status == JXL_DEC_BOX) {
+ status = JxlDecoderProcessInput(dec);
+ }
+ return status;
+}
+
+// Decodes one-shot with the API for non-streaming decoding tests.
+std::vector<uint8_t> DecodeWithAPI(JxlDecoder* dec,
+ Span<const uint8_t> compressed,
+ const JxlPixelFormat& format,
+ bool use_callback, bool set_buffer_early,
+ bool use_resizable_runner,
+ bool require_boxes, bool expect_success,
+ PaddedBytes* icc = nullptr) {
+ JxlThreadParallelRunnerPtr runner_fixed;
+ JxlResizableParallelRunnerPtr runner_resizable;
+ JxlParallelRunner runner_fn;
+ void* runner;
+
+ if (use_resizable_runner) {
+ runner_resizable = JxlResizableParallelRunnerMake(nullptr);
+ runner = runner_resizable.get();
+ runner_fn = JxlResizableParallelRunner;
+ } else {
+ size_t hw_threads = JxlThreadParallelRunnerDefaultNumWorkerThreads();
+ runner_fixed =
+ JxlThreadParallelRunnerMake(nullptr, std::min<size_t>(hw_threads, 16));
+ runner = runner_fixed.get();
+ runner_fn = JxlThreadParallelRunner;
+ }
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetParallelRunner(dec, runner_fn, runner));
+
+ auto process_input =
+ require_boxes ? ProcessInputIgnoreBoxes : JxlDecoderProcessInput;
+
+ EXPECT_EQ(
+ JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | (set_buffer_early ? JXL_DEC_FRAME : 0) |
+ JXL_DEC_PREVIEW_IMAGE | JXL_DEC_FULL_IMAGE |
+ (require_boxes ? JXL_DEC_BOX : 0) |
+ (icc != nullptr ? JXL_DEC_COLOR_ENCODING : 0)));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, process_input(dec));
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ if (use_resizable_runner) {
+ JxlResizableParallelRunnerSetThreads(
+ runner,
+ JxlResizableParallelRunnerSuggestThreads(info.xsize, info.ysize));
+ }
+
+ std::vector<uint8_t> pixels(buffer_size);
+ size_t bytes_per_pixel = format.num_channels *
+ test::GetDataBits(format.data_type) /
+ jxl::kBitsPerByte;
+ size_t stride = bytes_per_pixel * info.xsize;
+ if (format.align > 1) {
+ stride = jxl::DivCeil(stride, format.align) * format.align;
+ }
+ auto callback = [&](size_t x, size_t y, size_t num_pixels,
+ const void* pixels_row) {
+ memcpy(pixels.data() + stride * y + bytes_per_pixel * x, pixels_row,
+ num_pixels * bytes_per_pixel);
+ };
+
+ JxlDecoderStatus status = process_input(dec);
+
+ if (status == JXL_DEC_COLOR_ENCODING) {
+ size_t icc_size = 0;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetICCProfileSize(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_DATA, &icc_size));
+ icc->resize(icc_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsICCProfile(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_DATA,
+ icc->data(), icc_size));
+
+ status = process_input(dec);
+ }
+
+ std::vector<uint8_t> preview;
+ if (status == JXL_DEC_NEED_PREVIEW_OUT_BUFFER) {
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderPreviewOutBufferSize(dec, &format, &buffer_size));
+ preview.resize(buffer_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetPreviewOutBuffer(dec, &format, preview.data(),
+ preview.size()));
+ EXPECT_EQ(JXL_DEC_PREVIEW_IMAGE, process_input(dec));
+
+ status = process_input(dec);
+ }
+
+ if (set_buffer_early) {
+ EXPECT_EQ(JXL_DEC_FRAME, status);
+ } else {
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, status);
+ }
+
+ if (use_callback) {
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetImageOutCallback(
+ dec, &format,
+ [](void* opaque, size_t x, size_t y, size_t xsize,
+ const void* pixels_row) {
+ auto cb = static_cast<decltype(&callback)>(opaque);
+ (*cb)(x, y, xsize, pixels_row);
+ },
+ /*opaque=*/&callback));
+ } else {
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+ dec, &format, pixels.data(), pixels.size()));
+ }
+
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, process_input(dec));
+
+ // After the full image was output, JxlDecoderProcessInput should return
+ // success to indicate all is done, unless we requested boxes and the last
+ // box was not a terminal unbounded box, in which case it should ask for
+ // more input.
+ JxlDecoderStatus expected_status =
+ expect_success ? JXL_DEC_SUCCESS : JXL_DEC_NEED_MORE_INPUT;
+ EXPECT_EQ(expected_status, process_input(dec));
+
+ return pixels;
+}
+
+// Decodes one-shot with the API for non-streaming decoding tests.
+std::vector<uint8_t> DecodeWithAPI(Span<const uint8_t> compressed,
+ const JxlPixelFormat& format,
+ bool use_callback, bool set_buffer_early,
+ bool use_resizable_runner,
+ bool require_boxes, bool expect_success) {
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+ std::vector<uint8_t> pixels =
+ DecodeWithAPI(dec, compressed, format, use_callback, set_buffer_early,
+ use_resizable_runner, require_boxes, expect_success);
+ JxlDecoderDestroy(dec);
+ return pixels;
+}
+
+} // namespace
+} // namespace jxl
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(DecodeTest, JxlSignatureCheckTest) {
+ std::vector<std::pair<int, std::vector<uint8_t>>> tests = {
+ // No JPEGXL header starts with 'a'.
+ {JXL_SIG_INVALID, {'a'}},
+ {JXL_SIG_INVALID, {'a', 'b', 'c', 'd', 'e', 'f'}},
+
+ // Empty file is not enough bytes.
+ {JXL_SIG_NOT_ENOUGH_BYTES, {}},
+
+ // JPEGXL headers.
+ {JXL_SIG_NOT_ENOUGH_BYTES, {0xff}}, // Part of a signature.
+ {JXL_SIG_INVALID, {0xff, 0xD8}}, // JPEG-1
+ {JXL_SIG_CODESTREAM, {0xff, 0x0a}},
+
+ // JPEGXL container file.
+ {JXL_SIG_CONTAINER,
+ {0, 0, 0, 0xc, 'J', 'X', 'L', ' ', 0xD, 0xA, 0x87, 0xA}},
+ // Ending with invalid byte.
+ {JXL_SIG_INVALID, {0, 0, 0, 0xc, 'J', 'X', 'L', ' ', 0xD, 0xA, 0x87, 0}},
+ // Part of signature.
+ {JXL_SIG_NOT_ENOUGH_BYTES,
+ {0, 0, 0, 0xc, 'J', 'X', 'L', ' ', 0xD, 0xA, 0x87}},
+ {JXL_SIG_NOT_ENOUGH_BYTES, {0}},
+ };
+ for (const auto& test : tests) {
+ EXPECT_EQ(test.first,
+ JxlSignatureCheck(test.second.data(), test.second.size()))
+ << "Where test data is " << ::testing::PrintToString(test.second);
+ }
+}
+
+TEST(DecodeTest, DefaultAllocTest) {
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+ EXPECT_NE(nullptr, dec);
+ JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, CustomAllocTest) {
+ struct CalledCounters {
+ int allocs = 0;
+ int frees = 0;
+ } counters;
+
+ JxlMemoryManager mm;
+ mm.opaque = &counters;
+ mm.alloc = [](void* opaque, size_t size) {
+ reinterpret_cast<CalledCounters*>(opaque)->allocs++;
+ return malloc(size);
+ };
+ mm.free = [](void* opaque, void* address) {
+ reinterpret_cast<CalledCounters*>(opaque)->frees++;
+ free(address);
+ };
+
+ JxlDecoder* dec = JxlDecoderCreate(&mm);
+ EXPECT_NE(nullptr, dec);
+ EXPECT_LE(1, counters.allocs);
+ EXPECT_EQ(0, counters.frees);
+ JxlDecoderDestroy(dec);
+ EXPECT_LE(1, counters.frees);
+}
+
+// TODO(lode): add multi-threaded test when multithreaded pixel decoding from
+// API is implemented.
+TEST(DecodeTest, DefaultParallelRunnerTest) {
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+ EXPECT_NE(nullptr, dec);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetParallelRunner(dec, nullptr, nullptr));
+ JxlDecoderDestroy(dec);
+}
+
+// Creates the header of a JPEG XL file with various custom parameters for
+// testing.
+// xsize, ysize: image dimensions to store in the SizeHeader, max 512.
+// bits_per_sample, orientation: a selection of header parameters to test with.
+// orientation: image orientation to set in the metadata
+// alpha_bits: if non-0, alpha extra channel bits to set in the metadata. Also
+// gives the alpha channel the name "alpha_test"
+// have_container: add box container format around the codestream.
+// metadata_default: if true, ImageMetadata is set to default and
+// bits_per_sample, orientation and alpha_bits are ignored.
+// insert_box: insert an extra box before the codestream box, making the header
+// farther away from the front than is ideal. Only used if have_container.
+std::vector<uint8_t> GetTestHeader(size_t xsize, size_t ysize,
+ size_t bits_per_sample, size_t orientation,
+ size_t alpha_bits, bool xyb_encoded,
+ bool have_container, bool metadata_default,
+ bool insert_extra_box,
+ const jxl::PaddedBytes& icc_profile) {
+ jxl::BitWriter writer;
+ jxl::BitWriter::Allotment allotment(&writer, 65536); // Large enough
+
+ if (have_container) {
+ const std::vector<uint8_t> signature_box = {0, 0, 0, 0xc, 'J', 'X',
+ 'L', ' ', 0xd, 0xa, 0x87, 0xa};
+ const std::vector<uint8_t> filetype_box = {
+ 0, 0, 0, 0x14, 'f', 't', 'y', 'p', 'j', 'x',
+ 'l', ' ', 0, 0, 0, 0, 'j', 'x', 'l', ' '};
+ const std::vector<uint8_t> extra_box_header = {0, 0, 0, 0xff,
+ 't', 'e', 's', 't'};
+ // Beginning of codestream box, with an arbitrary size certainly large
+ // enough to contain the header
+ const std::vector<uint8_t> codestream_box_header = {0, 0, 0, 0xff,
+ 'j', 'x', 'l', 'c'};
+
+ for (size_t i = 0; i < signature_box.size(); i++) {
+ writer.Write(8, signature_box[i]);
+ }
+ for (size_t i = 0; i < filetype_box.size(); i++) {
+ writer.Write(8, filetype_box[i]);
+ }
+ if (insert_extra_box) {
+ for (size_t i = 0; i < extra_box_header.size(); i++) {
+ writer.Write(8, extra_box_header[i]);
+ }
+ for (size_t i = 0; i < 255 - 8; i++) {
+ writer.Write(8, 0);
+ }
+ }
+ for (size_t i = 0; i < codestream_box_header.size(); i++) {
+ writer.Write(8, codestream_box_header[i]);
+ }
+ }
+
+ // JXL signature
+ writer.Write(8, 0xff);
+ writer.Write(8, 0x0a);
+
+ // SizeHeader
+ jxl::CodecMetadata metadata;
+ EXPECT_TRUE(metadata.size.Set(xsize, ysize));
+ EXPECT_TRUE(WriteSizeHeader(metadata.size, &writer, 0, nullptr));
+
+ if (!metadata_default) {
+ metadata.m.SetUintSamples(bits_per_sample);
+ metadata.m.orientation = orientation;
+ metadata.m.SetAlphaBits(alpha_bits);
+ metadata.m.xyb_encoded = xyb_encoded;
+ if (alpha_bits != 0) {
+ metadata.m.extra_channel_info[0].name = "alpha_test";
+ }
+ }
+
+ if (!icc_profile.empty()) {
+ jxl::PaddedBytes copy = icc_profile;
+ EXPECT_TRUE(metadata.m.color_encoding.SetICC(std::move(copy)));
+ }
+
+ EXPECT_TRUE(jxl::Bundle::Write(metadata.m, &writer, 0, nullptr));
+ metadata.transform_data.nonserialized_xyb_encoded = metadata.m.xyb_encoded;
+ EXPECT_TRUE(jxl::Bundle::Write(metadata.transform_data, &writer, 0, nullptr));
+
+ if (!icc_profile.empty()) {
+ EXPECT_TRUE(metadata.m.color_encoding.WantICC());
+ EXPECT_TRUE(jxl::WriteICC(icc_profile, &writer, 0, nullptr));
+ }
+
+ writer.ZeroPadToByte();
+ allotment.ReclaimAndCharge(&writer, 0, nullptr);
+ return std::vector<uint8_t>(
+ writer.GetSpan().data(),
+ writer.GetSpan().data() + writer.GetSpan().size());
+}
+
+TEST(DecodeTest, BasicInfoTest) {
+ size_t xsize[2] = {50, 33};
+ size_t ysize[2] = {50, 77};
+ size_t bits_per_sample[2] = {8, 23};
+ size_t orientation[2] = {3, 5};
+ size_t alpha_bits[2] = {0, 8};
+ JXL_BOOL have_container[2] = {0, 1};
+ bool xyb_encoded = false;
+
+ std::vector<std::vector<uint8_t>> test_samples;
+ // Test with direct codestream
+ test_samples.push_back(GetTestHeader(
+ xsize[0], ysize[0], bits_per_sample[0], orientation[0], alpha_bits[0],
+ xyb_encoded, have_container[0], /*metadata_default=*/false,
+ /*insert_extra_box=*/false, {}));
+ // Test with container and different parameters
+ test_samples.push_back(GetTestHeader(
+ xsize[1], ysize[1], bits_per_sample[1], orientation[1], alpha_bits[1],
+ xyb_encoded, have_container[1], /*metadata_default=*/false,
+ /*insert_extra_box=*/false, {}));
+
+ for (size_t i = 0; i < test_samples.size(); ++i) {
+ const std::vector<uint8_t>& data = test_samples[i];
+ // Test decoding too small header first, until we reach the final byte.
+ for (size_t size = 0; size <= data.size(); ++size) {
+ // Test with a new decoder for each tested byte size.
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO));
+ const uint8_t* next_in = data.data();
+ size_t avail_in = size;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+ JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+
+ JxlBasicInfo info;
+ bool have_basic_info = !JxlDecoderGetBasicInfo(dec, &info);
+
+ if (size == data.size()) {
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, status);
+
+ // All header bytes given so the decoder must have the basic info.
+ EXPECT_EQ(true, have_basic_info);
+ EXPECT_EQ(have_container[i], info.have_container);
+ EXPECT_EQ(alpha_bits[i], info.alpha_bits);
+ // Orientations 5..8 swap the dimensions
+ if (orientation[i] >= 5) {
+ EXPECT_EQ(xsize[i], info.ysize);
+ EXPECT_EQ(ysize[i], info.xsize);
+ } else {
+ EXPECT_EQ(xsize[i], info.xsize);
+ EXPECT_EQ(ysize[i], info.ysize);
+ }
+ // The API should set the orientation to identity by default since it
+ // already applies the transformation internally by default.
+ EXPECT_EQ(1u, info.orientation);
+
+ EXPECT_EQ(3u, info.num_color_channels);
+
+ if (alpha_bits[i] != 0) {
+ // Expect an extra channel
+ EXPECT_EQ(1u, info.num_extra_channels);
+ JxlExtraChannelInfo extra;
+ EXPECT_EQ(0, JxlDecoderGetExtraChannelInfo(dec, 0, &extra));
+ EXPECT_EQ(alpha_bits[i], extra.bits_per_sample);
+ EXPECT_EQ(JXL_CHANNEL_ALPHA, extra.type);
+ EXPECT_EQ(0, extra.alpha_premultiplied);
+ // Verify the name "alpha_test" given to the alpha channel
+ EXPECT_EQ(10u, extra.name_length);
+ char name[11];
+ EXPECT_EQ(0,
+ JxlDecoderGetExtraChannelName(dec, 0, name, sizeof(name)));
+ EXPECT_EQ(std::string("alpha_test"), std::string(name));
+ } else {
+ EXPECT_EQ(0u, info.num_extra_channels);
+ }
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+ } else {
+ // If we did not give the full header, the basic info should not be
+ // available. Allow a few bytes of slack due to some bits for default
+ // opsinmatrix/extension bits.
+ if (size + 2 < data.size()) {
+ EXPECT_EQ(false, have_basic_info);
+ EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, status);
+ }
+ }
+
+ // Test that decoder doesn't allow setting a setting required at beginning
+ // unless it's reset
+ EXPECT_EQ(JXL_DEC_ERROR,
+ JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO));
+ JxlDecoderReset(dec);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO));
+
+ JxlDecoderDestroy(dec);
+ }
+ }
+}
+
+TEST(DecodeTest, BufferSizeTest) {
+ size_t xsize = 33;
+ size_t ysize = 77;
+ size_t bits_per_sample = 8;
+ size_t orientation = 1;
+ size_t alpha_bits = 8;
+ bool have_container = false;
+ bool xyb_encoded = false;
+
+ std::vector<uint8_t> header =
+ GetTestHeader(xsize, ysize, bits_per_sample, orientation, alpha_bits,
+ xyb_encoded, have_container, /*metadata_default=*/false,
+ /*insert_extra_box=*/false, {});
+
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO));
+ const uint8_t* next_in = header.data();
+ size_t avail_in = header.size();
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+ JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, status);
+
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(xsize, info.xsize);
+ EXPECT_EQ(ysize, info.ysize);
+
+ JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+ size_t image_out_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &image_out_size));
+ EXPECT_EQ(xsize * ysize * 4, image_out_size);
+
+ JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, BasicInfoSizeHintTest) {
+ // Test on a file where the size hint is too small initially due to inserting
+ // a box before the codestream (something that is normally not recommended)
+ size_t xsize = 50;
+ size_t ysize = 50;
+ size_t bits_per_sample = 16;
+ size_t orientation = 1;
+ size_t alpha_bits = 0;
+ bool xyb_encoded = false;
+ std::vector<uint8_t> data = GetTestHeader(
+ xsize, ysize, bits_per_sample, orientation, alpha_bits, xyb_encoded,
+ /*have_container=*/true, /*metadata_default=*/false,
+ /*insert_extra_box=*/true, {});
+
+ JxlDecoderStatus status;
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO));
+
+ size_t hint0 = JxlDecoderSizeHintBasicInfo(dec);
+ // Test that the test works as intended: we construct a file on purpose to
+ // be larger than the first hint by having that extra box.
+ EXPECT_LT(hint0, data.size());
+ const uint8_t* next_in = data.data();
+ // Do as if we have only as many bytes as indicated by the hint available
+ size_t avail_in = std::min(hint0, data.size());
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+ status = JxlDecoderProcessInput(dec);
+ EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, status);
+ // Basic info cannot be available yet due to the extra inserted box.
+ EXPECT_EQ(false, !JxlDecoderGetBasicInfo(dec, nullptr));
+
+ size_t num_read = avail_in - JxlDecoderReleaseInput(dec);
+ EXPECT_LT(num_read, data.size());
+
+ size_t hint1 = JxlDecoderSizeHintBasicInfo(dec);
+ // The hint must be larger than the previous hint (taking already processed
+ // bytes into account, the hint is a hint for the next avail_in) since the
+ // decoder now knows there is a box in between.
+ EXPECT_GT(hint1 + num_read, hint0);
+ avail_in = std::min<size_t>(hint1, data.size() - num_read);
+ next_in += num_read;
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+ status = JxlDecoderProcessInput(dec);
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, status);
+ JxlBasicInfo info;
+ // We should have the basic info now, since we only added one box in-between,
+ // and the decoder should have known its size, its implementation can return
+ // a correct hint.
+ EXPECT_EQ(true, !JxlDecoderGetBasicInfo(dec, &info));
+
+ // Also test if the basic info is correct.
+ EXPECT_EQ(1, info.have_container);
+ EXPECT_EQ(xsize, info.xsize);
+ EXPECT_EQ(ysize, info.ysize);
+ EXPECT_EQ(orientation, info.orientation);
+ EXPECT_EQ(bits_per_sample, info.bits_per_sample);
+
+ JxlDecoderDestroy(dec);
+}
+
+std::vector<uint8_t> GetIccTestHeader(const jxl::PaddedBytes& icc_profile,
+ bool xyb_encoded) {
+ size_t xsize = 50;
+ size_t ysize = 50;
+ size_t bits_per_sample = 16;
+ size_t orientation = 1;
+ size_t alpha_bits = 0;
+ return GetTestHeader(xsize, ysize, bits_per_sample, orientation, alpha_bits,
+ xyb_encoded,
+ /*have_container=*/false, /*metadata_default=*/false,
+ /*insert_extra_box=*/false, icc_profile);
+}
+
+// Tests the case where pixels and metadata ICC profile are the same
+TEST(DecodeTest, IccProfileTestOriginal) {
+ jxl::PaddedBytes icc_profile = GetIccTestProfile();
+ bool xyb_encoded = false;
+ std::vector<uint8_t> data = GetIccTestHeader(icc_profile, xyb_encoded);
+ JxlPixelFormat format = {4, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), data.size()));
+
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+
+ // Expect the opposite of xyb_encoded for uses_original_profile
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(JXL_TRUE, info.uses_original_profile);
+
+ EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+
+ // the encoded color profile expected to be not available, since the image
+ // has an ICC profile instead
+ EXPECT_EQ(JXL_DEC_ERROR,
+ JxlDecoderGetColorAsEncodedProfile(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_ORIGINAL, nullptr));
+
+ size_t dec_profile_size;
+ EXPECT_EQ(
+ JXL_DEC_SUCCESS,
+ JxlDecoderGetICCProfileSize(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_ORIGINAL, &dec_profile_size));
+
+ // Check that can get return status with NULL size
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetICCProfileSize(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_ORIGINAL, nullptr));
+
+ // The profiles must be equal. This requires they have equal size, and if
+ // they do, we can get the profile and compare the contents.
+ EXPECT_EQ(icc_profile.size(), dec_profile_size);
+ if (icc_profile.size() == dec_profile_size) {
+ jxl::PaddedBytes icc_profile2(icc_profile.size());
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetColorAsICCProfile(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+ icc_profile2.data(), icc_profile2.size()));
+ EXPECT_EQ(icc_profile, icc_profile2);
+ }
+
+ // the data is not xyb_encoded, so same result expected for the pixel data
+ // color profile
+ EXPECT_EQ(JXL_DEC_ERROR,
+ JxlDecoderGetColorAsEncodedProfile(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_DATA, nullptr));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetICCProfileSize(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_DATA,
+ &dec_profile_size));
+ EXPECT_EQ(icc_profile.size(), dec_profile_size);
+
+ JxlDecoderDestroy(dec);
+}
+
+// Tests the case where pixels and metadata ICC profile are different
+TEST(DecodeTest, IccProfileTestXybEncoded) {
+ jxl::PaddedBytes icc_profile = GetIccTestProfile();
+ bool xyb_encoded = true;
+ std::vector<uint8_t> data = GetIccTestHeader(icc_profile, xyb_encoded);
+ JxlPixelFormat format = {4, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+ JxlPixelFormat format_int = {4, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), data.size()));
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+
+ // Expect the opposite of xyb_encoded for uses_original_profile
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(JXL_FALSE, info.uses_original_profile);
+
+ EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+
+ // the encoded color profile expected to be not available, since the image
+ // has an ICC profile instead
+ EXPECT_EQ(JXL_DEC_ERROR,
+ JxlDecoderGetColorAsEncodedProfile(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_ORIGINAL, nullptr));
+
+ // Check that can get return status with NULL size
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetICCProfileSize(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_ORIGINAL, nullptr));
+
+ size_t dec_profile_size;
+ EXPECT_EQ(
+ JXL_DEC_SUCCESS,
+ JxlDecoderGetICCProfileSize(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_ORIGINAL, &dec_profile_size));
+
+ // The profiles must be equal. This requires they have equal size, and if
+ // they do, we can get the profile and compare the contents.
+ EXPECT_EQ(icc_profile.size(), dec_profile_size);
+ if (icc_profile.size() == dec_profile_size) {
+ jxl::PaddedBytes icc_profile2(icc_profile.size());
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetColorAsICCProfile(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+ icc_profile2.data(), icc_profile2.size()));
+ EXPECT_EQ(icc_profile, icc_profile2);
+ }
+
+ // Data is xyb_encoded, so the data profile is a different profile, encoded
+ // as structured profile.
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetColorAsEncodedProfile(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_DATA, nullptr));
+ JxlColorEncoding pixel_encoding;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetColorAsEncodedProfile(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_DATA, &pixel_encoding));
+ EXPECT_EQ(JXL_PRIMARIES_SRGB, pixel_encoding.primaries);
+ // The API returns LINEAR by default when the colorspace cannot be represented
+ // by enum values.
+ EXPECT_EQ(JXL_TRANSFER_FUNCTION_LINEAR, pixel_encoding.transfer_function);
+
+ // Test the same but with integer format.
+ EXPECT_EQ(
+ JXL_DEC_SUCCESS,
+ JxlDecoderGetColorAsEncodedProfile(
+ dec, &format_int, JXL_COLOR_PROFILE_TARGET_DATA, &pixel_encoding));
+ EXPECT_EQ(JXL_PRIMARIES_SRGB, pixel_encoding.primaries);
+ EXPECT_EQ(JXL_TRANSFER_FUNCTION_LINEAR, pixel_encoding.transfer_function);
+
+ // Test after setting the preferred color profile to non-linear sRGB:
+ // for XYB images with ICC profile, this setting is expected to take effect.
+ jxl::ColorEncoding temp_jxl_srgb = jxl::ColorEncoding::SRGB(false);
+ JxlColorEncoding pixel_encoding_srgb;
+ ConvertInternalToExternalColorEncoding(temp_jxl_srgb, &pixel_encoding_srgb);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetPreferredColorProfile(dec, &pixel_encoding_srgb));
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetColorAsEncodedProfile(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_DATA, &pixel_encoding));
+ EXPECT_EQ(JXL_TRANSFER_FUNCTION_SRGB, pixel_encoding.transfer_function);
+
+ // The decoder can also output this as a generated ICC profile anyway, and
+ // we're certain that it will differ from the above defined profile since
+ // the sRGB data should not have swapped R/G/B primaries.
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetICCProfileSize(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_DATA,
+ &dec_profile_size));
+ // We don't need to dictate exactly what size the generated ICC profile
+ // must be (since there are many ways to represent the same color space),
+ // but it should not be zero.
+ EXPECT_NE(0u, dec_profile_size);
+ jxl::PaddedBytes icc_profile2(dec_profile_size);
+ if (0 != dec_profile_size) {
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsICCProfile(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_DATA,
+ icc_profile2.data(), icc_profile2.size()));
+ // expected not equal
+ EXPECT_NE(icc_profile, icc_profile2);
+ }
+
+ // Test setting another different preferred profile, to verify that the
+ // returned JXL_COLOR_PROFILE_TARGET_DATA ICC profile is correctly
+ // updated.
+
+ jxl::ColorEncoding temp_jxl_linear = jxl::ColorEncoding::LinearSRGB(false);
+ JxlColorEncoding pixel_encoding_linear;
+ ConvertInternalToExternalColorEncoding(temp_jxl_linear,
+ &pixel_encoding_linear);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetPreferredColorProfile(dec, &pixel_encoding_linear));
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetColorAsEncodedProfile(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_DATA, &pixel_encoding));
+ EXPECT_EQ(JXL_TRANSFER_FUNCTION_LINEAR, pixel_encoding.transfer_function);
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetICCProfileSize(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_DATA,
+ &dec_profile_size));
+ EXPECT_NE(0u, dec_profile_size);
+ jxl::PaddedBytes icc_profile3(dec_profile_size);
+ if (0 != dec_profile_size) {
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsICCProfile(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_DATA,
+ icc_profile3.data(), icc_profile3.size()));
+ // expected not equal to the previously set preferred profile.
+ EXPECT_NE(icc_profile2, icc_profile3);
+ }
+
+ JxlDecoderDestroy(dec);
+}
+
+// Test decoding ICC from partial files byte for byte.
+// This test must pass also if JXL_CRASH_ON_ERROR is enabled, that is, the
+// decoding of the ANS histogram and stream of the encoded ICC profile must also
+// handle the case of not enough input bytes with StatusCode::kNotEnoughBytes
+// rather than fatal error status codes.
+TEST(DecodeTest, ICCPartialTest) {
+ jxl::PaddedBytes icc_profile = GetIccTestProfile();
+ std::vector<uint8_t> data = GetIccTestHeader(icc_profile, false);
+ JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+
+ const uint8_t* next_in = data.data();
+ size_t avail_in = 0;
+
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING));
+
+ bool seen_basic_info = false;
+ bool seen_color_encoding = false;
+ size_t total_size = 0;
+
+ for (;;) {
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+ JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+ size_t remaining = JxlDecoderReleaseInput(dec);
+ EXPECT_LE(remaining, avail_in);
+ next_in += avail_in - remaining;
+ avail_in = remaining;
+ if (status == JXL_DEC_NEED_MORE_INPUT) {
+ if (total_size >= data.size()) {
+ // End of partial codestream with codestrema headers and ICC profile
+ // reached, it should not require more input since full image is not
+ // requested
+ FAIL();
+ break;
+ }
+ size_t increment = 1;
+ if (total_size + increment > data.size()) {
+ increment = data.size() - total_size;
+ }
+ total_size += increment;
+ avail_in += increment;
+ } else if (status == JXL_DEC_BASIC_INFO) {
+ EXPECT_FALSE(seen_basic_info);
+ seen_basic_info = true;
+ } else if (status == JXL_DEC_COLOR_ENCODING) {
+ EXPECT_TRUE(seen_basic_info);
+ EXPECT_FALSE(seen_color_encoding);
+ seen_color_encoding = true;
+
+ // Sanity check that the ICC profile was decoded correctly
+ size_t dec_profile_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetICCProfileSize(dec, &format,
+ JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+ &dec_profile_size));
+ EXPECT_EQ(icc_profile.size(), dec_profile_size);
+
+ } else if (status == JXL_DEC_SUCCESS) {
+ EXPECT_TRUE(seen_color_encoding);
+ break;
+ } else {
+ // We do not expect any other events or errors
+ FAIL();
+ break;
+ }
+ }
+
+ EXPECT_TRUE(seen_basic_info);
+ EXPECT_TRUE(seen_color_encoding);
+
+ JxlDecoderDestroy(dec);
+}
+
+struct PixelTestConfig {
+ // Input image definition.
+ bool grayscale;
+ bool include_alpha;
+ size_t xsize;
+ size_t ysize;
+ jxl::PreviewMode preview_mode;
+ bool add_intrinsic_size;
+ // Output format.
+ JxlEndianness endianness;
+ JxlDataType data_type;
+ uint32_t output_channels;
+ // Container options.
+ CodeStreamBoxFormat add_container;
+ // Decoding mode.
+ bool use_callback;
+ bool set_buffer_early;
+ bool use_resizable_runner;
+ // Exif orientation, 1-8
+ JxlOrientation orientation;
+ bool keep_orientation;
+ size_t upsampling;
+};
+
+class DecodeTestParam : public ::testing::TestWithParam<PixelTestConfig> {};
+
+TEST_P(DecodeTestParam, PixelTest) {
+ PixelTestConfig config = GetParam();
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+
+ if (config.keep_orientation) {
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetKeepOrientation(dec, JXL_TRUE));
+ }
+
+ size_t num_pixels = config.xsize * config.ysize;
+ uint32_t orig_channels =
+ (config.grayscale ? 1 : 3) + (config.include_alpha ? 1 : 0);
+ std::vector<uint8_t> pixels =
+ jxl::test::GetSomeTestImage(config.xsize, config.ysize, orig_channels, 0);
+ JxlPixelFormat format_orig = {orig_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN,
+ 0};
+ jxl::TestCodestreamParams params;
+ // Lossless to verify pixels exactly after roundtrip.
+ params.cparams.SetLossless();
+ params.cparams.speed_tier = jxl::SpeedTier::kThunder;
+ params.cparams.resampling = config.upsampling;
+ params.cparams.ec_resampling = config.upsampling;
+ params.box_format = config.add_container;
+ params.orientation = config.orientation;
+ params.preview_mode = config.preview_mode;
+ params.add_intrinsic_size = config.add_intrinsic_size;
+ jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), config.xsize,
+ config.ysize, orig_channels, params);
+
+ JxlPixelFormat format = {config.output_channels, config.data_type,
+ config.endianness, 0};
+
+ bool swap_xy = !config.keep_orientation && (config.orientation > 4);
+ size_t xsize = swap_xy ? config.ysize : config.xsize;
+ size_t ysize = swap_xy ? config.xsize : config.ysize;
+
+ std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+ dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+ format, config.use_callback, config.set_buffer_early,
+ config.use_resizable_runner, /*require_boxes=*/false,
+ /*expect_success=*/true);
+ JxlDecoderReset(dec);
+ EXPECT_EQ(num_pixels * config.output_channels *
+ jxl::test::GetDataBits(config.data_type) / jxl::kBitsPerByte,
+ pixels2.size());
+
+ // If an orientation transformation is expected, to compare the pixels, also
+ // apply this transformation to the original pixels. ConvertToExternal is
+ // used to achieve this, with a temporary conversion to CodecInOut and back.
+ if (config.orientation > 1 && !config.keep_orientation) {
+ jxl::Span<const uint8_t> bytes(pixels.data(), pixels.size());
+ jxl::ColorEncoding color_encoding =
+ jxl::ColorEncoding::SRGB(config.grayscale);
+
+ jxl::CodecInOut io;
+ if (config.include_alpha) io.metadata.m.SetAlphaBits(16);
+ io.metadata.m.color_encoding = color_encoding;
+ io.SetSize(config.xsize, config.ysize);
+
+ EXPECT_TRUE(ConvertFromExternal(bytes, config.xsize, config.ysize,
+ color_encoding, 16, format_orig, nullptr,
+ &io.Main()));
+
+ for (size_t i = 0; i < pixels.size(); i++) pixels[i] = 0;
+ EXPECT_TRUE(ConvertToExternal(
+ io.Main(), 16,
+ /*float_out=*/false, orig_channels, JXL_BIG_ENDIAN,
+ xsize * 2 * orig_channels, nullptr, pixels.data(), pixels.size(),
+ /*out_callback=*/{},
+ static_cast<jxl::Orientation>(config.orientation)));
+ }
+ if (config.upsampling == 1) {
+ EXPECT_EQ(0u, jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize,
+ ysize, format_orig, format));
+ } else {
+ // resampling is of course not lossless, so as a rough check:
+ // count pixels that are more than off-by-25 in the 8-bit value of one of
+ // the channels
+ EXPECT_LE(
+ jxl::test::ComparePixels(
+ pixels.data(), pixels2.data(), xsize, ysize, format_orig, format,
+ 50.0 * (config.data_type == JXL_TYPE_UINT8 ? 1.0 : 256.0)),
+ 300u);
+ }
+
+ JxlDecoderDestroy(dec);
+}
+
+std::vector<PixelTestConfig> GeneratePixelTests() {
+ std::vector<PixelTestConfig> all_tests;
+ struct ChannelInfo {
+ bool grayscale;
+ bool include_alpha;
+ size_t output_channels;
+ };
+ ChannelInfo ch_info[] = {
+ {false, true, 4}, // RGBA -> RGBA
+ {true, false, 1}, // G -> G
+ {true, true, 1}, // GA -> G
+ {true, true, 2}, // GA -> GA
+ {false, false, 3}, // RGB -> RGB
+ {false, true, 3}, // RGBA -> RGB
+ {false, false, 4}, // RGB -> RGBA
+ };
+
+ struct OutputFormat {
+ JxlEndianness endianness;
+ JxlDataType data_type;
+ };
+ OutputFormat out_formats[] = {
+ {JXL_NATIVE_ENDIAN, JXL_TYPE_UINT8},
+ {JXL_LITTLE_ENDIAN, JXL_TYPE_UINT16},
+ {JXL_BIG_ENDIAN, JXL_TYPE_UINT16},
+ {JXL_NATIVE_ENDIAN, JXL_TYPE_FLOAT16},
+ {JXL_LITTLE_ENDIAN, JXL_TYPE_FLOAT},
+ {JXL_BIG_ENDIAN, JXL_TYPE_FLOAT},
+ };
+
+ auto make_test = [&](ChannelInfo ch, size_t xsize, size_t ysize,
+ jxl::PreviewMode preview_mode, bool intrinsic_size,
+ CodeStreamBoxFormat box, JxlOrientation orientation,
+ bool keep_orientation, OutputFormat format,
+ bool use_callback, bool set_buffer_early,
+ bool resizable_runner, size_t upsampling) {
+ PixelTestConfig c;
+ c.grayscale = ch.grayscale;
+ c.include_alpha = ch.include_alpha;
+ c.preview_mode = preview_mode;
+ c.add_intrinsic_size = intrinsic_size;
+ c.xsize = xsize;
+ c.ysize = ysize;
+ c.add_container = (CodeStreamBoxFormat)box;
+ c.output_channels = ch.output_channels;
+ c.data_type = format.data_type;
+ c.endianness = format.endianness;
+ c.use_callback = use_callback;
+ c.set_buffer_early = set_buffer_early;
+ c.use_resizable_runner = resizable_runner;
+ c.orientation = orientation;
+ c.keep_orientation = keep_orientation;
+ c.upsampling = upsampling;
+ all_tests.push_back(c);
+ };
+
+ // Test output formats and methods.
+ for (ChannelInfo ch : ch_info) {
+ for (int use_callback = 0; use_callback <= 1; use_callback++) {
+ for (size_t upsampling : {1, 2, 4, 8}) {
+ for (OutputFormat fmt : out_formats) {
+ make_test(ch, 301, 33, jxl::kNoPreview,
+ /*add_intrinsic_size=*/false,
+ CodeStreamBoxFormat::kCSBF_None, JXL_ORIENT_IDENTITY,
+ /*keep_orientation=*/false, fmt, use_callback,
+ /*set_buffer_early=*/false, /*resizable_runner=*/false,
+ upsampling);
+ }
+ }
+ }
+ }
+ // Test codestream formats.
+ for (size_t box = 1; box < kCSBF_NUM_ENTRIES; ++box) {
+ make_test(ch_info[0], 77, 33, jxl::kNoPreview,
+ /*add_intrinsic_size=*/false, (CodeStreamBoxFormat)box,
+ JXL_ORIENT_IDENTITY,
+ /*keep_orientation=*/false, out_formats[0],
+ /*use_callback=*/false,
+ /*set_buffer_early=*/false, /*resizable_runner=*/false, 1);
+ }
+ // Test previews.
+ for (int preview_mode = 0; preview_mode < jxl::kNumPreviewModes;
+ preview_mode++) {
+ make_test(ch_info[0], 77, 33, (jxl::PreviewMode)preview_mode,
+ /*add_intrinsic_size=*/false, CodeStreamBoxFormat::kCSBF_None,
+ JXL_ORIENT_IDENTITY,
+ /*keep_orientation=*/false, out_formats[0],
+ /*use_callback=*/false, /*set_buffer_early=*/false,
+ /*resizable_runner=*/false, 1);
+ }
+ // Test intrinsic sizes.
+ for (int add_intrinsic_size = 0; add_intrinsic_size <= 1;
+ add_intrinsic_size++) {
+ make_test(ch_info[0], 55, 34, jxl::kNoPreview, add_intrinsic_size,
+ CodeStreamBoxFormat::kCSBF_None, JXL_ORIENT_IDENTITY,
+ /*keep_orientation=*/false, out_formats[0],
+ /*use_callback=*/false, /*set_buffer_early=*/false,
+ /*resizable_runner=*/false, 1);
+ }
+ // Test setting buffers early.
+ make_test(ch_info[0], 300, 33, jxl::kNoPreview,
+ /*add_intrinsic_size=*/false, CodeStreamBoxFormat::kCSBF_None,
+ JXL_ORIENT_IDENTITY,
+ /*keep_orientation=*/false, out_formats[0],
+ /*use_callback=*/false, /*set_buffer_early=*/true,
+ /*resizable_runner=*/false, 1);
+
+ // Test using the resizable runner
+ for (size_t i = 0; i < 4; i++) {
+ make_test(ch_info[0], 300 << i, 33 << i, jxl::kNoPreview,
+ /*add_intrinsic_size=*/false, CodeStreamBoxFormat::kCSBF_None,
+ JXL_ORIENT_IDENTITY,
+ /*keep_orientation=*/false, out_formats[0],
+ /*use_callback=*/false, /*set_buffer_early=*/false,
+ /*resizable_runner=*/true, 1);
+ }
+
+ // Test orientations.
+ for (int orientation = 2; orientation <= 8; ++orientation) {
+ for (int keep_orientation = 0; keep_orientation <= 1; keep_orientation++) {
+ for (int use_callback = 0; use_callback <= 1; use_callback++) {
+ for (ChannelInfo ch : ch_info) {
+ for (OutputFormat fmt : out_formats) {
+ make_test(ch, 280, 12, jxl::kNoPreview,
+ /*add_intrinsic_size=*/false,
+ CodeStreamBoxFormat::kCSBF_None,
+ static_cast<JxlOrientation>(orientation),
+ /*keep_orientation=*/keep_orientation, fmt,
+ /*use_callback=*/use_callback, /*set_buffer_early=*/true,
+ /*resizable_runner=*/false, 1);
+ }
+ }
+ }
+ }
+ }
+
+ return all_tests;
+}
+
+std::ostream& operator<<(std::ostream& os, const PixelTestConfig& c) {
+ os << c.xsize << "x" << c.ysize;
+ const char* colors[] = {"", "G", "GA", "RGB", "RGBA"};
+ os << colors[(c.grayscale ? 1 : 3) + (c.include_alpha ? 1 : 0)];
+ os << "to";
+ os << colors[c.output_channels];
+ switch (c.data_type) {
+ case JXL_TYPE_UINT8:
+ os << "u8";
+ break;
+ case JXL_TYPE_UINT16:
+ os << "u16";
+ break;
+ case JXL_TYPE_FLOAT:
+ os << "f32";
+ break;
+ case JXL_TYPE_FLOAT16:
+ os << "f16";
+ break;
+ default:
+ JXL_ASSERT(false);
+ };
+ if (jxl::test::GetDataBits(c.data_type) > jxl::kBitsPerByte) {
+ if (c.endianness == JXL_NATIVE_ENDIAN) {
+ // add nothing
+ } else if (c.endianness == JXL_BIG_ENDIAN) {
+ os << "BE";
+ } else if (c.endianness == JXL_LITTLE_ENDIAN) {
+ os << "LE";
+ }
+ }
+ if (c.add_container != CodeStreamBoxFormat::kCSBF_None) {
+ os << "Box";
+ os << (size_t)c.add_container;
+ }
+ if (c.preview_mode == jxl::kSmallPreview) os << "Preview";
+ if (c.preview_mode == jxl::kBigPreview) os << "BigPreview";
+ if (c.add_intrinsic_size) os << "IntrinicSize";
+ if (c.use_callback) os << "Callback";
+ if (c.set_buffer_early) os << "EarlyBuffer";
+ if (c.use_resizable_runner) os << "ResizableRunner";
+ if (c.orientation != 1) os << "O" << c.orientation;
+ if (c.keep_orientation) os << "Keep";
+ if (c.upsampling > 1) os << "x" << c.upsampling;
+ return os;
+}
+
+std::string PixelTestDescription(
+ const testing::TestParamInfo<DecodeTestParam::ParamType>& info) {
+ std::stringstream name;
+ name << info.param;
+ return name.str();
+}
+
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(DecodeTest, DecodeTestParam,
+ testing::ValuesIn(GeneratePixelTests()),
+ PixelTestDescription);
+
+TEST(DecodeTest, PixelTestWithICCProfileLossless) {
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+
+ size_t xsize = 123, ysize = 77;
+ size_t num_pixels = xsize * ysize;
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+ JxlPixelFormat format_orig = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ jxl::TestCodestreamParams params;
+ // Lossless to verify pixels exactly after roundtrip.
+ params.cparams.SetLossless();
+ params.cparams.speed_tier = jxl::SpeedTier::kThunder;
+ params.add_icc_profile = true;
+ // For variation: some have container and no preview, others have preview
+ // and no container.
+ jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 4,
+ params);
+
+ for (uint32_t channels = 3; channels <= 4; ++channels) {
+ {
+ JxlPixelFormat format = {channels, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+
+ std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+ dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+ format, /*use_callback=*/false, /*set_buffer_early=*/false,
+ /*use_resizable_runner=*/false, /*require_boxes=*/false,
+ /*expect_success=*/true);
+ JxlDecoderReset(dec);
+ EXPECT_EQ(num_pixels * channels, pixels2.size());
+ EXPECT_EQ(0u,
+ jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize,
+ ysize, format_orig, format));
+ }
+ {
+ JxlPixelFormat format = {channels, JXL_TYPE_UINT16, JXL_LITTLE_ENDIAN, 0};
+
+ // Test with the container for one of the pixel formats.
+ std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+ dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+ format, /*use_callback=*/true, /*set_buffer_early=*/true,
+ /*use_resizable_runner=*/false, /*require_boxes=*/false,
+ /*expect_success=*/true);
+ JxlDecoderReset(dec);
+ EXPECT_EQ(num_pixels * channels * 2, pixels2.size());
+ EXPECT_EQ(0u,
+ jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize,
+ ysize, format_orig, format));
+ }
+
+ {
+ JxlPixelFormat format = {channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+
+ std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+ dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+ format, /*use_callback=*/false, /*set_buffer_early=*/false,
+ /*use_resizable_runner=*/false, /*require_boxes=*/false,
+ /*expect_success=*/true);
+ JxlDecoderReset(dec);
+ EXPECT_EQ(num_pixels * channels * 4, pixels2.size());
+ EXPECT_EQ(0u,
+ jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize,
+ ysize, format_orig, format));
+ }
+ }
+
+ JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, PixelTestWithICCProfileLossy) {
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+
+ size_t xsize = 123, ysize = 77;
+ size_t num_pixels = xsize * ysize;
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+ JxlPixelFormat format_orig = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ jxl::TestCodestreamParams params;
+ params.add_icc_profile = true;
+ jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+ params);
+ uint32_t channels = 3;
+
+ JxlPixelFormat format = {channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+
+ jxl::PaddedBytes icc;
+ std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+ dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+ format, /*use_callback=*/false, /*set_buffer_early=*/true,
+ /*use_resizable_runner=*/false, /*require_boxes=*/false,
+ /*expect_success=*/true, /*icc=*/&icc);
+ JxlDecoderReset(dec);
+ EXPECT_EQ(num_pixels * channels * 4, pixels2.size());
+
+ // The input pixels use the profile matching GetIccTestProfile, since we set
+ // add_icc_profile for CreateTestJXLCodestream to true.
+ jxl::ColorEncoding color_encoding0;
+ EXPECT_TRUE(color_encoding0.SetICC(GetIccTestProfile()));
+ jxl::Span<const uint8_t> span0(pixels.data(), pixels.size());
+ jxl::CodecInOut io0;
+ io0.SetSize(xsize, ysize);
+ EXPECT_TRUE(ConvertFromExternal(span0, xsize, ysize, color_encoding0,
+ /*bits_per_sample=*/16, format_orig,
+ /*pool=*/nullptr, &io0.Main()));
+
+ jxl::ColorEncoding color_encoding1;
+ EXPECT_TRUE(color_encoding1.SetICC(std::move(icc)));
+ jxl::Span<const uint8_t> span1(pixels2.data(), pixels2.size());
+ jxl::CodecInOut io1;
+ io1.SetSize(xsize, ysize);
+ EXPECT_TRUE(ConvertFromExternal(span1, xsize, ysize, color_encoding1,
+ /*bits_per_sample=*/32, format,
+ /*pool=*/nullptr, &io1.Main()));
+
+ jxl::ButteraugliParams ba;
+ EXPECT_THAT(ButteraugliDistance(io0.frames, io1.frames, ba, jxl::GetJxlCms(),
+ /*distmap=*/nullptr, nullptr),
+ IsSlightlyBelow(0.79f));
+
+ JxlDecoderDestroy(dec);
+}
+
+std::string ColorDescription(JxlColorEncoding c) {
+ jxl::ColorEncoding color_encoding;
+ EXPECT_TRUE(ConvertExternalToInternalColorEncoding(c, &color_encoding));
+ return Description(color_encoding);
+}
+
+std::string GetOrigProfile(JxlDecoder* dec) {
+ JxlColorEncoding c;
+ JxlColorProfileTarget target = JXL_COLOR_PROFILE_TARGET_ORIGINAL;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetColorAsEncodedProfile(dec, nullptr, target, &c));
+ return ColorDescription(c);
+}
+
+std::string GetDataProfile(JxlDecoder* dec) {
+ JxlColorEncoding c;
+ JxlColorProfileTarget target = JXL_COLOR_PROFILE_TARGET_DATA;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetColorAsEncodedProfile(dec, nullptr, target, &c));
+ return ColorDescription(c);
+}
+
+double ButteraugliDistance(size_t xsize, size_t ysize,
+ const std::vector<uint8_t>& pixels_in,
+ const jxl::ColorEncoding& color_in,
+ float intensity_in,
+ const std::vector<uint8_t>& pixels_out,
+ const jxl::ColorEncoding& color_out,
+ float intensity_out) {
+ jxl::CodecInOut in;
+ in.metadata.m.color_encoding = color_in;
+ in.metadata.m.SetIntensityTarget(intensity_in);
+ JxlPixelFormat format_in = {static_cast<uint32_t>(color_in.Channels()),
+ JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ EXPECT_TRUE(jxl::ConvertFromExternal(
+ jxl::Span<const uint8_t>(pixels_in.data(), pixels_in.size()), xsize,
+ ysize, color_in,
+ /*bits_per_sample=*/16, format_in,
+ /*pool=*/nullptr, &in.Main()));
+ jxl::CodecInOut out;
+ out.metadata.m.color_encoding = color_out;
+ out.metadata.m.SetIntensityTarget(intensity_out);
+ JxlPixelFormat format_out = {static_cast<uint32_t>(color_out.Channels()),
+ JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ EXPECT_TRUE(jxl::ConvertFromExternal(
+ jxl::Span<const uint8_t>(pixels_out.data(), pixels_out.size()), xsize,
+ ysize, color_out,
+ /*bits_per_sample=*/16, format_out,
+ /*pool=*/nullptr, &out.Main()));
+ return ButteraugliDistance(in.frames, out.frames, jxl::ButteraugliParams(),
+ jxl::GetJxlCms(), nullptr, nullptr);
+}
+
+class DecodeAllEncodingsTest
+ : public ::testing::TestWithParam<jxl::test::ColorEncodingDescriptor> {};
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(
+ DecodeAllEncodingsTestInstantiation, DecodeAllEncodingsTest,
+ ::testing::ValuesIn(jxl::test::AllEncodings()));
+TEST_P(DecodeAllEncodingsTest, PreserveOriginalProfileTest) {
+ size_t xsize = 123, ysize = 77;
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+ JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ int events = JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING | JXL_DEC_FULL_IMAGE;
+ const auto& cdesc = GetParam();
+ jxl::ColorEncoding c_in = jxl::test::ColorEncodingFromDescriptor(cdesc);
+ if (c_in.rendering_intent != jxl::RenderingIntent::kRelative) return;
+ std::string color_space_in = Description(c_in);
+ float intensity_in = c_in.tf.IsPQ() ? 10000 : 255;
+ printf("Testing input color space %s\n", color_space_in.c_str());
+ jxl::TestCodestreamParams params;
+ params.color_space = color_space_in;
+ params.intensity_target = intensity_in;
+ jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+ params);
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), data.size()));
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(xsize, info.xsize);
+ EXPECT_EQ(ysize, info.ysize);
+ EXPECT_FALSE(info.uses_original_profile);
+ EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(GetOrigProfile(dec), color_space_in);
+ EXPECT_EQ(GetDataProfile(dec), color_space_in);
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+ std::vector<uint8_t> out(pixels.size());
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetImageOutBuffer(dec, &format, out.data(), out.size()));
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ double dist = ButteraugliDistance(xsize, ysize, pixels, c_in, intensity_in,
+ out, c_in, intensity_in);
+ EXPECT_LT(dist, 1.29);
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+ JxlDecoderDestroy(dec);
+}
+
+namespace {
+void SetPreferredColorProfileTest(
+ const jxl::test::ColorEncodingDescriptor& from) {
+ size_t xsize = 123, ysize = 77;
+ int events = JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING | JXL_DEC_FULL_IMAGE;
+ jxl::ColorEncoding c_in = jxl::test::ColorEncodingFromDescriptor(from);
+ if (c_in.rendering_intent != jxl::RenderingIntent::kRelative) return;
+ if (c_in.white_point != jxl::WhitePoint::kD65) return;
+ uint32_t num_channels = c_in.Channels();
+ std::vector<uint8_t> pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+ JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ std::string color_space_in = Description(c_in);
+ float intensity_in = c_in.tf.IsPQ() ? 10000 : 255;
+ jxl::TestCodestreamParams params;
+ params.color_space = color_space_in;
+ params.intensity_target = intensity_in;
+ jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+ num_channels, params);
+ auto all_encodings = jxl::test::AllEncodings();
+ all_encodings.push_back(
+ {jxl::ColorSpace::kXYB, jxl::WhitePoint::kD65, jxl::Primaries::kCustom,
+ jxl::TransferFunction::kUnknown, jxl::RenderingIntent::kPerceptual});
+ for (const auto& c1 : all_encodings) {
+ jxl::ColorEncoding c_out = jxl::test::ColorEncodingFromDescriptor(c1);
+ float intensity_out = intensity_in;
+ if (c_out.GetColorSpace() != jxl::ColorSpace::kXYB) {
+ if (c_out.rendering_intent != jxl::RenderingIntent::kRelative) {
+ continue;
+ }
+ if ((c_in.primaries == jxl::Primaries::k2100 &&
+ c_out.primaries != jxl::Primaries::k2100) ||
+ (c_in.primaries == jxl::Primaries::kP3 &&
+ c_out.primaries == jxl::Primaries::kSRGB)) {
+ // Converting to a narrower gamut does not work without gammut mapping.
+ continue;
+ }
+ }
+ if (c_out.tf.IsHLG() && intensity_out > 300) {
+ // The Linear->HLG OOTF function at this intensity level can push
+ // saturated colors out of gamut, so we would need gamut mapping in
+ // this case too.
+ continue;
+ }
+ std::string color_space_out = Description(c_out);
+ if (color_space_in == color_space_out) continue;
+ printf("Testing input color space %s with output color space %s\n",
+ color_space_in.c_str(), color_space_out.c_str());
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events));
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetInput(dec, data.data(), data.size()));
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(xsize, info.xsize);
+ EXPECT_EQ(ysize, info.ysize);
+ EXPECT_FALSE(info.uses_original_profile);
+ EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(GetOrigProfile(dec), color_space_in);
+ EXPECT_EQ(GetDataProfile(dec), color_space_in);
+ JxlColorEncoding encoding_out;
+ EXPECT_TRUE(jxl::ParseDescription(color_space_out, &encoding_out));
+ if (c_out.GetColorSpace() == jxl::ColorSpace::kXYB &&
+ (c_in.primaries != jxl::Primaries::kSRGB || c_in.tf.IsPQ())) {
+ EXPECT_EQ(JXL_DEC_ERROR,
+ JxlDecoderSetPreferredColorProfile(dec, &encoding_out));
+ JxlDecoderDestroy(dec);
+ continue;
+ }
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetPreferredColorProfile(dec, &encoding_out));
+ EXPECT_EQ(GetOrigProfile(dec), color_space_in);
+ EXPECT_EQ(GetDataProfile(dec), color_space_out);
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+ size_t buffer_size;
+ JxlPixelFormat out_format = format;
+ out_format.num_channels = c_out.Channels();
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &out_format, &buffer_size));
+ std::vector<uint8_t> out(buffer_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+ dec, &out_format, out.data(), out.size()));
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ double dist = ButteraugliDistance(xsize, ysize, pixels, c_in, intensity_in,
+ out, c_out, intensity_out);
+ if (c_in.white_point == c_out.white_point) {
+ EXPECT_LT(dist, 1.29);
+ } else {
+ EXPECT_LT(dist, 4.0);
+ }
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+ JxlDecoderDestroy(dec);
+ }
+}
+} // namespace
+
+TEST(DecodeTest, SetPreferredColorProfileTestFromGray) {
+ jxl::test::ColorEncodingDescriptor gray = {
+ jxl::ColorSpace::kGray, jxl::WhitePoint::kD65, jxl::Primaries::kSRGB,
+ jxl::TransferFunction::kSRGB, jxl::RenderingIntent::kRelative};
+ SetPreferredColorProfileTest(gray);
+}
+
+TEST_P(DecodeAllEncodingsTest, SetPreferredColorProfileTest) {
+ const auto& from = GetParam();
+ SetPreferredColorProfileTest(from);
+}
+
+// Tests the case of lossy sRGB image without alpha channel, decoded to RGB8
+// and to RGBA8
+TEST(DecodeTest, PixelTestOpaqueSrgbLossy) {
+ for (unsigned channels = 3; channels <= 4; channels++) {
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+
+ size_t xsize = 123, ysize = 77;
+ size_t num_pixels = xsize * ysize;
+ std::vector<uint8_t> pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+ JxlPixelFormat format_orig = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+ jxl::TestCodestreamParams());
+
+ JxlPixelFormat format = {channels, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+
+ std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+ dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+ format, /*use_callback=*/true, /*set_buffer_early=*/false,
+ /*use_resizable_runner=*/false, /*require_boxes=*/false,
+ /*expect_success*/ true);
+ JxlDecoderReset(dec);
+ EXPECT_EQ(num_pixels * channels, pixels2.size());
+
+ jxl::ColorEncoding color_encoding0 = jxl::ColorEncoding::SRGB(false);
+ jxl::Span<const uint8_t> span0(pixels.data(), pixels.size());
+ jxl::CodecInOut io0;
+ io0.SetSize(xsize, ysize);
+ EXPECT_TRUE(ConvertFromExternal(span0, xsize, ysize, color_encoding0,
+ /*bits_per_sample=*/16, format_orig,
+ /*pool=*/nullptr, &io0.Main()));
+
+ jxl::ColorEncoding color_encoding1 = jxl::ColorEncoding::SRGB(false);
+ jxl::Span<const uint8_t> span1(pixels2.data(), pixels2.size());
+ jxl::CodecInOut io1;
+ EXPECT_TRUE(ConvertFromExternal(span1, xsize, ysize, color_encoding1,
+ /*bits_per_sample=*/8, format,
+ /*pool=*/nullptr, &io1.Main()));
+
+ jxl::ButteraugliParams ba;
+ EXPECT_THAT(
+ ButteraugliDistance(io0.frames, io1.frames, ba, jxl::GetJxlCms(),
+ /*distmap=*/nullptr, nullptr),
+ IsSlightlyBelow(0.7f));
+
+ JxlDecoderDestroy(dec);
+ }
+}
+
+// Opaque image with noise enabled, decoded to RGB8 and RGBA8.
+TEST(DecodeTest, PixelTestOpaqueSrgbLossyNoise) {
+ for (unsigned channels = 3; channels <= 4; channels++) {
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+
+ size_t xsize = 512, ysize = 300;
+ size_t num_pixels = xsize * ysize;
+ std::vector<uint8_t> pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+ JxlPixelFormat format_orig = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ jxl::TestCodestreamParams params;
+ params.cparams.noise = jxl::Override::kOn;
+ jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+ params);
+
+ JxlPixelFormat format = {channels, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+
+ std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+ dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+ format, /*use_callback=*/false, /*set_buffer_early=*/true,
+ /*use_resizable_runner=*/false, /*require_boxes=*/false,
+ /*expect_success=*/true);
+ JxlDecoderReset(dec);
+ EXPECT_EQ(num_pixels * channels, pixels2.size());
+
+ jxl::ColorEncoding color_encoding0 = jxl::ColorEncoding::SRGB(false);
+ jxl::Span<const uint8_t> span0(pixels.data(), pixels.size());
+ jxl::CodecInOut io0;
+ io0.SetSize(xsize, ysize);
+ EXPECT_TRUE(ConvertFromExternal(span0, xsize, ysize, color_encoding0,
+ /*bits_per_sample=*/16, format_orig,
+ /*pool=*/nullptr, &io0.Main()));
+
+ jxl::ColorEncoding color_encoding1 = jxl::ColorEncoding::SRGB(false);
+ jxl::Span<const uint8_t> span1(pixels2.data(), pixels2.size());
+ jxl::CodecInOut io1;
+ EXPECT_TRUE(ConvertFromExternal(span1, xsize, ysize, color_encoding1,
+ /*bits_per_sample=*/8, format,
+ /*pool=*/nullptr, &io1.Main()));
+
+ jxl::ButteraugliParams ba;
+ EXPECT_THAT(
+ ButteraugliDistance(io0.frames, io1.frames, ba, jxl::GetJxlCms(),
+ /*distmap=*/nullptr, nullptr),
+ IsSlightlyBelow(1.7f));
+
+ JxlDecoderDestroy(dec);
+ }
+}
+
+TEST(DecodeTest, ProcessEmptyInputWithBoxes) {
+ size_t xsize = 123, ysize = 77;
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+ jxl::CompressParams cparams;
+ uint32_t channels = 3;
+ JxlPixelFormat format = {channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+ for (int i = 0; i < kCSBF_NUM_ENTRIES; ++i) {
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+ jxl::TestCodestreamParams params;
+ params.box_format = (CodeStreamBoxFormat)i;
+ printf("Testing empty input with box format %d\n", (int)params.box_format);
+ jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+ params);
+ const int events =
+ JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE | JXL_DEC_COLOR_ENCODING;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events));
+ EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ const size_t remaining = JxlDecoderReleaseInput(dec);
+ EXPECT_LE(remaining, compressed.size());
+ EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+ JxlDecoderDestroy(dec);
+ }
+}
+
+TEST(DecodeTest, ExtraBytesAfterCompressedStream) {
+ size_t xsize = 123, ysize = 77;
+ size_t num_pixels = xsize * ysize;
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+ jxl::CompressParams cparams;
+ for (int i = 0; i < kCSBF_NUM_ENTRIES; ++i) {
+ CodeStreamBoxFormat box_format = (CodeStreamBoxFormat)i;
+ if (box_format == kCSBF_Multi_Other_Zero_Terminated) continue;
+ printf("Testing with box format %d\n", (int)box_format);
+ size_t last_unknown_box_size = 0;
+ if (box_format == kCSBF_Single_Other) {
+ last_unknown_box_size = unk1_box_size + 8;
+ } else if (box_format == kCSBF_Multi_Other_Terminated) {
+ last_unknown_box_size = unk3_box_size + 8;
+ } else if (box_format == kCSBF_Multi_Last_Empty_Other) {
+ // If boxes are not required, the decoder won't consume the last empty
+ // jxlp box.
+ last_unknown_box_size = 12 + unk3_box_size + 8;
+ }
+ jxl::TestCodestreamParams params;
+ params.box_format = box_format;
+ jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+ params);
+ // Add some more bytes after compressed data.
+ compressed.push_back(0);
+ compressed.push_back(1);
+ compressed.push_back(2);
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+ uint32_t channels = 3;
+ JxlPixelFormat format = {channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+ std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+ dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+ format, /*use_callback=*/false, /*set_buffer_early=*/true,
+ /*use_resizable_runner=*/false, /*require_boxes=*/false,
+ /*expect_success=*/true);
+ size_t unconsumed_bytes = JxlDecoderReleaseInput(dec);
+ EXPECT_EQ(last_unknown_box_size + 3, unconsumed_bytes);
+ EXPECT_EQ(num_pixels * channels * 4, pixels2.size());
+ JxlDecoderDestroy(dec);
+ }
+}
+
+TEST(DecodeTest, ExtraBytesAfterCompressedStreamRequireBoxes) {
+ size_t xsize = 123, ysize = 77;
+ size_t num_pixels = xsize * ysize;
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+ jxl::CompressParams cparams;
+ for (int i = 0; i < kCSBF_NUM_ENTRIES; ++i) {
+ CodeStreamBoxFormat box_format = (CodeStreamBoxFormat)i;
+ if (box_format == kCSBF_Multi_Other_Zero_Terminated) continue;
+ printf("Testing with box format %d\n", (int)box_format);
+ bool expect_success = (box_format == kCSBF_None ||
+ box_format == kCSBF_Single_Zero_Terminated ||
+ box_format == kCSBF_Multi_Zero_Terminated);
+ jxl::TestCodestreamParams params;
+ params.box_format = box_format;
+ jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+ params);
+ // Add some more bytes after compressed data.
+ compressed.push_back(0);
+ compressed.push_back(1);
+ compressed.push_back(2);
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+ uint32_t channels = 3;
+ JxlPixelFormat format = {channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+ std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+ dec, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+ format, /*use_callback=*/false, /*set_buffer_early=*/true,
+ /*use_resizable_runner=*/false, /*require_boxes=*/true, expect_success);
+ size_t unconsumed_bytes = JxlDecoderReleaseInput(dec);
+ EXPECT_EQ(3, unconsumed_bytes);
+ EXPECT_EQ(num_pixels * channels * 4, pixels2.size());
+ JxlDecoderDestroy(dec);
+ }
+}
+
+TEST(DecodeTest, ConcatenatedCompressedStreams) {
+ size_t xsize = 123, ysize = 77;
+ size_t num_pixels = xsize * ysize;
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+ jxl::CompressParams cparams;
+ for (int i = 0; i < kCSBF_NUM_ENTRIES; ++i) {
+ CodeStreamBoxFormat first_box_format = (CodeStreamBoxFormat)i;
+ if (first_box_format == kCSBF_Multi_Other_Zero_Terminated) continue;
+ jxl::TestCodestreamParams params1;
+ params1.box_format = first_box_format;
+ jxl::PaddedBytes compressed1 = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+ params1);
+ for (int j = 0; j < kCSBF_NUM_ENTRIES; ++j) {
+ CodeStreamBoxFormat second_box_format = (CodeStreamBoxFormat)j;
+ if (second_box_format == kCSBF_Multi_Other_Zero_Terminated) continue;
+ printf("Testing with box format pair %d, %d\n", (int)first_box_format,
+ (int)second_box_format);
+ jxl::TestCodestreamParams params2;
+ params2.box_format = second_box_format;
+ jxl::PaddedBytes compressed2 = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+ 3, params2);
+ jxl::PaddedBytes concat;
+ concat.append(compressed1);
+ concat.append(compressed2);
+ uint32_t channels = 3;
+ JxlPixelFormat format = {channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0};
+ size_t remaining = concat.size();
+ for (int part = 0; part < 2; ++part) {
+ printf(" Decoding part %d\n", part + 1);
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+ size_t pos = concat.size() - remaining;
+ bool expect_success =
+ (part == 0 || second_box_format == kCSBF_None ||
+ second_box_format == kCSBF_Single_Zero_Terminated ||
+ second_box_format == kCSBF_Multi_Zero_Terminated);
+ std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+ dec, jxl::Span<const uint8_t>(concat.data() + pos, remaining),
+ format, /*use_callback=*/false, /*set_buffer_early=*/true,
+ /*use_resizable_runner=*/false, /*require_boxes=*/true,
+ expect_success);
+ EXPECT_EQ(num_pixels * channels * 4, pixels2.size());
+ remaining = JxlDecoderReleaseInput(dec);
+ JxlDecoderDestroy(dec);
+ }
+ EXPECT_EQ(0, remaining);
+ }
+ }
+}
+
+void TestPartialStream(bool reconstructible_jpeg) {
+ size_t xsize = 123, ysize = 77;
+ uint32_t channels = 4;
+ if (reconstructible_jpeg) {
+ channels = 3;
+ }
+ std::vector<uint8_t> pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, channels, 0);
+ JxlPixelFormat format_orig = {channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ jxl::TestCodestreamParams params;
+ if (reconstructible_jpeg) {
+ params.cparams.color_transform = jxl::ColorTransform::kNone;
+ } else {
+ // Lossless to verify pixels exactly after roundtrip.
+ params.cparams.SetLossless();
+ }
+
+ std::vector<uint8_t> pixels2;
+ pixels2.resize(pixels.size());
+
+ jxl::PaddedBytes jpeg_output(64);
+ size_t used_jpeg_output = 0;
+
+ std::vector<jxl::PaddedBytes> codestreams(kCSBF_NUM_ENTRIES);
+ std::vector<jxl::PaddedBytes> jpeg_codestreams(kCSBF_NUM_ENTRIES);
+ for (size_t i = 0; i < kCSBF_NUM_ENTRIES; ++i) {
+ params.box_format = (CodeStreamBoxFormat)i;
+ if (reconstructible_jpeg) {
+ params.jpeg_codestream = &jpeg_codestreams[i];
+ }
+ codestreams[i] = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+ channels, params);
+ }
+
+ // Test multiple step sizes, to test different combinations of the streaming
+ // box parsing.
+ std::vector<size_t> increments = {1, 3, 17, 23, 120, 700, 1050};
+
+ for (size_t index = 0; index < increments.size(); index++) {
+ for (size_t i = 0; i < kCSBF_NUM_ENTRIES; ++i) {
+ if (reconstructible_jpeg &&
+ (CodeStreamBoxFormat)i == CodeStreamBoxFormat::kCSBF_None) {
+ continue;
+ }
+ const jxl::PaddedBytes& data = codestreams[i];
+ const uint8_t* next_in = data.data();
+ size_t avail_in = 0;
+
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE |
+ JXL_DEC_JPEG_RECONSTRUCTION));
+
+ bool seen_basic_info = false;
+ bool seen_full_image = false;
+ bool seen_jpeg_recon = false;
+
+ size_t total_size = 0;
+
+ for (;;) {
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+ JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+ size_t remaining = JxlDecoderReleaseInput(dec);
+ EXPECT_LE(remaining, avail_in);
+ next_in += avail_in - remaining;
+ avail_in = remaining;
+ if (status == JXL_DEC_NEED_MORE_INPUT) {
+ if (total_size >= data.size()) {
+ // End of test data reached, it should have successfully decoded the
+ // image now.
+ FAIL();
+ break;
+ }
+
+ size_t increment = increments[index];
+ // End of the file reached, should be the final test.
+ if (total_size + increment > data.size()) {
+ increment = data.size() - total_size;
+ }
+ total_size += increment;
+ avail_in += increment;
+ } else if (status == JXL_DEC_BASIC_INFO) {
+ // This event should happen exactly once
+ EXPECT_FALSE(seen_basic_info);
+ if (seen_basic_info) break;
+ seen_basic_info = true;
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(info.xsize, xsize);
+ EXPECT_EQ(info.ysize, ysize);
+ } else if (status == JXL_DEC_JPEG_RECONSTRUCTION) {
+ EXPECT_FALSE(seen_basic_info);
+ EXPECT_FALSE(seen_full_image);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetJPEGBuffer(dec, jpeg_output.data(),
+ jpeg_output.size()));
+ seen_jpeg_recon = true;
+ } else if (status == JXL_DEC_JPEG_NEED_MORE_OUTPUT) {
+ EXPECT_TRUE(seen_jpeg_recon);
+ used_jpeg_output =
+ jpeg_output.size() - JxlDecoderReleaseJPEGBuffer(dec);
+ jpeg_output.resize(jpeg_output.size() * 2);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetJPEGBuffer(
+ dec, jpeg_output.data() + used_jpeg_output,
+ jpeg_output.size() - used_jpeg_output));
+ } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetImageOutBuffer(
+ dec, &format_orig, pixels2.data(), pixels2.size()));
+ } else if (status == JXL_DEC_FULL_IMAGE) {
+ // This event should happen exactly once
+ EXPECT_FALSE(seen_full_image);
+ if (seen_full_image) break;
+ // This event should happen after basic info
+ EXPECT_TRUE(seen_basic_info);
+ seen_full_image = true;
+ if (reconstructible_jpeg) {
+ used_jpeg_output =
+ jpeg_output.size() - JxlDecoderReleaseJPEGBuffer(dec);
+ EXPECT_EQ(used_jpeg_output, jpeg_codestreams[i].size());
+ EXPECT_EQ(0, memcmp(jpeg_output.data(), jpeg_codestreams[i].data(),
+ used_jpeg_output));
+ } else {
+ EXPECT_EQ(pixels, pixels2);
+ }
+ } else if (status == JXL_DEC_SUCCESS) {
+ EXPECT_TRUE(seen_full_image);
+ break;
+ } else {
+ // We do not expect any other events or errors
+ FAIL();
+ break;
+ }
+ }
+
+ // Ensure the decoder emitted the basic info and full image events
+ EXPECT_TRUE(seen_basic_info);
+ EXPECT_TRUE(seen_full_image);
+
+ JxlDecoderDestroy(dec);
+ }
+ }
+}
+
+// Tests the return status when trying to decode pixels on incomplete file: it
+// should return JXL_DEC_NEED_MORE_INPUT, not error.
+TEST(DecodeTest, PixelPartialTest) { TestPartialStream(false); }
+
+#if JPEGXL_ENABLE_JPEG
+// Tests the return status when trying to decode JPEG bytes on incomplete file.
+TEST(DecodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGPartialTest)) {
+ TestPartialStream(true);
+}
+#endif // JPEGXL_ENABLE_JPEG
+
+// The DC event still exists, but is no longer implemented, it is deprecated.
+TEST(DecodeTest, DCNotGettableTest) {
+ // 1x1 pixel JXL image
+ std::string compressed(
+ "\377\n\0\20\260\23\0H\200("
+ "\0\334\0U\17\0\0\250P\31e\334\340\345\\\317\227\37:,"
+ "\246m\\gh\253m\vK\22E\306\261I\252C&pH\22\353 "
+ "\363\6\22\bp\0\200\237\34\231W2d\255$\1",
+ 68);
+
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO));
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetInput(
+ dec, reinterpret_cast<const uint8_t*>(compressed.data()),
+ compressed.size()));
+
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+
+ // Since the image is only 1x1 pixel, there is only 1 group, the decoder is
+ // unable to get DC size from this, and will not return the DC at all. Since
+ // no full image is requested either, it is expected to return success.
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+ JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, PreviewTest) {
+ size_t xsize = 77, ysize = 120;
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+ JxlPixelFormat format_orig = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ for (jxl::PreviewMode mode : {jxl::kSmallPreview, jxl::kBigPreview}) {
+ jxl::TestCodestreamParams params;
+ params.preview_mode = mode;
+
+ jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 3,
+ params);
+
+ JxlPixelFormat format = {3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+ const uint8_t* next_in = compressed.data();
+ size_t avail_in = compressed.size();
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_PREVIEW_IMAGE));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderPreviewOutBufferSize(dec, &format, &buffer_size));
+
+ jxl::ColorEncoding c_srgb = jxl::ColorEncoding::SRGB(false);
+ jxl::CodecInOut io0;
+ EXPECT_TRUE(jxl::ConvertFromExternal(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+ c_srgb, /*bits_per_sample=*/16, format_orig, /*pool=*/nullptr,
+ &io0.Main()));
+ GeneratePreview(params.preview_mode, &io0.Main());
+
+ size_t xsize_preview = io0.Main().xsize();
+ size_t ysize_preview = io0.Main().ysize();
+ EXPECT_EQ(xsize_preview, info.preview.xsize);
+ EXPECT_EQ(ysize_preview, info.preview.ysize);
+ EXPECT_EQ(xsize_preview * ysize_preview * 3, buffer_size);
+
+ EXPECT_EQ(JXL_DEC_NEED_PREVIEW_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+ std::vector<uint8_t> preview(buffer_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetPreviewOutBuffer(dec, &format, preview.data(),
+ preview.size()));
+
+ EXPECT_EQ(JXL_DEC_PREVIEW_IMAGE, JxlDecoderProcessInput(dec));
+
+ jxl::CodecInOut io1;
+ EXPECT_TRUE(jxl::ConvertFromExternal(
+ jxl::Span<const uint8_t>(preview.data(), preview.size()), xsize_preview,
+ ysize_preview, c_srgb,
+ /*bits_per_sample=*/8, format,
+ /*pool=*/nullptr, &io1.Main()));
+
+ jxl::ButteraugliParams ba;
+ // TODO(lode): this ButteraugliDistance silently returns 0 (dangerous for
+ // tests) if xsize or ysize is < 8, no matter how different the images, a
+ // tiny size that could happen for a preview. ButteraugliDiffmap does
+ // support smaller than 8x8, but jxl's ButteraugliDistance does not. Perhaps
+ // move butteraugli's <8x8 handling from ButteraugliDiffmap to
+ // ButteraugliComparator::Diffmap in butteraugli.cc.
+ EXPECT_LE(ButteraugliDistance(io0.frames, io1.frames, ba, jxl::GetJxlCms(),
+ /*distmap=*/nullptr, nullptr),
+ mode == jxl::kSmallPreview ? 0.7f : 1.2f);
+
+ JxlDecoderDestroy(dec);
+ }
+}
+
+TEST(DecodeTest, AlignTest) {
+ size_t xsize = 123, ysize = 77;
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+ JxlPixelFormat format_orig = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+ jxl::TestCodestreamParams params;
+ // Lossless to verify pixels exactly after roundtrip.
+ params.cparams.SetLossless();
+ params.cparams.speed_tier = jxl::SpeedTier::kThunder;
+ jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 4,
+ params);
+
+ size_t align = 17;
+ JxlPixelFormat format = {3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, align};
+ // On purpose not using jxl::RoundUpTo to test it independently.
+ size_t expected_line_bytes = (1 * 3 * xsize + align - 1) / align * align;
+
+ for (int use_callback = 0; use_callback <= 1; ++use_callback) {
+ std::vector<uint8_t> pixels2 = jxl::DecodeWithAPI(
+ jxl::Span<const uint8_t>(compressed.data(), compressed.size()), format,
+ use_callback, /*set_buffer_early=*/false,
+ /*use_resizable_runner=*/false, /*require_boxes=*/false,
+ /*expect_success=*/true);
+ EXPECT_EQ(expected_line_bytes * ysize, pixels2.size());
+ EXPECT_EQ(0u, jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize,
+ ysize, format_orig, format));
+ }
+}
+
+TEST(DecodeTest, AnimationTest) {
+ size_t xsize = 123, ysize = 77;
+ static const size_t num_frames = 2;
+ std::vector<uint8_t> frames[2];
+ frames[0] = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+ frames[1] = jxl::test::GetSomeTestImage(xsize, ysize, 3, 1);
+ JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+ jxl::CodecInOut io;
+ io.SetSize(xsize, ysize);
+ io.metadata.m.SetUintSamples(16);
+ io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+ io.metadata.m.have_animation = true;
+ io.frames.clear();
+ io.frames.reserve(num_frames);
+ io.SetSize(xsize, ysize);
+
+ std::vector<uint32_t> frame_durations(num_frames);
+ for (size_t i = 0; i < num_frames; ++i) {
+ frame_durations[i] = 5 + i;
+ }
+
+ for (size_t i = 0; i < num_frames; ++i) {
+ jxl::ImageBundle bundle(&io.metadata.m);
+
+ EXPECT_TRUE(ConvertFromExternal(
+ jxl::Span<const uint8_t>(frames[i].data(), frames[i].size()), xsize,
+ ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+ /*bits_per_sample=*/16, format,
+ /*pool=*/nullptr, &bundle));
+ bundle.duration = frame_durations[i];
+ io.frames.push_back(std::move(bundle));
+ }
+
+ jxl::CompressParams cparams;
+ cparams.SetLossless(); // Lossless to verify pixels exactly after roundtrip.
+ cparams.speed_tier = jxl::SpeedTier::kThunder;
+ jxl::AuxOut aux_out;
+ jxl::PaddedBytes compressed;
+ jxl::PassesEncoderState enc_state;
+ EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed,
+ jxl::GetJxlCms(), &aux_out, nullptr));
+
+ // Decode and test the animation frames
+
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+ const uint8_t* next_in = compressed.data();
+ size_t avail_in = compressed.size();
+
+ void* runner = JxlThreadParallelRunnerCreate(
+ NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetParallelRunner(dec, JxlThreadParallelRunner, runner));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+
+ for (size_t i = 0; i < num_frames; ++i) {
+ std::vector<uint8_t> pixels(buffer_size);
+
+ EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+ JxlFrameHeader frame_header;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+ EXPECT_EQ(frame_durations[i], frame_header.duration);
+ EXPECT_EQ(0u, frame_header.name_length);
+ // For now, test with empty name, there's currently no easy way to encode
+ // a jxl file with a frame name because ImageBundle doesn't have a
+ // jxl::FrameHeader to set the name in. We can test the null termination
+ // character though.
+ char name;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameName(dec, &name, 1));
+ EXPECT_EQ(0, name);
+
+ EXPECT_EQ(i + 1 == num_frames, frame_header.is_last);
+
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+ dec, &format, pixels.data(), pixels.size()));
+
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(),
+ xsize, ysize, format, format));
+ }
+
+ // After all frames were decoded, JxlDecoderProcessInput should return
+ // success to indicate all is done.
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+ JxlThreadParallelRunnerDestroy(runner);
+ JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, AnimationTestStreaming) {
+ size_t xsize = 123, ysize = 77;
+ static const size_t num_frames = 2;
+ std::vector<uint8_t> frames[2];
+ frames[0] = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0);
+ frames[1] = jxl::test::GetSomeTestImage(xsize, ysize, 3, 1);
+ JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+ jxl::CodecInOut io;
+ io.SetSize(xsize, ysize);
+ io.metadata.m.SetUintSamples(16);
+ io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+ io.metadata.m.have_animation = true;
+ io.frames.clear();
+ io.frames.reserve(num_frames);
+ io.SetSize(xsize, ysize);
+
+ std::vector<uint32_t> frame_durations(num_frames);
+ for (size_t i = 0; i < num_frames; ++i) {
+ frame_durations[i] = 5 + i;
+ }
+
+ for (size_t i = 0; i < num_frames; ++i) {
+ jxl::ImageBundle bundle(&io.metadata.m);
+
+ EXPECT_TRUE(ConvertFromExternal(
+ jxl::Span<const uint8_t>(frames[i].data(), frames[i].size()), xsize,
+ ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+ /*bits_per_sample=*/16, format,
+ /*pool=*/nullptr, &bundle));
+ bundle.duration = frame_durations[i];
+ io.frames.push_back(std::move(bundle));
+ }
+
+ jxl::CompressParams cparams;
+ cparams.SetLossless(); // Lossless to verify pixels exactly after roundtrip.
+ cparams.speed_tier = jxl::SpeedTier::kThunder;
+ jxl::AuxOut aux_out;
+ jxl::PaddedBytes compressed;
+ jxl::PassesEncoderState enc_state;
+ EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed,
+ jxl::GetJxlCms(), &aux_out, nullptr));
+
+ // Decode and test the animation frames
+
+ const size_t step_size = 16;
+
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+ const uint8_t* next_in = compressed.data();
+ size_t avail_in = 0;
+ size_t frame_headers_seen = 0;
+ size_t frames_seen = 0;
+ bool seen_basic_info = false;
+
+ void* runner = JxlThreadParallelRunnerCreate(
+ NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetParallelRunner(dec, JxlThreadParallelRunner, runner));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+
+ std::vector<uint8_t> frames2[2];
+ for (size_t i = 0; i < num_frames; ++i) {
+ frames2[i].resize(frames[i].size());
+ }
+
+ size_t total_in = 0;
+ size_t loop_count = 0;
+
+ for (;;) {
+ if (loop_count++ > compressed.size()) {
+ fprintf(stderr, "Too many loops\n");
+ FAIL();
+ break;
+ }
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+ auto status = JxlDecoderProcessInput(dec);
+ size_t remaining = JxlDecoderReleaseInput(dec);
+ EXPECT_LE(remaining, avail_in);
+ next_in += avail_in - remaining;
+ avail_in = remaining;
+
+ if (status == JXL_DEC_SUCCESS) {
+ break;
+ } else if (status == JXL_DEC_ERROR) {
+ FAIL();
+ } else if (status == JXL_DEC_NEED_MORE_INPUT) {
+ if (total_in >= compressed.size()) {
+ fprintf(stderr, "Already gave all input data\n");
+ FAIL();
+ break;
+ }
+ size_t amount = step_size;
+ if (total_in + amount > compressed.size()) {
+ amount = compressed.size() - total_in;
+ }
+ avail_in += amount;
+ total_in += amount;
+ } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+ dec, &format, frames2[frames_seen].data(),
+ frames2[frames_seen].size()));
+ } else if (status == JXL_DEC_BASIC_INFO) {
+ EXPECT_EQ(false, seen_basic_info);
+ seen_basic_info = true;
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(xsize, info.xsize);
+ EXPECT_EQ(ysize, info.ysize);
+ } else if (status == JXL_DEC_FRAME) {
+ EXPECT_EQ(true, seen_basic_info);
+ frame_headers_seen++;
+ } else if (status == JXL_DEC_FULL_IMAGE) {
+ frames_seen++;
+ EXPECT_EQ(frame_headers_seen, frames_seen);
+ } else {
+ fprintf(stderr, "Unexpected status: %d\n", (int)status);
+ FAIL();
+ }
+ }
+
+ EXPECT_EQ(true, seen_basic_info);
+ EXPECT_EQ(num_frames, frames_seen);
+ EXPECT_EQ(num_frames, frame_headers_seen);
+ for (size_t i = 0; i < num_frames; ++i) {
+ EXPECT_EQ(frames[i], frames2[i]);
+ }
+
+ JxlThreadParallelRunnerDestroy(runner);
+ JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, ExtraChannelTest) {
+ size_t xsize = 55, ysize = 257;
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+ JxlPixelFormat format_orig = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+ jxl::TestCodestreamParams params;
+ // Lossless to verify pixels exactly after roundtrip.
+ params.cparams.SetLossless();
+ params.cparams.speed_tier = jxl::SpeedTier::kThunder;
+ jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 4,
+ params);
+
+ size_t align = 17;
+ JxlPixelFormat format = {3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, align};
+
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(1u, info.num_extra_channels);
+ EXPECT_EQ(JXL_FALSE, info.alpha_premultiplied);
+
+ JxlExtraChannelInfo extra_info;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetExtraChannelInfo(dec, 0, &extra_info));
+ EXPECT_EQ(0, extra_info.type);
+
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ size_t extra_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderExtraChannelBufferSize(dec, &format, &extra_size, 0));
+
+ std::vector<uint8_t> image(buffer_size);
+ std::vector<uint8_t> extra(extra_size);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+ dec, &format, image.data(), image.size()));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetExtraChannelBuffer(
+ dec, &format, extra.data(), extra.size(), 0));
+
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+
+ // After the full image was output, JxlDecoderProcessInput should return
+ // success to indicate all is done.
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+ JxlDecoderDestroy(dec);
+
+ EXPECT_EQ(0u, jxl::test::ComparePixels(pixels.data(), image.data(), xsize,
+ ysize, format_orig, format));
+
+ // Compare the extracted extra channel with the original alpha channel
+
+ std::vector<uint8_t> alpha(pixels.size() / 4);
+ for (size_t i = 0; i < pixels.size(); i += 8) {
+ size_t index_alpha = i / 4;
+ alpha[index_alpha + 0] = pixels[i + 6];
+ alpha[index_alpha + 1] = pixels[i + 7];
+ }
+ JxlPixelFormat format_alpha = format;
+ format_alpha.num_channels = 1;
+ JxlPixelFormat format_orig_alpha = format_orig;
+ format_orig_alpha.num_channels = 1;
+
+ EXPECT_EQ(0u,
+ jxl::test::ComparePixels(alpha.data(), extra.data(), xsize, ysize,
+ format_orig_alpha, format_alpha));
+}
+
+TEST(DecodeTest, SkipCurrentFrameTest) {
+ size_t xsize = 90, ysize = 120;
+ constexpr size_t num_frames = 7;
+ std::vector<uint8_t> frames[num_frames];
+ for (size_t i = 0; i < num_frames; i++) {
+ frames[i] = jxl::test::GetSomeTestImage(xsize, ysize, 3, i);
+ }
+ JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+ jxl::CodecInOut io;
+ io.SetSize(xsize, ysize);
+ io.metadata.m.SetUintSamples(16);
+ io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+ io.metadata.m.have_animation = true;
+ io.frames.clear();
+ io.frames.reserve(num_frames);
+ io.SetSize(xsize, ysize);
+
+ std::vector<uint32_t> frame_durations(num_frames);
+ for (size_t i = 0; i < num_frames; ++i) {
+ frame_durations[i] = 5 + i;
+ }
+
+ for (size_t i = 0; i < num_frames; ++i) {
+ jxl::ImageBundle bundle(&io.metadata.m);
+ if (i & 1) {
+ // Mark some frames as referenceable, others not.
+ bundle.use_for_next_frame = true;
+ }
+
+ EXPECT_TRUE(ConvertFromExternal(
+ jxl::Span<const uint8_t>(frames[i].data(), frames[i].size()), xsize,
+ ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+ /*bits_per_sample=*/16, format,
+ /*pool=*/nullptr, &bundle));
+ bundle.duration = frame_durations[i];
+ io.frames.push_back(std::move(bundle));
+ }
+
+ jxl::CompressParams cparams;
+ cparams.speed_tier = jxl::SpeedTier::kThunder;
+ jxl::AuxOut aux_out;
+ jxl::PaddedBytes compressed;
+ jxl::PassesEncoderState enc_state;
+ jxl::PassDefinition passes[] = {{2, 0, 4}, {4, 0, 4}, {8, 2, 2}, {8, 0, 1}};
+ jxl::ProgressiveMode progressive_mode{passes};
+ enc_state.progressive_splitter.SetProgressiveMode(progressive_mode);
+ EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed,
+ jxl::GetJxlCms(), &aux_out, nullptr));
+
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+ const uint8_t* next_in = compressed.data();
+ size_t avail_in = compressed.size();
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME |
+ JXL_DEC_FRAME_PROGRESSION |
+ JXL_DEC_FULL_IMAGE));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetProgressiveDetail(dec, kLastPasses));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+
+ for (size_t i = 0; i < num_frames; ++i) {
+ printf("Decoding frame %d\n", (int)i);
+ EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderSkipCurrentFrame(dec));
+ std::vector<uint8_t> pixels(buffer_size);
+ EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderSkipCurrentFrame(dec));
+ JxlFrameHeader frame_header;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+ EXPECT_EQ(frame_durations[i], frame_header.duration);
+ EXPECT_EQ(i + 1 == num_frames, frame_header.is_last);
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+ dec, &format, pixels.data(), pixels.size()));
+ if (i == 2) {
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSkipCurrentFrame(dec));
+ continue;
+ }
+ EXPECT_EQ(JXL_DEC_FRAME_PROGRESSION, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(8, JxlDecoderGetIntendedDownsamplingRatio(dec));
+ if (i == 3) {
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSkipCurrentFrame(dec));
+ continue;
+ }
+ EXPECT_EQ(JXL_DEC_FRAME_PROGRESSION, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(4, JxlDecoderGetIntendedDownsamplingRatio(dec));
+ if (i == 4) {
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSkipCurrentFrame(dec));
+ continue;
+ }
+ EXPECT_EQ(JXL_DEC_FRAME_PROGRESSION, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(2, JxlDecoderGetIntendedDownsamplingRatio(dec));
+ if (i == 5) {
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSkipCurrentFrame(dec));
+ continue;
+ }
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderSkipCurrentFrame(dec));
+ }
+
+ // After all frames were decoded, JxlDecoderProcessInput should return
+ // success to indicate all is done.
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+ JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, SkipFrameTest) {
+ size_t xsize = 90, ysize = 120;
+ constexpr size_t num_frames = 16;
+ std::vector<uint8_t> frames[num_frames];
+ for (size_t i = 0; i < num_frames; i++) {
+ frames[i] = jxl::test::GetSomeTestImage(xsize, ysize, 3, i);
+ }
+ JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+ jxl::CodecInOut io;
+ io.SetSize(xsize, ysize);
+ io.metadata.m.SetUintSamples(16);
+ io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+ io.metadata.m.have_animation = true;
+ io.frames.clear();
+ io.frames.reserve(num_frames);
+ io.SetSize(xsize, ysize);
+
+ std::vector<uint32_t> frame_durations(num_frames);
+ for (size_t i = 0; i < num_frames; ++i) {
+ frame_durations[i] = 5 + i;
+ }
+
+ for (size_t i = 0; i < num_frames; ++i) {
+ jxl::ImageBundle bundle(&io.metadata.m);
+ if (i & 1) {
+ // Mark some frames as referenceable, others not.
+ bundle.use_for_next_frame = true;
+ }
+
+ EXPECT_TRUE(ConvertFromExternal(
+ jxl::Span<const uint8_t>(frames[i].data(), frames[i].size()), xsize,
+ ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+ /*bits_per_sample=*/16, format,
+ /*pool=*/nullptr, &bundle));
+ bundle.duration = frame_durations[i];
+ io.frames.push_back(std::move(bundle));
+ }
+
+ jxl::CompressParams cparams;
+ cparams.SetLossless(); // Lossless to verify pixels exactly after roundtrip.
+ cparams.speed_tier = jxl::SpeedTier::kThunder;
+ jxl::AuxOut aux_out;
+ jxl::PaddedBytes compressed;
+ jxl::PassesEncoderState enc_state;
+ EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed,
+ jxl::GetJxlCms(), &aux_out, nullptr));
+
+ // Decode and test the animation frames
+
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+ const uint8_t* next_in = compressed.data();
+ size_t avail_in = compressed.size();
+
+ void* runner = JxlThreadParallelRunnerCreate(
+ NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetParallelRunner(dec, JxlThreadParallelRunner, runner));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+
+ for (size_t i = 0; i < num_frames; ++i) {
+ if (i == 3) {
+ JxlDecoderSkipFrames(dec, 5);
+ i += 5;
+ }
+ std::vector<uint8_t> pixels(buffer_size);
+
+ EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+ JxlFrameHeader frame_header;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+ EXPECT_EQ(frame_durations[i], frame_header.duration);
+
+ EXPECT_EQ(i + 1 == num_frames, frame_header.is_last);
+
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+ dec, &format, pixels.data(), pixels.size()));
+
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(),
+ xsize, ysize, format, format));
+ }
+
+ // After all frames were decoded, JxlDecoderProcessInput should return
+ // success to indicate all is done.
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+ // Test rewinding the decoder and skipping different frames
+
+ JxlDecoderRewind(dec);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec, JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+
+ for (size_t i = 0; i < num_frames; ++i) {
+ int test_skipping = (i == 9) ? 3 : 0;
+ std::vector<uint8_t> pixels(buffer_size);
+
+ EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+ // Since this is after JXL_DEC_FRAME but before JXL_DEC_FULL_IMAGE, this
+ // should only skip the next frame, not the currently processed one.
+ if (test_skipping) JxlDecoderSkipFrames(dec, test_skipping);
+
+ JxlFrameHeader frame_header;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+ EXPECT_EQ(frame_durations[i], frame_header.duration);
+
+ EXPECT_EQ(i + 1 == num_frames, frame_header.is_last);
+
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+ dec, &format, pixels.data(), pixels.size()));
+
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(),
+ xsize, ysize, format, format));
+
+ if (test_skipping) i += test_skipping;
+ }
+
+ JxlThreadParallelRunnerDestroy(runner);
+ JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, SkipFrameWithBlendingTest) {
+ size_t xsize = 90, ysize = 120;
+ constexpr size_t num_frames = 16;
+ std::vector<uint8_t> frames[num_frames];
+ JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+ jxl::CodecInOut io;
+ io.SetSize(xsize, ysize);
+ io.metadata.m.SetUintSamples(16);
+ io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+ io.metadata.m.have_animation = true;
+ io.frames.clear();
+ io.frames.reserve(num_frames);
+ io.SetSize(xsize, ysize);
+
+ std::vector<uint32_t> frame_durations(num_frames);
+
+ for (size_t i = 0; i < num_frames; ++i) {
+ if (i < 5) {
+ std::vector<uint8_t> frame_internal =
+ jxl::test::GetSomeTestImage(xsize, ysize, 3, i * 2 + 1);
+ // An internal frame with 0 duration, and use_for_next_frame, this is a
+ // frame that is not rendered and not output by the API, but on which the
+ // rendered frames depend
+ jxl::ImageBundle bundle_internal(&io.metadata.m);
+ EXPECT_TRUE(ConvertFromExternal(
+ jxl::Span<const uint8_t>(frame_internal.data(),
+ frame_internal.size()),
+ xsize, ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+ /*bits_per_sample=*/16, format,
+ /*pool=*/nullptr, &bundle_internal));
+ bundle_internal.duration = 0;
+ bundle_internal.use_for_next_frame = true;
+ io.frames.push_back(std::move(bundle_internal));
+ }
+
+ std::vector<uint8_t> frame =
+ jxl::test::GetSomeTestImage(xsize, ysize, 3, i * 2);
+ // Actual rendered frame
+ frame_durations[i] = 5 + i;
+ jxl::ImageBundle bundle(&io.metadata.m);
+ EXPECT_TRUE(ConvertFromExternal(
+ jxl::Span<const uint8_t>(frame.data(), frame.size()), xsize, ysize,
+ jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+ /*bits_per_sample=*/16, format,
+ /*pool=*/nullptr, &bundle));
+ bundle.duration = frame_durations[i];
+ // Create some variation in which frames depend on which.
+ if (i != 3 && i != 9 && i != 10) {
+ bundle.use_for_next_frame = true;
+ }
+ if (i != 12) {
+ bundle.blend = true;
+ // Choose a blend mode that depends on the pixels of the saved frame and
+ // doesn't use alpha
+ bundle.blendmode = jxl::BlendMode::kMul;
+ }
+ io.frames.push_back(std::move(bundle));
+ }
+
+ jxl::CompressParams cparams;
+ cparams.SetLossless(); // Lossless to verify pixels exactly after roundtrip.
+ cparams.speed_tier = jxl::SpeedTier::kThunder;
+ jxl::AuxOut aux_out;
+ jxl::PaddedBytes compressed;
+ jxl::PassesEncoderState enc_state;
+ EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed,
+ jxl::GetJxlCms(), &aux_out, nullptr));
+
+ // Independently decode all frames without any skipping, to create the
+ // expected blended frames, for the actual tests below to compare with.
+ {
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+ const uint8_t* next_in = compressed.data();
+ size_t avail_in = compressed.size();
+
+ void* runner = JxlThreadParallelRunnerCreate(
+ NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetParallelRunner(
+ dec, JxlThreadParallelRunner, runner));
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec, JXL_DEC_FULL_IMAGE));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+ for (size_t i = 0; i < num_frames; ++i) {
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+ frames[i].resize(xsize * ysize * 6);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetImageOutBuffer(dec, &format, frames[i].data(),
+ frames[i].size()));
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ }
+
+ // After all frames were decoded, JxlDecoderProcessInput should return
+ // success to indicate all is done.
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+ JxlThreadParallelRunnerDestroy(runner);
+ JxlDecoderDestroy(dec);
+ }
+
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+ const uint8_t* next_in = compressed.data();
+ size_t avail_in = compressed.size();
+
+ void* runner = JxlThreadParallelRunnerCreate(
+ NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetParallelRunner(dec, JxlThreadParallelRunner, runner));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+
+ for (size_t i = 0; i < num_frames; ++i) {
+ std::vector<uint8_t> pixels(buffer_size);
+
+ EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+ JxlFrameHeader frame_header;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+ EXPECT_EQ(frame_durations[i], frame_header.duration);
+
+ EXPECT_EQ(i + 1 == num_frames, frame_header.is_last);
+
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+ dec, &format, pixels.data(), pixels.size()));
+
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(),
+ xsize, ysize, format, format));
+
+ // Test rewinding mid-way, not decoding all frames.
+ if (i == 8) {
+ break;
+ }
+ }
+
+ JxlDecoderRewind(dec);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec, JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+
+ for (size_t i = 0; i < num_frames; ++i) {
+ if (i == 3) {
+ JxlDecoderSkipFrames(dec, 5);
+ i += 5;
+ }
+ std::vector<uint8_t> pixels(buffer_size);
+
+ EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+ JxlFrameHeader frame_header;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+ EXPECT_EQ(frame_durations[i], frame_header.duration);
+
+ EXPECT_EQ(i + 1 == num_frames, frame_header.is_last);
+
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+ dec, &format, pixels.data(), pixels.size()));
+
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(),
+ xsize, ysize, format, format));
+ }
+
+ // After all frames were decoded, JxlDecoderProcessInput should return
+ // success to indicate all is done.
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+ // Test rewinding the decoder and skipping different frames
+
+ JxlDecoderRewind(dec);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec, JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+
+ for (size_t i = 0; i < num_frames; ++i) {
+ int test_skipping = (i == 9) ? 3 : 0;
+ std::vector<uint8_t> pixels(buffer_size);
+
+ EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+ // Since this is after JXL_DEC_FRAME but before JXL_DEC_FULL_IMAGE, this
+ // should only skip the next frame, not the currently processed one.
+ if (test_skipping) JxlDecoderSkipFrames(dec, test_skipping);
+
+ JxlFrameHeader frame_header;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+ EXPECT_EQ(frame_durations[i], frame_header.duration);
+
+ EXPECT_EQ(i + 1 == num_frames, frame_header.is_last);
+
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+ dec, &format, pixels.data(), pixels.size()));
+
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(),
+ xsize, ysize, format, format));
+
+ if (test_skipping) i += test_skipping;
+ }
+
+ JxlThreadParallelRunnerDestroy(runner);
+ JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, SkipFrameWithAlphaBlendingTest) {
+ size_t xsize = 90, ysize = 120;
+ constexpr size_t num_frames = 16;
+ std::vector<uint8_t> frames[num_frames + 5];
+ JxlPixelFormat format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+ jxl::CodecInOut io;
+ io.SetSize(xsize, ysize);
+ io.metadata.m.SetUintSamples(16);
+ io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+ io.metadata.m.have_animation = true;
+ io.frames.clear();
+ io.frames.reserve(num_frames + 5);
+ io.SetSize(xsize, ysize);
+
+ std::vector<uint32_t> frame_durations_c;
+ std::vector<uint32_t> frame_durations_nc;
+ std::vector<uint32_t> frame_xsize, frame_ysize, frame_x0, frame_y0;
+
+ for (size_t i = 0; i < num_frames; ++i) {
+ size_t cropxsize = 1 + xsize * 2 / (i + 1);
+ size_t cropysize = 1 + ysize * 3 / (i + 2);
+ int cropx0 = i * 3 - 8;
+ int cropy0 = i * 4 - 7;
+ if (i < 5) {
+ std::vector<uint8_t> frame_internal =
+ jxl::test::GetSomeTestImage(xsize / 2, ysize / 2, 4, i * 2 + 1);
+ // An internal frame with 0 duration, and use_for_next_frame, this is a
+ // frame that is not rendered and not output by default by the API, but on
+ // which the rendered frames depend
+ jxl::ImageBundle bundle_internal(&io.metadata.m);
+ EXPECT_TRUE(ConvertFromExternal(
+ jxl::Span<const uint8_t>(frame_internal.data(),
+ frame_internal.size()),
+ xsize / 2, ysize / 2, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+ /*bits_per_sample=*/16, format,
+ /*pool=*/nullptr, &bundle_internal));
+ bundle_internal.duration = 0;
+ bundle_internal.use_for_next_frame = true;
+ bundle_internal.origin = {13, 17};
+ io.frames.push_back(std::move(bundle_internal));
+ frame_durations_nc.push_back(0);
+ frame_xsize.push_back(xsize / 2);
+ frame_ysize.push_back(ysize / 2);
+ frame_x0.push_back(13);
+ frame_y0.push_back(17);
+ }
+
+ std::vector<uint8_t> frame =
+ jxl::test::GetSomeTestImage(cropxsize, cropysize, 4, i * 2);
+ // Actual rendered frame
+ jxl::ImageBundle bundle(&io.metadata.m);
+ EXPECT_TRUE(ConvertFromExternal(
+ jxl::Span<const uint8_t>(frame.data(), frame.size()), cropxsize,
+ cropysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+ /*bits_per_sample=*/16, format,
+ /*pool=*/nullptr, &bundle));
+ bundle.duration = 5 + i;
+ frame_durations_nc.push_back(5 + i);
+ frame_durations_c.push_back(5 + i);
+ frame_xsize.push_back(cropxsize);
+ frame_ysize.push_back(cropysize);
+ frame_x0.push_back(cropx0);
+ frame_y0.push_back(cropy0);
+ bundle.origin = {cropx0, cropy0};
+ // Create some variation in which frames depend on which.
+ if (i != 3 && i != 9 && i != 10) {
+ bundle.use_for_next_frame = true;
+ }
+ if (i != 12) {
+ bundle.blend = true;
+ bundle.blendmode = jxl::BlendMode::kBlend;
+ }
+ io.frames.push_back(std::move(bundle));
+ }
+
+ jxl::CompressParams cparams;
+ cparams.SetLossless(); // Lossless to verify pixels exactly after roundtrip.
+ cparams.speed_tier = jxl::SpeedTier::kThunder;
+ jxl::AuxOut aux_out;
+ jxl::PaddedBytes compressed;
+ jxl::PassesEncoderState enc_state;
+ EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed,
+ jxl::GetJxlCms(), &aux_out, nullptr));
+ // try both with and without coalescing
+ for (auto coalescing : {JXL_TRUE, JXL_FALSE}) {
+ // Independently decode all frames without any skipping, to create the
+ // expected blended frames, for the actual tests below to compare with.
+ {
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+ const uint8_t* next_in = compressed.data();
+ size_t avail_in = compressed.size();
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetCoalescing(dec, coalescing));
+ void* runner = JxlThreadParallelRunnerCreate(
+ NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetParallelRunner(
+ dec, JxlThreadParallelRunner, runner));
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec, JXL_DEC_FULL_IMAGE));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+ for (size_t i = 0; i < num_frames + (coalescing ? 0 : 5); ++i) {
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ if (coalescing) {
+ EXPECT_EQ(xsize * ysize * 8, buffer_size);
+ } else {
+ EXPECT_EQ(frame_xsize[i] * frame_ysize[i] * 8, buffer_size);
+ }
+ frames[i].resize(buffer_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetImageOutBuffer(dec, &format, frames[i].data(),
+ frames[i].size()));
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ }
+
+ // After all frames were decoded, JxlDecoderProcessInput should return
+ // success to indicate all is done.
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+ JxlThreadParallelRunnerDestroy(runner);
+ JxlDecoderDestroy(dec);
+ }
+
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+ const uint8_t* next_in = compressed.data();
+ size_t avail_in = compressed.size();
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetCoalescing(dec, coalescing));
+ void* runner = JxlThreadParallelRunnerCreate(
+ NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetParallelRunner(
+ dec, JxlThreadParallelRunner, runner));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME |
+ JXL_DEC_FULL_IMAGE));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+
+ for (size_t i = 0; i < num_frames; ++i) {
+ EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ std::vector<uint8_t> pixels(buffer_size);
+
+ JxlFrameHeader frame_header;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+ EXPECT_EQ((coalescing ? frame_durations_c[i] : frame_durations_nc[i]),
+ frame_header.duration);
+
+ EXPECT_EQ(i + 1 == num_frames, frame_header.is_last);
+
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetImageOutBuffer(dec, &format, pixels.data(),
+ pixels.size()));
+
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ if (coalescing) {
+ EXPECT_EQ(frame_header.layer_info.xsize, xsize);
+ } else {
+ EXPECT_EQ(frame_header.layer_info.xsize, frame_xsize[i]);
+ }
+ if (coalescing) {
+ EXPECT_EQ(frame_header.layer_info.ysize, ysize);
+ } else {
+ EXPECT_EQ(frame_header.layer_info.ysize, frame_ysize[i]);
+ }
+ EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(),
+ frame_header.layer_info.xsize,
+ frame_header.layer_info.ysize,
+ format, format));
+
+ // Test rewinding mid-way, not decoding all frames.
+ if (i == 8) {
+ break;
+ }
+ }
+
+ JxlDecoderRewind(dec);
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+
+ for (size_t i = 0; i < num_frames + (coalescing ? 0 : 5); ++i) {
+ if (i == 3) {
+ JxlDecoderSkipFrames(dec, 5);
+ i += 5;
+ }
+
+ EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ std::vector<uint8_t> pixels(buffer_size);
+
+ JxlFrameHeader frame_header;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+ EXPECT_EQ((coalescing ? frame_durations_c[i] : frame_durations_nc[i]),
+ frame_header.duration);
+
+ EXPECT_EQ(i + 1 == num_frames + (coalescing ? 0 : 5),
+ frame_header.is_last);
+
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetImageOutBuffer(dec, &format, pixels.data(),
+ pixels.size()));
+
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ if (coalescing) {
+ EXPECT_EQ(frame_header.layer_info.xsize, xsize);
+ EXPECT_EQ(frame_header.layer_info.ysize, ysize);
+ EXPECT_EQ(frame_header.layer_info.crop_x0, 0);
+ EXPECT_EQ(frame_header.layer_info.crop_y0, 0);
+ } else {
+ EXPECT_EQ(frame_header.layer_info.xsize, frame_xsize[i]);
+ EXPECT_EQ(frame_header.layer_info.ysize, frame_ysize[i]);
+ EXPECT_EQ(frame_header.layer_info.crop_x0, frame_x0[i]);
+ EXPECT_EQ(frame_header.layer_info.crop_y0, frame_y0[i]);
+ EXPECT_EQ(frame_header.layer_info.blend_info.blendmode,
+ i != 12 + 5 && frame_header.duration != 0
+ ? 2
+ : 0); // kBlend or the default kReplace
+ }
+ EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(),
+ frame_header.layer_info.xsize,
+ frame_header.layer_info.ysize,
+ format, format));
+ }
+
+ // After all frames were decoded, JxlDecoderProcessInput should return
+ // success to indicate all is done.
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+ // Test rewinding the decoder and skipping different frames
+
+ JxlDecoderRewind(dec);
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+
+ for (size_t i = 0; i < num_frames + (coalescing ? 0 : 5); ++i) {
+ int test_skipping = (i == 9) ? 3 : 0;
+
+ EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ std::vector<uint8_t> pixels(buffer_size);
+
+ // Since this is after JXL_DEC_FRAME but before JXL_DEC_FULL_IMAGE, this
+ // should only skip the next frame, not the currently processed one.
+ if (test_skipping) JxlDecoderSkipFrames(dec, test_skipping);
+
+ JxlFrameHeader frame_header;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header));
+ EXPECT_EQ((coalescing ? frame_durations_c[i] : frame_durations_nc[i]),
+ frame_header.duration);
+
+ EXPECT_EQ(i + 1 == num_frames + (coalescing ? 0 : 5),
+ frame_header.is_last);
+
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetImageOutBuffer(dec, &format, pixels.data(),
+ pixels.size()));
+
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(),
+ frame_header.layer_info.xsize,
+ frame_header.layer_info.ysize,
+ format, format));
+
+ if (test_skipping) i += test_skipping;
+ }
+
+ JxlThreadParallelRunnerDestroy(runner);
+ JxlDecoderDestroy(dec);
+ }
+}
+
+TEST(DecodeTest, OrientedCroppedFrameTest) {
+ const auto test = [](bool keep_orientation, uint32_t orientation,
+ uint32_t resampling) {
+ size_t xsize = 90, ysize = 120;
+ JxlPixelFormat format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ size_t oxsize = (!keep_orientation && orientation > 4 ? ysize : xsize);
+ size_t oysize = (!keep_orientation && orientation > 4 ? xsize : ysize);
+ jxl::CodecInOut io;
+ io.SetSize(xsize, ysize);
+ io.metadata.m.SetUintSamples(16);
+ io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+ io.metadata.m.orientation = orientation;
+ io.frames.clear();
+ io.SetSize(xsize, ysize);
+
+ for (size_t i = 0; i < 3; ++i) {
+ size_t cropxsize = 1 + xsize * 2 / (i + 1);
+ size_t cropysize = 1 + ysize * 3 / (i + 2);
+ int cropx0 = i * 3 - 8;
+ int cropy0 = i * 4 - 7;
+
+ std::vector<uint8_t> frame =
+ jxl::test::GetSomeTestImage(cropxsize, cropysize, 4, i * 2);
+ jxl::ImageBundle bundle(&io.metadata.m);
+ EXPECT_TRUE(ConvertFromExternal(
+ jxl::Span<const uint8_t>(frame.data(), frame.size()), cropxsize,
+ cropysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false),
+ /*bits_per_sample=*/16, format,
+ /*pool=*/nullptr, &bundle));
+ bundle.origin = {cropx0, cropy0};
+ bundle.use_for_next_frame = true;
+ io.frames.push_back(std::move(bundle));
+ }
+
+ jxl::CompressParams cparams;
+ cparams
+ .SetLossless(); // Lossless to verify pixels exactly after roundtrip.
+ cparams.speed_tier = jxl::SpeedTier::kThunder;
+ cparams.resampling = resampling;
+ jxl::AuxOut aux_out;
+ jxl::PaddedBytes compressed;
+ jxl::PassesEncoderState enc_state;
+ EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed,
+ jxl::GetJxlCms(), &aux_out, nullptr));
+
+ // 0 is merged frame as decoded with coalescing enabled (default)
+ // 1-3 are non-coalesced frames as decoded with coalescing disabled
+ // 4 is the manually merged frame
+ std::vector<uint8_t> frames[5];
+ frames[4].resize(xsize * ysize * 8, 0);
+
+ // try both with and without coalescing
+ for (auto coalescing : {JXL_TRUE, JXL_FALSE}) {
+ // Independently decode all frames without any skipping, to create the
+ // expected blended frames, for the actual tests below to compare with.
+ {
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+ const uint8_t* next_in = compressed.data();
+ size_t avail_in = compressed.size();
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetCoalescing(dec, coalescing));
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetKeepOrientation(dec, keep_orientation));
+ void* runner = JxlThreadParallelRunnerCreate(
+ NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads());
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetParallelRunner(
+ dec, JxlThreadParallelRunner, runner));
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec, JXL_DEC_FULL_IMAGE));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+ for (size_t i = (coalescing ? 0 : 1); i < (coalescing ? 1 : 4); ++i) {
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+ JxlFrameHeader frame_header;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetFrameHeader(dec, &frame_header));
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ if (coalescing) {
+ EXPECT_EQ(xsize * ysize * 8, buffer_size);
+ } else {
+ EXPECT_EQ(frame_header.layer_info.xsize *
+ frame_header.layer_info.ysize * 8,
+ buffer_size);
+ }
+ frames[i].resize(buffer_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetImageOutBuffer(dec, &format, frames[i].data(),
+ frames[i].size()));
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(frame_header.layer_info.blend_info.blendmode,
+ JXL_BLEND_REPLACE);
+ if (coalescing) {
+ EXPECT_EQ(frame_header.layer_info.xsize, oxsize);
+ EXPECT_EQ(frame_header.layer_info.ysize, oysize);
+ EXPECT_EQ(frame_header.layer_info.crop_x0, 0);
+ EXPECT_EQ(frame_header.layer_info.crop_y0, 0);
+ } else {
+ // manually merge this layer
+ int x0 = frame_header.layer_info.crop_x0;
+ int y0 = frame_header.layer_info.crop_y0;
+ int w = frame_header.layer_info.xsize;
+ int h = frame_header.layer_info.ysize;
+ for (int y = 0; y < static_cast<int>(oysize); y++) {
+ if (y < y0 || y >= y0 + h) continue;
+ // pointers do whole 16-bit RGBA pixels at a time
+ uint64_t* row_merged = static_cast<uint64_t*>(
+ (void*)(frames[4].data() + y * oxsize * 8));
+ uint64_t* row_layer = static_cast<uint64_t*>(
+ (void*)(frames[i].data() + (y - y0) * w * 8));
+ for (int x = 0; x < static_cast<int>(oxsize); x++) {
+ if (x < x0 || x >= x0 + w) continue;
+ row_merged[x] = row_layer[x - x0];
+ }
+ }
+ }
+ }
+
+ // After all frames were decoded, JxlDecoderProcessInput should return
+ // success to indicate all is done.
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+ JxlThreadParallelRunnerDestroy(runner);
+ JxlDecoderDestroy(dec);
+ }
+ }
+
+ EXPECT_EQ(0u, jxl::test::ComparePixels(frames[0].data(), frames[4].data(),
+ oxsize, oysize, format, format));
+ };
+
+ for (bool keep_orientation : {true, false}) {
+ for (uint32_t orientation = 1; orientation <= 8; orientation++) {
+ for (uint32_t resampling : {1, 2, 4, 8}) {
+ SCOPED_TRACE(testing::Message()
+ << "keep_orientation: " << keep_orientation << ", "
+ << "orientation: " << orientation << ", "
+ << "resampling: " << resampling);
+ test(keep_orientation, orientation, resampling);
+ }
+ }
+ }
+}
+
+struct FramePositions {
+ size_t frame_start;
+ size_t header_end;
+ size_t toc_end;
+ std::vector<size_t> section_end;
+};
+
+struct StreamPositions {
+ size_t codestream_start;
+ size_t codestream_end;
+ size_t basic_info;
+ size_t jbrd_end = 0;
+ std::vector<size_t> box_start;
+ std::vector<FramePositions> frames;
+};
+
+void AnalyzeCodestream(const jxl::PaddedBytes& data,
+ StreamPositions* streampos) {
+ // Unbox data to codestream and mark where it is broken up by boxes.
+ std::vector<uint8_t> codestream;
+ std::vector<std::pair<size_t, size_t>> breakpoints;
+ bool codestream_end = false;
+ ASSERT_LE(2, data.size());
+ if (data[0] == 0xff && data[1] == 0x0a) {
+ codestream = std::vector<uint8_t>(data.begin(), data.end());
+ streampos->codestream_start = 0;
+ } else {
+ const uint8_t* in = data.data();
+ size_t pos = 0;
+ while (pos < data.size()) {
+ ASSERT_LE(pos + 8, data.size());
+ streampos->box_start.push_back(pos);
+ size_t box_size = LoadBE32(in + pos);
+ if (box_size == 0) box_size = data.size() - pos;
+ ASSERT_LE(pos + box_size, data.size());
+ if (memcmp(in + pos + 4, "jxlc", 4) == 0) {
+ EXPECT_TRUE(codestream.empty());
+ streampos->codestream_start = pos + 8;
+ codestream.insert(codestream.end(), in + pos + 8, in + pos + box_size);
+ codestream_end = true;
+ } else if (memcmp(in + pos + 4, "jxlp", 4) == 0) {
+ codestream_end = (LoadBE32(in + pos + 8) & 0x80000000);
+ if (codestream.empty()) {
+ streampos->codestream_start = pos + 12;
+ } else if (box_size > 12 || !codestream_end) {
+ breakpoints.push_back({codestream.size(), 12});
+ }
+ codestream.insert(codestream.end(), in + pos + 12, in + pos + box_size);
+ } else if (memcmp(in + pos + 4, "jbrd", 4) == 0) {
+ EXPECT_TRUE(codestream.empty());
+ streampos->jbrd_end = pos + box_size;
+ } else if (!codestream.empty() && !codestream_end) {
+ breakpoints.push_back({codestream.size(), box_size});
+ }
+ pos += box_size;
+ }
+ ASSERT_EQ(pos, data.size());
+ }
+ // Translate codestream positions to boxed stream positions.
+ size_t offset = streampos->codestream_start;
+ size_t bp = 0;
+ auto add_offset = [&](size_t pos) {
+ while (bp < breakpoints.size() && pos >= breakpoints[bp].first) {
+ offset += breakpoints[bp++].second;
+ }
+ return pos + offset;
+ };
+ // Analyze the unboxed codestream.
+ jxl::BitReader br(
+ jxl::Span<const uint8_t>(codestream.data(), codestream.size()));
+ ASSERT_EQ(br.ReadFixedBits<16>(), 0x0AFF);
+ jxl::CodecMetadata metadata;
+ EXPECT_TRUE(ReadSizeHeader(&br, &metadata.size));
+ EXPECT_TRUE(ReadImageMetadata(&br, &metadata.m));
+ streampos->basic_info =
+ add_offset(br.TotalBitsConsumed() / jxl::kBitsPerByte);
+ metadata.transform_data.nonserialized_xyb_encoded = metadata.m.xyb_encoded;
+ EXPECT_TRUE(jxl::Bundle::Read(&br, &metadata.transform_data));
+ EXPECT_TRUE(br.JumpToByteBoundary());
+ bool has_preview = metadata.m.have_preview;
+ while (br.TotalBitsConsumed() < br.TotalBytes() * jxl::kBitsPerByte) {
+ FramePositions p;
+ p.frame_start = add_offset(br.TotalBitsConsumed() / jxl::kBitsPerByte);
+ jxl::FrameHeader frame_header(&metadata);
+ if (has_preview) {
+ frame_header.nonserialized_is_preview = true;
+ has_preview = false;
+ }
+ EXPECT_TRUE(ReadFrameHeader(&br, &frame_header));
+ p.header_end =
+ add_offset(jxl::DivCeil(br.TotalBitsConsumed(), jxl::kBitsPerByte));
+ jxl::FrameDimensions frame_dim = frame_header.ToFrameDimensions();
+ uint64_t groups_total_size;
+ const size_t toc_entries = jxl::NumTocEntries(
+ frame_dim.num_groups, frame_dim.num_dc_groups,
+ frame_header.passes.num_passes, /*has_ac_global=*/true);
+ std::vector<uint64_t> section_offsets;
+ std::vector<uint32_t> section_sizes;
+ EXPECT_TRUE(ReadGroupOffsets(toc_entries, &br, &section_offsets,
+ &section_sizes, &groups_total_size));
+ EXPECT_EQ(br.TotalBitsConsumed() % jxl::kBitsPerByte, 0);
+ size_t sections_start = br.TotalBitsConsumed() / jxl::kBitsPerByte;
+ p.toc_end = add_offset(sections_start);
+ for (size_t i = 0; i < toc_entries; ++i) {
+ size_t end = sections_start + section_offsets[i] + section_sizes[i];
+ p.section_end.push_back(add_offset(end));
+ }
+ br.SkipBits(groups_total_size * jxl::kBitsPerByte);
+ streampos->frames.push_back(p);
+ }
+ streampos->codestream_end = add_offset(codestream.size());
+ EXPECT_EQ(br.TotalBitsConsumed(), br.TotalBytes() * jxl::kBitsPerByte);
+ EXPECT_TRUE(br.Close());
+}
+
+enum ExpectedFlushState { NO_FLUSH, SAME_FLUSH, NEW_FLUSH };
+struct Breakpoint {
+ size_t file_pos;
+ ExpectedFlushState expect_flush;
+};
+
+void VerifyProgression(size_t xsize, size_t ysize, uint32_t num_channels,
+ const std::vector<uint8_t>& pixels,
+ const jxl::PaddedBytes& data,
+ std::vector<Breakpoint> breakpoints) {
+ // Size large enough for multiple groups, required to have progressive stages.
+ ASSERT_LT(256, xsize);
+ ASSERT_LT(256, ysize);
+ std::vector<uint8_t> pixels2;
+ pixels2.resize(pixels.size());
+ JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+ int bp = 0;
+ const uint8_t* next_in = data.data();
+ size_t avail_in = breakpoints[bp].file_pos;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+ double prev_dist = 1.0;
+ for (;;) {
+ JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+ printf("bp: %d status: 0x%x\n", bp, (int)status);
+ if (status == JXL_DEC_BASIC_INFO) {
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(info.xsize, xsize);
+ EXPECT_EQ(info.ysize, ysize);
+ // Output buffer/callback not yet set
+ EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec));
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ EXPECT_EQ(pixels2.size(), buffer_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetImageOutBuffer(dec, &format, pixels2.data(),
+ pixels2.size()));
+ } else if (status == JXL_DEC_FRAME) {
+ // Nothing to do.
+ } else if (status == JXL_DEC_SUCCESS) {
+ EXPECT_EQ(bp + 1, breakpoints.size());
+ break;
+ } else if (status == JXL_DEC_NEED_MORE_INPUT ||
+ status == JXL_DEC_FULL_IMAGE) {
+ if (breakpoints[bp].expect_flush == NO_FLUSH) {
+ EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec));
+ } else {
+ if (status != JXL_DEC_FULL_IMAGE) {
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec));
+ }
+ double dist = jxl::test::DistanceRMS(pixels2.data(), pixels.data(),
+ xsize, ysize, format);
+ if (breakpoints[bp].expect_flush == NEW_FLUSH) {
+ EXPECT_LT(dist, prev_dist);
+ prev_dist = dist;
+ } else {
+ EXPECT_EQ(dist, prev_dist);
+ }
+ }
+ if (status == JXL_DEC_FULL_IMAGE) {
+ EXPECT_EQ(bp + 1, breakpoints.size());
+ continue;
+ }
+ ASSERT_LT(++bp, breakpoints.size());
+ next_in += avail_in - JxlDecoderReleaseInput(dec);
+ avail_in = breakpoints[bp].file_pos - (next_in - data.data());
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+ } else {
+ printf("Unexpected status: 0x%x\n", (int)status);
+ FAIL(); // unexpected returned status
+ }
+ }
+ JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, ProgressionTest) {
+ size_t xsize = 508, ysize = 470;
+ uint32_t num_channels = 3;
+ std::vector<uint8_t> pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+ jxl::TestCodestreamParams params;
+ params.cparams.progressive_dc = 1;
+ params.preview_mode = jxl::kSmallPreview;
+ jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+ num_channels, params);
+ StreamPositions streampos;
+ AnalyzeCodestream(data, &streampos);
+ const std::vector<FramePositions>& fp = streampos.frames;
+ // We have preview, dc frame and regular frame.
+ EXPECT_EQ(3, fp.size());
+ EXPECT_EQ(7, fp[2].section_end.size());
+ EXPECT_EQ(data.size(), fp[2].section_end[6]);
+ std::vector<Breakpoint> breakpoints{
+ {fp[0].frame_start, NO_FLUSH}, // headers
+ {fp[1].frame_start, NO_FLUSH}, // preview
+ {fp[2].frame_start, NO_FLUSH}, // dc frame
+ {fp[2].section_end[0], NO_FLUSH}, // DC global
+ {fp[2].section_end[1] - 1, NO_FLUSH}, // partial DC group
+ {fp[2].section_end[1], NEW_FLUSH}, // DC group
+ {fp[2].section_end[2], SAME_FLUSH}, // AC global
+ {fp[2].section_end[3], NEW_FLUSH}, // AC group 0
+ {fp[2].section_end[4] - 1, SAME_FLUSH}, // partial AC group 1
+ {fp[2].section_end[4], NEW_FLUSH}, // AC group 1
+ {fp[2].section_end[5], NEW_FLUSH}, // AC group 2
+ {data.size() - 1, SAME_FLUSH}, // partial AC group 3
+ {data.size(), NEW_FLUSH}}; // full image
+ VerifyProgression(xsize, ysize, num_channels, pixels, data, breakpoints);
+}
+
+TEST(DecodeTest, ProgressionTestLosslessAlpha) {
+ size_t xsize = 508, ysize = 470;
+ uint32_t num_channels = 4;
+ std::vector<uint8_t> pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+ jxl::TestCodestreamParams params;
+ params.cparams.SetLossless();
+ params.cparams.speed_tier = jxl::SpeedTier::kThunder;
+ params.cparams.responsive = 1;
+ jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+ num_channels, params);
+ StreamPositions streampos;
+ AnalyzeCodestream(data, &streampos);
+ const std::vector<FramePositions>& fp = streampos.frames;
+ // We have preview, dc frame and regular frame.
+ EXPECT_EQ(1, fp.size());
+ EXPECT_EQ(7, fp[0].section_end.size());
+ EXPECT_EQ(data.size(), fp[0].section_end[6]);
+ std::vector<Breakpoint> breakpoints{
+ {fp[0].frame_start, NO_FLUSH}, // headers
+ {fp[0].section_end[0] - 1, NO_FLUSH}, // partial DC global
+ {fp[0].section_end[0], NEW_FLUSH}, // DC global
+ {fp[0].section_end[1], SAME_FLUSH}, // DC group
+ {fp[0].section_end[2], SAME_FLUSH}, // AC global
+ {fp[0].section_end[3], NEW_FLUSH}, // AC group 0
+ {fp[0].section_end[4] - 1, SAME_FLUSH}, // partial AC group 1
+ {fp[0].section_end[4], NEW_FLUSH}, // AC group 1
+ {fp[0].section_end[5], NEW_FLUSH}, // AC group 2
+ {data.size() - 1, SAME_FLUSH}, // partial AC group 3
+ {data.size(), NEW_FLUSH}}; // full image
+ VerifyProgression(xsize, ysize, num_channels, pixels, data, breakpoints);
+}
+
+void VerifyFilePosition(size_t expected_pos, const jxl::PaddedBytes& data,
+ JxlDecoder* dec) {
+ size_t remaining = JxlDecoderReleaseInput(dec);
+ size_t pos = data.size() - remaining;
+ EXPECT_EQ(expected_pos, pos);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetInput(dec, data.data() + pos, remaining));
+}
+
+TEST(DecodeTest, InputHandlingTestOneShot) {
+ size_t xsize = 508, ysize = 470;
+ uint32_t num_channels = 3;
+ std::vector<uint8_t> pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+ for (int i = 0; i < kCSBF_NUM_ENTRIES; ++i) {
+ printf("Testing with box format %d\n", i);
+ jxl::TestCodestreamParams params;
+ params.cparams.progressive_dc = 1;
+ params.preview_mode = jxl::kSmallPreview;
+ params.box_format = (CodeStreamBoxFormat)i;
+ jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+ num_channels, params);
+ JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ StreamPositions streampos;
+ AnalyzeCodestream(data, &streampos);
+ const std::vector<FramePositions>& fp = streampos.frames;
+ // We have preview, dc frame and regular frame.
+ EXPECT_EQ(3, fp.size());
+
+ std::vector<uint8_t> pixels2;
+ pixels2.resize(pixels.size());
+
+ int kNumEvents = 6;
+ int events[] = {
+ JXL_DEC_BASIC_INFO, JXL_DEC_COLOR_ENCODING, JXL_DEC_PREVIEW_IMAGE,
+ JXL_DEC_FRAME, JXL_DEC_FULL_IMAGE, JXL_DEC_FRAME_PROGRESSION,
+ };
+ size_t end_positions[] = {
+ streampos.basic_info, fp[0].frame_start,
+ fp[1].frame_start, fp[2].toc_end,
+ streampos.codestream_end, streampos.codestream_end};
+ int events_wanted = 0;
+ for (int j = 0; j < kNumEvents; ++j) {
+ events_wanted |= events[j];
+ size_t end_pos = end_positions[j];
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events_wanted));
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetInput(dec, data.data(), data.size()));
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ VerifyFilePosition(streampos.basic_info, data, dec);
+ if (j >= 1) {
+ EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+ VerifyFilePosition(fp[0].frame_start, data, dec);
+ }
+ if (j >= 2) {
+ EXPECT_EQ(JXL_DEC_NEED_PREVIEW_OUT_BUFFER, JxlDecoderProcessInput(dec));
+ VerifyFilePosition(fp[0].toc_end, data, dec);
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderPreviewOutBufferSize(dec, &format, &buffer_size));
+ EXPECT_GE(pixels2.size(), buffer_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetPreviewOutBuffer(dec, &format, pixels2.data(),
+ buffer_size));
+ EXPECT_EQ(JXL_DEC_PREVIEW_IMAGE, JxlDecoderProcessInput(dec));
+ VerifyFilePosition(fp[1].frame_start, data, dec);
+ }
+ if (j >= 3) {
+ EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+ VerifyFilePosition(fp[2].toc_end, data, dec);
+ if (j >= 5) {
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetProgressiveDetail(dec, kDC));
+ }
+ }
+ if (j >= 4) {
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+ VerifyFilePosition(fp[2].toc_end, data, dec);
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ EXPECT_EQ(pixels2.size(), buffer_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetImageOutBuffer(dec, &format, pixels2.data(),
+ pixels2.size()));
+ if (j >= 5) {
+ EXPECT_EQ(JXL_DEC_FRAME_PROGRESSION, JxlDecoderProcessInput(dec));
+ VerifyFilePosition(fp[2].section_end[1], data, dec);
+ }
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ VerifyFilePosition(streampos.codestream_end, data, dec);
+ }
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+ VerifyFilePosition(end_pos, data, dec);
+ JxlDecoderDestroy(dec);
+ }
+ }
+}
+
+#if JPEGXL_ENABLE_JPEG
+TEST(DecodeTest, JXL_TRANSCODE_JPEG_TEST(InputHandlingTestJPEGOneshot)) {
+ size_t xsize = 123;
+ size_t ysize = 77;
+ size_t channels = 3;
+ std::vector<uint8_t> pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, channels, /*seed=*/0);
+ for (int i = 1; i < kCSBF_NUM_ENTRIES; ++i) {
+ printf("Testing with box format %d\n", i);
+ jxl::PaddedBytes jpeg_codestream;
+ jxl::TestCodestreamParams params;
+ params.cparams.color_transform = jxl::ColorTransform::kNone;
+ params.jpeg_codestream = &jpeg_codestream;
+ params.preview_mode = jxl::kSmallPreview;
+ params.box_format = (CodeStreamBoxFormat)i;
+ jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+ channels, params);
+ JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ StreamPositions streampos;
+ AnalyzeCodestream(data, &streampos);
+ const std::vector<FramePositions>& fp = streampos.frames;
+ // We have preview and regular frame.
+ EXPECT_EQ(2, fp.size());
+ EXPECT_LT(0, streampos.jbrd_end);
+
+ std::vector<uint8_t> pixels2;
+ pixels2.resize(pixels.size());
+
+ int kNumEvents = 6;
+ int events[] = {JXL_DEC_BASIC_INFO, JXL_DEC_JPEG_RECONSTRUCTION,
+ JXL_DEC_COLOR_ENCODING, JXL_DEC_PREVIEW_IMAGE,
+ JXL_DEC_FRAME, JXL_DEC_FULL_IMAGE};
+ size_t end_positions[] = {streampos.basic_info, streampos.basic_info,
+ fp[0].frame_start, fp[1].frame_start,
+ fp[1].toc_end, streampos.codestream_end};
+ int events_wanted = 0;
+ for (int j = 0; j < kNumEvents; ++j) {
+ printf("j = %d\n", j);
+ events_wanted |= events[j];
+ size_t end_pos = end_positions[j];
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events_wanted));
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetInput(dec, data.data(), data.size()));
+ if (j >= 1) {
+ EXPECT_EQ(JXL_DEC_JPEG_RECONSTRUCTION, JxlDecoderProcessInput(dec));
+ VerifyFilePosition(streampos.jbrd_end, data, dec);
+ }
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ VerifyFilePosition(streampos.basic_info, data, dec);
+ if (j >= 2) {
+ EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+ VerifyFilePosition(fp[0].frame_start, data, dec);
+ }
+ if (j >= 3) {
+ EXPECT_EQ(JXL_DEC_NEED_PREVIEW_OUT_BUFFER, JxlDecoderProcessInput(dec));
+ VerifyFilePosition(fp[0].toc_end, data, dec);
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderPreviewOutBufferSize(dec, &format, &buffer_size));
+ EXPECT_GE(pixels2.size(), buffer_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetPreviewOutBuffer(dec, &format, pixels2.data(),
+ buffer_size));
+ EXPECT_EQ(JXL_DEC_PREVIEW_IMAGE, JxlDecoderProcessInput(dec));
+ VerifyFilePosition(fp[1].frame_start, data, dec);
+ }
+ if (j >= 4) {
+ EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+ VerifyFilePosition(fp[1].toc_end, data, dec);
+ }
+ if (j >= 5) {
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+ VerifyFilePosition(fp[1].toc_end, data, dec);
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ EXPECT_EQ(pixels2.size(), buffer_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetImageOutBuffer(dec, &format, pixels2.data(),
+ pixels2.size()));
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ VerifyFilePosition(streampos.codestream_end, data, dec);
+ }
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+ VerifyFilePosition(end_pos, data, dec);
+ JxlDecoderDestroy(dec);
+ }
+ }
+}
+#endif // JPEGXL_ENABLE_JPEG
+
+TEST(DecodeTest, InputHandlingTestStreaming) {
+ size_t xsize = 508, ysize = 470;
+ uint32_t num_channels = 3;
+ std::vector<uint8_t> pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+ for (int i = 0; i < kCSBF_NUM_ENTRIES; ++i) {
+ printf("Testing with box format %d\n", i);
+ fflush(stdout);
+ jxl::TestCodestreamParams params;
+ params.cparams.progressive_dc = 1;
+ params.box_format = (CodeStreamBoxFormat)i;
+ params.preview_mode = jxl::kSmallPreview;
+ jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+ num_channels, params);
+ JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ StreamPositions streampos;
+ AnalyzeCodestream(data, &streampos);
+ const std::vector<FramePositions>& fp = streampos.frames;
+ // We have preview, dc frame and regular frame.
+ EXPECT_EQ(3, fp.size());
+ std::vector<uint8_t> pixels2;
+ pixels2.resize(pixels.size());
+ int events_wanted =
+ (JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING | JXL_DEC_PREVIEW_IMAGE |
+ JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE | JXL_DEC_FRAME_PROGRESSION |
+ JXL_DEC_BOX);
+ for (size_t increment : {1, 7, 27, 1024}) {
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events_wanted));
+ size_t file_pos = 0;
+ size_t box_index = 0;
+ size_t avail_in = 0;
+ for (;;) {
+ const uint8_t* next_in = data.data() + file_pos;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+ JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+ size_t remaining = JxlDecoderReleaseInput(dec);
+ size_t consumed = avail_in - remaining;
+ file_pos += consumed;
+ avail_in += increment;
+ avail_in = std::min<size_t>(avail_in, data.size() - file_pos);
+ if (status == JXL_DEC_BASIC_INFO) {
+ EXPECT_EQ(file_pos, streampos.basic_info);
+ } else if (status == JXL_DEC_COLOR_ENCODING) {
+ EXPECT_EQ(file_pos, streampos.frames[0].frame_start);
+ } else if (status == JXL_DEC_NEED_PREVIEW_OUT_BUFFER) {
+ EXPECT_EQ(file_pos, streampos.frames[0].toc_end);
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderPreviewOutBufferSize(dec, &format, &buffer_size));
+ EXPECT_GE(pixels2.size(), buffer_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetPreviewOutBuffer(dec, &format, pixels2.data(),
+ buffer_size));
+ } else if (status == JXL_DEC_PREVIEW_IMAGE) {
+ EXPECT_EQ(file_pos, streampos.frames[1].frame_start);
+ } else if (status == JXL_DEC_FRAME) {
+ EXPECT_EQ(file_pos, streampos.frames[2].toc_end);
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetProgressiveDetail(dec, kDC));
+ } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+ EXPECT_EQ(file_pos, streampos.frames[2].toc_end);
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ EXPECT_EQ(pixels2.size(), buffer_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetImageOutBuffer(dec, &format, pixels2.data(),
+ pixels2.size()));
+ } else if (status == JXL_DEC_FRAME_PROGRESSION) {
+ EXPECT_EQ(file_pos, streampos.frames[2].section_end[1]);
+ } else if (status == JXL_DEC_FULL_IMAGE) {
+ EXPECT_EQ(file_pos, streampos.codestream_end);
+ } else if (status == JXL_DEC_SUCCESS) {
+ EXPECT_EQ(file_pos, streampos.codestream_end);
+ break;
+ } else if (status == JXL_DEC_NEED_MORE_INPUT) {
+ EXPECT_LT(remaining, 12);
+ if ((i == kCSBF_None && file_pos >= 2) ||
+ (box_index > 0 && box_index < streampos.box_start.size() &&
+ file_pos >= streampos.box_start[box_index - 1] + 12 &&
+ file_pos < streampos.box_start[box_index])) {
+ EXPECT_EQ(remaining, 0);
+ }
+ if (file_pos == data.size()) break;
+ } else if (status == JXL_DEC_BOX) {
+ ASSERT_LT(box_index, streampos.box_start.size());
+ EXPECT_EQ(file_pos, streampos.box_start[box_index++]);
+ } else {
+ printf("Unexpected status: 0x%x\n", (int)status);
+ FAIL();
+ }
+ }
+ JxlDecoderDestroy(dec);
+ }
+ }
+}
+
+TEST(DecodeTest, FlushTest) {
+ // Size large enough for multiple groups, required to have progressive
+ // stages
+ size_t xsize = 333, ysize = 300;
+ uint32_t num_channels = 3;
+ std::vector<uint8_t> pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+ jxl::TestCodestreamParams params;
+ params.preview_mode = jxl::kSmallPreview;
+ jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+ num_channels, params);
+ JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+ std::vector<uint8_t> pixels2;
+ pixels2.resize(pixels.size());
+
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+
+ // Ensure that the first part contains at least the full DC of the image,
+ // otherwise flush does not work.
+ size_t first_part = data.size() - 1;
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), first_part));
+
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(info.xsize, xsize);
+ EXPECT_EQ(info.ysize, ysize);
+
+ EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+ // Output buffer not yet set
+ EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec));
+
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ EXPECT_EQ(pixels2.size(), buffer_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+ dec, &format, pixels2.data(), pixels2.size()));
+
+ // Must process input further until we get JXL_DEC_NEED_MORE_INPUT, even if
+ // data was already input before, since the processing of the frame only
+ // happens at the JxlDecoderProcessInput call after JXL_DEC_FRAME.
+ EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec));
+
+ // Crude test of actual pixel data: pixel threshold of about 4% (2560/65535).
+ // 29000 pixels can be above the threshold
+ EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+ ysize, format, format, 2560.0),
+ 29000u);
+
+ EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+ size_t consumed = first_part - JxlDecoderReleaseInput(dec);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data() + consumed,
+ data.size() - consumed));
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ // Lower threshold for the final (still lossy) image
+ EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+ ysize, format, format, 2560.0),
+ 11000u);
+
+ JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, FlushTestImageOutCallback) {
+ // Size large enough for multiple groups, required to have progressive
+ // stages
+ size_t xsize = 333, ysize = 300;
+ uint32_t num_channels = 3;
+ std::vector<uint8_t> pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+ jxl::TestCodestreamParams params;
+ params.preview_mode = jxl::kSmallPreview;
+ jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+ num_channels, params);
+ JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+ std::vector<uint8_t> pixels2;
+ pixels2.resize(pixels.size());
+
+ size_t bytes_per_pixel = format.num_channels * 2;
+ size_t stride = bytes_per_pixel * xsize;
+ auto callback = [&](size_t x, size_t y, size_t num_pixels,
+ const void* pixels_row) {
+ memcpy(pixels2.data() + stride * y + bytes_per_pixel * x, pixels_row,
+ num_pixels * bytes_per_pixel);
+ };
+
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+
+ // Ensure that the first part contains at least the full DC of the image,
+ // otherwise flush does not work.
+ size_t first_part = data.size() - 1;
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), first_part));
+
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(info.xsize, xsize);
+ EXPECT_EQ(info.ysize, ysize);
+
+ EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+ // Output callback not yet set
+ EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutCallback(
+ dec, &format,
+ [](void* opaque, size_t x, size_t y,
+ size_t xsize, const void* pixels_row) {
+ auto cb =
+ static_cast<decltype(&callback)>(opaque);
+ (*cb)(x, y, xsize, pixels_row);
+ },
+ /*opaque=*/&callback));
+
+ // Must process input further until we get JXL_DEC_NEED_MORE_INPUT, even if
+ // data was already input before, since the processing of the frame only
+ // happens at the JxlDecoderProcessInput call after JXL_DEC_FRAME.
+ EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec));
+
+ // Crude test of actual pixel data: pixel threshold of about 4% (2560/65535).
+ // 29000 pixels can be above the threshold
+ EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+ ysize, format, format, 2560.0),
+ 29000u);
+
+ EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+ size_t consumed = first_part - JxlDecoderReleaseInput(dec);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data() + consumed,
+ data.size() - consumed));
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ // Lower threshold for the final (still lossy) image
+ EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+ ysize, format, format, 2560.0),
+ 11000u);
+
+ JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, FlushTestLossyProgressiveAlpha) {
+ // Size large enough for multiple groups, required to have progressive
+ // stages
+ size_t xsize = 333, ysize = 300;
+ uint32_t num_channels = 4;
+ std::vector<uint8_t> pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+ jxl::TestCodestreamParams params;
+ params.preview_mode = jxl::kSmallPreview;
+ jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+ num_channels, params);
+ JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+ std::vector<uint8_t> pixels2;
+ pixels2.resize(pixels.size());
+
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+
+ // Ensure that the first part contains at least the full DC of the image,
+ // otherwise flush does not work.
+ size_t first_part = data.size() - 1;
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), first_part));
+
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(info.xsize, xsize);
+ EXPECT_EQ(info.ysize, ysize);
+
+ EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+ // Output buffer not yet set
+ EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec));
+
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ EXPECT_EQ(pixels2.size(), buffer_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+ dec, &format, pixels2.data(), pixels2.size()));
+
+ // Must process input further until we get JXL_DEC_NEED_MORE_INPUT, even if
+ // data was already input before, since the processing of the frame only
+ // happens at the JxlDecoderProcessInput call after JXL_DEC_FRAME.
+ EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec));
+
+ EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+ ysize, format, format, 2560.0),
+ 30000u);
+
+ EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+ size_t consumed = first_part - JxlDecoderReleaseInput(dec);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data() + consumed,
+ data.size() - consumed));
+
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+ ysize, format, format, 2560.0),
+ 11000u);
+
+ JxlDecoderDestroy(dec);
+}
+TEST(DecodeTest, FlushTestLossyProgressiveAlphaUpsampling) {
+ size_t xsize = 533, ysize = 401;
+ uint32_t num_channels = 4;
+ std::vector<uint8_t> pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+ jxl::TestCodestreamParams params;
+ params.cparams.resampling = 2;
+ params.cparams.ec_resampling = 4;
+ params.preview_mode = jxl::kSmallPreview;
+ jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+ num_channels, params);
+ JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+ std::vector<uint8_t> pixels2;
+ pixels2.resize(pixels.size());
+
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+
+ // Ensure that the first part contains at least the full DC of the image,
+ // otherwise flush does not work.
+ size_t first_part = data.size() * 2 / 3;
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), first_part));
+
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(info.xsize, xsize);
+ EXPECT_EQ(info.ysize, ysize);
+
+ EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+ // Output buffer not yet set
+ EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec));
+
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ EXPECT_EQ(pixels2.size(), buffer_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+ dec, &format, pixels2.data(), pixels2.size()));
+
+ // Must process input further until we get JXL_DEC_NEED_MORE_INPUT, even if
+ // data was already input before, since the processing of the frame only
+ // happens at the JxlDecoderProcessInput call after JXL_DEC_FRAME.
+ EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec));
+
+ EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+ ysize, format, format, 2560.0),
+ 125000u);
+
+ EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+ size_t consumed = first_part - JxlDecoderReleaseInput(dec);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data() + consumed,
+ data.size() - consumed));
+
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+ ysize, format, format, 2560.0),
+ 70000u);
+
+ JxlDecoderDestroy(dec);
+}
+TEST(DecodeTest, FlushTestLosslessProgressiveAlpha) {
+ // Size large enough for multiple groups, required to have progressive
+ // stages
+ size_t xsize = 333, ysize = 300;
+ uint32_t num_channels = 4;
+ std::vector<uint8_t> pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+ jxl::TestCodestreamParams params;
+ params.cparams.SetLossless();
+ params.cparams.speed_tier = jxl::SpeedTier::kThunder;
+ params.cparams.responsive = 1;
+ params.preview_mode = jxl::kSmallPreview;
+ jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+ num_channels, params);
+ JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+
+ std::vector<uint8_t> pixels2;
+ pixels2.resize(pixels.size());
+
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+
+ // Ensure that the first part contains at least the full DC of the image,
+ // otherwise flush does not work.
+ size_t first_part = data.size() / 2;
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), first_part));
+
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(info.xsize, xsize);
+ EXPECT_EQ(info.ysize, ysize);
+
+ EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+
+ // Output buffer not yet set
+ EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec));
+
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ EXPECT_EQ(pixels2.size(), buffer_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+ dec, &format, pixels2.data(), pixels2.size()));
+
+ // Must process input further until we get JXL_DEC_NEED_MORE_INPUT, even if
+ // data was already input before, since the processing of the frame only
+ // happens at the JxlDecoderProcessInput call after JXL_DEC_FRAME.
+ EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec));
+
+ EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+ ysize, format, format, 2560.0),
+ 2700u);
+
+ EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+
+ size_t consumed = first_part - JxlDecoderReleaseInput(dec);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data() + consumed,
+ data.size() - consumed));
+
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize,
+ ysize, format, format),
+ 0u);
+
+ JxlDecoderDestroy(dec);
+}
+
+class DecodeProgressiveTest : public ::testing::TestWithParam<int> {};
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(DecodeProgressiveTestInstantiation,
+ DecodeProgressiveTest,
+ ::testing::Range(0, 8));
+TEST_P(DecodeProgressiveTest, ProgressiveEventTest) {
+ const int params = GetParam();
+ int single_group = params & 1;
+ int lossless = (params >> 1) & 1;
+ uint32_t num_channels = 3 + ((params >> 2) & 1);
+ std::set<JxlProgressiveDetail> progressive_details = {kDC, kLastPasses,
+ kPasses};
+ for (auto prog_detail : progressive_details) {
+ // Only few combinations are expected to support outputting
+ // intermediate flushes for complete DC and complete passes.
+ // The test can be updated if more cases are expected to support it.
+ bool expect_flush = (num_channels & 1) && !lossless;
+ size_t xsize, ysize;
+ if (single_group) {
+ // An image smaller than 256x256 ensures it contains only 1 group.
+ xsize = 99;
+ ysize = 100;
+ } else {
+ xsize = 277;
+ ysize = 280;
+ }
+ std::vector<uint8_t> pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0);
+ JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ jxl::ColorEncoding color_encoding = jxl::ColorEncoding::SRGB(false);
+ jxl::CodecInOut io;
+ EXPECT_TRUE(jxl::ConvertFromExternal(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+ color_encoding,
+ /*bits_per_sample=*/16, format,
+ /*pool=*/nullptr, &io.Main()));
+ jxl::TestCodestreamParams params;
+ if (lossless) {
+ params.cparams.SetLossless();
+ } else {
+ params.cparams.butteraugli_distance = 0.5f;
+ }
+ jxl::PassDefinition passes[] = {
+ {2, 0, 4}, {4, 0, 4}, {8, 2, 2}, {8, 1, 2}, {8, 0, 1}};
+ const int kNumPasses = 5;
+ jxl::ProgressiveMode progressive_mode{passes};
+ params.progressive_mode = &progressive_mode;
+ jxl::PaddedBytes data = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+ num_channels, params);
+
+ for (size_t increment : {(size_t)1, data.size()}) {
+ printf(
+ "Testing with single_group=%d, lossless=%d, "
+ "num_channels=%d, prog_detail=%d, increment=%d\n",
+ single_group, lossless, (int)num_channels, (int)prog_detail,
+ (int)increment);
+ std::vector<std::vector<uint8_t>> passes(kNumPasses + 1);
+ for (int i = 0; i <= kNumPasses; ++i) {
+ passes[i].resize(pixels.size());
+ }
+
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME |
+ JXL_DEC_FULL_IMAGE | JXL_DEC_FRAME_PROGRESSION));
+ EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderSetProgressiveDetail(dec, kFrames));
+ EXPECT_EQ(JXL_DEC_ERROR,
+ JxlDecoderSetProgressiveDetail(dec, kDCProgressive));
+ EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderSetProgressiveDetail(dec, kDCGroups));
+ EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderSetProgressiveDetail(dec, kGroups));
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetProgressiveDetail(dec, prog_detail));
+
+ uint8_t* next_in = data.data();
+ size_t avail_in = 0;
+ size_t pos = 0;
+
+ auto process_input = [&]() {
+ for (;;) {
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetInput(dec, next_in, avail_in));
+ JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+ size_t remaining = JxlDecoderReleaseInput(dec);
+ EXPECT_LE(remaining, avail_in);
+ next_in += avail_in - remaining;
+ avail_in = remaining;
+ if (status == JXL_DEC_NEED_MORE_INPUT && pos < data.size()) {
+ size_t chunk = std::min<size_t>(increment, data.size() - pos);
+ pos += chunk;
+ avail_in += chunk;
+ continue;
+ }
+ return status;
+ }
+ };
+
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, process_input());
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(info.xsize, xsize);
+ EXPECT_EQ(info.ysize, ysize);
+
+ EXPECT_EQ(JXL_DEC_FRAME, process_input());
+
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ EXPECT_EQ(pixels.size(), buffer_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+ dec, &format, passes[kNumPasses].data(),
+ passes[kNumPasses].size()));
+
+ auto next_pass = [&](int pass) {
+ if (prog_detail <= kDC) return kNumPasses;
+ if (prog_detail <= kLastPasses) {
+ return std::min(pass + 2, kNumPasses);
+ }
+ return pass + 1;
+ };
+
+ if (expect_flush) {
+ // Return a particular downsampling ratio only after the last
+ // pass for that downsampling was processed.
+ int expected_downsampling_ratios[] = {8, 8, 4, 4, 2};
+ for (int p = 0; p < kNumPasses; p = next_pass(p)) {
+ EXPECT_EQ(JXL_DEC_FRAME_PROGRESSION, process_input());
+ EXPECT_EQ(expected_downsampling_ratios[p],
+ JxlDecoderGetIntendedDownsamplingRatio(dec));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec));
+ passes[p] = passes[kNumPasses];
+ }
+ }
+
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, process_input());
+ EXPECT_EQ(JXL_DEC_SUCCESS, process_input());
+
+ JxlDecoderDestroy(dec);
+
+ if (!expect_flush) {
+ continue;
+ }
+ jxl::ButteraugliParams ba;
+ std::vector<float> distances(kNumPasses + 1);
+ for (int p = 0;; p = next_pass(p)) {
+ jxl::CodecInOut io1;
+ EXPECT_TRUE(jxl::ConvertFromExternal(
+ jxl::Span<const uint8_t>(passes[p].data(), passes[p].size()), xsize,
+ ysize, color_encoding,
+ /*bits_per_sample=*/16, format,
+ /*pool=*/nullptr, &io1.Main()));
+ distances[p] = ButteraugliDistance(io.frames, io1.frames, ba,
+ jxl::GetJxlCms(), nullptr, nullptr);
+ if (p == kNumPasses) break;
+ }
+ const float kMaxDistance[kNumPasses + 1] = {30.0f, 20.0f, 10.0f,
+ 5.0f, 3.0f, 2.0f};
+ EXPECT_LT(distances[kNumPasses], kMaxDistance[kNumPasses]);
+ for (int p = 0; p < kNumPasses;) {
+ int next_p = next_pass(p);
+ EXPECT_LT(distances[p], kMaxDistance[p]);
+ // Verify that the returned pass image is actually not the
+ // same as the next pass image, by checking that it has a bit
+ // worse butteraugli score.
+ EXPECT_LT(distances[next_p] * 1.1f, distances[p]);
+ p = next_p;
+ }
+ }
+ }
+}
+
+void VerifyJPEGReconstruction(const jxl::PaddedBytes& container,
+ const jxl::PaddedBytes& jpeg_bytes) {
+ JxlDecoderPtr dec = JxlDecoderMake(nullptr);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec.get(), JXL_DEC_JPEG_RECONSTRUCTION | JXL_DEC_FULL_IMAGE));
+ JxlDecoderSetInput(dec.get(), container.data(), container.size());
+ EXPECT_EQ(JXL_DEC_JPEG_RECONSTRUCTION, JxlDecoderProcessInput(dec.get()));
+ std::vector<uint8_t> reconstructed_buffer(128);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetJPEGBuffer(dec.get(), reconstructed_buffer.data(),
+ reconstructed_buffer.size()));
+ size_t used = 0;
+ JxlDecoderStatus process_result = JXL_DEC_JPEG_NEED_MORE_OUTPUT;
+ while (process_result == JXL_DEC_JPEG_NEED_MORE_OUTPUT) {
+ used = reconstructed_buffer.size() - JxlDecoderReleaseJPEGBuffer(dec.get());
+ reconstructed_buffer.resize(reconstructed_buffer.size() * 2);
+ EXPECT_EQ(
+ JXL_DEC_SUCCESS,
+ JxlDecoderSetJPEGBuffer(dec.get(), reconstructed_buffer.data() + used,
+ reconstructed_buffer.size() - used));
+ process_result = JxlDecoderProcessInput(dec.get());
+ }
+ ASSERT_EQ(JXL_DEC_FULL_IMAGE, process_result);
+ used = reconstructed_buffer.size() - JxlDecoderReleaseJPEGBuffer(dec.get());
+ ASSERT_EQ(used, jpeg_bytes.size());
+ EXPECT_EQ(0, memcmp(reconstructed_buffer.data(), jpeg_bytes.data(), used));
+}
+
+#if JPEGXL_ENABLE_JPEG
+TEST(DecodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGReconstructTestCodestream)) {
+ size_t xsize = 123;
+ size_t ysize = 77;
+ size_t channels = 3;
+ std::vector<uint8_t> pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, channels, /*seed=*/0);
+ jxl::PaddedBytes jpeg_codestream;
+ jxl::TestCodestreamParams params;
+ params.cparams.color_transform = jxl::ColorTransform::kNone;
+ params.box_format = kCSBF_Single;
+ params.jpeg_codestream = &jpeg_codestream;
+ params.preview_mode = jxl::kSmallPreview;
+ jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize,
+ channels, params);
+ VerifyJPEGReconstruction(compressed, jpeg_codestream);
+}
+#endif // JPEGXL_ENABLE_JPEG
+
+TEST(DecodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGReconstructionTest)) {
+ const std::string jpeg_path = "jxl/flower/flower.png.im_q85_420.jpg";
+ const jxl::PaddedBytes orig = jxl::test::ReadTestData(jpeg_path);
+ jxl::CodecInOut orig_io;
+ ASSERT_TRUE(
+ jxl::jpeg::DecodeImageJPG(jxl::Span<const uint8_t>(orig), &orig_io));
+ orig_io.metadata.m.xyb_encoded = false;
+ jxl::BitWriter writer;
+ ASSERT_TRUE(WriteCodestreamHeaders(&orig_io.metadata, &writer, nullptr));
+ writer.ZeroPadToByte();
+ jxl::PassesEncoderState enc_state;
+ jxl::CompressParams cparams;
+ cparams.color_transform = jxl::ColorTransform::kNone;
+ ASSERT_TRUE(jxl::EncodeFrame(cparams, jxl::FrameInfo{}, &orig_io.metadata,
+ orig_io.Main(), &enc_state, jxl::GetJxlCms(),
+ /*pool=*/nullptr, &writer,
+ /*aux_out=*/nullptr));
+
+ jxl::PaddedBytes jpeg_data;
+ ASSERT_TRUE(
+ EncodeJPEGData(*orig_io.Main().jpeg_data.get(), &jpeg_data, cparams));
+ jxl::PaddedBytes container;
+ container.append(jxl::kContainerHeader,
+ jxl::kContainerHeader + sizeof(jxl::kContainerHeader));
+ jxl::AppendBoxHeader(jxl::MakeBoxType("jbrd"), jpeg_data.size(), false,
+ &container);
+ container.append(jpeg_data.data(), jpeg_data.data() + jpeg_data.size());
+ jxl::AppendBoxHeader(jxl::MakeBoxType("jxlc"), 0, true, &container);
+ jxl::PaddedBytes codestream = std::move(writer).TakeBytes();
+ container.append(codestream.data(), codestream.data() + codestream.size());
+ VerifyJPEGReconstruction(container, orig);
+}
+
+TEST(DecodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGReconstructionMetadataTest)) {
+ const std::string jpeg_path = "jxl/jpeg_reconstruction/1x1_exif_xmp.jpg";
+ const std::string jxl_path = "jxl/jpeg_reconstruction/1x1_exif_xmp.jxl";
+ const jxl::PaddedBytes jpeg = jxl::test::ReadTestData(jpeg_path);
+ const jxl::PaddedBytes jxl = jxl::test::ReadTestData(jxl_path);
+ VerifyJPEGReconstruction(jxl, jpeg);
+}
+
+TEST(DecodeTest, ContinueFinalNonEssentialBoxTest) {
+ size_t xsize = 80, ysize = 90;
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+ jxl::TestCodestreamParams params;
+ params.box_format = kCSBF_Multi_Other_Terminated;
+ params.add_icc_profile = true;
+ jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 4,
+ params);
+ StreamPositions streampos;
+ AnalyzeCodestream(compressed, &streampos);
+
+ // The non-essential final box size including 8-byte header
+ size_t final_box_size = unk3_box_size + 8;
+ size_t last_box_begin = compressed.size() - final_box_size;
+ // Verify that the test is indeed setup correctly to be at the beginning of
+ // the 'unkn' box header.
+ ASSERT_EQ(compressed[last_box_begin + 3], final_box_size);
+ ASSERT_EQ(compressed[last_box_begin + 4], 'u');
+ ASSERT_EQ(compressed[last_box_begin + 5], 'n');
+ ASSERT_EQ(compressed[last_box_begin + 6], 'k');
+ ASSERT_EQ(compressed[last_box_begin + 7], '3');
+
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetInput(dec, compressed.data(), last_box_begin));
+
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec));
+ // The decoder returns success despite not having seen the final unknown box
+ // yet. This is because calling JxlDecoderCloseInput is not mandatory for
+ // backwards compatibility, so it doesn't know more bytes follow, the current
+ // bytes ended at a perfectly valid place.
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+ size_t remaining = JxlDecoderReleaseInput(dec);
+ // Since the test was set up to end exactly at the boundary of the final
+ // codestream box, and the decoder returned success, all bytes are expected to
+ // be consumed until the end of the frame header.
+ EXPECT_EQ(remaining, last_box_begin - streampos.frames[0].toc_end);
+
+ // Now set the remaining non-codestream box as input.
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetInput(dec, compressed.data() + last_box_begin,
+ compressed.size() - last_box_begin));
+ // Even though JxlDecoderProcessInput already returned JXL_DEC_SUCCESS before,
+ // when calling it again now after setting more input, success is expected, no
+ // event occurs but the box has been successfully skipped.
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+ JxlDecoderDestroy(dec);
+}
+
+namespace {
+bool BoxTypeEquals(const std::string& type_string, JxlBoxType type) {
+ return type_string.size() == 4 && type_string[0] == type[0] &&
+ type_string[1] == type[1] && type_string[2] == type[2] &&
+ type_string[3] == type[3];
+}
+} // namespace
+
+TEST(DecodeTest, ExtentedBoxSizeTest) {
+ const std::string jxl_path = "jxl/boxes/square-extended-size-container.jxl";
+ const jxl::PaddedBytes orig = jxl::test::ReadTestData(jxl_path);
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, JXL_DEC_BOX));
+
+ JxlBoxType type;
+ uint64_t box_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, orig.data(), orig.size()));
+ EXPECT_EQ(JXL_DEC_BOX, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec, type, JXL_FALSE));
+ EXPECT_TRUE(BoxTypeEquals("JXL ", type));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxSizeRaw(dec, &box_size));
+ EXPECT_EQ(12, box_size);
+ EXPECT_EQ(JXL_DEC_BOX, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec, type, JXL_FALSE));
+ EXPECT_TRUE(BoxTypeEquals("ftyp", type));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxSizeRaw(dec, &box_size));
+ EXPECT_EQ(20, box_size);
+ EXPECT_EQ(JXL_DEC_BOX, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec, type, JXL_FALSE));
+ EXPECT_TRUE(BoxTypeEquals("jxlc", type));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxSizeRaw(dec, &box_size));
+ EXPECT_EQ(72, box_size);
+
+ JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, JXL_BOXES_TEST(BoxTest)) {
+ size_t xsize = 1, ysize = 1;
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+ jxl::TestCodestreamParams params;
+ params.box_format = kCSBF_Multi_Other_Terminated;
+ params.add_icc_profile = true;
+ jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 4,
+ params);
+
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, JXL_DEC_BOX));
+
+ std::vector<std::string> expected_box_types = {
+ "JXL ", "ftyp", "jxlp", "unk1", "unk2", "jxlp", "jxlp", "jxlp", "unk3"};
+
+ // Value 0 means to not test the size: codestream is not required to be a
+ // particular exact size.
+ std::vector<size_t> expected_box_sizes = {12, 20, 0, 34, 18, 0, 0, 0, 20};
+
+ JxlBoxType type;
+ uint64_t box_size;
+ std::vector<uint8_t> contents(50);
+ size_t expected_release_size = 0;
+
+ // Cannot get these when decoding didn't start yet
+ EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderGetBoxType(dec, type, JXL_FALSE));
+ EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderGetBoxSizeRaw(dec, &box_size));
+
+ uint8_t* next_in = compressed.data();
+ size_t avail_in = compressed.size();
+ for (size_t i = 0; i < expected_box_types.size(); i++) {
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+ EXPECT_EQ(JXL_DEC_BOX, JxlDecoderProcessInput(dec));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec, type, JXL_FALSE));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxSizeRaw(dec, &box_size));
+ EXPECT_TRUE(BoxTypeEquals(expected_box_types[i], type));
+ if (expected_box_sizes[i]) {
+ EXPECT_EQ(expected_box_sizes[i], box_size);
+ }
+
+ if (expected_release_size > 0) {
+ EXPECT_EQ(expected_release_size, JxlDecoderReleaseBoxBuffer(dec));
+ expected_release_size = 0;
+ }
+
+ if (type[0] == 'u' && type[1] == 'n' && type[2] == 'k') {
+ JxlDecoderSetBoxBuffer(dec, contents.data(), contents.size());
+ size_t expected_box_contents_size =
+ type[3] == '1' ? unk1_box_size
+ : (type[3] == '2' ? unk2_box_size : unk3_box_size);
+ expected_release_size = contents.size() - expected_box_contents_size;
+ }
+ size_t consumed = avail_in - JxlDecoderReleaseInput(dec);
+ next_in += consumed;
+ avail_in -= consumed;
+ }
+
+ // After the last DEC_BOX event, check that the input position is exactly at
+ // the stat of the box header.
+ EXPECT_EQ(avail_in, expected_box_sizes.back());
+
+ // Even though all input is given, the decoder cannot assume there aren't
+ // more boxes if the input was not closed.
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in));
+ EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec));
+ JxlDecoderCloseInput(dec);
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+ JxlDecoderDestroy(dec);
+}
+
+TEST(DecodeTest, JXL_BOXES_TEST(ExifBrobBoxTest)) {
+ size_t xsize = 1, ysize = 1;
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+ jxl::TestCodestreamParams params;
+ // Lossless to verify pixels exactly after roundtrip.
+ params.cparams.SetLossless();
+ params.box_format = kCSBF_Brob_Exif;
+ params.add_icc_profile = true;
+ jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 4,
+ params);
+
+ // Test raw brob box, not brotli-decompressing
+ for (int streaming = 0; streaming < 2; ++streaming) {
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, JXL_DEC_BOX));
+ if (!streaming) {
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+ JxlDecoderCloseInput(dec);
+ }
+ // for streaming input case
+ const uint8_t* next_in = compressed.data();
+ size_t avail_in = 0;
+ size_t total_in = 0;
+ size_t step_size = 64;
+
+ std::vector<uint8_t> box_buffer;
+ size_t box_num_output;
+ bool seen_brob_begin = false;
+ bool seen_brob_end = false;
+
+ for (;;) {
+ JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+ if (status == JXL_DEC_NEED_MORE_INPUT) {
+ if (streaming) {
+ size_t remaining = JxlDecoderReleaseInput(dec);
+ EXPECT_LE(remaining, avail_in);
+ next_in += avail_in - remaining;
+ avail_in = remaining;
+ size_t amount = step_size;
+ if (total_in + amount > compressed.size()) {
+ amount = compressed.size() - total_in;
+ }
+ avail_in += amount;
+ total_in += amount;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetInput(dec, next_in, avail_in));
+ if (total_in == compressed.size()) JxlDecoderCloseInput(dec);
+ } else {
+ FAIL();
+ break;
+ }
+ } else if (status == JXL_DEC_BOX || status == JXL_DEC_SUCCESS) {
+ if (!box_buffer.empty()) {
+ EXPECT_EQ(false, seen_brob_end);
+ seen_brob_end = true;
+ size_t remaining = JxlDecoderReleaseBoxBuffer(dec);
+ box_num_output = box_buffer.size() - remaining;
+ EXPECT_EQ(box_num_output, box_brob_exif_size - 8);
+ EXPECT_EQ(
+ 0, memcmp(box_buffer.data(), box_brob_exif + 8, box_num_output));
+ box_buffer.clear();
+ }
+ if (status == JXL_DEC_SUCCESS) break;
+ JxlBoxType type;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec, type, JXL_FALSE));
+ if (BoxTypeEquals("brob", type)) {
+ EXPECT_EQ(false, seen_brob_begin);
+ seen_brob_begin = true;
+ box_buffer.resize(8);
+ JxlDecoderSetBoxBuffer(dec, box_buffer.data(), box_buffer.size());
+ }
+ } else if (status == JXL_DEC_BOX_NEED_MORE_OUTPUT) {
+ size_t remaining = JxlDecoderReleaseBoxBuffer(dec);
+ box_num_output = box_buffer.size() - remaining;
+ box_buffer.resize(box_buffer.size() * 2);
+ JxlDecoderSetBoxBuffer(dec, box_buffer.data() + box_num_output,
+ box_buffer.size() - box_num_output);
+ } else {
+ // We do not expect any other events or errors
+ FAIL();
+ break;
+ }
+ }
+
+ EXPECT_EQ(true, seen_brob_begin);
+ EXPECT_EQ(true, seen_brob_end);
+
+ JxlDecoderDestroy(dec);
+ }
+
+ // Test decompressed brob box
+ for (int streaming = 0; streaming < 2; ++streaming) {
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, JXL_DEC_BOX));
+ if (!streaming) {
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+ JxlDecoderCloseInput(dec);
+ }
+ // for streaming input case
+ const uint8_t* next_in = compressed.data();
+ size_t avail_in = 0;
+ size_t total_in = 0;
+ size_t step_size = 64;
+
+ std::vector<uint8_t> box_buffer;
+ size_t box_num_output;
+ bool seen_exif_begin = false;
+ bool seen_exif_end = false;
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetDecompressBoxes(dec, JXL_TRUE));
+
+ for (;;) {
+ JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+ if (status == JXL_DEC_NEED_MORE_INPUT) {
+ if (streaming) {
+ size_t remaining = JxlDecoderReleaseInput(dec);
+ EXPECT_LE(remaining, avail_in);
+ next_in += avail_in - remaining;
+ avail_in = remaining;
+ size_t amount = step_size;
+ if (total_in + amount > compressed.size()) {
+ amount = compressed.size() - total_in;
+ }
+ avail_in += amount;
+ total_in += amount;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetInput(dec, next_in, avail_in));
+ if (total_in == compressed.size()) JxlDecoderCloseInput(dec);
+ } else {
+ FAIL();
+ break;
+ }
+ } else if (status == JXL_DEC_BOX || status == JXL_DEC_SUCCESS) {
+ if (!box_buffer.empty()) {
+ EXPECT_EQ(false, seen_exif_end);
+ seen_exif_end = true;
+ size_t remaining = JxlDecoderReleaseBoxBuffer(dec);
+ box_num_output = box_buffer.size() - remaining;
+ // Expect that the output has the same size and contents as the
+ // uncompressed exif data. Only check contents if the sizes match to
+ // avoid comparing uninitialized memory in the test.
+ EXPECT_EQ(box_num_output, exif_uncompressed_size);
+ if (box_num_output == exif_uncompressed_size) {
+ EXPECT_EQ(0, memcmp(box_buffer.data(), exif_uncompressed,
+ exif_uncompressed_size));
+ }
+ box_buffer.clear();
+ }
+ if (status == JXL_DEC_SUCCESS) break;
+ JxlBoxType type;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec, type, JXL_TRUE));
+ if (BoxTypeEquals("Exif", type)) {
+ EXPECT_EQ(false, seen_exif_begin);
+ seen_exif_begin = true;
+ box_buffer.resize(8);
+ JxlDecoderSetBoxBuffer(dec, box_buffer.data(), box_buffer.size());
+ }
+ } else if (status == JXL_DEC_BOX_NEED_MORE_OUTPUT) {
+ size_t remaining = JxlDecoderReleaseBoxBuffer(dec);
+ box_num_output = box_buffer.size() - remaining;
+ box_buffer.resize(box_buffer.size() * 2);
+ JxlDecoderSetBoxBuffer(dec, box_buffer.data() + box_num_output,
+ box_buffer.size() - box_num_output);
+ } else {
+ // We do not expect any other events or errors
+ FAIL();
+ break;
+ }
+ }
+
+ EXPECT_EQ(true, seen_exif_begin);
+ EXPECT_EQ(true, seen_exif_end);
+
+ JxlDecoderDestroy(dec);
+ }
+}
+
+TEST(DecodeTest, JXL_BOXES_TEST(PartialCodestreamBoxTest)) {
+ size_t xsize = 23, ysize = 81;
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+ JxlPixelFormat format_orig = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ // Lossless to verify pixels exactly after roundtrip.
+ jxl::TestCodestreamParams params;
+ params.cparams.SetLossless();
+ params.cparams.speed_tier = jxl::SpeedTier::kThunder;
+ params.box_format = kCSBF_Multi;
+ params.add_icc_profile = true;
+ jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream(
+ jxl::Span<const uint8_t>(pixels.data(), pixels.size()), xsize, ysize, 4,
+ params);
+
+ std::vector<uint8_t> extracted_codestream;
+
+ {
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE | JXL_DEC_BOX));
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+ JxlDecoderCloseInput(dec);
+
+ size_t num_jxlp = 0;
+
+ std::vector<uint8_t> pixels2;
+ pixels2.resize(pixels.size());
+
+ std::vector<uint8_t> box_buffer;
+ size_t box_num_output;
+
+ for (;;) {
+ JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+ if (status == JXL_DEC_NEED_MORE_INPUT) {
+ FAIL();
+ break;
+ } else if (status == JXL_DEC_BASIC_INFO) {
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(info.xsize, xsize);
+ EXPECT_EQ(info.ysize, ysize);
+ } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetImageOutBuffer(dec, &format_orig, pixels2.data(),
+ pixels2.size()));
+ } else if (status == JXL_DEC_FULL_IMAGE) {
+ continue;
+ } else if (status == JXL_DEC_BOX || status == JXL_DEC_SUCCESS) {
+ if (!box_buffer.empty()) {
+ size_t remaining = JxlDecoderReleaseBoxBuffer(dec);
+ box_num_output = box_buffer.size() - remaining;
+ EXPECT_GE(box_num_output, 4);
+ // Do not insert the first 4 bytes, which are not part of the
+ // codestream, but the partial codestream box index
+ extracted_codestream.insert(extracted_codestream.end(),
+ box_buffer.begin() + 4,
+ box_buffer.begin() + box_num_output);
+ box_buffer.clear();
+ }
+ if (status == JXL_DEC_SUCCESS) break;
+ JxlBoxType type;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec, type, JXL_FALSE));
+ if (BoxTypeEquals("jxlp", type)) {
+ num_jxlp++;
+ box_buffer.resize(8);
+ JxlDecoderSetBoxBuffer(dec, box_buffer.data(), box_buffer.size());
+ }
+ } else if (status == JXL_DEC_BOX_NEED_MORE_OUTPUT) {
+ size_t remaining = JxlDecoderReleaseBoxBuffer(dec);
+ box_num_output = box_buffer.size() - remaining;
+ box_buffer.resize(box_buffer.size() * 2);
+ JxlDecoderSetBoxBuffer(dec, box_buffer.data() + box_num_output,
+ box_buffer.size() - box_num_output);
+ } else {
+ // We do not expect any other events or errors
+ FAIL();
+ break;
+ }
+ }
+
+ // The test file created with kCSBF_Multi is expected to have 4 jxlp boxes.
+ EXPECT_EQ(4, num_jxlp);
+
+ EXPECT_EQ(0u, jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize,
+ ysize, format_orig, format_orig));
+
+ JxlDecoderDestroy(dec);
+ }
+
+ // Now test whether the codestream extracted from the jxlp boxes can itself
+ // also be decoded and gives the same pixels
+ {
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE | JXL_DEC_BOX));
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetInput(dec, extracted_codestream.data(),
+ extracted_codestream.size()));
+ JxlDecoderCloseInput(dec);
+
+ size_t num_boxes = 0;
+
+ std::vector<uint8_t> pixels2;
+ pixels2.resize(pixels.size());
+
+ std::vector<uint8_t> box_buffer;
+ size_t box_num_output;
+
+ for (;;) {
+ JxlDecoderStatus status = JxlDecoderProcessInput(dec);
+ if (status == JXL_DEC_NEED_MORE_INPUT) {
+ FAIL();
+ break;
+ } else if (status == JXL_DEC_BASIC_INFO) {
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(info.xsize, xsize);
+ EXPECT_EQ(info.ysize, ysize);
+ } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetImageOutBuffer(dec, &format_orig, pixels2.data(),
+ pixels2.size()));
+ } else if (status == JXL_DEC_FULL_IMAGE) {
+ continue;
+ } else if (status == JXL_DEC_BOX) {
+ num_boxes++;
+ } else if (status == JXL_DEC_BOX_NEED_MORE_OUTPUT) {
+ size_t remaining = JxlDecoderReleaseBoxBuffer(dec);
+ box_num_output = box_buffer.size() - remaining;
+ box_buffer.resize(box_buffer.size() * 2);
+ JxlDecoderSetBoxBuffer(dec, box_buffer.data() + box_num_output,
+ box_buffer.size() - box_num_output);
+ } else if (status == JXL_DEC_SUCCESS) {
+ break;
+ } else {
+ // We do not expect any other events or errors
+ FAIL();
+ break;
+ }
+ }
+
+ EXPECT_EQ(0, num_boxes); // The data does not use the container format.
+ EXPECT_EQ(0u, jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize,
+ ysize, format_orig, format_orig));
+
+ JxlDecoderDestroy(dec);
+ }
+}
+
+TEST(DecodeTest, SpotColorTest) {
+ jxl::ThreadPool* pool = nullptr;
+ jxl::CodecInOut io;
+ size_t xsize = 55, ysize = 257;
+ io.metadata.m.color_encoding = jxl::ColorEncoding::LinearSRGB();
+ jxl::Image3F main(xsize, ysize);
+ jxl::ImageF spot(xsize, ysize);
+ jxl::ZeroFillImage(&main);
+ jxl::ZeroFillImage(&spot);
+
+ for (size_t y = 0; y < ysize; y++) {
+ float* JXL_RESTRICT rowm = main.PlaneRow(1, y);
+ float* JXL_RESTRICT rows = spot.Row(y);
+ for (size_t x = 0; x < xsize; x++) {
+ rowm[x] = (x + y) * (1.f / 255.f);
+ rows[x] = ((x ^ y) & 255) * (1.f / 255.f);
+ }
+ }
+ io.SetFromImage(std::move(main), jxl::ColorEncoding::LinearSRGB());
+ jxl::ExtraChannelInfo info;
+ info.bit_depth.bits_per_sample = 8;
+ info.dim_shift = 0;
+ info.type = jxl::ExtraChannel::kSpotColor;
+ info.spot_color[0] = 0.5f;
+ info.spot_color[1] = 0.2f;
+ info.spot_color[2] = 1.f;
+ info.spot_color[3] = 0.5f;
+
+ io.metadata.m.extra_channel_info.push_back(info);
+ std::vector<jxl::ImageF> ec;
+ ec.push_back(std::move(spot));
+ io.frames[0].SetExtraChannels(std::move(ec));
+
+ jxl::CompressParams cparams;
+ cparams.speed_tier = jxl::SpeedTier::kLightning;
+ cparams.modular_mode = true;
+ cparams.color_transform = jxl::ColorTransform::kNone;
+ cparams.butteraugli_distance = 0.f;
+
+ jxl::PaddedBytes compressed;
+ std::unique_ptr<jxl::PassesEncoderState> enc_state =
+ jxl::make_unique<jxl::PassesEncoderState>();
+ EXPECT_TRUE(jxl::EncodeFile(cparams, &io, enc_state.get(), &compressed,
+ jxl::GetJxlCms(), nullptr, pool));
+
+ for (size_t render_spot = 0; render_spot < 2; render_spot++) {
+ JxlPixelFormat format = {3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+
+ JxlDecoder* dec = JxlDecoderCreate(NULL);
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec, JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE));
+ if (!render_spot) {
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetRenderSpotcolors(dec, JXL_FALSE));
+ }
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetInput(dec, compressed.data(), compressed.size()));
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ JxlBasicInfo binfo;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &binfo));
+ EXPECT_EQ(1u, binfo.num_extra_channels);
+ EXPECT_EQ(xsize, binfo.xsize);
+ EXPECT_EQ(ysize, binfo.ysize);
+
+ JxlExtraChannelInfo extra_info;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetExtraChannelInfo(dec, 0, &extra_info));
+ EXPECT_EQ((unsigned int)jxl::ExtraChannel::kSpotColor, extra_info.type);
+
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ size_t extra_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderExtraChannelBufferSize(dec, &format, &extra_size, 0));
+
+ std::vector<uint8_t> image(buffer_size);
+ std::vector<uint8_t> extra(extra_size);
+ size_t bytes_per_pixel = format.num_channels *
+ jxl::test::GetDataBits(format.data_type) /
+ jxl::kBitsPerByte;
+ size_t stride = bytes_per_pixel * binfo.xsize;
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(
+ dec, &format, image.data(), image.size()));
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetExtraChannelBuffer(dec, &format, extra.data(),
+ extra.size(), 0));
+
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+
+ // After the full image was output, JxlDecoderProcessInput should return
+ // success to indicate all is done.
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+ JxlDecoderDestroy(dec);
+
+ for (size_t y = 0; y < ysize; y++) {
+ uint8_t* JXL_RESTRICT rowm = image.data() + stride * y;
+ uint8_t* JXL_RESTRICT rows = extra.data() + xsize * y;
+ for (size_t x = 0; x < xsize; x++) {
+ if (!render_spot) {
+ // if spot color isn't rendered, main image should be as we made it
+ // (red and blue are all zeroes)
+
+ EXPECT_EQ(rowm[x * 3 + 0], 0);
+ EXPECT_EQ(rowm[x * 3 + 1], (x + y > 255 ? 255 : x + y));
+ EXPECT_EQ(rowm[x * 3 + 2], 0);
+ }
+ if (render_spot) {
+ // if spot color is rendered, expect red and blue to look like the
+ // spot color channel
+ EXPECT_LT(abs(rowm[x * 3 + 0] - (rows[x] * 0.25f)), 1);
+ EXPECT_LT(abs(rowm[x * 3 + 2] - (rows[x] * 0.5f)), 1);
+ }
+ EXPECT_EQ(rows[x], ((x ^ y) & 255));
+ }
+ }
+ }
+}
+
+TEST(DecodeTest, CloseInput) {
+ std::vector<uint8_t> partial_file = {0xff};
+
+ JxlDecoderPtr dec = JxlDecoderMake(nullptr);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec.get(),
+ JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE));
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec.get(), partial_file.data(),
+ partial_file.size()));
+ EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec.get()));
+ EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec.get()));
+ JxlDecoderCloseInput(dec.get());
+ EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderProcessInput(dec.get()));
+}
diff --git a/third_party/jpeg-xl/lib/jxl/decode_to_jpeg.cc b/third_party/jpeg-xl/lib/jxl/decode_to_jpeg.cc
new file mode 100644
index 0000000000..aa57b2723f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/decode_to_jpeg.cc
@@ -0,0 +1,169 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/decode_to_jpeg.h"
+
+namespace jxl {
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+
+JxlDecoderStatus JxlToJpegDecoder::Process(const uint8_t** next_in,
+ size_t* avail_in) {
+ if (!inside_box_) {
+ JXL_ABORT(
+ "processing of JPEG reconstruction data outside JPEG reconstruction "
+ "box");
+ }
+ Span<const uint8_t> to_decode;
+ if (box_until_eof_) {
+ // Until EOF means consume all data.
+ to_decode = Span<const uint8_t>(*next_in, *avail_in);
+ *next_in += *avail_in;
+ *avail_in = 0;
+ } else {
+ // Defined size means consume min(available, needed).
+ size_t avail_recon_in =
+ std::min<size_t>(*avail_in, box_size_ - buffer_.size());
+ to_decode = Span<const uint8_t>(*next_in, avail_recon_in);
+ *next_in += avail_recon_in;
+ *avail_in -= avail_recon_in;
+ }
+ bool old_data_exists = !buffer_.empty();
+ if (old_data_exists) {
+ // Append incoming data to buffer if we already had data in the buffer.
+ buffer_.insert(buffer_.end(), to_decode.data(),
+ to_decode.data() + to_decode.size());
+ to_decode = Span<const uint8_t>(buffer_.data(), buffer_.size());
+ }
+ if (!box_until_eof_ && to_decode.size() > box_size_) {
+ JXL_ABORT("JPEG reconstruction data to decode larger than expected");
+ }
+ if (box_until_eof_ || to_decode.size() == box_size_) {
+ // If undefined size, or the right size, try to decode.
+ jpeg_data_ = make_unique<jpeg::JPEGData>();
+ const auto status = jpeg::DecodeJPEGData(to_decode, jpeg_data_.get());
+ if (status.IsFatalError()) return JXL_DEC_ERROR;
+ if (status) {
+ // Successful decoding, emit event after updating state to track that we
+ // are no longer parsing JPEG reconstruction data.
+ inside_box_ = false;
+ return JXL_DEC_JPEG_RECONSTRUCTION;
+ }
+ if (box_until_eof_) {
+ // Unsuccessful decoding and undefined size, assume incomplete data. Copy
+ // the data if we haven't already.
+ if (!old_data_exists) {
+ buffer_.insert(buffer_.end(), to_decode.data(),
+ to_decode.data() + to_decode.size());
+ }
+ } else {
+ // Unsuccessful decoding of correct amount of data, assume error.
+ return JXL_DEC_ERROR;
+ }
+ } else {
+ // Not enough data, copy the data if we haven't already.
+ if (!old_data_exists) {
+ buffer_.insert(buffer_.end(), to_decode.data(),
+ to_decode.data() + to_decode.size());
+ }
+ }
+ return JXL_DEC_NEED_MORE_INPUT;
+}
+
+size_t JxlToJpegDecoder::NumExifMarkers(const jpeg::JPEGData& jpeg_data) {
+ size_t num = 0;
+ for (size_t i = 0; i < jpeg_data.app_data.size(); ++i) {
+ if (jpeg_data.app_marker_type[i] == jxl::jpeg::AppMarkerType::kExif) {
+ num++;
+ }
+ }
+ return num;
+}
+
+size_t JxlToJpegDecoder::NumXmpMarkers(const jpeg::JPEGData& jpeg_data) {
+ size_t num = 0;
+ for (size_t i = 0; i < jpeg_data.app_data.size(); ++i) {
+ if (jpeg_data.app_marker_type[i] == jxl::jpeg::AppMarkerType::kXMP) {
+ num++;
+ }
+ }
+ return num;
+}
+
+JxlDecoderStatus JxlToJpegDecoder::ExifBoxContentSize(
+ const jpeg::JPEGData& jpeg_data, size_t* size) {
+ for (size_t i = 0; i < jpeg_data.app_data.size(); ++i) {
+ if (jpeg_data.app_marker_type[i] == jxl::jpeg::AppMarkerType::kExif) {
+ if (jpeg_data.app_data[i].size() < 3 + sizeof(jpeg::kExifTag)) {
+ // too small for app marker header
+ return JXL_DEC_ERROR;
+ }
+ // The first 4 bytes are the TIFF header from the box contents, and are
+ // not included in the JPEG
+ *size = jpeg_data.app_data[i].size() + 4 - 3 - sizeof(jpeg::kExifTag);
+ return JXL_DEC_SUCCESS;
+ }
+ }
+ return JXL_DEC_ERROR;
+}
+
+JxlDecoderStatus JxlToJpegDecoder::XmlBoxContentSize(
+ const jpeg::JPEGData& jpeg_data, size_t* size) {
+ for (size_t i = 0; i < jpeg_data.app_data.size(); ++i) {
+ if (jpeg_data.app_marker_type[i] == jxl::jpeg::AppMarkerType::kXMP) {
+ if (jpeg_data.app_data[i].size() < 3 + sizeof(jpeg::kXMPTag)) {
+ // too small for app marker header
+ return JXL_DEC_ERROR;
+ }
+ *size = jpeg_data.app_data[i].size() - 3 - sizeof(jpeg::kXMPTag);
+ return JXL_DEC_SUCCESS;
+ }
+ }
+ return JXL_DEC_ERROR;
+}
+
+JxlDecoderStatus JxlToJpegDecoder::SetExif(const uint8_t* data, size_t size,
+ jpeg::JPEGData* jpeg_data) {
+ for (size_t i = 0; i < jpeg_data->app_data.size(); ++i) {
+ if (jpeg_data->app_marker_type[i] == jxl::jpeg::AppMarkerType::kExif) {
+ if (jpeg_data->app_data[i].size() !=
+ size + 3 + sizeof(jpeg::kExifTag) - 4)
+ return JXL_DEC_ERROR;
+ // The first 9 bytes are used for JPEG marker header.
+ jpeg_data->app_data[i][0] = 0xE1;
+ // The second and third byte are already filled in correctly
+ memcpy(jpeg_data->app_data[i].data() + 3, jpeg::kExifTag,
+ sizeof(jpeg::kExifTag));
+ // The first 4 bytes are the TIFF header from the box contents, and are
+ // not included in the JPEG
+ memcpy(jpeg_data->app_data[i].data() + 3 + sizeof(jpeg::kExifTag),
+ data + 4, size - 4);
+ return JXL_DEC_SUCCESS;
+ }
+ }
+ return JXL_DEC_ERROR;
+}
+JxlDecoderStatus JxlToJpegDecoder::SetXmp(const uint8_t* data, size_t size,
+ jpeg::JPEGData* jpeg_data) {
+ for (size_t i = 0; i < jpeg_data->app_data.size(); ++i) {
+ if (jpeg_data->app_marker_type[i] == jxl::jpeg::AppMarkerType::kXMP) {
+ if (jpeg_data->app_data[i].size() != size + 3 + sizeof(jpeg::kXMPTag))
+ return JXL_DEC_ERROR;
+ // The first 9 bytes are used for JPEG marker header.
+ jpeg_data->app_data[i][0] = 0xE1;
+ // The second and third byte are already filled in correctly
+ memcpy(jpeg_data->app_data[i].data() + 3, jpeg::kXMPTag,
+ sizeof(jpeg::kXMPTag));
+ memcpy(jpeg_data->app_data[i].data() + 3 + sizeof(jpeg::kXMPTag), data,
+ size);
+ return JXL_DEC_SUCCESS;
+ }
+ }
+ return JXL_DEC_ERROR;
+}
+
+#endif // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/decode_to_jpeg.h b/third_party/jpeg-xl/lib/jxl/decode_to_jpeg.h
new file mode 100644
index 0000000000..a64ace27a2
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/decode_to_jpeg.h
@@ -0,0 +1,217 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DECODE_TO_JPEG_H_
+#define LIB_JXL_DECODE_TO_JPEG_H_
+
+// JPEG XL to JPEG bytes decoder logic. The JxlToJpegDecoder class keeps track
+// of the decoder state needed to parse the JPEG reconstruction box and provide
+// the reconstructed JPEG to the output buffer.
+
+#include <jxl/decode.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <memory>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h" // JPEGXL_ENABLE_TRANSCODE_JPEG
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/jpeg/dec_jpeg_data.h"
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+#include "lib/jxl/jpeg/dec_jpeg_data_writer.h"
+#endif // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+namespace jxl {
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+
+class JxlToJpegDecoder {
+ public:
+ // Returns whether an output buffer is set.
+ bool IsOutputSet() const { return next_out_ != nullptr; }
+
+ // Returns whether the decoder is parsing a boxa JPEG box was parsed.
+ bool IsParsingBox() const { return inside_box_; }
+
+ // Sets the output buffer used when producing JPEG output.
+ JxlDecoderStatus SetOutputBuffer(uint8_t* data, size_t size) {
+ if (next_out_) return JXL_DEC_ERROR;
+ next_out_ = data;
+ avail_size_ = size;
+ return JXL_DEC_SUCCESS;
+ }
+
+ // Releases the buffer set with SetOutputBuffer().
+ size_t ReleaseOutputBuffer() {
+ size_t result = avail_size_;
+ next_out_ = nullptr;
+ avail_size_ = 0;
+ return result;
+ }
+
+ void StartBox(bool box_until_eof, size_t contents_size) {
+ // A new box implies that we clear the buffer.
+ buffer_.clear();
+ inside_box_ = true;
+ if (box_until_eof) {
+ box_until_eof_ = true;
+ } else {
+ box_size_ = contents_size;
+ }
+ }
+
+ // Consumes data from next_in/avail_in to reconstruct JPEG data.
+ // Uses box_size_, inside_box_ and box_until_eof_ to calculate how much to
+ // consume. Potentially stores unparsed data in buffer_.
+ // Potentially populates jpeg_data_. Potentially updates inside_box_.
+ // Returns JXL_DEC_JPEG_RECONSTRUCTION when finished, JXL_DEC_NEED_MORE_INPUT
+ // if more input is needed, JXL_DEC_ERROR on parsing error.
+ JxlDecoderStatus Process(const uint8_t** next_in, size_t* avail_in);
+
+ // Returns non-owned copy of the JPEGData, only after Process finished and
+ // the JPEGData was not yet moved to an image bundle with
+ // SetImageBundleJpegData.
+ jpeg::JPEGData* GetJpegData() { return jpeg_data_.get(); }
+
+ // Returns how many exif or xmp app markers are present in the JPEG data. A
+ // return value higher than 1 would require multiple exif boxes or multiple
+ // xmp boxes in the container format, and this is not supported by the API and
+ // considered an error. May only be called after Process returned success.
+ static size_t NumExifMarkers(const jpeg::JPEGData& jpeg_data);
+ static size_t NumXmpMarkers(const jpeg::JPEGData& jpeg_data);
+
+ // Returns box content size for metadata, using the known data from the app
+ // markers.
+ static JxlDecoderStatus ExifBoxContentSize(const jpeg::JPEGData& jpeg_data,
+ size_t* size);
+ static JxlDecoderStatus XmlBoxContentSize(const jpeg::JPEGData& jpeg_data,
+ size_t* size);
+
+ // Returns JXL_DEC_ERROR if there is no exif/XMP marker or the data size
+ // does not match, or this function is called before Process returned
+ // success, JXL_DEC_SUCCESS otherwise. As input, provide the full box contents
+ // but not the box header. In case of exif, this includes the 4-byte TIFF
+ // header, even though it won't be copied into the JPEG.
+ static JxlDecoderStatus SetExif(const uint8_t* data, size_t size,
+ jpeg::JPEGData* jpeg_data);
+ static JxlDecoderStatus SetXmp(const uint8_t* data, size_t size,
+ jpeg::JPEGData* jpeg_data);
+
+ // Sets the JpegData of the ImageBundle passed if there is anything to set.
+ // Releases the JpegData from this decoder if set.
+ Status SetImageBundleJpegData(ImageBundle* ib) {
+ if (IsOutputSet() && jpeg_data_ != nullptr) {
+ if (!jpeg::SetJPEGDataFromICC(ib->metadata()->color_encoding.ICC(),
+ jpeg_data_.get())) {
+ return false;
+ }
+ ib->jpeg_data.reset(jpeg_data_.release());
+ }
+ return true;
+ }
+
+ JxlDecoderStatus WriteOutput(const jpeg::JPEGData& jpeg_data) {
+ // Copy JPEG bytestream if desired.
+ uint8_t* tmp_next_out = next_out_;
+ size_t tmp_avail_size = avail_size_;
+ auto write = [&tmp_next_out, &tmp_avail_size](const uint8_t* buf,
+ size_t len) {
+ size_t to_write = std::min<size_t>(tmp_avail_size, len);
+ if (to_write != 0) memcpy(tmp_next_out, buf, to_write);
+ tmp_next_out += to_write;
+ tmp_avail_size -= to_write;
+ return to_write;
+ };
+ Status write_result = jpeg::WriteJpeg(jpeg_data, write);
+ if (!write_result) {
+ if (tmp_avail_size == 0) {
+ return JXL_DEC_JPEG_NEED_MORE_OUTPUT;
+ }
+ return JXL_DEC_ERROR;
+ }
+ next_out_ = tmp_next_out;
+ avail_size_ = tmp_avail_size;
+ return JXL_DEC_SUCCESS;
+ }
+
+ private:
+ // Content of the most recently parsed JPEG reconstruction box if any.
+ std::vector<uint8_t> buffer_;
+
+ // Decoded content of the most recently parsed JPEG reconstruction box is
+ // stored here.
+ std::unique_ptr<jpeg::JPEGData> jpeg_data_;
+
+ // True if the decoder is currently reading bytes inside a JPEG reconstruction
+ // box.
+ bool inside_box_ = false;
+
+ // True if the JPEG reconstruction box had undefined size (all remaining
+ // bytes).
+ bool box_until_eof_ = false;
+ // Size of most recently parsed JPEG reconstruction box contents.
+ size_t box_size_ = 0;
+
+ // Next bytes to write JPEG reconstruction to.
+ uint8_t* next_out_ = nullptr;
+ // Available bytes to write JPEG reconstruction to.
+ size_t avail_size_ = 0;
+};
+
+#else
+
+// Fake class that disables support for decoding JPEG XL to JPEG.
+class JxlToJpegDecoder {
+ public:
+ bool IsOutputSet() const { return false; }
+ bool IsParsingBox() const { return false; }
+
+ JxlDecoderStatus SetOutputBuffer(uint8_t* /* data */, size_t /* size */) {
+ return JXL_DEC_ERROR;
+ }
+ size_t ReleaseOutputBuffer() { return 0; }
+
+ void StartBox(bool /* box_until_eof */, size_t /* contents_size */) {}
+
+ JxlDecoderStatus Process(const uint8_t** next_in, size_t* avail_in) {
+ return JXL_DEC_ERROR;
+ }
+ jpeg::JPEGData* GetJpegData() { return nullptr; }
+
+ Status SetImageBundleJpegData(ImageBundle* /* ib */) { return true; }
+
+ static size_t NumExifMarkers(const jpeg::JPEGData& /*jpeg_data*/) {
+ return 0;
+ }
+ static size_t NumXmpMarkers(const jpeg::JPEGData& /*jpeg_data*/) { return 0; }
+ static size_t ExifBoxContentSize(const jpeg::JPEGData& /*jpeg_data*/,
+ size_t* /*size*/) {
+ return JXL_DEC_ERROR;
+ }
+ static size_t XmlBoxContentSize(const jpeg::JPEGData& /*jpeg_data*/,
+ size_t* /*size*/) {
+ return JXL_DEC_ERROR;
+ }
+ static JxlDecoderStatus SetExif(const uint8_t* /*data*/, size_t /*size*/,
+ jpeg::JPEGData* /*jpeg_data*/) {
+ return JXL_DEC_ERROR;
+ }
+ static JxlDecoderStatus SetXmp(const uint8_t* /*data*/, size_t /*size*/,
+ jpeg::JPEGData* /*jpeg_data*/) {
+ return JXL_DEC_ERROR;
+ }
+
+ JxlDecoderStatus WriteOutput(const jpeg::JPEGData& /* jpeg_data */) {
+ return JXL_DEC_SUCCESS;
+ }
+};
+
+#endif // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+} // namespace jxl
+
+#endif // LIB_JXL_DECODE_TO_JPEG_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_ac_strategy.cc b/third_party/jpeg-xl/lib/jxl/enc_ac_strategy.cc
new file mode 100644
index 0000000000..2b4d84196f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_ac_strategy.cc
@@ -0,0 +1,1168 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_ac_strategy.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <cmath>
+#include <cstdio>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_ac_strategy.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_transforms-inl.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/fast_math-inl.h"
+
+// Some of the floating point constants in this file and in other
+// files in the libjxl project have been obtained using the
+// tools/optimizer/simplex_fork.py tool. It is a variation of
+// Nelder-Mead optimization, and we generally try to minimize
+// BPP * pnorm aggregate as reported by the benchmark_xl tool,
+// but occasionally the values are optimized by using additional
+// constraints such as maintaining a certain density, or ratio of
+// popularity of integral transforms. Jyrki visually reviews all
+// such changes and often makes manual changes to maintain good
+// visual quality to changes where butteraugli was not sufficiently
+// sensitive to some kind of degradation. Unfortunately image quality
+// is still more of an art than science.
+
+// This must come before the begin/end_target, but HWY_ONCE is only true
+// after that, so use an "include guard".
+#ifndef LIB_JXL_ENC_AC_STRATEGY_
+#define LIB_JXL_ENC_AC_STRATEGY_
+// Parameters of the heuristic are marked with a OPTIMIZE comment.
+namespace jxl {
+
+// Debugging utilities.
+
+// Returns a linear sRGB color (as bytes) for each AC strategy.
+const uint8_t* TypeColor(const uint8_t& raw_strategy) {
+ JXL_ASSERT(AcStrategy::IsRawStrategyValid(raw_strategy));
+ static_assert(AcStrategy::kNumValidStrategies == 27, "Change colors");
+ static constexpr uint8_t kColors[][3] = {
+ {0xFF, 0xFF, 0x00}, // DCT8
+ {0xFF, 0x80, 0x80}, // HORNUSS
+ {0xFF, 0x80, 0x80}, // DCT2x2
+ {0xFF, 0x80, 0x80}, // DCT4x4
+ {0x80, 0xFF, 0x00}, // DCT16x16
+ {0x00, 0xC0, 0x00}, // DCT32x32
+ {0xC0, 0xFF, 0x00}, // DCT16x8
+ {0xC0, 0xFF, 0x00}, // DCT8x16
+ {0x00, 0xFF, 0x00}, // DCT32x8
+ {0x00, 0xFF, 0x00}, // DCT8x32
+ {0x00, 0xFF, 0x00}, // DCT32x16
+ {0x00, 0xFF, 0x00}, // DCT16x32
+ {0xFF, 0x80, 0x00}, // DCT4x8
+ {0xFF, 0x80, 0x00}, // DCT8x4
+ {0xFF, 0xFF, 0x80}, // AFV0
+ {0xFF, 0xFF, 0x80}, // AFV1
+ {0xFF, 0xFF, 0x80}, // AFV2
+ {0xFF, 0xFF, 0x80}, // AFV3
+ {0x00, 0xC0, 0xFF}, // DCT64x64
+ {0x00, 0xFF, 0xFF}, // DCT64x32
+ {0x00, 0xFF, 0xFF}, // DCT32x64
+ {0x00, 0x40, 0xFF}, // DCT128x128
+ {0x00, 0x80, 0xFF}, // DCT128x64
+ {0x00, 0x80, 0xFF}, // DCT64x128
+ {0x00, 0x00, 0xC0}, // DCT256x256
+ {0x00, 0x00, 0xFF}, // DCT256x128
+ {0x00, 0x00, 0xFF}, // DCT128x256
+ };
+ return kColors[raw_strategy];
+}
+
+const uint8_t* TypeMask(const uint8_t& raw_strategy) {
+ JXL_ASSERT(AcStrategy::IsRawStrategyValid(raw_strategy));
+ static_assert(AcStrategy::kNumValidStrategies == 27, "Add masks");
+ // implicitly, first row and column is made dark
+ static constexpr uint8_t kMask[][64] = {
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ }, // DCT8
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 1, 0, 0, 1, 0, 0, //
+ 0, 0, 1, 0, 0, 1, 0, 0, //
+ 0, 0, 1, 1, 1, 1, 0, 0, //
+ 0, 0, 1, 0, 0, 1, 0, 0, //
+ 0, 0, 1, 0, 0, 1, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ }, // HORNUSS
+ {
+ 1, 1, 1, 1, 1, 1, 1, 1, //
+ 1, 0, 1, 0, 1, 0, 1, 0, //
+ 1, 1, 1, 1, 1, 1, 1, 1, //
+ 1, 0, 1, 0, 1, 0, 1, 0, //
+ 1, 1, 1, 1, 1, 1, 1, 1, //
+ 1, 0, 1, 0, 1, 0, 1, 0, //
+ 1, 1, 1, 1, 1, 1, 1, 1, //
+ 1, 0, 1, 0, 1, 0, 1, 0, //
+ }, // 2x2
+ {
+ 0, 0, 0, 0, 1, 0, 0, 0, //
+ 0, 0, 0, 0, 1, 0, 0, 0, //
+ 0, 0, 0, 0, 1, 0, 0, 0, //
+ 0, 0, 0, 0, 1, 0, 0, 0, //
+ 1, 1, 1, 1, 1, 1, 1, 1, //
+ 0, 0, 0, 0, 1, 0, 0, 0, //
+ 0, 0, 0, 0, 1, 0, 0, 0, //
+ 0, 0, 0, 0, 1, 0, 0, 0, //
+ }, // 4x4
+ {}, // DCT16x16 (unused)
+ {}, // DCT32x32 (unused)
+ {}, // DCT16x8 (unused)
+ {}, // DCT8x16 (unused)
+ {}, // DCT32x8 (unused)
+ {}, // DCT8x32 (unused)
+ {}, // DCT32x16 (unused)
+ {}, // DCT16x32 (unused)
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 1, 1, 1, 1, 1, 1, 1, 1, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ }, // DCT4x8
+ {
+ 0, 0, 0, 0, 1, 0, 0, 0, //
+ 0, 0, 0, 0, 1, 0, 0, 0, //
+ 0, 0, 0, 0, 1, 0, 0, 0, //
+ 0, 0, 0, 0, 1, 0, 0, 0, //
+ 0, 0, 0, 0, 1, 0, 0, 0, //
+ 0, 0, 0, 0, 1, 0, 0, 0, //
+ 0, 0, 0, 0, 1, 0, 0, 0, //
+ 0, 0, 0, 0, 1, 0, 0, 0, //
+ }, // DCT8x4
+ {
+ 1, 1, 1, 1, 1, 0, 0, 0, //
+ 1, 1, 1, 1, 0, 0, 0, 0, //
+ 1, 1, 1, 0, 0, 0, 0, 0, //
+ 1, 1, 0, 0, 0, 0, 0, 0, //
+ 1, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ }, // AFV0
+ {
+ 0, 0, 0, 0, 1, 1, 1, 1, //
+ 0, 0, 0, 0, 0, 1, 1, 1, //
+ 0, 0, 0, 0, 0, 0, 1, 1, //
+ 0, 0, 0, 0, 0, 0, 0, 1, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ }, // AFV1
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 1, 0, 0, 0, 0, 0, 0, 0, //
+ 1, 1, 0, 0, 0, 0, 0, 0, //
+ 1, 1, 1, 0, 0, 0, 0, 0, //
+ 1, 1, 1, 1, 0, 0, 0, 0, //
+ }, // AFV2
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 0, //
+ 0, 0, 0, 0, 0, 0, 0, 1, //
+ 0, 0, 0, 0, 0, 0, 1, 1, //
+ 0, 0, 0, 0, 0, 1, 1, 1, //
+ }, // AFV3
+ };
+ return kMask[raw_strategy];
+}
+
+void DumpAcStrategy(const AcStrategyImage& ac_strategy, size_t xsize,
+ size_t ysize, const char* tag, AuxOut* aux_out) {
+ Image3F color_acs(xsize, ysize);
+ for (size_t y = 0; y < ysize; y++) {
+ float* JXL_RESTRICT rows[3] = {
+ color_acs.PlaneRow(0, y),
+ color_acs.PlaneRow(1, y),
+ color_acs.PlaneRow(2, y),
+ };
+ const AcStrategyRow acs_row = ac_strategy.ConstRow(y / kBlockDim);
+ for (size_t x = 0; x < xsize; x++) {
+ AcStrategy acs = acs_row[x / kBlockDim];
+ const uint8_t* JXL_RESTRICT color = TypeColor(acs.RawStrategy());
+ for (size_t c = 0; c < 3; c++) {
+ rows[c][x] = color[c] / 255.f;
+ }
+ }
+ }
+ size_t stride = color_acs.PixelsPerRow();
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t by = 0; by < DivCeil(ysize, kBlockDim); by++) {
+ float* JXL_RESTRICT row = color_acs.PlaneRow(c, by * kBlockDim);
+ const AcStrategyRow acs_row = ac_strategy.ConstRow(by);
+ for (size_t bx = 0; bx < DivCeil(xsize, kBlockDim); bx++) {
+ AcStrategy acs = acs_row[bx];
+ if (!acs.IsFirstBlock()) continue;
+ const uint8_t* JXL_RESTRICT color = TypeColor(acs.RawStrategy());
+ const uint8_t* JXL_RESTRICT mask = TypeMask(acs.RawStrategy());
+ if (acs.covered_blocks_x() == 1 && acs.covered_blocks_y() == 1) {
+ for (size_t iy = 0; iy < kBlockDim && by * kBlockDim + iy < ysize;
+ iy++) {
+ for (size_t ix = 0; ix < kBlockDim && bx * kBlockDim + ix < xsize;
+ ix++) {
+ if (mask[iy * kBlockDim + ix]) {
+ row[iy * stride + bx * kBlockDim + ix] = color[c] / 800.f;
+ }
+ }
+ }
+ }
+ // draw block edges
+ for (size_t ix = 0; ix < kBlockDim * acs.covered_blocks_x() &&
+ bx * kBlockDim + ix < xsize;
+ ix++) {
+ row[0 * stride + bx * kBlockDim + ix] = color[c] / 350.f;
+ }
+ for (size_t iy = 0; iy < kBlockDim * acs.covered_blocks_y() &&
+ by * kBlockDim + iy < ysize;
+ iy++) {
+ row[iy * stride + bx * kBlockDim + 0] = color[c] / 350.f;
+ }
+ }
+ }
+ }
+ aux_out->DumpImage(tag, color_acs);
+}
+
+} // namespace jxl
+#endif // LIB_JXL_ENC_AC_STRATEGY_
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::AbsDiff;
+using hwy::HWY_NAMESPACE::Eq;
+using hwy::HWY_NAMESPACE::IfThenElseZero;
+using hwy::HWY_NAMESPACE::IfThenZeroElse;
+using hwy::HWY_NAMESPACE::Round;
+using hwy::HWY_NAMESPACE::Sqrt;
+
+bool MultiBlockTransformCrossesHorizontalBoundary(
+ const AcStrategyImage& ac_strategy, size_t start_x, size_t y,
+ size_t end_x) {
+ if (start_x >= ac_strategy.xsize() || y >= ac_strategy.ysize()) {
+ return false;
+ }
+ if (y % 8 == 0) {
+ // Nothing crosses 64x64 boundaries, and the memory on the other side
+ // of the 64x64 block may still uninitialized.
+ return false;
+ }
+ end_x = std::min(end_x, ac_strategy.xsize());
+ // The first multiblock might be before the start_x, let's adjust it
+ // to point to the first IsFirstBlock() == true block we find by backward
+ // tracing.
+ AcStrategyRow row = ac_strategy.ConstRow(y);
+ const size_t start_x_limit = start_x & ~7;
+ while (start_x != start_x_limit && !row[start_x].IsFirstBlock()) {
+ --start_x;
+ }
+ for (size_t x = start_x; x < end_x;) {
+ if (row[x].IsFirstBlock()) {
+ x += row[x].covered_blocks_x();
+ } else {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool MultiBlockTransformCrossesVerticalBoundary(
+ const AcStrategyImage& ac_strategy, size_t x, size_t start_y,
+ size_t end_y) {
+ if (x >= ac_strategy.xsize() || start_y >= ac_strategy.ysize()) {
+ return false;
+ }
+ if (x % 8 == 0) {
+ // Nothing crosses 64x64 boundaries, and the memory on the other side
+ // of the 64x64 block may still uninitialized.
+ return false;
+ }
+ end_y = std::min(end_y, ac_strategy.ysize());
+ // The first multiblock might be before the start_y, let's adjust it
+ // to point to the first IsFirstBlock() == true block we find by backward
+ // tracing.
+ const size_t start_y_limit = start_y & ~7;
+ while (start_y != start_y_limit &&
+ !ac_strategy.ConstRow(start_y)[x].IsFirstBlock()) {
+ --start_y;
+ }
+
+ for (size_t y = start_y; y < end_y;) {
+ AcStrategyRow row = ac_strategy.ConstRow(y);
+ if (row[x].IsFirstBlock()) {
+ y += row[x].covered_blocks_y();
+ } else {
+ return true;
+ }
+ }
+ return false;
+}
+
+static const float kChromaErrorWeight[AcStrategy::kNumValidStrategies] = {
+ 0.95f, // DCT = 0,
+ 1.0f, // IDENTITY = 1,
+ 0.5f, // DCT2X2 = 2,
+ 1.0f, // DCT4X4 = 3,
+ 2.0f, // DCT16X16 = 4,
+ 2.0f, // DCT32X32 = 5,
+ 1.4f, // DCT16X8 = 6,
+ 1.4f, // DCT8X16 = 7,
+ 2.0f, // DCT32X8 = 8,
+ 2.0f, // DCT8X32 = 9,
+ 2.0f, // DCT32X16 = 10,
+ 2.0f, // DCT16X32 = 11,
+ 2.0f, // DCT4X8 = 12,
+ 2.0f, // DCT8X4 = 13,
+ 1.7f, // AFV0 = 14,
+ 1.7f, // AFV1 = 15,
+ 1.7f, // AFV2 = 16,
+ 1.7f, // AFV3 = 17,
+ 2.0f, // DCT64X64 = 18,
+ 2.0f, // DCT64X32 = 19,
+ 2.0f, // DCT32X64 = 20,
+ 2.0f, // DCT128X128 = 21,
+ 2.0f, // DCT128X64 = 22,
+ 2.0f, // DCT64X128 = 23,
+ 2.0f, // DCT256X256 = 24,
+ 2.0f, // DCT256X128 = 25,
+ 2.0f, // DCT128X256 = 26,
+};
+
+float EstimateEntropy(const AcStrategy& acs, size_t x, size_t y,
+ const ACSConfig& config,
+ const float* JXL_RESTRICT cmap_factors, float* block,
+ float* scratch_space, uint32_t* quantized) {
+ const size_t size = (1 << acs.log2_covered_blocks()) * kDCTBlockSize;
+
+ // Apply transform.
+ for (size_t c = 0; c < 3; c++) {
+ float* JXL_RESTRICT block_c = block + size * c;
+ TransformFromPixels(acs.Strategy(), &config.Pixel(c, x, y),
+ config.src_stride, block_c, scratch_space);
+ }
+
+ HWY_FULL(float) df;
+
+ const size_t num_blocks = acs.covered_blocks_x() * acs.covered_blocks_y();
+ // avoid large blocks when there is a lot going on in red-green.
+ float cmul[3] = {kChromaErrorWeight[acs.RawStrategy()], 1.0f, 1.0f};
+ float quant_norm8 = 0;
+ float masking = 0;
+ if (num_blocks == 1) {
+ // When it is only one 8x8, we don't need aggregation of values.
+ quant_norm8 = config.Quant(x / 8, y / 8);
+ masking = 2.0f * config.Masking(x / 8, y / 8);
+ } else if (num_blocks == 2) {
+ // Taking max instead of 8th norm seems to work
+ // better for smallest blocks up to 16x8. Jyrki couldn't get
+ // improvements in trying the same for 16x16 blocks.
+ if (acs.covered_blocks_y() == 2) {
+ quant_norm8 =
+ std::max(config.Quant(x / 8, y / 8), config.Quant(x / 8, y / 8 + 1));
+ masking = 2.0f * std::max(config.Masking(x / 8, y / 8),
+ config.Masking(x / 8, y / 8 + 1));
+ } else {
+ quant_norm8 =
+ std::max(config.Quant(x / 8, y / 8), config.Quant(x / 8 + 1, y / 8));
+ masking = 2.0f * std::max(config.Masking(x / 8, y / 8),
+ config.Masking(x / 8 + 1, y / 8));
+ }
+ } else {
+ float masking_norm2 = 0;
+ float masking_max = 0;
+ // Load QF value, calculate empirical heuristic on masking field
+ // for weighting the information loss. Information loss manifests
+ // itself as ringing, and masking could hide it.
+ for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+ for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+ float qval = config.Quant(x / 8 + ix, y / 8 + iy);
+ qval *= qval;
+ qval *= qval;
+ quant_norm8 += qval * qval;
+ float maskval = config.Masking(x / 8 + ix, y / 8 + iy);
+ masking_max = std::max<float>(masking_max, maskval);
+ masking_norm2 += maskval * maskval;
+ }
+ }
+ quant_norm8 /= num_blocks;
+ quant_norm8 = FastPowf(quant_norm8, 1.0f / 8.0f);
+ masking_norm2 = sqrt(masking_norm2 / num_blocks);
+ // This is a highly empirical formula.
+ masking = (masking_norm2 + masking_max);
+ }
+ const auto q = Set(df, quant_norm8);
+
+ // Compute entropy.
+ float entropy = config.base_entropy;
+ auto info_loss = Zero(df);
+ auto info_loss2 = Zero(df);
+
+ for (size_t c = 0; c < 3; c++) {
+ const float* inv_matrix = config.dequant->InvMatrix(acs.RawStrategy(), c);
+ const auto cmap_factor = Set(df, cmap_factors[c]);
+
+ auto entropy_v = Zero(df);
+ auto nzeros_v = Zero(df);
+ auto cost1 = Set(df, config.cost1);
+ auto cost2 = Set(df, config.cost2);
+ auto cost_delta = Set(df, config.cost_delta);
+ for (size_t i = 0; i < num_blocks * kDCTBlockSize; i += Lanes(df)) {
+ const auto in = Load(df, block + c * size + i);
+ const auto in_y = Mul(Load(df, block + size + i), cmap_factor);
+ const auto im = Load(df, inv_matrix + i);
+ const auto val = Mul(Sub(in, in_y), Mul(im, q));
+ const auto rval = Round(val);
+ const auto diff = AbsDiff(val, rval);
+ info_loss = Add(info_loss, diff);
+ info_loss2 = MulAdd(diff, diff, info_loss2);
+ const auto q = Abs(rval);
+ const auto q_is_zero = Eq(q, Zero(df));
+ entropy_v = Add(entropy_v, IfThenElseZero(Ge(q, Set(df, 1.5f)), cost2));
+ // We used to have q * C here, but that cost model seems to
+ // be punishing large values more than necessary. Sqrt tries
+ // to avoid large values less aggressively. Having high accuracy
+ // around zero is most important at low qualities, and there
+ // we have directly specified costs for 0, 1, and 2.
+ entropy_v = MulAdd(Sqrt(q), cost_delta, entropy_v);
+ nzeros_v = Add(nzeros_v, IfThenZeroElse(q_is_zero, Set(df, 1.0f)));
+ }
+ entropy_v = MulAdd(nzeros_v, cost1, entropy_v);
+
+ entropy += cmul[c] * GetLane(SumOfLanes(df, entropy_v));
+ size_t num_nzeros = GetLane(SumOfLanes(df, nzeros_v));
+ // Add #bit of num_nonzeros, as an estimate of the cost for encoding the
+ // number of non-zeros of the block.
+ size_t nbits = CeilLog2Nonzero(num_nzeros + 1) + 1;
+ // Also add #bit of #bit of num_nonzeros, to estimate the ANS cost, with a
+ // bias.
+ entropy += config.zeros_mul * (CeilLog2Nonzero(nbits + 17) + nbits);
+ }
+ float ret =
+ entropy +
+ masking *
+ ((config.info_loss_multiplier * GetLane(SumOfLanes(df, info_loss))) +
+ (config.info_loss_multiplier2 *
+ sqrt(num_blocks * GetLane(SumOfLanes(df, info_loss2)))));
+ return ret;
+}
+
+uint8_t FindBest8x8Transform(size_t x, size_t y, int encoding_speed_tier,
+ const ACSConfig& config,
+ const float* JXL_RESTRICT cmap_factors,
+ AcStrategyImage* JXL_RESTRICT ac_strategy,
+ float* block, float* scratch_space,
+ uint32_t* quantized, float* entropy_out) {
+ struct TransformTry8x8 {
+ AcStrategy::Type type;
+ int encoding_speed_tier_max_limit;
+ float entropy_add;
+ float entropy_mul;
+ };
+ static const TransformTry8x8 kTransforms8x8[] = {
+ {
+ AcStrategy::Type::DCT,
+ 9,
+ 3.0f,
+ 0.745f,
+ },
+ {
+ AcStrategy::Type::DCT4X4,
+ 5,
+ 4.0f,
+ 0.7f,
+ },
+ {
+ AcStrategy::Type::DCT2X2,
+ 5,
+ 0.0f,
+ 0.66f,
+ },
+ {
+ AcStrategy::Type::DCT4X8,
+ 4,
+ 0.0f,
+ 0.700754622182473063f,
+ },
+ {
+ AcStrategy::Type::DCT8X4,
+ 4,
+ 0.0f,
+ 0.700754622182473063f,
+ },
+ {
+ AcStrategy::Type::IDENTITY,
+ 5,
+ 8.0f,
+ 0.81217614513585534f,
+ },
+ {
+ AcStrategy::Type::AFV0,
+ 4,
+ 3.0f,
+ 0.70086131125719425f,
+ },
+ {
+ AcStrategy::Type::AFV1,
+ 4,
+ 3.0f,
+ 0.70086131125719425f,
+ },
+ {
+ AcStrategy::Type::AFV2,
+ 4,
+ 3.0f,
+ 0.70086131125719425f,
+ },
+ {
+ AcStrategy::Type::AFV3,
+ 4,
+ 3.0f,
+ 0.70086131125719425f,
+ },
+ };
+ double best = 1e30;
+ uint8_t best_tx = kTransforms8x8[0].type;
+ for (auto tx : kTransforms8x8) {
+ if (tx.encoding_speed_tier_max_limit < encoding_speed_tier) {
+ continue;
+ }
+ AcStrategy acs = AcStrategy::FromRawStrategy(tx.type);
+ float entropy = EstimateEntropy(acs, x, y, config, cmap_factors, block,
+ scratch_space, quantized);
+ entropy = tx.entropy_add + tx.entropy_mul * entropy;
+ if (entropy < best) {
+ best_tx = tx.type;
+ best = entropy;
+ }
+ }
+ *entropy_out = best;
+ return best_tx;
+}
+
+// bx, by addresses the 64x64 block at 8x8 subresolution
+// cx, cy addresses the left, upper 8x8 block position of the candidate
+// transform.
+void TryMergeAcs(AcStrategy::Type acs_raw, size_t bx, size_t by, size_t cx,
+ size_t cy, const ACSConfig& config,
+ const float* JXL_RESTRICT cmap_factors,
+ AcStrategyImage* JXL_RESTRICT ac_strategy,
+ const float entropy_mul, const uint8_t candidate_priority,
+ uint8_t* priority, float* JXL_RESTRICT entropy_estimate,
+ float* block, float* scratch_space, uint32_t* quantized) {
+ AcStrategy acs = AcStrategy::FromRawStrategy(acs_raw);
+ float entropy_current = 0;
+ for (size_t iy = 0; iy < acs.covered_blocks_y(); ++iy) {
+ for (size_t ix = 0; ix < acs.covered_blocks_x(); ++ix) {
+ if (priority[(cy + iy) * 8 + (cx + ix)] >= candidate_priority) {
+ // Transform would reuse already allocated blocks and
+ // lead to invalid overlaps, for example DCT64X32 vs.
+ // DCT32X64.
+ return;
+ }
+ entropy_current += entropy_estimate[(cy + iy) * 8 + (cx + ix)];
+ }
+ }
+ float entropy_candidate =
+ entropy_mul * EstimateEntropy(acs, (bx + cx) * 8, (by + cy) * 8, config,
+ cmap_factors, block, scratch_space,
+ quantized);
+ if (entropy_candidate >= entropy_current) return;
+ // Accept the candidate.
+ for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+ for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+ entropy_estimate[(cy + iy) * 8 + cx + ix] = 0;
+ priority[(cy + iy) * 8 + cx + ix] = candidate_priority;
+ }
+ }
+ ac_strategy->Set(bx + cx, by + cy, acs_raw);
+ entropy_estimate[cy * 8 + cx] = entropy_candidate;
+}
+
+static void SetEntropyForTransform(size_t cx, size_t cy,
+ const AcStrategy::Type acs_raw,
+ float entropy,
+ float* JXL_RESTRICT entropy_estimate) {
+ const AcStrategy acs = AcStrategy::FromRawStrategy(acs_raw);
+ for (size_t dy = 0; dy < acs.covered_blocks_y(); ++dy) {
+ for (size_t dx = 0; dx < acs.covered_blocks_x(); ++dx) {
+ entropy_estimate[(cy + dy) * 8 + cx + dx] = 0.0;
+ }
+ }
+ entropy_estimate[cy * 8 + cx] = entropy;
+}
+
+AcStrategy::Type AcsSquare(size_t blocks) {
+ if (blocks == 2) {
+ return AcStrategy::Type::DCT16X16;
+ } else if (blocks == 4) {
+ return AcStrategy::Type::DCT32X32;
+ } else {
+ return AcStrategy::Type::DCT64X64;
+ }
+}
+
+AcStrategy::Type AcsVerticalSplit(size_t blocks) {
+ if (blocks == 2) {
+ return AcStrategy::Type::DCT16X8;
+ } else if (blocks == 4) {
+ return AcStrategy::Type::DCT32X16;
+ } else {
+ return AcStrategy::Type::DCT64X32;
+ }
+}
+
+AcStrategy::Type AcsHorizontalSplit(size_t blocks) {
+ if (blocks == 2) {
+ return AcStrategy::Type::DCT8X16;
+ } else if (blocks == 4) {
+ return AcStrategy::Type::DCT16X32;
+ } else {
+ return AcStrategy::Type::DCT32X64;
+ }
+}
+
+// The following function tries to merge smaller transforms into
+// squares and the rectangles originating from a single middle division
+// (horizontal or vertical) fairly.
+//
+// This is now generalized to concern about squares
+// of blocks X blocks size, where a block is 8x8 pixels.
+void FindBestFirstLevelDivisionForSquare(
+ size_t blocks, bool allow_square_transform, size_t bx, size_t by, size_t cx,
+ size_t cy, const ACSConfig& config, const float* JXL_RESTRICT cmap_factors,
+ AcStrategyImage* JXL_RESTRICT ac_strategy, const float entropy_mul_JXK,
+ const float entropy_mul_JXJ, float* JXL_RESTRICT entropy_estimate,
+ float* block, float* scratch_space, uint32_t* quantized) {
+ // We denote J for the larger dimension here, and K for the smaller.
+ // For example, for 32x32 block splitting, J would be 32, K 16.
+ const size_t blocks_half = blocks / 2;
+ const AcStrategy::Type acs_rawJXK = AcsVerticalSplit(blocks);
+ const AcStrategy::Type acs_rawKXJ = AcsHorizontalSplit(blocks);
+ const AcStrategy::Type acs_rawJXJ = AcsSquare(blocks);
+ const AcStrategy acsJXK = AcStrategy::FromRawStrategy(acs_rawJXK);
+ const AcStrategy acsKXJ = AcStrategy::FromRawStrategy(acs_rawKXJ);
+ const AcStrategy acsJXJ = AcStrategy::FromRawStrategy(acs_rawJXJ);
+ AcStrategyRow row0 = ac_strategy->ConstRow(by + cy + 0);
+ AcStrategyRow row1 = ac_strategy->ConstRow(by + cy + blocks_half);
+ // Let's check if we can consider a JXJ block here at all.
+ // This is not necessary in the basic use of hierarchically merging
+ // blocks in the simplest possible way, but is needed when we try other
+ // 'floating' options of merging, possibly after a simple hierarchical
+ // merge has been explored.
+ if (MultiBlockTransformCrossesHorizontalBoundary(*ac_strategy, bx + cx,
+ by + cy, bx + cx + blocks) ||
+ MultiBlockTransformCrossesHorizontalBoundary(
+ *ac_strategy, bx + cx, by + cy + blocks, bx + cx + blocks) ||
+ MultiBlockTransformCrossesVerticalBoundary(*ac_strategy, bx + cx, by + cy,
+ by + cy + blocks) ||
+ MultiBlockTransformCrossesVerticalBoundary(*ac_strategy, bx + cx + blocks,
+ by + cy, by + cy + blocks)) {
+ return; // not suitable for JxJ analysis, some transforms leak out.
+ }
+ // For floating transforms there may be
+ // already blocks selected that make either or both JXK and
+ // KXJ not feasible for this location.
+ const bool allow_JXK = !MultiBlockTransformCrossesVerticalBoundary(
+ *ac_strategy, bx + cx + blocks_half, by + cy, by + cy + blocks);
+ const bool allow_KXJ = !MultiBlockTransformCrossesHorizontalBoundary(
+ *ac_strategy, bx + cx, by + cy + blocks_half, bx + cx + blocks);
+ // Current entropies aggregated on NxN resolution.
+ float entropy[2][2] = {};
+ for (size_t dy = 0; dy < blocks; ++dy) {
+ for (size_t dx = 0; dx < blocks; ++dx) {
+ entropy[dy / blocks_half][dx / blocks_half] +=
+ entropy_estimate[(cy + dy) * 8 + (cx + dx)];
+ }
+ }
+ float entropy_JXK_left = std::numeric_limits<float>::max();
+ float entropy_JXK_right = std::numeric_limits<float>::max();
+ float entropy_KXJ_top = std::numeric_limits<float>::max();
+ float entropy_KXJ_bottom = std::numeric_limits<float>::max();
+ float entropy_JXJ = std::numeric_limits<float>::max();
+ if (allow_JXK) {
+ if (row0[bx + cx + 0].RawStrategy() != acs_rawJXK) {
+ entropy_JXK_left =
+ entropy_mul_JXK *
+ EstimateEntropy(acsJXK, (bx + cx + 0) * 8, (by + cy + 0) * 8, config,
+ cmap_factors, block, scratch_space, quantized);
+ }
+ if (row0[bx + cx + blocks_half].RawStrategy() != acs_rawJXK) {
+ entropy_JXK_right =
+ entropy_mul_JXK * EstimateEntropy(acsJXK, (bx + cx + blocks_half) * 8,
+ (by + cy + 0) * 8, config,
+ cmap_factors, block, scratch_space,
+ quantized);
+ }
+ }
+ if (allow_KXJ) {
+ if (row0[bx + cx].RawStrategy() != acs_rawKXJ) {
+ entropy_KXJ_top =
+ entropy_mul_JXK *
+ EstimateEntropy(acsKXJ, (bx + cx + 0) * 8, (by + cy + 0) * 8, config,
+ cmap_factors, block, scratch_space, quantized);
+ }
+ if (row1[bx + cx].RawStrategy() != acs_rawKXJ) {
+ entropy_KXJ_bottom =
+ entropy_mul_JXK * EstimateEntropy(acsKXJ, (bx + cx + 0) * 8,
+ (by + cy + blocks_half) * 8, config,
+ cmap_factors, block, scratch_space,
+ quantized);
+ }
+ }
+ if (allow_square_transform) {
+ // We control the exploration of the square transform separately so that
+ // we can turn it off at high decoding speeds for 32x32, but still allow
+ // exploring 16x32 and 32x16.
+ entropy_JXJ = entropy_mul_JXJ * EstimateEntropy(acsJXJ, (bx + cx + 0) * 8,
+ (by + cy + 0) * 8, config,
+ cmap_factors, block,
+ scratch_space, quantized);
+ }
+
+ // Test if this block should have JXK or KXJ transforms,
+ // because it can have only one or the other.
+ float costJxN = std::min(entropy_JXK_left, entropy[0][0] + entropy[1][0]) +
+ std::min(entropy_JXK_right, entropy[0][1] + entropy[1][1]);
+ float costNxJ = std::min(entropy_KXJ_top, entropy[0][0] + entropy[0][1]) +
+ std::min(entropy_KXJ_bottom, entropy[1][0] + entropy[1][1]);
+ if (entropy_JXJ < costJxN && entropy_JXJ < costNxJ) {
+ ac_strategy->Set(bx + cx, by + cy, acs_rawJXJ);
+ SetEntropyForTransform(cx, cy, acs_rawJXJ, entropy_JXJ, entropy_estimate);
+ } else if (costJxN < costNxJ) {
+ if (entropy_JXK_left < entropy[0][0] + entropy[1][0]) {
+ ac_strategy->Set(bx + cx, by + cy, acs_rawJXK);
+ SetEntropyForTransform(cx, cy, acs_rawJXK, entropy_JXK_left,
+ entropy_estimate);
+ }
+ if (entropy_JXK_right < entropy[0][1] + entropy[1][1]) {
+ ac_strategy->Set(bx + cx + blocks_half, by + cy, acs_rawJXK);
+ SetEntropyForTransform(cx + blocks_half, cy, acs_rawJXK,
+ entropy_JXK_right, entropy_estimate);
+ }
+ } else {
+ if (entropy_KXJ_top < entropy[0][0] + entropy[0][1]) {
+ ac_strategy->Set(bx + cx, by + cy, acs_rawKXJ);
+ SetEntropyForTransform(cx, cy, acs_rawKXJ, entropy_KXJ_top,
+ entropy_estimate);
+ }
+ if (entropy_KXJ_bottom < entropy[1][0] + entropy[1][1]) {
+ ac_strategy->Set(bx + cx, by + cy + blocks_half, acs_rawKXJ);
+ SetEntropyForTransform(cx, cy + blocks_half, acs_rawKXJ,
+ entropy_KXJ_bottom, entropy_estimate);
+ }
+ }
+}
+
+void ProcessRectACS(PassesEncoderState* JXL_RESTRICT enc_state,
+ const ACSConfig& config, const Rect& rect) {
+ // Main philosophy here:
+ // 1. First find best 8x8 transform for each area.
+ // 2. Merging them into larger transforms where possibly, but
+ // starting from the smallest transforms (16x8 and 8x16).
+ // Additional complication: 16x8 and 8x16 are considered
+ // simultanouesly and fairly against each other.
+ // We are looking at 64x64 squares since the YtoX and YtoB
+ // maps happen to be at that resolution, and having
+ // integral transforms cross these boundaries leads to
+ // additional complications.
+ const CompressParams& cparams = enc_state->cparams;
+ const float butteraugli_target = cparams.butteraugli_distance;
+ AcStrategyImage* ac_strategy = &enc_state->shared.ac_strategy;
+ // TODO(veluca): reuse allocations
+ auto mem = hwy::AllocateAligned<float>(5 * AcStrategy::kMaxCoeffArea);
+ auto qmem = hwy::AllocateAligned<uint32_t>(AcStrategy::kMaxCoeffArea);
+ uint32_t* JXL_RESTRICT quantized = qmem.get();
+ float* JXL_RESTRICT block = mem.get();
+ float* JXL_RESTRICT scratch_space = mem.get() + 3 * AcStrategy::kMaxCoeffArea;
+ size_t bx = rect.x0();
+ size_t by = rect.y0();
+ JXL_ASSERT(rect.xsize() <= 8);
+ JXL_ASSERT(rect.ysize() <= 8);
+ size_t tx = bx / kColorTileDimInBlocks;
+ size_t ty = by / kColorTileDimInBlocks;
+ const float cmap_factors[3] = {
+ enc_state->shared.cmap.YtoXRatio(
+ enc_state->shared.cmap.ytox_map.ConstRow(ty)[tx]),
+ 0.0f,
+ enc_state->shared.cmap.YtoBRatio(
+ enc_state->shared.cmap.ytob_map.ConstRow(ty)[tx]),
+ };
+ if (cparams.speed_tier > SpeedTier::kHare) return;
+ // First compute the best 8x8 transform for each square. Later, we do not
+ // experiment with different combinations, but only use the best of the 8x8s
+ // when DCT8X8 is specified in the tree search.
+ // 8x8 transforms have 10 variants, but every larger transform is just a DCT.
+ float entropy_estimate[64] = {};
+ // Favor all 8x8 transforms (against 16x8 and larger transforms)) at
+ // low butteraugli_target distances.
+ static const float k8x8mul1 = -0.55;
+ static const float k8x8mul2 = 1.0;
+ static const float k8x8base = 1.4;
+ const float mul8x8 = k8x8mul2 + k8x8mul1 / (butteraugli_target + k8x8base);
+ for (size_t iy = 0; iy < rect.ysize(); iy++) {
+ for (size_t ix = 0; ix < rect.xsize(); ix++) {
+ float entropy = 0.0;
+ const uint8_t best_of_8x8s = FindBest8x8Transform(
+ 8 * (bx + ix), 8 * (by + iy), static_cast<int>(cparams.speed_tier),
+ config, cmap_factors, ac_strategy, block, scratch_space, quantized,
+ &entropy);
+ ac_strategy->Set(bx + ix, by + iy,
+ static_cast<AcStrategy::Type>(best_of_8x8s));
+ entropy_estimate[iy * 8 + ix] = entropy * mul8x8;
+ }
+ }
+ // Merge when a larger transform is better than the previously
+ // searched best combination of 8x8 transforms.
+ struct MergeTry {
+ AcStrategy::Type type;
+ uint8_t priority;
+ uint8_t decoding_speed_tier_max_limit;
+ uint8_t encoding_speed_tier_max_limit;
+ float entropy_mul;
+ };
+ static const float k8X16mul1 = -0.55;
+ static const float k8X16mul2 = 0.865;
+ static const float k8X16base = 1.6;
+ const float entropy_mul16X8 =
+ k8X16mul2 + k8X16mul1 / (butteraugli_target + k8X16base);
+ // const float entropy_mul16X8 = mul8X16 * 0.91195782912371126f;
+
+ static const float k16X16mul1 = -0.35;
+ static const float k16X16mul2 = 0.798;
+ static const float k16X16base = 2.0;
+ const float entropy_mul16X16 =
+ k16X16mul2 + k16X16mul1 / (butteraugli_target + k16X16base);
+ // const float entropy_mul16X16 = mul16X16 * 0.83183417727960129f;
+
+ static const float k32X16mul1 = -0.1;
+ static const float k32X16mul2 = 0.854;
+ static const float k32X16base = 2.5;
+ const float entropy_mul16X32 =
+ k32X16mul2 + k32X16mul1 / (butteraugli_target + k32X16base);
+
+ const float entropy_mul32X32 = 0.93;
+ const float entropy_mul64X64 = 1.52f;
+ // TODO(jyrki): Consider this feedback in further changes:
+ // Also effectively when the multipliers for smaller blocks are
+ // below 1, this raises the bar for the bigger blocks even higher
+ // in that sense these constants are not independent (e.g. changing
+ // the constant for DCT16x32 by -5% (making it more likely) also
+ // means that DCT32x32 becomes harder to do when starting from
+ // two DCT16x32s). It might be better to make them more independent,
+ // e.g. by not applying the multiplier when storing the new entropy
+ // estimates in TryMergeToACSCandidate().
+ const MergeTry kTransformsForMerge[9] = {
+ {AcStrategy::Type::DCT16X8, 2, 4, 5, entropy_mul16X8},
+ {AcStrategy::Type::DCT8X16, 2, 4, 5, entropy_mul16X8},
+ // FindBestFirstLevelDivisionForSquare looks for DCT16X16 and its
+ // subdivisions. {AcStrategy::Type::DCT16X16, 3, entropy_mul16X16},
+ {AcStrategy::Type::DCT16X32, 4, 4, 4, entropy_mul16X32},
+ {AcStrategy::Type::DCT32X16, 4, 4, 4, entropy_mul16X32},
+ // FindBestFirstLevelDivisionForSquare looks for DCT32X32 and its
+ // subdivisions. {AcStrategy::Type::DCT32X32, 5, 1, 5,
+ // 0.9822994906548809f},
+ {AcStrategy::Type::DCT64X32, 6, 1, 3, 1.29f},
+ {AcStrategy::Type::DCT32X64, 6, 1, 3, 1.29f},
+ // {AcStrategy::Type::DCT64X64, 8, 1, 3, 2.0846542128012948f},
+ };
+ /*
+ These sizes not yet included in merge heuristic:
+ set(AcStrategy::Type::DCT32X8, 0.0f, 2.261390410971102f);
+ set(AcStrategy::Type::DCT8X32, 0.0f, 2.261390410971102f);
+ set(AcStrategy::Type::DCT128X128, 0.0f, 1.0f);
+ set(AcStrategy::Type::DCT128X64, 0.0f, 0.73f);
+ set(AcStrategy::Type::DCT64X128, 0.0f, 0.73f);
+ set(AcStrategy::Type::DCT256X256, 0.0f, 1.0f);
+ set(AcStrategy::Type::DCT256X128, 0.0f, 0.73f);
+ set(AcStrategy::Type::DCT128X256, 0.0f, 0.73f);
+ */
+
+ // Priority is a tricky kludge to avoid collisions so that transforms
+ // don't overlap.
+ uint8_t priority[64] = {};
+ bool enable_32x32 = cparams.decoding_speed_tier < 4;
+ for (auto tx : kTransformsForMerge) {
+ if (tx.decoding_speed_tier_max_limit < cparams.decoding_speed_tier) {
+ continue;
+ }
+ AcStrategy acs = AcStrategy::FromRawStrategy(tx.type);
+
+ for (size_t cy = 0; cy + acs.covered_blocks_y() - 1 < rect.ysize();
+ cy += acs.covered_blocks_y()) {
+ for (size_t cx = 0; cx + acs.covered_blocks_x() - 1 < rect.xsize();
+ cx += acs.covered_blocks_x()) {
+ if (cy + 7 < rect.ysize() && cx + 7 < rect.xsize()) {
+ if (cparams.decoding_speed_tier < 4 &&
+ tx.type == AcStrategy::Type::DCT32X64) {
+ // We handle both DCT8X16 and DCT16X8 at the same time.
+ if ((cy | cx) % 8 == 0) {
+ FindBestFirstLevelDivisionForSquare(
+ 8, true, bx, by, cx, cy, config, cmap_factors, ac_strategy,
+ tx.entropy_mul, entropy_mul64X64, entropy_estimate, block,
+ scratch_space, quantized);
+ }
+ continue;
+ } else if (tx.type == AcStrategy::Type::DCT32X16) {
+ // We handled both DCT8X16 and DCT16X8 at the same time,
+ // and that is above. The last column and last row,
+ // when the last column or last row is odd numbered,
+ // are still handled by TryMergeAcs.
+ continue;
+ }
+ }
+ if ((tx.type == AcStrategy::Type::DCT16X32 && cy % 4 != 0) ||
+ (tx.type == AcStrategy::Type::DCT32X16 && cx % 4 != 0)) {
+ // already covered by FindBest32X32
+ continue;
+ }
+
+ if (cy + 3 < rect.ysize() && cx + 3 < rect.xsize()) {
+ if (tx.type == AcStrategy::Type::DCT16X32) {
+ // We handle both DCT8X16 and DCT16X8 at the same time.
+ if ((cy | cx) % 4 == 0) {
+ FindBestFirstLevelDivisionForSquare(
+ 4, enable_32x32, bx, by, cx, cy, config, cmap_factors,
+ ac_strategy, tx.entropy_mul, entropy_mul32X32,
+ entropy_estimate, block, scratch_space, quantized);
+ }
+ continue;
+ } else if (tx.type == AcStrategy::Type::DCT32X16) {
+ // We handled both DCT8X16 and DCT16X8 at the same time,
+ // and that is above. The last column and last row,
+ // when the last column or last row is odd numbered,
+ // are still handled by TryMergeAcs.
+ continue;
+ }
+ }
+ if ((tx.type == AcStrategy::Type::DCT16X32 && cy % 4 != 0) ||
+ (tx.type == AcStrategy::Type::DCT32X16 && cx % 4 != 0)) {
+ // already covered by FindBest32X32
+ continue;
+ }
+ if (cy + 1 < rect.ysize() && cx + 1 < rect.xsize()) {
+ if (tx.type == AcStrategy::Type::DCT8X16) {
+ // We handle both DCT8X16 and DCT16X8 at the same time.
+ if ((cy | cx) % 2 == 0) {
+ FindBestFirstLevelDivisionForSquare(
+ 2, true, bx, by, cx, cy, config, cmap_factors, ac_strategy,
+ tx.entropy_mul, entropy_mul16X16, entropy_estimate, block,
+ scratch_space, quantized);
+ }
+ continue;
+ } else if (tx.type == AcStrategy::Type::DCT16X8) {
+ // We handled both DCT8X16 and DCT16X8 at the same time,
+ // and that is above. The last column and last row,
+ // when the last column or last row is odd numbered,
+ // are still handled by TryMergeAcs.
+ continue;
+ }
+ }
+ if ((tx.type == AcStrategy::Type::DCT8X16 && cy % 2 == 1) ||
+ (tx.type == AcStrategy::Type::DCT16X8 && cx % 2 == 1)) {
+ // already covered by FindBestFirstLevelDivisionForSquare
+ continue;
+ }
+ // All other merge sizes are handled here.
+ // Some of the DCT16X8s and DCT8X16s will still leak through here
+ // when there is an odd number of 8x8 blocks, then the last row
+ // and column will get their DCT16X8s and DCT8X16s through the
+ // normal integral transform merging process.
+ TryMergeAcs(tx.type, bx, by, cx, cy, config, cmap_factors, ac_strategy,
+ tx.entropy_mul, tx.priority, &priority[0], entropy_estimate,
+ block, scratch_space, quantized);
+ }
+ }
+ }
+ if (cparams.speed_tier >= SpeedTier::kHare) {
+ return;
+ }
+ // Here we still try to do some non-aligned matching, find a few more
+ // 16X8, 8X16 and 16X16s between the non-2-aligned blocks.
+ for (size_t cy = 0; cy + 1 < rect.ysize(); ++cy) {
+ for (size_t cx = 0; cx + 1 < rect.xsize(); ++cx) {
+ if ((cy | cx) % 2 != 0) {
+ FindBestFirstLevelDivisionForSquare(
+ 2, true, bx, by, cx, cy, config, cmap_factors, ac_strategy,
+ entropy_mul16X8, entropy_mul16X16, entropy_estimate, block,
+ scratch_space, quantized);
+ }
+ }
+ }
+ // Non-aligned matching for 32X32, 16X32 and 32X16.
+ size_t step = cparams.speed_tier >= SpeedTier::kTortoise ? 2 : 1;
+ for (size_t cy = 0; cy + 3 < rect.ysize(); cy += step) {
+ for (size_t cx = 0; cx + 3 < rect.xsize(); cx += step) {
+ if ((cy | cx) % 4 == 0) {
+ continue; // Already tried with loop above (DCT16X32 case).
+ }
+ FindBestFirstLevelDivisionForSquare(
+ 4, enable_32x32, bx, by, cx, cy, config, cmap_factors, ac_strategy,
+ entropy_mul16X32, entropy_mul32X32, entropy_estimate, block,
+ scratch_space, quantized);
+ }
+ }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(ProcessRectACS);
+
+void AcStrategyHeuristics::Init(const Image3F& src,
+ PassesEncoderState* enc_state) {
+ this->enc_state = enc_state;
+ config.dequant = &enc_state->shared.matrices;
+ const CompressParams& cparams = enc_state->cparams;
+ const float butteraugli_target = cparams.butteraugli_distance;
+
+ if (cparams.speed_tier >= SpeedTier::kCheetah) {
+ JXL_CHECK(enc_state->shared.matrices.EnsureComputed(1)); // DCT8 only
+ } else {
+ uint32_t acs_mask = 0;
+ // All transforms up to 64x64.
+ for (size_t i = 0; i < AcStrategy::DCT128X128; i++) {
+ acs_mask |= (1 << i);
+ }
+ JXL_CHECK(enc_state->shared.matrices.EnsureComputed(acs_mask));
+ }
+
+ // Image row pointers and strides.
+ config.quant_field_row = enc_state->initial_quant_field.Row(0);
+ config.quant_field_stride = enc_state->initial_quant_field.PixelsPerRow();
+ auto& mask = enc_state->initial_quant_masking;
+ if (mask.xsize() > 0 && mask.ysize() > 0) {
+ config.masking_field_row = mask.Row(0);
+ config.masking_field_stride = mask.PixelsPerRow();
+ }
+
+ config.src_rows[0] = src.ConstPlaneRow(0, 0);
+ config.src_rows[1] = src.ConstPlaneRow(1, 0);
+ config.src_rows[2] = src.ConstPlaneRow(2, 0);
+ config.src_stride = src.PixelsPerRow();
+
+ // Entropy estimate is composed of two factors:
+ // - estimate of the number of bits that will be used by the block
+ // - information loss due to quantization
+ // The following constant controls the relative weights of these components.
+ config.info_loss_multiplier = 138.0f;
+ config.info_loss_multiplier2 = 50.46839691767866;
+ // TODO(jyrki): explore base_entropy setting more.
+ // A small value (0?) works better at high distance, while a larger value
+ // may be more effective at low distance/high bpp.
+ config.base_entropy = 0.0;
+ config.zeros_mul = 7.565053364251793f;
+ // Lots of +1 and -1 coefficients at high quality, it is
+ // beneficial to favor them. At low qualities zeros matter more
+ // and +1 / -1 coefficients are already quite harmful.
+ float slope = std::min<float>(1.0f, butteraugli_target * (1.0f / 3));
+ config.cost1 = 1 + slope * 8.8703248061477744f;
+ config.cost2 = 4.4628149885273363f;
+ config.cost_delta = 5.3359184934516337f;
+ JXL_ASSERT(enc_state->shared.ac_strategy.xsize() ==
+ enc_state->shared.frame_dim.xsize_blocks);
+ JXL_ASSERT(enc_state->shared.ac_strategy.ysize() ==
+ enc_state->shared.frame_dim.ysize_blocks);
+}
+
+void AcStrategyHeuristics::ProcessRect(const Rect& rect) {
+ PROFILER_FUNC;
+ const CompressParams& cparams = enc_state->cparams;
+ // In Falcon mode, use DCT8 everywhere and uniform quantization.
+ if (cparams.speed_tier >= SpeedTier::kCheetah) {
+ enc_state->shared.ac_strategy.FillDCT8(rect);
+ return;
+ }
+ HWY_DYNAMIC_DISPATCH(ProcessRectACS)
+ (enc_state, config, rect);
+}
+
+void AcStrategyHeuristics::Finalize(AuxOut* aux_out) {
+ const auto& ac_strategy = enc_state->shared.ac_strategy;
+ // Accounting and debug output.
+ if (aux_out != nullptr) {
+ aux_out->num_small_blocks =
+ ac_strategy.CountBlocks(AcStrategy::Type::IDENTITY) +
+ ac_strategy.CountBlocks(AcStrategy::Type::DCT2X2) +
+ ac_strategy.CountBlocks(AcStrategy::Type::DCT4X4);
+ aux_out->num_dct4x8_blocks =
+ ac_strategy.CountBlocks(AcStrategy::Type::DCT4X8) +
+ ac_strategy.CountBlocks(AcStrategy::Type::DCT8X4);
+ aux_out->num_afv_blocks = ac_strategy.CountBlocks(AcStrategy::Type::AFV0) +
+ ac_strategy.CountBlocks(AcStrategy::Type::AFV1) +
+ ac_strategy.CountBlocks(AcStrategy::Type::AFV2) +
+ ac_strategy.CountBlocks(AcStrategy::Type::AFV3);
+ aux_out->num_dct8_blocks = ac_strategy.CountBlocks(AcStrategy::Type::DCT);
+ aux_out->num_dct8x16_blocks =
+ ac_strategy.CountBlocks(AcStrategy::Type::DCT8X16) +
+ ac_strategy.CountBlocks(AcStrategy::Type::DCT16X8);
+ aux_out->num_dct8x32_blocks =
+ ac_strategy.CountBlocks(AcStrategy::Type::DCT8X32) +
+ ac_strategy.CountBlocks(AcStrategy::Type::DCT32X8);
+ aux_out->num_dct16_blocks =
+ ac_strategy.CountBlocks(AcStrategy::Type::DCT16X16);
+ aux_out->num_dct16x32_blocks =
+ ac_strategy.CountBlocks(AcStrategy::Type::DCT16X32) +
+ ac_strategy.CountBlocks(AcStrategy::Type::DCT32X16);
+ aux_out->num_dct32_blocks =
+ ac_strategy.CountBlocks(AcStrategy::Type::DCT32X32);
+ aux_out->num_dct32x64_blocks =
+ ac_strategy.CountBlocks(AcStrategy::Type::DCT32X64) +
+ ac_strategy.CountBlocks(AcStrategy::Type::DCT64X32);
+ aux_out->num_dct64_blocks =
+ ac_strategy.CountBlocks(AcStrategy::Type::DCT64X64);
+ }
+
+ if (WantDebugOutput(aux_out)) {
+ DumpAcStrategy(ac_strategy, enc_state->shared.frame_dim.xsize,
+ enc_state->shared.frame_dim.ysize, "ac_strategy", aux_out);
+ }
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/enc_ac_strategy.h b/third_party/jpeg-xl/lib/jxl/enc_ac_strategy.h
new file mode 100644
index 0000000000..1ce3442ccf
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_ac_strategy.h
@@ -0,0 +1,74 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_AC_STRATEGY_H_
+#define LIB_JXL_ENC_AC_STRATEGY_H_
+
+#include <stdint.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/quant_weights.h"
+
+// `FindBestAcStrategy` uses heuristics to choose which AC strategy should be
+// used in each block, as well as the initial quantization field.
+
+namespace jxl {
+
+struct AuxOut;
+
+// AC strategy selection: utility struct.
+
+struct ACSConfig {
+ const DequantMatrices* JXL_RESTRICT dequant;
+ float info_loss_multiplier;
+ float info_loss_multiplier2;
+ float* JXL_RESTRICT quant_field_row;
+ size_t quant_field_stride;
+ float* JXL_RESTRICT masking_field_row;
+ size_t masking_field_stride;
+ const float* JXL_RESTRICT src_rows[3];
+ size_t src_stride;
+ // Cost for 1 (-1), 2 (-2) explicitly, cost for others computed with cost1 +
+ // cost2 + sqrt(q) * cost_delta.
+ float cost1;
+ float cost2;
+ float cost_delta;
+ float base_entropy;
+ float zeros_mul;
+ const float& Pixel(size_t c, size_t x, size_t y) const {
+ return src_rows[c][y * src_stride + x];
+ }
+ float Masking(size_t bx, size_t by) const {
+ JXL_DASSERT(masking_field_row[by * masking_field_stride + bx] > 0);
+ return masking_field_row[by * masking_field_stride + bx];
+ }
+ float Quant(size_t bx, size_t by) const {
+ JXL_DASSERT(quant_field_row[by * quant_field_stride + bx] > 0);
+ return quant_field_row[by * quant_field_stride + bx];
+ }
+};
+
+struct AcStrategyHeuristics {
+ void Init(const Image3F& src, PassesEncoderState* enc_state);
+ void ProcessRect(const Rect& rect);
+ void Finalize(AuxOut* aux_out);
+ ACSConfig config;
+ PassesEncoderState* enc_state;
+};
+
+// Debug.
+void DumpAcStrategy(const AcStrategyImage& ac_strategy, size_t xsize,
+ size_t ysize, const char* tag, AuxOut* aux_out);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_AC_STRATEGY_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_adaptive_quantization.cc b/third_party/jpeg-xl/lib/jxl/enc_adaptive_quantization.cc
new file mode 100644
index 0000000000..f54204b059
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_adaptive_quantization.cc
@@ -0,0 +1,1145 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_adaptive_quantization.h"
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <string>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_adaptive_quantization.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/butteraugli/butteraugli.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_group.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_group.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_transforms-inl.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/gauss_blur.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quant_weights.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::AbsDiff;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::And;
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::Sqrt;
+using hwy::HWY_NAMESPACE::ZeroIfNegative;
+
+// The following functions modulate an exponent (out_val) and return the updated
+// value. Their descriptor is limited to 8 lanes for 8x8 blocks.
+
+// Hack for mask estimation. Eventually replace this code with butteraugli's
+// masking.
+float ComputeMaskForAcStrategyUse(const float out_val) {
+ const float kMul = 1.0f;
+ const float kOffset = 0.001f;
+ return kMul / (out_val + kOffset);
+}
+
+template <class D, class V>
+V ComputeMask(const D d, const V out_val) {
+ const auto kBase = Set(d, -0.76471879237038032f);
+ const auto kMul4 = Set(d, 4.4585596705216615f);
+ const auto kMul2 = Set(d, 17.282053892620215f);
+ const auto kOffset2 = Set(d, 302.36961315317848f);
+ const auto kMul3 = Set(d, 7.0561261998705858f);
+ const auto kOffset3 = Set(d, 2.3179635626140773f);
+ const auto kOffset4 = Mul(Set(d, 0.25f), kOffset3);
+ const auto kMul0 = Set(d, 0.80061762862741759f);
+ const auto k1 = Set(d, 1.0f);
+
+ // Avoid division by zero.
+ const auto v1 = Max(Mul(out_val, kMul0), Set(d, 1e-3f));
+ const auto v2 = Div(k1, Add(v1, kOffset2));
+ const auto v3 = Div(k1, MulAdd(v1, v1, kOffset3));
+ const auto v4 = Div(k1, MulAdd(v1, v1, kOffset4));
+ // TODO(jyrki):
+ // A log or two here could make sense. In butteraugli we have effectively
+ // log(log(x + C)) for this kind of use, as a single log is used in
+ // saturating visual masking and here the modulation values are exponential,
+ // another log would counter that.
+ return Add(kBase, MulAdd(kMul4, v4, MulAdd(kMul2, v2, Mul(kMul3, v3))));
+}
+
+// mul and mul2 represent a scaling difference between jxl and butteraugli.
+static const float kSGmul = 226.77216153508914f;
+static const float kSGmul2 = 1.0f / 73.377132366608819f;
+static const float kLog2 = 0.693147181f;
+// Includes correction factor for std::log -> log2.
+static const float kSGRetMul = kSGmul2 * 18.6580932135f * kLog2;
+static const float kSGVOffset = 7.7825991679894591f;
+
+template <bool invert, typename D, typename V>
+V RatioOfDerivativesOfCubicRootToSimpleGamma(const D d, V v) {
+ // The opsin space in jxl is the cubic root of photons, i.e., v * v * v
+ // is related to the number of photons.
+ //
+ // SimpleGamma(v * v * v) is the psychovisual space in butteraugli.
+ // This ratio allows quantization to move from jxl's opsin space to
+ // butteraugli's log-gamma space.
+ float kEpsilon = 1e-2;
+ v = ZeroIfNegative(v);
+ const auto kNumMul = Set(d, kSGRetMul * 3 * kSGmul);
+ const auto kVOffset = Set(d, kSGVOffset * kLog2 + kEpsilon);
+ const auto kDenMul = Set(d, kLog2 * kSGmul);
+
+ const auto v2 = Mul(v, v);
+
+ const auto num = MulAdd(kNumMul, v2, Set(d, kEpsilon));
+ const auto den = MulAdd(Mul(kDenMul, v), v2, kVOffset);
+ return invert ? Div(num, den) : Div(den, num);
+}
+
+template <bool invert = false>
+static float RatioOfDerivativesOfCubicRootToSimpleGamma(float v) {
+ using DScalar = HWY_CAPPED(float, 1);
+ auto vscalar = Load(DScalar(), &v);
+ return GetLane(
+ RatioOfDerivativesOfCubicRootToSimpleGamma<invert>(DScalar(), vscalar));
+}
+
+// TODO(veluca): this function computes an approximation of the derivative of
+// SimpleGamma with (f(x+eps)-f(x))/eps. Consider two-sided approximation or
+// exact derivatives. For reference, SimpleGamma was:
+/*
+template <typename D, typename V>
+V SimpleGamma(const D d, V v) {
+ // A simple HDR compatible gamma function.
+ const auto mul = Set(d, kSGmul);
+ const auto kRetMul = Set(d, kSGRetMul);
+ const auto kRetAdd = Set(d, kSGmul2 * -20.2789020414f);
+ const auto kVOffset = Set(d, kSGVOffset);
+
+ v *= mul;
+
+ // This should happen rarely, but may lead to a NaN, which is rather
+ // undesirable. Since negative photons don't exist we solve the NaNs by
+ // clamping here.
+ // TODO(veluca): with FastLog2f, this no longer leads to NaNs.
+ v = ZeroIfNegative(v);
+ return kRetMul * FastLog2f(d, v + kVOffset) + kRetAdd;
+}
+*/
+
+template <class D, class V>
+V GammaModulation(const D d, const size_t x, const size_t y,
+ const ImageF& xyb_x, const ImageF& xyb_y, const V out_val) {
+ const float kBias = 0.16f;
+ JXL_DASSERT(kBias > kOpsinAbsorbanceBias[0]);
+ JXL_DASSERT(kBias > kOpsinAbsorbanceBias[1]);
+ JXL_DASSERT(kBias > kOpsinAbsorbanceBias[2]);
+ auto overall_ratio = Zero(d);
+ auto bias = Set(d, kBias);
+ auto half = Set(d, 0.5f);
+ for (size_t dy = 0; dy < 8; ++dy) {
+ const float* const JXL_RESTRICT row_in_x = xyb_x.Row(y + dy);
+ const float* const JXL_RESTRICT row_in_y = xyb_y.Row(y + dy);
+ for (size_t dx = 0; dx < 8; dx += Lanes(d)) {
+ const auto iny = Add(Load(d, row_in_y + x + dx), bias);
+ const auto inx = Load(d, row_in_x + x + dx);
+ const auto r = Sub(iny, inx);
+ const auto g = Add(iny, inx);
+ const auto ratio_r =
+ RatioOfDerivativesOfCubicRootToSimpleGamma</*invert=*/true>(d, r);
+ const auto ratio_g =
+ RatioOfDerivativesOfCubicRootToSimpleGamma</*invert=*/true>(d, g);
+ const auto avg_ratio = Mul(half, Add(ratio_r, ratio_g));
+
+ overall_ratio = Add(overall_ratio, avg_ratio);
+ }
+ }
+ overall_ratio = Mul(SumOfLanes(d, overall_ratio), Set(d, 1.0f / 64));
+ // ideally -1.0, but likely optimal correction adds some entropy, so slightly
+ // less than that.
+ // ln(2) constant folded in because we want std::log but have FastLog2f.
+ const auto kGam = Set(d, -0.15526878023684174f * 0.693147180559945f);
+ return MulAdd(kGam, FastLog2f(d, overall_ratio), out_val);
+}
+
+template <class D, class V>
+V ColorModulation(const D d, const size_t x, const size_t y,
+ const ImageF& xyb_x, const ImageF& xyb_y, const ImageF& xyb_b,
+ const double butteraugli_target, V out_val) {
+ static const float kStrengthMul = 4.2456542701250122f;
+ static const float kRedRampStart = 0.18748564245760829f;
+ static const float kRedRampLength = 0.16701783842516479f;
+ static const float kBlueRampLength = 0.16117602661852037f;
+ static const float kBlueRampStart = 0.47897504338287333f;
+ const float strength = kStrengthMul * (1.0f - 0.15f * butteraugli_target);
+ if (strength < 0) {
+ return out_val;
+ }
+ // x values are smaller than y and b values, need to take the difference into
+ // account.
+ const float red_strength = strength * 6.0f;
+ const float blue_strength = strength;
+ {
+ // Reduce some bits from areas not blue or red.
+ const float offset = strength * -0.007; // 9174542291185913f;
+ out_val = Add(out_val, Set(d, offset));
+ }
+ // Calculate how much of the 8x8 block is covered with blue or red.
+ auto blue_coverage = Zero(d);
+ auto red_coverage = Zero(d);
+ auto bias_y = Set(d, 0.2f);
+ auto bias_y_add = Set(d, 0.1f);
+ for (size_t dy = 0; dy < 8; ++dy) {
+ const float* const JXL_RESTRICT row_in_x = xyb_x.Row(y + dy);
+ const float* const JXL_RESTRICT row_in_y = xyb_y.Row(y + dy);
+ const float* const JXL_RESTRICT row_in_b = xyb_b.Row(y + dy);
+ for (size_t dx = 0; dx < 8; dx += Lanes(d)) {
+ const auto pixel_y = Load(d, row_in_y + x + dx);
+ // Estimate redness-greeness relative to the intensity.
+ const auto pixel_xpy = Div(Abs(Load(d, row_in_x + x + dx)),
+ Max(Add(bias_y_add, pixel_y), bias_y));
+ const auto pixel_x =
+ Max(Set(d, 0.0f), Sub(pixel_xpy, Set(d, kRedRampStart)));
+ const auto pixel_b =
+ Max(Set(d, 0.0f), Sub(Load(d, row_in_b + x + dx),
+ Add(pixel_y, Set(d, kBlueRampStart))));
+ const auto blue_slope = Min(pixel_b, Set(d, kBlueRampLength));
+ const auto red_slope = Min(pixel_x, Set(d, kRedRampLength));
+ red_coverage = Add(red_coverage, red_slope);
+ blue_coverage = Add(blue_coverage, blue_slope);
+ }
+ }
+
+ // Saturate when the high red or high blue coverage is above a level.
+ // The idea here is that if a certain fraction of the block is red or
+ // blue we consider as if it was fully red or blue.
+ static const float ratio = 28.0f; // out of 64 pixels.
+
+ auto overall_red_coverage = SumOfLanes(d, red_coverage);
+ overall_red_coverage =
+ Min(overall_red_coverage, Set(d, ratio * kRedRampLength));
+ overall_red_coverage =
+ Mul(overall_red_coverage, Set(d, red_strength / ratio));
+
+ auto overall_blue_coverage = SumOfLanes(d, blue_coverage);
+ overall_blue_coverage =
+ Min(overall_blue_coverage, Set(d, ratio * kBlueRampLength));
+ overall_blue_coverage =
+ Mul(overall_blue_coverage, Set(d, blue_strength / ratio));
+
+ return Add(overall_red_coverage, Add(overall_blue_coverage, out_val));
+}
+
+// Change precision in 8x8 blocks that have high frequency content.
+template <class D, class V>
+V HfModulation(const D d, const size_t x, const size_t y, const ImageF& xyb,
+ const V out_val) {
+ // Zero out the invalid differences for the rightmost value per row.
+ const Rebind<uint32_t, D> du;
+ HWY_ALIGN constexpr uint32_t kMaskRight[kBlockDim] = {~0u, ~0u, ~0u, ~0u,
+ ~0u, ~0u, ~0u, 0};
+
+ auto sum = Zero(d); // sum of absolute differences with right and below
+
+ static const float valmin = 0.52489909479039587f;
+ auto valminv = Set(d, valmin);
+ for (size_t dy = 0; dy < 8; ++dy) {
+ const float* JXL_RESTRICT row_in = xyb.Row(y + dy) + x;
+ const float* JXL_RESTRICT row_in_next =
+ dy == 7 ? row_in : xyb.Row(y + dy + 1) + x;
+
+ // In SCALAR, there is no guarantee of having extra row padding.
+ // Hence, we need to ensure we don't access pixels outside the row itself.
+ // In SIMD modes, however, rows are padded, so it's safe to access one
+ // garbage value after the row. The vector then gets masked with kMaskRight
+ // to remove the influence of that value.
+#if HWY_TARGET != HWY_SCALAR
+ for (size_t dx = 0; dx < 8; dx += Lanes(d)) {
+#else
+ for (size_t dx = 0; dx < 7; dx += Lanes(d)) {
+#endif
+ const auto p = Load(d, row_in + dx);
+ const auto pr = LoadU(d, row_in + dx + 1);
+ const auto mask = BitCast(d, Load(du, kMaskRight + dx));
+ sum = Add(sum, And(mask, Min(valminv, AbsDiff(p, pr))));
+
+ const auto pd = Load(d, row_in_next + dx);
+ sum = Add(sum, Min(valminv, AbsDiff(p, pd)));
+ }
+#if HWY_TARGET == HWY_SCALAR
+ const auto p = Load(d, row_in + 7);
+ const auto pd = Load(d, row_in_next + 7);
+ sum = Add(sum, Min(valminv, AbsDiff(p, pd)));
+#endif
+ }
+ // more negative value gives more bpp
+ static const float kOffset = -2.6545897672771526;
+ static const float kMul = -0.049868161744916512;
+
+ sum = SumOfLanes(d, sum);
+ float scalar_sum = GetLane(sum);
+ static const float maxsum = 7.9076877647025947f;
+ static const float minsum = 0.53640540945659809f;
+ scalar_sum = std::min(maxsum, scalar_sum);
+ scalar_sum = std::max(minsum, scalar_sum);
+ scalar_sum += kOffset;
+ scalar_sum *= kMul;
+ return Add(Set(d, scalar_sum), out_val);
+}
+
+void PerBlockModulations(const float butteraugli_target, const ImageF& xyb_x,
+ const ImageF& xyb_y, const ImageF& xyb_b,
+ const float scale, const Rect& rect, ImageF* out) {
+ JXL_ASSERT(SameSize(xyb_x, xyb_y));
+ JXL_ASSERT(DivCeil(xyb_x.xsize(), kBlockDim) == out->xsize());
+ JXL_ASSERT(DivCeil(xyb_x.ysize(), kBlockDim) == out->ysize());
+
+ float base_level = 0.48f * scale;
+ float kDampenRampStart = 2.0f;
+ float kDampenRampEnd = 14.0f;
+ float dampen = 1.0f;
+ if (butteraugli_target >= kDampenRampStart) {
+ dampen = 1.0f - ((butteraugli_target - kDampenRampStart) /
+ (kDampenRampEnd - kDampenRampStart));
+ if (dampen < 0) {
+ dampen = 0;
+ }
+ }
+ const float mul = scale * dampen;
+ const float add = (1.0f - dampen) * base_level;
+ for (size_t iy = rect.y0(); iy < rect.y0() + rect.ysize(); iy++) {
+ const size_t y = iy * 8;
+ float* const JXL_RESTRICT row_out = out->Row(iy);
+ const HWY_CAPPED(float, kBlockDim) df;
+ for (size_t ix = rect.x0(); ix < rect.x0() + rect.xsize(); ix++) {
+ size_t x = ix * 8;
+ auto out_val = Set(df, row_out[ix]);
+ out_val = ComputeMask(df, out_val);
+ out_val = HfModulation(df, x, y, xyb_y, out_val);
+ out_val = ColorModulation(df, x, y, xyb_x, xyb_y, xyb_b,
+ butteraugli_target, out_val);
+ out_val = GammaModulation(df, x, y, xyb_x, xyb_y, out_val);
+ // We want multiplicative quantization field, so everything
+ // until this point has been modulating the exponent.
+ row_out[ix] = FastPow2f(GetLane(out_val) * 1.442695041f) * mul + add;
+ }
+ }
+}
+
+template <typename D, typename V>
+V MaskingSqrt(const D d, V v) {
+ static const float kLogOffset = 27.97044946785558f;
+ static const float kMul = 211.53333281566171f;
+ const auto mul_v = Set(d, kMul * 1e8);
+ const auto offset_v = Set(d, kLogOffset);
+ return Mul(Set(d, 0.25f), Sqrt(MulAdd(v, Sqrt(mul_v), offset_v)));
+}
+
+float MaskingSqrt(const float v) {
+ using DScalar = HWY_CAPPED(float, 1);
+ auto vscalar = Load(DScalar(), &v);
+ return GetLane(MaskingSqrt(DScalar(), vscalar));
+}
+
+void StoreMin4(const float v, float& min0, float& min1, float& min2,
+ float& min3) {
+ if (v < min3) {
+ if (v < min0) {
+ min3 = min2;
+ min2 = min1;
+ min1 = min0;
+ min0 = v;
+ } else if (v < min1) {
+ min3 = min2;
+ min2 = min1;
+ min1 = v;
+ } else if (v < min2) {
+ min3 = min2;
+ min2 = v;
+ } else {
+ min3 = v;
+ }
+ }
+}
+
+// Look for smooth areas near the area of degradation.
+// If the areas are generally smooth, don't do masking.
+// Output is downsampled 2x.
+void FuzzyErosion(const Rect& from_rect, const ImageF& from,
+ const Rect& to_rect, ImageF* to) {
+ const size_t xsize = from.xsize();
+ const size_t ysize = from.ysize();
+ constexpr int kStep = 1;
+ static_assert(kStep == 1, "Step must be 1");
+ JXL_ASSERT(to_rect.xsize() * 2 == from_rect.xsize());
+ JXL_ASSERT(to_rect.ysize() * 2 == from_rect.ysize());
+ for (size_t fy = 0; fy < from_rect.ysize(); ++fy) {
+ size_t y = fy + from_rect.y0();
+ size_t ym1 = y >= kStep ? y - kStep : y;
+ size_t yp1 = y + kStep < ysize ? y + kStep : y;
+ const float* rowt = from.Row(ym1);
+ const float* row = from.Row(y);
+ const float* rowb = from.Row(yp1);
+ float* row_out = to_rect.Row(to, fy / 2);
+ for (size_t fx = 0; fx < from_rect.xsize(); ++fx) {
+ size_t x = fx + from_rect.x0();
+ size_t xm1 = x >= kStep ? x - kStep : x;
+ size_t xp1 = x + kStep < xsize ? x + kStep : x;
+ float min0 = row[x];
+ float min1 = row[xm1];
+ float min2 = row[xp1];
+ float min3 = rowt[xm1];
+ // Sort the first four values.
+ if (min0 > min1) std::swap(min0, min1);
+ if (min0 > min2) std::swap(min0, min2);
+ if (min0 > min3) std::swap(min0, min3);
+ if (min1 > min2) std::swap(min1, min2);
+ if (min1 > min3) std::swap(min1, min3);
+ if (min2 > min3) std::swap(min2, min3);
+ // The remaining five values of a 3x3 neighbourhood.
+ StoreMin4(rowt[x], min0, min1, min2, min3);
+ StoreMin4(rowt[xp1], min0, min1, min2, min3);
+ StoreMin4(rowb[xm1], min0, min1, min2, min3);
+ StoreMin4(rowb[x], min0, min1, min2, min3);
+ StoreMin4(rowb[xp1], min0, min1, min2, min3);
+ static const float kMul0 = 0.125f;
+ static const float kMul1 = 0.075f;
+ static const float kMul2 = 0.06f;
+ static const float kMul3 = 0.05f;
+ float v = kMul0 * min0 + kMul1 * min1 + kMul2 * min2 + kMul3 * min3;
+ if (fx % 2 == 0 && fy % 2 == 0) {
+ row_out[fx / 2] = v;
+ } else {
+ row_out[fx / 2] += v;
+ }
+ }
+ }
+}
+
+struct AdaptiveQuantizationImpl {
+ void Init(const Image3F& xyb) {
+ JXL_DASSERT(xyb.xsize() % kBlockDim == 0);
+ JXL_DASSERT(xyb.ysize() % kBlockDim == 0);
+ const size_t xsize = xyb.xsize();
+ const size_t ysize = xyb.ysize();
+ aq_map = ImageF(xsize / kBlockDim, ysize / kBlockDim);
+ }
+ void PrepareBuffers(size_t num_threads) {
+ diff_buffer = ImageF(kEncTileDim + 8, num_threads);
+ for (size_t i = pre_erosion.size(); i < num_threads; i++) {
+ pre_erosion.emplace_back(kEncTileDimInBlocks * 2 + 2,
+ kEncTileDimInBlocks * 2 + 2);
+ }
+ }
+
+ void ComputeTile(float butteraugli_target, float scale, const Image3F& xyb,
+ const Rect& rect, const int thread, ImageF* mask) {
+ PROFILER_ZONE("aq DiffPrecompute");
+ const size_t xsize = xyb.xsize();
+ const size_t ysize = xyb.ysize();
+
+ // The XYB gamma is 3.0 to be able to decode faster with two muls.
+ // Butteraugli's gamma is matching the gamma of human eye, around 2.6.
+ // We approximate the gamma difference by adding one cubic root into
+ // the adaptive quantization. This gives us a total gamma of 2.6666
+ // for quantization uses.
+ const float match_gamma_offset = 0.019;
+
+ const HWY_FULL(float) df;
+
+ size_t y_start = rect.y0() * 8;
+ size_t y_end = y_start + rect.ysize() * 8;
+
+ size_t x0 = rect.x0() * 8;
+ size_t x1 = x0 + rect.xsize() * 8;
+ if (x0 != 0) x0 -= 4;
+ if (x1 != xyb.xsize()) x1 += 4;
+ if (y_start != 0) y_start -= 4;
+ if (y_end != xyb.ysize()) y_end += 4;
+ pre_erosion[thread].ShrinkTo((x1 - x0) / 4, (y_end - y_start) / 4);
+
+ static const float limit = 0.2f;
+ // Computes image (padded to multiple of 8x8) of local pixel differences.
+ // Subsample both directions by 4.
+ for (size_t y = y_start; y < y_end; ++y) {
+ size_t y2 = y + 1 < ysize ? y + 1 : y;
+ size_t y1 = y > 0 ? y - 1 : y;
+
+ const float* row_in = xyb.PlaneRow(1, y);
+ const float* row_in1 = xyb.PlaneRow(1, y1);
+ const float* row_in2 = xyb.PlaneRow(1, y2);
+ float* JXL_RESTRICT row_out = diff_buffer.Row(thread);
+
+ auto scalar_pixel = [&](size_t x) {
+ const size_t x2 = x + 1 < xsize ? x + 1 : x;
+ const size_t x1 = x > 0 ? x - 1 : x;
+ const float base =
+ 0.25f * (row_in2[x] + row_in1[x] + row_in[x1] + row_in[x2]);
+ const float gammac = RatioOfDerivativesOfCubicRootToSimpleGamma(
+ row_in[x] + match_gamma_offset);
+ float diff = gammac * (row_in[x] - base);
+ diff *= diff;
+ if (diff >= limit) {
+ diff = limit;
+ }
+ diff = MaskingSqrt(diff);
+ if ((y % 4) != 0) {
+ row_out[x - x0] += diff;
+ } else {
+ row_out[x - x0] = diff;
+ }
+ };
+
+ size_t x = x0;
+ // First pixel of the row.
+ if (x0 == 0) {
+ scalar_pixel(x0);
+ ++x;
+ }
+ // SIMD
+ const auto match_gamma_offset_v = Set(df, match_gamma_offset);
+ const auto quarter = Set(df, 0.25f);
+ for (; x + 1 + Lanes(df) < x1; x += Lanes(df)) {
+ const auto in = LoadU(df, row_in + x);
+ const auto in_r = LoadU(df, row_in + x + 1);
+ const auto in_l = LoadU(df, row_in + x - 1);
+ const auto in_t = LoadU(df, row_in2 + x);
+ const auto in_b = LoadU(df, row_in1 + x);
+ auto base = Mul(quarter, Add(Add(in_r, in_l), Add(in_t, in_b)));
+ auto gammacv =
+ RatioOfDerivativesOfCubicRootToSimpleGamma</*invert=*/false>(
+ df, Add(in, match_gamma_offset_v));
+ auto diff = Mul(gammacv, Sub(in, base));
+ diff = Mul(diff, diff);
+ diff = Min(diff, Set(df, limit));
+ diff = MaskingSqrt(df, diff);
+ if ((y & 3) != 0) {
+ diff = Add(diff, LoadU(df, row_out + x - x0));
+ }
+ StoreU(diff, df, row_out + x - x0);
+ }
+ // Scalar
+ for (; x < x1; ++x) {
+ scalar_pixel(x);
+ }
+ if (y % 4 == 3) {
+ float* row_dout = pre_erosion[thread].Row((y - y_start) / 4);
+ for (size_t x = 0; x < (x1 - x0) / 4; x++) {
+ row_dout[x] = (row_out[x * 4] + row_out[x * 4 + 1] +
+ row_out[x * 4 + 2] + row_out[x * 4 + 3]) *
+ 0.25f;
+ }
+ }
+ }
+ Rect from_rect(x0 % 8 == 0 ? 0 : 1, y_start % 8 == 0 ? 0 : 1,
+ rect.xsize() * 2, rect.ysize() * 2);
+ FuzzyErosion(from_rect, pre_erosion[thread], rect, &aq_map);
+ for (size_t y = 0; y < rect.ysize(); ++y) {
+ const float* aq_map_row = rect.ConstRow(aq_map, y);
+ float* mask_row = rect.Row(mask, y);
+ for (size_t x = 0; x < rect.xsize(); ++x) {
+ mask_row[x] = ComputeMaskForAcStrategyUse(aq_map_row[x]);
+ }
+ }
+ PerBlockModulations(butteraugli_target, xyb.Plane(0), xyb.Plane(1),
+ xyb.Plane(2), scale, rect, &aq_map);
+ }
+ std::vector<ImageF> pre_erosion;
+ ImageF aq_map;
+ ImageF diff_buffer;
+};
+
+ImageF AdaptiveQuantizationMap(const float butteraugli_target,
+ const Image3F& xyb,
+ const FrameDimensions& frame_dim, float scale,
+ ThreadPool* pool, ImageF* mask) {
+ PROFILER_ZONE("aq AdaptiveQuantMap");
+
+ AdaptiveQuantizationImpl impl;
+ impl.Init(xyb);
+ *mask = ImageF(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+ JXL_CHECK(RunOnPool(
+ pool, 0,
+ DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks) *
+ DivCeil(frame_dim.ysize_blocks, kEncTileDimInBlocks),
+ [&](const size_t num_threads) {
+ impl.PrepareBuffers(num_threads);
+ return true;
+ },
+ [&](const uint32_t tid, const size_t thread) {
+ size_t n_enc_tiles =
+ DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks);
+ size_t tx = tid % n_enc_tiles;
+ size_t ty = tid / n_enc_tiles;
+ size_t by0 = ty * kEncTileDimInBlocks;
+ size_t by1 =
+ std::min((ty + 1) * kEncTileDimInBlocks, frame_dim.ysize_blocks);
+ size_t bx0 = tx * kEncTileDimInBlocks;
+ size_t bx1 =
+ std::min((tx + 1) * kEncTileDimInBlocks, frame_dim.xsize_blocks);
+ Rect r(bx0, by0, bx1 - bx0, by1 - by0);
+ impl.ComputeTile(butteraugli_target, scale, xyb, r, thread, mask);
+ },
+ "AQ DiffPrecompute"));
+
+ return std::move(impl).aq_map;
+}
+
+} // namespace
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(AdaptiveQuantizationMap);
+
+namespace {
+// If true, prints the quantization maps at each iteration.
+bool FLAGS_dump_quant_state = false;
+
+void DumpHeatmap(const AuxOut* aux_out, const std::string& label,
+ const ImageF& image, float good_threshold,
+ float bad_threshold) {
+ Image3F heatmap = CreateHeatMapImage(image, good_threshold, bad_threshold);
+ char filename[200];
+ snprintf(filename, sizeof(filename), "%s%05d", label.c_str(),
+ aux_out->num_butteraugli_iters);
+ aux_out->DumpImage(filename, heatmap);
+}
+
+void DumpHeatmaps(const AuxOut* aux_out, float ba_target,
+ const ImageF& quant_field, const ImageF& tile_heatmap,
+ const ImageF& bt_diffmap) {
+ if (!WantDebugOutput(aux_out)) return;
+ ImageF inv_qmap(quant_field.xsize(), quant_field.ysize());
+ for (size_t y = 0; y < quant_field.ysize(); ++y) {
+ const float* JXL_RESTRICT row_q = quant_field.ConstRow(y);
+ float* JXL_RESTRICT row_inv_q = inv_qmap.Row(y);
+ for (size_t x = 0; x < quant_field.xsize(); ++x) {
+ row_inv_q[x] = 1.0f / row_q[x]; // never zero
+ }
+ }
+ DumpHeatmap(aux_out, "quant_heatmap", inv_qmap, 4.0f * ba_target,
+ 6.0f * ba_target);
+ DumpHeatmap(aux_out, "tile_heatmap", tile_heatmap, ba_target,
+ 1.5f * ba_target);
+ // matches heat maps produced by the command line tool.
+ DumpHeatmap(aux_out, "bt_diffmap", bt_diffmap, ButteraugliFuzzyInverse(1.5),
+ ButteraugliFuzzyInverse(0.5));
+}
+
+ImageF TileDistMap(const ImageF& distmap, int tile_size, int margin,
+ const AcStrategyImage& ac_strategy) {
+ PROFILER_FUNC;
+ const int tile_xsize = (distmap.xsize() + tile_size - 1) / tile_size;
+ const int tile_ysize = (distmap.ysize() + tile_size - 1) / tile_size;
+ ImageF tile_distmap(tile_xsize, tile_ysize);
+ size_t distmap_stride = tile_distmap.PixelsPerRow();
+ for (int tile_y = 0; tile_y < tile_ysize; ++tile_y) {
+ AcStrategyRow ac_strategy_row = ac_strategy.ConstRow(tile_y);
+ float* JXL_RESTRICT dist_row = tile_distmap.Row(tile_y);
+ for (int tile_x = 0; tile_x < tile_xsize; ++tile_x) {
+ AcStrategy acs = ac_strategy_row[tile_x];
+ if (!acs.IsFirstBlock()) continue;
+ int this_tile_xsize = acs.covered_blocks_x() * tile_size;
+ int this_tile_ysize = acs.covered_blocks_y() * tile_size;
+ int y_begin = std::max<int>(0, tile_size * tile_y - margin);
+ int y_end = std::min<int>(distmap.ysize(),
+ tile_size * tile_y + this_tile_ysize + margin);
+ int x_begin = std::max<int>(0, tile_size * tile_x - margin);
+ int x_end = std::min<int>(distmap.xsize(),
+ tile_size * tile_x + this_tile_xsize + margin);
+ float dist_norm = 0.0;
+ double pixels = 0;
+ for (int y = y_begin; y < y_end; ++y) {
+ float ymul = 1.0;
+ constexpr float kBorderMul = 0.98f;
+ constexpr float kCornerMul = 0.7f;
+ if (margin != 0 && (y == y_begin || y == y_end - 1)) {
+ ymul = kBorderMul;
+ }
+ const float* const JXL_RESTRICT row = distmap.Row(y);
+ for (int x = x_begin; x < x_end; ++x) {
+ float xmul = ymul;
+ if (margin != 0 && (x == x_begin || x == x_end - 1)) {
+ if (xmul == 1.0) {
+ xmul = kBorderMul;
+ } else {
+ xmul = kCornerMul;
+ }
+ }
+ float v = row[x];
+ v *= v;
+ v *= v;
+ v *= v;
+ v *= v;
+ dist_norm += xmul * v;
+ pixels += xmul;
+ }
+ }
+ if (pixels == 0) pixels = 1;
+ // 16th norm is less than the max norm, we reduce the difference
+ // with this normalization factor.
+ constexpr float kTileNorm = 1.2f;
+ const float tile_dist =
+ kTileNorm * std::pow(dist_norm / pixels, 1.0f / 16.0f);
+ dist_row[tile_x] = tile_dist;
+ for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+ for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+ dist_row[tile_x + distmap_stride * iy + ix] = tile_dist;
+ }
+ }
+ }
+ }
+ return tile_distmap;
+}
+
+static const float kDcQuantPow = 0.83;
+static const float kDcQuant = 1.095924047623553f;
+static const float kAcQuant = 0.80751132443618624f;
+
+void FindBestQuantization(const ImageBundle& linear, const Image3F& opsin,
+ PassesEncoderState* enc_state,
+ const JxlCmsInterface& cms, ThreadPool* pool,
+ AuxOut* aux_out) {
+ const CompressParams& cparams = enc_state->cparams;
+ if (cparams.resampling > 1 &&
+ cparams.original_butteraugli_distance <= 4.0 * cparams.resampling) {
+ // For downsampled opsin image, the butteraugli based adaptive quantization
+ // loop would only make the size bigger without improving the distance much,
+ // so in this case we enable it only for very high butteraugli targets.
+ return;
+ }
+ Quantizer& quantizer = enc_state->shared.quantizer;
+ ImageI& raw_quant_field = enc_state->shared.raw_quant_field;
+ ImageF& quant_field = enc_state->initial_quant_field;
+
+ // TODO(veluca): this should really be rather handled on the
+ // ButteraugliComparator side.
+ struct TemporaryShrink {
+ TemporaryShrink(ImageBundle& bundle, size_t xsize, size_t ysize)
+ : bundle(bundle),
+ orig_xsize(bundle.xsize()),
+ orig_ysize(bundle.ysize()) {
+ bundle.ShrinkTo(xsize, ysize);
+ }
+ TemporaryShrink(const TemporaryShrink&) = delete;
+ TemporaryShrink(TemporaryShrink&&) = delete;
+
+ ~TemporaryShrink() { bundle.ShrinkTo(orig_xsize, orig_ysize); }
+
+ ImageBundle& bundle;
+ size_t orig_xsize;
+ size_t orig_ysize;
+ } t(const_cast<ImageBundle&>(linear),
+ enc_state->shared.frame_header.nonserialized_metadata->xsize(),
+ enc_state->shared.frame_header.nonserialized_metadata->ysize());
+
+ const float butteraugli_target = cparams.butteraugli_distance;
+ const float original_butteraugli = cparams.original_butteraugli_distance;
+ ButteraugliParams params = cparams.ba_params;
+ params.intensity_target = linear.metadata()->IntensityTarget();
+ // Hack the default intensity target value to be 80.0, the intensity
+ // target of sRGB images and a more reasonable viewing default than
+ // JPEG XL file format's default.
+ if (fabs(params.intensity_target - 255.0f) < 1e-3) {
+ params.intensity_target = 80.0f;
+ }
+ JxlButteraugliComparator comparator(params, cms);
+ JXL_CHECK(comparator.SetReferenceImage(linear));
+ bool lower_is_better =
+ (comparator.GoodQualityScore() < comparator.BadQualityScore());
+ const float initial_quant_dc = InitialQuantDC(butteraugli_target);
+ AdjustQuantField(enc_state->shared.ac_strategy, Rect(quant_field),
+ &quant_field);
+ ImageF tile_distmap;
+ ImageF initial_quant_field = CopyImage(quant_field);
+
+ float initial_qf_min, initial_qf_max;
+ ImageMinMax(initial_quant_field, &initial_qf_min, &initial_qf_max);
+ float initial_qf_ratio = initial_qf_max / initial_qf_min;
+ float qf_max_deviation_low = std::sqrt(250 / initial_qf_ratio);
+ float asymmetry = 2;
+ if (qf_max_deviation_low < asymmetry) asymmetry = qf_max_deviation_low;
+ float qf_lower = initial_qf_min / (asymmetry * qf_max_deviation_low);
+ float qf_higher = initial_qf_max * (qf_max_deviation_low / asymmetry);
+
+ JXL_ASSERT(qf_higher / qf_lower < 253);
+
+ constexpr int kOriginalComparisonRound = 1;
+ int iters = cparams.max_butteraugli_iters;
+ if (iters > 7) {
+ iters = 7;
+ }
+ if (cparams.speed_tier != SpeedTier::kTortoise) {
+ iters = 2;
+ }
+ for (int i = 0; i < iters + 1; ++i) {
+ if (FLAGS_dump_quant_state) {
+ printf("\nQuantization field:\n");
+ for (size_t y = 0; y < quant_field.ysize(); ++y) {
+ for (size_t x = 0; x < quant_field.xsize(); ++x) {
+ printf(" %.5f", quant_field.Row(y)[x]);
+ }
+ printf("\n");
+ }
+ }
+ quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field);
+ ImageBundle dec_linear = RoundtripImage(opsin, enc_state, cms, pool);
+ PROFILER_ZONE("enc Butteraugli");
+ float score;
+ ImageF diffmap;
+ JXL_CHECK(comparator.CompareWith(dec_linear, &diffmap, &score));
+ if (!lower_is_better) {
+ score = -score;
+ diffmap = ScaleImage(-1.0f, diffmap);
+ }
+ tile_distmap = TileDistMap(diffmap, 8 * cparams.resampling, 0,
+ enc_state->shared.ac_strategy);
+ if (WantDebugOutput(aux_out)) {
+ aux_out->DumpImage(("dec" + ToString(i)).c_str(), *dec_linear.color());
+ DumpHeatmaps(aux_out, butteraugli_target, quant_field, tile_distmap,
+ diffmap);
+ }
+ if (aux_out != nullptr) ++aux_out->num_butteraugli_iters;
+ if (cparams.log_search_state) {
+ float minval, maxval;
+ ImageMinMax(quant_field, &minval, &maxval);
+ printf("\nButteraugli iter: %d/%d\n", i, cparams.max_butteraugli_iters);
+ printf("Butteraugli distance: %f (target = %f)\n", score,
+ original_butteraugli);
+ printf("quant range: %f ... %f DC quant: %f\n", minval, maxval,
+ initial_quant_dc);
+ if (FLAGS_dump_quant_state) {
+ quantizer.DumpQuantizationMap(raw_quant_field);
+ }
+ }
+
+ if (i == iters) break;
+
+ double kPow[8] = {
+ 0.2, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ };
+ double kPowMod[8] = {
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ };
+ if (i == kOriginalComparisonRound) {
+ // Don't allow optimization to make the quant field a lot worse than
+ // what the initial guess was. This allows the AC field to have enough
+ // precision to reduce the oscillations due to the dc reconstruction.
+ double kInitMul = 0.6;
+ const double kOneMinusInitMul = 1.0 - kInitMul;
+ for (size_t y = 0; y < quant_field.ysize(); ++y) {
+ float* const JXL_RESTRICT row_q = quant_field.Row(y);
+ const float* const JXL_RESTRICT row_init = initial_quant_field.Row(y);
+ for (size_t x = 0; x < quant_field.xsize(); ++x) {
+ double clamp = kOneMinusInitMul * row_q[x] + kInitMul * row_init[x];
+ if (row_q[x] < clamp) {
+ row_q[x] = clamp;
+ if (row_q[x] > qf_higher) row_q[x] = qf_higher;
+ if (row_q[x] < qf_lower) row_q[x] = qf_lower;
+ }
+ }
+ }
+ }
+
+ double cur_pow = 0.0;
+ if (i < 7) {
+ cur_pow = kPow[i] + (original_butteraugli - 1.0) * kPowMod[i];
+ if (cur_pow < 0) {
+ cur_pow = 0;
+ }
+ }
+ if (cur_pow == 0.0) {
+ for (size_t y = 0; y < quant_field.ysize(); ++y) {
+ const float* const JXL_RESTRICT row_dist = tile_distmap.Row(y);
+ float* const JXL_RESTRICT row_q = quant_field.Row(y);
+ for (size_t x = 0; x < quant_field.xsize(); ++x) {
+ const float diff = row_dist[x] / original_butteraugli;
+ if (diff > 1.0f) {
+ float old = row_q[x];
+ row_q[x] *= diff;
+ int qf_old = old * quantizer.InvGlobalScale() + 0.5;
+ int qf_new = row_q[x] * quantizer.InvGlobalScale() + 0.5;
+ if (qf_old == qf_new) {
+ row_q[x] = old + quantizer.Scale();
+ }
+ }
+ if (row_q[x] > qf_higher) row_q[x] = qf_higher;
+ if (row_q[x] < qf_lower) row_q[x] = qf_lower;
+ }
+ }
+ } else {
+ for (size_t y = 0; y < quant_field.ysize(); ++y) {
+ const float* const JXL_RESTRICT row_dist = tile_distmap.Row(y);
+ float* const JXL_RESTRICT row_q = quant_field.Row(y);
+ for (size_t x = 0; x < quant_field.xsize(); ++x) {
+ const float diff = row_dist[x] / original_butteraugli;
+ if (diff <= 1.0f) {
+ row_q[x] *= std::pow(diff, cur_pow);
+ } else {
+ float old = row_q[x];
+ row_q[x] *= diff;
+ int qf_old = old * quantizer.InvGlobalScale() + 0.5;
+ int qf_new = row_q[x] * quantizer.InvGlobalScale() + 0.5;
+ if (qf_old == qf_new) {
+ row_q[x] = old + quantizer.Scale();
+ }
+ }
+ if (row_q[x] > qf_higher) row_q[x] = qf_higher;
+ if (row_q[x] < qf_lower) row_q[x] = qf_lower;
+ }
+ }
+ }
+ }
+ quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field);
+}
+
+void FindBestQuantizationMaxError(const Image3F& opsin,
+ PassesEncoderState* enc_state,
+ const JxlCmsInterface& cms, ThreadPool* pool,
+ AuxOut* aux_out) {
+ // TODO(szabadka): Make this work for non-opsin color spaces.
+ const CompressParams& cparams = enc_state->cparams;
+ Quantizer& quantizer = enc_state->shared.quantizer;
+ ImageI& raw_quant_field = enc_state->shared.raw_quant_field;
+ ImageF& quant_field = enc_state->initial_quant_field;
+
+ // TODO(veluca): better choice of this value.
+ const float initial_quant_dc =
+ 16 * std::sqrt(0.1f / cparams.butteraugli_distance);
+ AdjustQuantField(enc_state->shared.ac_strategy, Rect(quant_field),
+ &quant_field);
+
+ const float inv_max_err[3] = {1.0f / enc_state->cparams.max_error[0],
+ 1.0f / enc_state->cparams.max_error[1],
+ 1.0f / enc_state->cparams.max_error[2]};
+
+ for (int i = 0; i < cparams.max_butteraugli_iters + 1; ++i) {
+ quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field);
+ if (aux_out) {
+ aux_out->DumpXybImage(("ops" + ToString(i)).c_str(), opsin);
+ }
+ ImageBundle decoded = RoundtripImage(opsin, enc_state, cms, pool);
+ if (aux_out) {
+ aux_out->DumpXybImage(("dec" + ToString(i)).c_str(), *decoded.color());
+ }
+
+ for (size_t by = 0; by < enc_state->shared.frame_dim.ysize_blocks; by++) {
+ AcStrategyRow ac_strategy_row =
+ enc_state->shared.ac_strategy.ConstRow(by);
+ for (size_t bx = 0; bx < enc_state->shared.frame_dim.xsize_blocks; bx++) {
+ AcStrategy acs = ac_strategy_row[bx];
+ if (!acs.IsFirstBlock()) continue;
+ float max_error = 0;
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t y = by * kBlockDim;
+ y < (by + acs.covered_blocks_y()) * kBlockDim; y++) {
+ if (y >= decoded.ysize()) continue;
+ const float* JXL_RESTRICT in_row = opsin.ConstPlaneRow(c, y);
+ const float* JXL_RESTRICT dec_row =
+ decoded.color()->ConstPlaneRow(c, y);
+ for (size_t x = bx * kBlockDim;
+ x < (bx + acs.covered_blocks_x()) * kBlockDim; x++) {
+ if (x >= decoded.xsize()) continue;
+ max_error = std::max(
+ std::abs(in_row[x] - dec_row[x]) * inv_max_err[c], max_error);
+ }
+ }
+ }
+ // Target an error between max_error/2 and max_error.
+ // If the error in the varblock is above the target, increase the qf to
+ // compensate. If the error is below the target, decrease the qf.
+ // However, to avoid an excessive increase of the qf, only do so if the
+ // error is less than half the maximum allowed error.
+ const float qf_mul = (max_error < 0.5f) ? max_error * 2.0f
+ : (max_error > 1.0f) ? max_error
+ : 1.0f;
+ for (size_t qy = by; qy < by + acs.covered_blocks_y(); qy++) {
+ float* JXL_RESTRICT quant_field_row = quant_field.Row(qy);
+ for (size_t qx = bx; qx < bx + acs.covered_blocks_x(); qx++) {
+ quant_field_row[qx] *= qf_mul;
+ }
+ }
+ }
+ }
+ }
+ quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field);
+}
+
+} // namespace
+
+void AdjustQuantField(const AcStrategyImage& ac_strategy, const Rect& rect,
+ ImageF* quant_field) {
+ // Replace the whole quant_field in non-8x8 blocks with the maximum of each
+ // 8x8 block.
+ size_t stride = quant_field->PixelsPerRow();
+ for (size_t y = 0; y < rect.ysize(); ++y) {
+ AcStrategyRow ac_strategy_row = ac_strategy.ConstRow(rect, y);
+ float* JXL_RESTRICT quant_row = rect.Row(quant_field, y);
+ for (size_t x = 0; x < rect.xsize(); ++x) {
+ AcStrategy acs = ac_strategy_row[x];
+ if (!acs.IsFirstBlock()) continue;
+ JXL_ASSERT(x + acs.covered_blocks_x() <= quant_field->xsize());
+ JXL_ASSERT(y + acs.covered_blocks_y() <= quant_field->ysize());
+ float max = quant_row[x];
+ for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+ for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+ max = std::max(quant_row[x + ix + iy * stride], max);
+ }
+ }
+ for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+ for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+ quant_row[x + ix + iy * stride] = max;
+ }
+ }
+ }
+ }
+}
+
+float InitialQuantDC(float butteraugli_target) {
+ const float kDcMul = 0.3; // Butteraugli target where non-linearity kicks in.
+ const float butteraugli_target_dc = std::max<float>(
+ 0.5f * butteraugli_target,
+ std::min<float>(butteraugli_target,
+ kDcMul * std::pow((1.0f / kDcMul) * butteraugli_target,
+ kDcQuantPow)));
+ // We want the maximum DC value to be at most 2**15 * kInvDCQuant / quant_dc.
+ // The maximum DC value might not be in the kXybRange because of inverse
+ // gaborish, so we add some slack to the maximum theoretical quant obtained
+ // this way (64).
+ return std::min(kDcQuant / butteraugli_target_dc, 50.f);
+}
+
+ImageF InitialQuantField(const float butteraugli_target, const Image3F& opsin,
+ const FrameDimensions& frame_dim, ThreadPool* pool,
+ float rescale, ImageF* mask) {
+ PROFILER_FUNC;
+ const float quant_ac = kAcQuant / butteraugli_target;
+ return HWY_DYNAMIC_DISPATCH(AdaptiveQuantizationMap)(
+ butteraugli_target, opsin, frame_dim, quant_ac * rescale, pool, mask);
+}
+
+void FindBestQuantizer(const ImageBundle* linear, const Image3F& opsin,
+ PassesEncoderState* enc_state,
+ const JxlCmsInterface& cms, ThreadPool* pool,
+ AuxOut* aux_out, double rescale) {
+ const CompressParams& cparams = enc_state->cparams;
+ if (cparams.max_error_mode) {
+ PROFILER_ZONE("enc find best maxerr");
+ FindBestQuantizationMaxError(opsin, enc_state, cms, pool, aux_out);
+ } else if (cparams.speed_tier <= SpeedTier::kKitten) {
+ // Normal encoding to a butteraugli score.
+ PROFILER_ZONE("enc find best2");
+ FindBestQuantization(*linear, opsin, enc_state, cms, pool, aux_out);
+ }
+}
+
+ImageBundle RoundtripImage(const Image3F& opsin, PassesEncoderState* enc_state,
+ const JxlCmsInterface& cms, ThreadPool* pool) {
+ PROFILER_ZONE("enc roundtrip");
+ std::unique_ptr<PassesDecoderState> dec_state =
+ jxl::make_unique<PassesDecoderState>();
+ JXL_CHECK(dec_state->output_encoding_info.SetFromMetadata(
+ *enc_state->shared.metadata));
+ dec_state->shared = &enc_state->shared;
+ JXL_ASSERT(opsin.ysize() % kBlockDim == 0);
+
+ const size_t xsize_groups = DivCeil(opsin.xsize(), kGroupDim);
+ const size_t ysize_groups = DivCeil(opsin.ysize(), kGroupDim);
+ const size_t num_groups = xsize_groups * ysize_groups;
+
+ size_t num_special_frames = enc_state->special_frames.size();
+
+ std::unique_ptr<ModularFrameEncoder> modular_frame_encoder =
+ jxl::make_unique<ModularFrameEncoder>(enc_state->shared.frame_header,
+ enc_state->cparams);
+ JXL_CHECK(InitializePassesEncoder(opsin, cms, pool, enc_state,
+ modular_frame_encoder.get(), nullptr));
+ JXL_CHECK(dec_state->Init());
+ JXL_CHECK(dec_state->InitForAC(pool));
+
+ ImageBundle decoded(&enc_state->shared.metadata->m);
+ decoded.origin = enc_state->shared.frame_header.frame_origin;
+ decoded.SetFromImage(Image3F(opsin.xsize(), opsin.ysize()),
+ dec_state->output_encoding_info.color_encoding);
+
+ PassesDecoderState::PipelineOptions options;
+ options.use_slow_render_pipeline = false;
+ options.coalescing = true;
+ options.render_spotcolors = false;
+
+ // Same as dec_state->shared->frame_header.nonserialized_metadata->m
+ const ImageMetadata& metadata = *decoded.metadata();
+
+ JXL_CHECK(dec_state->PreparePipeline(&decoded, options));
+
+ hwy::AlignedUniquePtr<GroupDecCache[]> group_dec_caches;
+ const auto allocate_storage = [&](const size_t num_threads) -> Status {
+ JXL_RETURN_IF_ERROR(
+ dec_state->render_pipeline->PrepareForThreads(num_threads,
+ /*use_group_ids=*/false));
+ group_dec_caches = hwy::MakeUniqueAlignedArray<GroupDecCache>(num_threads);
+ return true;
+ };
+ const auto process_group = [&](const uint32_t group_index,
+ const size_t thread) {
+ if (dec_state->shared->frame_header.loop_filter.epf_iters > 0) {
+ ComputeSigma(dec_state->shared->BlockGroupRect(group_index),
+ dec_state.get());
+ }
+ RenderPipelineInput input =
+ dec_state->render_pipeline->GetInputBuffers(group_index, thread);
+ JXL_CHECK(DecodeGroupForRoundtrip(
+ enc_state->coeffs, group_index, dec_state.get(),
+ &group_dec_caches[thread], thread, input, &decoded, nullptr));
+ for (size_t c = 0; c < metadata.num_extra_channels; c++) {
+ std::pair<ImageF*, Rect> ri = input.GetBuffer(3 + c);
+ FillPlane(0.0f, ri.first, ri.second);
+ }
+ input.Done();
+ };
+ JXL_CHECK(RunOnPool(pool, 0, num_groups, allocate_storage, process_group,
+ "AQ loop"));
+
+ // Ensure we don't create any new special frames.
+ enc_state->special_frames.resize(num_special_frames);
+
+ return decoded;
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/enc_adaptive_quantization.h b/third_party/jpeg-xl/lib/jxl/enc_adaptive_quantization.h
new file mode 100644
index 0000000000..a63c574492
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_adaptive_quantization.h
@@ -0,0 +1,66 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_ADAPTIVE_QUANTIZATION_H_
+#define LIB_JXL_ENC_ADAPTIVE_QUANTIZATION_H_
+
+#include <stddef.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+
+// Heuristics to find a good quantizer for a given image. InitialQuantField
+// produces a quantization field (i.e. relative quantization amounts for each
+// block) out of an opsin-space image. `InitialQuantField` uses heuristics,
+// `FindBestQuantizer` (in non-fast mode) will run multiple encoding-decoding
+// steps and try to improve the given quant field.
+
+namespace jxl {
+
+struct AuxOut;
+
+// Computes the decoded image for a given set of compression parameters. Mainly
+// used in the FindBestQuantization loops and in some tests.
+// TODO(veluca): this doesn't seem the best possible file for this function.
+ImageBundle RoundtripImage(const Image3F& opsin, PassesEncoderState* enc_state,
+ const JxlCmsInterface& cms, ThreadPool* pool);
+
+// Returns an image subsampled by kBlockDim in each direction. If the value
+// at pixel (x,y) in the returned image is greater than 1.0, it means that
+// more fine-grained quantization should be used in the corresponding block
+// of the input image, while a value less than 1.0 indicates that less
+// fine-grained quantization should be enough. Returns a mask, too, which
+// can later be used to make better decisions about ac strategy.
+ImageF InitialQuantField(float butteraugli_target, const Image3F& opsin,
+ const FrameDimensions& frame_dim, ThreadPool* pool,
+ float rescale, ImageF* initial_quant_mask);
+
+float InitialQuantDC(float butteraugli_target);
+
+void AdjustQuantField(const AcStrategyImage& ac_strategy, const Rect& rect,
+ ImageF* quant_field);
+
+// Returns a quantizer that uses an adjusted version of the provided
+// quant_field. Also computes the dequant_map corresponding to the given
+// dequant_float_map and chosen quantization levels.
+// `linear` is only used in Kitten mode or slower.
+void FindBestQuantizer(const ImageBundle* linear, const Image3F& opsin,
+ PassesEncoderState* enc_state,
+ const JxlCmsInterface& cms, ThreadPool* pool,
+ AuxOut* aux_out, double rescale = 1.0);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_ADAPTIVE_QUANTIZATION_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_ans.cc b/third_party/jpeg-xl/lib/jxl/enc_ans.cc
new file mode 100644
index 0000000000..4249426bc9
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_ans.cc
@@ -0,0 +1,1688 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_ans.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <limits>
+#include <numeric>
+#include <type_traits>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ans_common.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_cluster.h"
+#include "lib/jxl/enc_context_map.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/enc_huffman.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+
+namespace {
+
+bool ans_fuzzer_friendly_ = false;
+
+static const int kMaxNumSymbolsForSmallCode = 4;
+
+void ANSBuildInfoTable(const ANSHistBin* counts, const AliasTable::Entry* table,
+ size_t alphabet_size, size_t log_alpha_size,
+ ANSEncSymbolInfo* info) {
+ size_t log_entry_size = ANS_LOG_TAB_SIZE - log_alpha_size;
+ size_t entry_size_minus_1 = (1 << log_entry_size) - 1;
+ // create valid alias table for empty streams.
+ for (size_t s = 0; s < std::max<size_t>(1, alphabet_size); ++s) {
+ const ANSHistBin freq = s == alphabet_size ? ANS_TAB_SIZE : counts[s];
+ info[s].freq_ = static_cast<uint16_t>(freq);
+#ifdef USE_MULT_BY_RECIPROCAL
+ if (freq != 0) {
+ info[s].ifreq_ =
+ ((1ull << RECIPROCAL_PRECISION) + info[s].freq_ - 1) / info[s].freq_;
+ } else {
+ info[s].ifreq_ = 1; // shouldn't matter (symbol shouldn't occur), but...
+ }
+#endif
+ info[s].reverse_map_.resize(freq);
+ }
+ for (int i = 0; i < ANS_TAB_SIZE; i++) {
+ AliasTable::Symbol s =
+ AliasTable::Lookup(table, i, log_entry_size, entry_size_minus_1);
+ info[s.value].reverse_map_[s.offset] = i;
+ }
+}
+
+float EstimateDataBits(const ANSHistBin* histogram, const ANSHistBin* counts,
+ size_t len) {
+ float sum = 0.0f;
+ int total_histogram = 0;
+ int total_counts = 0;
+ for (size_t i = 0; i < len; ++i) {
+ total_histogram += histogram[i];
+ total_counts += counts[i];
+ if (histogram[i] > 0) {
+ JXL_ASSERT(counts[i] > 0);
+ // += histogram[i] * -log(counts[i]/total_counts)
+ sum += histogram[i] *
+ std::max(0.0f, ANS_LOG_TAB_SIZE - FastLog2f(counts[i]));
+ }
+ }
+ if (total_histogram > 0) {
+ // Used only in assert.
+ (void)total_counts;
+ JXL_ASSERT(total_counts == ANS_TAB_SIZE);
+ }
+ return sum;
+}
+
+float EstimateDataBitsFlat(const ANSHistBin* histogram, size_t len) {
+ const float flat_bits = std::max(FastLog2f(len), 0.0f);
+ float total_histogram = 0;
+ for (size_t i = 0; i < len; ++i) {
+ total_histogram += histogram[i];
+ }
+ return total_histogram * flat_bits;
+}
+
+// Static Huffman code for encoding logcounts. The last symbol is used as RLE
+// sequence.
+static const uint8_t kLogCountBitLengths[ANS_LOG_TAB_SIZE + 2] = {
+ 5, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 6, 7, 7,
+};
+static const uint8_t kLogCountSymbols[ANS_LOG_TAB_SIZE + 2] = {
+ 17, 11, 15, 3, 9, 7, 4, 2, 5, 6, 0, 33, 1, 65,
+};
+
+// Returns the difference between largest count that can be represented and is
+// smaller than "count" and smallest representable count larger than "count".
+static int SmallestIncrement(uint32_t count, uint32_t shift) {
+ int bits = count == 0 ? -1 : FloorLog2Nonzero(count);
+ int drop_bits = bits - GetPopulationCountPrecision(bits, shift);
+ return drop_bits < 0 ? 1 : (1 << drop_bits);
+}
+
+template <bool minimize_error_of_sum>
+bool RebalanceHistogram(const float* targets, int max_symbol, int table_size,
+ uint32_t shift, int* omit_pos, ANSHistBin* counts) {
+ int sum = 0;
+ float sum_nonrounded = 0.0;
+ int remainder_pos = 0; // if all of them are handled in first loop
+ int remainder_log = -1;
+ for (int n = 0; n < max_symbol; ++n) {
+ if (targets[n] > 0 && targets[n] < 1.0f) {
+ counts[n] = 1;
+ sum_nonrounded += targets[n];
+ sum += counts[n];
+ }
+ }
+ const float discount_ratio =
+ (table_size - sum) / (table_size - sum_nonrounded);
+ JXL_ASSERT(discount_ratio > 0);
+ JXL_ASSERT(discount_ratio <= 1.0f);
+ // Invariant for minimize_error_of_sum == true:
+ // abs(sum - sum_nonrounded)
+ // <= SmallestIncrement(max(targets[])) + max_symbol
+ for (int n = 0; n < max_symbol; ++n) {
+ if (targets[n] >= 1.0f) {
+ sum_nonrounded += targets[n];
+ counts[n] =
+ static_cast<ANSHistBin>(targets[n] * discount_ratio); // truncate
+ if (counts[n] == 0) counts[n] = 1;
+ if (counts[n] == table_size) counts[n] = table_size - 1;
+ // Round the count to the closest nonzero multiple of SmallestIncrement
+ // (when minimize_error_of_sum is false) or one of two closest so as to
+ // keep the sum as close as possible to sum_nonrounded.
+ int inc = SmallestIncrement(counts[n], shift);
+ counts[n] -= counts[n] & (inc - 1);
+ // TODO(robryk): Should we rescale targets[n]?
+ const float target =
+ minimize_error_of_sum ? (sum_nonrounded - sum) : targets[n];
+ if (counts[n] == 0 ||
+ (target > counts[n] + inc / 2 && counts[n] + inc < table_size)) {
+ counts[n] += inc;
+ }
+ sum += counts[n];
+ const int count_log = FloorLog2Nonzero(static_cast<uint32_t>(counts[n]));
+ if (count_log > remainder_log) {
+ remainder_pos = n;
+ remainder_log = count_log;
+ }
+ }
+ }
+ JXL_ASSERT(remainder_pos != -1);
+ // NOTE: This is the only place where counts could go negative. We could
+ // detect that, return false and make ANSHistBin uint32_t.
+ counts[remainder_pos] -= sum - table_size;
+ *omit_pos = remainder_pos;
+ return counts[remainder_pos] > 0;
+}
+
+Status NormalizeCounts(ANSHistBin* counts, int* omit_pos, const int length,
+ const int precision_bits, uint32_t shift,
+ int* num_symbols, int* symbols) {
+ const int32_t table_size = 1 << precision_bits; // target sum / table size
+ uint64_t total = 0;
+ int max_symbol = 0;
+ int symbol_count = 0;
+ for (int n = 0; n < length; ++n) {
+ total += counts[n];
+ if (counts[n] > 0) {
+ if (symbol_count < kMaxNumSymbolsForSmallCode) {
+ symbols[symbol_count] = n;
+ }
+ ++symbol_count;
+ max_symbol = n + 1;
+ }
+ }
+ *num_symbols = symbol_count;
+ if (symbol_count == 0) {
+ return true;
+ }
+ if (symbol_count == 1) {
+ counts[symbols[0]] = table_size;
+ return true;
+ }
+ if (symbol_count > table_size)
+ return JXL_FAILURE("Too many entries in an ANS histogram");
+
+ const float norm = 1.f * table_size / total;
+ std::vector<float> targets(max_symbol);
+ for (size_t n = 0; n < targets.size(); ++n) {
+ targets[n] = norm * counts[n];
+ }
+ if (!RebalanceHistogram<false>(&targets[0], max_symbol, table_size, shift,
+ omit_pos, counts)) {
+ // Use an alternative rebalancing mechanism if the one above failed
+ // to create a histogram that is positive wherever the original one was.
+ if (!RebalanceHistogram<true>(&targets[0], max_symbol, table_size, shift,
+ omit_pos, counts)) {
+ return JXL_FAILURE("Logic error: couldn't rebalance a histogram");
+ }
+ }
+ return true;
+}
+
+struct SizeWriter {
+ size_t size = 0;
+ void Write(size_t num, size_t bits) { size += num; }
+};
+
+template <typename Writer>
+void StoreVarLenUint8(size_t n, Writer* writer) {
+ JXL_DASSERT(n <= 255);
+ if (n == 0) {
+ writer->Write(1, 0);
+ } else {
+ writer->Write(1, 1);
+ size_t nbits = FloorLog2Nonzero(n);
+ writer->Write(3, nbits);
+ writer->Write(nbits, n - (1ULL << nbits));
+ }
+}
+
+template <typename Writer>
+void StoreVarLenUint16(size_t n, Writer* writer) {
+ JXL_DASSERT(n <= 65535);
+ if (n == 0) {
+ writer->Write(1, 0);
+ } else {
+ writer->Write(1, 1);
+ size_t nbits = FloorLog2Nonzero(n);
+ writer->Write(4, nbits);
+ writer->Write(nbits, n - (1ULL << nbits));
+ }
+}
+
+template <typename Writer>
+bool EncodeCounts(const ANSHistBin* counts, const int alphabet_size,
+ const int omit_pos, const int num_symbols, uint32_t shift,
+ const int* symbols, Writer* writer) {
+ bool ok = true;
+ if (num_symbols <= 2) {
+ // Small tree marker to encode 1-2 symbols.
+ writer->Write(1, 1);
+ if (num_symbols == 0) {
+ writer->Write(1, 0);
+ StoreVarLenUint8(0, writer);
+ } else {
+ writer->Write(1, num_symbols - 1);
+ for (int i = 0; i < num_symbols; ++i) {
+ StoreVarLenUint8(symbols[i], writer);
+ }
+ }
+ if (num_symbols == 2) {
+ writer->Write(ANS_LOG_TAB_SIZE, counts[symbols[0]]);
+ }
+ } else {
+ // Mark non-small tree.
+ writer->Write(1, 0);
+ // Mark non-flat histogram.
+ writer->Write(1, 0);
+
+ // Precompute sequences for RLE encoding. Contains the number of identical
+ // values starting at a given index. Only contains the value at the first
+ // element of the series.
+ std::vector<uint32_t> same(alphabet_size, 0);
+ int last = 0;
+ for (int i = 1; i < alphabet_size; i++) {
+ // Store the sequence length once different symbol reached, or we're at
+ // the end, or the length is longer than we can encode, or we are at
+ // the omit_pos. We don't support including the omit_pos in an RLE
+ // sequence because this value may use a different amount of log2 bits
+ // than standard, it is too complex to handle in the decoder.
+ if (counts[i] != counts[last] || i + 1 == alphabet_size ||
+ (i - last) >= 255 || i == omit_pos || i == omit_pos + 1) {
+ same[last] = (i - last);
+ last = i + 1;
+ }
+ }
+
+ int length = 0;
+ std::vector<int> logcounts(alphabet_size);
+ int omit_log = 0;
+ for (int i = 0; i < alphabet_size; ++i) {
+ JXL_ASSERT(counts[i] <= ANS_TAB_SIZE);
+ JXL_ASSERT(counts[i] >= 0);
+ if (i == omit_pos) {
+ length = i + 1;
+ } else if (counts[i] > 0) {
+ logcounts[i] = FloorLog2Nonzero(static_cast<uint32_t>(counts[i])) + 1;
+ length = i + 1;
+ if (i < omit_pos) {
+ omit_log = std::max(omit_log, logcounts[i] + 1);
+ } else {
+ omit_log = std::max(omit_log, logcounts[i]);
+ }
+ }
+ }
+ logcounts[omit_pos] = omit_log;
+
+ // Elias gamma-like code for shift. Only difference is that if the number
+ // of bits to be encoded is equal to FloorLog2(ANS_LOG_TAB_SIZE+1), we skip
+ // the terminating 0 in unary coding.
+ int upper_bound_log = FloorLog2Nonzero(ANS_LOG_TAB_SIZE + 1);
+ int log = FloorLog2Nonzero(shift + 1);
+ writer->Write(log, (1 << log) - 1);
+ if (log != upper_bound_log) writer->Write(1, 0);
+ writer->Write(log, ((1 << log) - 1) & (shift + 1));
+
+ // Since num_symbols >= 3, we know that length >= 3, therefore we encode
+ // length - 3.
+ if (length - 3 > 255) {
+ // Pretend that everything is OK, but complain about correctness later.
+ StoreVarLenUint8(255, writer);
+ ok = false;
+ } else {
+ StoreVarLenUint8(length - 3, writer);
+ }
+
+ // The logcount values are encoded with a static Huffman code.
+ static const size_t kMinReps = 4;
+ size_t rep = ANS_LOG_TAB_SIZE + 1;
+ for (int i = 0; i < length; ++i) {
+ if (i > 0 && same[i - 1] > kMinReps) {
+ // Encode the RLE symbol and skip the repeated ones.
+ writer->Write(kLogCountBitLengths[rep], kLogCountSymbols[rep]);
+ StoreVarLenUint8(same[i - 1] - kMinReps - 1, writer);
+ i += same[i - 1] - 2;
+ continue;
+ }
+ writer->Write(kLogCountBitLengths[logcounts[i]],
+ kLogCountSymbols[logcounts[i]]);
+ }
+ for (int i = 0; i < length; ++i) {
+ if (i > 0 && same[i - 1] > kMinReps) {
+ // Skip symbols encoded by RLE.
+ i += same[i - 1] - 2;
+ continue;
+ }
+ if (logcounts[i] > 1 && i != omit_pos) {
+ int bitcount = GetPopulationCountPrecision(logcounts[i] - 1, shift);
+ int drop_bits = logcounts[i] - 1 - bitcount;
+ JXL_CHECK((counts[i] & ((1 << drop_bits) - 1)) == 0);
+ writer->Write(bitcount, (counts[i] >> drop_bits) - (1 << bitcount));
+ }
+ }
+ }
+ return ok;
+}
+
+void EncodeFlatHistogram(const int alphabet_size, BitWriter* writer) {
+ // Mark non-small tree.
+ writer->Write(1, 0);
+ // Mark uniform histogram.
+ writer->Write(1, 1);
+ JXL_ASSERT(alphabet_size > 0);
+ // Encode alphabet size.
+ StoreVarLenUint8(alphabet_size - 1, writer);
+}
+
+float ComputeHistoAndDataCost(const ANSHistBin* histogram, size_t alphabet_size,
+ uint32_t method) {
+ if (method == 0) { // Flat code
+ return ANS_LOG_TAB_SIZE + 2 +
+ EstimateDataBitsFlat(histogram, alphabet_size);
+ }
+ // Non-flat: shift = method-1.
+ uint32_t shift = method - 1;
+ std::vector<ANSHistBin> counts(histogram, histogram + alphabet_size);
+ int omit_pos = 0;
+ int num_symbols;
+ int symbols[kMaxNumSymbolsForSmallCode] = {};
+ JXL_CHECK(NormalizeCounts(counts.data(), &omit_pos, alphabet_size,
+ ANS_LOG_TAB_SIZE, shift, &num_symbols, symbols));
+ SizeWriter writer;
+ // Ignore the correctness, no real encoding happens at this stage.
+ (void)EncodeCounts(counts.data(), alphabet_size, omit_pos, num_symbols, shift,
+ symbols, &writer);
+ return writer.size +
+ EstimateDataBits(histogram, counts.data(), alphabet_size);
+}
+
+uint32_t ComputeBestMethod(
+ const ANSHistBin* histogram, size_t alphabet_size, float* cost,
+ HistogramParams::ANSHistogramStrategy ans_histogram_strategy) {
+ size_t method = 0;
+ float fcost = ComputeHistoAndDataCost(histogram, alphabet_size, 0);
+ auto try_shift = [&](size_t shift) {
+ float c = ComputeHistoAndDataCost(histogram, alphabet_size, shift + 1);
+ if (c < fcost) {
+ method = shift + 1;
+ fcost = c;
+ }
+ };
+ switch (ans_histogram_strategy) {
+ case HistogramParams::ANSHistogramStrategy::kPrecise: {
+ for (uint32_t shift = 0; shift <= ANS_LOG_TAB_SIZE; shift++) {
+ try_shift(shift);
+ }
+ break;
+ }
+ case HistogramParams::ANSHistogramStrategy::kApproximate: {
+ for (uint32_t shift = 0; shift <= ANS_LOG_TAB_SIZE; shift += 2) {
+ try_shift(shift);
+ }
+ break;
+ }
+ case HistogramParams::ANSHistogramStrategy::kFast: {
+ try_shift(0);
+ try_shift(ANS_LOG_TAB_SIZE / 2);
+ try_shift(ANS_LOG_TAB_SIZE);
+ break;
+ }
+ };
+ *cost = fcost;
+ return method;
+}
+
+} // namespace
+
+// Returns an estimate of the cost of encoding this histogram and the
+// corresponding data.
+size_t BuildAndStoreANSEncodingData(
+ HistogramParams::ANSHistogramStrategy ans_histogram_strategy,
+ const ANSHistBin* histogram, size_t alphabet_size, size_t log_alpha_size,
+ bool use_prefix_code, ANSEncSymbolInfo* info, BitWriter* writer) {
+ if (use_prefix_code) {
+ if (alphabet_size <= 1) return 0;
+ std::vector<uint32_t> histo(alphabet_size);
+ for (size_t i = 0; i < alphabet_size; i++) {
+ histo[i] = histogram[i];
+ JXL_CHECK(histogram[i] >= 0);
+ }
+ size_t cost = 0;
+ {
+ std::vector<uint8_t> depths(alphabet_size);
+ std::vector<uint16_t> bits(alphabet_size);
+ if (writer == nullptr) {
+ BitWriter tmp_writer;
+ BitWriter::Allotment allotment(
+ &tmp_writer, 8 * alphabet_size + 8); // safe upper bound
+ BuildAndStoreHuffmanTree(histo.data(), alphabet_size, depths.data(),
+ bits.data(), &tmp_writer);
+ allotment.ReclaimAndCharge(&tmp_writer, 0, /*aux_out=*/nullptr);
+ cost = tmp_writer.BitsWritten();
+ } else {
+ size_t start = writer->BitsWritten();
+ BuildAndStoreHuffmanTree(histo.data(), alphabet_size, depths.data(),
+ bits.data(), writer);
+ cost = writer->BitsWritten() - start;
+ }
+ for (size_t i = 0; i < alphabet_size; i++) {
+ info[i].bits = depths[i] == 0 ? 0 : bits[i];
+ info[i].depth = depths[i];
+ }
+ }
+ // Estimate data cost.
+ for (size_t i = 0; i < alphabet_size; i++) {
+ cost += histogram[i] * info[i].depth;
+ }
+ return cost;
+ }
+ JXL_ASSERT(alphabet_size <= ANS_TAB_SIZE);
+ // Ensure we ignore trailing zeros in the histogram.
+ if (alphabet_size != 0) {
+ size_t largest_symbol = 0;
+ for (size_t i = 0; i < alphabet_size; i++) {
+ if (histogram[i] != 0) largest_symbol = i;
+ }
+ alphabet_size = largest_symbol + 1;
+ }
+ float cost;
+ uint32_t method = ComputeBestMethod(histogram, alphabet_size, &cost,
+ ans_histogram_strategy);
+ JXL_ASSERT(cost >= 0);
+ int num_symbols;
+ int symbols[kMaxNumSymbolsForSmallCode] = {};
+ std::vector<ANSHistBin> counts(histogram, histogram + alphabet_size);
+ if (!counts.empty()) {
+ size_t sum = 0;
+ for (size_t i = 0; i < counts.size(); i++) {
+ sum += counts[i];
+ }
+ if (sum == 0) {
+ counts[0] = ANS_TAB_SIZE;
+ }
+ }
+ if (method == 0) {
+ counts = CreateFlatHistogram(alphabet_size, ANS_TAB_SIZE);
+ AliasTable::Entry a[ANS_MAX_ALPHABET_SIZE];
+ InitAliasTable(counts, ANS_TAB_SIZE, log_alpha_size, a);
+ ANSBuildInfoTable(counts.data(), a, alphabet_size, log_alpha_size, info);
+ if (writer != nullptr) {
+ EncodeFlatHistogram(alphabet_size, writer);
+ }
+ return cost;
+ }
+ int omit_pos = 0;
+ uint32_t shift = method - 1;
+ JXL_CHECK(NormalizeCounts(counts.data(), &omit_pos, alphabet_size,
+ ANS_LOG_TAB_SIZE, shift, &num_symbols, symbols));
+ AliasTable::Entry a[ANS_MAX_ALPHABET_SIZE];
+ InitAliasTable(counts, ANS_TAB_SIZE, log_alpha_size, a);
+ ANSBuildInfoTable(counts.data(), a, alphabet_size, log_alpha_size, info);
+ if (writer != nullptr) {
+ bool ok = EncodeCounts(counts.data(), alphabet_size, omit_pos, num_symbols,
+ shift, symbols, writer);
+ (void)ok;
+ JXL_DASSERT(ok);
+ }
+ return cost;
+}
+
+float ANSPopulationCost(const ANSHistBin* data, size_t alphabet_size) {
+ float c;
+ ComputeBestMethod(data, alphabet_size, &c,
+ HistogramParams::ANSHistogramStrategy::kFast);
+ return c;
+}
+
+template <typename Writer>
+void EncodeUintConfig(const HybridUintConfig uint_config, Writer* writer,
+ size_t log_alpha_size) {
+ writer->Write(CeilLog2Nonzero(log_alpha_size + 1),
+ uint_config.split_exponent);
+ if (uint_config.split_exponent == log_alpha_size) {
+ return; // msb/lsb don't matter.
+ }
+ size_t nbits = CeilLog2Nonzero(uint_config.split_exponent + 1);
+ writer->Write(nbits, uint_config.msb_in_token);
+ nbits = CeilLog2Nonzero(uint_config.split_exponent -
+ uint_config.msb_in_token + 1);
+ writer->Write(nbits, uint_config.lsb_in_token);
+}
+template <typename Writer>
+void EncodeUintConfigs(const std::vector<HybridUintConfig>& uint_config,
+ Writer* writer, size_t log_alpha_size) {
+ // TODO(veluca): RLE?
+ for (size_t i = 0; i < uint_config.size(); i++) {
+ EncodeUintConfig(uint_config[i], writer, log_alpha_size);
+ }
+}
+template void EncodeUintConfigs(const std::vector<HybridUintConfig>&,
+ BitWriter*, size_t);
+
+namespace {
+
+void ChooseUintConfigs(const HistogramParams& params,
+ const std::vector<std::vector<Token>>& tokens,
+ const std::vector<uint8_t>& context_map,
+ std::vector<Histogram>* clustered_histograms,
+ EntropyEncodingData* codes, size_t* log_alpha_size) {
+ codes->uint_config.resize(clustered_histograms->size());
+
+ if (params.uint_method == HistogramParams::HybridUintMethod::kNone) return;
+ if (params.uint_method == HistogramParams::HybridUintMethod::k000) {
+ codes->uint_config.clear();
+ codes->uint_config.resize(clustered_histograms->size(),
+ HybridUintConfig(0, 0, 0));
+ return;
+ }
+ if (params.uint_method == HistogramParams::HybridUintMethod::kContextMap) {
+ codes->uint_config.clear();
+ codes->uint_config.resize(clustered_histograms->size(),
+ HybridUintConfig(2, 0, 1));
+ return;
+ }
+
+ // Brute-force method that tries a few options.
+ std::vector<HybridUintConfig> configs;
+ if (params.uint_method == HistogramParams::HybridUintMethod::kBest) {
+ configs = {
+ HybridUintConfig(4, 2, 0), // default
+ HybridUintConfig(4, 1, 0), // less precise
+ HybridUintConfig(4, 2, 1), // add sign
+ HybridUintConfig(4, 2, 2), // add sign+parity
+ HybridUintConfig(4, 1, 2), // add parity but less msb
+ // Same as above, but more direct coding.
+ HybridUintConfig(5, 2, 0), HybridUintConfig(5, 1, 0),
+ HybridUintConfig(5, 2, 1), HybridUintConfig(5, 2, 2),
+ HybridUintConfig(5, 1, 2),
+ // Same as above, but less direct coding.
+ HybridUintConfig(3, 2, 0), HybridUintConfig(3, 1, 0),
+ HybridUintConfig(3, 2, 1), HybridUintConfig(3, 1, 2),
+ // For near-lossless.
+ HybridUintConfig(4, 1, 3), HybridUintConfig(5, 1, 4),
+ HybridUintConfig(5, 2, 3), HybridUintConfig(6, 1, 5),
+ HybridUintConfig(6, 2, 4), HybridUintConfig(6, 0, 0),
+ // Other
+ HybridUintConfig(0, 0, 0), // varlenuint
+ HybridUintConfig(2, 0, 1), // works well for ctx map
+ HybridUintConfig(7, 0, 0), // direct coding
+ HybridUintConfig(8, 0, 0), // direct coding
+ HybridUintConfig(9, 0, 0), // direct coding
+ HybridUintConfig(10, 0, 0), // direct coding
+ HybridUintConfig(11, 0, 0), // direct coding
+ HybridUintConfig(12, 0, 0), // direct coding
+ };
+ } else if (params.uint_method == HistogramParams::HybridUintMethod::kFast) {
+ configs = {
+ HybridUintConfig(4, 2, 0), // default
+ HybridUintConfig(4, 1, 2), // add parity but less msb
+ HybridUintConfig(0, 0, 0), // smallest histograms
+ HybridUintConfig(2, 0, 1), // works well for ctx map
+ };
+ }
+
+ std::vector<float> costs(clustered_histograms->size(),
+ std::numeric_limits<float>::max());
+ std::vector<uint32_t> extra_bits(clustered_histograms->size());
+ std::vector<uint8_t> is_valid(clustered_histograms->size());
+ size_t max_alpha =
+ codes->use_prefix_code ? PREFIX_MAX_ALPHABET_SIZE : ANS_MAX_ALPHABET_SIZE;
+ for (HybridUintConfig cfg : configs) {
+ std::fill(is_valid.begin(), is_valid.end(), true);
+ std::fill(extra_bits.begin(), extra_bits.end(), 0);
+
+ for (size_t i = 0; i < clustered_histograms->size(); i++) {
+ (*clustered_histograms)[i].Clear();
+ }
+ for (size_t i = 0; i < tokens.size(); ++i) {
+ for (size_t j = 0; j < tokens[i].size(); ++j) {
+ const Token token = tokens[i][j];
+ // TODO(veluca): do not ignore lz77 commands.
+ if (token.is_lz77_length) continue;
+ size_t histo = context_map[token.context];
+ uint32_t tok, nbits, bits;
+ cfg.Encode(token.value, &tok, &nbits, &bits);
+ if (tok >= max_alpha ||
+ (codes->lz77.enabled && tok >= codes->lz77.min_symbol)) {
+ is_valid[histo] = false;
+ continue;
+ }
+ extra_bits[histo] += nbits;
+ (*clustered_histograms)[histo].Add(tok);
+ }
+ }
+
+ for (size_t i = 0; i < clustered_histograms->size(); i++) {
+ if (!is_valid[i]) continue;
+ float cost = (*clustered_histograms)[i].PopulationCost() + extra_bits[i];
+ // add signaling cost of the hybriduintconfig itself
+ cost += CeilLog2Nonzero(cfg.split_exponent + 1);
+ cost += CeilLog2Nonzero(cfg.split_exponent - cfg.msb_in_token + 1);
+ if (cost < costs[i]) {
+ codes->uint_config[i] = cfg;
+ costs[i] = cost;
+ }
+ }
+ }
+
+ // Rebuild histograms.
+ for (size_t i = 0; i < clustered_histograms->size(); i++) {
+ (*clustered_histograms)[i].Clear();
+ }
+ *log_alpha_size = 4;
+ for (size_t i = 0; i < tokens.size(); ++i) {
+ for (size_t j = 0; j < tokens[i].size(); ++j) {
+ const Token token = tokens[i][j];
+ uint32_t tok, nbits, bits;
+ size_t histo = context_map[token.context];
+ (token.is_lz77_length ? codes->lz77.length_uint_config
+ : codes->uint_config[histo])
+ .Encode(token.value, &tok, &nbits, &bits);
+ tok += token.is_lz77_length ? codes->lz77.min_symbol : 0;
+ (*clustered_histograms)[histo].Add(tok);
+ while (tok >= (1u << *log_alpha_size)) (*log_alpha_size)++;
+ }
+ }
+#if JXL_ENABLE_ASSERT
+ size_t max_log_alpha_size = codes->use_prefix_code ? PREFIX_MAX_BITS : 8;
+ JXL_ASSERT(*log_alpha_size <= max_log_alpha_size);
+#endif
+}
+
+class HistogramBuilder {
+ public:
+ explicit HistogramBuilder(const size_t num_contexts)
+ : histograms_(num_contexts) {}
+
+ void VisitSymbol(int symbol, size_t histo_idx) {
+ JXL_DASSERT(histo_idx < histograms_.size());
+ histograms_[histo_idx].Add(symbol);
+ }
+
+ // NOTE: `layer` is only for clustered_entropy; caller does ReclaimAndCharge.
+ size_t BuildAndStoreEntropyCodes(
+ const HistogramParams& params,
+ const std::vector<std::vector<Token>>& tokens, EntropyEncodingData* codes,
+ std::vector<uint8_t>* context_map, bool use_prefix_code,
+ BitWriter* writer, size_t layer, AuxOut* aux_out) const {
+ size_t cost = 0;
+ codes->encoding_info.clear();
+ std::vector<Histogram> clustered_histograms(histograms_);
+ context_map->resize(histograms_.size());
+ if (histograms_.size() > 1) {
+ if (!ans_fuzzer_friendly_) {
+ std::vector<uint32_t> histogram_symbols;
+ ClusterHistograms(params, histograms_, kClustersLimit,
+ &clustered_histograms, &histogram_symbols);
+ for (size_t c = 0; c < histograms_.size(); ++c) {
+ (*context_map)[c] = static_cast<uint8_t>(histogram_symbols[c]);
+ }
+ } else {
+ fill(context_map->begin(), context_map->end(), 0);
+ size_t max_symbol = 0;
+ for (const Histogram& h : histograms_) {
+ max_symbol = std::max(h.data_.size(), max_symbol);
+ }
+ size_t num_symbols = 1 << CeilLog2Nonzero(max_symbol + 1);
+ clustered_histograms.resize(1);
+ clustered_histograms[0].Clear();
+ for (size_t i = 0; i < num_symbols; i++) {
+ clustered_histograms[0].Add(i);
+ }
+ }
+ if (writer != nullptr) {
+ EncodeContextMap(*context_map, clustered_histograms.size(), writer,
+ layer, aux_out);
+ }
+ }
+ if (aux_out != nullptr) {
+ for (size_t i = 0; i < clustered_histograms.size(); ++i) {
+ aux_out->layers[layer].clustered_entropy +=
+ clustered_histograms[i].ShannonEntropy();
+ }
+ }
+ codes->use_prefix_code = use_prefix_code;
+ size_t log_alpha_size = codes->lz77.enabled ? 8 : 7; // Sane default.
+ if (ans_fuzzer_friendly_) {
+ codes->uint_config.clear();
+ codes->uint_config.resize(1, HybridUintConfig(7, 0, 0));
+ } else {
+ ChooseUintConfigs(params, tokens, *context_map, &clustered_histograms,
+ codes, &log_alpha_size);
+ }
+ if (log_alpha_size < 5) log_alpha_size = 5;
+ SizeWriter size_writer; // Used if writer == nullptr to estimate costs.
+ cost += 1;
+ if (writer) writer->Write(1, use_prefix_code);
+
+ if (use_prefix_code) {
+ log_alpha_size = PREFIX_MAX_BITS;
+ } else {
+ cost += 2;
+ }
+ if (writer == nullptr) {
+ EncodeUintConfigs(codes->uint_config, &size_writer, log_alpha_size);
+ } else {
+ if (!use_prefix_code) writer->Write(2, log_alpha_size - 5);
+ EncodeUintConfigs(codes->uint_config, writer, log_alpha_size);
+ }
+ if (use_prefix_code) {
+ for (size_t c = 0; c < clustered_histograms.size(); ++c) {
+ size_t num_symbol = 1;
+ for (size_t i = 0; i < clustered_histograms[c].data_.size(); i++) {
+ if (clustered_histograms[c].data_[i]) num_symbol = i + 1;
+ }
+ if (writer) {
+ StoreVarLenUint16(num_symbol - 1, writer);
+ } else {
+ StoreVarLenUint16(num_symbol - 1, &size_writer);
+ }
+ }
+ }
+ cost += size_writer.size;
+ for (size_t c = 0; c < clustered_histograms.size(); ++c) {
+ size_t num_symbol = 1;
+ for (size_t i = 0; i < clustered_histograms[c].data_.size(); i++) {
+ if (clustered_histograms[c].data_[i]) num_symbol = i + 1;
+ }
+ codes->encoding_info.emplace_back();
+ codes->encoding_info.back().resize(std::max<size_t>(1, num_symbol));
+
+ BitWriter::Allotment allotment(writer, 256 + num_symbol * 24);
+ cost += BuildAndStoreANSEncodingData(
+ params.ans_histogram_strategy, clustered_histograms[c].data_.data(),
+ num_symbol, log_alpha_size, use_prefix_code,
+ codes->encoding_info.back().data(), writer);
+ allotment.FinishedHistogram(writer);
+ allotment.ReclaimAndCharge(writer, layer, aux_out);
+ }
+ return cost;
+ }
+
+ const Histogram& Histo(size_t i) const { return histograms_[i]; }
+
+ private:
+ std::vector<Histogram> histograms_;
+};
+
+class SymbolCostEstimator {
+ public:
+ SymbolCostEstimator(size_t num_contexts, bool force_huffman,
+ const std::vector<std::vector<Token>>& tokens,
+ const LZ77Params& lz77) {
+ HistogramBuilder builder(num_contexts);
+ // Build histograms for estimating lz77 savings.
+ HybridUintConfig uint_config;
+ for (size_t i = 0; i < tokens.size(); ++i) {
+ for (size_t j = 0; j < tokens[i].size(); ++j) {
+ const Token token = tokens[i][j];
+ uint32_t tok, nbits, bits;
+ (token.is_lz77_length ? lz77.length_uint_config : uint_config)
+ .Encode(token.value, &tok, &nbits, &bits);
+ tok += token.is_lz77_length ? lz77.min_symbol : 0;
+ builder.VisitSymbol(tok, token.context);
+ }
+ }
+ max_alphabet_size_ = 0;
+ for (size_t i = 0; i < num_contexts; i++) {
+ max_alphabet_size_ =
+ std::max(max_alphabet_size_, builder.Histo(i).data_.size());
+ }
+ bits_.resize(num_contexts * max_alphabet_size_);
+ // TODO(veluca): SIMD?
+ add_symbol_cost_.resize(num_contexts);
+ for (size_t i = 0; i < num_contexts; i++) {
+ float inv_total = 1.0f / (builder.Histo(i).total_count_ + 1e-8f);
+ float total_cost = 0;
+ for (size_t j = 0; j < builder.Histo(i).data_.size(); j++) {
+ size_t cnt = builder.Histo(i).data_[j];
+ float cost = 0;
+ if (cnt != 0 && cnt != builder.Histo(i).total_count_) {
+ cost = -FastLog2f(cnt * inv_total);
+ if (force_huffman) cost = std::ceil(cost);
+ } else if (cnt == 0) {
+ cost = ANS_LOG_TAB_SIZE; // Highest possible cost.
+ }
+ bits_[i * max_alphabet_size_ + j] = cost;
+ total_cost += cost * builder.Histo(i).data_[j];
+ }
+ // Penalty for adding a lz77 symbol to this contest (only used for static
+ // cost model). Higher penalty for contexts that have a very low
+ // per-symbol entropy.
+ add_symbol_cost_[i] = std::max(0.0f, 6.0f - total_cost * inv_total);
+ }
+ }
+ float Bits(size_t ctx, size_t sym) const {
+ return bits_[ctx * max_alphabet_size_ + sym];
+ }
+ float LenCost(size_t ctx, size_t len, const LZ77Params& lz77) const {
+ uint32_t nbits, bits, tok;
+ lz77.length_uint_config.Encode(len, &tok, &nbits, &bits);
+ tok += lz77.min_symbol;
+ return nbits + Bits(ctx, tok);
+ }
+ float DistCost(size_t len, const LZ77Params& lz77) const {
+ uint32_t nbits, bits, tok;
+ HybridUintConfig().Encode(len, &tok, &nbits, &bits);
+ return nbits + Bits(lz77.nonserialized_distance_context, tok);
+ }
+ float AddSymbolCost(size_t idx) const { return add_symbol_cost_[idx]; }
+
+ private:
+ size_t max_alphabet_size_;
+ std::vector<float> bits_;
+ std::vector<float> add_symbol_cost_;
+};
+
+void ApplyLZ77_RLE(const HistogramParams& params, size_t num_contexts,
+ const std::vector<std::vector<Token>>& tokens,
+ LZ77Params& lz77,
+ std::vector<std::vector<Token>>& tokens_lz77) {
+ // TODO(veluca): tune heuristics here.
+ SymbolCostEstimator sce(num_contexts, params.force_huffman, tokens, lz77);
+ float bit_decrease = 0;
+ size_t total_symbols = 0;
+ tokens_lz77.resize(tokens.size());
+ std::vector<float> sym_cost;
+ HybridUintConfig uint_config;
+ for (size_t stream = 0; stream < tokens.size(); stream++) {
+ size_t distance_multiplier =
+ params.image_widths.size() > stream ? params.image_widths[stream] : 0;
+ const auto& in = tokens[stream];
+ auto& out = tokens_lz77[stream];
+ total_symbols += in.size();
+ // Cumulative sum of bit costs.
+ sym_cost.resize(in.size() + 1);
+ for (size_t i = 0; i < in.size(); i++) {
+ uint32_t tok, nbits, unused_bits;
+ uint_config.Encode(in[i].value, &tok, &nbits, &unused_bits);
+ sym_cost[i + 1] = sce.Bits(in[i].context, tok) + nbits + sym_cost[i];
+ }
+ out.reserve(in.size());
+ for (size_t i = 0; i < in.size(); i++) {
+ size_t num_to_copy = 0;
+ size_t distance_symbol = 0; // 1 for RLE.
+ if (distance_multiplier != 0) {
+ distance_symbol = 1; // Special distance 1 if enabled.
+ JXL_DASSERT(kSpecialDistances[1][0] == 1);
+ JXL_DASSERT(kSpecialDistances[1][1] == 0);
+ }
+ if (i > 0) {
+ for (; i + num_to_copy < in.size(); num_to_copy++) {
+ if (in[i + num_to_copy].value != in[i - 1].value) {
+ break;
+ }
+ }
+ }
+ if (num_to_copy == 0) {
+ out.push_back(in[i]);
+ continue;
+ }
+ float cost = sym_cost[i + num_to_copy] - sym_cost[i];
+ // This subtraction might overflow, but that's OK.
+ size_t lz77_len = num_to_copy - lz77.min_length;
+ float lz77_cost = num_to_copy >= lz77.min_length
+ ? CeilLog2Nonzero(lz77_len + 1) + 1
+ : 0;
+ if (num_to_copy < lz77.min_length || cost <= lz77_cost) {
+ for (size_t j = 0; j < num_to_copy; j++) {
+ out.push_back(in[i + j]);
+ }
+ i += num_to_copy - 1;
+ continue;
+ }
+ // Output the LZ77 length
+ out.emplace_back(in[i].context, lz77_len);
+ out.back().is_lz77_length = true;
+ i += num_to_copy - 1;
+ bit_decrease += cost - lz77_cost;
+ // Output the LZ77 copy distance.
+ out.emplace_back(lz77.nonserialized_distance_context, distance_symbol);
+ }
+ }
+
+ if (bit_decrease > total_symbols * 0.2 + 16) {
+ lz77.enabled = true;
+ }
+}
+
+// Hash chain for LZ77 matching
+struct HashChain {
+ size_t size_;
+ std::vector<uint32_t> data_;
+
+ unsigned hash_num_values_ = 32768;
+ unsigned hash_mask_ = hash_num_values_ - 1;
+ unsigned hash_shift_ = 5;
+
+ std::vector<int> head;
+ std::vector<uint32_t> chain;
+ std::vector<int> val;
+
+ // Speed up repetitions of zero
+ std::vector<int> headz;
+ std::vector<uint32_t> chainz;
+ std::vector<uint32_t> zeros;
+ uint32_t numzeros = 0;
+
+ size_t window_size_;
+ size_t window_mask_;
+ size_t min_length_;
+ size_t max_length_;
+
+ // Map of special distance codes.
+ std::unordered_map<int, int> special_dist_table_;
+ size_t num_special_distances_ = 0;
+
+ uint32_t maxchainlength = 256; // window_size_ to allow all
+
+ HashChain(const Token* data, size_t size, size_t window_size,
+ size_t min_length, size_t max_length, size_t distance_multiplier)
+ : size_(size),
+ window_size_(window_size),
+ window_mask_(window_size - 1),
+ min_length_(min_length),
+ max_length_(max_length) {
+ data_.resize(size);
+ for (size_t i = 0; i < size; i++) {
+ data_[i] = data[i].value;
+ }
+
+ head.resize(hash_num_values_, -1);
+ val.resize(window_size_, -1);
+ chain.resize(window_size_);
+ for (uint32_t i = 0; i < window_size_; ++i) {
+ chain[i] = i; // same value as index indicates uninitialized
+ }
+
+ zeros.resize(window_size_);
+ headz.resize(window_size_ + 1, -1);
+ chainz.resize(window_size_);
+ for (uint32_t i = 0; i < window_size_; ++i) {
+ chainz[i] = i;
+ }
+ // Translate distance to special distance code.
+ if (distance_multiplier) {
+ // Count down, so if due to small distance multiplier multiple distances
+ // map to the same code, the smallest code will be used in the end.
+ for (int i = kNumSpecialDistances - 1; i >= 0; --i) {
+ int xi = kSpecialDistances[i][0];
+ int yi = kSpecialDistances[i][1];
+ int distance = yi * distance_multiplier + xi;
+ // Ensure that we map distance 1 to the lowest symbols.
+ if (distance < 1) distance = 1;
+ special_dist_table_[distance] = i;
+ }
+ num_special_distances_ = kNumSpecialDistances;
+ }
+ }
+
+ uint32_t GetHash(size_t pos) const {
+ uint32_t result = 0;
+ if (pos + 2 < size_) {
+ // TODO(lode): take the MSB's of the uint32_t values into account as well,
+ // given that the hash code itself is less than 32 bits.
+ result ^= (uint32_t)(data_[pos + 0] << 0u);
+ result ^= (uint32_t)(data_[pos + 1] << hash_shift_);
+ result ^= (uint32_t)(data_[pos + 2] << (hash_shift_ * 2));
+ } else {
+ // No need to compute hash of last 2 bytes, the length 2 is too short.
+ return 0;
+ }
+ return result & hash_mask_;
+ }
+
+ uint32_t CountZeros(size_t pos, uint32_t prevzeros) const {
+ size_t end = pos + window_size_;
+ if (end > size_) end = size_;
+ if (prevzeros > 0) {
+ if (prevzeros >= window_mask_ && data_[end - 1] == 0 &&
+ end == pos + window_size_) {
+ return prevzeros;
+ } else {
+ return prevzeros - 1;
+ }
+ }
+ uint32_t num = 0;
+ while (pos + num < end && data_[pos + num] == 0) num++;
+ return num;
+ }
+
+ void Update(size_t pos) {
+ uint32_t hashval = GetHash(pos);
+ uint32_t wpos = pos & window_mask_;
+
+ val[wpos] = (int)hashval;
+ if (head[hashval] != -1) chain[wpos] = head[hashval];
+ head[hashval] = wpos;
+
+ if (pos > 0 && data_[pos] != data_[pos - 1]) numzeros = 0;
+ numzeros = CountZeros(pos, numzeros);
+
+ zeros[wpos] = numzeros;
+ if (headz[numzeros] != -1) chainz[wpos] = headz[numzeros];
+ headz[numzeros] = wpos;
+ }
+
+ void Update(size_t pos, size_t len) {
+ for (size_t i = 0; i < len; i++) {
+ Update(pos + i);
+ }
+ }
+
+ template <typename CB>
+ void FindMatches(size_t pos, int max_dist, const CB& found_match) const {
+ uint32_t wpos = pos & window_mask_;
+ uint32_t hashval = GetHash(pos);
+ uint32_t hashpos = chain[wpos];
+
+ int prev_dist = 0;
+ int end = std::min<int>(pos + max_length_, size_);
+ uint32_t chainlength = 0;
+ uint32_t best_len = 0;
+ for (;;) {
+ int dist = (hashpos <= wpos) ? (wpos - hashpos)
+ : (wpos - hashpos + window_mask_ + 1);
+ if (dist < prev_dist) break;
+ prev_dist = dist;
+ uint32_t len = 0;
+ if (dist > 0) {
+ int i = pos;
+ int j = pos - dist;
+ if (numzeros > 3) {
+ int r = std::min<int>(numzeros - 1, zeros[hashpos]);
+ if (i + r >= end) r = end - i - 1;
+ i += r;
+ j += r;
+ }
+ while (i < end && data_[i] == data_[j]) {
+ i++;
+ j++;
+ }
+ len = i - pos;
+ // This can trigger even if the new length is slightly smaller than the
+ // best length, because it is possible for a slightly cheaper distance
+ // symbol to occur.
+ if (len >= min_length_ && len + 2 >= best_len) {
+ auto it = special_dist_table_.find(dist);
+ int dist_symbol = (it == special_dist_table_.end())
+ ? (num_special_distances_ + dist - 1)
+ : it->second;
+ found_match(len, dist_symbol);
+ if (len > best_len) best_len = len;
+ }
+ }
+
+ chainlength++;
+ if (chainlength >= maxchainlength) break;
+
+ if (numzeros >= 3 && len > numzeros) {
+ if (hashpos == chainz[hashpos]) break;
+ hashpos = chainz[hashpos];
+ if (zeros[hashpos] != numzeros) break;
+ } else {
+ if (hashpos == chain[hashpos]) break;
+ hashpos = chain[hashpos];
+ if (val[hashpos] != (int)hashval) break; // outdated hash value
+ }
+ }
+ }
+ void FindMatch(size_t pos, int max_dist, size_t* result_dist_symbol,
+ size_t* result_len) const {
+ *result_dist_symbol = 0;
+ *result_len = 1;
+ FindMatches(pos, max_dist, [&](size_t len, size_t dist_symbol) {
+ if (len > *result_len ||
+ (len == *result_len && *result_dist_symbol > dist_symbol)) {
+ *result_len = len;
+ *result_dist_symbol = dist_symbol;
+ }
+ });
+ }
+};
+
+float LenCost(size_t len) {
+ uint32_t nbits, bits, tok;
+ HybridUintConfig(1, 0, 0).Encode(len, &tok, &nbits, &bits);
+ constexpr float kCostTable[] = {
+ 2.797667318563126, 3.213177690381199, 2.5706009246743737,
+ 2.408392498667534, 2.829649191872326, 3.3923087753324577,
+ 4.029267451554331, 4.415576699706408, 4.509357574741465,
+ 9.21481543803004, 10.020590190114898, 11.858671627804766,
+ 12.45853300490526, 11.713105831990857, 12.561996324849314,
+ 13.775477692278367, 13.174027068768641,
+ };
+ size_t table_size = sizeof kCostTable / sizeof *kCostTable;
+ if (tok >= table_size) tok = table_size - 1;
+ return kCostTable[tok] + nbits;
+}
+
+// TODO(veluca): this does not take into account usage or non-usage of distance
+// multipliers.
+float DistCost(size_t dist) {
+ uint32_t nbits, bits, tok;
+ HybridUintConfig(7, 0, 0).Encode(dist, &tok, &nbits, &bits);
+ constexpr float kCostTable[] = {
+ 6.368282626312716, 5.680793277090298, 8.347404197105247,
+ 7.641619201599141, 6.914328374119438, 7.959808291537444,
+ 8.70023120759855, 8.71378518934703, 9.379132523982769,
+ 9.110472749092708, 9.159029569270908, 9.430936766731973,
+ 7.278284055315169, 7.8278514904267755, 10.026641158289236,
+ 9.976049229827066, 9.64351607048908, 9.563403863480442,
+ 10.171474111762747, 10.45950155077234, 9.994813912104219,
+ 10.322524683741156, 8.465808729388186, 8.756254166066853,
+ 10.160930174662234, 10.247329273413435, 10.04090403724809,
+ 10.129398517544082, 9.342311691539546, 9.07608009102374,
+ 10.104799540677513, 10.378079384990906, 10.165828974075072,
+ 10.337595322341553, 7.940557464567944, 10.575665823319431,
+ 11.023344321751955, 10.736144698831827, 11.118277044595054,
+ 7.468468230648442, 10.738305230932939, 10.906980780216568,
+ 10.163468216353817, 10.17805759656433, 11.167283670483565,
+ 11.147050200274544, 10.517921919244333, 10.651764778156886,
+ 10.17074446448919, 11.217636876224745, 11.261630721139484,
+ 11.403140815247259, 10.892472096873417, 11.1859607804481,
+ 8.017346947551262, 7.895143720278828, 11.036577113822025,
+ 11.170562110315794, 10.326988722591086, 10.40872184751056,
+ 11.213498225466386, 11.30580635516863, 10.672272515665442,
+ 10.768069466228063, 11.145257364153565, 11.64668307145549,
+ 10.593156194627339, 11.207499484844943, 10.767517766396908,
+ 10.826629811407042, 10.737764794499988, 10.6200448518045,
+ 10.191315385198092, 8.468384171390085, 11.731295299170432,
+ 11.824619886654398, 10.41518844301179, 10.16310536548649,
+ 10.539423685097576, 10.495136599328031, 10.469112847728267,
+ 11.72057686174922, 10.910326337834674, 11.378921834673758,
+ 11.847759036098536, 11.92071647623854, 10.810628276345282,
+ 11.008601085273893, 11.910326337834674, 11.949212023423133,
+ 11.298614839104337, 11.611603659010392, 10.472930394619985,
+ 11.835564720850282, 11.523267392285337, 12.01055816679611,
+ 8.413029688994023, 11.895784139536406, 11.984679534970505,
+ 11.220654278717394, 11.716311684833672, 10.61036646226114,
+ 10.89849965960364, 10.203762898863669, 10.997560826267238,
+ 11.484217379438984, 11.792836176993665, 12.24310468755171,
+ 11.464858097919262, 12.212747017409377, 11.425595666074955,
+ 11.572048533398757, 12.742093965163013, 11.381874288645637,
+ 12.191870445817015, 11.683156920035426, 11.152442115262197,
+ 11.90303691580457, 11.653292787169159, 11.938615382266098,
+ 16.970641701570223, 16.853602280380002, 17.26240782594733,
+ 16.644655390108507, 17.14310889757499, 16.910935455445955,
+ 17.505678976959697, 17.213498225466388, 2.4162310293553024,
+ 3.494587244462329, 3.5258600986408344, 3.4959806589517095,
+ 3.098390886949687, 3.343454654302911, 3.588847442290287,
+ 4.14614790111827, 5.152948641990529, 7.433696808092598,
+ 9.716311684833672,
+ };
+ size_t table_size = sizeof kCostTable / sizeof *kCostTable;
+ if (tok >= table_size) tok = table_size - 1;
+ return kCostTable[tok] + nbits;
+}
+
+void ApplyLZ77_LZ77(const HistogramParams& params, size_t num_contexts,
+ const std::vector<std::vector<Token>>& tokens,
+ LZ77Params& lz77,
+ std::vector<std::vector<Token>>& tokens_lz77) {
+ // TODO(veluca): tune heuristics here.
+ SymbolCostEstimator sce(num_contexts, params.force_huffman, tokens, lz77);
+ float bit_decrease = 0;
+ size_t total_symbols = 0;
+ tokens_lz77.resize(tokens.size());
+ HybridUintConfig uint_config;
+ std::vector<float> sym_cost;
+ for (size_t stream = 0; stream < tokens.size(); stream++) {
+ size_t distance_multiplier =
+ params.image_widths.size() > stream ? params.image_widths[stream] : 0;
+ const auto& in = tokens[stream];
+ auto& out = tokens_lz77[stream];
+ total_symbols += in.size();
+ // Cumulative sum of bit costs.
+ sym_cost.resize(in.size() + 1);
+ for (size_t i = 0; i < in.size(); i++) {
+ uint32_t tok, nbits, unused_bits;
+ uint_config.Encode(in[i].value, &tok, &nbits, &unused_bits);
+ sym_cost[i + 1] = sce.Bits(in[i].context, tok) + nbits + sym_cost[i];
+ }
+
+ out.reserve(in.size());
+ size_t max_distance = in.size();
+ size_t min_length = lz77.min_length;
+ JXL_ASSERT(min_length >= 3);
+ size_t max_length = in.size();
+
+ // Use next power of two as window size.
+ size_t window_size = 1;
+ while (window_size < max_distance && window_size < kWindowSize) {
+ window_size <<= 1;
+ }
+
+ HashChain chain(in.data(), in.size(), window_size, min_length, max_length,
+ distance_multiplier);
+ size_t len, dist_symbol;
+
+ const size_t max_lazy_match_len = 256; // 0 to disable lazy matching
+
+ // Whether the next symbol was already updated (to test lazy matching)
+ bool already_updated = false;
+ for (size_t i = 0; i < in.size(); i++) {
+ out.push_back(in[i]);
+ if (!already_updated) chain.Update(i);
+ already_updated = false;
+ chain.FindMatch(i, max_distance, &dist_symbol, &len);
+ if (len >= min_length) {
+ if (len < max_lazy_match_len && i + 1 < in.size()) {
+ // Try length at next symbol lazy matching
+ chain.Update(i + 1);
+ already_updated = true;
+ size_t len2, dist_symbol2;
+ chain.FindMatch(i + 1, max_distance, &dist_symbol2, &len2);
+ if (len2 > len) {
+ // Use the lazy match. Add literal, and use the next length starting
+ // from the next byte.
+ ++i;
+ already_updated = false;
+ len = len2;
+ dist_symbol = dist_symbol2;
+ out.push_back(in[i]);
+ }
+ }
+
+ float cost = sym_cost[i + len] - sym_cost[i];
+ size_t lz77_len = len - lz77.min_length;
+ float lz77_cost = LenCost(lz77_len) + DistCost(dist_symbol) +
+ sce.AddSymbolCost(out.back().context);
+
+ if (lz77_cost <= cost) {
+ out.back().value = len - min_length;
+ out.back().is_lz77_length = true;
+ out.emplace_back(lz77.nonserialized_distance_context, dist_symbol);
+ bit_decrease += cost - lz77_cost;
+ } else {
+ // LZ77 match ignored, and symbol already pushed. Push all other
+ // symbols and skip.
+ for (size_t j = 1; j < len; j++) {
+ out.push_back(in[i + j]);
+ }
+ }
+
+ if (already_updated) {
+ chain.Update(i + 2, len - 2);
+ already_updated = false;
+ } else {
+ chain.Update(i + 1, len - 1);
+ }
+ i += len - 1;
+ } else {
+ // Literal, already pushed
+ }
+ }
+ }
+
+ if (bit_decrease > total_symbols * 0.2 + 16) {
+ lz77.enabled = true;
+ }
+}
+
+void ApplyLZ77_Optimal(const HistogramParams& params, size_t num_contexts,
+ const std::vector<std::vector<Token>>& tokens,
+ LZ77Params& lz77,
+ std::vector<std::vector<Token>>& tokens_lz77) {
+ std::vector<std::vector<Token>> tokens_for_cost_estimate;
+ ApplyLZ77_LZ77(params, num_contexts, tokens, lz77, tokens_for_cost_estimate);
+ // If greedy-LZ77 does not give better compression than no-lz77, no reason to
+ // run the optimal matching.
+ if (!lz77.enabled) return;
+ SymbolCostEstimator sce(num_contexts + 1, params.force_huffman,
+ tokens_for_cost_estimate, lz77);
+ tokens_lz77.resize(tokens.size());
+ HybridUintConfig uint_config;
+ std::vector<float> sym_cost;
+ std::vector<uint32_t> dist_symbols;
+ for (size_t stream = 0; stream < tokens.size(); stream++) {
+ size_t distance_multiplier =
+ params.image_widths.size() > stream ? params.image_widths[stream] : 0;
+ const auto& in = tokens[stream];
+ auto& out = tokens_lz77[stream];
+ // Cumulative sum of bit costs.
+ sym_cost.resize(in.size() + 1);
+ for (size_t i = 0; i < in.size(); i++) {
+ uint32_t tok, nbits, unused_bits;
+ uint_config.Encode(in[i].value, &tok, &nbits, &unused_bits);
+ sym_cost[i + 1] = sce.Bits(in[i].context, tok) + nbits + sym_cost[i];
+ }
+
+ out.reserve(in.size());
+ size_t max_distance = in.size();
+ size_t min_length = lz77.min_length;
+ JXL_ASSERT(min_length >= 3);
+ size_t max_length = in.size();
+
+ // Use next power of two as window size.
+ size_t window_size = 1;
+ while (window_size < max_distance && window_size < kWindowSize) {
+ window_size <<= 1;
+ }
+
+ HashChain chain(in.data(), in.size(), window_size, min_length, max_length,
+ distance_multiplier);
+
+ struct MatchInfo {
+ uint32_t len;
+ uint32_t dist_symbol;
+ uint32_t ctx;
+ float total_cost = std::numeric_limits<float>::max();
+ };
+ // Total cost to encode the first N symbols.
+ std::vector<MatchInfo> prefix_costs(in.size() + 1);
+ prefix_costs[0].total_cost = 0;
+
+ size_t rle_length = 0;
+ size_t skip_lz77 = 0;
+ for (size_t i = 0; i < in.size(); i++) {
+ chain.Update(i);
+ float lit_cost =
+ prefix_costs[i].total_cost + sym_cost[i + 1] - sym_cost[i];
+ if (prefix_costs[i + 1].total_cost > lit_cost) {
+ prefix_costs[i + 1].dist_symbol = 0;
+ prefix_costs[i + 1].len = 1;
+ prefix_costs[i + 1].ctx = in[i].context;
+ prefix_costs[i + 1].total_cost = lit_cost;
+ }
+ if (skip_lz77 > 0) {
+ skip_lz77--;
+ continue;
+ }
+ dist_symbols.clear();
+ chain.FindMatches(i, max_distance,
+ [&dist_symbols](size_t len, size_t dist_symbol) {
+ if (dist_symbols.size() <= len) {
+ dist_symbols.resize(len + 1, dist_symbol);
+ }
+ if (dist_symbol < dist_symbols[len]) {
+ dist_symbols[len] = dist_symbol;
+ }
+ });
+ if (dist_symbols.size() <= min_length) continue;
+ {
+ size_t best_cost = dist_symbols.back();
+ for (size_t j = dist_symbols.size() - 1; j >= min_length; j--) {
+ if (dist_symbols[j] < best_cost) {
+ best_cost = dist_symbols[j];
+ }
+ dist_symbols[j] = best_cost;
+ }
+ }
+ for (size_t j = min_length; j < dist_symbols.size(); j++) {
+ // Cost model that uses results from lazy LZ77.
+ float lz77_cost = sce.LenCost(in[i].context, j - min_length, lz77) +
+ sce.DistCost(dist_symbols[j], lz77);
+ float cost = prefix_costs[i].total_cost + lz77_cost;
+ if (prefix_costs[i + j].total_cost > cost) {
+ prefix_costs[i + j].len = j;
+ prefix_costs[i + j].dist_symbol = dist_symbols[j] + 1;
+ prefix_costs[i + j].ctx = in[i].context;
+ prefix_costs[i + j].total_cost = cost;
+ }
+ }
+ // We are in a RLE sequence: skip all the symbols except the first 8 and
+ // the last 8. This avoid quadratic costs for sequences with long runs of
+ // the same symbol.
+ if ((dist_symbols.back() == 0 && distance_multiplier == 0) ||
+ (dist_symbols.back() == 1 && distance_multiplier != 0)) {
+ rle_length++;
+ } else {
+ rle_length = 0;
+ }
+ if (rle_length >= 8 && dist_symbols.size() > 9) {
+ skip_lz77 = dist_symbols.size() - 10;
+ rle_length = 0;
+ }
+ }
+ size_t pos = in.size();
+ while (pos > 0) {
+ bool is_lz77_length = prefix_costs[pos].dist_symbol != 0;
+ if (is_lz77_length) {
+ size_t dist_symbol = prefix_costs[pos].dist_symbol - 1;
+ out.emplace_back(lz77.nonserialized_distance_context, dist_symbol);
+ }
+ size_t val = is_lz77_length ? prefix_costs[pos].len - min_length
+ : in[pos - 1].value;
+ out.emplace_back(prefix_costs[pos].ctx, val);
+ out.back().is_lz77_length = is_lz77_length;
+ pos -= prefix_costs[pos].len;
+ }
+ std::reverse(out.begin(), out.end());
+ }
+}
+
+void ApplyLZ77(const HistogramParams& params, size_t num_contexts,
+ const std::vector<std::vector<Token>>& tokens, LZ77Params& lz77,
+ std::vector<std::vector<Token>>& tokens_lz77) {
+ lz77.enabled = false;
+ if (params.force_huffman) {
+ lz77.min_symbol = std::min(PREFIX_MAX_ALPHABET_SIZE - 32, 512);
+ } else {
+ lz77.min_symbol = 224;
+ }
+ if (params.lz77_method == HistogramParams::LZ77Method::kNone) {
+ return;
+ } else if (params.lz77_method == HistogramParams::LZ77Method::kRLE) {
+ ApplyLZ77_RLE(params, num_contexts, tokens, lz77, tokens_lz77);
+ } else if (params.lz77_method == HistogramParams::LZ77Method::kLZ77) {
+ ApplyLZ77_LZ77(params, num_contexts, tokens, lz77, tokens_lz77);
+ } else if (params.lz77_method == HistogramParams::LZ77Method::kOptimal) {
+ ApplyLZ77_Optimal(params, num_contexts, tokens, lz77, tokens_lz77);
+ } else {
+ JXL_ABORT("Not implemented");
+ }
+}
+} // namespace
+
+size_t BuildAndEncodeHistograms(const HistogramParams& params,
+ size_t num_contexts,
+ std::vector<std::vector<Token>>& tokens,
+ EntropyEncodingData* codes,
+ std::vector<uint8_t>* context_map,
+ BitWriter* writer, size_t layer,
+ AuxOut* aux_out) {
+ size_t total_bits = 0;
+ codes->lz77.nonserialized_distance_context = num_contexts;
+ std::vector<std::vector<Token>> tokens_lz77;
+ ApplyLZ77(params, num_contexts, tokens, codes->lz77, tokens_lz77);
+ if (ans_fuzzer_friendly_) {
+ codes->lz77.length_uint_config = HybridUintConfig(10, 0, 0);
+ codes->lz77.min_symbol = 2048;
+ }
+
+ const size_t max_contexts = std::min(num_contexts, kClustersLimit);
+ BitWriter::Allotment allotment(writer,
+ 128 + num_contexts * 40 + max_contexts * 96);
+ if (writer) {
+ JXL_CHECK(Bundle::Write(codes->lz77, writer, layer, aux_out));
+ } else {
+ size_t ebits, bits;
+ JXL_CHECK(Bundle::CanEncode(codes->lz77, &ebits, &bits));
+ total_bits += bits;
+ }
+ if (codes->lz77.enabled) {
+ if (writer) {
+ size_t b = writer->BitsWritten();
+ EncodeUintConfig(codes->lz77.length_uint_config, writer,
+ /*log_alpha_size=*/8);
+ total_bits += writer->BitsWritten() - b;
+ } else {
+ SizeWriter size_writer;
+ EncodeUintConfig(codes->lz77.length_uint_config, &size_writer,
+ /*log_alpha_size=*/8);
+ total_bits += size_writer.size;
+ }
+ num_contexts += 1;
+ tokens = std::move(tokens_lz77);
+ }
+ size_t total_tokens = 0;
+ // Build histograms.
+ HistogramBuilder builder(num_contexts);
+ HybridUintConfig uint_config; // Default config for clustering.
+ // Unless we are using the kContextMap histogram option.
+ if (params.uint_method == HistogramParams::HybridUintMethod::kContextMap) {
+ uint_config = HybridUintConfig(2, 0, 1);
+ }
+ if (params.uint_method == HistogramParams::HybridUintMethod::k000) {
+ uint_config = HybridUintConfig(0, 0, 0);
+ }
+ if (ans_fuzzer_friendly_) {
+ uint_config = HybridUintConfig(10, 0, 0);
+ }
+ for (size_t i = 0; i < tokens.size(); ++i) {
+ if (codes->lz77.enabled) {
+ for (size_t j = 0; j < tokens[i].size(); ++j) {
+ const Token& token = tokens[i][j];
+ total_tokens++;
+ uint32_t tok, nbits, bits;
+ (token.is_lz77_length ? codes->lz77.length_uint_config : uint_config)
+ .Encode(token.value, &tok, &nbits, &bits);
+ tok += token.is_lz77_length ? codes->lz77.min_symbol : 0;
+ builder.VisitSymbol(tok, token.context);
+ }
+ } else if (num_contexts == 1) {
+ for (size_t j = 0; j < tokens[i].size(); ++j) {
+ const Token& token = tokens[i][j];
+ total_tokens++;
+ uint32_t tok, nbits, bits;
+ uint_config.Encode(token.value, &tok, &nbits, &bits);
+ builder.VisitSymbol(tok, /*token.context=*/0);
+ }
+ } else {
+ for (size_t j = 0; j < tokens[i].size(); ++j) {
+ const Token& token = tokens[i][j];
+ total_tokens++;
+ uint32_t tok, nbits, bits;
+ uint_config.Encode(token.value, &tok, &nbits, &bits);
+ builder.VisitSymbol(tok, token.context);
+ }
+ }
+ }
+
+ bool use_prefix_code =
+ params.force_huffman || total_tokens < 100 ||
+ params.clustering == HistogramParams::ClusteringType::kFastest ||
+ ans_fuzzer_friendly_;
+ if (!use_prefix_code) {
+ bool all_singleton = true;
+ for (size_t i = 0; i < num_contexts; i++) {
+ if (builder.Histo(i).ShannonEntropy() >= 1e-5) {
+ all_singleton = false;
+ }
+ }
+ if (all_singleton) {
+ use_prefix_code = true;
+ }
+ }
+
+ // Encode histograms.
+ total_bits += builder.BuildAndStoreEntropyCodes(params, tokens, codes,
+ context_map, use_prefix_code,
+ writer, layer, aux_out);
+ allotment.FinishedHistogram(writer);
+ allotment.ReclaimAndCharge(writer, layer, aux_out);
+
+ if (aux_out != nullptr) {
+ aux_out->layers[layer].num_clustered_histograms +=
+ codes->encoding_info.size();
+ }
+ return total_bits;
+}
+
+size_t WriteTokens(const std::vector<Token>& tokens,
+ const EntropyEncodingData& codes,
+ const std::vector<uint8_t>& context_map, BitWriter* writer) {
+ size_t num_extra_bits = 0;
+ if (codes.use_prefix_code) {
+ for (size_t i = 0; i < tokens.size(); i++) {
+ uint32_t tok, nbits, bits;
+ const Token& token = tokens[i];
+ size_t histo = context_map[token.context];
+ (token.is_lz77_length ? codes.lz77.length_uint_config
+ : codes.uint_config[histo])
+ .Encode(token.value, &tok, &nbits, &bits);
+ tok += token.is_lz77_length ? codes.lz77.min_symbol : 0;
+ // Combine two calls to the BitWriter. Equivalent to:
+ // writer->Write(codes.encoding_info[histo][tok].depth,
+ // codes.encoding_info[histo][tok].bits);
+ // writer->Write(nbits, bits);
+ uint64_t data = codes.encoding_info[histo][tok].bits;
+ data |= bits << codes.encoding_info[histo][tok].depth;
+ writer->Write(codes.encoding_info[histo][tok].depth + nbits, data);
+ num_extra_bits += nbits;
+ }
+ return num_extra_bits;
+ }
+ std::vector<uint64_t> out;
+ std::vector<uint8_t> out_nbits;
+ out.reserve(tokens.size());
+ out_nbits.reserve(tokens.size());
+ uint64_t allbits = 0;
+ size_t numallbits = 0;
+ // Writes in *reversed* order.
+ auto addbits = [&](size_t bits, size_t nbits) {
+ if (JXL_UNLIKELY(nbits)) {
+ JXL_DASSERT(bits >> nbits == 0);
+ if (JXL_UNLIKELY(numallbits + nbits > BitWriter::kMaxBitsPerCall)) {
+ out.push_back(allbits);
+ out_nbits.push_back(numallbits);
+ numallbits = allbits = 0;
+ }
+ allbits <<= nbits;
+ allbits |= bits;
+ numallbits += nbits;
+ }
+ };
+ const int end = tokens.size();
+ ANSCoder ans;
+ if (codes.lz77.enabled || context_map.size() > 1) {
+ for (int i = end - 1; i >= 0; --i) {
+ const Token token = tokens[i];
+ const uint8_t histo = context_map[token.context];
+ uint32_t tok, nbits, bits;
+ (token.is_lz77_length ? codes.lz77.length_uint_config
+ : codes.uint_config[histo])
+ .Encode(tokens[i].value, &tok, &nbits, &bits);
+ tok += token.is_lz77_length ? codes.lz77.min_symbol : 0;
+ const ANSEncSymbolInfo& info = codes.encoding_info[histo][tok];
+ // Extra bits first as this is reversed.
+ addbits(bits, nbits);
+ num_extra_bits += nbits;
+ uint8_t ans_nbits = 0;
+ uint32_t ans_bits = ans.PutSymbol(info, &ans_nbits);
+ addbits(ans_bits, ans_nbits);
+ }
+ } else {
+ for (int i = end - 1; i >= 0; --i) {
+ uint32_t tok, nbits, bits;
+ codes.uint_config[0].Encode(tokens[i].value, &tok, &nbits, &bits);
+ const ANSEncSymbolInfo& info = codes.encoding_info[0][tok];
+ // Extra bits first as this is reversed.
+ addbits(bits, nbits);
+ num_extra_bits += nbits;
+ uint8_t ans_nbits = 0;
+ uint32_t ans_bits = ans.PutSymbol(info, &ans_nbits);
+ addbits(ans_bits, ans_nbits);
+ }
+ }
+ const uint32_t state = ans.GetState();
+ writer->Write(32, state);
+ writer->Write(numallbits, allbits);
+ for (int i = out.size(); i > 0; --i) {
+ writer->Write(out_nbits[i - 1], out[i - 1]);
+ }
+ return num_extra_bits;
+}
+
+void WriteTokens(const std::vector<Token>& tokens,
+ const EntropyEncodingData& codes,
+ const std::vector<uint8_t>& context_map, BitWriter* writer,
+ size_t layer, AuxOut* aux_out) {
+ BitWriter::Allotment allotment(writer, 32 * tokens.size() + 32 * 1024 * 4);
+ size_t num_extra_bits = WriteTokens(tokens, codes, context_map, writer);
+ allotment.ReclaimAndCharge(writer, layer, aux_out);
+ if (aux_out != nullptr) {
+ aux_out->layers[layer].extra_bits += num_extra_bits;
+ }
+}
+
+void SetANSFuzzerFriendly(bool ans_fuzzer_friendly) {
+#if JXL_IS_DEBUG_BUILD // Guard against accidental / malicious changes.
+ ans_fuzzer_friendly_ = ans_fuzzer_friendly;
+#endif
+}
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_ans.h b/third_party/jpeg-xl/lib/jxl/enc_ans.h
new file mode 100644
index 0000000000..a4afb19b4e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_ans.h
@@ -0,0 +1,143 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_ANS_H_
+#define LIB_JXL_ENC_ANS_H_
+
+// Library to encode the ANS population counts to the bit-stream and encode
+// symbols based on the respective distributions.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "lib/jxl/ans_common.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/enc_ans_params.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/huffman_table.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+#define USE_MULT_BY_RECIPROCAL
+
+// precision must be equal to: #bits(state_) + #bits(freq)
+#define RECIPROCAL_PRECISION (32 + ANS_LOG_TAB_SIZE)
+
+// Data structure representing one element of the encoding table built
+// from a distribution.
+// TODO(veluca): split this up, or use an union.
+struct ANSEncSymbolInfo {
+ // ANS
+ uint16_t freq_;
+ std::vector<uint16_t> reverse_map_;
+#ifdef USE_MULT_BY_RECIPROCAL
+ uint64_t ifreq_;
+#endif
+ // Prefix coding.
+ uint8_t depth;
+ uint16_t bits;
+};
+
+class ANSCoder {
+ public:
+ ANSCoder() : state_(ANS_SIGNATURE << 16) {}
+
+ uint32_t PutSymbol(const ANSEncSymbolInfo& t, uint8_t* nbits) {
+ uint32_t bits = 0;
+ *nbits = 0;
+ if ((state_ >> (32 - ANS_LOG_TAB_SIZE)) >= t.freq_) {
+ bits = state_ & 0xffff;
+ state_ >>= 16;
+ *nbits = 16;
+ }
+#ifdef USE_MULT_BY_RECIPROCAL
+ // We use mult-by-reciprocal trick, but that requires 64b calc.
+ const uint32_t v = (state_ * t.ifreq_) >> RECIPROCAL_PRECISION;
+ const uint32_t offset = t.reverse_map_[state_ - v * t.freq_];
+ state_ = (v << ANS_LOG_TAB_SIZE) + offset;
+#else
+ state_ = ((state_ / t.freq_) << ANS_LOG_TAB_SIZE) +
+ t.reverse_map_[state_ % t.freq_];
+#endif
+ return bits;
+ }
+
+ uint32_t GetState() const { return state_; }
+
+ private:
+ uint32_t state_;
+};
+
+// RebalanceHistogram requires a signed type.
+using ANSHistBin = int32_t;
+
+struct EntropyEncodingData {
+ std::vector<std::vector<ANSEncSymbolInfo>> encoding_info;
+ bool use_prefix_code;
+ std::vector<HybridUintConfig> uint_config;
+ LZ77Params lz77;
+};
+
+// Integer to be encoded by an entropy coder, either ANS or Huffman.
+struct Token {
+ Token() {}
+ Token(uint32_t c, uint32_t value)
+ : is_lz77_length(false), context(c), value(value) {}
+ uint32_t is_lz77_length : 1;
+ uint32_t context : 31;
+ uint32_t value;
+};
+
+// Returns an estimate of the number of bits required to encode the given
+// histogram (header bits plus data bits).
+float ANSPopulationCost(const ANSHistBin* data, size_t alphabet_size);
+
+// Apply context clustering, compute histograms and encode them. Returns an
+// estimate of the total bits used for encoding the stream. If `writer` ==
+// nullptr, the bit estimate will not take into account the context map (which
+// does not get written if `num_contexts` == 1).
+size_t BuildAndEncodeHistograms(const HistogramParams& params,
+ size_t num_contexts,
+ std::vector<std::vector<Token>>& tokens,
+ EntropyEncodingData* codes,
+ std::vector<uint8_t>* context_map,
+ BitWriter* writer, size_t layer,
+ AuxOut* aux_out);
+
+// Write the tokens to a string.
+void WriteTokens(const std::vector<Token>& tokens,
+ const EntropyEncodingData& codes,
+ const std::vector<uint8_t>& context_map, BitWriter* writer,
+ size_t layer, AuxOut* aux_out);
+
+// Same as above, but assumes allotment created by caller.
+size_t WriteTokens(const std::vector<Token>& tokens,
+ const EntropyEncodingData& codes,
+ const std::vector<uint8_t>& context_map, BitWriter* writer);
+
+// Exposed for tests; to be used with Writer=BitWriter only.
+template <typename Writer>
+void EncodeUintConfigs(const std::vector<HybridUintConfig>& uint_config,
+ Writer* writer, size_t log_alpha_size);
+extern template void EncodeUintConfigs(const std::vector<HybridUintConfig>&,
+ BitWriter*, size_t);
+
+// Globally set the option to create fuzzer-friendly ANS streams. Negatively
+// impacts compression. Not thread-safe.
+void SetANSFuzzerFriendly(bool ans_fuzzer_friendly);
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_ANS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_ans_params.h b/third_party/jpeg-xl/lib/jxl/enc_ans_params.h
new file mode 100644
index 0000000000..50ca31dc03
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_ans_params.h
@@ -0,0 +1,76 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_ANS_PARAMS_H_
+#define LIB_JXL_ENC_ANS_PARAMS_H_
+
+// Encoder-only parameter needed for ANS entropy encoding methods.
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "lib/jxl/enc_params.h"
+
+namespace jxl {
+
+struct HistogramParams {
+ enum class ClusteringType {
+ kFastest, // Only 4 clusters.
+ kFast,
+ kBest,
+ };
+
+ enum class HybridUintMethod {
+ kNone, // just use kHybridUint420Config.
+ k000, // force the fastest option.
+ kFast, // just try a couple of options.
+ kContextMap, // fast choice for ctx map.
+ kBest,
+ };
+
+ enum class LZ77Method {
+ kNone, // do not try lz77.
+ kRLE, // only try doing RLE.
+ kLZ77, // try lz77 with backward references.
+ kOptimal, // optimal-matching LZ77 parsing.
+ };
+
+ enum class ANSHistogramStrategy {
+ kFast, // Only try some methods, early exit.
+ kApproximate, // Only try some methods.
+ kPrecise, // Try all methods.
+ };
+
+ HistogramParams() = default;
+
+ HistogramParams(SpeedTier tier, size_t num_ctx) {
+ if (tier > SpeedTier::kFalcon) {
+ clustering = ClusteringType::kFastest;
+ lz77_method = LZ77Method::kNone;
+ } else if (tier > SpeedTier::kTortoise) {
+ clustering = ClusteringType::kFast;
+ } else {
+ clustering = ClusteringType::kBest;
+ }
+ if (tier > SpeedTier::kTortoise) {
+ uint_method = HybridUintMethod::kNone;
+ }
+ if (tier >= SpeedTier::kSquirrel) {
+ ans_histogram_strategy = ANSHistogramStrategy::kApproximate;
+ }
+ }
+
+ ClusteringType clustering = ClusteringType::kBest;
+ HybridUintMethod uint_method = HybridUintMethod::kBest;
+ LZ77Method lz77_method = LZ77Method::kRLE;
+ ANSHistogramStrategy ans_histogram_strategy = ANSHistogramStrategy::kPrecise;
+ std::vector<size_t> image_widths;
+ size_t max_histograms = ~0;
+ bool force_huffman = false;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_ANS_PARAMS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_ar_control_field.cc b/third_party/jpeg-xl/lib/jxl/enc_ar_control_field.cc
new file mode 100644
index 0000000000..9030430e2b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_ar_control_field.cc
@@ -0,0 +1,325 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_ar_control_field.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <algorithm>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_ar_control_field.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::GetLane;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Sqrt;
+
+void ProcessTile(const Image3F& opsin, PassesEncoderState* enc_state,
+ const Rect& rect,
+ ArControlFieldHeuristics::TempImages* temp_image) {
+ constexpr size_t N = kBlockDim;
+ ImageB* JXL_RESTRICT epf_sharpness = &enc_state->shared.epf_sharpness;
+ ImageF* JXL_RESTRICT quant = &enc_state->initial_quant_field;
+ JXL_ASSERT(
+ epf_sharpness->xsize() == enc_state->shared.frame_dim.xsize_blocks &&
+ epf_sharpness->ysize() == enc_state->shared.frame_dim.ysize_blocks);
+
+ if (enc_state->cparams.butteraugli_distance < kMinButteraugliForDynamicAR ||
+ enc_state->cparams.speed_tier > SpeedTier::kWombat ||
+ enc_state->shared.frame_header.loop_filter.epf_iters == 0) {
+ FillPlane(static_cast<uint8_t>(4), epf_sharpness, rect);
+ return;
+ }
+
+ // Likely better to have a higher X weight, like:
+ // const float kChannelWeights[3] = {47.0f, 4.35f, 0.287f};
+ const float kChannelWeights[3] = {4.35f, 4.35f, 0.287f};
+ const float kChannelWeightsLapNeg[3] = {-0.125f * kChannelWeights[0],
+ -0.125f * kChannelWeights[1],
+ -0.125f * kChannelWeights[2]};
+ const size_t sharpness_stride =
+ static_cast<size_t>(epf_sharpness->PixelsPerRow());
+
+ size_t by0 = rect.y0();
+ size_t by1 = rect.y0() + rect.ysize();
+ size_t bx0 = rect.x0();
+ size_t bx1 = rect.x0() + rect.xsize();
+ temp_image->InitOnce();
+ ImageF& laplacian_sqrsum = temp_image->laplacian_sqrsum;
+ // Calculate the L2 of the 3x3 Laplacian in an integral transform
+ // (for example 32x32 dct). This relates to transforms ability
+ // to propagate artefacts.
+ size_t y0 = by0 == 0 ? 2 : 0;
+ size_t y1 = by1 * N + 4 <= opsin.ysize() + 2 ? (by1 - by0) * N + 4
+ : opsin.ysize() + 2 - by0 * N;
+ size_t x0 = bx0 == 0 ? 2 : 0;
+ size_t x1 = bx1 * N + 4 <= opsin.xsize() + 2 ? (bx1 - bx0) * N + 4
+ : opsin.xsize() + 2 - bx0 * N;
+ HWY_FULL(float) df;
+ for (size_t y = y0; y < y1; y++) {
+ float* JXL_RESTRICT laplacian_sqrsum_row = laplacian_sqrsum.Row(y);
+ size_t cy = y + by0 * N - 2;
+ const float* JXL_RESTRICT in_row_t[3];
+ const float* JXL_RESTRICT in_row[3];
+ const float* JXL_RESTRICT in_row_b[3];
+ for (size_t c = 0; c < 3; c++) {
+ in_row_t[c] = opsin.PlaneRow(c, cy > 0 ? cy - 1 : cy);
+ in_row[c] = opsin.PlaneRow(c, cy);
+ in_row_b[c] = opsin.PlaneRow(c, cy + 1 < opsin.ysize() ? cy + 1 : cy);
+ }
+ auto compute_laplacian_scalar = [&](size_t x) {
+ size_t cx = x + bx0 * N - 2;
+ const size_t prevX = cx >= 1 ? cx - 1 : cx;
+ const size_t nextX = cx + 1 < opsin.xsize() ? cx + 1 : cx;
+ float sumsqr = 0;
+ for (size_t c = 0; c < 3; c++) {
+ float laplacian =
+ kChannelWeights[c] * in_row[c][cx] +
+ kChannelWeightsLapNeg[c] *
+ (in_row[c][prevX] + in_row[c][nextX] + in_row_b[c][prevX] +
+ in_row_b[c][cx] + in_row_b[c][nextX] + in_row_t[c][prevX] +
+ in_row_t[c][cx] + in_row_t[c][nextX]);
+ sumsqr += laplacian * laplacian;
+ }
+ laplacian_sqrsum_row[x] = sumsqr;
+ };
+ size_t x = x0;
+ for (; x + bx0 * N < 3; x++) {
+ compute_laplacian_scalar(x);
+ }
+ // Interior. One extra pixel of border as the last pixel is special.
+ for (; x + Lanes(df) <= x1 && x + Lanes(df) + bx0 * N - 1 <= opsin.xsize();
+ x += Lanes(df)) {
+ size_t cx = x + bx0 * N - 2;
+ auto sumsqr = Zero(df);
+ for (size_t c = 0; c < 3; c++) {
+ auto laplacian =
+ Mul(LoadU(df, in_row[c] + cx), Set(df, kChannelWeights[c]));
+ auto sum_oth0 = LoadU(df, in_row[c] + cx - 1);
+ auto sum_oth1 = LoadU(df, in_row[c] + cx + 1);
+ auto sum_oth2 = LoadU(df, in_row_t[c] + cx - 1);
+ auto sum_oth3 = LoadU(df, in_row_t[c] + cx);
+ sum_oth0 = Add(sum_oth0, LoadU(df, in_row_t[c] + cx + 1));
+ sum_oth1 = Add(sum_oth1, LoadU(df, in_row_b[c] + cx - 1));
+ sum_oth2 = Add(sum_oth2, LoadU(df, in_row_b[c] + cx));
+ sum_oth3 = Add(sum_oth3, LoadU(df, in_row_b[c] + cx + 1));
+ sum_oth0 = Add(sum_oth0, sum_oth1);
+ sum_oth2 = Add(sum_oth2, sum_oth3);
+ sum_oth0 = Add(sum_oth0, sum_oth2);
+ laplacian =
+ MulAdd(Set(df, kChannelWeightsLapNeg[c]), sum_oth0, laplacian);
+ sumsqr = MulAdd(laplacian, laplacian, sumsqr);
+ }
+ StoreU(sumsqr, df, laplacian_sqrsum_row + x);
+ }
+ for (; x < x1; x++) {
+ compute_laplacian_scalar(x);
+ }
+ }
+ HWY_CAPPED(float, 4) df4;
+ // Calculate the L2 of the 3x3 Laplacian in 4x4 blocks within the area
+ // of the integral transform. Sample them within the integral transform
+ // with two offsets (0,0) and (-2, -2) pixels (sqrsum_00 and sqrsum_22,
+ // respectively).
+ ImageF& sqrsum_00 = temp_image->sqrsum_00;
+ size_t sqrsum_00_stride = sqrsum_00.PixelsPerRow();
+ float* JXL_RESTRICT sqrsum_00_row = sqrsum_00.Row(0);
+ for (size_t y = 0; y < (by1 - by0) * 2; y++) {
+ const float* JXL_RESTRICT rows_in[4];
+ for (size_t iy = 0; iy < 4; iy++) {
+ rows_in[iy] = laplacian_sqrsum.ConstRow(y * 4 + iy + 2);
+ }
+ float* JXL_RESTRICT row_out = sqrsum_00_row + y * sqrsum_00_stride;
+ for (size_t x = 0; x < (bx1 - bx0) * 2; x++) {
+ auto sum = Zero(df4);
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 4; ix += Lanes(df4)) {
+ sum = Add(sum, LoadU(df4, rows_in[iy] + x * 4 + ix + 2));
+ }
+ }
+ row_out[x] = GetLane(Sqrt(SumOfLanes(df4, sum))) * (1.0f / 4.0f);
+ }
+ }
+ // Indexing iy and ix is a bit tricky as we include a 2 pixel border
+ // around the block for evenness calculations. This is similar to what
+ // we did in guetzli for the observability of artefacts, except there
+ // the element is a sliding 5x5, not sparsely sampled 4x4 box like here.
+ ImageF& sqrsum_22 = temp_image->sqrsum_22;
+ size_t sqrsum_22_stride = sqrsum_22.PixelsPerRow();
+ float* JXL_RESTRICT sqrsum_22_row = sqrsum_22.Row(0);
+ for (size_t y = 0; y < (by1 - by0) * 2 + 1; y++) {
+ const float* JXL_RESTRICT rows_in[4];
+ for (size_t iy = 0; iy < 4; iy++) {
+ rows_in[iy] = laplacian_sqrsum.ConstRow(y * 4 + iy);
+ }
+ float* JXL_RESTRICT row_out = sqrsum_22_row + y * sqrsum_22_stride;
+ // ignore pixels outside the image.
+ // Y coordinates are relative to by0*8+y*4.
+ size_t sy = y * 4 + by0 * 8 > 0 ? 0 : 2;
+ size_t ey = y * 4 + by0 * 8 + 4 <= opsin.ysize() + 2
+ ? 4
+ : opsin.ysize() - y * 4 - by0 * 8 + 2;
+ for (size_t x = 0; x < (bx1 - bx0) * 2 + 1; x++) {
+ // ignore pixels outside the image.
+ // X coordinates are relative to bx0*8.
+ size_t sx = x * 4 + bx0 * 8 > 0 ? x * 4 : x * 4 + 2;
+ size_t ex = x * 4 + bx0 * 8 + 4 <= opsin.xsize() + 2
+ ? x * 4 + 4
+ : opsin.xsize() - bx0 * 8 + 2;
+ if (ex - sx == 4 && ey - sy == 4) {
+ auto sum = Zero(df4);
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 4; ix += Lanes(df4)) {
+ sum = Add(sum, Load(df4, rows_in[iy] + sx + ix));
+ }
+ }
+ row_out[x] = GetLane(Sqrt(SumOfLanes(df4, sum))) * (1.0f / 4.0f);
+ } else {
+ float sum = 0;
+ for (size_t iy = sy; iy < ey; iy++) {
+ for (size_t ix = sx; ix < ex; ix++) {
+ sum += rows_in[iy][ix];
+ }
+ }
+ row_out[x] = std::sqrt(sum / ((ex - sx) * (ey - sy)));
+ }
+ }
+ }
+ for (size_t by = by0; by < by1; by++) {
+ AcStrategyRow acs_row = enc_state->shared.ac_strategy.ConstRow(by);
+ uint8_t* JXL_RESTRICT out_row = epf_sharpness->Row(by);
+ float* JXL_RESTRICT quant_row = quant->Row(by);
+ for (size_t bx = bx0; bx < bx1; bx++) {
+ AcStrategy acs = acs_row[bx];
+ if (!acs.IsFirstBlock()) continue;
+ // The errors are going to be linear to the quantization value in this
+ // locality. We only have access to the initial quant field here.
+ float quant_val = 1.0f / quant_row[bx];
+
+ const auto sq00 = [&](size_t y, size_t x) {
+ return sqrsum_00_row[((by - by0) * 2 + y) * sqrsum_00_stride +
+ (bx - bx0) * 2 + x];
+ };
+ const auto sq22 = [&](size_t y, size_t x) {
+ return sqrsum_22_row[((by - by0) * 2 + y) * sqrsum_22_stride +
+ (bx - bx0) * 2 + x];
+ };
+ float sqrsum_integral_transform = 0;
+ for (size_t iy = 0; iy < acs.covered_blocks_y() * 2; iy++) {
+ for (size_t ix = 0; ix < acs.covered_blocks_x() * 2; ix++) {
+ sqrsum_integral_transform += sq00(iy, ix) * sq00(iy, ix);
+ }
+ }
+ sqrsum_integral_transform /=
+ 4 * acs.covered_blocks_x() * acs.covered_blocks_y();
+ sqrsum_integral_transform = std::sqrt(sqrsum_integral_transform);
+ // If masking is high or amplitude of the artefacts is low, then no
+ // smoothing is needed.
+ for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+ for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+ // Five 4x4 blocks for masking estimation, all within the
+ // 8x8 area.
+ float minval_1 = std::min(sq00(2 * iy + 0, 2 * ix + 0),
+ sq00(2 * iy + 0, 2 * ix + 1));
+ float minval_2 = std::min(sq00(2 * iy + 1, 2 * ix + 0),
+ sq00(2 * iy + 1, 2 * ix + 1));
+ float minval = std::min(minval_1, minval_2);
+ minval = std::min(minval, sq22(2 * iy + 1, 2 * ix + 1));
+ // Nine more 4x4 blocks for masking estimation, includes
+ // the 2 pixel area around the 8x8 block being controlled.
+ float minval2_1 = std::min(sq22(2 * iy + 0, 2 * ix + 0),
+ sq22(2 * iy + 0, 2 * ix + 1));
+ float minval2_2 = std::min(sq22(2 * iy + 0, 2 * ix + 2),
+ sq22(2 * iy + 1, 2 * ix + 0));
+ float minval2_3 = std::min(sq22(2 * iy + 1, 2 * ix + 1),
+ sq22(2 * iy + 1, 2 * ix + 2));
+ float minval2_4 = std::min(sq22(2 * iy + 2, 2 * ix + 0),
+ sq22(2 * iy + 2, 2 * ix + 1));
+ float minval2_5 = std::min(minval2_1, minval2_2);
+ float minval2_6 = std::min(minval2_3, minval2_4);
+ float minval2 = std::min(minval2_5, minval2_6);
+ minval2 = std::min(minval2, sq22(2 * iy + 2, 2 * ix + 2));
+ float minval3 = std::min(minval, minval2);
+ minval *= 0.125f;
+ minval += 0.625f * minval3;
+ minval +=
+ 0.125f * std::min(1.5f * minval3, sq22(2 * iy + 1, 2 * ix + 1));
+ minval += 0.125f * minval2;
+ // Larger kBias, less smoothing for low intensity changes.
+ float kDeltaLimit = 3.2;
+ float bias = 0.0625f * quant_val;
+ float delta =
+ (sqrsum_integral_transform + (kDeltaLimit + 0.05) * bias) /
+ (minval + bias);
+ int out = 4;
+ if (delta > kDeltaLimit) {
+ out = 4; // smooth
+ } else {
+ out = 0;
+ }
+ // 'threshold' is separate from 'bias' for easier tuning of these
+ // heuristics.
+ float threshold = 0.0625f * quant_val;
+ const float kSmoothLimit = 0.085f;
+ float smooth = 0.20f * (sq00(2 * iy + 0, 2 * ix + 0) +
+ sq00(2 * iy + 0, 2 * ix + 1) +
+ sq00(2 * iy + 1, 2 * ix + 0) +
+ sq00(2 * iy + 1, 2 * ix + 1) + minval);
+ if (smooth < kSmoothLimit * threshold) {
+ out = 4;
+ }
+ out_row[bx + sharpness_stride * iy + ix] = out;
+ }
+ }
+ }
+ }
+}
+
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(ProcessTile);
+
+void ArControlFieldHeuristics::RunRect(const Rect& block_rect,
+ const Image3F& opsin,
+ PassesEncoderState* enc_state,
+ size_t thread) {
+ HWY_DYNAMIC_DISPATCH(ProcessTile)
+ (opsin, enc_state, block_rect, &temp_images[thread]);
+}
+
+} // namespace jxl
+
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/enc_ar_control_field.h b/third_party/jpeg-xl/lib/jxl/enc_ar_control_field.h
new file mode 100644
index 0000000000..aabe71f46f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_ar_control_field.h
@@ -0,0 +1,49 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_AR_CONTROL_FIELD_H_
+#define LIB_JXL_ENC_AR_CONTROL_FIELD_H_
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+struct PassesEncoderState;
+
+struct ArControlFieldHeuristics {
+ struct TempImages {
+ void InitOnce() {
+ if (laplacian_sqrsum.xsize() != 0) return;
+ laplacian_sqrsum = ImageF(kEncTileDim + 4, kEncTileDim + 4);
+ sqrsum_00 = ImageF(kEncTileDim / 4, kEncTileDim / 4);
+ sqrsum_22 = ImageF(kEncTileDim / 4 + 1, kEncTileDim / 4 + 1);
+ }
+
+ ImageF laplacian_sqrsum;
+ ImageF sqrsum_00;
+ ImageF sqrsum_22;
+ };
+
+ void PrepareForThreads(size_t num_threads) {
+ temp_images.resize(num_threads);
+ }
+
+ void RunRect(const Rect& block_rect, const Image3F& opsin,
+ PassesEncoderState* enc_state, size_t thread);
+
+ std::vector<TempImages> temp_images;
+ ImageB* epf_sharpness;
+ ImageF* quant;
+ bool all_default;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_AR_ENC_CONTROL_FIELD_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_aux_out.cc b/third_party/jpeg-xl/lib/jxl/enc_aux_out.cc
new file mode 100644
index 0000000000..1c141d1727
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_aux_out.cc
@@ -0,0 +1,205 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_aux_out.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <numeric> // accumulate
+#include <sstream>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+const char* LayerName(size_t layer) {
+ switch (layer) {
+ case kLayerHeader:
+ return "Headers";
+ case kLayerTOC:
+ return "TOC";
+ case kLayerDictionary:
+ return "Patches";
+ case kLayerSplines:
+ return "Splines";
+ case kLayerNoise:
+ return "Noise";
+ case kLayerQuant:
+ return "Quantizer";
+ case kLayerModularTree:
+ return "ModularTree";
+ case kLayerModularGlobal:
+ return "ModularGlobal";
+ case kLayerDC:
+ return "DC";
+ case kLayerModularDcGroup:
+ return "ModularDcGroup";
+ case kLayerControlFields:
+ return "ControlFields";
+ case kLayerOrder:
+ return "CoeffOrder";
+ case kLayerAC:
+ return "ACHistograms";
+ case kLayerACTokens:
+ return "ACTokens";
+ case kLayerModularAcGroup:
+ return "ModularAcGroup";
+ default:
+ JXL_ABORT("Invalid layer %d\n", static_cast<int>(layer));
+ }
+}
+
+void AuxOut::LayerTotals::Print(size_t num_inputs) const {
+ printf("%10" PRId64, static_cast<int64_t>(total_bits));
+ if (histogram_bits != 0) {
+ printf(" [c/i:%6.2f | hst:%8" PRId64 " | ex:%8" PRId64 " | h+c+e:%12.3f",
+ num_clustered_histograms * 1.0 / num_inputs,
+ static_cast<int64_t>(histogram_bits >> 3),
+ static_cast<int64_t>(extra_bits >> 3),
+ (histogram_bits + clustered_entropy + extra_bits) / 8.0);
+ printf("]");
+ }
+ printf("\n");
+}
+
+void AuxOut::Assimilate(const AuxOut& victim) {
+ for (size_t i = 0; i < layers.size(); ++i) {
+ layers[i].Assimilate(victim.layers[i]);
+ }
+ num_blocks += victim.num_blocks;
+ num_small_blocks += victim.num_small_blocks;
+ num_dct4x8_blocks += victim.num_dct4x8_blocks;
+ num_afv_blocks += victim.num_afv_blocks;
+ num_dct8_blocks += victim.num_dct8_blocks;
+ num_dct8x16_blocks += victim.num_dct8x16_blocks;
+ num_dct8x32_blocks += victim.num_dct8x32_blocks;
+ num_dct16_blocks += victim.num_dct16_blocks;
+ num_dct16x32_blocks += victim.num_dct16x32_blocks;
+ num_dct32_blocks += victim.num_dct32_blocks;
+ num_dct32x64_blocks += victim.num_dct32x64_blocks;
+ num_dct64_blocks += victim.num_dct64_blocks;
+ num_butteraugli_iters += victim.num_butteraugli_iters;
+ for (size_t i = 0; i < dc_pred_usage.size(); ++i) {
+ dc_pred_usage[i] += victim.dc_pred_usage[i];
+ dc_pred_usage_xb[i] += victim.dc_pred_usage_xb[i];
+ }
+ max_quant_rescale = std::max(max_quant_rescale, victim.max_quant_rescale);
+ min_quant_rescale = std::min(min_quant_rescale, victim.min_quant_rescale);
+ max_bitrate_error = std::max(max_bitrate_error, victim.max_bitrate_error);
+ min_bitrate_error = std::min(min_bitrate_error, victim.min_bitrate_error);
+}
+
+void AuxOut::Print(size_t num_inputs) const {
+ if (num_inputs == 0) return;
+
+ LayerTotals all_layers;
+ for (size_t i = 0; i < layers.size(); ++i) {
+ all_layers.Assimilate(layers[i]);
+ }
+
+ printf("Average butteraugli iters: %10.2f\n",
+ num_butteraugli_iters * 1.0 / num_inputs);
+ if (min_quant_rescale != 1.0 || max_quant_rescale != 1.0) {
+ printf("quant rescale range: %f .. %f\n", min_quant_rescale,
+ max_quant_rescale);
+ printf("bitrate error range: %.3f%% .. %.3f%%\n",
+ 100.0f * min_bitrate_error, 100.0f * max_bitrate_error);
+ }
+
+ for (size_t i = 0; i < layers.size(); ++i) {
+ if (layers[i].total_bits != 0) {
+ printf("Total layer bits %-10s\t", LayerName(i));
+ printf("%10f%%", 100.0 * layers[i].total_bits / all_layers.total_bits);
+ layers[i].Print(num_inputs);
+ }
+ }
+ printf("Total image size ");
+ all_layers.Print(num_inputs);
+
+ const uint32_t dc_pred_total =
+ std::accumulate(dc_pred_usage.begin(), dc_pred_usage.end(), 0u);
+ const uint32_t dc_pred_total_xb =
+ std::accumulate(dc_pred_usage_xb.begin(), dc_pred_usage_xb.end(), 0u);
+ if (dc_pred_total + dc_pred_total_xb != 0) {
+ printf("\nDC pred Y XB:\n");
+ for (size_t i = 0; i < dc_pred_usage.size(); ++i) {
+ printf(" %6u (%5.2f%%) %6u (%5.2f%%)\n", dc_pred_usage[i],
+ 100.0 * dc_pred_usage[i] / dc_pred_total, dc_pred_usage_xb[i],
+ 100.0 * dc_pred_usage_xb[i] / dc_pred_total_xb);
+ }
+ }
+
+ size_t total_blocks = 0;
+ size_t total_positions = 0;
+ if (total_blocks != 0 && total_positions != 0) {
+ printf("\n\t\t Blocks\t\tPositions\t\t\tBlocks/Position\n");
+ printf(" Total:\t\t %7" PRIuS "\t\t %7" PRIuS " \t\t\t%10f%%\n\n",
+ total_blocks, total_positions,
+ 100.0 * total_blocks / total_positions);
+ }
+}
+
+template <typename T>
+void AuxOut::DumpImage(const char* label, const Image3<T>& image) const {
+ if (!dump_image) return;
+ if (debug_prefix.empty()) return;
+ std::ostringstream pathname;
+ pathname << debug_prefix << label << ".png";
+ (void)dump_image(ConvertToFloat(image), ColorEncoding::SRGB(),
+ pathname.str());
+}
+template void AuxOut::DumpImage(const char* label,
+ const Image3<float>& image) const;
+template void AuxOut::DumpImage(const char* label,
+ const Image3<uint8_t>& image) const;
+
+template <typename T>
+void AuxOut::DumpPlaneNormalized(const char* label,
+ const Plane<T>& image) const {
+ T min;
+ T max;
+ ImageMinMax(image, &min, &max);
+ Image3B normalized(image.xsize(), image.ysize());
+ for (size_t c = 0; c < 3; ++c) {
+ float mul = min == max ? 0 : (255.0f / (max - min));
+ for (size_t y = 0; y < image.ysize(); ++y) {
+ const T* JXL_RESTRICT row_in = image.ConstRow(y);
+ uint8_t* JXL_RESTRICT row_out = normalized.PlaneRow(c, y);
+ for (size_t x = 0; x < image.xsize(); ++x) {
+ row_out[x] = static_cast<uint8_t>((row_in[x] - min) * mul);
+ }
+ }
+ }
+ DumpImage(label, normalized);
+}
+template void AuxOut::DumpPlaneNormalized(const char* label,
+ const Plane<float>& image) const;
+template void AuxOut::DumpPlaneNormalized(const char* label,
+ const Plane<uint8_t>& image) const;
+
+void AuxOut::DumpXybImage(const char* label, const Image3F& image) const {
+ if (!dump_image) return;
+ if (debug_prefix.empty()) return;
+ std::ostringstream pathname;
+ pathname << debug_prefix << label << ".png";
+
+ Image3F linear(image.xsize(), image.ysize());
+ OpsinParams opsin_params;
+ opsin_params.Init(kDefaultIntensityTarget);
+ OpsinToLinear(image, Rect(linear), nullptr, &linear, opsin_params);
+
+ (void)dump_image(std::move(linear), ColorEncoding::LinearSRGB(),
+ pathname.str());
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_aux_out.h b/third_party/jpeg-xl/lib/jxl/enc_aux_out.h
new file mode 100644
index 0000000000..78222823ae
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_aux_out.h
@@ -0,0 +1,163 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_AUX_OUT_H_
+#define LIB_JXL_AUX_OUT_H_
+
+// Optional output information for debugging and analyzing size usage.
+
+#include <stddef.h>
+
+#include <array>
+#include <functional>
+#include <string>
+
+#include "lib/jxl/image.h"
+#include "lib/jxl/jxl_inspection.h"
+
+namespace jxl {
+
+struct ColorEncoding;
+
+// For LayerName and AuxOut::layers[] index. Order does not matter.
+enum {
+ kLayerHeader = 0,
+ kLayerTOC,
+ kLayerDictionary,
+ kLayerSplines,
+ kLayerNoise,
+ kLayerQuant,
+ kLayerModularTree,
+ kLayerModularGlobal,
+ kLayerDC,
+ kLayerModularDcGroup,
+ kLayerControlFields,
+ kLayerOrder,
+ kLayerAC,
+ kLayerACTokens,
+ kLayerModularAcGroup,
+ kNumImageLayers
+};
+
+const char* LayerName(size_t layer);
+
+// Statistics gathered during compression or decompression.
+struct AuxOut {
+ private:
+ struct LayerTotals {
+ void Assimilate(const LayerTotals& victim) {
+ num_clustered_histograms += victim.num_clustered_histograms;
+ histogram_bits += victim.histogram_bits;
+ extra_bits += victim.extra_bits;
+ total_bits += victim.total_bits;
+ clustered_entropy += victim.clustered_entropy;
+ }
+ void Print(size_t num_inputs) const;
+
+ size_t num_clustered_histograms = 0;
+ size_t extra_bits = 0;
+
+ // Set via BitsWritten below
+ size_t histogram_bits = 0;
+ size_t total_bits = 0;
+
+ double clustered_entropy = 0.0;
+ };
+
+ public:
+ AuxOut() = default;
+ AuxOut(const AuxOut&) = default;
+
+ void Assimilate(const AuxOut& victim);
+
+ void Print(size_t num_inputs) const;
+
+ size_t TotalBits() const {
+ size_t total = 0;
+ for (const auto& layer : layers) {
+ total += layer.total_bits;
+ }
+ return total;
+ }
+
+ template <typename T>
+ void DumpImage(const char* label, const Image3<T>& image) const;
+
+ void DumpXybImage(const char* label, const Image3F& image) const;
+
+ template <typename T>
+ void DumpPlaneNormalized(const char* label, const Plane<T>& image) const;
+
+ void SetInspectorImage3F(const jxl::InspectorImage3F& inspector) {
+ inspector_image3f_ = inspector;
+ }
+
+ // Allows hooking intermediate data inspection into various places of the
+ // processing pipeline. Returns true iff processing should proceed.
+ bool InspectImage3F(const char* label, const Image3F& image) {
+ if (inspector_image3f_ != nullptr) {
+ return inspector_image3f_(label, image);
+ }
+ return true;
+ }
+
+ std::array<LayerTotals, kNumImageLayers> layers;
+ size_t num_blocks = 0;
+
+ // Number of blocks that use larger DCT (set by ac_strategy).
+ size_t num_small_blocks = 0;
+ size_t num_dct4x8_blocks = 0;
+ size_t num_afv_blocks = 0;
+ size_t num_dct8_blocks = 0;
+ size_t num_dct8x16_blocks = 0;
+ size_t num_dct8x32_blocks = 0;
+ size_t num_dct16_blocks = 0;
+ size_t num_dct16x32_blocks = 0;
+ size_t num_dct32_blocks = 0;
+ size_t num_dct32x64_blocks = 0;
+ size_t num_dct64_blocks = 0;
+
+ std::array<uint32_t, 8> dc_pred_usage = {{0}};
+ std::array<uint32_t, 8> dc_pred_usage_xb = {{0}};
+
+ int num_butteraugli_iters = 0;
+
+ float max_quant_rescale = 1.0f;
+ float min_quant_rescale = 1.0f;
+ float min_bitrate_error = 0.0f;
+ float max_bitrate_error = 0.0f;
+
+ // If not empty, additional debugging information (e.g. debug images) is
+ // saved in files with this prefix.
+ std::string debug_prefix;
+
+ // By how much the decoded image was downsampled relative to the encoded
+ // image.
+ size_t downsampling = 1;
+
+ jxl::InspectorImage3F inspector_image3f_;
+
+ std::function<Status(Image3F&&, const ColorEncoding&, const std::string&)>
+ dump_image = nullptr;
+};
+
+extern template void AuxOut::DumpImage(const char* label,
+ const Image3<float>& image) const;
+extern template void AuxOut::DumpImage(const char* label,
+ const Image3<uint8_t>& image) const;
+extern template void AuxOut::DumpPlaneNormalized(
+ const char* label, const Plane<float>& image) const;
+extern template void AuxOut::DumpPlaneNormalized(
+ const char* label, const Plane<uint8_t>& image) const;
+
+// Used to skip image creation if they won't be written to debug directory.
+static inline bool WantDebugOutput(const AuxOut* aux_out) {
+ // Need valid pointer and filename.
+ return aux_out != nullptr && !aux_out->debug_prefix.empty();
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_AUX_OUT_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_bit_writer.cc b/third_party/jpeg-xl/lib/jxl/enc_bit_writer.cc
new file mode 100644
index 0000000000..7964c28f76
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_bit_writer.cc
@@ -0,0 +1,201 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_bit_writer.h"
+
+#include <string.h> // memcpy
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_aux_out.h"
+
+namespace jxl {
+
+BitWriter::Allotment::Allotment(BitWriter* JXL_RESTRICT writer, size_t max_bits)
+ : max_bits_(max_bits) {
+ if (writer == nullptr) return;
+ prev_bits_written_ = writer->BitsWritten();
+ const size_t prev_bytes = writer->storage_.size();
+ const size_t next_bytes = DivCeil(max_bits, kBitsPerByte);
+ writer->storage_.resize(prev_bytes + next_bytes);
+ parent_ = writer->current_allotment_;
+ writer->current_allotment_ = this;
+}
+
+BitWriter::Allotment::~Allotment() {
+ if (!called_) {
+ // Not calling is a bug - unused storage will not be reclaimed.
+ JXL_ABORT("Did not call Allotment::ReclaimUnused");
+ }
+}
+
+void BitWriter::Allotment::FinishedHistogram(BitWriter* JXL_RESTRICT writer) {
+ if (writer == nullptr) return;
+ JXL_ASSERT(!called_); // Call before ReclaimUnused
+ JXL_ASSERT(histogram_bits_ == 0); // Do not call twice
+ JXL_ASSERT(writer->BitsWritten() >= prev_bits_written_);
+ histogram_bits_ = writer->BitsWritten() - prev_bits_written_;
+}
+
+void BitWriter::Allotment::ReclaimAndCharge(BitWriter* JXL_RESTRICT writer,
+ size_t layer,
+ AuxOut* JXL_RESTRICT aux_out) {
+ size_t used_bits, unused_bits;
+ PrivateReclaim(writer, &used_bits, &unused_bits);
+
+#if 0
+ printf("Layer %s bits: max %" PRIuS " used %" PRIuS " unused %" PRIuS "\n",
+ LayerName(layer), MaxBits(), used_bits, unused_bits);
+#endif
+
+ // This may be a nested call with aux_out == null. Whenever we know that
+ // aux_out is null, we can call ReclaimUnused directly.
+ if (aux_out != nullptr) {
+ aux_out->layers[layer].total_bits += used_bits;
+ aux_out->layers[layer].histogram_bits += HistogramBits();
+ }
+}
+
+void BitWriter::Allotment::PrivateReclaim(BitWriter* JXL_RESTRICT writer,
+ size_t* JXL_RESTRICT used_bits,
+ size_t* JXL_RESTRICT unused_bits) {
+ JXL_ASSERT(!called_); // Do not call twice
+ called_ = true;
+ if (writer == nullptr) return;
+
+ JXL_ASSERT(writer->BitsWritten() >= prev_bits_written_);
+ *used_bits = writer->BitsWritten() - prev_bits_written_;
+ JXL_ASSERT(*used_bits <= max_bits_);
+ *unused_bits = max_bits_ - *used_bits;
+
+ // Reclaim unused bytes whole bytes from writer's allotment.
+ const size_t unused_bytes = *unused_bits / kBitsPerByte; // truncate
+ JXL_ASSERT(writer->storage_.size() >= unused_bytes);
+ writer->storage_.resize(writer->storage_.size() - unused_bytes);
+ writer->current_allotment_ = parent_;
+ // Ensure we don't also charge the parent for these bits.
+ auto parent = parent_;
+ while (parent != nullptr) {
+ parent->prev_bits_written_ += *used_bits;
+ parent = parent->parent_;
+ }
+}
+
+void BitWriter::AppendByteAligned(const Span<const uint8_t>& span) {
+ if (span.empty()) return;
+ storage_.resize(storage_.size() + span.size() + 1); // extra zero padding
+
+ // Concatenate by copying bytes because both source and destination are bytes.
+ JXL_ASSERT(BitsWritten() % kBitsPerByte == 0);
+ size_t pos = BitsWritten() / kBitsPerByte;
+ memcpy(storage_.data() + pos, span.data(), span.size());
+ pos += span.size();
+ storage_[pos++] = 0; // for next Write
+ JXL_ASSERT(pos <= storage_.size());
+ bits_written_ += span.size() * kBitsPerByte;
+}
+
+void BitWriter::AppendByteAligned(const BitWriter& other) {
+ JXL_ASSERT(other.BitsWritten() % kBitsPerByte == 0);
+ JXL_ASSERT(other.BitsWritten() / kBitsPerByte != 0);
+
+ AppendByteAligned(other.GetSpan());
+}
+
+void BitWriter::AppendByteAligned(const std::vector<BitWriter>& others) {
+ // Total size to add so we can preallocate
+ size_t other_bytes = 0;
+ for (const BitWriter& writer : others) {
+ JXL_ASSERT(writer.BitsWritten() % kBitsPerByte == 0);
+ other_bytes += writer.BitsWritten() / kBitsPerByte;
+ }
+ if (other_bytes == 0) {
+ // No bytes to append: this happens for example when creating per-group
+ // storage for groups, but not writing anything in them for e.g. lossless
+ // images with no alpha. Do nothing.
+ return;
+ }
+ storage_.resize(storage_.size() + other_bytes + 1); // extra zero padding
+
+ // Concatenate by copying bytes because both source and destination are bytes.
+ JXL_ASSERT(BitsWritten() % kBitsPerByte == 0);
+ size_t pos = BitsWritten() / kBitsPerByte;
+ for (const BitWriter& writer : others) {
+ const Span<const uint8_t> span = writer.GetSpan();
+ if (!span.empty()) {
+ memcpy(storage_.data() + pos, span.data(), span.size());
+ pos += span.size();
+ }
+ }
+ storage_[pos++] = 0; // for next Write
+ JXL_ASSERT(pos <= storage_.size());
+ bits_written_ += other_bytes * kBitsPerByte;
+}
+
+// TODO(lode): avoid code duplication
+void BitWriter::AppendByteAligned(
+ const std::vector<std::unique_ptr<BitWriter>>& others) {
+ // Total size to add so we can preallocate
+ size_t other_bytes = 0;
+ for (const auto& writer : others) {
+ JXL_ASSERT(writer->BitsWritten() % kBitsPerByte == 0);
+ other_bytes += writer->BitsWritten() / kBitsPerByte;
+ }
+ if (other_bytes == 0) {
+ // No bytes to append: this happens for example when creating per-group
+ // storage for groups, but not writing anything in them for e.g. lossless
+ // images with no alpha. Do nothing.
+ return;
+ }
+ storage_.resize(storage_.size() + other_bytes + 1); // extra zero padding
+
+ // Concatenate by copying bytes because both source and destination are bytes.
+ JXL_ASSERT(BitsWritten() % kBitsPerByte == 0);
+ size_t pos = BitsWritten() / kBitsPerByte;
+ for (const auto& writer : others) {
+ const Span<const uint8_t> span = writer->GetSpan();
+ memcpy(storage_.data() + pos, span.data(), span.size());
+ pos += span.size();
+ }
+ storage_[pos++] = 0; // for next Write
+ JXL_ASSERT(pos <= storage_.size());
+ bits_written_ += other_bytes * kBitsPerByte;
+}
+
+// Example: let's assume that 3 bits (Rs below) have been written already:
+// BYTE+0 BYTE+1 BYTE+2
+// 0000 0RRR ???? ???? ???? ????
+//
+// Now, we could write up to 5 bits by just shifting them left by 3 bits and
+// OR'ing to BYTE-0.
+//
+// For n > 5 bits, we write the lowest 5 bits as above, then write the next
+// lowest bits into BYTE+1 starting from its lower bits and so on.
+void BitWriter::Write(size_t n_bits, uint64_t bits) {
+ JXL_DASSERT((bits >> n_bits) == 0);
+ JXL_DASSERT(n_bits <= kMaxBitsPerCall);
+ uint8_t* p = &storage_[bits_written_ / kBitsPerByte];
+ const size_t bits_in_first_byte = bits_written_ % kBitsPerByte;
+ bits <<= bits_in_first_byte;
+#if JXL_BYTE_ORDER_LITTLE
+ uint64_t v = *p;
+ // Last (partial) or next byte to write must be zero-initialized!
+ // PaddedBytes initializes the first, and Write/Append maintain this.
+ JXL_DASSERT(v >> bits_in_first_byte == 0);
+ v |= bits;
+ memcpy(p, &v, sizeof(v)); // Write bytes: possibly more than n_bits/8
+#else
+ *p++ |= static_cast<uint8_t>(bits & 0xFF);
+ for (size_t bits_left_to_write = n_bits + bits_in_first_byte;
+ bits_left_to_write >= 9; bits_left_to_write -= 8) {
+ bits >>= 8;
+ *p++ = static_cast<uint8_t>(bits & 0xFF);
+ }
+ *p = 0;
+#endif
+ bits_written_ += n_bits;
+}
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_bit_writer.h b/third_party/jpeg-xl/lib/jxl/enc_bit_writer.h
new file mode 100644
index 0000000000..d3fac15a68
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_bit_writer.h
@@ -0,0 +1,129 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_BIT_WRITER_H_
+#define LIB_JXL_ENC_BIT_WRITER_H_
+
+// BitWriter class: unbuffered writes using unaligned 64-bit stores.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+struct BitWriter {
+ // Upper bound on `n_bits` in each call to Write. We shift a 64-bit word by
+ // 7 bits (max already valid bits in the last byte) and at least 1 bit is
+ // needed to zero-initialize the bit-stream ahead (i.e. if 7 bits are valid
+ // and we write 57 bits, then the next write will access a byte that was not
+ // yet zero-initialized).
+ static constexpr size_t kMaxBitsPerCall = 56;
+
+ BitWriter() : bits_written_(0) {}
+
+ // Disallow copying - may lead to bugs.
+ BitWriter(const BitWriter&) = delete;
+ BitWriter& operator=(const BitWriter&) = delete;
+ BitWriter(BitWriter&&) = default;
+ BitWriter& operator=(BitWriter&&) = default;
+
+ size_t BitsWritten() const { return bits_written_; }
+
+ Span<const uint8_t> GetSpan() const {
+ // Callers must ensure byte alignment to avoid uninitialized bits.
+ JXL_ASSERT(bits_written_ % kBitsPerByte == 0);
+ return Span<const uint8_t>(storage_.data(), bits_written_ / kBitsPerByte);
+ }
+
+ // Example usage: bytes = std::move(writer).TakeBytes(); Useful for the
+ // top-level encoder which returns PaddedBytes, not a BitWriter.
+ // *this must be an rvalue reference and is invalid afterwards.
+ PaddedBytes&& TakeBytes() && {
+ // Callers must ensure byte alignment to avoid uninitialized bits.
+ JXL_ASSERT(bits_written_ % kBitsPerByte == 0);
+ storage_.resize(bits_written_ / kBitsPerByte);
+ return std::move(storage_);
+ }
+
+ private:
+ // Must be byte-aligned before calling.
+ void AppendByteAligned(const Span<const uint8_t>& span);
+
+ public:
+ // NOTE: no allotment needed, the other BitWriters have already been charged.
+ void AppendByteAligned(const BitWriter& other);
+ void AppendByteAligned(const std::vector<std::unique_ptr<BitWriter>>& others);
+ void AppendByteAligned(const std::vector<BitWriter>& others);
+
+ class Allotment {
+ public:
+ // Expands a BitWriter's storage. Must happen before calling Write or
+ // ZeroPadToByte. Must call ReclaimUnused after writing to reclaim the
+ // unused storage so that BitWriter memory use remains tightly bounded.
+ Allotment(BitWriter* JXL_RESTRICT writer, size_t max_bits);
+ ~Allotment();
+
+ size_t MaxBits() const { return max_bits_; }
+
+ // Call after writing a histogram, but before ReclaimUnused.
+ void FinishedHistogram(BitWriter* JXL_RESTRICT writer);
+
+ size_t HistogramBits() const {
+ JXL_ASSERT(called_);
+ return histogram_bits_;
+ }
+
+ void ReclaimAndCharge(BitWriter* JXL_RESTRICT writer, size_t layer,
+ AuxOut* JXL_RESTRICT aux_out);
+
+ private:
+ void PrivateReclaim(BitWriter* JXL_RESTRICT writer,
+ size_t* JXL_RESTRICT used_bits,
+ size_t* JXL_RESTRICT unused_bits);
+
+ size_t prev_bits_written_;
+ const size_t max_bits_;
+ size_t histogram_bits_ = 0;
+ bool called_ = false;
+ Allotment* parent_;
+ };
+
+ // Writes bits into bytes in increasing addresses, and within a byte
+ // least-significant-bit first.
+ //
+ // The function can write up to 56 bits in one go.
+ void Write(size_t n_bits, uint64_t bits);
+
+ // This should only rarely be used - e.g. when the current location will be
+ // referenced via byte offset (TOCs point to groups), or byte-aligned reading
+ // is required for speed.
+ void ZeroPadToByte() {
+ const size_t remainder_bits =
+ RoundUpBitsToByteMultiple(bits_written_) - bits_written_;
+ if (remainder_bits == 0) return;
+ Write(remainder_bits, 0);
+ JXL_ASSERT(bits_written_ % kBitsPerByte == 0);
+ }
+
+ private:
+ size_t bits_written_;
+ PaddedBytes storage_;
+ Allotment* current_allotment_ = nullptr;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_BIT_WRITER_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_butteraugli_comparator.cc b/third_party/jpeg-xl/lib/jxl/enc_butteraugli_comparator.cc
new file mode 100644
index 0000000000..5711f45884
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_butteraugli_comparator.cc
@@ -0,0 +1,99 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_butteraugli_comparator.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/enc_image_bundle.h"
+
+namespace jxl {
+
+JxlButteraugliComparator::JxlButteraugliComparator(
+ const ButteraugliParams& params, const JxlCmsInterface& cms)
+ : params_(params), cms_(cms) {}
+
+Status JxlButteraugliComparator::SetReferenceImage(const ImageBundle& ref) {
+ const ImageBundle* ref_linear_srgb;
+ ImageMetadata metadata = *ref.metadata();
+ ImageBundle store(&metadata);
+ if (!TransformIfNeeded(ref, ColorEncoding::LinearSRGB(ref.IsGray()), cms_,
+ /*pool=*/nullptr, &store, &ref_linear_srgb)) {
+ return false;
+ }
+
+ comparator_.reset(
+ new ButteraugliComparator(ref_linear_srgb->color(), params_));
+ xsize_ = ref.xsize();
+ ysize_ = ref.ysize();
+ return true;
+}
+
+Status JxlButteraugliComparator::CompareWith(const ImageBundle& actual,
+ ImageF* diffmap, float* score) {
+ if (!comparator_) {
+ return JXL_FAILURE("Must set reference image first");
+ }
+ if (xsize_ != actual.xsize() || ysize_ != actual.ysize()) {
+ return JXL_FAILURE("Images must have same size");
+ }
+
+ const ImageBundle* actual_linear_srgb;
+ ImageMetadata metadata = *actual.metadata();
+ ImageBundle store(&metadata);
+ if (!TransformIfNeeded(actual, ColorEncoding::LinearSRGB(actual.IsGray()),
+ cms_,
+ /*pool=*/nullptr, &store, &actual_linear_srgb)) {
+ return false;
+ }
+
+ ImageF temp_diffmap(xsize_, ysize_);
+ comparator_->Diffmap(actual_linear_srgb->color(), temp_diffmap);
+
+ if (score != nullptr) {
+ *score = ButteraugliScoreFromDiffmap(temp_diffmap, &params_);
+ }
+ if (diffmap != nullptr) {
+ diffmap->Swap(temp_diffmap);
+ }
+
+ return true;
+}
+
+float JxlButteraugliComparator::GoodQualityScore() const {
+ return ButteraugliFuzzyInverse(1.5);
+}
+
+float JxlButteraugliComparator::BadQualityScore() const {
+ return ButteraugliFuzzyInverse(0.5);
+}
+
+float ButteraugliDistance(const ImageBundle& rgb0, const ImageBundle& rgb1,
+ const ButteraugliParams& params,
+ const JxlCmsInterface& cms, ImageF* distmap,
+ ThreadPool* pool) {
+ JxlButteraugliComparator comparator(params, cms);
+ return ComputeScore(rgb0, rgb1, &comparator, cms, distmap, pool);
+}
+
+float ButteraugliDistance(const std::vector<ImageBundle>& frames0,
+ const std::vector<ImageBundle>& frames1,
+ const ButteraugliParams& params,
+ const JxlCmsInterface& cms, ImageF* distmap,
+ ThreadPool* pool) {
+ JxlButteraugliComparator comparator(params, cms);
+ JXL_ASSERT(frames0.size() == frames1.size());
+ float max_dist = 0.0f;
+ for (size_t i = 0; i < frames0.size(); ++i) {
+ max_dist = std::max(
+ max_dist,
+ ComputeScore(frames0[i], frames1[i], &comparator, cms, distmap, pool));
+ }
+ return max_dist;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_butteraugli_comparator.h b/third_party/jpeg-xl/lib/jxl/enc_butteraugli_comparator.h
new file mode 100644
index 0000000000..6c37d1dc7d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_butteraugli_comparator.h
@@ -0,0 +1,59 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_BUTTERAUGLI_COMPARATOR_H_
+#define LIB_JXL_ENC_BUTTERAUGLI_COMPARATOR_H_
+
+#include <jxl/cms_interface.h>
+#include <stddef.h>
+
+#include <memory>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/butteraugli/butteraugli.h"
+#include "lib/jxl/enc_comparator.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+class JxlButteraugliComparator : public Comparator {
+ public:
+ explicit JxlButteraugliComparator(const ButteraugliParams& params,
+ const JxlCmsInterface& cms);
+
+ Status SetReferenceImage(const ImageBundle& ref) override;
+
+ Status CompareWith(const ImageBundle& actual, ImageF* diffmap,
+ float* score) override;
+
+ float GoodQualityScore() const override;
+ float BadQualityScore() const override;
+
+ private:
+ ButteraugliParams params_;
+ JxlCmsInterface cms_;
+ std::unique_ptr<ButteraugliComparator> comparator_;
+ size_t xsize_ = 0;
+ size_t ysize_ = 0;
+};
+
+// Returns the butteraugli distance between rgb0 and rgb1.
+// If distmap is not null, it must be the same size as rgb0 and rgb1.
+float ButteraugliDistance(const ImageBundle& rgb0, const ImageBundle& rgb1,
+ const ButteraugliParams& params,
+ const JxlCmsInterface& cms, ImageF* distmap = nullptr,
+ ThreadPool* pool = nullptr);
+
+float ButteraugliDistance(const std::vector<ImageBundle>& frames0,
+ const std::vector<ImageBundle>& frames1,
+ const ButteraugliParams& params,
+ const JxlCmsInterface& cms, ImageF* distmap = nullptr,
+ ThreadPool* pool = nullptr);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_BUTTERAUGLI_COMPARATOR_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_butteraugli_pnorm.cc b/third_party/jpeg-xl/lib/jxl/enc_butteraugli_pnorm.cc
new file mode 100644
index 0000000000..fe5629dcda
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_butteraugli_pnorm.cc
@@ -0,0 +1,211 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_butteraugli_pnorm.h"
+
+#include <math.h>
+#include <stdlib.h>
+
+#include <atomic>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_butteraugli_pnorm.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::GetLane;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::Rebind;
+
+double ComputeDistanceP(const ImageF& distmap, const ButteraugliParams& params,
+ double p) {
+ PROFILER_FUNC;
+
+ const double onePerPixels = 1.0 / (distmap.ysize() * distmap.xsize());
+ if (std::abs(p - 3.0) < 1E-6) {
+ double sum1[3] = {0.0};
+
+// Prefer double if possible, but otherwise use float rather than scalar.
+#if HWY_CAP_FLOAT64
+ using T = double;
+ const Rebind<float, HWY_FULL(double)> df;
+#else
+ using T = float;
+#endif
+ const HWY_FULL(T) d;
+ constexpr size_t N = MaxLanes(HWY_FULL(T)());
+ // Manually aligned storage to avoid asan crash on clang-7 due to
+ // unaligned spill.
+ HWY_ALIGN T sum_totals0[N] = {0};
+ HWY_ALIGN T sum_totals1[N] = {0};
+ HWY_ALIGN T sum_totals2[N] = {0};
+
+ for (size_t y = 0; y < distmap.ysize(); ++y) {
+ const float* JXL_RESTRICT row = distmap.ConstRow(y);
+
+ auto sums0 = Zero(d);
+ auto sums1 = Zero(d);
+ auto sums2 = Zero(d);
+
+ size_t x = 0;
+ for (; x + Lanes(d) <= distmap.xsize(); x += Lanes(d)) {
+#if HWY_CAP_FLOAT64
+ const auto d1 = PromoteTo(d, Load(df, row + x));
+#else
+ const auto d1 = Load(d, row + x);
+#endif
+ const auto d2 = Mul(d1, Mul(d1, d1));
+ sums0 = Add(sums0, d2);
+ const auto d3 = Mul(d2, d2);
+ sums1 = Add(sums1, d3);
+ const auto d4 = Mul(d3, d3);
+ sums2 = Add(sums2, d4);
+ }
+
+ Store(Add(sums0, Load(d, sum_totals0)), d, sum_totals0);
+ Store(Add(sums1, Load(d, sum_totals1)), d, sum_totals1);
+ Store(Add(sums2, Load(d, sum_totals2)), d, sum_totals2);
+
+ for (; x < distmap.xsize(); ++x) {
+ const double d1 = row[x];
+ double d2 = d1 * d1 * d1;
+ sum1[0] += d2;
+ d2 *= d2;
+ sum1[1] += d2;
+ d2 *= d2;
+ sum1[2] += d2;
+ }
+ }
+ double v = 0;
+ v += pow(
+ onePerPixels * (sum1[0] + GetLane(SumOfLanes(d, Load(d, sum_totals0)))),
+ 1.0 / (p * 1.0));
+ v += pow(
+ onePerPixels * (sum1[1] + GetLane(SumOfLanes(d, Load(d, sum_totals1)))),
+ 1.0 / (p * 2.0));
+ v += pow(
+ onePerPixels * (sum1[2] + GetLane(SumOfLanes(d, Load(d, sum_totals2)))),
+ 1.0 / (p * 4.0));
+ v /= 3.0;
+ return v;
+ } else {
+ static std::atomic<int> once{0};
+ if (once.fetch_add(1, std::memory_order_relaxed) == 0) {
+ JXL_WARNING("WARNING: using slow ComputeDistanceP");
+ }
+ double sum1[3] = {0.0};
+ for (size_t y = 0; y < distmap.ysize(); ++y) {
+ const float* JXL_RESTRICT row = distmap.ConstRow(y);
+ for (size_t x = 0; x < distmap.xsize(); ++x) {
+ double d2 = std::pow(row[x], p);
+ sum1[0] += d2;
+ d2 *= d2;
+ sum1[1] += d2;
+ d2 *= d2;
+ sum1[2] += d2;
+ }
+ }
+ double v = 0;
+ for (int i = 0; i < 3; ++i) {
+ v += pow(onePerPixels * (sum1[i]), 1.0 / (p * (1 << i)));
+ }
+ v /= 3.0;
+ return v;
+ }
+}
+
+// TODO(lode): take alpha into account when needed
+double ComputeDistance2(const ImageBundle& ib1, const ImageBundle& ib2,
+ const JxlCmsInterface& cms) {
+ PROFILER_FUNC;
+ // Convert to sRGB - closer to perception than linear.
+ const Image3F* srgb1 = &ib1.color();
+ Image3F copy1;
+ if (!ib1.IsSRGB()) {
+ JXL_CHECK(
+ ib1.CopyTo(Rect(ib1), ColorEncoding::SRGB(ib1.IsGray()), cms, &copy1));
+ srgb1 = &copy1;
+ }
+ const Image3F* srgb2 = &ib2.color();
+ Image3F copy2;
+ if (!ib2.IsSRGB()) {
+ JXL_CHECK(
+ ib2.CopyTo(Rect(ib2), ColorEncoding::SRGB(ib2.IsGray()), cms, &copy2));
+ srgb2 = &copy2;
+ }
+
+ JXL_CHECK(SameSize(*srgb1, *srgb2));
+
+ // TODO(veluca): SIMD.
+ float yuvmatrix[3][3] = {{0.299, 0.587, 0.114},
+ {-0.14713, -0.28886, 0.436},
+ {0.615, -0.51499, -0.10001}};
+ double sum_of_squares[3] = {};
+ for (size_t y = 0; y < srgb1->ysize(); ++y) {
+ const float* JXL_RESTRICT row1[3];
+ const float* JXL_RESTRICT row2[3];
+ for (size_t j = 0; j < 3; j++) {
+ row1[j] = srgb1->ConstPlaneRow(j, y);
+ row2[j] = srgb2->ConstPlaneRow(j, y);
+ }
+ for (size_t x = 0; x < srgb1->xsize(); ++x) {
+ float cdiff[3] = {};
+ // YUV conversion is linear, so we can run it on the difference.
+ for (size_t j = 0; j < 3; j++) {
+ cdiff[j] = row1[j][x] - row2[j][x];
+ }
+ float yuvdiff[3] = {};
+ for (size_t j = 0; j < 3; j++) {
+ for (size_t k = 0; k < 3; k++) {
+ yuvdiff[j] += yuvmatrix[j][k] * cdiff[k];
+ }
+ }
+ for (size_t j = 0; j < 3; j++) {
+ sum_of_squares[j] += yuvdiff[j] * yuvdiff[j];
+ }
+ }
+ }
+ // Weighted PSNR as in JPEG-XL: chroma counts 1/8.
+ const float weights[3] = {6.0f / 8, 1.0f / 8, 1.0f / 8};
+ // Avoid squaring the weight - 1/64 is too extreme.
+ double norm = 0;
+ for (size_t i = 0; i < 3; i++) {
+ norm += std::sqrt(sum_of_squares[i]) * weights[i];
+ }
+ // This function returns distance *squared*.
+ return norm * norm;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(ComputeDistanceP);
+double ComputeDistanceP(const ImageF& distmap, const ButteraugliParams& params,
+ double p) {
+ return HWY_DYNAMIC_DISPATCH(ComputeDistanceP)(distmap, params, p);
+}
+
+HWY_EXPORT(ComputeDistance2);
+double ComputeDistance2(const ImageBundle& ib1, const ImageBundle& ib2,
+ const JxlCmsInterface& cms) {
+ return HWY_DYNAMIC_DISPATCH(ComputeDistance2)(ib1, ib2, cms);
+}
+
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/enc_butteraugli_pnorm.h b/third_party/jpeg-xl/lib/jxl/enc_butteraugli_pnorm.h
new file mode 100644
index 0000000000..cf6872e5d0
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_butteraugli_pnorm.h
@@ -0,0 +1,25 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_BUTTERAUGLI_PNORM_H_
+#define LIB_JXL_ENC_BUTTERAUGLI_PNORM_H_
+
+#include <stdint.h>
+
+#include "lib/jxl/butteraugli/butteraugli.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+// Computes p-norm given the butteraugli distmap.
+double ComputeDistanceP(const ImageF& distmap, const ButteraugliParams& params,
+ double p);
+
+double ComputeDistance2(const ImageBundle& ib1, const ImageBundle& ib2,
+ const JxlCmsInterface& cms);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_BUTTERAUGLI_PNORM_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_cache.cc b/third_party/jpeg-xl/lib/jxl/enc_cache.cc
new file mode 100644
index 0000000000..fc3e5c9f30
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_cache.cc
@@ -0,0 +1,218 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_cache.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <type_traits>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/dec_frame.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_frame.h"
+#include "lib/jxl/enc_group.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/passes_state.h"
+#include "lib/jxl/quantizer.h"
+
+namespace jxl {
+
+Status InitializePassesEncoder(const Image3F& opsin, const JxlCmsInterface& cms,
+ ThreadPool* pool, PassesEncoderState* enc_state,
+ ModularFrameEncoder* modular_frame_encoder,
+ AuxOut* aux_out) {
+ PROFILER_FUNC;
+
+ PassesSharedState& JXL_RESTRICT shared = enc_state->shared;
+
+ enc_state->histogram_idx.resize(shared.frame_dim.num_groups);
+
+ enc_state->x_qm_multiplier =
+ std::pow(1.25f, shared.frame_header.x_qm_scale - 2.0f);
+ enc_state->b_qm_multiplier =
+ std::pow(1.25f, shared.frame_header.b_qm_scale - 2.0f);
+
+ if (enc_state->coeffs.size() < shared.frame_header.passes.num_passes) {
+ enc_state->coeffs.reserve(shared.frame_header.passes.num_passes);
+ for (size_t i = enc_state->coeffs.size();
+ i < shared.frame_header.passes.num_passes; i++) {
+ // Allocate enough coefficients for each group on every row.
+ enc_state->coeffs.emplace_back(make_unique<ACImageT<int32_t>>(
+ kGroupDim * kGroupDim, shared.frame_dim.num_groups));
+ }
+ }
+ while (enc_state->coeffs.size() > shared.frame_header.passes.num_passes) {
+ enc_state->coeffs.pop_back();
+ }
+
+ float scale =
+ shared.quantizer.ScaleGlobalScale(enc_state->cparams.quant_ac_rescale);
+ DequantMatricesScaleDC(&shared.matrices, scale);
+ shared.quantizer.RecomputeFromGlobalScale();
+
+ Image3F dc(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, shared.frame_dim.num_groups, ThreadPool::NoInit,
+ [&](size_t group_idx, size_t _) {
+ ComputeCoefficients(group_idx, enc_state, opsin, &dc);
+ },
+ "Compute coeffs"));
+
+ if (shared.frame_header.flags & FrameHeader::kUseDcFrame) {
+ CompressParams cparams = enc_state->cparams;
+ cparams.dots = Override::kOff;
+ cparams.noise = Override::kOff;
+ cparams.patches = Override::kOff;
+ cparams.gaborish = Override::kOff;
+ cparams.epf = 0;
+ cparams.resampling = 1;
+ cparams.ec_resampling = 1;
+ // The DC frame will have alpha=0. Don't erase its contents.
+ cparams.keep_invisible = Override::kOn;
+ JXL_ASSERT(cparams.progressive_dc > 0);
+ cparams.progressive_dc--;
+ // Use kVarDCT in max_error_mode for intermediate progressive DC,
+ // and kModular for the smallest DC (first in the bitstream)
+ if (cparams.progressive_dc == 0) {
+ cparams.modular_mode = true;
+ cparams.speed_tier =
+ SpeedTier(std::max(static_cast<int>(SpeedTier::kTortoise),
+ static_cast<int>(cparams.speed_tier) - 1));
+ cparams.butteraugli_distance =
+ std::max(kMinButteraugliDistance,
+ enc_state->cparams.butteraugli_distance * 0.02f);
+ } else {
+ cparams.max_error_mode = true;
+ for (size_t c = 0; c < 3; c++) {
+ cparams.max_error[c] = shared.quantizer.MulDC()[c];
+ }
+ // Guess a distance that produces good initial results.
+ cparams.butteraugli_distance =
+ std::max(kMinButteraugliDistance,
+ enc_state->cparams.butteraugli_distance * 0.1f);
+ }
+ ImageBundle ib(&shared.metadata->m);
+ // This is a lie - dc is in XYB
+ // (but EncodeFrame will skip RGB->XYB conversion anyway)
+ ib.SetFromImage(
+ std::move(dc),
+ ColorEncoding::LinearSRGB(shared.metadata->m.color_encoding.IsGray()));
+ if (!ib.metadata()->extra_channel_info.empty()) {
+ // Add dummy extra channels to the patch image: dc_level frames do not yet
+ // support extra channels, but the codec expects that the amount of extra
+ // channels in frames matches that in the metadata of the codestream.
+ std::vector<ImageF> extra_channels;
+ extra_channels.reserve(ib.metadata()->extra_channel_info.size());
+ for (size_t i = 0; i < ib.metadata()->extra_channel_info.size(); i++) {
+ extra_channels.emplace_back(ib.xsize(), ib.ysize());
+ // Must initialize the image with data to not affect blending with
+ // uninitialized memory.
+ // TODO(lode): dc_level must copy and use the real extra channels
+ // instead.
+ ZeroFillImage(&extra_channels.back());
+ }
+ ib.SetExtraChannels(std::move(extra_channels));
+ }
+ std::unique_ptr<PassesEncoderState> state =
+ jxl::make_unique<PassesEncoderState>();
+
+ auto special_frame = std::unique_ptr<BitWriter>(new BitWriter());
+ FrameInfo dc_frame_info;
+ dc_frame_info.frame_type = FrameType::kDCFrame;
+ dc_frame_info.dc_level = shared.frame_header.dc_level + 1;
+ dc_frame_info.ib_needs_color_transform = false;
+ dc_frame_info.save_before_color_transform = true; // Implicitly true
+ AuxOut dc_aux_out;
+ if (aux_out) {
+ dc_aux_out.debug_prefix = aux_out->debug_prefix;
+ }
+ JXL_CHECK(EncodeFrame(cparams, dc_frame_info, shared.metadata, ib,
+ state.get(), cms, pool, special_frame.get(),
+ aux_out ? &dc_aux_out : nullptr));
+ if (aux_out) {
+ for (const auto& l : dc_aux_out.layers) {
+ aux_out->layers[kLayerDC].Assimilate(l);
+ }
+ }
+ const Span<const uint8_t> encoded = special_frame->GetSpan();
+ enc_state->special_frames.emplace_back(std::move(special_frame));
+
+ ImageBundle decoded(&shared.metadata->m);
+ std::unique_ptr<PassesDecoderState> dec_state =
+ jxl::make_unique<PassesDecoderState>();
+ JXL_CHECK(
+ dec_state->output_encoding_info.SetFromMetadata(*shared.metadata));
+ const uint8_t* frame_start = encoded.data();
+ size_t encoded_size = encoded.size();
+ for (int i = 0; i <= cparams.progressive_dc; ++i) {
+ JXL_CHECK(DecodeFrame(dec_state.get(), pool, frame_start, encoded_size,
+ &decoded, *shared.metadata));
+ frame_start += decoded.decoded_bytes();
+ encoded_size -= decoded.decoded_bytes();
+ }
+ // TODO(lode): shared.frame_header.dc_level should be equal to
+ // dec_state.shared->frame_header.dc_level - 1 here, since above we set
+ // dc_frame_info.dc_level = shared.frame_header.dc_level + 1, and
+ // dc_frame_info.dc_level is used by EncodeFrame. However, if EncodeFrame
+ // outputs multiple frames, this assumption could be wrong.
+ shared.dc_storage =
+ CopyImage(dec_state->shared->dc_frames[shared.frame_header.dc_level]);
+ ZeroFillImage(&shared.quant_dc);
+ shared.dc = &shared.dc_storage;
+ JXL_CHECK(encoded_size == 0);
+ } else {
+ auto compute_dc_coeffs = [&](int group_index, int /* thread */) {
+ modular_frame_encoder->AddVarDCTDC(
+ dc, group_index, enc_state->cparams.speed_tier < SpeedTier::kFalcon,
+ enc_state, /*jpeg_transcode=*/false);
+ };
+ JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, shared.frame_dim.num_dc_groups,
+ ThreadPool::NoInit, compute_dc_coeffs,
+ "Compute DC coeffs"));
+ // TODO(veluca): this is only useful in tests and if inspection is enabled.
+ if (!(shared.frame_header.flags & FrameHeader::kSkipAdaptiveDCSmoothing)) {
+ AdaptiveDCSmoothing(shared.quantizer.MulDC(), &shared.dc_storage, pool);
+ }
+ }
+ auto compute_ac_meta = [&](int group_index, int /* thread */) {
+ modular_frame_encoder->AddACMetadata(group_index, /*jpeg_transcode=*/false,
+ enc_state);
+ };
+ JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, shared.frame_dim.num_dc_groups,
+ ThreadPool::NoInit, compute_ac_meta,
+ "Compute AC Metadata"));
+
+ if (aux_out != nullptr) {
+ aux_out->InspectImage3F("compressed_image:InitializeFrameEncCache:dc_dec",
+ shared.dc_storage);
+ }
+ return true;
+}
+
+void EncCache::InitOnce() {
+ PROFILER_FUNC;
+
+ if (num_nzeroes.xsize() == 0) {
+ num_nzeroes = Image3I(kGroupDimInBlocks, kGroupDimInBlocks);
+ }
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_cache.h b/third_party/jpeg-xl/lib/jxl/enc_cache.h
new file mode 100644
index 0000000000..6c7870ba00
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_cache.h
@@ -0,0 +1,93 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_CACHE_H_
+#define LIB_JXL_ENC_CACHE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_heuristics.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_progressive_split.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/passes_state.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Contains encoder state.
+struct PassesEncoderState {
+ PassesSharedState shared;
+
+ ImageF initial_quant_field; // Invalid in Falcon mode.
+ ImageF initial_quant_masking; // Invalid in Falcon mode.
+
+ // Per-pass DCT coefficients for the image. One row per group.
+ std::vector<std::unique_ptr<ACImage>> coeffs;
+
+ // Raw data for special (reference+DC) frames.
+ std::vector<std::unique_ptr<BitWriter>> special_frames;
+
+ // For splitting into passes.
+ ProgressiveSplitter progressive_splitter;
+
+ CompressParams cparams;
+
+ struct PassData {
+ std::vector<std::vector<Token>> ac_tokens;
+ std::vector<uint8_t> context_map;
+ EntropyEncodingData codes;
+ };
+
+ std::vector<PassData> passes;
+ std::vector<uint8_t> histogram_idx;
+
+ // Coefficient orders that are non-default.
+ std::vector<uint32_t> used_orders;
+
+ // Multiplier to be applied to the quant matrices of the x channel.
+ float x_qm_multiplier = 1.0f;
+ float b_qm_multiplier = 1.0f;
+
+ // Heuristics to be used by the encoder.
+ std::unique_ptr<EncoderHeuristics> heuristics =
+ make_unique<DefaultEncoderHeuristics>();
+};
+
+// Initialize per-frame information.
+class ModularFrameEncoder;
+Status InitializePassesEncoder(const Image3F& opsin, const JxlCmsInterface& cms,
+ ThreadPool* pool,
+ PassesEncoderState* passes_enc_state,
+ ModularFrameEncoder* modular_frame_encoder,
+ AuxOut* aux_out);
+
+// Working area for ComputeCoefficients (per-group!)
+struct EncCache {
+ // Allocates memory when first called, shrinks images to current group size.
+ void InitOnce();
+
+ // TokenizeCoefficients
+ Image3I num_nzeroes;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_CACHE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_chroma_from_luma.cc b/third_party/jpeg-xl/lib/jxl/enc_chroma_from_luma.cc
new file mode 100644
index 0000000000..0cdd2a7823
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_chroma_from_luma.cc
@@ -0,0 +1,409 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_chroma_from_luma.h"
+
+#include <float.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_chroma_from_luma.cc"
+#include <hwy/aligned_allocator.h>
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_transforms-inl.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_transforms-inl.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/quantizer.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Ge;
+using hwy::HWY_NAMESPACE::GetLane;
+using hwy::HWY_NAMESPACE::IfThenElse;
+using hwy::HWY_NAMESPACE::Lt;
+
+static HWY_FULL(float) df;
+
+struct CFLFunction {
+ static constexpr float kCoeff = 1.f / 3;
+ static constexpr float kThres = 100.0f;
+ static constexpr float kInvColorFactor = 1.0f / kDefaultColorFactor;
+ CFLFunction(const float* values_m, const float* values_s, size_t num,
+ float base, float distance_mul)
+ : values_m(values_m),
+ values_s(values_s),
+ num(num),
+ base(base),
+ distance_mul(distance_mul) {}
+
+ // Returns f'(x), where f is 1/3 * sum ((|color residual| + 1)^2-1) +
+ // distance_mul * x^2 * num.
+ float Compute(float x, float eps, float* fpeps, float* fmeps) const {
+ float first_derivative = 2 * distance_mul * num * x;
+ float first_derivative_peps = 2 * distance_mul * num * (x + eps);
+ float first_derivative_meps = 2 * distance_mul * num * (x - eps);
+
+ const auto inv_color_factor = Set(df, kInvColorFactor);
+ const auto thres = Set(df, kThres);
+ const auto coeffx2 = Set(df, kCoeff * 2.0f);
+ const auto one = Set(df, 1.0f);
+ const auto zero = Set(df, 0.0f);
+ const auto base_v = Set(df, base);
+ const auto x_v = Set(df, x);
+ const auto xpe_v = Set(df, x + eps);
+ const auto xme_v = Set(df, x - eps);
+ auto fd_v = Zero(df);
+ auto fdpe_v = Zero(df);
+ auto fdme_v = Zero(df);
+ JXL_ASSERT(num % Lanes(df) == 0);
+
+ for (size_t i = 0; i < num; i += Lanes(df)) {
+ // color residual = ax + b
+ const auto a = Mul(inv_color_factor, Load(df, values_m + i));
+ const auto b =
+ Sub(Mul(base_v, Load(df, values_m + i)), Load(df, values_s + i));
+ const auto v = MulAdd(a, x_v, b);
+ const auto vpe = MulAdd(a, xpe_v, b);
+ const auto vme = MulAdd(a, xme_v, b);
+ const auto av = Abs(v);
+ const auto avpe = Abs(vpe);
+ const auto avme = Abs(vme);
+ const auto acoeffx2 = Mul(coeffx2, a);
+ auto d = Mul(acoeffx2, Add(av, one));
+ auto dpe = Mul(acoeffx2, Add(avpe, one));
+ auto dme = Mul(acoeffx2, Add(avme, one));
+ d = IfThenElse(Lt(v, zero), Sub(zero, d), d);
+ dpe = IfThenElse(Lt(vpe, zero), Sub(zero, dpe), dpe);
+ dme = IfThenElse(Lt(vme, zero), Sub(zero, dme), dme);
+ const auto above = Ge(av, thres);
+ // TODO(eustas): use IfThenElseZero
+ fd_v = Add(fd_v, IfThenElse(above, zero, d));
+ fdpe_v = Add(fdpe_v, IfThenElse(above, zero, dpe));
+ fdme_v = Add(fdme_v, IfThenElse(above, zero, dme));
+ }
+
+ *fpeps = first_derivative_peps + GetLane(SumOfLanes(df, fdpe_v));
+ *fmeps = first_derivative_meps + GetLane(SumOfLanes(df, fdme_v));
+ return first_derivative + GetLane(SumOfLanes(df, fd_v));
+ }
+
+ const float* JXL_RESTRICT values_m;
+ const float* JXL_RESTRICT values_s;
+ size_t num;
+ float base;
+ float distance_mul;
+};
+
+// Chroma-from-luma search, values_m will have luma -- and values_s chroma.
+int32_t FindBestMultiplier(const float* values_m, const float* values_s,
+ size_t num, float base, float distance_mul,
+ bool fast) {
+ if (num == 0) {
+ return 0;
+ }
+ float x;
+ if (fast) {
+ static constexpr float kInvColorFactor = 1.0f / kDefaultColorFactor;
+ auto ca = Zero(df);
+ auto cb = Zero(df);
+ const auto inv_color_factor = Set(df, kInvColorFactor);
+ const auto base_v = Set(df, base);
+ for (size_t i = 0; i < num; i += Lanes(df)) {
+ // color residual = ax + b
+ const auto a = Mul(inv_color_factor, Load(df, values_m + i));
+ const auto b =
+ Sub(Mul(base_v, Load(df, values_m + i)), Load(df, values_s + i));
+ ca = MulAdd(a, a, ca);
+ cb = MulAdd(a, b, cb);
+ }
+ // + distance_mul * x^2 * num
+ x = -GetLane(SumOfLanes(df, cb)) /
+ (GetLane(SumOfLanes(df, ca)) + num * distance_mul * 0.5f);
+ } else {
+ constexpr float eps = 100;
+ constexpr float kClamp = 20.0f;
+ CFLFunction fn(values_m, values_s, num, base, distance_mul);
+ x = 0;
+ // Up to 20 Newton iterations, with approximate derivatives.
+ // Derivatives are approximate due to the high amount of noise in the exact
+ // derivatives.
+ for (size_t i = 0; i < 20; i++) {
+ float dfpeps, dfmeps;
+ float df = fn.Compute(x, eps, &dfpeps, &dfmeps);
+ float ddf = (dfpeps - dfmeps) / (2 * eps);
+ float kExperimentalInsignificantStabilizer = 0.85;
+ float step = df / (ddf + kExperimentalInsignificantStabilizer);
+ x -= std::min(kClamp, std::max(-kClamp, step));
+ if (std::abs(step) < 3e-3) break;
+ }
+ }
+ // CFL seems to be tricky for larger transforms for HF components
+ // close to zero. This heuristic brings the solutions closer to zero
+ // and reduces red-green oscillations.
+ float towards_zero = 2.6;
+ if (x >= towards_zero) {
+ x -= towards_zero;
+ } else if (x <= -towards_zero) {
+ x += towards_zero;
+ } else {
+ x = 0;
+ }
+ return std::max(-128.0f, std::min(127.0f, roundf(x)));
+}
+
+void InitDCStorage(size_t num_blocks, ImageF* dc_values) {
+ // First row: Y channel
+ // Second row: X channel
+ // Third row: Y channel
+ // Fourth row: B channel
+ *dc_values = ImageF(RoundUpTo(num_blocks, Lanes(df)), 4);
+
+ JXL_ASSERT(dc_values->xsize() != 0);
+ // Zero-fill the last lanes
+ for (size_t y = 0; y < 4; y++) {
+ for (size_t x = dc_values->xsize() - Lanes(df); x < dc_values->xsize();
+ x++) {
+ dc_values->Row(y)[x] = 0;
+ }
+ }
+}
+
+void ComputeDC(const ImageF& dc_values, bool fast, int32_t* dc_x,
+ int32_t* dc_b) {
+ constexpr float kDistanceMultiplierDC = 1e-5f;
+ const float* JXL_RESTRICT dc_values_yx = dc_values.Row(0);
+ const float* JXL_RESTRICT dc_values_x = dc_values.Row(1);
+ const float* JXL_RESTRICT dc_values_yb = dc_values.Row(2);
+ const float* JXL_RESTRICT dc_values_b = dc_values.Row(3);
+ *dc_x = FindBestMultiplier(dc_values_yx, dc_values_x, dc_values.xsize(), 0.0f,
+ kDistanceMultiplierDC, fast);
+ *dc_b = FindBestMultiplier(dc_values_yb, dc_values_b, dc_values.xsize(),
+ kYToBRatio, kDistanceMultiplierDC, fast);
+}
+
+void ComputeTile(const Image3F& opsin, const DequantMatrices& dequant,
+ const AcStrategyImage* ac_strategy,
+ const ImageI* raw_quant_field, const Quantizer* quantizer,
+ const Rect& r, bool fast, bool use_dct8, ImageSB* map_x,
+ ImageSB* map_b, ImageF* dc_values, float* mem) {
+ static_assert(kEncTileDimInBlocks == kColorTileDimInBlocks,
+ "Invalid color tile dim");
+ size_t xsize_blocks = opsin.xsize() / kBlockDim;
+ constexpr float kDistanceMultiplierAC = 1e-9f;
+
+ const size_t y0 = r.y0();
+ const size_t x0 = r.x0();
+ const size_t x1 = r.x0() + r.xsize();
+ const size_t y1 = r.y0() + r.ysize();
+
+ int ty = y0 / kColorTileDimInBlocks;
+ int tx = x0 / kColorTileDimInBlocks;
+
+ int8_t* JXL_RESTRICT row_out_x = map_x->Row(ty);
+ int8_t* JXL_RESTRICT row_out_b = map_b->Row(ty);
+
+ float* JXL_RESTRICT dc_values_yx = dc_values->Row(0);
+ float* JXL_RESTRICT dc_values_x = dc_values->Row(1);
+ float* JXL_RESTRICT dc_values_yb = dc_values->Row(2);
+ float* JXL_RESTRICT dc_values_b = dc_values->Row(3);
+
+ // All are aligned.
+ float* HWY_RESTRICT block_y = mem;
+ float* HWY_RESTRICT block_x = block_y + AcStrategy::kMaxCoeffArea;
+ float* HWY_RESTRICT block_b = block_x + AcStrategy::kMaxCoeffArea;
+ float* HWY_RESTRICT coeffs_yx = block_b + AcStrategy::kMaxCoeffArea;
+ float* HWY_RESTRICT coeffs_x = coeffs_yx + kColorTileDim * kColorTileDim;
+ float* HWY_RESTRICT coeffs_yb = coeffs_x + kColorTileDim * kColorTileDim;
+ float* HWY_RESTRICT coeffs_b = coeffs_yb + kColorTileDim * kColorTileDim;
+ float* HWY_RESTRICT scratch_space = coeffs_b + kColorTileDim * kColorTileDim;
+ JXL_DASSERT(scratch_space + 2 * AcStrategy::kMaxCoeffArea ==
+ block_y + CfLHeuristics::kItemsPerThread);
+
+ // Small (~256 bytes each)
+ HWY_ALIGN_MAX float
+ dc_y[AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks] = {};
+ HWY_ALIGN_MAX float
+ dc_x[AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks] = {};
+ HWY_ALIGN_MAX float
+ dc_b[AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks] = {};
+ size_t num_ac = 0;
+
+ for (size_t y = y0; y < y1; ++y) {
+ const float* JXL_RESTRICT row_y = opsin.ConstPlaneRow(1, y * kBlockDim);
+ const float* JXL_RESTRICT row_x = opsin.ConstPlaneRow(0, y * kBlockDim);
+ const float* JXL_RESTRICT row_b = opsin.ConstPlaneRow(2, y * kBlockDim);
+ size_t stride = opsin.PixelsPerRow();
+
+ for (size_t x = x0; x < x1; x++) {
+ AcStrategy acs = use_dct8
+ ? AcStrategy::FromRawStrategy(AcStrategy::Type::DCT)
+ : ac_strategy->ConstRow(y)[x];
+ if (!acs.IsFirstBlock()) continue;
+ size_t xs = acs.covered_blocks_x();
+ TransformFromPixels(acs.Strategy(), row_y + x * kBlockDim, stride,
+ block_y, scratch_space);
+ DCFromLowestFrequencies(acs.Strategy(), block_y, dc_y, xs);
+ TransformFromPixels(acs.Strategy(), row_x + x * kBlockDim, stride,
+ block_x, scratch_space);
+ DCFromLowestFrequencies(acs.Strategy(), block_x, dc_x, xs);
+ TransformFromPixels(acs.Strategy(), row_b + x * kBlockDim, stride,
+ block_b, scratch_space);
+ DCFromLowestFrequencies(acs.Strategy(), block_b, dc_b, xs);
+ const float* const JXL_RESTRICT qm_x =
+ dequant.InvMatrix(acs.Strategy(), 0);
+ const float* const JXL_RESTRICT qm_b =
+ dequant.InvMatrix(acs.Strategy(), 2);
+ float q_dc_x = use_dct8 ? 1 : 1.0f / quantizer->GetInvDcStep(0);
+ float q_dc_b = use_dct8 ? 1 : 1.0f / quantizer->GetInvDcStep(2);
+
+ // Copy DCs in dc_values.
+ for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+ for (size_t ix = 0; ix < xs; ix++) {
+ dc_values_yx[(iy + y) * xsize_blocks + ix + x] =
+ dc_y[iy * xs + ix] * q_dc_x;
+ dc_values_x[(iy + y) * xsize_blocks + ix + x] =
+ dc_x[iy * xs + ix] * q_dc_x;
+ dc_values_yb[(iy + y) * xsize_blocks + ix + x] =
+ dc_y[iy * xs + ix] * q_dc_b;
+ dc_values_b[(iy + y) * xsize_blocks + ix + x] =
+ dc_b[iy * xs + ix] * q_dc_b;
+ }
+ }
+
+ // Do not use this block for computing AC CfL.
+ if (acs.covered_blocks_x() + x0 > x1 ||
+ acs.covered_blocks_y() + y0 > y1) {
+ continue;
+ }
+
+ // Copy AC coefficients in the local block. The order in which
+ // coefficients get stored does not matter.
+ size_t cx = acs.covered_blocks_x();
+ size_t cy = acs.covered_blocks_y();
+ CoefficientLayout(&cy, &cx);
+ // Zero out LFs. This introduces terms in the optimization loop that
+ // don't affect the result, as they are all 0, but allow for simpler
+ // SIMDfication.
+ for (size_t iy = 0; iy < cy; iy++) {
+ for (size_t ix = 0; ix < cx; ix++) {
+ block_y[cx * kBlockDim * iy + ix] = 0;
+ block_x[cx * kBlockDim * iy + ix] = 0;
+ block_b[cx * kBlockDim * iy + ix] = 0;
+ }
+ }
+ // Unclear why this is like it is. (This works slightly better
+ // than the previous approach which was also a hack.)
+ const float qq =
+ (raw_quant_field == nullptr) ? 1.0f : raw_quant_field->Row(y)[x];
+ // Experimentally values 128-130 seem best -- I don't know why we
+ // need this multiplier.
+ const float kStrangeMultiplier = 128;
+ float q = use_dct8 ? 1 : quantizer->Scale() * kStrangeMultiplier * qq;
+ const auto qv = Set(df, q);
+ for (size_t i = 0; i < cx * cy * 64; i += Lanes(df)) {
+ const auto b_y = Load(df, block_y + i);
+ const auto b_x = Load(df, block_x + i);
+ const auto b_b = Load(df, block_b + i);
+ const auto qqm_x = Mul(qv, Load(df, qm_x + i));
+ const auto qqm_b = Mul(qv, Load(df, qm_b + i));
+ Store(Mul(b_y, qqm_x), df, coeffs_yx + num_ac);
+ Store(Mul(b_x, qqm_x), df, coeffs_x + num_ac);
+ Store(Mul(b_y, qqm_b), df, coeffs_yb + num_ac);
+ Store(Mul(b_b, qqm_b), df, coeffs_b + num_ac);
+ num_ac += Lanes(df);
+ }
+ }
+ }
+ JXL_CHECK(num_ac % Lanes(df) == 0);
+ row_out_x[tx] = FindBestMultiplier(coeffs_yx, coeffs_x, num_ac, 0.0f,
+ kDistanceMultiplierAC, fast);
+ row_out_b[tx] = FindBestMultiplier(coeffs_yb, coeffs_b, num_ac, kYToBRatio,
+ kDistanceMultiplierAC, fast);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(InitDCStorage);
+HWY_EXPORT(ComputeDC);
+HWY_EXPORT(ComputeTile);
+
+void CfLHeuristics::Init(const Image3F& opsin) {
+ size_t xsize_blocks = opsin.xsize() / kBlockDim;
+ size_t ysize_blocks = opsin.ysize() / kBlockDim;
+ HWY_DYNAMIC_DISPATCH(InitDCStorage)
+ (xsize_blocks * ysize_blocks, &dc_values);
+}
+
+void CfLHeuristics::ComputeTile(const Rect& r, const Image3F& opsin,
+ const DequantMatrices& dequant,
+ const AcStrategyImage* ac_strategy,
+ const ImageI* raw_quant_field,
+ const Quantizer* quantizer, bool fast,
+ size_t thread, ColorCorrelationMap* cmap) {
+ bool use_dct8 = ac_strategy == nullptr;
+ HWY_DYNAMIC_DISPATCH(ComputeTile)
+ (opsin, dequant, ac_strategy, raw_quant_field, quantizer, r, fast, use_dct8,
+ &cmap->ytox_map, &cmap->ytob_map, &dc_values,
+ mem.get() + thread * kItemsPerThread);
+}
+
+void CfLHeuristics::ComputeDC(bool fast, ColorCorrelationMap* cmap) {
+ int32_t ytob_dc = 0;
+ int32_t ytox_dc = 0;
+ HWY_DYNAMIC_DISPATCH(ComputeDC)(dc_values, fast, &ytox_dc, &ytob_dc);
+ cmap->SetYToBDC(ytob_dc);
+ cmap->SetYToXDC(ytox_dc);
+}
+
+void ColorCorrelationMapEncodeDC(ColorCorrelationMap* map, BitWriter* writer,
+ size_t layer, AuxOut* aux_out) {
+ float color_factor = map->GetColorFactor();
+ float base_correlation_x = map->GetBaseCorrelationX();
+ float base_correlation_b = map->GetBaseCorrelationB();
+ int32_t ytox_dc = map->GetYToXDC();
+ int32_t ytob_dc = map->GetYToBDC();
+
+ BitWriter::Allotment allotment(writer, 1 + 2 * kBitsPerByte + 12 + 32);
+ if (ytox_dc == 0 && ytob_dc == 0 && color_factor == kDefaultColorFactor &&
+ base_correlation_x == 0.0f && base_correlation_b == kYToBRatio) {
+ writer->Write(1, 1);
+ allotment.ReclaimAndCharge(writer, layer, aux_out);
+ return;
+ }
+ writer->Write(1, 0);
+ JXL_CHECK(U32Coder::Write(kColorFactorDist, color_factor, writer));
+ JXL_CHECK(F16Coder::Write(base_correlation_x, writer));
+ JXL_CHECK(F16Coder::Write(base_correlation_b, writer));
+ writer->Write(kBitsPerByte, ytox_dc - std::numeric_limits<int8_t>::min());
+ writer->Write(kBitsPerByte, ytob_dc - std::numeric_limits<int8_t>::min());
+ allotment.ReclaimAndCharge(writer, layer, aux_out);
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/enc_chroma_from_luma.h b/third_party/jpeg-xl/lib/jxl/enc_chroma_from_luma.h
new file mode 100644
index 0000000000..899b91b041
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_chroma_from_luma.h
@@ -0,0 +1,68 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_CHROMA_FROM_LUMA_H_
+#define LIB_JXL_ENC_CHROMA_FROM_LUMA_H_
+
+// Chroma-from-luma, computed using heuristics to determine the best linear
+// model for the X and B channels from the Y channel.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quant_weights.h"
+
+namespace jxl {
+
+struct AuxOut;
+class Quantizer;
+
+void ColorCorrelationMapEncodeDC(ColorCorrelationMap* map, BitWriter* writer,
+ size_t layer, AuxOut* aux_out);
+
+struct CfLHeuristics {
+ void Init(const Image3F& opsin);
+
+ void PrepareForThreads(size_t num_threads) {
+ mem = hwy::AllocateAligned<float>(num_threads * kItemsPerThread);
+ }
+
+ void ComputeTile(const Rect& r, const Image3F& opsin,
+ const DequantMatrices& dequant,
+ const AcStrategyImage* ac_strategy,
+ const ImageI* raw_quant_field, const Quantizer* quantizer,
+ bool fast, size_t thread, ColorCorrelationMap* cmap);
+
+ void ComputeDC(bool fast, ColorCorrelationMap* cmap);
+
+ ImageF dc_values;
+ hwy::AlignedFreeUniquePtr<float[]> mem;
+
+ // Working set is too large for stack; allocate dynamically.
+ constexpr static size_t kItemsPerThread =
+ AcStrategy::kMaxCoeffArea * 3 // Blocks
+ + kColorTileDim * kColorTileDim * 4 // AC coeff storage
+ + AcStrategy::kMaxCoeffArea * 2; // Scratch space
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_CHROMA_FROM_LUMA_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_cluster.cc b/third_party/jpeg-xl/lib/jxl/enc_cluster.cc
new file mode 100644
index 0000000000..c79b3ac834
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_cluster.cc
@@ -0,0 +1,295 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_cluster.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <map>
+#include <memory>
+#include <numeric>
+#include <queue>
+#include <tuple>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_cluster.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/fast_math-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Eq;
+using hwy::HWY_NAMESPACE::IfThenZeroElse;
+
+template <class V>
+V Entropy(V count, V inv_total, V total) {
+ const HWY_CAPPED(float, Histogram::kRounding) d;
+ const auto zero = Set(d, 0.0f);
+ // TODO(eustas): why (0 - x) instead of Neg(x)?
+ return IfThenZeroElse(
+ Eq(count, total),
+ Sub(zero, Mul(count, FastLog2f(d, Mul(inv_total, count)))));
+}
+
+void HistogramEntropy(const Histogram& a) {
+ a.entropy_ = 0.0f;
+ if (a.total_count_ == 0) return;
+
+ const HWY_CAPPED(float, Histogram::kRounding) df;
+ const HWY_CAPPED(int32_t, Histogram::kRounding) di;
+
+ const auto inv_tot = Set(df, 1.0f / a.total_count_);
+ auto entropy_lanes = Zero(df);
+ auto total = Set(df, a.total_count_);
+
+ for (size_t i = 0; i < a.data_.size(); i += Lanes(di)) {
+ const auto counts = LoadU(di, &a.data_[i]);
+ entropy_lanes =
+ Add(entropy_lanes, Entropy(ConvertTo(df, counts), inv_tot, total));
+ }
+ a.entropy_ += GetLane(SumOfLanes(df, entropy_lanes));
+}
+
+float HistogramDistance(const Histogram& a, const Histogram& b) {
+ if (a.total_count_ == 0 || b.total_count_ == 0) return 0;
+
+ const HWY_CAPPED(float, Histogram::kRounding) df;
+ const HWY_CAPPED(int32_t, Histogram::kRounding) di;
+
+ const auto inv_tot = Set(df, 1.0f / (a.total_count_ + b.total_count_));
+ auto distance_lanes = Zero(df);
+ auto total = Set(df, a.total_count_ + b.total_count_);
+
+ for (size_t i = 0; i < std::max(a.data_.size(), b.data_.size());
+ i += Lanes(di)) {
+ const auto a_counts =
+ a.data_.size() > i ? LoadU(di, &a.data_[i]) : Zero(di);
+ const auto b_counts =
+ b.data_.size() > i ? LoadU(di, &b.data_[i]) : Zero(di);
+ const auto counts = ConvertTo(df, Add(a_counts, b_counts));
+ distance_lanes = Add(distance_lanes, Entropy(counts, inv_tot, total));
+ }
+ const float total_distance = GetLane(SumOfLanes(df, distance_lanes));
+ return total_distance - a.entropy_ - b.entropy_;
+}
+
+// First step of a k-means clustering with a fancy distance metric.
+void FastClusterHistograms(const std::vector<Histogram>& in,
+ size_t max_histograms, std::vector<Histogram>* out,
+ std::vector<uint32_t>* histogram_symbols) {
+ PROFILER_FUNC;
+ out->clear();
+ out->reserve(max_histograms);
+ histogram_symbols->clear();
+ histogram_symbols->resize(in.size(), max_histograms);
+
+ std::vector<float> dists(in.size(), std::numeric_limits<float>::max());
+ size_t largest_idx = 0;
+ for (size_t i = 0; i < in.size(); i++) {
+ if (in[i].total_count_ == 0) {
+ (*histogram_symbols)[i] = 0;
+ dists[i] = 0.0f;
+ continue;
+ }
+ HistogramEntropy(in[i]);
+ if (in[i].total_count_ > in[largest_idx].total_count_) {
+ largest_idx = i;
+ }
+ }
+
+ constexpr float kMinDistanceForDistinct = 48.0f;
+ while (out->size() < max_histograms) {
+ (*histogram_symbols)[largest_idx] = out->size();
+ out->push_back(in[largest_idx]);
+ dists[largest_idx] = 0.0f;
+ largest_idx = 0;
+ for (size_t i = 0; i < in.size(); i++) {
+ if (dists[i] == 0.0f) continue;
+ dists[i] = std::min(HistogramDistance(in[i], out->back()), dists[i]);
+ if (dists[i] > dists[largest_idx]) largest_idx = i;
+ }
+ if (dists[largest_idx] < kMinDistanceForDistinct) break;
+ }
+
+ for (size_t i = 0; i < in.size(); i++) {
+ if ((*histogram_symbols)[i] != max_histograms) continue;
+ size_t best = 0;
+ float best_dist = HistogramDistance(in[i], (*out)[best]);
+ for (size_t j = 1; j < out->size(); j++) {
+ float dist = HistogramDistance(in[i], (*out)[j]);
+ if (dist < best_dist) {
+ best = j;
+ best_dist = dist;
+ }
+ }
+ (*out)[best].AddHistogram(in[i]);
+ HistogramEntropy((*out)[best]);
+ (*histogram_symbols)[i] = best;
+ }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(FastClusterHistograms); // Local function
+HWY_EXPORT(HistogramEntropy); // Local function
+
+float Histogram::ShannonEntropy() const {
+ HWY_DYNAMIC_DISPATCH(HistogramEntropy)(*this);
+ return entropy_;
+}
+
+namespace {
+// -----------------------------------------------------------------------------
+// Histogram refinement
+
+// Reorder histograms in *out so that the new symbols in *symbols come in
+// increasing order.
+void HistogramReindex(std::vector<Histogram>* out,
+ std::vector<uint32_t>* symbols) {
+ std::vector<Histogram> tmp(*out);
+ std::map<int, int> new_index;
+ int next_index = 0;
+ for (uint32_t symbol : *symbols) {
+ if (new_index.find(symbol) == new_index.end()) {
+ new_index[symbol] = next_index;
+ (*out)[next_index] = tmp[symbol];
+ ++next_index;
+ }
+ }
+ out->resize(next_index);
+ for (uint32_t& symbol : *symbols) {
+ symbol = new_index[symbol];
+ }
+}
+
+} // namespace
+
+// Clusters similar histograms in 'in' together, the selected histograms are
+// placed in 'out', and for each index in 'in', *histogram_symbols will
+// indicate which of the 'out' histograms is the best approximation.
+void ClusterHistograms(const HistogramParams params,
+ const std::vector<Histogram>& in, size_t max_histograms,
+ std::vector<Histogram>* out,
+ std::vector<uint32_t>* histogram_symbols) {
+ max_histograms = std::min(max_histograms, params.max_histograms);
+ max_histograms = std::min(max_histograms, in.size());
+ if (params.clustering == HistogramParams::ClusteringType::kFastest) {
+ max_histograms = std::min(max_histograms, static_cast<size_t>(4));
+ }
+
+ HWY_DYNAMIC_DISPATCH(FastClusterHistograms)
+ (in, max_histograms, out, histogram_symbols);
+
+ if (params.clustering == HistogramParams::ClusteringType::kBest) {
+ for (size_t i = 0; i < out->size(); i++) {
+ (*out)[i].entropy_ =
+ ANSPopulationCost((*out)[i].data_.data(), (*out)[i].data_.size());
+ }
+ uint32_t next_version = 2;
+ std::vector<uint32_t> version(out->size(), 1);
+ std::vector<uint32_t> renumbering(out->size());
+ std::iota(renumbering.begin(), renumbering.end(), 0);
+
+ // Try to pair up clusters if doing so reduces the total cost.
+
+ struct HistogramPair {
+ // validity of a pair: p.version == max(version[i], version[j])
+ float cost;
+ uint32_t first;
+ uint32_t second;
+ uint32_t version;
+ // We use > because priority queues sort in *decreasing* order, but we
+ // want lower cost elements to appear first.
+ bool operator<(const HistogramPair& other) const {
+ return std::make_tuple(cost, first, second, version) >
+ std::make_tuple(other.cost, other.first, other.second,
+ other.version);
+ }
+ };
+
+ // Create list of all pairs by increasing merging cost.
+ std::priority_queue<HistogramPair> pairs_to_merge;
+ for (uint32_t i = 0; i < out->size(); i++) {
+ for (uint32_t j = i + 1; j < out->size(); j++) {
+ Histogram histo;
+ histo.AddHistogram((*out)[i]);
+ histo.AddHistogram((*out)[j]);
+ float cost = ANSPopulationCost(histo.data_.data(), histo.data_.size()) -
+ (*out)[i].entropy_ - (*out)[j].entropy_;
+ // Avoid enqueueing pairs that are not advantageous to merge.
+ if (cost >= 0) continue;
+ pairs_to_merge.push(
+ HistogramPair{cost, i, j, std::max(version[i], version[j])});
+ }
+ }
+
+ // Merge the best pair to merge, add new pairs that get formed as a
+ // consequence.
+ while (!pairs_to_merge.empty()) {
+ uint32_t first = pairs_to_merge.top().first;
+ uint32_t second = pairs_to_merge.top().second;
+ uint32_t ver = pairs_to_merge.top().version;
+ pairs_to_merge.pop();
+ if (ver != std::max(version[first], version[second]) ||
+ version[first] == 0 || version[second] == 0) {
+ continue;
+ }
+ (*out)[first].AddHistogram((*out)[second]);
+ (*out)[first].entropy_ = ANSPopulationCost((*out)[first].data_.data(),
+ (*out)[first].data_.size());
+ for (size_t i = 0; i < renumbering.size(); i++) {
+ if (renumbering[i] == second) {
+ renumbering[i] = first;
+ }
+ }
+ version[second] = 0;
+ version[first] = next_version++;
+ for (uint32_t j = 0; j < out->size(); j++) {
+ if (j == first) continue;
+ if (version[j] == 0) continue;
+ Histogram histo;
+ histo.AddHistogram((*out)[first]);
+ histo.AddHistogram((*out)[j]);
+ float cost = ANSPopulationCost(histo.data_.data(), histo.data_.size()) -
+ (*out)[first].entropy_ - (*out)[j].entropy_;
+ // Avoid enqueueing pairs that are not advantageous to merge.
+ if (cost >= 0) continue;
+ pairs_to_merge.push(
+ HistogramPair{cost, std::min(first, j), std::max(first, j),
+ std::max(version[first], version[j])});
+ }
+ }
+ std::vector<uint32_t> reverse_renumbering(out->size(), -1);
+ size_t num_alive = 0;
+ for (size_t i = 0; i < out->size(); i++) {
+ if (version[i] == 0) continue;
+ (*out)[num_alive++] = (*out)[i];
+ reverse_renumbering[i] = num_alive - 1;
+ }
+ out->resize(num_alive);
+ for (size_t i = 0; i < histogram_symbols->size(); i++) {
+ (*histogram_symbols)[i] =
+ reverse_renumbering[renumbering[(*histogram_symbols)[i]]];
+ }
+ }
+
+ // Convert the context map to a canonical form.
+ HistogramReindex(out, histogram_symbols);
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/enc_cluster.h b/third_party/jpeg-xl/lib/jxl/enc_cluster.h
new file mode 100644
index 0000000000..4b062e820c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_cluster.h
@@ -0,0 +1,63 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Functions for clustering similar histograms together.
+
+#ifndef LIB_JXL_ENC_CLUSTER_H_
+#define LIB_JXL_ENC_CLUSTER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/enc_ans.h"
+
+namespace jxl {
+
+struct Histogram {
+ Histogram() {
+ total_count_ = 0;
+ entropy_ = 0.0;
+ }
+ void Clear() {
+ data_.clear();
+ total_count_ = 0;
+ }
+ void Add(size_t symbol) {
+ if (data_.size() <= symbol) {
+ data_.resize(DivCeil(symbol + 1, kRounding) * kRounding);
+ }
+ ++data_[symbol];
+ ++total_count_;
+ }
+ void AddHistogram(const Histogram& other) {
+ if (other.data_.size() > data_.size()) {
+ data_.resize(other.data_.size());
+ }
+ for (size_t i = 0; i < other.data_.size(); ++i) {
+ data_[i] += other.data_[i];
+ }
+ total_count_ += other.total_count_;
+ }
+ float PopulationCost() const {
+ return ANSPopulationCost(data_.data(), data_.size());
+ }
+ float ShannonEntropy() const;
+
+ std::vector<ANSHistBin> data_;
+ size_t total_count_;
+ mutable float entropy_; // WARNING: not kept up-to-date.
+ static constexpr size_t kRounding = 8;
+};
+
+void ClusterHistograms(HistogramParams params, const std::vector<Histogram>& in,
+ size_t max_histograms, std::vector<Histogram>* out,
+ std::vector<uint32_t>* histogram_symbols);
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_CLUSTER_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_coeff_order.cc b/third_party/jpeg-xl/lib/jxl/enc_coeff_order.cc
new file mode 100644
index 0000000000..389b53598a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_coeff_order.cc
@@ -0,0 +1,291 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <hwy/aligned_allocator.h>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/lehmer_code.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+std::pair<uint32_t, uint32_t> ComputeUsedOrders(
+ const SpeedTier speed, const AcStrategyImage& ac_strategy,
+ const Rect& rect) {
+ // Only uses DCT8 = 0, so bitfield = 1.
+ if (speed >= SpeedTier::kFalcon) return {1, 1};
+
+ uint32_t ret = 0;
+ uint32_t ret_customize = 0;
+ size_t xsize_blocks = rect.xsize();
+ size_t ysize_blocks = rect.ysize();
+ // TODO(veluca): precompute when doing DCT.
+ for (size_t by = 0; by < ysize_blocks; ++by) {
+ AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
+ for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+ int ord = kStrategyOrder[acs_row[bx].RawStrategy()];
+ // Do not customize coefficient orders for blocks bigger than 32x32.
+ ret |= 1u << ord;
+ if (ord > 6) {
+ continue;
+ }
+ ret_customize |= 1u << ord;
+ }
+ }
+ // Use default orders for small images.
+ if (ac_strategy.xsize() < 5 && ac_strategy.ysize() < 5) return {ret, 0};
+ return {ret, ret_customize};
+}
+
+void ComputeCoeffOrder(SpeedTier speed, const ACImage& acs,
+ const AcStrategyImage& ac_strategy,
+ const FrameDimensions& frame_dim, uint32_t& used_orders,
+ uint16_t used_acs, coeff_order_t* JXL_RESTRICT order) {
+ std::vector<int32_t> num_zeros(kCoeffOrderMaxSize);
+ // If compressing at high speed and only using 8x8 DCTs, only consider a
+ // subset of blocks.
+ double block_fraction = 1.0f;
+ // TODO(veluca): figure out why sampling blocks if non-8x8s are used makes
+ // encoding significantly less dense.
+ if (speed >= SpeedTier::kSquirrel && used_orders == 1) {
+ block_fraction = 0.5f;
+ }
+ // No need to compute number of zero coefficients if all orders are the
+ // default.
+ if (used_orders != 0) {
+ uint64_t threshold =
+ (std::numeric_limits<uint64_t>::max() >> 32) * block_fraction;
+ uint64_t s[2] = {static_cast<uint64_t>(0x94D049BB133111EBull),
+ static_cast<uint64_t>(0xBF58476D1CE4E5B9ull)};
+ // Xorshift128+ adapted from xorshift128+-inl.h
+ auto use_sample = [&]() {
+ auto s1 = s[0];
+ const auto s0 = s[1];
+ const auto bits = s1 + s0; // b, c
+ s[0] = s0;
+ s1 ^= s1 << 23;
+ s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
+ s[1] = s1;
+ return (bits >> 32) <= threshold;
+ };
+
+ // Count number of zero coefficients, separately for each DCT band.
+ // TODO(veluca): precompute when doing DCT.
+ for (size_t group_index = 0; group_index < frame_dim.num_groups;
+ group_index++) {
+ const size_t gx = group_index % frame_dim.xsize_groups;
+ const size_t gy = group_index / frame_dim.xsize_groups;
+ const Rect rect(gx * kGroupDimInBlocks, gy * kGroupDimInBlocks,
+ kGroupDimInBlocks, kGroupDimInBlocks,
+ frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+ ConstACPtr rows[3];
+ ACType type = acs.Type();
+ for (size_t c = 0; c < 3; c++) {
+ rows[c] = acs.PlaneRow(c, group_index, 0);
+ }
+ size_t ac_offset = 0;
+
+ // TODO(veluca): SIMDfy.
+ for (size_t by = 0; by < rect.ysize(); ++by) {
+ AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
+ for (size_t bx = 0; bx < rect.xsize(); ++bx) {
+ AcStrategy acs = acs_row[bx];
+ if (!acs.IsFirstBlock()) continue;
+ if (!use_sample()) continue;
+ size_t size = kDCTBlockSize << acs.log2_covered_blocks();
+ for (size_t c = 0; c < 3; ++c) {
+ const size_t order_offset =
+ CoeffOrderOffset(kStrategyOrder[acs.RawStrategy()], c);
+ if (type == ACType::k16) {
+ for (size_t k = 0; k < size; k++) {
+ bool is_zero = rows[c].ptr16[ac_offset + k] == 0;
+ num_zeros[order_offset + k] += is_zero ? 1 : 0;
+ }
+ } else {
+ for (size_t k = 0; k < size; k++) {
+ bool is_zero = rows[c].ptr32[ac_offset + k] == 0;
+ num_zeros[order_offset + k] += is_zero ? 1 : 0;
+ }
+ }
+ // Ensure LLFs are first in the order.
+ size_t cx = acs.covered_blocks_x();
+ size_t cy = acs.covered_blocks_y();
+ CoefficientLayout(&cy, &cx);
+ for (size_t iy = 0; iy < cy; iy++) {
+ for (size_t ix = 0; ix < cx; ix++) {
+ num_zeros[order_offset + iy * kBlockDim * cx + ix] = -1;
+ }
+ }
+ }
+ ac_offset += size;
+ }
+ }
+ }
+ }
+ struct PosAndCount {
+ uint32_t pos;
+ uint32_t count;
+ };
+ auto mem = hwy::AllocateAligned<PosAndCount>(AcStrategy::kMaxCoeffArea);
+
+ std::vector<coeff_order_t> natural_order_buffer;
+
+ uint16_t computed = 0;
+ for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+ uint8_t ord = kStrategyOrder[o];
+ if (computed & (1 << ord)) continue;
+ computed |= 1 << ord;
+ AcStrategy acs = AcStrategy::FromRawStrategy(o);
+ size_t sz = kDCTBlockSize * acs.covered_blocks_x() * acs.covered_blocks_y();
+
+ // Do nothing for transforms that don't appear.
+ if ((1 << ord) & ~used_acs) continue;
+
+ if (natural_order_buffer.size() < sz) natural_order_buffer.resize(sz);
+ acs.ComputeNaturalCoeffOrder(natural_order_buffer.data());
+
+ // Ensure natural coefficient order is not permuted if the order is
+ // not transmitted.
+ if ((1 << ord) & ~used_orders) {
+ for (size_t c = 0; c < 3; c++) {
+ size_t offset = CoeffOrderOffset(ord, c);
+ JXL_DASSERT(CoeffOrderOffset(ord, c + 1) - offset == sz);
+ memcpy(&order[offset], natural_order_buffer.data(),
+ sz * sizeof(*order));
+ }
+ continue;
+ }
+
+ bool is_nondefault = false;
+ for (uint8_t c = 0; c < 3; c++) {
+ // Apply zig-zag order.
+ PosAndCount* pos_and_val = mem.get();
+ size_t offset = CoeffOrderOffset(ord, c);
+ JXL_DASSERT(CoeffOrderOffset(ord, c + 1) - offset == sz);
+ float inv_sqrt_sz = 1.0f / std::sqrt(sz);
+ for (size_t i = 0; i < sz; ++i) {
+ size_t pos = natural_order_buffer[i];
+ pos_and_val[i].pos = pos;
+ // We don't care for the exact number -> quantize number of zeros,
+ // to get less permuted order.
+ pos_and_val[i].count = num_zeros[offset + pos] * inv_sqrt_sz + 0.1f;
+ }
+
+ // Stable-sort -> elements with same number of zeros will preserve their
+ // order.
+ auto comparator = [](const PosAndCount& a, const PosAndCount& b) -> bool {
+ return a.count < b.count;
+ };
+ std::stable_sort(pos_and_val, pos_and_val + sz, comparator);
+
+ // Grab indices.
+ for (size_t i = 0; i < sz; ++i) {
+ order[offset + i] = pos_and_val[i].pos;
+ is_nondefault |= natural_order_buffer[i] != pos_and_val[i].pos;
+ }
+ }
+ if (!is_nondefault) {
+ used_orders &= ~(1 << ord);
+ }
+ }
+}
+
+namespace {
+
+void TokenizePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
+ size_t size, std::vector<Token>* tokens) {
+ std::vector<LehmerT> lehmer(size);
+ std::vector<uint32_t> temp(size + 1);
+ ComputeLehmerCode(order, temp.data(), size, lehmer.data());
+ size_t end = size;
+ while (end > skip && lehmer[end - 1] == 0) {
+ --end;
+ }
+ tokens->emplace_back(CoeffOrderContext(size), end - skip);
+ uint32_t last = 0;
+ for (size_t i = skip; i < end; ++i) {
+ tokens->emplace_back(CoeffOrderContext(last), lehmer[i]);
+ last = lehmer[i];
+ }
+}
+
+} // namespace
+
+void EncodePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
+ size_t size, BitWriter* writer, int layer,
+ AuxOut* aux_out) {
+ std::vector<std::vector<Token>> tokens(1);
+ TokenizePermutation(order, skip, size, &tokens[0]);
+ std::vector<uint8_t> context_map;
+ EntropyEncodingData codes;
+ BuildAndEncodeHistograms(HistogramParams(), kPermutationContexts, tokens,
+ &codes, &context_map, writer, layer, aux_out);
+ WriteTokens(tokens[0], codes, context_map, writer, layer, aux_out);
+}
+
+namespace {
+void EncodeCoeffOrder(const coeff_order_t* JXL_RESTRICT order, AcStrategy acs,
+ std::vector<Token>* tokens, coeff_order_t* order_zigzag,
+ std::vector<coeff_order_t>& natural_order_lut) {
+ const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
+ const size_t size = kDCTBlockSize * llf;
+ for (size_t i = 0; i < size; ++i) {
+ order_zigzag[i] = natural_order_lut[order[i]];
+ }
+ TokenizePermutation(order_zigzag, llf, size, tokens);
+}
+} // namespace
+
+void EncodeCoeffOrders(uint16_t used_orders,
+ const coeff_order_t* JXL_RESTRICT order,
+ BitWriter* writer, size_t layer,
+ AuxOut* JXL_RESTRICT aux_out) {
+ auto mem = hwy::AllocateAligned<coeff_order_t>(AcStrategy::kMaxCoeffArea);
+ uint16_t computed = 0;
+ std::vector<std::vector<Token>> tokens(1);
+ std::vector<coeff_order_t> natural_order_lut;
+ for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) {
+ uint8_t ord = kStrategyOrder[o];
+ if (computed & (1 << ord)) continue;
+ computed |= 1 << ord;
+ if ((used_orders & (1 << ord)) == 0) continue;
+ AcStrategy acs = AcStrategy::FromRawStrategy(o);
+ const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y();
+ const size_t size = kDCTBlockSize * llf;
+ if (natural_order_lut.size() < size) natural_order_lut.resize(size);
+ acs.ComputeNaturalCoeffOrderLut(natural_order_lut.data());
+ for (size_t c = 0; c < 3; c++) {
+ EncodeCoeffOrder(&order[CoeffOrderOffset(ord, c)], acs, &tokens[0],
+ mem.get(), natural_order_lut);
+ }
+ }
+ // Do not write anything if no order is used.
+ if (used_orders != 0) {
+ std::vector<uint8_t> context_map;
+ EntropyEncodingData codes;
+ BuildAndEncodeHistograms(HistogramParams(), kPermutationContexts, tokens,
+ &codes, &context_map, writer, layer, aux_out);
+ WriteTokens(tokens[0], codes, context_map, writer, layer, aux_out);
+ }
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_coeff_order.h b/third_party/jpeg-xl/lib/jxl/enc_coeff_order.h
new file mode 100644
index 0000000000..3a43f4f986
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_coeff_order.h
@@ -0,0 +1,54 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_COEFF_ORDER_H_
+#define LIB_JXL_ENC_COEFF_ORDER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_params.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Orders that are actually used in part of image. `rect` is in block units.
+// Returns {orders that are used, orders that might be made non-default}.
+std::pair<uint32_t, uint32_t> ComputeUsedOrders(
+ SpeedTier speed, const AcStrategyImage& ac_strategy, const Rect& rect);
+
+// Modify zig-zag order, so that DCT bands with more zeros go later.
+// Order of DCT bands with same number of zeros is untouched, so
+// permutation will be cheaper to encode.
+void ComputeCoeffOrder(SpeedTier speed, const ACImage& acs,
+ const AcStrategyImage& ac_strategy,
+ const FrameDimensions& frame_dim, uint32_t& used_orders,
+ uint16_t used_acs, coeff_order_t* JXL_RESTRICT order);
+
+void EncodeCoeffOrders(uint16_t used_orders,
+ const coeff_order_t* JXL_RESTRICT order,
+ BitWriter* writer, size_t layer,
+ AuxOut* JXL_RESTRICT aux_out);
+
+// Encoding/decoding of a single permutation. `size`: number of elements in the
+// permutation. `skip`: number of elements to skip from the *beginning* of the
+// permutation.
+void EncodePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip,
+ size_t size, BitWriter* writer, int layer,
+ AuxOut* aux_out);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_COEFF_ORDER_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_color_management.cc b/third_party/jpeg-xl/lib/jxl/enc_color_management.cc
new file mode 100644
index 0000000000..8a23ead473
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_color_management.cc
@@ -0,0 +1,1293 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_color_management.h"
+
+#ifndef JPEGXL_ENABLE_SKCMS
+#define JPEGXL_ENABLE_SKCMS 0
+#endif
+
+#include <math.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <memory>
+#include <string>
+#include <utility>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_color_management.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/matrix_ops.h"
+#include "lib/jxl/transfer_functions-inl.h"
+#if JPEGXL_ENABLE_SKCMS
+#include "lib/jxl/enc_jxl_skcms.h"
+#else // JPEGXL_ENABLE_SKCMS
+#include "lcms2.h"
+#include "lcms2_plugin.h"
+#endif // JPEGXL_ENABLE_SKCMS
+
+#define JXL_CMS_VERBOSE 0
+
+// Define these only once. We can't use HWY_ONCE here because it is defined as
+// 1 only on the last pass.
+#ifndef LIB_JXL_ENC_COLOR_MANAGEMENT_CC_
+#define LIB_JXL_ENC_COLOR_MANAGEMENT_CC_
+
+namespace jxl {
+namespace {
+struct JxlCms {
+#if JPEGXL_ENABLE_SKCMS
+ PaddedBytes icc_src, icc_dst;
+ skcms_ICCProfile profile_src, profile_dst;
+#else
+ void* lcms_transform;
+#endif
+
+ // These fields are used when the HLG OOTF or inverse OOTF must be applied.
+ bool apply_hlg_ootf;
+ size_t hlg_ootf_num_channels;
+ // Y component of the primaries.
+ std::array<float, 3> hlg_ootf_luminances;
+
+ size_t channels_src;
+ size_t channels_dst;
+ ImageF buf_src;
+ ImageF buf_dst;
+ float intensity_target;
+ bool skip_lcms = false;
+ ExtraTF preprocess = ExtraTF::kNone;
+ ExtraTF postprocess = ExtraTF::kNone;
+};
+
+Status ApplyHlgOotf(JxlCms* t, float* JXL_RESTRICT buf, size_t xsize,
+ bool forward);
+} // namespace
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_COLOR_MANAGEMENT_CC_
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+#if JXL_CMS_VERBOSE >= 2
+const size_t kX = 0; // pixel index, multiplied by 3 for RGB
+#endif
+
+// xform_src = UndoGammaCompression(buf_src).
+Status BeforeTransform(JxlCms* t, const float* buf_src, float* xform_src,
+ size_t buf_size) {
+ switch (t->preprocess) {
+ case ExtraTF::kNone:
+ JXL_DASSERT(false); // unreachable
+ break;
+
+ case ExtraTF::kPQ: {
+ // By default, PQ content has an intensity target of 10000, stored
+ // exactly.
+ HWY_FULL(float) df;
+ const auto multiplier = Set(df, t->intensity_target == 10000.f
+ ? 1.0f
+ : 10000.f / t->intensity_target);
+ for (size_t i = 0; i < buf_size; i += Lanes(df)) {
+ const auto val = Load(df, buf_src + i);
+ const auto result =
+ Mul(multiplier, TF_PQ().DisplayFromEncoded(df, val));
+ Store(result, df, xform_src + i);
+ }
+#if JXL_CMS_VERBOSE >= 2
+ printf("pre in %.4f %.4f %.4f undoPQ %.4f %.4f %.4f\n", buf_src[3 * kX],
+ buf_src[3 * kX + 1], buf_src[3 * kX + 2], xform_src[3 * kX],
+ xform_src[3 * kX + 1], xform_src[3 * kX + 2]);
+#endif
+ break;
+ }
+
+ case ExtraTF::kHLG:
+ for (size_t i = 0; i < buf_size; ++i) {
+ xform_src[i] = static_cast<float>(
+ TF_HLG().DisplayFromEncoded(static_cast<double>(buf_src[i])));
+ }
+ if (t->apply_hlg_ootf) {
+ JXL_RETURN_IF_ERROR(
+ ApplyHlgOotf(t, xform_src, buf_size, /*forward=*/true));
+ }
+#if JXL_CMS_VERBOSE >= 2
+ printf("pre in %.4f %.4f %.4f undoHLG %.4f %.4f %.4f\n", buf_src[3 * kX],
+ buf_src[3 * kX + 1], buf_src[3 * kX + 2], xform_src[3 * kX],
+ xform_src[3 * kX + 1], xform_src[3 * kX + 2]);
+#endif
+ break;
+
+ case ExtraTF::kSRGB:
+ HWY_FULL(float) df;
+ for (size_t i = 0; i < buf_size; i += Lanes(df)) {
+ const auto val = Load(df, buf_src + i);
+ const auto result = TF_SRGB().DisplayFromEncoded(val);
+ Store(result, df, xform_src + i);
+ }
+#if JXL_CMS_VERBOSE >= 2
+ printf("pre in %.4f %.4f %.4f undoSRGB %.4f %.4f %.4f\n", buf_src[3 * kX],
+ buf_src[3 * kX + 1], buf_src[3 * kX + 2], xform_src[3 * kX],
+ xform_src[3 * kX + 1], xform_src[3 * kX + 2]);
+#endif
+ break;
+ }
+ return true;
+}
+
+// Applies gamma compression in-place.
+Status AfterTransform(JxlCms* t, float* JXL_RESTRICT buf_dst, size_t buf_size) {
+ switch (t->postprocess) {
+ case ExtraTF::kNone:
+ JXL_DASSERT(false); // unreachable
+ break;
+ case ExtraTF::kPQ: {
+ HWY_FULL(float) df;
+ const auto multiplier =
+ Set(df, t->intensity_target == 10000.f ? 1.0f
+ : t->intensity_target * 1e-4f);
+ for (size_t i = 0; i < buf_size; i += Lanes(df)) {
+ const auto val = Load(df, buf_dst + i);
+ const auto result =
+ TF_PQ().EncodedFromDisplay(df, Mul(multiplier, val));
+ Store(result, df, buf_dst + i);
+ }
+#if JXL_CMS_VERBOSE >= 2
+ printf("after PQ enc %.4f %.4f %.4f\n", buf_dst[3 * kX],
+ buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]);
+#endif
+ break;
+ }
+ case ExtraTF::kHLG:
+ if (t->apply_hlg_ootf) {
+ JXL_RETURN_IF_ERROR(
+ ApplyHlgOotf(t, buf_dst, buf_size, /*forward=*/false));
+ }
+ for (size_t i = 0; i < buf_size; ++i) {
+ buf_dst[i] = static_cast<float>(
+ TF_HLG().EncodedFromDisplay(static_cast<double>(buf_dst[i])));
+ }
+#if JXL_CMS_VERBOSE >= 2
+ printf("after HLG enc %.4f %.4f %.4f\n", buf_dst[3 * kX],
+ buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]);
+#endif
+ break;
+ case ExtraTF::kSRGB:
+ HWY_FULL(float) df;
+ for (size_t i = 0; i < buf_size; i += Lanes(df)) {
+ const auto val = Load(df, buf_dst + i);
+ const auto result =
+ TF_SRGB().EncodedFromDisplay(HWY_FULL(float)(), val);
+ Store(result, df, buf_dst + i);
+ }
+#if JXL_CMS_VERBOSE >= 2
+ printf("after SRGB enc %.4f %.4f %.4f\n", buf_dst[3 * kX],
+ buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]);
+#endif
+ break;
+ }
+ return true;
+}
+
+Status DoColorSpaceTransform(void* cms_data, const size_t thread,
+ const float* buf_src, float* buf_dst,
+ size_t xsize) {
+ // No lock needed.
+ JxlCms* t = reinterpret_cast<JxlCms*>(cms_data);
+
+ const float* xform_src = buf_src; // Read-only.
+ if (t->preprocess != ExtraTF::kNone) {
+ float* mutable_xform_src = t->buf_src.Row(thread); // Writable buffer.
+ JXL_RETURN_IF_ERROR(BeforeTransform(t, buf_src, mutable_xform_src,
+ xsize * t->channels_src));
+ xform_src = mutable_xform_src;
+ }
+
+#if JPEGXL_ENABLE_SKCMS
+ if (t->channels_src == 1 && !t->skip_lcms) {
+ // Expand from 1 to 3 channels, starting from the end in case
+ // xform_src == t->buf_src.Row(thread).
+ float* mutable_xform_src = t->buf_src.Row(thread);
+ for (size_t i = 0; i < xsize; ++i) {
+ const size_t x = xsize - i - 1;
+ mutable_xform_src[x * 3] = mutable_xform_src[x * 3 + 1] =
+ mutable_xform_src[x * 3 + 2] = xform_src[x];
+ }
+ xform_src = mutable_xform_src;
+ }
+#else
+ if (t->channels_src == 4 && !t->skip_lcms) {
+ // LCMS does CMYK in a weird way: 0 = white, 100 = max ink
+ float* mutable_xform_src = t->buf_src.Row(thread);
+ for (size_t x = 0; x < xsize * 4; ++x) {
+ mutable_xform_src[x] = 100.f - 100.f * mutable_xform_src[x];
+ }
+ xform_src = mutable_xform_src;
+ }
+#endif
+
+#if JXL_CMS_VERBOSE >= 2
+ // Save inputs for printing before in-place transforms overwrite them.
+ const float in0 = xform_src[3 * kX + 0];
+ const float in1 = xform_src[3 * kX + 1];
+ const float in2 = xform_src[3 * kX + 2];
+#endif
+
+ if (t->skip_lcms) {
+ if (buf_dst != xform_src) {
+ memcpy(buf_dst, xform_src, xsize * t->channels_src * sizeof(*buf_dst));
+ } // else: in-place, no need to copy
+ } else {
+#if JPEGXL_ENABLE_SKCMS
+ JXL_CHECK(
+ skcms_Transform(xform_src,
+ (t->channels_src == 4 ? skcms_PixelFormat_RGBA_ffff
+ : skcms_PixelFormat_RGB_fff),
+ skcms_AlphaFormat_Opaque, &t->profile_src, buf_dst,
+ skcms_PixelFormat_RGB_fff, skcms_AlphaFormat_Opaque,
+ &t->profile_dst, xsize));
+#else // JPEGXL_ENABLE_SKCMS
+ cmsDoTransform(t->lcms_transform, xform_src, buf_dst,
+ static_cast<cmsUInt32Number>(xsize));
+#endif // JPEGXL_ENABLE_SKCMS
+ }
+#if JXL_CMS_VERBOSE >= 2
+ printf("xform skip%d: %.4f %.4f %.4f (%p) -> (%p) %.4f %.4f %.4f\n",
+ t->skip_lcms, in0, in1, in2, xform_src, buf_dst, buf_dst[3 * kX],
+ buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]);
+#endif
+
+#if JPEGXL_ENABLE_SKCMS
+ if (t->channels_dst == 1 && !t->skip_lcms) {
+ // Contract back from 3 to 1 channel, this time forward.
+ float* grayscale_buf_dst = t->buf_dst.Row(thread);
+ for (size_t x = 0; x < xsize; ++x) {
+ grayscale_buf_dst[x] = buf_dst[x * 3];
+ }
+ buf_dst = grayscale_buf_dst;
+ }
+#endif
+
+ if (t->postprocess != ExtraTF::kNone) {
+ JXL_RETURN_IF_ERROR(AfterTransform(t, buf_dst, xsize * t->channels_dst));
+ }
+ return true;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+namespace {
+
+HWY_EXPORT(DoColorSpaceTransform);
+int DoColorSpaceTransform(void* t, size_t thread, const float* buf_src,
+ float* buf_dst, size_t xsize) {
+ return HWY_DYNAMIC_DISPATCH(DoColorSpaceTransform)(t, thread, buf_src,
+ buf_dst, xsize);
+}
+
+// Define to 1 on OS X as a workaround for older LCMS lacking MD5.
+#define JXL_CMS_OLD_VERSION 0
+
+#if JPEGXL_ENABLE_SKCMS
+
+JXL_MUST_USE_RESULT CIExy CIExyFromXYZ(const float XYZ[3]) {
+ const float factor = 1.f / (XYZ[0] + XYZ[1] + XYZ[2]);
+ CIExy xy;
+ xy.x = XYZ[0] * factor;
+ xy.y = XYZ[1] * factor;
+ return xy;
+}
+
+#else // JPEGXL_ENABLE_SKCMS
+// (LCMS interface requires xyY but we omit the Y for white points/primaries.)
+
+JXL_MUST_USE_RESULT CIExy CIExyFromxyY(const cmsCIExyY& xyY) {
+ CIExy xy;
+ xy.x = xyY.x;
+ xy.y = xyY.y;
+ return xy;
+}
+
+JXL_MUST_USE_RESULT CIExy CIExyFromXYZ(const cmsCIEXYZ& XYZ) {
+ cmsCIExyY xyY;
+ cmsXYZ2xyY(/*Dest=*/&xyY, /*Source=*/&XYZ);
+ return CIExyFromxyY(xyY);
+}
+
+JXL_MUST_USE_RESULT cmsCIEXYZ D50_XYZ() {
+ // Quantized D50 as stored in ICC profiles.
+ return {0.96420288, 1.0, 0.82490540};
+}
+
+// RAII
+
+struct ProfileDeleter {
+ void operator()(void* p) { cmsCloseProfile(p); }
+};
+using Profile = std::unique_ptr<void, ProfileDeleter>;
+
+struct TransformDeleter {
+ void operator()(void* p) { cmsDeleteTransform(p); }
+};
+using Transform = std::unique_ptr<void, TransformDeleter>;
+
+struct CurveDeleter {
+ void operator()(cmsToneCurve* p) { cmsFreeToneCurve(p); }
+};
+using Curve = std::unique_ptr<cmsToneCurve, CurveDeleter>;
+
+Status CreateProfileXYZ(const cmsContext context,
+ Profile* JXL_RESTRICT profile) {
+ profile->reset(cmsCreateXYZProfileTHR(context));
+ if (profile->get() == nullptr) return JXL_FAILURE("Failed to create XYZ");
+ return true;
+}
+
+#endif // !JPEGXL_ENABLE_SKCMS
+
+#if JPEGXL_ENABLE_SKCMS
+// IMPORTANT: icc must outlive profile.
+Status DecodeProfile(const uint8_t* icc, size_t size,
+ skcms_ICCProfile* const profile) {
+ if (!skcms_Parse(icc, size, profile)) {
+ return JXL_FAILURE("Failed to parse ICC profile with %" PRIuS " bytes",
+ size);
+ }
+ return true;
+}
+#else // JPEGXL_ENABLE_SKCMS
+Status DecodeProfile(const cmsContext context, const PaddedBytes& icc,
+ Profile* profile) {
+ profile->reset(cmsOpenProfileFromMemTHR(context, icc.data(), icc.size()));
+ if (profile->get() == nullptr) {
+ return JXL_FAILURE("Failed to decode profile");
+ }
+
+ // WARNING: due to the LCMS MD5 issue mentioned above, many existing
+ // profiles have incorrect MD5, so do not even bother checking them nor
+ // generating warning clutter.
+
+ return true;
+}
+#endif // JPEGXL_ENABLE_SKCMS
+
+#if JPEGXL_ENABLE_SKCMS
+
+ColorSpace ColorSpaceFromProfile(const skcms_ICCProfile& profile) {
+ switch (profile.data_color_space) {
+ case skcms_Signature_RGB:
+ case skcms_Signature_CMYK:
+ // spec says CMYK is encoded as RGB (the kBlack extra channel signals that
+ // it is actually CMYK)
+ return ColorSpace::kRGB;
+ case skcms_Signature_Gray:
+ return ColorSpace::kGray;
+ default:
+ return ColorSpace::kUnknown;
+ }
+}
+
+// vector_out := matmul(matrix, vector_in)
+void MatrixProduct(const skcms_Matrix3x3& matrix, const float vector_in[3],
+ float vector_out[3]) {
+ for (int i = 0; i < 3; ++i) {
+ vector_out[i] = 0;
+ for (int j = 0; j < 3; ++j) {
+ vector_out[i] += matrix.vals[i][j] * vector_in[j];
+ }
+ }
+}
+
+// Returns white point that was specified when creating the profile.
+JXL_MUST_USE_RESULT Status UnadaptedWhitePoint(const skcms_ICCProfile& profile,
+ CIExy* out) {
+ float media_white_point_XYZ[3];
+ if (!skcms_GetWTPT(&profile, media_white_point_XYZ)) {
+ return JXL_FAILURE("ICC profile does not contain WhitePoint tag");
+ }
+ skcms_Matrix3x3 CHAD;
+ if (!skcms_GetCHAD(&profile, &CHAD)) {
+ // If there is no chromatic adaptation matrix, it means that the white point
+ // is already unadapted.
+ *out = CIExyFromXYZ(media_white_point_XYZ);
+ return true;
+ }
+ // Otherwise, it has been adapted to the PCS white point using said matrix,
+ // and the adaptation needs to be undone.
+ skcms_Matrix3x3 inverse_CHAD;
+ if (!skcms_Matrix3x3_invert(&CHAD, &inverse_CHAD)) {
+ return JXL_FAILURE("Non-invertible ChromaticAdaptation matrix");
+ }
+ float unadapted_white_point_XYZ[3];
+ MatrixProduct(inverse_CHAD, media_white_point_XYZ, unadapted_white_point_XYZ);
+ *out = CIExyFromXYZ(unadapted_white_point_XYZ);
+ return true;
+}
+
+Status IdentifyPrimaries(const skcms_ICCProfile& profile,
+ const CIExy& wp_unadapted, ColorEncoding* c) {
+ if (!c->HasPrimaries()) return true;
+
+ skcms_Matrix3x3 CHAD, inverse_CHAD;
+ if (skcms_GetCHAD(&profile, &CHAD)) {
+ JXL_RETURN_IF_ERROR(skcms_Matrix3x3_invert(&CHAD, &inverse_CHAD));
+ } else {
+ static constexpr skcms_Matrix3x3 kLMSFromXYZ = {
+ {{0.8951, 0.2664, -0.1614},
+ {-0.7502, 1.7135, 0.0367},
+ {0.0389, -0.0685, 1.0296}}};
+ static constexpr skcms_Matrix3x3 kXYZFromLMS = {
+ {{0.9869929, -0.1470543, 0.1599627},
+ {0.4323053, 0.5183603, 0.0492912},
+ {-0.0085287, 0.0400428, 0.9684867}}};
+ static constexpr float kWpD50XYZ[3] = {0.96420288, 1.0, 0.82490540};
+ float wp_unadapted_XYZ[3];
+ JXL_RETURN_IF_ERROR(CIEXYZFromWhiteCIExy(wp_unadapted, wp_unadapted_XYZ));
+ float wp_D50_LMS[3], wp_unadapted_LMS[3];
+ MatrixProduct(kLMSFromXYZ, kWpD50XYZ, wp_D50_LMS);
+ MatrixProduct(kLMSFromXYZ, wp_unadapted_XYZ, wp_unadapted_LMS);
+ inverse_CHAD = {{{wp_unadapted_LMS[0] / wp_D50_LMS[0], 0, 0},
+ {0, wp_unadapted_LMS[1] / wp_D50_LMS[1], 0},
+ {0, 0, wp_unadapted_LMS[2] / wp_D50_LMS[2]}}};
+ inverse_CHAD = skcms_Matrix3x3_concat(&kXYZFromLMS, &inverse_CHAD);
+ inverse_CHAD = skcms_Matrix3x3_concat(&inverse_CHAD, &kLMSFromXYZ);
+ }
+
+ float XYZ[3];
+ PrimariesCIExy primaries;
+ CIExy* const chromaticities[] = {&primaries.r, &primaries.g, &primaries.b};
+ for (int i = 0; i < 3; ++i) {
+ float RGB[3] = {};
+ RGB[i] = 1;
+ skcms_Transform(RGB, skcms_PixelFormat_RGB_fff, skcms_AlphaFormat_Opaque,
+ &profile, XYZ, skcms_PixelFormat_RGB_fff,
+ skcms_AlphaFormat_Opaque, skcms_XYZD50_profile(), 1);
+ float unadapted_XYZ[3];
+ MatrixProduct(inverse_CHAD, XYZ, unadapted_XYZ);
+ *chromaticities[i] = CIExyFromXYZ(unadapted_XYZ);
+ }
+ return c->SetPrimaries(primaries);
+}
+
+void DetectTransferFunction(const skcms_ICCProfile& profile,
+ ColorEncoding* JXL_RESTRICT c) {
+ if (c->tf.SetImplicit()) return;
+
+ float gamma[3] = {};
+ if (profile.has_trc) {
+ const auto IsGamma = [](const skcms_TransferFunction& tf) {
+ return tf.a == 1 && tf.b == 0 &&
+ /* if b and d are zero, it is fine for c not to be */ tf.d == 0 &&
+ tf.e == 0 && tf.f == 0;
+ };
+ for (int i = 0; i < 3; ++i) {
+ if (profile.trc[i].table_entries == 0 &&
+ IsGamma(profile.trc->parametric)) {
+ gamma[i] = 1.f / profile.trc->parametric.g;
+ } else {
+ skcms_TransferFunction approximate_tf;
+ float max_error;
+ if (skcms_ApproximateCurve(&profile.trc[i], &approximate_tf,
+ &max_error)) {
+ if (IsGamma(approximate_tf)) {
+ gamma[i] = 1.f / approximate_tf.g;
+ }
+ }
+ }
+ }
+ }
+ if (gamma[0] != 0 && std::abs(gamma[0] - gamma[1]) < 1e-4f &&
+ std::abs(gamma[1] - gamma[2]) < 1e-4f) {
+ if (c->tf.SetGamma(gamma[0])) {
+ skcms_ICCProfile profile_test;
+ PaddedBytes bytes;
+ if (MaybeCreateProfile(*c, &bytes) &&
+ DecodeProfile(bytes.data(), bytes.size(), &profile_test) &&
+ skcms_ApproximatelyEqualProfiles(&profile, &profile_test)) {
+ return;
+ }
+ }
+ }
+
+ for (TransferFunction tf : Values<TransferFunction>()) {
+ // Can only create profile from known transfer function.
+ if (tf == TransferFunction::kUnknown) continue;
+
+ c->tf.SetTransferFunction(tf);
+
+ skcms_ICCProfile profile_test;
+ PaddedBytes bytes;
+ if (MaybeCreateProfile(*c, &bytes) &&
+ DecodeProfile(bytes.data(), bytes.size(), &profile_test) &&
+ skcms_ApproximatelyEqualProfiles(&profile, &profile_test)) {
+ return;
+ }
+ }
+
+ c->tf.SetTransferFunction(TransferFunction::kUnknown);
+}
+
+#else // JPEGXL_ENABLE_SKCMS
+
+uint32_t Type32(const ColorEncoding& c, bool cmyk) {
+ if (cmyk) return TYPE_CMYK_FLT;
+ if (c.IsGray()) return TYPE_GRAY_FLT;
+ return TYPE_RGB_FLT;
+}
+
+uint32_t Type64(const ColorEncoding& c) {
+ if (c.IsGray()) return TYPE_GRAY_DBL;
+ return TYPE_RGB_DBL;
+}
+
+ColorSpace ColorSpaceFromProfile(const Profile& profile) {
+ switch (cmsGetColorSpace(profile.get())) {
+ case cmsSigRgbData:
+ case cmsSigCmykData:
+ return ColorSpace::kRGB;
+ case cmsSigGrayData:
+ return ColorSpace::kGray;
+ default:
+ return ColorSpace::kUnknown;
+ }
+}
+
+// "profile1" is pre-decoded to save time in DetectTransferFunction.
+Status ProfileEquivalentToICC(const cmsContext context, const Profile& profile1,
+ const PaddedBytes& icc, const ColorEncoding& c) {
+ const uint32_t type_src = Type64(c);
+
+ Profile profile2;
+ JXL_RETURN_IF_ERROR(DecodeProfile(context, icc, &profile2));
+
+ Profile profile_xyz;
+ JXL_RETURN_IF_ERROR(CreateProfileXYZ(context, &profile_xyz));
+
+ const uint32_t intent = INTENT_RELATIVE_COLORIMETRIC;
+ const uint32_t flags = cmsFLAGS_NOOPTIMIZE | cmsFLAGS_BLACKPOINTCOMPENSATION |
+ cmsFLAGS_HIGHRESPRECALC;
+ Transform xform1(cmsCreateTransformTHR(context, profile1.get(), type_src,
+ profile_xyz.get(), TYPE_XYZ_DBL,
+ intent, flags));
+ Transform xform2(cmsCreateTransformTHR(context, profile2.get(), type_src,
+ profile_xyz.get(), TYPE_XYZ_DBL,
+ intent, flags));
+ if (xform1 == nullptr || xform2 == nullptr) {
+ return JXL_FAILURE("Failed to create transform");
+ }
+
+ double in[3];
+ double out1[3];
+ double out2[3];
+
+ // Uniformly spaced samples from very dark to almost fully bright.
+ const double init = 1E-3;
+ const double step = 0.2;
+
+ if (c.IsGray()) {
+ // Finer sampling and replicate each component.
+ for (in[0] = init; in[0] < 1.0; in[0] += step / 8) {
+ cmsDoTransform(xform1.get(), in, out1, 1);
+ cmsDoTransform(xform2.get(), in, out2, 1);
+ if (!ApproxEq(out1[0], out2[0], 2E-4)) {
+ return false;
+ }
+ }
+ } else {
+ for (in[0] = init; in[0] < 1.0; in[0] += step) {
+ for (in[1] = init; in[1] < 1.0; in[1] += step) {
+ for (in[2] = init; in[2] < 1.0; in[2] += step) {
+ cmsDoTransform(xform1.get(), in, out1, 1);
+ cmsDoTransform(xform2.get(), in, out2, 1);
+ for (size_t i = 0; i < 3; ++i) {
+ if (!ApproxEq(out1[i], out2[i], 2E-4)) {
+ return false;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+// Returns white point that was specified when creating the profile.
+// NOTE: we can't just use cmsSigMediaWhitePointTag because its interpretation
+// differs between ICC versions.
+JXL_MUST_USE_RESULT cmsCIEXYZ UnadaptedWhitePoint(const cmsContext context,
+ const Profile& profile,
+ const ColorEncoding& c) {
+ const cmsCIEXYZ* white_point = static_cast<const cmsCIEXYZ*>(
+ cmsReadTag(profile.get(), cmsSigMediaWhitePointTag));
+ if (white_point != nullptr &&
+ cmsReadTag(profile.get(), cmsSigChromaticAdaptationTag) == nullptr) {
+ // No chromatic adaptation matrix: the white point is already unadapted.
+ return *white_point;
+ }
+
+ cmsCIEXYZ XYZ = {1.0, 1.0, 1.0};
+ Profile profile_xyz;
+ if (!CreateProfileXYZ(context, &profile_xyz)) return XYZ;
+ // Array arguments are one per profile.
+ cmsHPROFILE profiles[2] = {profile.get(), profile_xyz.get()};
+ // Leave white point unchanged - that is what we're trying to extract.
+ cmsUInt32Number intents[2] = {INTENT_ABSOLUTE_COLORIMETRIC,
+ INTENT_ABSOLUTE_COLORIMETRIC};
+ cmsBool black_compensation[2] = {0, 0};
+ cmsFloat64Number adaption[2] = {0.0, 0.0};
+ // Only transforming a single pixel, so skip expensive optimizations.
+ cmsUInt32Number flags = cmsFLAGS_NOOPTIMIZE | cmsFLAGS_HIGHRESPRECALC;
+ Transform xform(cmsCreateExtendedTransform(
+ context, 2, profiles, black_compensation, intents, adaption, nullptr, 0,
+ Type64(c), TYPE_XYZ_DBL, flags));
+ if (!xform) return XYZ; // TODO(lode): return error
+
+ // xy are relative, so magnitude does not matter if we ignore output Y.
+ const cmsFloat64Number in[3] = {1.0, 1.0, 1.0};
+ cmsDoTransform(xform.get(), in, &XYZ.X, 1);
+ return XYZ;
+}
+
+Status IdentifyPrimaries(const cmsContext context, const Profile& profile,
+ const cmsCIEXYZ& wp_unadapted, ColorEncoding* c) {
+ if (!c->HasPrimaries()) return true;
+ if (ColorSpaceFromProfile(profile) == ColorSpace::kUnknown) return true;
+
+ // These were adapted to the profile illuminant before storing in the profile.
+ const cmsCIEXYZ* adapted_r = static_cast<const cmsCIEXYZ*>(
+ cmsReadTag(profile.get(), cmsSigRedColorantTag));
+ const cmsCIEXYZ* adapted_g = static_cast<const cmsCIEXYZ*>(
+ cmsReadTag(profile.get(), cmsSigGreenColorantTag));
+ const cmsCIEXYZ* adapted_b = static_cast<const cmsCIEXYZ*>(
+ cmsReadTag(profile.get(), cmsSigBlueColorantTag));
+
+ cmsCIEXYZ converted_rgb[3];
+ if (adapted_r == nullptr || adapted_g == nullptr || adapted_b == nullptr) {
+ // No colorant tag, determine the XYZ coordinates of the primaries by
+ // converting from the colorspace.
+ Profile profile_xyz;
+ if (!CreateProfileXYZ(context, &profile_xyz)) {
+ return JXL_FAILURE("Failed to retrieve colorants");
+ }
+ // Array arguments are one per profile.
+ cmsHPROFILE profiles[2] = {profile.get(), profile_xyz.get()};
+ cmsUInt32Number intents[2] = {INTENT_RELATIVE_COLORIMETRIC,
+ INTENT_RELATIVE_COLORIMETRIC};
+ cmsBool black_compensation[2] = {0, 0};
+ cmsFloat64Number adaption[2] = {0.0, 0.0};
+ // Only transforming three pixels, so skip expensive optimizations.
+ cmsUInt32Number flags = cmsFLAGS_NOOPTIMIZE | cmsFLAGS_HIGHRESPRECALC;
+ Transform xform(cmsCreateExtendedTransform(
+ context, 2, profiles, black_compensation, intents, adaption, nullptr, 0,
+ Type64(*c), TYPE_XYZ_DBL, flags));
+ if (!xform) return JXL_FAILURE("Failed to retrieve colorants");
+
+ const cmsFloat64Number in[9] = {1.0, 0.0, 0.0, 0.0, 1.0,
+ 0.0, 0.0, 0.0, 1.0};
+ cmsDoTransform(xform.get(), in, &converted_rgb->X, 3);
+ adapted_r = &converted_rgb[0];
+ adapted_g = &converted_rgb[1];
+ adapted_b = &converted_rgb[2];
+ }
+
+ // TODO(janwas): no longer assume Bradford and D50.
+ // Undo the chromatic adaptation.
+ const cmsCIEXYZ d50 = D50_XYZ();
+
+ cmsCIEXYZ r, g, b;
+ cmsAdaptToIlluminant(&r, &d50, &wp_unadapted, adapted_r);
+ cmsAdaptToIlluminant(&g, &d50, &wp_unadapted, adapted_g);
+ cmsAdaptToIlluminant(&b, &d50, &wp_unadapted, adapted_b);
+
+ const PrimariesCIExy rgb = {CIExyFromXYZ(r), CIExyFromXYZ(g),
+ CIExyFromXYZ(b)};
+ return c->SetPrimaries(rgb);
+}
+
+void DetectTransferFunction(const cmsContext context, const Profile& profile,
+ ColorEncoding* JXL_RESTRICT c) {
+ if (c->tf.SetImplicit()) return;
+
+ float gamma = 0;
+ if (const auto* gray_trc = reinterpret_cast<const cmsToneCurve*>(
+ cmsReadTag(profile.get(), cmsSigGrayTRCTag))) {
+ const double estimated_gamma =
+ cmsEstimateGamma(gray_trc, /*precision=*/1e-4);
+ if (estimated_gamma > 0) {
+ gamma = 1. / estimated_gamma;
+ }
+ } else {
+ float rgb_gamma[3] = {};
+ int i = 0;
+ for (const auto tag :
+ {cmsSigRedTRCTag, cmsSigGreenTRCTag, cmsSigBlueTRCTag}) {
+ if (const auto* trc = reinterpret_cast<const cmsToneCurve*>(
+ cmsReadTag(profile.get(), tag))) {
+ const double estimated_gamma =
+ cmsEstimateGamma(trc, /*precision=*/1e-4);
+ if (estimated_gamma > 0) {
+ rgb_gamma[i] = 1. / estimated_gamma;
+ }
+ }
+ ++i;
+ }
+ if (rgb_gamma[0] != 0 && std::abs(rgb_gamma[0] - rgb_gamma[1]) < 1e-4f &&
+ std::abs(rgb_gamma[1] - rgb_gamma[2]) < 1e-4f) {
+ gamma = rgb_gamma[0];
+ }
+ }
+
+ if (gamma != 0 && c->tf.SetGamma(gamma)) {
+ PaddedBytes icc_test;
+ if (MaybeCreateProfile(*c, &icc_test) &&
+ ProfileEquivalentToICC(context, profile, icc_test, *c)) {
+ return;
+ }
+ }
+
+ for (TransferFunction tf : Values<TransferFunction>()) {
+ // Can only create profile from known transfer function.
+ if (tf == TransferFunction::kUnknown) continue;
+
+ c->tf.SetTransferFunction(tf);
+
+ PaddedBytes icc_test;
+ if (MaybeCreateProfile(*c, &icc_test) &&
+ ProfileEquivalentToICC(context, profile, icc_test, *c)) {
+ return;
+ }
+ }
+
+ c->tf.SetTransferFunction(TransferFunction::kUnknown);
+}
+
+void ErrorHandler(cmsContext context, cmsUInt32Number code, const char* text) {
+ JXL_WARNING("LCMS error %u: %s", code, text);
+}
+
+// Returns a context for the current thread, creating it if necessary.
+cmsContext GetContext() {
+ static thread_local void* context_;
+ if (context_ == nullptr) {
+ context_ = cmsCreateContext(nullptr, nullptr);
+ JXL_ASSERT(context_ != nullptr);
+
+ cmsSetLogErrorHandlerTHR(static_cast<cmsContext>(context_), &ErrorHandler);
+ }
+ return static_cast<cmsContext>(context_);
+}
+
+#endif // JPEGXL_ENABLE_SKCMS
+
+Status GetPrimariesLuminances(const ColorEncoding& encoding,
+ float luminances[3]) {
+ // Explanation:
+ // We know that the three primaries must sum to white:
+ //
+ // [Xr, Xg, Xb; [1; [Xw;
+ // Yr, Yg, Yb; × 1; = Yw;
+ // Zr, Zg, Zb] 1] Zw]
+ //
+ // By noting that X = x·(X+Y+Z), Y = y·(X+Y+Z) and Z = z·(X+Y+Z) (note the
+ // lower case indicating chromaticity), and factoring the totals (X+Y+Z) out
+ // of the left matrix and into the all-ones vector, we get:
+ //
+ // [xr, xg, xb; [Xr + Yr + Zr; [Xw;
+ // yr, yg, yb; × Xg + Yg + Zg; = Yw;
+ // zr, zg, zb] Xb + Yb + Zb] Zw]
+ //
+ // Which makes it apparent that we can compute those totals as:
+ //
+ // [Xr + Yr + Zr; inv([xr, xg, xb; [Xw;
+ // Xg + Yg + Zg; = yr, yg, yb; × Yw;
+ // Xb + Yb + Zb] zr, zg, zb]) Zw]
+ //
+ // From there, by multiplying each total by its corresponding y, we get Y for
+ // that primary.
+
+ float white_XYZ[3];
+ JXL_RETURN_IF_ERROR(
+ CIEXYZFromWhiteCIExy(encoding.GetWhitePoint(), white_XYZ));
+
+ const PrimariesCIExy primaries = encoding.GetPrimaries();
+ double chromaticities[3][3] = {
+ {primaries.r.x, primaries.g.x, primaries.b.x},
+ {primaries.r.y, primaries.g.y, primaries.b.y},
+ {1 - primaries.r.x - primaries.r.y, 1 - primaries.g.x - primaries.g.y,
+ 1 - primaries.b.x - primaries.b.y}};
+ JXL_RETURN_IF_ERROR(Inv3x3Matrix(&chromaticities[0][0]));
+ const double ys[3] = {primaries.r.y, primaries.g.y, primaries.b.y};
+ for (size_t i = 0; i < 3; ++i) {
+ luminances[i] = ys[i] * (chromaticities[i][0] * white_XYZ[0] +
+ chromaticities[i][1] * white_XYZ[1] +
+ chromaticities[i][2] * white_XYZ[2]);
+ }
+ return true;
+}
+
+Status ApplyHlgOotf(JxlCms* t, float* JXL_RESTRICT buf, size_t xsize,
+ bool forward) {
+ if (295 <= t->intensity_target && t->intensity_target <= 305) {
+ // The gamma is approximately 1 so this can essentially be skipped.
+ return true;
+ }
+ float gamma = 1.2f * std::pow(1.111f, std::log2(t->intensity_target * 1e-3f));
+ if (!forward) gamma = 1.f / gamma;
+
+ switch (t->hlg_ootf_num_channels) {
+ case 1:
+ for (size_t x = 0; x < xsize; ++x) {
+ buf[x] = std::pow(buf[x], gamma);
+ }
+ break;
+
+ case 3:
+ for (size_t x = 0; x < xsize; x += 3) {
+ const float luminance = buf[x] * t->hlg_ootf_luminances[0] +
+ buf[x + 1] * t->hlg_ootf_luminances[1] +
+ buf[x + 2] * t->hlg_ootf_luminances[2];
+ const float ratio = std::pow(luminance, gamma - 1);
+ if (std::isfinite(ratio)) {
+ buf[x] *= ratio;
+ buf[x + 1] *= ratio;
+ buf[x + 2] *= ratio;
+ if (forward && gamma < 1) {
+ // If gamma < 1, the ratio above will be > 1 which can push bright
+ // saturated highlights out of gamut. There are several possible
+ // ways to bring them back in-gamut; this one preserves hue and
+ // saturation at the slight expense of luminance. If !forward, the
+ // previously-applied forward OOTF with gamma > 1 already pushed
+ // those highlights down and we are simply putting them back where
+ // they were so this is not necessary.
+ const float maximum =
+ std::max(buf[x], std::max(buf[x + 1], buf[x + 2]));
+ if (maximum > 1) {
+ const float normalizer = 1.f / maximum;
+ buf[x] *= normalizer;
+ buf[x + 1] *= normalizer;
+ buf[x + 2] *= normalizer;
+ }
+ }
+ }
+ }
+ break;
+
+ default:
+ return JXL_FAILURE("HLG OOTF not implemented for %" PRIuS " channels",
+ t->hlg_ootf_num_channels);
+ }
+ return true;
+}
+
+bool ApplyCICP(const uint8_t color_primaries,
+ const uint8_t transfer_characteristics,
+ const uint8_t matrix_coefficients, const uint8_t full_range,
+ ColorEncoding* JXL_RESTRICT c) {
+ if (matrix_coefficients != 0) return false;
+ if (full_range != 1) return false;
+
+ const auto primaries = static_cast<Primaries>(color_primaries);
+ const auto tf = static_cast<TransferFunction>(transfer_characteristics);
+ if (tf == TransferFunction::kUnknown || !EnumValid(tf)) return false;
+ if (primaries == Primaries::kCustom ||
+ !(color_primaries == 12 || EnumValid(primaries))) {
+ return false;
+ }
+ c->SetColorSpace(ColorSpace::kRGB);
+ c->tf.SetTransferFunction(tf);
+ if (primaries == Primaries::kP3) {
+ c->white_point = WhitePoint::kDCI;
+ c->primaries = Primaries::kP3;
+ } else if (color_primaries == 12) {
+ c->white_point = WhitePoint::kD65;
+ c->primaries = Primaries::kP3;
+ } else {
+ c->white_point = WhitePoint::kD65;
+ c->primaries = primaries;
+ }
+ return true;
+}
+
+} // namespace
+
+Status ColorEncoding::SetFieldsFromICC() {
+ // In case parsing fails, mark the ColorEncoding as invalid.
+ SetColorSpace(ColorSpace::kUnknown);
+ tf.SetTransferFunction(TransferFunction::kUnknown);
+
+ if (icc_.empty()) return JXL_FAILURE("Empty ICC profile");
+
+#if JPEGXL_ENABLE_SKCMS
+ if (icc_.size() < 128) {
+ return JXL_FAILURE("ICC file too small");
+ }
+
+ skcms_ICCProfile profile;
+ JXL_RETURN_IF_ERROR(skcms_Parse(icc_.data(), icc_.size(), &profile));
+
+ // skcms does not return the rendering intent, so get it from the file. It
+ // is encoded as big-endian 32-bit integer in bytes 60..63.
+ uint32_t rendering_intent32 = icc_[67];
+ if (rendering_intent32 > 3 || icc_[64] != 0 || icc_[65] != 0 ||
+ icc_[66] != 0) {
+ return JXL_FAILURE("Invalid rendering intent %u\n", rendering_intent32);
+ }
+ // ICC and RenderingIntent have the same values (0..3).
+ rendering_intent = static_cast<RenderingIntent>(rendering_intent32);
+
+ if (profile.has_CICP && ApplyCICP(profile.CICP.color_primaries,
+ profile.CICP.transfer_characteristics,
+ profile.CICP.matrix_coefficients,
+ profile.CICP.video_full_range_flag, this)) {
+ return true;
+ }
+
+ SetColorSpace(ColorSpaceFromProfile(profile));
+ cmyk_ = (profile.data_color_space == skcms_Signature_CMYK);
+
+ CIExy wp_unadapted;
+ JXL_RETURN_IF_ERROR(UnadaptedWhitePoint(profile, &wp_unadapted));
+ JXL_RETURN_IF_ERROR(SetWhitePoint(wp_unadapted));
+
+ // Relies on color_space.
+ JXL_RETURN_IF_ERROR(IdentifyPrimaries(profile, wp_unadapted, this));
+
+ // Relies on color_space/white point/primaries being set already.
+ DetectTransferFunction(profile, this);
+#else // JPEGXL_ENABLE_SKCMS
+
+ const cmsContext context = GetContext();
+
+ Profile profile;
+ JXL_RETURN_IF_ERROR(DecodeProfile(context, icc_, &profile));
+
+ static constexpr size_t kCICPSize = 12;
+ static constexpr auto kCICPSignature =
+ static_cast<cmsTagSignature>(0x63696370);
+ uint8_t cicp_buffer[kCICPSize];
+ if (cmsReadRawTag(profile.get(), kCICPSignature, cicp_buffer, kCICPSize) ==
+ kCICPSize &&
+ ApplyCICP(cicp_buffer[8], cicp_buffer[9], cicp_buffer[10],
+ cicp_buffer[11], this)) {
+ return true;
+ }
+
+ const cmsUInt32Number rendering_intent32 =
+ cmsGetHeaderRenderingIntent(profile.get());
+ if (rendering_intent32 > 3) {
+ return JXL_FAILURE("Invalid rendering intent %u\n", rendering_intent32);
+ }
+ // ICC and RenderingIntent have the same values (0..3).
+ rendering_intent = static_cast<RenderingIntent>(rendering_intent32);
+
+ SetColorSpace(ColorSpaceFromProfile(profile));
+ if (cmsGetColorSpace(profile.get()) == cmsSigCmykData) {
+ cmyk_ = true;
+ return true;
+ }
+
+ const cmsCIEXYZ wp_unadapted = UnadaptedWhitePoint(context, profile, *this);
+ JXL_RETURN_IF_ERROR(SetWhitePoint(CIExyFromXYZ(wp_unadapted)));
+
+ // Relies on color_space.
+ JXL_RETURN_IF_ERROR(IdentifyPrimaries(context, profile, wp_unadapted, this));
+
+ // Relies on color_space/white point/primaries being set already.
+ DetectTransferFunction(context, profile, this);
+
+#endif // JPEGXL_ENABLE_SKCMS
+
+ return true;
+}
+
+void ColorEncoding::DecideIfWantICC() {
+ PaddedBytes icc_new;
+#if JPEGXL_ENABLE_SKCMS
+ skcms_ICCProfile profile;
+ if (!DecodeProfile(ICC().data(), ICC().size(), &profile)) return;
+ if (!MaybeCreateProfile(*this, &icc_new)) return;
+#else // JPEGXL_ENABLE_SKCMS
+ const cmsContext context = GetContext();
+ Profile profile;
+ if (!DecodeProfile(context, ICC(), &profile)) return;
+ if (cmsGetColorSpace(profile.get()) == cmsSigCmykData) return;
+ if (!MaybeCreateProfile(*this, &icc_new)) return;
+#endif // JPEGXL_ENABLE_SKCMS
+
+ want_icc_ = false;
+}
+
+namespace {
+
+void JxlCmsDestroy(void* cms_data) {
+ if (cms_data == nullptr) return;
+ JxlCms* t = reinterpret_cast<JxlCms*>(cms_data);
+#if !JPEGXL_ENABLE_SKCMS
+ TransformDeleter()(t->lcms_transform);
+#endif
+ delete t;
+}
+
+void* JxlCmsInit(void* init_data, size_t num_threads, size_t xsize,
+ const JxlColorProfile* input, const JxlColorProfile* output,
+ float intensity_target) {
+ auto t = jxl::make_unique<JxlCms>();
+ PaddedBytes icc_src, icc_dst;
+ icc_src.assign(input->icc.data, input->icc.data + input->icc.size);
+ ColorEncoding c_src;
+ if (!c_src.SetICC(std::move(icc_src))) {
+ JXL_NOTIFY_ERROR("JxlCmsInit: failed to parse input ICC");
+ return nullptr;
+ }
+ icc_dst.assign(output->icc.data, output->icc.data + output->icc.size);
+ ColorEncoding c_dst;
+ if (!c_dst.SetICC(std::move(icc_dst))) {
+ JXL_NOTIFY_ERROR("JxlCmsInit: failed to parse output ICC");
+ return nullptr;
+ }
+#if JXL_CMS_VERBOSE
+ printf("%s -> %s\n", Description(c_src).c_str(), Description(c_dst).c_str());
+#endif
+
+#if JPEGXL_ENABLE_SKCMS
+ if (!DecodeProfile(input->icc.data, input->icc.size, &t->profile_src)) {
+ JXL_NOTIFY_ERROR("JxlCmsInit: skcms failed to parse input ICC");
+ return nullptr;
+ }
+ if (!DecodeProfile(output->icc.data, output->icc.size, &t->profile_dst)) {
+ JXL_NOTIFY_ERROR("JxlCmsInit: skcms failed to parse output ICC");
+ return nullptr;
+ }
+#else // JPEGXL_ENABLE_SKCMS
+ const cmsContext context = GetContext();
+ Profile profile_src, profile_dst;
+ if (!DecodeProfile(context, c_src.ICC(), &profile_src)) {
+ JXL_NOTIFY_ERROR("JxlCmsInit: lcms failed to parse input ICC");
+ return nullptr;
+ }
+ if (!DecodeProfile(context, c_dst.ICC(), &profile_dst)) {
+ JXL_NOTIFY_ERROR("JxlCmsInit: lcms failed to parse output ICC");
+ return nullptr;
+ }
+#endif // JPEGXL_ENABLE_SKCMS
+
+ t->skip_lcms = false;
+ if (c_src.SameColorEncoding(c_dst)) {
+ t->skip_lcms = true;
+#if JXL_CMS_VERBOSE
+ printf("Skip CMS\n");
+#endif
+ }
+
+ t->apply_hlg_ootf = c_src.tf.IsHLG() != c_dst.tf.IsHLG();
+ if (t->apply_hlg_ootf) {
+ const ColorEncoding* c_hlg = c_src.tf.IsHLG() ? &c_src : &c_dst;
+ t->hlg_ootf_num_channels = c_hlg->Channels();
+ if (t->hlg_ootf_num_channels == 3 &&
+ !GetPrimariesLuminances(*c_hlg, t->hlg_ootf_luminances.data())) {
+ JXL_NOTIFY_ERROR(
+ "JxlCmsInit: failed to compute the luminances of primaries");
+ return nullptr;
+ }
+ }
+
+ // Special-case SRGB <=> linear if the primaries / white point are the same,
+ // or any conversion where PQ or HLG is involved:
+ bool src_linear = c_src.tf.IsLinear();
+ const bool dst_linear = c_dst.tf.IsLinear();
+
+ if (c_src.tf.IsPQ() || c_src.tf.IsHLG() ||
+ (c_src.tf.IsSRGB() && dst_linear && c_src.SameColorSpace(c_dst))) {
+ // Construct new profile as if the data were already/still linear.
+ ColorEncoding c_linear_src = c_src;
+ c_linear_src.tf.SetTransferFunction(TransferFunction::kLinear);
+#if JPEGXL_ENABLE_SKCMS
+ skcms_ICCProfile new_src;
+#else // JPEGXL_ENABLE_SKCMS
+ Profile new_src;
+#endif // JPEGXL_ENABLE_SKCMS
+ // Only enable ExtraTF if profile creation succeeded.
+ if (MaybeCreateProfile(c_linear_src, &icc_src) &&
+#if JPEGXL_ENABLE_SKCMS
+ DecodeProfile(icc_src.data(), icc_src.size(), &new_src)) {
+#else // JPEGXL_ENABLE_SKCMS
+ DecodeProfile(context, icc_src, &new_src)) {
+#endif // JPEGXL_ENABLE_SKCMS
+#if JXL_CMS_VERBOSE
+ printf("Special HLG/PQ/sRGB -> linear\n");
+#endif
+#if JPEGXL_ENABLE_SKCMS
+ t->icc_src = std::move(icc_src);
+ t->profile_src = new_src;
+#else // JPEGXL_ENABLE_SKCMS
+ profile_src.swap(new_src);
+#endif // JPEGXL_ENABLE_SKCMS
+ t->preprocess = c_src.tf.IsSRGB()
+ ? ExtraTF::kSRGB
+ : (c_src.tf.IsPQ() ? ExtraTF::kPQ : ExtraTF::kHLG);
+ c_src = c_linear_src;
+ src_linear = true;
+ } else {
+ if (t->apply_hlg_ootf) {
+ JXL_NOTIFY_ERROR(
+ "Failed to create extra linear source profile, and HLG OOTF "
+ "required");
+ return nullptr;
+ }
+ JXL_WARNING("Failed to create extra linear destination profile");
+ }
+ }
+
+ if (c_dst.tf.IsPQ() || c_dst.tf.IsHLG() ||
+ (c_dst.tf.IsSRGB() && src_linear && c_src.SameColorSpace(c_dst))) {
+ ColorEncoding c_linear_dst = c_dst;
+ c_linear_dst.tf.SetTransferFunction(TransferFunction::kLinear);
+#if JPEGXL_ENABLE_SKCMS
+ skcms_ICCProfile new_dst;
+#else // JPEGXL_ENABLE_SKCMS
+ Profile new_dst;
+#endif // JPEGXL_ENABLE_SKCMS
+ // Only enable ExtraTF if profile creation succeeded.
+ if (MaybeCreateProfile(c_linear_dst, &icc_dst) &&
+#if JPEGXL_ENABLE_SKCMS
+ DecodeProfile(icc_dst.data(), icc_dst.size(), &new_dst)) {
+#else // JPEGXL_ENABLE_SKCMS
+ DecodeProfile(context, icc_dst, &new_dst)) {
+#endif // JPEGXL_ENABLE_SKCMS
+#if JXL_CMS_VERBOSE
+ printf("Special linear -> HLG/PQ/sRGB\n");
+#endif
+#if JPEGXL_ENABLE_SKCMS
+ t->icc_dst = std::move(icc_dst);
+ t->profile_dst = new_dst;
+#else // JPEGXL_ENABLE_SKCMS
+ profile_dst.swap(new_dst);
+#endif // JPEGXL_ENABLE_SKCMS
+ t->postprocess = c_dst.tf.IsSRGB()
+ ? ExtraTF::kSRGB
+ : (c_dst.tf.IsPQ() ? ExtraTF::kPQ : ExtraTF::kHLG);
+ c_dst = c_linear_dst;
+ } else {
+ if (t->apply_hlg_ootf) {
+ JXL_NOTIFY_ERROR(
+ "Failed to create extra linear destination profile, and inverse "
+ "HLG OOTF required");
+ return nullptr;
+ }
+ JXL_WARNING("Failed to create extra linear destination profile");
+ }
+ }
+
+ if (c_src.SameColorEncoding(c_dst)) {
+#if JXL_CMS_VERBOSE
+ printf("Same intermediary linear profiles, skipping CMS\n");
+#endif
+ t->skip_lcms = true;
+ }
+
+#if JPEGXL_ENABLE_SKCMS
+ if (!skcms_MakeUsableAsDestination(&t->profile_dst)) {
+ JXL_NOTIFY_ERROR(
+ "Failed to make %s usable as a color transform destination",
+ Description(c_dst).c_str());
+ return nullptr;
+ }
+#endif // JPEGXL_ENABLE_SKCMS
+
+ // Not including alpha channel (copied separately).
+ const size_t channels_src = (c_src.IsCMYK() ? 4 : c_src.Channels());
+ const size_t channels_dst = c_dst.Channels();
+ JXL_CHECK(channels_src == channels_dst ||
+ (channels_src == 4 && channels_dst == 3));
+#if JXL_CMS_VERBOSE
+ printf("Channels: %" PRIuS "; Threads: %" PRIuS "\n", channels_src,
+ num_threads);
+#endif
+
+#if !JPEGXL_ENABLE_SKCMS
+ // Type includes color space (XYZ vs RGB), so can be different.
+ const uint32_t type_src = Type32(c_src, channels_src == 4);
+ const uint32_t type_dst = Type32(c_dst, false);
+ const uint32_t intent = static_cast<uint32_t>(c_dst.rendering_intent);
+ // Use cmsFLAGS_NOCACHE to disable the 1-pixel cache and make calling
+ // cmsDoTransform() thread-safe.
+ const uint32_t flags = cmsFLAGS_NOCACHE | cmsFLAGS_BLACKPOINTCOMPENSATION |
+ cmsFLAGS_HIGHRESPRECALC;
+ t->lcms_transform =
+ cmsCreateTransformTHR(context, profile_src.get(), type_src,
+ profile_dst.get(), type_dst, intent, flags);
+ if (t->lcms_transform == nullptr) {
+ JXL_NOTIFY_ERROR("Failed to create transform");
+ return nullptr;
+ }
+#endif // !JPEGXL_ENABLE_SKCMS
+
+ // Ideally LCMS would convert directly from External to Image3. However,
+ // cmsDoTransformLineStride only accepts 32-bit BytesPerPlaneIn, whereas our
+ // planes can be more than 4 GiB apart. Hence, transform inputs/outputs must
+ // be interleaved. Calling cmsDoTransform for each pixel is expensive
+ // (indirect call). We therefore transform rows, which requires per-thread
+ // buffers. To avoid separate allocations, we use the rows of an image.
+ // Because LCMS apparently also cannot handle <= 16 bit inputs and 32-bit
+ // outputs (or vice versa), we use floating point input/output.
+ t->channels_src = channels_src;
+ t->channels_dst = channels_dst;
+#if JPEGXL_ENABLE_SKCMS
+ // SkiaCMS doesn't support grayscale float buffers, so we create space for RGB
+ // float buffers anyway.
+ t->buf_src = ImageF(xsize * (channels_src == 4 ? 4 : 3), num_threads);
+ t->buf_dst = ImageF(xsize * 3, num_threads);
+#else
+ t->buf_src = ImageF(xsize * channels_src, num_threads);
+ t->buf_dst = ImageF(xsize * channels_dst, num_threads);
+#endif
+ t->intensity_target = intensity_target;
+ return t.release();
+}
+
+float* JxlCmsGetSrcBuf(void* cms_data, size_t thread) {
+ JxlCms* t = reinterpret_cast<JxlCms*>(cms_data);
+ return t->buf_src.Row(thread);
+}
+
+float* JxlCmsGetDstBuf(void* cms_data, size_t thread) {
+ JxlCms* t = reinterpret_cast<JxlCms*>(cms_data);
+ return t->buf_dst.Row(thread);
+}
+
+} // namespace
+
+const JxlCmsInterface& GetJxlCms() {
+ static constexpr JxlCmsInterface kInterface = {
+ /*init_data=*/nullptr,
+ /*init=*/&JxlCmsInit,
+ /*get_src_buf=*/&JxlCmsGetSrcBuf,
+ /*get_dst_buf=*/&JxlCmsGetDstBuf,
+ /*run=*/&DoColorSpaceTransform,
+ /*destroy=*/&JxlCmsDestroy};
+ return kInterface;
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/enc_color_management.h b/third_party/jpeg-xl/lib/jxl/enc_color_management.h
new file mode 100644
index 0000000000..6f6e9023a6
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_color_management.h
@@ -0,0 +1,90 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_COLOR_MANAGEMENT_H_
+#define LIB_JXL_ENC_COLOR_MANAGEMENT_H_
+
+// ICC profiles and color space conversions.
+
+#include <jxl/cms_interface.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// Internal C++ wrapper for a JxlCmsInterface.
+class ColorSpaceTransform {
+ public:
+ explicit ColorSpaceTransform(const JxlCmsInterface& cms) : cms_(cms) {}
+ ~ColorSpaceTransform() {
+ if (cms_data_ != nullptr) {
+ cms_.destroy(cms_data_);
+ }
+ }
+
+ // Cannot copy.
+ ColorSpaceTransform(const ColorSpaceTransform&) = delete;
+ ColorSpaceTransform& operator=(const ColorSpaceTransform&) = delete;
+
+ Status Init(const ColorEncoding& c_src, const ColorEncoding& c_dst,
+ float intensity_target, size_t xsize, size_t num_threads) {
+ xsize_ = xsize;
+ JxlColorProfile input_profile;
+ icc_src_ = c_src.ICC();
+ input_profile.icc.data = icc_src_.data();
+ input_profile.icc.size = icc_src_.size();
+ ConvertInternalToExternalColorEncoding(c_src,
+ &input_profile.color_encoding);
+ input_profile.num_channels = c_src.IsCMYK() ? 4 : c_src.Channels();
+ JxlColorProfile output_profile;
+ icc_dst_ = c_dst.ICC();
+ output_profile.icc.data = icc_dst_.data();
+ output_profile.icc.size = icc_dst_.size();
+ ConvertInternalToExternalColorEncoding(c_dst,
+ &output_profile.color_encoding);
+ if (c_dst.IsCMYK())
+ return JXL_FAILURE("Conversion to CMYK is not supported");
+ output_profile.num_channels = c_dst.Channels();
+ cms_data_ = cms_.init(cms_.init_data, num_threads, xsize, &input_profile,
+ &output_profile, intensity_target);
+ JXL_RETURN_IF_ERROR(cms_data_ != nullptr);
+ return true;
+ }
+
+ float* BufSrc(const size_t thread) const {
+ return cms_.get_src_buf(cms_data_, thread);
+ }
+
+ float* BufDst(const size_t thread) const {
+ return cms_.get_dst_buf(cms_data_, thread);
+ }
+
+ Status Run(const size_t thread, const float* buf_src, float* buf_dst) {
+ return cms_.run(cms_data_, thread, buf_src, buf_dst, xsize_);
+ }
+
+ private:
+ JxlCmsInterface cms_;
+ void* cms_data_ = nullptr;
+ // The interface may retain pointers into these.
+ PaddedBytes icc_src_;
+ PaddedBytes icc_dst_;
+ size_t xsize_;
+};
+
+const JxlCmsInterface& GetJxlCms();
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_COLOR_MANAGEMENT_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_comparator.cc b/third_party/jpeg-xl/lib/jxl/enc_comparator.cc
new file mode 100644
index 0000000000..cbdd0f78d9
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_comparator.cc
@@ -0,0 +1,130 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_comparator.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/enc_gamma_correct.h"
+#include "lib/jxl/enc_image_bundle.h"
+
+namespace jxl {
+namespace {
+
+// color is linear, but blending happens in gamma-compressed space using
+// (gamma-compressed) grayscale background color, alpha image represents
+// weights of the sRGB colors in the [0 .. (1 << bit_depth) - 1] interval,
+// output image is in linear space.
+void AlphaBlend(const Image3F& in, const size_t c, float background_linear,
+ const ImageF& alpha, Image3F* out) {
+ const float background = LinearToSrgb8Direct(background_linear);
+
+ for (size_t y = 0; y < out->ysize(); ++y) {
+ const float* JXL_RESTRICT row_a = alpha.ConstRow(y);
+ const float* JXL_RESTRICT row_i = in.ConstPlaneRow(c, y);
+ float* JXL_RESTRICT row_o = out->PlaneRow(c, y);
+ for (size_t x = 0; x < out->xsize(); ++x) {
+ const float a = row_a[x];
+ if (a <= 0.f) {
+ row_o[x] = background_linear;
+ } else if (a >= 1.f) {
+ row_o[x] = row_i[x];
+ } else {
+ const float w_fg = a;
+ const float w_bg = 1.0f - w_fg;
+ const float fg = w_fg * LinearToSrgb8Direct(row_i[x]);
+ const float bg = w_bg * background;
+ row_o[x] = Srgb8ToLinearDirect(fg + bg);
+ }
+ }
+ }
+}
+
+void AlphaBlend(float background_linear, ImageBundle* io_linear_srgb) {
+ // No alpha => all opaque.
+ if (!io_linear_srgb->HasAlpha()) return;
+
+ for (size_t c = 0; c < 3; ++c) {
+ AlphaBlend(*io_linear_srgb->color(), c, background_linear,
+ *io_linear_srgb->alpha(), io_linear_srgb->color());
+ }
+}
+
+float ComputeScoreImpl(const ImageBundle& rgb0, const ImageBundle& rgb1,
+ Comparator* comparator, ImageF* distmap) {
+ JXL_CHECK(comparator->SetReferenceImage(rgb0));
+ float score;
+ JXL_CHECK(comparator->CompareWith(rgb1, distmap, &score));
+ return score;
+}
+
+} // namespace
+
+float ComputeScore(const ImageBundle& rgb0, const ImageBundle& rgb1,
+ Comparator* comparator, const JxlCmsInterface& cms,
+ ImageF* diffmap, ThreadPool* pool) {
+ PROFILER_FUNC;
+ // Convert to linear sRGB (unless already in that space)
+ ImageMetadata metadata0 = *rgb0.metadata();
+ ImageBundle store0(&metadata0);
+ const ImageBundle* linear_srgb0;
+ JXL_CHECK(TransformIfNeeded(rgb0, ColorEncoding::LinearSRGB(rgb0.IsGray()),
+ cms, pool, &store0, &linear_srgb0));
+ ImageMetadata metadata1 = *rgb1.metadata();
+ ImageBundle store1(&metadata1);
+ const ImageBundle* linear_srgb1;
+ JXL_CHECK(TransformIfNeeded(rgb1, ColorEncoding::LinearSRGB(rgb1.IsGray()),
+ cms, pool, &store1, &linear_srgb1));
+
+ // No alpha: skip blending, only need a single call to Butteraugli.
+ if (!rgb0.HasAlpha() && !rgb1.HasAlpha()) {
+ return ComputeScoreImpl(*linear_srgb0, *linear_srgb1, comparator, diffmap);
+ }
+
+ // Blend on black and white backgrounds
+
+ const float black = 0.0f;
+ ImageBundle blended_black0 = linear_srgb0->Copy();
+ ImageBundle blended_black1 = linear_srgb1->Copy();
+ AlphaBlend(black, &blended_black0);
+ AlphaBlend(black, &blended_black1);
+
+ const float white = 1.0f;
+ ImageBundle blended_white0 = linear_srgb0->Copy();
+ ImageBundle blended_white1 = linear_srgb1->Copy();
+
+ AlphaBlend(white, &blended_white0);
+ AlphaBlend(white, &blended_white1);
+
+ ImageF diffmap_black, diffmap_white;
+ const float dist_black = ComputeScoreImpl(blended_black0, blended_black1,
+ comparator, &diffmap_black);
+ const float dist_white = ComputeScoreImpl(blended_white0, blended_white1,
+ comparator, &diffmap_white);
+
+ // diffmap and return values are the max of diffmap_black/white.
+ if (diffmap != nullptr) {
+ const size_t xsize = rgb0.xsize();
+ const size_t ysize = rgb0.ysize();
+ *diffmap = ImageF(xsize, ysize);
+ for (size_t y = 0; y < ysize; ++y) {
+ const float* JXL_RESTRICT row_black = diffmap_black.ConstRow(y);
+ const float* JXL_RESTRICT row_white = diffmap_white.ConstRow(y);
+ float* JXL_RESTRICT row_out = diffmap->Row(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ row_out[x] = std::max(row_black[x], row_white[x]);
+ }
+ }
+ }
+ return std::max(dist_black, dist_white);
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_comparator.h b/third_party/jpeg-xl/lib/jxl/enc_comparator.h
new file mode 100644
index 0000000000..0ac4df8296
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_comparator.h
@@ -0,0 +1,52 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_COMPARATOR_H_
+#define LIB_JXL_ENC_COMPARATOR_H_
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+class Comparator {
+ public:
+ virtual ~Comparator() = default;
+
+ // Sets the reference image, the first to compare
+ // Image must be in linear sRGB (gamma expanded) in range 0.0f-1.0f as
+ // the range from standard black point to standard white point, but values
+ // outside permitted.
+ virtual Status SetReferenceImage(const ImageBundle& ref) = 0;
+
+ // Sets the actual image (with loss), the second to compare
+ // Image must be in linear sRGB (gamma expanded) in range 0.0f-1.0f as
+ // the range from standard black point to standard white point, but values
+ // outside permitted.
+ // In diffmap it outputs the local score per pixel, while in score it outputs
+ // a single score. Any one may be set to nullptr to not compute it.
+ virtual Status CompareWith(const ImageBundle& actual, ImageF* diffmap,
+ float* score) = 0;
+
+ // Quality thresholds for diffmap and score values.
+ // The good score must represent a value where the images are considered to
+ // be perceptually indistinguishable (but not identical)
+ // The bad value must be larger than good to indicate "lower means better"
+ // and smaller than good to indicate "higher means better"
+ virtual float GoodQualityScore() const = 0;
+ virtual float BadQualityScore() const = 0;
+};
+
+// Computes the score given images in any RGB color model, optionally with
+// alpha channel.
+float ComputeScore(const ImageBundle& rgb0, const ImageBundle& rgb1,
+ Comparator* comparator, const JxlCmsInterface& cms,
+ ImageF* diffmap = nullptr, ThreadPool* pool = nullptr);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_COMPARATOR_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_context_map.cc b/third_party/jpeg-xl/lib/jxl/enc_context_map.cc
new file mode 100644
index 0000000000..842dd12423
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_context_map.cc
@@ -0,0 +1,141 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Library to encode the context map.
+
+#include "lib/jxl/enc_context_map.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <cstddef>
+#include <vector>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/entropy_coder.h"
+
+namespace jxl {
+
+namespace {
+
+size_t IndexOf(const std::vector<uint8_t>& v, uint8_t value) {
+ size_t i = 0;
+ for (; i < v.size(); ++i) {
+ if (v[i] == value) return i;
+ }
+ return i;
+}
+
+void MoveToFront(std::vector<uint8_t>* v, size_t index) {
+ uint8_t value = (*v)[index];
+ for (size_t i = index; i != 0; --i) {
+ (*v)[i] = (*v)[i - 1];
+ }
+ (*v)[0] = value;
+}
+
+std::vector<uint8_t> MoveToFrontTransform(const std::vector<uint8_t>& v) {
+ if (v.empty()) return v;
+ uint8_t max_value = *std::max_element(v.begin(), v.end());
+ std::vector<uint8_t> mtf(max_value + 1);
+ for (size_t i = 0; i <= max_value; ++i) mtf[i] = i;
+ std::vector<uint8_t> result(v.size());
+ for (size_t i = 0; i < v.size(); ++i) {
+ size_t index = IndexOf(mtf, v[i]);
+ JXL_ASSERT(index < mtf.size());
+ result[i] = static_cast<uint8_t>(index);
+ MoveToFront(&mtf, index);
+ }
+ return result;
+}
+
+} // namespace
+
+void EncodeContextMap(const std::vector<uint8_t>& context_map,
+ size_t num_histograms, BitWriter* writer, size_t layer,
+ AuxOut* aux_out) {
+ if (num_histograms == 1) {
+ // Simple code
+ writer->Write(1, 1);
+ // 0 bits per entry.
+ writer->Write(2, 0);
+ return;
+ }
+
+ std::vector<uint8_t> transformed_symbols = MoveToFrontTransform(context_map);
+ std::vector<std::vector<Token>> tokens(1), mtf_tokens(1);
+ EntropyEncodingData codes;
+ std::vector<uint8_t> dummy_context_map;
+ for (size_t i = 0; i < context_map.size(); i++) {
+ tokens[0].emplace_back(0, context_map[i]);
+ }
+ for (size_t i = 0; i < transformed_symbols.size(); i++) {
+ mtf_tokens[0].emplace_back(0, transformed_symbols[i]);
+ }
+ HistogramParams params;
+ params.uint_method = HistogramParams::HybridUintMethod::kContextMap;
+ size_t ans_cost = BuildAndEncodeHistograms(
+ params, 1, tokens, &codes, &dummy_context_map, nullptr, 0, nullptr);
+ size_t mtf_cost = BuildAndEncodeHistograms(
+ params, 1, mtf_tokens, &codes, &dummy_context_map, nullptr, 0, nullptr);
+ bool use_mtf = mtf_cost < ans_cost;
+ // Rebuild token list.
+ tokens[0].clear();
+ for (size_t i = 0; i < transformed_symbols.size(); i++) {
+ tokens[0].emplace_back(0,
+ use_mtf ? transformed_symbols[i] : context_map[i]);
+ }
+ size_t entry_bits = CeilLog2Nonzero(num_histograms);
+ size_t simple_cost = entry_bits * context_map.size();
+ if (entry_bits < 4 && simple_cost < ans_cost && simple_cost < mtf_cost) {
+ writer->Write(1, 1);
+ writer->Write(2, entry_bits);
+ for (size_t i = 0; i < context_map.size(); i++) {
+ writer->Write(entry_bits, context_map[i]);
+ }
+ } else {
+ writer->Write(1, 0);
+ writer->Write(1, use_mtf); // Use/don't use MTF.
+ BuildAndEncodeHistograms(params, 1, tokens, &codes, &dummy_context_map,
+ writer, layer, aux_out);
+ WriteTokens(tokens[0], codes, dummy_context_map, writer);
+ }
+}
+
+void EncodeBlockCtxMap(const BlockCtxMap& block_ctx_map, BitWriter* writer,
+ AuxOut* aux_out) {
+ auto& dct = block_ctx_map.dc_thresholds;
+ auto& qft = block_ctx_map.qf_thresholds;
+ auto& ctx_map = block_ctx_map.ctx_map;
+ BitWriter::Allotment allotment(
+ writer,
+ (dct[0].size() + dct[1].size() + dct[2].size() + qft.size()) * 34 + 1 +
+ 4 + 4 + ctx_map.size() * 10 + 1024);
+ if (dct[0].empty() && dct[1].empty() && dct[2].empty() && qft.empty() &&
+ ctx_map.size() == 21 &&
+ std::equal(ctx_map.begin(), ctx_map.end(), BlockCtxMap::kDefaultCtxMap)) {
+ writer->Write(1, 1); // default
+ allotment.ReclaimAndCharge(writer, kLayerAC, aux_out);
+ return;
+ }
+ writer->Write(1, 0);
+ for (int j : {0, 1, 2}) {
+ writer->Write(4, dct[j].size());
+ for (int i : dct[j]) {
+ JXL_CHECK(U32Coder::Write(kDCThresholdDist, PackSigned(i), writer));
+ }
+ }
+ writer->Write(4, qft.size());
+ for (uint32_t i : qft) {
+ JXL_CHECK(U32Coder::Write(kQFThresholdDist, i - 1, writer));
+ }
+ EncodeContextMap(ctx_map, block_ctx_map.num_ctxs, writer, kLayerAC, aux_out);
+ allotment.ReclaimAndCharge(writer, kLayerAC, aux_out);
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_context_map.h b/third_party/jpeg-xl/lib/jxl/enc_context_map.h
new file mode 100644
index 0000000000..041e71de7a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_context_map.h
@@ -0,0 +1,35 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_CONTEXT_MAP_H_
+#define LIB_JXL_ENC_CONTEXT_MAP_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/enc_bit_writer.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Max limit is 255 because encoding assumes numbers < 255
+// More clusters can help compression, but makes encode/decode somewhat slower
+static const size_t kClustersLimit = 128;
+
+// Encodes the given context map to the bit stream. The number of different
+// histogram ids is given by num_histograms.
+void EncodeContextMap(const std::vector<uint8_t>& context_map,
+ size_t num_histograms, BitWriter* writer, size_t layer,
+ AuxOut* aux_out);
+
+void EncodeBlockCtxMap(const BlockCtxMap& block_ctx_map, BitWriter* writer,
+ AuxOut* aux_out);
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_CONTEXT_MAP_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_detect_dots.cc b/third_party/jpeg-xl/lib/jxl/enc_detect_dots.cc
new file mode 100644
index 0000000000..5819036987
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_detect_dots.cc
@@ -0,0 +1,626 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_detect_dots.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <cstdio>
+#include <utility>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_detect_dots.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/enc_linalg.h"
+#include "lib/jxl/enc_optimize.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+
+// Set JXL_DEBUG_DOT_DETECT to 1 to enable debugging.
+#ifndef JXL_DEBUG_DOT_DETECT
+#define JXL_DEBUG_DOT_DETECT 0
+#endif
+
+#if JXL_DEBUG_DOT_DETECT
+#include "lib/jxl/enc_aux_out.h"
+#endif
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::Sub;
+
+ImageF SumOfSquareDifferences(const Image3F& forig, const Image3F& smooth,
+ ThreadPool* pool) {
+ const HWY_FULL(float) d;
+ const auto color_coef0 = Set(d, 0.0f);
+ const auto color_coef1 = Set(d, 10.0f);
+ const auto color_coef2 = Set(d, 0.0f);
+
+ ImageF sum_of_squares(forig.xsize(), forig.ysize());
+ JXL_CHECK(RunOnPool(
+ pool, 0, forig.ysize(), ThreadPool::NoInit,
+ [&](const uint32_t task, size_t thread) {
+ const size_t y = static_cast<size_t>(task);
+ const float* JXL_RESTRICT orig_row0 = forig.Plane(0).ConstRow(y);
+ const float* JXL_RESTRICT orig_row1 = forig.Plane(1).ConstRow(y);
+ const float* JXL_RESTRICT orig_row2 = forig.Plane(2).ConstRow(y);
+ const float* JXL_RESTRICT smooth_row0 = smooth.Plane(0).ConstRow(y);
+ const float* JXL_RESTRICT smooth_row1 = smooth.Plane(1).ConstRow(y);
+ const float* JXL_RESTRICT smooth_row2 = smooth.Plane(2).ConstRow(y);
+ float* JXL_RESTRICT sos_row = sum_of_squares.Row(y);
+
+ for (size_t x = 0; x < forig.xsize(); x += Lanes(d)) {
+ auto v0 = Sub(Load(d, orig_row0 + x), Load(d, smooth_row0 + x));
+ auto v1 = Sub(Load(d, orig_row1 + x), Load(d, smooth_row1 + x));
+ auto v2 = Sub(Load(d, orig_row2 + x), Load(d, smooth_row2 + x));
+ v0 = Mul(Mul(v0, v0), color_coef0);
+ v1 = Mul(Mul(v1, v1), color_coef1);
+ v2 = Mul(Mul(v2, v2), color_coef2);
+ const auto sos =
+ Add(v0, Add(v1, v2)); // weighted sum of square diffs
+ Store(sos, d, sos_row + x);
+ }
+ },
+ "ComputeEnergyImage"));
+ return sum_of_squares;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(SumOfSquareDifferences); // Local function
+
+const int kEllipseWindowSize = 5;
+
+namespace {
+struct GaussianEllipse {
+ double x; // position in x
+ double y; // position in y
+ double sigma_x; // scale in x
+ double sigma_y; // scale in y
+ double angle; // ellipse rotation in radians
+ std::array<double, 3> intensity; // intensity in each channel
+
+ // The following variables do not need to be encoded
+ double l2_loss; // error after the Gaussian was fit
+ double l1_loss;
+ double ridge_loss; // the l2_loss plus regularization term
+ double custom_loss; // experimental custom loss
+ std::array<double, 3> bgColor; // best background color
+ size_t neg_pixels; // number of negative pixels when subtracting dot
+ std::array<double, 3> neg_value; // debt due to channel truncation
+};
+double DotGaussianModel(double dx, double dy, double ct, double st,
+ double sigma_x, double sigma_y, double intensity) {
+ double rx = ct * dx + st * dy;
+ double ry = -st * dx + ct * dy;
+ double md = (rx * rx / sigma_x) + (ry * ry / sigma_y);
+ double value = intensity * exp(-0.5 * md);
+ return value;
+}
+
+constexpr bool kOptimizeBackground = true;
+
+// Gaussian that smooths noise but preserves dots
+const WeightsSeparable5& WeightsSeparable5Gaussian0_65() {
+ constexpr float w0 = 0.558311f;
+ constexpr float w1 = 0.210395f;
+ constexpr float w2 = 0.010449f;
+ static constexpr WeightsSeparable5 weights = {
+ {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)},
+ {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}};
+ return weights;
+}
+
+// (Iterated) Gaussian that removes dots.
+const WeightsSeparable5& WeightsSeparable5Gaussian3() {
+ constexpr float w0 = 0.222338f;
+ constexpr float w1 = 0.210431f;
+ constexpr float w2 = 0.1784f;
+ static constexpr WeightsSeparable5 weights = {
+ {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)},
+ {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}};
+ return weights;
+}
+
+ImageF ComputeEnergyImage(const Image3F& orig, Image3F* smooth,
+ ThreadPool* pool) {
+ PROFILER_FUNC;
+
+ // Prepare guidance images for dot selection.
+ Image3F forig(orig.xsize(), orig.ysize());
+ *smooth = Image3F(orig.xsize(), orig.ysize());
+ Rect rect(orig);
+
+ const auto& weights1 = WeightsSeparable5Gaussian0_65();
+ const auto& weights3 = WeightsSeparable5Gaussian3();
+
+ for (size_t c = 0; c < 3; ++c) {
+ // Use forig as temporary storage to reduce memory and keep it warmer.
+ Separable5(orig.Plane(c), rect, weights3, pool, &forig.Plane(c));
+ Separable5(forig.Plane(c), rect, weights3, pool, &smooth->Plane(c));
+ Separable5(orig.Plane(c), rect, weights1, pool, &forig.Plane(c));
+ }
+
+#if JXL_DEBUG_DOT_DETECT
+ AuxOut aux;
+ aux.debug_prefix = "/tmp/sebastian/";
+ aux.DumpImage("filtered", forig);
+ aux.DumpImage("sm", *smooth);
+#endif
+
+ return HWY_DYNAMIC_DISPATCH(SumOfSquareDifferences)(forig, *smooth, pool);
+}
+
+struct Pixel {
+ int x;
+ int y;
+};
+
+Pixel operator+(const Pixel& a, const Pixel& b) {
+ return Pixel{a.x + b.x, a.y + b.y};
+}
+
+// Maximum area in pixels of a ellipse
+const size_t kMaxCCSize = 1000;
+
+// Extracts a connected component from a Binary image where seed is part
+// of the component
+bool ExtractComponent(ImageF* img, std::vector<Pixel>* pixels,
+ const Pixel& seed, double threshold) {
+ PROFILER_FUNC;
+ static const std::vector<Pixel> neighbors{{1, -1}, {1, 0}, {1, 1}, {0, -1},
+ {0, 1}, {-1, -1}, {-1, 1}, {1, 0}};
+ std::vector<Pixel> q{seed};
+ while (!q.empty()) {
+ Pixel current = q.back();
+ q.pop_back();
+ pixels->push_back(current);
+ if (pixels->size() > kMaxCCSize) return false;
+ for (const Pixel& delta : neighbors) {
+ Pixel child = current + delta;
+ if (child.x >= 0 && static_cast<size_t>(child.x) < img->xsize() &&
+ child.y >= 0 && static_cast<size_t>(child.y) < img->ysize()) {
+ float* value = &img->Row(child.y)[child.x];
+ if (*value > threshold) {
+ *value = 0.0;
+ q.push_back(child);
+ }
+ }
+ }
+ }
+ return true;
+}
+
+inline bool PointInRect(const Rect& r, const Pixel& p) {
+ return (static_cast<size_t>(p.x) >= r.x0() &&
+ static_cast<size_t>(p.x) < (r.x0() + r.xsize()) &&
+ static_cast<size_t>(p.y) >= r.y0() &&
+ static_cast<size_t>(p.y) < (r.y0() + r.ysize()));
+}
+
+struct ConnectedComponent {
+ ConnectedComponent(const Rect& bounds, const std::vector<Pixel>&& pixels)
+ : bounds(bounds), pixels(pixels) {}
+ Rect bounds;
+ std::vector<Pixel> pixels;
+ float maxEnergy;
+ float meanEnergy;
+ float varEnergy;
+ float meanBg;
+ float varBg;
+ float score;
+ Pixel mode;
+
+ void CompStats(const ImageF& energy, int extra) {
+ PROFILER_FUNC;
+ maxEnergy = 0.0;
+ meanEnergy = 0.0;
+ varEnergy = 0.0;
+ meanBg = 0.0;
+ varBg = 0.0;
+ int nIn = 0;
+ int nOut = 0;
+ mode.x = 0;
+ mode.y = 0;
+ for (int sy = -extra; sy < (static_cast<int>(bounds.ysize()) + extra);
+ sy++) {
+ int y = sy + static_cast<int>(bounds.y0());
+ if (y < 0 || static_cast<size_t>(y) >= energy.ysize()) continue;
+ const float* JXL_RESTRICT erow = energy.ConstRow(y);
+ for (int sx = -extra; sx < (static_cast<int>(bounds.xsize()) + extra);
+ sx++) {
+ int x = sx + static_cast<int>(bounds.x0());
+ if (x < 0 || static_cast<size_t>(x) >= energy.xsize()) continue;
+ if (erow[x] > maxEnergy) {
+ maxEnergy = erow[x];
+ mode.x = x;
+ mode.y = y;
+ }
+ if (PointInRect(bounds, Pixel{x, y})) {
+ meanEnergy += erow[x];
+ varEnergy += erow[x] * erow[x];
+ nIn++;
+ } else {
+ meanBg += erow[x];
+ varBg += erow[x] * erow[x];
+ nOut++;
+ }
+ }
+ }
+ meanEnergy = meanEnergy / nIn;
+ meanBg = meanBg / nOut;
+ varEnergy = (varEnergy / nIn) - meanEnergy * meanEnergy;
+ varBg = (varBg / nOut) - meanBg * meanBg;
+ score = (meanEnergy - meanBg) / std::sqrt(varBg);
+ }
+};
+
+Rect BoundingRectangle(const std::vector<Pixel>& pixels) {
+ PROFILER_FUNC;
+ JXL_ASSERT(!pixels.empty());
+ int low_x, high_x, low_y, high_y;
+ low_x = high_x = pixels[0].x;
+ low_y = high_y = pixels[0].y;
+ for (const Pixel& p : pixels) {
+ low_x = std::min(low_x, p.x);
+ high_x = std::max(high_x, p.x);
+ low_y = std::min(low_y, p.y);
+ high_y = std::max(high_y, p.y);
+ }
+ return Rect(low_x, low_y, high_x - low_x + 1, high_y - low_y + 1);
+}
+
+std::vector<ConnectedComponent> FindCC(const ImageF& energy, double t_low,
+ double t_high, uint32_t maxWindow,
+ double minScore) {
+ PROFILER_FUNC;
+ const int kExtraRect = 4;
+ ImageF img = CopyImage(energy);
+ std::vector<ConnectedComponent> ans;
+ for (size_t y = 0; y < img.ysize(); y++) {
+ float* JXL_RESTRICT row = img.Row(y);
+ for (size_t x = 0; x < img.xsize(); x++) {
+ if (row[x] > t_high) {
+ std::vector<Pixel> pixels;
+ row[x] = 0.0;
+ bool success = ExtractComponent(
+ &img, &pixels, Pixel{static_cast<int>(x), static_cast<int>(y)},
+ t_low);
+ if (!success) continue;
+#if JXL_DEBUG_DOT_DETECT
+ for (size_t i = 0; i < pixels.size(); i++) {
+ fprintf(stderr, "(%d,%d) ", pixels[i].x, pixels[i].y);
+ }
+ fprintf(stderr, "\n");
+#endif // JXL_DEBUG_DOT_DETECT
+ Rect bounds = BoundingRectangle(pixels);
+ if (bounds.xsize() < maxWindow && bounds.ysize() < maxWindow) {
+ ConnectedComponent cc{bounds, std::move(pixels)};
+ cc.CompStats(energy, kExtraRect);
+ if (cc.score < minScore) continue;
+ JXL_DEBUG(JXL_DEBUG_DOT_DETECT,
+ "cc mode: (%d,%d), max: %f, bgMean: %f bgVar: "
+ "%f bound:(%" PRIuS ",%" PRIuS ",%" PRIuS ",%" PRIuS ")\n",
+ cc.mode.x, cc.mode.y, cc.maxEnergy, cc.meanEnergy,
+ cc.varEnergy, cc.bounds.x0(), cc.bounds.y0(),
+ cc.bounds.xsize(), cc.bounds.ysize());
+ ans.push_back(cc);
+ }
+ }
+ }
+ }
+ return ans;
+}
+
+// TODO (sggonzalez): Adapt this function for the different color spaces or
+// remove it if the color space with the best performance does not need it
+void ComputeDotLosses(GaussianEllipse* ellipse, const ConnectedComponent& cc,
+ const Image3F& img, const Image3F& background) {
+ PROFILER_FUNC;
+ const int rectBounds = 2;
+ const double kIntensityR = 0.0; // 0.015;
+ const double kSigmaR = 0.0; // 0.01;
+ const double kZeroEpsilon = 0.1; // Tolerance to consider a value negative
+ double ct = cos(ellipse->angle), st = sin(ellipse->angle);
+ const std::array<double, 3> channelGains{{1.0, 1.0, 1.0}};
+ int N = 0;
+ ellipse->l1_loss = 0.0;
+ ellipse->l2_loss = 0.0;
+ ellipse->neg_pixels = 0;
+ ellipse->neg_value.fill(0.0);
+ double distMeanModeSq = (cc.mode.x - ellipse->x) * (cc.mode.x - ellipse->x) +
+ (cc.mode.y - ellipse->y) * (cc.mode.y - ellipse->y);
+ ellipse->custom_loss = 0.0;
+ for (int c = 0; c < 3; c++) {
+ for (int sy = -rectBounds;
+ sy < (static_cast<int>(cc.bounds.ysize()) + rectBounds); sy++) {
+ int y = sy + cc.bounds.y0();
+ if (y < 0 || static_cast<size_t>(y) >= img.ysize()) continue;
+ const float* JXL_RESTRICT row = img.ConstPlaneRow(c, y);
+ // bgrow is only used if kOptimizeBackground is false.
+ // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
+ const float* JXL_RESTRICT bgrow = background.ConstPlaneRow(c, y);
+ for (int sx = -rectBounds;
+ sx < (static_cast<int>(cc.bounds.xsize()) + rectBounds); sx++) {
+ int x = sx + cc.bounds.x0();
+ if (x < 0 || static_cast<size_t>(x) >= img.xsize()) continue;
+ double target = row[x];
+ double dotDelta = DotGaussianModel(
+ x - ellipse->x, y - ellipse->y, ct, st, ellipse->sigma_x,
+ ellipse->sigma_y, ellipse->intensity[c]);
+ if (dotDelta > target + kZeroEpsilon) {
+ ellipse->neg_pixels++;
+ ellipse->neg_value[c] += dotDelta - target;
+ }
+ double bkg = kOptimizeBackground ? ellipse->bgColor[c] : bgrow[x];
+ double pred = bkg + dotDelta;
+ double diff = target - pred;
+ double l2 = channelGains[c] * diff * diff;
+ double l1 = channelGains[c] * std::fabs(diff);
+ ellipse->l2_loss += l2;
+ ellipse->l1_loss += l1;
+ double w = DotGaussianModel(x - cc.mode.x, y - cc.mode.y, 1.0, 0.0,
+ 1.0 + ellipse->sigma_x,
+ 1.0 + ellipse->sigma_y, 1.0);
+ ellipse->custom_loss += w * l2;
+ N++;
+ }
+ }
+ }
+ ellipse->l2_loss /= N;
+ ellipse->custom_loss /= N;
+ ellipse->custom_loss += 20.0 * distMeanModeSq + ellipse->neg_value[1];
+ ellipse->l1_loss /= N;
+ double ridgeTerm = kSigmaR * ellipse->sigma_x + kSigmaR * ellipse->sigma_y;
+ for (int c = 0; c < 3; c++) {
+ ridgeTerm += kIntensityR * ellipse->intensity[c] * ellipse->intensity[c];
+ }
+ ellipse->ridge_loss = ellipse->l2_loss + ridgeTerm;
+}
+
+GaussianEllipse FitGaussianFast(const ConnectedComponent& cc,
+ const ImageF& energy, const Image3F& img,
+ const Image3F& background) {
+ PROFILER_FUNC;
+ constexpr bool leastSqIntensity = true;
+ constexpr double kEpsilon = 1e-6;
+ GaussianEllipse ans;
+ constexpr int kRectBounds = (kEllipseWindowSize >> 1);
+
+ // Compute the 1st and 2nd moments of the CC
+ double sum = 0.0;
+ int N = 0;
+ std::array<double, 3> m1{{0.0, 0.0, 0.0}};
+ std::array<double, 3> m2{{0.0, 0.0, 0.0}};
+ std::array<double, 3> color{{0.0, 0.0, 0.0}};
+ std::array<double, 3> bgColor{{0.0, 0.0, 0.0}};
+
+ JXL_DEBUG(JXL_DEBUG_DOT_DETECT,
+ "%" PRIuS " %" PRIuS " %" PRIuS " %" PRIuS "\n", cc.bounds.x0(),
+ cc.bounds.y0(), cc.bounds.xsize(), cc.bounds.ysize());
+ for (int c = 0; c < 3; c++) {
+ color[c] = img.ConstPlaneRow(c, cc.mode.y)[cc.mode.x] -
+ background.ConstPlaneRow(c, cc.mode.y)[cc.mode.x];
+ }
+ double sign = (color[1] > 0) ? 1 : -1;
+ for (int sy = -kRectBounds; sy <= kRectBounds; sy++) {
+ int y = sy + cc.mode.y;
+ if (y < 0 || static_cast<size_t>(y) >= energy.ysize()) continue;
+ const float* JXL_RESTRICT row = img.ConstPlaneRow(1, y);
+ const float* JXL_RESTRICT bgrow = background.ConstPlaneRow(1, y);
+ for (int sx = -kRectBounds; sx <= kRectBounds; sx++) {
+ int x = sx + cc.mode.x;
+ if (x < 0 || static_cast<size_t>(x) >= energy.xsize()) continue;
+ double w = std::max(kEpsilon, sign * (row[x] - bgrow[x]));
+ sum += w;
+
+ m1[0] += w * x;
+ m1[1] += w * y;
+ m2[0] += w * x * x;
+ m2[1] += w * x * y;
+ m2[2] += w * y * y;
+ for (int c = 0; c < 3; c++) {
+ bgColor[c] += background.ConstPlaneRow(c, y)[x];
+ }
+ N++;
+ }
+ }
+ JXL_CHECK(N > 0);
+
+ for (int i = 0; i < 3; i++) {
+ m1[i] /= sum;
+ m2[i] /= sum;
+ bgColor[i] /= N;
+ }
+
+ // Some magic constants
+ constexpr double kSigmaMult = 1.0;
+ constexpr std::array<double, 3> kScaleMult{{1.1, 1.1, 1.1}};
+
+ // Now set the parameters of the Gaussian
+ ans.x = m1[0];
+ ans.y = m1[1];
+ for (int j = 0; j < 3; j++) {
+ ans.intensity[j] = kScaleMult[j] * color[j];
+ }
+
+ ImageD Sigma(2, 2), D(1, 2), U(2, 2);
+ Sigma.Row(0)[0] = m2[0] - m1[0] * m1[0];
+ Sigma.Row(1)[1] = m2[2] - m1[1] * m1[1];
+ Sigma.Row(0)[1] = Sigma.Row(1)[0] = m2[1] - m1[0] * m1[1];
+ ConvertToDiagonal(Sigma, &D, &U);
+ const double* JXL_RESTRICT d = D.ConstRow(0);
+ const double* JXL_RESTRICT u = U.ConstRow(1);
+ int p1 = 0, p2 = 1;
+ if (d[0] < d[1]) std::swap(p1, p2);
+ ans.sigma_x = kSigmaMult * d[p1];
+ ans.sigma_y = kSigmaMult * d[p2];
+ ans.angle = std::atan2(u[p1], u[p2]);
+ ans.l2_loss = 0.0;
+ ans.bgColor = bgColor;
+ if (leastSqIntensity) {
+ GaussianEllipse* ellipse = &ans;
+ double ct = cos(ans.angle), st = sin(ans.angle);
+ // Estimate intensity with least squares (fixed background)
+ for (int c = 0; c < 3; c++) {
+ double gg = 0.0;
+ double gd = 0.0;
+ int yc = static_cast<int>(cc.mode.y);
+ int xc = static_cast<int>(cc.mode.x);
+ for (int y = yc - kRectBounds; y <= yc + kRectBounds; y++) {
+ if (y < 0 || static_cast<size_t>(y) >= img.ysize()) continue;
+ const float* JXL_RESTRICT row = img.ConstPlaneRow(c, y);
+ const float* JXL_RESTRICT bgrow = background.ConstPlaneRow(c, y);
+ for (int x = xc - kRectBounds; x <= xc + kRectBounds; x++) {
+ if (x < 0 || static_cast<size_t>(x) >= img.xsize()) continue;
+ double target = row[x] - bgrow[x];
+ double gaussian =
+ DotGaussianModel(x - ellipse->x, y - ellipse->y, ct, st,
+ ellipse->sigma_x, ellipse->sigma_y, 1.0);
+ gg += gaussian * gaussian;
+ gd += gaussian * target;
+ }
+ }
+ ans.intensity[c] = gd / (gg + 1e-6); // Regularized least squares
+ }
+ }
+ ComputeDotLosses(&ans, cc, img, background);
+ return ans;
+}
+
+GaussianEllipse FitGaussian(const ConnectedComponent& cc, const ImageF& energy,
+ const Image3F& img, const Image3F& background) {
+ auto ellipse = FitGaussianFast(cc, energy, img, background);
+ if (ellipse.sigma_x < ellipse.sigma_y) {
+ std::swap(ellipse.sigma_x, ellipse.sigma_y);
+ ellipse.angle += kPi / 2.0;
+ }
+ ellipse.angle -= kPi * std::floor(ellipse.angle / kPi);
+ if (fabs(ellipse.angle - kPi) < 1e-6 || fabs(ellipse.angle) < 1e-6) {
+ ellipse.angle = 0.0;
+ }
+ JXL_CHECK(ellipse.angle >= 0 && ellipse.angle <= kPi &&
+ ellipse.sigma_x >= ellipse.sigma_y);
+ JXL_DEBUG(JXL_DEBUG_DOT_DETECT,
+ "Ellipse mu=(%lf,%lf) sigma=(%lf,%lf) angle=%lf "
+ "intensity=(%lf,%lf,%lf) bg=(%lf,%lf,%lf) l2_loss=%lf "
+ "custom_loss=%lf, neg_pix=%" PRIuS ", neg_v=(%lf,%lf,%lf)\n",
+ ellipse.x, ellipse.y, ellipse.sigma_x, ellipse.sigma_y,
+ ellipse.angle, ellipse.intensity[0], ellipse.intensity[1],
+ ellipse.intensity[2], ellipse.bgColor[0], ellipse.bgColor[1],
+ ellipse.bgColor[2], ellipse.l2_loss, ellipse.custom_loss,
+ ellipse.neg_pixels, ellipse.neg_value[0], ellipse.neg_value[1],
+ ellipse.neg_value[2]);
+ return ellipse;
+}
+
+} // namespace
+
+std::vector<PatchInfo> DetectGaussianEllipses(
+ const Image3F& opsin, const GaussianDetectParams& params,
+ const EllipseQuantParams& qParams, ThreadPool* pool) {
+ PROFILER_FUNC;
+ std::vector<PatchInfo> dots;
+ Image3F smooth(opsin.xsize(), opsin.ysize());
+ ImageF energy = ComputeEnergyImage(opsin, &smooth, pool);
+#if JXL_DEBUG_DOT_DETECT
+ AuxOut aux;
+ aux.debug_prefix = "/tmp/sebastian/";
+ aux.DumpXybImage("smooth", smooth);
+ aux.DumpPlaneNormalized("energy", energy);
+#endif // JXL_DEBUG_DOT_DETECT
+ std::vector<ConnectedComponent> components = FindCC(
+ energy, params.t_low, params.t_high, params.maxWinSize, params.minScore);
+ size_t numCC =
+ std::min(params.maxCC, (components.size() * params.percCC) / 100);
+ if (components.size() > numCC) {
+ std::sort(
+ components.begin(), components.end(),
+ [](const ConnectedComponent& a, const ConnectedComponent& b) -> bool {
+ return a.score > b.score;
+ });
+ components.erase(components.begin() + numCC, components.end());
+ }
+ for (const auto& cc : components) {
+ GaussianEllipse ellipse = FitGaussian(cc, energy, opsin, smooth);
+ if (ellipse.x < 0.0 ||
+ std::ceil(ellipse.x) >= static_cast<double>(opsin.xsize()) ||
+ ellipse.y < 0.0 ||
+ std::ceil(ellipse.y) >= static_cast<double>(opsin.ysize())) {
+ continue;
+ }
+ if (ellipse.neg_pixels > params.maxNegPixels) continue;
+ double intensity = 0.21 * ellipse.intensity[0] +
+ 0.72 * ellipse.intensity[1] +
+ 0.07 * ellipse.intensity[2];
+ double intensitySq = intensity * intensity;
+ // for (int c = 0; c < 3; c++) {
+ // intensitySq += ellipse.intensity[c] * ellipse.intensity[c];
+ //}
+ double sqDistMeanMode = (ellipse.x - cc.mode.x) * (ellipse.x - cc.mode.x) +
+ (ellipse.y - cc.mode.y) * (ellipse.y - cc.mode.y);
+ if (ellipse.l2_loss < params.maxL2Loss &&
+ ellipse.custom_loss < params.maxCustomLoss &&
+ intensitySq > (params.minIntensity * params.minIntensity) &&
+ sqDistMeanMode < params.maxDistMeanMode * params.maxDistMeanMode) {
+ size_t x0 = cc.bounds.x0();
+ size_t y0 = cc.bounds.y0();
+ dots.emplace_back();
+ dots.back().second.emplace_back(x0, y0);
+ QuantizedPatch& patch = dots.back().first;
+ patch.xsize = cc.bounds.xsize();
+ patch.ysize = cc.bounds.ysize();
+ for (size_t y = 0; y < patch.ysize; y++) {
+ for (size_t x = 0; x < patch.xsize; x++) {
+ for (size_t c = 0; c < 3; c++) {
+ patch.fpixels[c][y * patch.xsize + x] =
+ opsin.ConstPlaneRow(c, y0 + y)[x0 + x] -
+ smooth.ConstPlaneRow(c, y0 + y)[x0 + x];
+ }
+ }
+ }
+ }
+ }
+#if JXL_DEBUG_DOT_DETECT
+ JXL_DEBUG(JXL_DEBUG_DOT_DETECT, "Candidates: %" PRIuS ", Dots: %" PRIuS "\n",
+ components.size(), dots.size());
+ ApplyGaussianEllipses(&smooth, dots, 1.0);
+ aux.DumpXybImage("draw", smooth);
+ ApplyGaussianEllipses(&smooth, dots, -1.0);
+
+ auto qdots = QuantizeGaussianEllipses(dots, qParams);
+ auto deq = DequantizeGaussianEllipses(qdots, qParams);
+ ApplyGaussianEllipses(&smooth, deq, 1.0);
+ aux.DumpXybImage("qdraw", smooth);
+ ApplyGaussianEllipses(&smooth, deq, -1.0);
+#endif // JXL_DEBUG_DOT_DETECT
+ return dots;
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/enc_detect_dots.h b/third_party/jpeg-xl/lib/jxl/enc_detect_dots.h
new file mode 100644
index 0000000000..c3071d9a2f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_detect_dots.h
@@ -0,0 +1,67 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// We attempt to remove dots, or speckle from images using Gaussian blur.
+#ifndef LIB_JXL_ENC_DETECT_DOTS_H_
+#define LIB_JXL_ENC_DETECT_DOTS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+struct GaussianDetectParams {
+ double t_high = 0; // at least one pixel must have larger energy than t_high
+ double t_low = 0; // all pixels must have a larger energy than tLow
+ uint32_t maxWinSize = 0; // discard dots larger than this containing window
+ double maxL2Loss = 0;
+ double maxCustomLoss = 0;
+ double minIntensity = 0; // If the intensity is too low, discard it
+ double maxDistMeanMode = 0; // The mean and the mode must be close
+ size_t maxNegPixels = 0; // Maximum number of negative pixel
+ size_t minScore = 0;
+ size_t maxCC = 50; // Maximum number of CC to keep
+ size_t percCC = 15; // Percentage in [0,100] of CC to keep
+};
+
+// Ellipse Quantization Params
+struct EllipseQuantParams {
+ size_t xsize; // Image size in x
+ size_t ysize; // Image size in y
+ size_t qPosition; // Position quantization delta
+ // Quantization for the Gaussian sigma parameters
+ double minSigma;
+ double maxSigma;
+ size_t qSigma; // number of quantization levels
+ // Quantization for the rotation angle (between -pi and pi)
+ size_t qAngle;
+ // Quantization for the intensity
+ std::array<double, 3> minIntensity;
+ std::array<double, 3> maxIntensity;
+ std::array<size_t, 3> qIntensity; // number of quantization levels
+ // Extra parameters for the encoding
+ bool subtractQuantized; // Should we subtract quantized or detected dots?
+ float ytox;
+ float ytob;
+
+ void QuantPositionSize(size_t* xsize, size_t* ysize) const;
+};
+
+// Detects dots in XYB image.
+std::vector<PatchInfo> DetectGaussianEllipses(
+ const Image3F& opsin, const GaussianDetectParams& params,
+ const EllipseQuantParams& qParams, ThreadPool* pool);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_DETECT_DOTS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_dot_dictionary.cc b/third_party/jpeg-xl/lib/jxl/enc_dot_dictionary.cc
new file mode 100644
index 0000000000..2d22c1edb8
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_dot_dictionary.cc
@@ -0,0 +1,71 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_dot_dictionary.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#include <array>
+#include <utility>
+
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_detect_dots.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// Private implementation of Dictionary Encode/Decode
+namespace {
+
+/* Quantization constants for Ellipse dots */
+const size_t kEllipsePosQ = 2; // Quantization level for the position
+const double kEllipseMinSigma = 0.1; // Minimum sigma value
+const double kEllipseMaxSigma = 3.1; // Maximum Sigma value
+const size_t kEllipseSigmaQ = 16; // Number of quantization levels for sigma
+const size_t kEllipseAngleQ = 8; // Quantization level for the angle
+// TODO: fix these values.
+const std::array<double, 3> kEllipseMinIntensity{{-0.05, 0.0, -0.5}};
+const std::array<double, 3> kEllipseMaxIntensity{{0.05, 1.0, 0.4}};
+const std::array<size_t, 3> kEllipseIntensityQ{{10, 36, 10}};
+} // namespace
+
+std::vector<PatchInfo> FindDotDictionary(const CompressParams& cparams,
+ const Image3F& opsin,
+ const ColorCorrelationMap& cmap,
+ ThreadPool* pool) {
+ if (ApplyOverride(cparams.dots,
+ cparams.butteraugli_distance >= kMinButteraugliForDots)) {
+ GaussianDetectParams ellipse_params;
+ ellipse_params.t_high = 0.04;
+ ellipse_params.t_low = 0.02;
+ ellipse_params.maxWinSize = 5;
+ ellipse_params.maxL2Loss = 0.005;
+ ellipse_params.maxCustomLoss = 300;
+ ellipse_params.minIntensity = 0.12;
+ ellipse_params.maxDistMeanMode = 1.0;
+ ellipse_params.maxNegPixels = 0;
+ ellipse_params.minScore = 12.0;
+ ellipse_params.maxCC = 100;
+ ellipse_params.percCC = 100;
+ EllipseQuantParams qParams{
+ opsin.xsize(), opsin.ysize(), kEllipsePosQ,
+ kEllipseMinSigma, kEllipseMaxSigma, kEllipseSigmaQ,
+ kEllipseAngleQ, kEllipseMinIntensity, kEllipseMaxIntensity,
+ kEllipseIntensityQ, kEllipsePosQ <= 5, cmap.YtoXRatio(0),
+ cmap.YtoBRatio(0)};
+
+ return DetectGaussianEllipses(opsin, ellipse_params, qParams, pool);
+ }
+ return {};
+}
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_dot_dictionary.h b/third_party/jpeg-xl/lib/jxl/enc_dot_dictionary.h
new file mode 100644
index 0000000000..2ba4393f30
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_dot_dictionary.h
@@ -0,0 +1,34 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_DOT_DICTIONARY_H_
+#define LIB_JXL_ENC_DOT_DICTIONARY_H_
+
+// Dots are stored in a dictionary to avoid storing similar dots multiple
+// times.
+
+#include <stddef.h>
+
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+std::vector<PatchInfo> FindDotDictionary(const CompressParams& cparams,
+ const Image3F& opsin,
+ const ColorCorrelationMap& cmap,
+ ThreadPool* pool);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_DOT_DICTIONARY_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_entropy_coder.cc b/third_party/jpeg-xl/lib/jxl/enc_entropy_coder.cc
new file mode 100644
index 0000000000..c634445e83
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_entropy_coder.cc
@@ -0,0 +1,274 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_entropy_coder.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_entropy_coder.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_context_map.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::AndNot;
+using hwy::HWY_NAMESPACE::Eq;
+using hwy::HWY_NAMESPACE::GetLane;
+
+// Returns number of non-zero coefficients (but skip LLF).
+// We cannot rely on block[] being all-zero bits, so first truncate to integer.
+// Also writes the per-8x8 block nzeros starting at nzeros_pos.
+int32_t NumNonZeroExceptLLF(const size_t cx, const size_t cy,
+ const AcStrategy acs, const size_t covered_blocks,
+ const size_t log2_covered_blocks,
+ const int32_t* JXL_RESTRICT block,
+ const size_t nzeros_stride,
+ int32_t* JXL_RESTRICT nzeros_pos) {
+ const HWY_CAPPED(int32_t, kBlockDim) di;
+
+ const auto zero = Zero(di);
+ // Add FF..FF for every zero coefficient, negate to get #zeros.
+ auto neg_sum_zero = zero;
+
+ {
+ // Mask sufficient for one row of coefficients.
+ HWY_ALIGN const int32_t
+ llf_mask_lanes[AcStrategy::kMaxCoeffBlocks * (1 + kBlockDim)] = {
+ -1, -1, -1, -1};
+ // First cx=1,2,4 elements are FF..FF, others 0.
+ const int32_t* llf_mask_pos =
+ llf_mask_lanes + AcStrategy::kMaxCoeffBlocks - cx;
+
+ // Rows with LLF: mask out the LLF
+ for (size_t y = 0; y < cy; y++) {
+ for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
+ const auto llf_mask = LoadU(di, llf_mask_pos + x);
+
+ // LLF counts as zero so we don't include it in nzeros.
+ const auto coef =
+ AndNot(llf_mask, Load(di, &block[y * cx * kBlockDim + x]));
+
+ neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
+ }
+ }
+ }
+
+ // Remaining rows: no mask
+ for (size_t y = cy; y < cy * kBlockDim; y++) {
+ for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) {
+ const auto coef = Load(di, &block[y * cx * kBlockDim + x]);
+ neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
+ }
+ }
+
+ // We want area - sum_zero, add because neg_sum_zero is already negated.
+ const int32_t nzeros =
+ int32_t(cx * cy * kDCTBlockSize) + GetLane(SumOfLanes(di, neg_sum_zero));
+
+ const int32_t shifted_nzeros = static_cast<int32_t>(
+ (nzeros + covered_blocks - 1) >> log2_covered_blocks);
+ // Need non-canonicalized dimensions!
+ for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
+ for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
+ nzeros_pos[x + y * nzeros_stride] = shifted_nzeros;
+ }
+ }
+
+ return nzeros;
+}
+
+// Specialization for 8x8, where only top-left is LLF/DC.
+// About 1% overall speedup vs. NumNonZeroExceptLLF.
+int32_t NumNonZero8x8ExceptDC(const int32_t* JXL_RESTRICT block,
+ int32_t* JXL_RESTRICT nzeros_pos) {
+ const HWY_CAPPED(int32_t, kBlockDim) di;
+
+ const auto zero = Zero(di);
+ // Add FF..FF for every zero coefficient, negate to get #zeros.
+ auto neg_sum_zero = zero;
+
+ {
+ // First row has DC, so mask
+ const size_t y = 0;
+ HWY_ALIGN const int32_t dc_mask_lanes[kBlockDim] = {-1};
+
+ for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
+ const auto dc_mask = Load(di, dc_mask_lanes + x);
+
+ // DC counts as zero so we don't include it in nzeros.
+ const auto coef = AndNot(dc_mask, Load(di, &block[y * kBlockDim + x]));
+
+ neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
+ }
+ }
+
+ // Remaining rows: no mask
+ for (size_t y = 1; y < kBlockDim; y++) {
+ for (size_t x = 0; x < kBlockDim; x += Lanes(di)) {
+ const auto coef = Load(di, &block[y * kBlockDim + x]);
+ neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero)));
+ }
+ }
+
+ // We want 64 - sum_zero, add because neg_sum_zero is already negated.
+ const int32_t nzeros =
+ int32_t(kDCTBlockSize) + GetLane(SumOfLanes(di, neg_sum_zero));
+
+ *nzeros_pos = nzeros;
+
+ return nzeros;
+}
+
+// The number of nonzeros of each block is predicted from the top and the left
+// blocks, with opportune scaling to take into account the number of blocks of
+// each strategy. The predicted number of nonzeros divided by two is used as a
+// context; if this number is above 63, a specific context is used. If the
+// number of nonzeros of a strategy is above 63, it is written directly using a
+// fixed number of bits (that depends on the size of the strategy).
+void TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
+ const Rect& rect,
+ const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
+ const AcStrategyImage& ac_strategy,
+ YCbCrChromaSubsampling cs,
+ Image3I* JXL_RESTRICT tmp_num_nzeroes,
+ std::vector<Token>* JXL_RESTRICT output,
+ const ImageB& qdc, const ImageI& qf,
+ const BlockCtxMap& block_ctx_map) {
+ const size_t xsize_blocks = rect.xsize();
+ const size_t ysize_blocks = rect.ysize();
+
+ // TODO(user): update the estimate: usually less coefficients are used.
+ output->reserve(output->size() +
+ 3 * xsize_blocks * ysize_blocks * kDCTBlockSize);
+
+ size_t offset[3] = {};
+ const size_t nzeros_stride = tmp_num_nzeroes->PixelsPerRow();
+ for (size_t by = 0; by < ysize_blocks; ++by) {
+ size_t sby[3] = {by >> cs.VShift(0), by >> cs.VShift(1),
+ by >> cs.VShift(2)};
+ int32_t* JXL_RESTRICT row_nzeros[3] = {
+ tmp_num_nzeroes->PlaneRow(0, sby[0]),
+ tmp_num_nzeroes->PlaneRow(1, sby[1]),
+ tmp_num_nzeroes->PlaneRow(2, sby[2]),
+ };
+ const int32_t* JXL_RESTRICT row_nzeros_top[3] = {
+ sby[0] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(0, sby[0] - 1),
+ sby[1] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(1, sby[1] - 1),
+ sby[2] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(2, sby[2] - 1),
+ };
+ const uint8_t* JXL_RESTRICT row_qdc =
+ qdc.ConstRow(rect.y0() + by) + rect.x0();
+ const int32_t* JXL_RESTRICT row_qf = rect.ConstRow(qf, by);
+ AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by);
+ for (size_t bx = 0; bx < xsize_blocks; ++bx) {
+ AcStrategy acs = acs_row[bx];
+ if (!acs.IsFirstBlock()) continue;
+ size_t sbx[3] = {bx >> cs.HShift(0), bx >> cs.HShift(1),
+ bx >> cs.HShift(2)};
+ size_t cx = acs.covered_blocks_x();
+ size_t cy = acs.covered_blocks_y();
+ const size_t covered_blocks = cx * cy; // = #LLF coefficients
+ const size_t log2_covered_blocks =
+ Num0BitsBelowLS1Bit_Nonzero(covered_blocks);
+ const size_t size = covered_blocks * kDCTBlockSize;
+
+ CoefficientLayout(&cy, &cx); // swap cx/cy to canonical order
+
+ for (int c : {1, 0, 2}) {
+ if (sbx[c] << cs.HShift(c) != bx) continue;
+ if (sby[c] << cs.VShift(c) != by) continue;
+ const int32_t* JXL_RESTRICT block = ac_rows[c] + offset[c];
+
+ int32_t nzeros =
+ (covered_blocks == 1)
+ ? NumNonZero8x8ExceptDC(block, row_nzeros[c] + sbx[c])
+ : NumNonZeroExceptLLF(cx, cy, acs, covered_blocks,
+ log2_covered_blocks, block, nzeros_stride,
+ row_nzeros[c] + sbx[c]);
+
+ int ord = kStrategyOrder[acs.RawStrategy()];
+ const coeff_order_t* JXL_RESTRICT order =
+ &orders[CoeffOrderOffset(ord, c)];
+
+ int32_t predicted_nzeros =
+ PredictFromTopAndLeft(row_nzeros_top[c], row_nzeros[c], sbx[c], 32);
+ size_t block_ctx =
+ block_ctx_map.Context(row_qdc[bx], row_qf[sbx[c]], ord, c);
+ const int32_t nzero_ctx =
+ block_ctx_map.NonZeroContext(predicted_nzeros, block_ctx);
+
+ output->emplace_back(nzero_ctx, nzeros);
+ const size_t histo_offset =
+ block_ctx_map.ZeroDensityContextsOffset(block_ctx);
+ // Skip LLF.
+ size_t prev = (nzeros > static_cast<ssize_t>(size / 16) ? 0 : 1);
+ for (size_t k = covered_blocks; k < size && nzeros != 0; ++k) {
+ int32_t coeff = block[order[k]];
+ size_t ctx =
+ histo_offset + ZeroDensityContext(nzeros, k, covered_blocks,
+ log2_covered_blocks, prev);
+ uint32_t u_coeff = PackSigned(coeff);
+ output->emplace_back(ctx, u_coeff);
+ prev = coeff != 0;
+ nzeros -= prev;
+ }
+ JXL_DASSERT(nzeros == 0);
+ offset[c] += size;
+ }
+ }
+ }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(TokenizeCoefficients);
+void TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
+ const Rect& rect,
+ const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
+ const AcStrategyImage& ac_strategy,
+ YCbCrChromaSubsampling cs,
+ Image3I* JXL_RESTRICT tmp_num_nzeroes,
+ std::vector<Token>* JXL_RESTRICT output,
+ const ImageB& qdc, const ImageI& qf,
+ const BlockCtxMap& block_ctx_map) {
+ return HWY_DYNAMIC_DISPATCH(TokenizeCoefficients)(
+ orders, rect, ac_rows, ac_strategy, cs, tmp_num_nzeroes, output, qdc, qf,
+ block_ctx_map);
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/enc_entropy_coder.h b/third_party/jpeg-xl/lib/jxl/enc_entropy_coder.h
new file mode 100644
index 0000000000..7dfc71c726
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_entropy_coder.h
@@ -0,0 +1,46 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_ENTROPY_CODER_H_
+#define LIB_JXL_ENC_ENTROPY_CODER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ac_context.h" // BlockCtxMap
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/frame_header.h" // YCbCrChromaSubsampling
+#include "lib/jxl/image.h"
+
+// Entropy coding and context modeling of DC and AC coefficients, as well as AC
+// strategy and quantization field.
+
+namespace jxl {
+
+// Generate DCT NxN quantized AC values tokens.
+// Only the subset "rect" [in units of blocks] within all images.
+// See also DecodeACVarBlock.
+void TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders,
+ const Rect& rect,
+ const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows,
+ const AcStrategyImage& ac_strategy,
+ YCbCrChromaSubsampling cs,
+ Image3I* JXL_RESTRICT tmp_num_nzeroes,
+ std::vector<Token>* JXL_RESTRICT output,
+ const ImageB& qdc, const ImageI& qf,
+ const BlockCtxMap& block_ctx_map);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_ENTROPY_CODER_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_external_image.cc b/third_party/jpeg-xl/lib/jxl/enc_external_image.cc
new file mode 100644
index 0000000000..1408746476
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_external_image.cc
@@ -0,0 +1,183 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_external_image.h"
+
+#include <jxl/types.h>
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/float.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+
+namespace jxl {
+namespace {
+
+size_t JxlDataTypeBytes(JxlDataType data_type) {
+ switch (data_type) {
+ case JXL_TYPE_UINT8:
+ return 1;
+ case JXL_TYPE_UINT16:
+ return 2;
+ case JXL_TYPE_FLOAT16:
+ return 2;
+ case JXL_TYPE_FLOAT:
+ return 4;
+ default:
+ return 0;
+ }
+}
+
+} // namespace
+
+Status ConvertFromExternal(Span<const uint8_t> bytes, size_t xsize,
+ size_t ysize, size_t bits_per_sample,
+ JxlPixelFormat format, size_t c, ThreadPool* pool,
+ ImageF* channel) {
+ if (format.data_type == JXL_TYPE_UINT8) {
+ JXL_RETURN_IF_ERROR(bits_per_sample > 0 && bits_per_sample <= 8);
+ } else if (format.data_type == JXL_TYPE_UINT16) {
+ JXL_RETURN_IF_ERROR(bits_per_sample > 8 && bits_per_sample <= 16);
+ } else if (format.data_type == JXL_TYPE_FLOAT16) {
+ JXL_RETURN_IF_ERROR(bits_per_sample == 16);
+ } else if (format.data_type == JXL_TYPE_FLOAT) {
+ JXL_RETURN_IF_ERROR(bits_per_sample == 32);
+ } else {
+ JXL_FAILURE("unsupported pixel format data type %d", format.data_type);
+ }
+ size_t bytes_per_channel = JxlDataTypeBytes(format.data_type);
+ size_t bytes_per_pixel = format.num_channels * bytes_per_channel;
+ size_t pixel_offset = c * bytes_per_channel;
+ // Only for uint8/16.
+ float scale = 1. / ((1ull << bits_per_sample) - 1);
+
+ const size_t last_row_size = xsize * bytes_per_pixel;
+ const size_t align = format.align;
+ const size_t row_size =
+ (align > 1 ? jxl::DivCeil(last_row_size, align) * align : last_row_size);
+ const size_t bytes_to_read = row_size * (ysize - 1) + last_row_size;
+ if (xsize == 0 || ysize == 0) return JXL_FAILURE("Empty image");
+ if (bytes.size() < bytes_to_read) {
+ return JXL_FAILURE("Buffer size is too small, expected: %" PRIuS
+ " got: %" PRIuS " (Image: %" PRIuS "x%" PRIuS
+ "x%u, bytes_per_channel: %" PRIuS ")",
+ bytes_to_read, bytes.size(), xsize, ysize,
+ format.num_channels, bytes_per_channel);
+ }
+ JXL_ASSERT(channel->xsize() == xsize);
+ JXL_ASSERT(channel->ysize() == ysize);
+ // Too large buffer is likely an application bug, so also fail for that.
+ // Do allow padding to stride in last row though.
+ if (bytes.size() > row_size * ysize) {
+ return JXL_FAILURE("Buffer size is too large");
+ }
+
+ const bool little_endian =
+ format.endianness == JXL_LITTLE_ENDIAN ||
+ (format.endianness == JXL_NATIVE_ENDIAN && IsLittleEndian());
+
+ const uint8_t* const in = bytes.data();
+
+ std::atomic<size_t> error_count = {0};
+
+ const auto convert_row = [&](const uint32_t task, size_t /*thread*/) {
+ const size_t y = task;
+ size_t offset = row_size * task + pixel_offset;
+ float* JXL_RESTRICT row_out = channel->Row(y);
+ const auto save_value = [&](size_t index, float value) {
+ row_out[index] = value;
+ };
+ if (!LoadFloatRow(in + offset, xsize, bytes_per_pixel, format.data_type,
+ little_endian, scale, save_value)) {
+ error_count++;
+ }
+ };
+ JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast<uint32_t>(ysize),
+ ThreadPool::NoInit, convert_row,
+ "ConvertExtraChannel"));
+
+ if (error_count) {
+ JXL_FAILURE("unsupported pixel format data type");
+ }
+
+ return true;
+}
+Status ConvertFromExternal(Span<const uint8_t> bytes, size_t xsize,
+ size_t ysize, const ColorEncoding& c_current,
+ size_t bits_per_sample, JxlPixelFormat format,
+ ThreadPool* pool, ImageBundle* ib) {
+ const size_t color_channels = c_current.Channels();
+ bool has_alpha = format.num_channels == 2 || format.num_channels == 4;
+ if (format.num_channels < color_channels) {
+ return JXL_FAILURE("Expected %" PRIuS
+ " color channels, received only %u channels",
+ color_channels, format.num_channels);
+ }
+
+ Image3F color(xsize, ysize);
+ for (size_t c = 0; c < color_channels; ++c) {
+ JXL_RETURN_IF_ERROR(ConvertFromExternal(bytes, xsize, ysize,
+ bits_per_sample, format, c, pool,
+ &color.Plane(c)));
+ }
+ if (color_channels == 1) {
+ CopyImageTo(color.Plane(0), &color.Plane(1));
+ CopyImageTo(color.Plane(0), &color.Plane(2));
+ }
+ ib->SetFromImage(std::move(color), c_current);
+
+ // Passing an interleaved image with an alpha channel to an image that doesn't
+ // have alpha channel just discards the passed alpha channel.
+ if (has_alpha && ib->HasAlpha()) {
+ ImageF alpha(xsize, ysize);
+ JXL_RETURN_IF_ERROR(
+ ConvertFromExternal(bytes, xsize, ysize, bits_per_sample, format,
+ format.num_channels - 1, pool, &alpha));
+ ib->SetAlpha(std::move(alpha));
+ } else if (!has_alpha && ib->HasAlpha()) {
+ // if alpha is not passed, but it is expected, then assume
+ // it is all-opaque
+ ImageF alpha(xsize, ysize);
+ FillImage(1.0f, &alpha);
+ ib->SetAlpha(std::move(alpha));
+ }
+
+ return true;
+}
+
+Status BufferToImageF(const JxlPixelFormat& pixel_format, size_t xsize,
+ size_t ysize, const void* buffer, size_t size,
+ ThreadPool* pool, ImageF* channel) {
+ size_t bitdepth = JxlDataTypeBytes(pixel_format.data_type) * kBitsPerByte;
+ return ConvertFromExternal(
+ jxl::Span<const uint8_t>(static_cast<const uint8_t*>(buffer), size),
+ xsize, ysize, bitdepth, pixel_format, 0, pool, channel);
+}
+
+Status BufferToImageBundle(const JxlPixelFormat& pixel_format, uint32_t xsize,
+ uint32_t ysize, const void* buffer, size_t size,
+ jxl::ThreadPool* pool,
+ const jxl::ColorEncoding& c_current,
+ jxl::ImageBundle* ib) {
+ size_t bitdepth = JxlDataTypeBytes(pixel_format.data_type) * kBitsPerByte;
+ JXL_RETURN_IF_ERROR(ConvertFromExternal(
+ jxl::Span<const uint8_t>(static_cast<const uint8_t*>(buffer), size),
+ xsize, ysize, c_current, bitdepth, pixel_format, pool, ib));
+ ib->VerifyMetadata();
+
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_external_image.h b/third_party/jpeg-xl/lib/jxl/enc_external_image.h
new file mode 100644
index 0000000000..3b2b295076
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_external_image.h
@@ -0,0 +1,45 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_EXTERNAL_IMAGE_H_
+#define LIB_JXL_ENC_EXTERNAL_IMAGE_H_
+
+// Interleaved image for color transforms and Codec.
+
+#include <jxl/types.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+Status ConvertFromExternal(Span<const uint8_t> bytes, size_t xsize,
+ size_t ysize, size_t bits_per_sample,
+ JxlPixelFormat format, size_t c, ThreadPool* pool,
+ ImageF* channel);
+
+// Convert an interleaved pixel buffer to the internal ImageBundle
+// representation. This is the opposite of ConvertToExternal().
+Status ConvertFromExternal(Span<const uint8_t> bytes, size_t xsize,
+ size_t ysize, const ColorEncoding& c_current,
+ size_t bits_per_sample, JxlPixelFormat format,
+ ThreadPool* pool, ImageBundle* ib);
+Status BufferToImageF(const JxlPixelFormat& pixel_format, size_t xsize,
+ size_t ysize, const void* buffer, size_t size,
+ ThreadPool* pool, ImageF* channel);
+Status BufferToImageBundle(const JxlPixelFormat& pixel_format, uint32_t xsize,
+ uint32_t ysize, const void* buffer, size_t size,
+ jxl::ThreadPool* pool,
+ const jxl::ColorEncoding& c_current,
+ jxl::ImageBundle* ib);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_EXTERNAL_IMAGE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_external_image_gbench.cc b/third_party/jpeg-xl/lib/jxl/enc_external_image_gbench.cc
new file mode 100644
index 0000000000..4b7147817a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_external_image_gbench.cc
@@ -0,0 +1,46 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "benchmark/benchmark.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+namespace {
+
+// Encoder case, deinterleaves a buffer.
+void BM_EncExternalImage_ConvertImageRGBA(benchmark::State& state) {
+ const size_t kNumIter = 5;
+ size_t xsize = state.range();
+ size_t ysize = state.range();
+
+ ImageMetadata im;
+ im.SetAlphaBits(8);
+ ImageBundle ib(&im);
+
+ std::vector<uint8_t> interleaved(xsize * ysize * 4);
+ JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0};
+ for (auto _ : state) {
+ for (size_t i = 0; i < kNumIter; ++i) {
+ JXL_CHECK(ConvertFromExternal(
+ Span<const uint8_t>(interleaved.data(), interleaved.size()), xsize,
+ ysize,
+ /*c_current=*/ColorEncoding::SRGB(),
+ /*bits_per_sample=*/8, format,
+ /*pool=*/nullptr, &ib));
+ }
+ }
+
+ // Pixels per second.
+ state.SetItemsProcessed(kNumIter * state.iterations() * xsize * ysize);
+ state.SetBytesProcessed(kNumIter * state.iterations() * interleaved.size());
+}
+
+BENCHMARK(BM_EncExternalImage_ConvertImageRGBA)
+ ->RangeMultiplier(2)
+ ->Range(256, 2048);
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_external_image_test.cc b/third_party/jpeg-xl/lib/jxl/enc_external_image_test.cc
new file mode 100644
index 0000000000..7be8d45f2d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_external_image_test.cc
@@ -0,0 +1,79 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_external_image.h"
+
+#include <array>
+#include <new>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+#if !defined(JXL_CRASH_ON_ERROR)
+TEST(ExternalImageTest, InvalidSize) {
+ ImageMetadata im;
+ im.SetAlphaBits(8);
+ ImageBundle ib(&im);
+
+ JxlPixelFormat format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ const uint8_t buf[10 * 100 * 8] = {};
+ EXPECT_FALSE(ConvertFromExternal(
+ Span<const uint8_t>(buf, 10), /*xsize=*/10, /*ysize=*/100,
+ /*c_current=*/ColorEncoding::SRGB(),
+ /*bits_per_sample=*/16, format, nullptr, &ib));
+ EXPECT_FALSE(ConvertFromExternal(
+ Span<const uint8_t>(buf, sizeof(buf) - 1), /*xsize=*/10, /*ysize=*/100,
+ /*c_current=*/ColorEncoding::SRGB(),
+ /*bits_per_sample=*/16, format, nullptr, &ib));
+ EXPECT_TRUE(
+ ConvertFromExternal(Span<const uint8_t>(buf, sizeof(buf)), /*xsize=*/10,
+ /*ysize=*/100, /*c_current=*/ColorEncoding::SRGB(),
+ /*bits_per_sample=*/16, format, nullptr, &ib));
+}
+#endif
+
+TEST(ExternalImageTest, AlphaMissing) {
+ ImageMetadata im;
+ im.SetAlphaBits(0); // No alpha
+ ImageBundle ib(&im);
+
+ const size_t xsize = 10;
+ const size_t ysize = 20;
+ const uint8_t buf[xsize * ysize * 4] = {};
+
+ JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_BIG_ENDIAN, 0};
+ // has_alpha is true but the ImageBundle has no alpha. Alpha channel should
+ // be ignored.
+ EXPECT_TRUE(ConvertFromExternal(Span<const uint8_t>(buf, sizeof(buf)), xsize,
+ ysize,
+ /*c_current=*/ColorEncoding::SRGB(),
+ /*bits_per_sample=*/8, format, nullptr, &ib));
+ EXPECT_FALSE(ib.HasAlpha());
+}
+
+TEST(ExternalImageTest, AlphaPremultiplied) {
+ ImageMetadata im;
+ im.SetAlphaBits(8, true);
+
+ ImageBundle ib(&im);
+ const size_t xsize = 10;
+ const size_t ysize = 20;
+ const size_t size = xsize * ysize * 8;
+ const uint8_t buf[size] = {};
+
+ JxlPixelFormat format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ EXPECT_TRUE(BufferToImageBundle(format, xsize, ysize, buf, size, nullptr,
+ ColorEncoding::SRGB(), &ib));
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_fast_lossless.cc b/third_party/jpeg-xl/lib/jxl/enc_fast_lossless.cc
new file mode 100644
index 0000000000..286990ee8a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_fast_lossless.cc
@@ -0,0 +1,3860 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef FJXL_SELF_INCLUDE
+
+#include "lib/jxl/enc_fast_lossless.h"
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <algorithm>
+#include <array>
+#include <limits>
+#include <memory>
+#include <vector>
+
+// Enable NEON and AVX2/AVX512 if not asked to do otherwise and the compilers
+// support it.
+#if defined(__aarch64__) || defined(_M_ARM64)
+#include <arm_neon.h>
+
+#ifndef FJXL_ENABLE_NEON
+#define FJXL_ENABLE_NEON 1
+#endif
+
+#elif (defined(__x86_64__) || defined(_M_X64)) && !defined(_MSC_VER)
+#include <immintrin.h>
+
+// manually add _mm512_cvtsi512_si32 definition if missing
+// (e.g. with Xcode on macOS Mojave)
+// copied from gcc 11.1.0 include/avx512fintrin.h line 14367-14373
+#if defined(__clang__) && \
+ ((!defined(__apple_build_version__) && __clang_major__ < 10) || \
+ (defined(__apple_build_version__) && __apple_build_version__ < 12000032))
+inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtsi512_si32(__m512i __A) {
+ __v16si __B = (__v16si)__A;
+ return __B[0];
+}
+#endif
+
+// TODO(veluca): MSVC support for dynamic dispatch.
+#if defined(__clang__) || defined(__GNUC__)
+
+#ifndef FJXL_ENABLE_AVX2
+#define FJXL_ENABLE_AVX2 1
+#endif
+
+#ifndef FJXL_ENABLE_AVX512
+// On clang-7 or earlier, and gcc-10 or earlier, AVX512 seems broken.
+#if (defined(__clang__) && \
+ (!defined(__apple_build_version__) && __clang_major__ > 7) || \
+ (defined(__apple_build_version__) && \
+ __apple_build_version__ > 10010046)) || \
+ (defined(__GNUC__) && __GNUC__ > 10)
+#define FJXL_ENABLE_AVX512 1
+#endif
+#endif
+
+#endif
+
+#endif
+
+#ifndef FJXL_ENABLE_NEON
+#define FJXL_ENABLE_NEON 0
+#endif
+
+#ifndef FJXL_ENABLE_AVX2
+#define FJXL_ENABLE_AVX2 0
+#endif
+
+#ifndef FJXL_ENABLE_AVX512
+#define FJXL_ENABLE_AVX512 0
+#endif
+
+namespace {
+#if defined(_MSC_VER) && !defined(__clang__)
+#define FJXL_INLINE __forceinline
+FJXL_INLINE uint32_t FloorLog2(uint32_t v) {
+ unsigned long index;
+ _BitScanReverse(&index, v);
+ return index;
+}
+FJXL_INLINE uint32_t CtzNonZero(uint64_t v) {
+ unsigned long index;
+ _BitScanForward(&index, v);
+ return index;
+}
+#else
+#define FJXL_INLINE inline __attribute__((always_inline))
+FJXL_INLINE uint32_t FloorLog2(uint32_t v) {
+ return v ? 31 - __builtin_clz(v) : 0;
+}
+FJXL_INLINE uint32_t CtzNonZero(uint64_t v) { return __builtin_ctzll(v); }
+#endif
+
+// Compiles to a memcpy on little-endian systems.
+FJXL_INLINE void StoreLE64(uint8_t* tgt, uint64_t data) {
+#if (!defined(__BYTE_ORDER__) || (__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__))
+ for (int i = 0; i < 8; i++) {
+ tgt[i] = (data >> (i * 8)) & 0xFF;
+ }
+#else
+ memcpy(tgt, &data, 8);
+#endif
+}
+
+FJXL_INLINE size_t AddBits(uint32_t count, uint64_t bits, uint8_t* data_buf,
+ size_t& bits_in_buffer, uint64_t& bit_buffer) {
+ bit_buffer |= bits << bits_in_buffer;
+ bits_in_buffer += count;
+ StoreLE64(data_buf, bit_buffer);
+ size_t bytes_in_buffer = bits_in_buffer / 8;
+ bits_in_buffer -= bytes_in_buffer * 8;
+ bit_buffer >>= bytes_in_buffer * 8;
+ return bytes_in_buffer;
+}
+
+struct BitWriter {
+ void Allocate(size_t maximum_bit_size) {
+ assert(data == nullptr);
+ // Leave some padding.
+ data.reset(static_cast<uint8_t*>(malloc(maximum_bit_size / 8 + 64)));
+ }
+
+ void Write(uint32_t count, uint64_t bits) {
+ bytes_written += AddBits(count, bits, data.get() + bytes_written,
+ bits_in_buffer, buffer);
+ }
+
+ void ZeroPadToByte() {
+ if (bits_in_buffer != 0) {
+ Write(8 - bits_in_buffer, 0);
+ }
+ }
+
+ FJXL_INLINE void WriteMultiple(const uint64_t* nbits, const uint64_t* bits,
+ size_t n) {
+ // Necessary because Write() is only guaranteed to work with <=56 bits.
+ // Trying to SIMD-fy this code results in lower speed (and definitely less
+ // clarity).
+ {
+ for (size_t i = 0; i < n; i++) {
+ this->buffer |= bits[i] << this->bits_in_buffer;
+ memcpy(this->data.get() + this->bytes_written, &this->buffer, 8);
+ uint64_t shift = 64 - this->bits_in_buffer;
+ this->bits_in_buffer += nbits[i];
+ // This `if` seems to be faster than using ternaries.
+ if (this->bits_in_buffer >= 64) {
+ uint64_t next_buffer = bits[i] >> shift;
+ this->buffer = next_buffer;
+ this->bits_in_buffer -= 64;
+ this->bytes_written += 8;
+ }
+ }
+ memcpy(this->data.get() + this->bytes_written, &this->buffer, 8);
+ size_t bytes_in_buffer = this->bits_in_buffer / 8;
+ this->bits_in_buffer -= bytes_in_buffer * 8;
+ this->buffer >>= bytes_in_buffer * 8;
+ this->bytes_written += bytes_in_buffer;
+ }
+ }
+
+ std::unique_ptr<uint8_t[], void (*)(void*)> data = {nullptr, free};
+ size_t bytes_written = 0;
+ size_t bits_in_buffer = 0;
+ uint64_t buffer = 0;
+};
+
+} // namespace
+
+extern "C" {
+
+struct JxlFastLosslessFrameState {
+ size_t width;
+ size_t height;
+ size_t nb_chans;
+ size_t bitdepth;
+ BitWriter header;
+ std::vector<std::array<BitWriter, 4>> group_data;
+ size_t current_bit_writer = 0;
+ size_t bit_writer_byte_pos = 0;
+ size_t bits_in_buffer = 0;
+ uint64_t bit_buffer = 0;
+};
+
+size_t JxlFastLosslessOutputSize(const JxlFastLosslessFrameState* frame) {
+ size_t total_size_groups = 0;
+ for (size_t i = 0; i < frame->group_data.size(); i++) {
+ size_t sz = 0;
+ for (size_t j = 0; j < frame->nb_chans; j++) {
+ const auto& writer = frame->group_data[i][j];
+ sz += writer.bytes_written * 8 + writer.bits_in_buffer;
+ }
+ sz = (sz + 7) / 8;
+ total_size_groups += sz;
+ }
+ return frame->header.bytes_written + total_size_groups;
+}
+
+size_t JxlFastLosslessMaxRequiredOutput(
+ const JxlFastLosslessFrameState* frame) {
+ return JxlFastLosslessOutputSize(frame) + 32;
+}
+
+void JxlFastLosslessPrepareHeader(JxlFastLosslessFrameState* frame,
+ int add_image_header, int is_last) {
+ BitWriter* output = &frame->header;
+ output->Allocate(1000 + frame->group_data.size() * 32);
+
+ std::vector<size_t> group_sizes(frame->group_data.size());
+ for (size_t i = 0; i < frame->group_data.size(); i++) {
+ size_t sz = 0;
+ for (size_t j = 0; j < frame->nb_chans; j++) {
+ const auto& writer = frame->group_data[i][j];
+ sz += writer.bytes_written * 8 + writer.bits_in_buffer;
+ }
+ sz = (sz + 7) / 8;
+ group_sizes[i] = sz;
+ }
+
+ bool have_alpha = (frame->nb_chans == 2 || frame->nb_chans == 4);
+
+ if (add_image_header) {
+ // Signature
+ output->Write(16, 0x0AFF);
+
+ // Size header, hand-crafted.
+ // Not small
+ output->Write(1, 0);
+
+ auto wsz = [output](size_t size) {
+ if (size - 1 < (1 << 9)) {
+ output->Write(2, 0b00);
+ output->Write(9, size - 1);
+ } else if (size - 1 < (1 << 13)) {
+ output->Write(2, 0b01);
+ output->Write(13, size - 1);
+ } else if (size - 1 < (1 << 18)) {
+ output->Write(2, 0b10);
+ output->Write(18, size - 1);
+ } else {
+ output->Write(2, 0b11);
+ output->Write(30, size - 1);
+ }
+ };
+
+ wsz(frame->height);
+
+ // No special ratio.
+ output->Write(3, 0);
+
+ wsz(frame->width);
+
+ // Hand-crafted ImageMetadata.
+ output->Write(1, 0); // all_default
+ output->Write(1, 0); // extra_fields
+ output->Write(1, 0); // bit_depth.floating_point_sample
+ if (frame->bitdepth == 8) {
+ output->Write(2, 0b00); // bit_depth.bits_per_sample = 8
+ } else if (frame->bitdepth == 10) {
+ output->Write(2, 0b01); // bit_depth.bits_per_sample = 10
+ } else if (frame->bitdepth == 12) {
+ output->Write(2, 0b10); // bit_depth.bits_per_sample = 12
+ } else {
+ output->Write(2, 0b11); // 1 + u(6)
+ output->Write(6, frame->bitdepth - 1);
+ }
+ if (frame->bitdepth <= 14) {
+ output->Write(1, 1); // 16-bit-buffer sufficient
+ } else {
+ output->Write(1, 0); // 16-bit-buffer NOT sufficient
+ }
+ if (have_alpha) {
+ output->Write(2, 0b01); // One extra channel
+ output->Write(1, 1); // ... all_default (ie. 8-bit alpha)
+ } else {
+ output->Write(2, 0b00); // No extra channel
+ }
+ output->Write(1, 0); // Not XYB
+ if (frame->nb_chans > 2) {
+ output->Write(1, 1); // color_encoding.all_default (sRGB)
+ } else {
+ output->Write(1, 0); // color_encoding.all_default false
+ output->Write(1, 0); // color_encoding.want_icc false
+ output->Write(2, 1); // grayscale
+ output->Write(2, 1); // D65
+ output->Write(1, 0); // no gamma transfer function
+ output->Write(2, 0b10); // tf: 2 + u(4)
+ output->Write(4, 11); // tf of sRGB
+ output->Write(2, 1); // relative rendering intent
+ }
+ output->Write(2, 0b00); // No extensions.
+
+ output->Write(1, 1); // all_default transform data
+
+ // No ICC, no preview. Frame should start at byte boundery.
+ output->ZeroPadToByte();
+ }
+
+ // Handcrafted frame header.
+ output->Write(1, 0); // all_default
+ output->Write(2, 0b00); // regular frame
+ output->Write(1, 1); // modular
+ output->Write(2, 0b00); // default flags
+ output->Write(1, 0); // not YCbCr
+ output->Write(2, 0b00); // no upsampling
+ if (have_alpha) {
+ output->Write(2, 0b00); // no alpha upsampling
+ }
+ output->Write(2, 0b01); // default group size
+ output->Write(2, 0b00); // exactly one pass
+ output->Write(1, 0); // no custom size or origin
+ output->Write(2, 0b00); // kReplace blending mode
+ if (have_alpha) {
+ output->Write(2, 0b00); // kReplace blending mode for alpha channel
+ }
+ output->Write(1, is_last); // is_last
+ output->Write(2, 0b00); // a frame has no name
+ output->Write(1, 0); // loop filter is not all_default
+ output->Write(1, 0); // no gaborish
+ output->Write(2, 0); // 0 EPF iters
+ output->Write(2, 0b00); // No LF extensions
+ output->Write(2, 0b00); // No FH extensions
+
+ output->Write(1, 0); // No TOC permutation
+ output->ZeroPadToByte(); // TOC is byte-aligned.
+ for (size_t i = 0; i < frame->group_data.size(); i++) {
+ size_t sz = group_sizes[i];
+ if (sz < (1 << 10)) {
+ output->Write(2, 0b00);
+ output->Write(10, sz);
+ } else if (sz - 1024 < (1 << 14)) {
+ output->Write(2, 0b01);
+ output->Write(14, sz - 1024);
+ } else if (sz - 17408 < (1 << 22)) {
+ output->Write(2, 0b10);
+ output->Write(22, sz - 17408);
+ } else {
+ output->Write(2, 0b11);
+ output->Write(30, sz - 4211712);
+ }
+ }
+ output->ZeroPadToByte(); // Groups are byte-aligned.
+}
+
+#if FJXL_ENABLE_AVX512
+__attribute__((target("avx512vbmi2"))) static size_t AppendBytesWithBitOffset(
+ const uint8_t* data, size_t n, size_t bit_buffer_nbits,
+ unsigned char* output, uint64_t& bit_buffer) {
+ if (n < 128) {
+ return 0;
+ }
+
+ size_t i = 0;
+ __m512i shift = _mm512_set1_epi64(64 - bit_buffer_nbits);
+ __m512i carry = _mm512_set1_epi64(bit_buffer << (64 - bit_buffer_nbits));
+
+ for (; i + 64 <= n; i += 64) {
+ __m512i current = _mm512_loadu_si512(data + i);
+ __m512i previous_u64 = _mm512_alignr_epi64(current, carry, 7);
+ carry = current;
+ __m512i out = _mm512_shrdv_epi64(previous_u64, current, shift);
+ _mm512_storeu_si512(output + i, out);
+ }
+
+ bit_buffer = data[i - 1] >> (8 - bit_buffer_nbits);
+
+ return i;
+}
+#endif
+
+size_t JxlFastLosslessWriteOutput(JxlFastLosslessFrameState* frame,
+ unsigned char* output, size_t output_size) {
+ assert(output_size >= 32);
+ unsigned char* initial_output = output;
+ size_t (*append_bytes_with_bit_offset)(const uint8_t*, size_t, size_t,
+ unsigned char*, uint64_t&) = nullptr;
+
+#if FJXL_ENABLE_AVX512
+ if (__builtin_cpu_supports("avx512vbmi2")) {
+ append_bytes_with_bit_offset = AppendBytesWithBitOffset;
+ }
+#endif
+
+ while (true) {
+ size_t& cur = frame->current_bit_writer;
+ size_t& bw_pos = frame->bit_writer_byte_pos;
+ if (cur >= 1 + frame->group_data.size() * frame->nb_chans) {
+ return output - initial_output;
+ }
+ if (output_size <= 8) {
+ return output - initial_output;
+ }
+ size_t nbc = frame->nb_chans;
+ const BitWriter& writer =
+ cur == 0 ? frame->header
+ : frame->group_data[(cur - 1) / nbc][(cur - 1) % nbc];
+ size_t full_byte_count =
+ std::min(output_size - 8, writer.bytes_written - bw_pos);
+ if (frame->bits_in_buffer == 0) {
+ memcpy(output, writer.data.get() + bw_pos, full_byte_count);
+ } else {
+ size_t i = 0;
+ if (append_bytes_with_bit_offset) {
+ i += append_bytes_with_bit_offset(
+ writer.data.get() + bw_pos, full_byte_count, frame->bits_in_buffer,
+ output, frame->bit_buffer);
+ }
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+ // Copy 8 bytes at a time until we reach the border.
+ for (; i + 8 < full_byte_count; i += 8) {
+ uint64_t chunk;
+ memcpy(&chunk, writer.data.get() + bw_pos + i, 8);
+ uint64_t out = frame->bit_buffer | (chunk << frame->bits_in_buffer);
+ memcpy(output + i, &out, 8);
+ frame->bit_buffer = chunk >> (64 - frame->bits_in_buffer);
+ }
+#endif
+ for (; i < full_byte_count; i++) {
+ AddBits(8, writer.data.get()[bw_pos + i], output + i,
+ frame->bits_in_buffer, frame->bit_buffer);
+ }
+ }
+ output += full_byte_count;
+ output_size -= full_byte_count;
+ bw_pos += full_byte_count;
+ if (bw_pos == writer.bytes_written) {
+ auto write = [&](size_t num, uint64_t bits) {
+ size_t n = AddBits(num, bits, output, frame->bits_in_buffer,
+ frame->bit_buffer);
+ output += n;
+ output_size -= n;
+ };
+ if (writer.bits_in_buffer) {
+ write(writer.bits_in_buffer, writer.buffer);
+ }
+ bw_pos = 0;
+ cur++;
+ if ((cur - 1) % nbc == 0 && frame->bits_in_buffer != 0) {
+ write(8 - frame->bits_in_buffer, 0);
+ }
+ }
+ }
+}
+
+void JxlFastLosslessFreeFrameState(JxlFastLosslessFrameState* frame) {
+ delete frame;
+}
+
+} // extern "C"
+
+#endif
+
+#ifdef FJXL_SELF_INCLUDE
+
+namespace {
+
+constexpr size_t kNumRawSymbols = 19;
+constexpr size_t kNumLZ77 = 33;
+constexpr size_t kLZ77CacheSize = 32;
+
+constexpr size_t kLZ77Offset = 224;
+constexpr size_t kLZ77MinLength = 7;
+
+void EncodeHybridUintLZ77(uint32_t value, uint32_t* token, uint32_t* nbits,
+ uint32_t* bits) {
+ // 400 config
+ uint32_t n = FloorLog2(value);
+ *token = value < 16 ? value : 16 + n - 4;
+ *nbits = value < 16 ? 0 : n;
+ *bits = value < 16 ? 0 : value - (1 << *nbits);
+}
+
+struct PrefixCode {
+ uint8_t raw_nbits[kNumRawSymbols] = {};
+ uint8_t raw_bits[kNumRawSymbols] = {};
+
+ alignas(64) uint8_t raw_nbits_simd[16] = {};
+ alignas(64) uint8_t raw_bits_simd[16] = {};
+
+ uint8_t lz77_nbits[kNumLZ77] = {};
+ uint16_t lz77_bits[kNumLZ77] = {};
+
+ uint64_t lz77_cache_bits[kLZ77CacheSize] = {};
+ uint8_t lz77_cache_nbits[kLZ77CacheSize] = {};
+
+ static uint16_t BitReverse(size_t nbits, uint16_t bits) {
+ constexpr uint16_t kNibbleLookup[16] = {
+ 0b0000, 0b1000, 0b0100, 0b1100, 0b0010, 0b1010, 0b0110, 0b1110,
+ 0b0001, 0b1001, 0b0101, 0b1101, 0b0011, 0b1011, 0b0111, 0b1111,
+ };
+ uint16_t rev16 = (kNibbleLookup[bits & 0xF] << 12) |
+ (kNibbleLookup[(bits >> 4) & 0xF] << 8) |
+ (kNibbleLookup[(bits >> 8) & 0xF] << 4) |
+ (kNibbleLookup[bits >> 12]);
+ return rev16 >> (16 - nbits);
+ }
+
+ // Create the prefix codes given the code lengths.
+ // Supports the code lengths being split into two halves.
+ static void ComputeCanonicalCode(const uint8_t* first_chunk_nbits,
+ uint8_t* first_chunk_bits,
+ size_t first_chunk_size,
+ const uint8_t* second_chunk_nbits,
+ uint16_t* second_chunk_bits,
+ size_t second_chunk_size) {
+ constexpr size_t kMaxCodeLength = 15;
+ uint8_t code_length_counts[kMaxCodeLength + 1] = {};
+ for (size_t i = 0; i < first_chunk_size; i++) {
+ code_length_counts[first_chunk_nbits[i]]++;
+ assert(first_chunk_nbits[i] <= kMaxCodeLength);
+ assert(first_chunk_nbits[i] <= 8);
+ assert(first_chunk_nbits[i] > 0);
+ }
+ for (size_t i = 0; i < second_chunk_size; i++) {
+ code_length_counts[second_chunk_nbits[i]]++;
+ assert(second_chunk_nbits[i] <= kMaxCodeLength);
+ }
+
+ uint16_t next_code[kMaxCodeLength + 1] = {};
+
+ uint16_t code = 0;
+ for (size_t i = 1; i < kMaxCodeLength + 1; i++) {
+ code = (code + code_length_counts[i - 1]) << 1;
+ next_code[i] = code;
+ }
+
+ for (size_t i = 0; i < first_chunk_size; i++) {
+ first_chunk_bits[i] =
+ BitReverse(first_chunk_nbits[i], next_code[first_chunk_nbits[i]]++);
+ }
+ for (size_t i = 0; i < second_chunk_size; i++) {
+ second_chunk_bits[i] =
+ BitReverse(second_chunk_nbits[i], next_code[second_chunk_nbits[i]]++);
+ }
+ }
+
+ template <typename T>
+ static void ComputeCodeLengthsNonZeroImpl(const uint64_t* freqs, size_t n,
+ size_t precision, T infty,
+ uint8_t* min_limit,
+ uint8_t* max_limit,
+ uint8_t* nbits) {
+ std::vector<T> dynp(((1U << precision) + 1) * (n + 1), infty);
+ auto d = [&](size_t sym, size_t off) -> T& {
+ return dynp[sym * ((1 << precision) + 1) + off];
+ };
+ d(0, 0) = 0;
+ for (size_t sym = 0; sym < n; sym++) {
+ for (T bits = min_limit[sym]; bits <= max_limit[sym]; bits++) {
+ size_t off_delta = 1U << (precision - bits);
+ for (size_t off = 0; off + off_delta <= (1U << precision); off++) {
+ d(sym + 1, off + off_delta) =
+ std::min(d(sym, off) + static_cast<T>(freqs[sym]) * bits,
+ d(sym + 1, off + off_delta));
+ }
+ }
+ }
+
+ size_t sym = n;
+ size_t off = 1U << precision;
+
+ assert(d(sym, off) != infty);
+
+ while (sym-- > 0) {
+ assert(off > 0);
+ for (size_t bits = min_limit[sym]; bits <= max_limit[sym]; bits++) {
+ size_t off_delta = 1U << (precision - bits);
+ if (off_delta <= off &&
+ d(sym + 1, off) == d(sym, off - off_delta) + freqs[sym] * bits) {
+ off -= off_delta;
+ nbits[sym] = bits;
+ break;
+ }
+ }
+ }
+ }
+
+ // Computes nbits[i] for i <= n, subject to min_limit[i] <= nbits[i] <=
+ // max_limit[i] and sum 2**-nbits[i] == 1, so to minimize sum(nbits[i] *
+ // freqs[i]).
+ static void ComputeCodeLengthsNonZero(const uint64_t* freqs, size_t n,
+ uint8_t* min_limit, uint8_t* max_limit,
+ uint8_t* nbits) {
+ size_t precision = 0;
+ size_t shortest_length = 255;
+ uint64_t freqsum = 0;
+ for (size_t i = 0; i < n; i++) {
+ assert(freqs[i] != 0);
+ freqsum += freqs[i];
+ if (min_limit[i] < 1) min_limit[i] = 1;
+ assert(min_limit[i] <= max_limit[i]);
+ precision = std::max<size_t>(max_limit[i], precision);
+ shortest_length = std::min<size_t>(min_limit[i], shortest_length);
+ }
+ // If all the minimum limits are greater than 1, shift precision so that we
+ // behave as if the shortest was 1.
+ precision -= shortest_length - 1;
+ uint64_t infty = freqsum * precision;
+ if (infty < std::numeric_limits<uint32_t>::max() / 2) {
+ ComputeCodeLengthsNonZeroImpl(freqs, n, precision,
+ static_cast<uint32_t>(infty), min_limit,
+ max_limit, nbits);
+ } else {
+ ComputeCodeLengthsNonZeroImpl(freqs, n, precision, infty, min_limit,
+ max_limit, nbits);
+ }
+ }
+
+ static constexpr size_t kMaxNumSymbols =
+ kNumRawSymbols + 1 < kNumLZ77 ? kNumLZ77 : kNumRawSymbols + 1;
+ static void ComputeCodeLengths(const uint64_t* freqs, size_t n,
+ const uint8_t* min_limit_in,
+ const uint8_t* max_limit_in, uint8_t* nbits) {
+ assert(n <= kMaxNumSymbols);
+ uint64_t compact_freqs[kMaxNumSymbols];
+ uint8_t min_limit[kMaxNumSymbols];
+ uint8_t max_limit[kMaxNumSymbols];
+ size_t ni = 0;
+ for (size_t i = 0; i < n; i++) {
+ if (freqs[i]) {
+ compact_freqs[ni] = freqs[i];
+ min_limit[ni] = min_limit_in[i];
+ max_limit[ni] = max_limit_in[i];
+ ni++;
+ }
+ }
+ uint8_t num_bits[kMaxNumSymbols] = {};
+ ComputeCodeLengthsNonZero(compact_freqs, ni, min_limit, max_limit,
+ num_bits);
+ ni = 0;
+ for (size_t i = 0; i < n; i++) {
+ nbits[i] = 0;
+ if (freqs[i]) {
+ nbits[i] = num_bits[ni++];
+ }
+ }
+ }
+
+ // Invalid code, used to construct arrays.
+ PrefixCode() {}
+
+ template <typename BitDepth>
+ PrefixCode(BitDepth, uint64_t* raw_counts, uint64_t* lz77_counts) {
+ // "merge" together all the lz77 counts in a single symbol for the level 1
+ // table (containing just the raw symbols, up to length 7).
+ uint64_t level1_counts[kNumRawSymbols + 1];
+ memcpy(level1_counts, raw_counts, kNumRawSymbols * sizeof(uint64_t));
+ size_t numraw = kNumRawSymbols;
+ while (numraw > 0 && level1_counts[numraw - 1] == 0) numraw--;
+
+ level1_counts[numraw] = 0;
+ for (size_t i = 0; i < kNumLZ77; i++) {
+ level1_counts[numraw] += lz77_counts[i];
+ }
+ uint8_t level1_nbits[kNumRawSymbols + 1] = {};
+ ComputeCodeLengths(level1_counts, numraw + 1, BitDepth::kMinRawLength,
+ BitDepth::kMaxRawLength, level1_nbits);
+
+ uint8_t level2_nbits[kNumLZ77] = {};
+ uint8_t min_lengths[kNumLZ77] = {};
+ uint8_t l = 15 - level1_nbits[numraw];
+ uint8_t max_lengths[kNumLZ77];
+ for (size_t i = 0; i < kNumLZ77; i++) {
+ max_lengths[i] = l;
+ }
+ size_t num_lz77 = kNumLZ77;
+ while (num_lz77 > 0 && lz77_counts[num_lz77 - 1] == 0) num_lz77--;
+ ComputeCodeLengths(lz77_counts, num_lz77, min_lengths, max_lengths,
+ level2_nbits);
+ for (size_t i = 0; i < numraw; i++) {
+ raw_nbits[i] = level1_nbits[i];
+ }
+ for (size_t i = 0; i < num_lz77; i++) {
+ lz77_nbits[i] =
+ level2_nbits[i] ? level1_nbits[numraw] + level2_nbits[i] : 0;
+ }
+
+ ComputeCanonicalCode(raw_nbits, raw_bits, numraw, lz77_nbits, lz77_bits,
+ kNumLZ77);
+ BitDepth::PrepareForSimd(raw_nbits, raw_bits, numraw, raw_nbits_simd,
+ raw_bits_simd);
+
+ // Prepare lz77 cache
+ for (size_t count = 0; count < kLZ77CacheSize; count++) {
+ unsigned token, nbits, bits;
+ EncodeHybridUintLZ77(count, &token, &nbits, &bits);
+ lz77_cache_nbits[count] = lz77_nbits[token] + nbits + raw_nbits[0];
+ lz77_cache_bits[count] =
+ (((bits << lz77_nbits[token]) | lz77_bits[token]) << raw_nbits[0]) |
+ raw_bits[0];
+ }
+ }
+
+ void WriteTo(BitWriter* writer) const {
+ uint64_t code_length_counts[18] = {};
+ code_length_counts[17] = 3 + 2 * (kNumLZ77 - 1);
+ for (size_t i = 0; i < kNumRawSymbols; i++) {
+ code_length_counts[raw_nbits[i]]++;
+ }
+ for (size_t i = 0; i < kNumLZ77; i++) {
+ code_length_counts[lz77_nbits[i]]++;
+ }
+ uint8_t code_length_nbits[18] = {};
+ uint8_t code_length_nbits_min[18] = {};
+ uint8_t code_length_nbits_max[18] = {
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+ };
+ ComputeCodeLengths(code_length_counts, 18, code_length_nbits_min,
+ code_length_nbits_max, code_length_nbits);
+ writer->Write(2, 0b00); // HSKIP = 0, i.e. don't skip code lengths.
+
+ // As per Brotli RFC.
+ uint8_t code_length_order[18] = {1, 2, 3, 4, 0, 5, 17, 6, 16,
+ 7, 8, 9, 10, 11, 12, 13, 14, 15};
+ uint8_t code_length_length_nbits[] = {2, 4, 3, 2, 2, 4};
+ uint8_t code_length_length_bits[] = {0, 7, 3, 2, 1, 15};
+
+ // Encode lengths of code lengths.
+ size_t num_code_lengths = 18;
+ while (code_length_nbits[code_length_order[num_code_lengths - 1]] == 0) {
+ num_code_lengths--;
+ }
+ for (size_t i = 0; i < num_code_lengths; i++) {
+ int symbol = code_length_nbits[code_length_order[i]];
+ writer->Write(code_length_length_nbits[symbol],
+ code_length_length_bits[symbol]);
+ }
+
+ // Compute the canonical codes for the codes that represent the lengths of
+ // the actual codes for data.
+ uint16_t code_length_bits[18] = {};
+ ComputeCanonicalCode(nullptr, nullptr, 0, code_length_nbits,
+ code_length_bits, 18);
+ // Encode raw bit code lengths.
+ for (size_t i = 0; i < kNumRawSymbols; i++) {
+ writer->Write(code_length_nbits[raw_nbits[i]],
+ code_length_bits[raw_nbits[i]]);
+ }
+ size_t num_lz77 = kNumLZ77;
+ while (lz77_nbits[num_lz77 - 1] == 0) {
+ num_lz77--;
+ }
+ // Encode 0s until 224 (start of LZ77 symbols). This is in total 224-19 =
+ // 205.
+ static_assert(kLZ77Offset == 224, "");
+ static_assert(kNumRawSymbols == 19, "");
+ writer->Write(code_length_nbits[17], code_length_bits[17]);
+ writer->Write(3, 0b010); // 5
+ writer->Write(code_length_nbits[17], code_length_bits[17]);
+ writer->Write(3, 0b000); // (5-2)*8 + 3 = 27
+ writer->Write(code_length_nbits[17], code_length_bits[17]);
+ writer->Write(3, 0b010); // (27-2)*8 + 5 = 205
+ // Encode LZ77 symbols, with values 224+i.
+ for (size_t i = 0; i < num_lz77; i++) {
+ writer->Write(code_length_nbits[lz77_nbits[i]],
+ code_length_bits[lz77_nbits[i]]);
+ }
+ }
+};
+
+template <typename T>
+struct VecPair {
+ T low;
+ T hi;
+};
+
+#ifdef FJXL_GENERIC_SIMD
+#undef FJXL_GENERIC_SIMD
+#endif
+
+#ifdef FJXL_AVX512
+#define FJXL_GENERIC_SIMD
+struct SIMDVec32;
+struct Mask32 {
+ __mmask16 mask;
+ SIMDVec32 IfThenElse(const SIMDVec32& if_true, const SIMDVec32& if_false);
+ size_t CountPrefix() const {
+ return CtzNonZero(~uint64_t{_cvtmask16_u32(mask)});
+ }
+};
+
+struct SIMDVec32 {
+ __m512i vec;
+
+ static constexpr size_t kLanes = 16;
+
+ FJXL_INLINE static SIMDVec32 Load(const uint32_t* data) {
+ return SIMDVec32{_mm512_loadu_si512((__m512i*)data)};
+ }
+ FJXL_INLINE void Store(uint32_t* data) {
+ _mm512_storeu_si512((__m512i*)data, vec);
+ }
+ FJXL_INLINE static SIMDVec32 Val(uint32_t v) {
+ return SIMDVec32{_mm512_set1_epi32(v)};
+ }
+ FJXL_INLINE SIMDVec32 ValToToken() const {
+ return SIMDVec32{
+ _mm512_sub_epi32(_mm512_set1_epi32(32), _mm512_lzcnt_epi32(vec))};
+ }
+ FJXL_INLINE SIMDVec32 SatSubU(const SIMDVec32& to_subtract) const {
+ return SIMDVec32{_mm512_sub_epi32(_mm512_max_epu32(vec, to_subtract.vec),
+ to_subtract.vec)};
+ }
+ FJXL_INLINE SIMDVec32 Sub(const SIMDVec32& to_subtract) const {
+ return SIMDVec32{_mm512_sub_epi32(vec, to_subtract.vec)};
+ }
+ FJXL_INLINE SIMDVec32 Add(const SIMDVec32& oth) const {
+ return SIMDVec32{_mm512_add_epi32(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec32 Xor(const SIMDVec32& oth) const {
+ return SIMDVec32{_mm512_xor_epi32(vec, oth.vec)};
+ }
+ FJXL_INLINE Mask32 Eq(const SIMDVec32& oth) const {
+ return Mask32{_mm512_cmpeq_epi32_mask(vec, oth.vec)};
+ }
+ FJXL_INLINE Mask32 Gt(const SIMDVec32& oth) const {
+ return Mask32{_mm512_cmpgt_epi32_mask(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec32 Pow2() const {
+ return SIMDVec32{_mm512_sllv_epi32(_mm512_set1_epi32(1), vec)};
+ }
+ template <size_t i>
+ FJXL_INLINE SIMDVec32 SignedShiftRight() const {
+ return SIMDVec32{_mm512_srai_epi32(vec, i)};
+ }
+};
+
+struct SIMDVec16;
+
+struct Mask16 {
+ __mmask32 mask;
+ SIMDVec16 IfThenElse(const SIMDVec16& if_true, const SIMDVec16& if_false);
+ Mask16 And(const Mask16& oth) const {
+ return Mask16{_kand_mask32(mask, oth.mask)};
+ }
+ size_t CountPrefix() const {
+ return CtzNonZero(~uint64_t{_cvtmask32_u32(mask)});
+ }
+};
+
+struct SIMDVec16 {
+ __m512i vec;
+
+ static constexpr size_t kLanes = 32;
+
+ FJXL_INLINE static SIMDVec16 Load(const uint16_t* data) {
+ return SIMDVec16{_mm512_loadu_si512((__m512i*)data)};
+ }
+ FJXL_INLINE void Store(uint16_t* data) {
+ _mm512_storeu_si512((__m512i*)data, vec);
+ }
+ FJXL_INLINE static SIMDVec16 Val(uint16_t v) {
+ return SIMDVec16{_mm512_set1_epi16(v)};
+ }
+ FJXL_INLINE static SIMDVec16 FromTwo32(const SIMDVec32& lo,
+ const SIMDVec32& hi) {
+ auto tmp = _mm512_packus_epi32(lo.vec, hi.vec);
+ alignas(64) uint64_t perm[8] = {0, 2, 4, 6, 1, 3, 5, 7};
+ return SIMDVec16{
+ _mm512_permutex2var_epi64(tmp, _mm512_load_si512((__m512i*)perm), tmp)};
+ }
+
+ FJXL_INLINE SIMDVec16 ValToToken() const {
+ auto c16 = _mm512_set1_epi32(16);
+ auto c32 = _mm512_set1_epi32(32);
+ auto low16bit = _mm512_set1_epi32(0x0000FFFF);
+ auto lzhi =
+ _mm512_sub_epi32(c16, _mm512_min_epu32(c16, _mm512_lzcnt_epi32(vec)));
+ auto lzlo = _mm512_sub_epi32(
+ c32, _mm512_lzcnt_epi32(_mm512_and_si512(low16bit, vec)));
+ return SIMDVec16{_mm512_or_si512(lzlo, _mm512_slli_epi32(lzhi, 16))};
+ }
+
+ FJXL_INLINE SIMDVec16 SatSubU(const SIMDVec16& to_subtract) const {
+ return SIMDVec16{_mm512_subs_epu16(vec, to_subtract.vec)};
+ }
+ FJXL_INLINE SIMDVec16 Sub(const SIMDVec16& to_subtract) const {
+ return SIMDVec16{_mm512_sub_epi16(vec, to_subtract.vec)};
+ }
+ FJXL_INLINE SIMDVec16 Add(const SIMDVec16& oth) const {
+ return SIMDVec16{_mm512_add_epi16(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec16 Min(const SIMDVec16& oth) const {
+ return SIMDVec16{_mm512_min_epu16(vec, oth.vec)};
+ }
+ FJXL_INLINE Mask16 Eq(const SIMDVec16& oth) const {
+ return Mask16{_mm512_cmpeq_epi16_mask(vec, oth.vec)};
+ }
+ FJXL_INLINE Mask16 Gt(const SIMDVec16& oth) const {
+ return Mask16{_mm512_cmpgt_epi16_mask(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec16 Pow2() const {
+ return SIMDVec16{_mm512_sllv_epi16(_mm512_set1_epi16(1), vec)};
+ }
+ FJXL_INLINE SIMDVec16 Or(const SIMDVec16& oth) const {
+ return SIMDVec16{_mm512_or_si512(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec16 Xor(const SIMDVec16& oth) const {
+ return SIMDVec16{_mm512_xor_si512(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec16 And(const SIMDVec16& oth) const {
+ return SIMDVec16{_mm512_and_si512(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec16 HAdd(const SIMDVec16& oth) const {
+ return SIMDVec16{_mm512_srai_epi16(_mm512_add_epi16(vec, oth.vec), 1)};
+ }
+ FJXL_INLINE SIMDVec16 PrepareForU8Lookup() const {
+ return SIMDVec16{_mm512_or_si512(vec, _mm512_set1_epi16(0xFF00))};
+ }
+ FJXL_INLINE SIMDVec16 U8Lookup(const uint8_t* table) const {
+ return SIMDVec16{_mm512_shuffle_epi8(
+ _mm512_broadcast_i32x4(_mm_loadu_si128((__m128i*)table)), vec)};
+ }
+ FJXL_INLINE VecPair<SIMDVec16> Interleave(const SIMDVec16& low) const {
+ auto lo = _mm512_unpacklo_epi16(low.vec, vec);
+ auto hi = _mm512_unpackhi_epi16(low.vec, vec);
+ alignas(64) uint64_t perm1[8] = {0, 1, 8, 9, 2, 3, 10, 11};
+ alignas(64) uint64_t perm2[8] = {4, 5, 12, 13, 6, 7, 14, 15};
+ return {SIMDVec16{_mm512_permutex2var_epi64(
+ lo, _mm512_load_si512((__m512i*)perm1), hi)},
+ SIMDVec16{_mm512_permutex2var_epi64(
+ lo, _mm512_load_si512((__m512i*)perm2), hi)}};
+ }
+ FJXL_INLINE VecPair<SIMDVec32> Upcast() const {
+ auto lo = _mm512_unpacklo_epi16(vec, _mm512_setzero_si512());
+ auto hi = _mm512_unpackhi_epi16(vec, _mm512_setzero_si512());
+ alignas(64) uint64_t perm1[8] = {0, 1, 8, 9, 2, 3, 10, 11};
+ alignas(64) uint64_t perm2[8] = {4, 5, 12, 13, 6, 7, 14, 15};
+ return {SIMDVec32{_mm512_permutex2var_epi64(
+ lo, _mm512_load_si512((__m512i*)perm1), hi)},
+ SIMDVec32{_mm512_permutex2var_epi64(
+ lo, _mm512_load_si512((__m512i*)perm2), hi)}};
+ }
+ template <size_t i>
+ FJXL_INLINE SIMDVec16 SignedShiftRight() const {
+ return SIMDVec16{_mm512_srai_epi16(vec, i)};
+ }
+
+ static std::array<SIMDVec16, 1> LoadG8(const unsigned char* data) {
+ __m256i bytes = _mm256_loadu_si256((__m256i*)data);
+ return {SIMDVec16{_mm512_cvtepu8_epi16(bytes)}};
+ }
+ static std::array<SIMDVec16, 1> LoadG16(const unsigned char* data) {
+ return {Load((const uint16_t*)data)};
+ }
+
+ static std::array<SIMDVec16, 2> LoadGA8(const unsigned char* data) {
+ __m512i bytes = _mm512_loadu_si512((__m512i*)data);
+ __m512i gray = _mm512_and_si512(bytes, _mm512_set1_epi16(0xFF));
+ __m512i alpha = _mm512_srli_epi16(bytes, 8);
+ return {SIMDVec16{gray}, SIMDVec16{alpha}};
+ }
+ static std::array<SIMDVec16, 2> LoadGA16(const unsigned char* data) {
+ __m512i bytes1 = _mm512_loadu_si512((__m512i*)data);
+ __m512i bytes2 = _mm512_loadu_si512((__m512i*)(data + 64));
+ __m512i g_mask = _mm512_set1_epi32(0xFFFF);
+ __m512i permuteidx = _mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0);
+ __m512i g = _mm512_permutexvar_epi64(
+ permuteidx, _mm512_packus_epi32(_mm512_and_si512(bytes1, g_mask),
+ _mm512_and_si512(bytes2, g_mask)));
+ __m512i a = _mm512_permutexvar_epi64(
+ permuteidx, _mm512_packus_epi32(_mm512_srli_epi32(bytes1, 16),
+ _mm512_srli_epi32(bytes2, 16)));
+ return {SIMDVec16{g}, SIMDVec16{a}};
+ }
+
+ static std::array<SIMDVec16, 3> LoadRGB8(const unsigned char* data) {
+ __m512i bytes0 = _mm512_loadu_si512((__m512i*)data);
+ __m512i bytes1 =
+ _mm512_zextsi256_si512(_mm256_loadu_si256((__m256i*)(data + 64)));
+
+ // 0x7A = element of upper half of second vector = 0 after lookup; still in
+ // the upper half once we add 1 or 2.
+ uint8_t z = 0x7A;
+ __m512i ridx =
+ _mm512_set_epi8(z, 93, z, 90, z, 87, z, 84, z, 81, z, 78, z, 75, z, 72,
+ z, 69, z, 66, z, 63, z, 60, z, 57, z, 54, z, 51, z, 48,
+ z, 45, z, 42, z, 39, z, 36, z, 33, z, 30, z, 27, z, 24,
+ z, 21, z, 18, z, 15, z, 12, z, 9, z, 6, z, 3, z, 0);
+ __m512i gidx = _mm512_add_epi8(ridx, _mm512_set1_epi8(1));
+ __m512i bidx = _mm512_add_epi8(gidx, _mm512_set1_epi8(1));
+ __m512i r = _mm512_permutex2var_epi8(bytes0, ridx, bytes1);
+ __m512i g = _mm512_permutex2var_epi8(bytes0, gidx, bytes1);
+ __m512i b = _mm512_permutex2var_epi8(bytes0, bidx, bytes1);
+ return {SIMDVec16{r}, SIMDVec16{g}, SIMDVec16{b}};
+ }
+ static std::array<SIMDVec16, 3> LoadRGB16(const unsigned char* data) {
+ __m512i bytes0 = _mm512_loadu_si512((__m512i*)data);
+ __m512i bytes1 = _mm512_loadu_si512((__m512i*)(data + 64));
+ __m512i bytes2 = _mm512_loadu_si512((__m512i*)(data + 128));
+
+ __m512i ridx_lo = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 60, 57,
+ 54, 51, 48, 45, 42, 39, 36, 33, 30, 27,
+ 24, 21, 18, 15, 12, 9, 6, 3, 0);
+ // -1 is such that when adding 1 or 2, we get the correct index for
+ // green/blue.
+ __m512i ridx_hi =
+ _mm512_set_epi16(29, 26, 23, 20, 17, 14, 11, 8, 5, 2, -1, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+ __m512i gidx_lo = _mm512_add_epi16(ridx_lo, _mm512_set1_epi16(1));
+ __m512i gidx_hi = _mm512_add_epi16(ridx_hi, _mm512_set1_epi16(1));
+ __m512i bidx_lo = _mm512_add_epi16(gidx_lo, _mm512_set1_epi16(1));
+ __m512i bidx_hi = _mm512_add_epi16(gidx_hi, _mm512_set1_epi16(1));
+
+ __mmask32 rmask = _cvtu32_mask32(0b11111111110000000000000000000000);
+ __mmask32 gbmask = _cvtu32_mask32(0b11111111111000000000000000000000);
+
+ __m512i rlo = _mm512_permutex2var_epi16(bytes0, ridx_lo, bytes1);
+ __m512i glo = _mm512_permutex2var_epi16(bytes0, gidx_lo, bytes1);
+ __m512i blo = _mm512_permutex2var_epi16(bytes0, bidx_lo, bytes1);
+ __m512i r = _mm512_mask_permutexvar_epi16(rlo, rmask, ridx_hi, bytes2);
+ __m512i g = _mm512_mask_permutexvar_epi16(glo, gbmask, gidx_hi, bytes2);
+ __m512i b = _mm512_mask_permutexvar_epi16(blo, gbmask, bidx_hi, bytes2);
+ return {SIMDVec16{r}, SIMDVec16{g}, SIMDVec16{b}};
+ }
+
+ static std::array<SIMDVec16, 4> LoadRGBA8(const unsigned char* data) {
+ __m512i bytes1 = _mm512_loadu_si512((__m512i*)data);
+ __m512i bytes2 = _mm512_loadu_si512((__m512i*)(data + 64));
+ __m512i rg_mask = _mm512_set1_epi32(0xFFFF);
+ __m512i permuteidx = _mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0);
+ __m512i rg = _mm512_permutexvar_epi64(
+ permuteidx, _mm512_packus_epi32(_mm512_and_si512(bytes1, rg_mask),
+ _mm512_and_si512(bytes2, rg_mask)));
+ __m512i ba = _mm512_permutexvar_epi64(
+ permuteidx, _mm512_packus_epi32(_mm512_srli_epi32(bytes1, 16),
+ _mm512_srli_epi32(bytes2, 16)));
+ __m512i r = _mm512_and_si512(rg, _mm512_set1_epi16(0xFF));
+ __m512i g = _mm512_srli_epi16(rg, 8);
+ __m512i b = _mm512_and_si512(ba, _mm512_set1_epi16(0xFF));
+ __m512i a = _mm512_srli_epi16(ba, 8);
+ return {SIMDVec16{r}, SIMDVec16{g}, SIMDVec16{b}, SIMDVec16{a}};
+ }
+ static std::array<SIMDVec16, 4> LoadRGBA16(const unsigned char* data) {
+ __m512i bytes0 = _mm512_loadu_si512((__m512i*)data);
+ __m512i bytes1 = _mm512_loadu_si512((__m512i*)(data + 64));
+ __m512i bytes2 = _mm512_loadu_si512((__m512i*)(data + 128));
+ __m512i bytes3 = _mm512_loadu_si512((__m512i*)(data + 192));
+
+ auto pack32 = [](__m512i a, __m512i b) {
+ __m512i permuteidx = _mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0);
+ return _mm512_permutexvar_epi64(permuteidx, _mm512_packus_epi32(a, b));
+ };
+ auto packlow32 = [&pack32](__m512i a, __m512i b) {
+ __m512i mask = _mm512_set1_epi32(0xFFFF);
+ return pack32(_mm512_and_si512(a, mask), _mm512_and_si512(b, mask));
+ };
+ auto packhi32 = [&pack32](__m512i a, __m512i b) {
+ return pack32(_mm512_srli_epi32(a, 16), _mm512_srli_epi32(b, 16));
+ };
+
+ __m512i rb0 = packlow32(bytes0, bytes1);
+ __m512i rb1 = packlow32(bytes2, bytes3);
+ __m512i ga0 = packhi32(bytes0, bytes1);
+ __m512i ga1 = packhi32(bytes2, bytes3);
+
+ __m512i r = packlow32(rb0, rb1);
+ __m512i g = packlow32(ga0, ga1);
+ __m512i b = packhi32(rb0, rb1);
+ __m512i a = packhi32(ga0, ga1);
+ return {SIMDVec16{r}, SIMDVec16{g}, SIMDVec16{b}, SIMDVec16{a}};
+ }
+
+ void SwapEndian() {
+ auto indices = _mm512_broadcast_i32x4(
+ _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14));
+ vec = _mm512_shuffle_epi8(vec, indices);
+ }
+};
+
+SIMDVec16 Mask16::IfThenElse(const SIMDVec16& if_true,
+ const SIMDVec16& if_false) {
+ return SIMDVec16{_mm512_mask_blend_epi16(mask, if_false.vec, if_true.vec)};
+}
+
+SIMDVec32 Mask32::IfThenElse(const SIMDVec32& if_true,
+ const SIMDVec32& if_false) {
+ return SIMDVec32{_mm512_mask_blend_epi32(mask, if_false.vec, if_true.vec)};
+}
+
+struct Bits64 {
+ static constexpr size_t kLanes = 8;
+
+ __m512i nbits;
+ __m512i bits;
+
+ FJXL_INLINE void Store(uint64_t* nbits_out, uint64_t* bits_out) {
+ _mm512_storeu_si512((__m512i*)nbits_out, nbits);
+ _mm512_storeu_si512((__m512i*)bits_out, bits);
+ }
+};
+
+struct Bits32 {
+ __m512i nbits;
+ __m512i bits;
+
+ static Bits32 FromRaw(SIMDVec32 nbits, SIMDVec32 bits) {
+ return Bits32{nbits.vec, bits.vec};
+ }
+
+ Bits64 Merge() const {
+ auto nbits_hi32 = _mm512_srli_epi64(nbits, 32);
+ auto nbits_lo32 = _mm512_and_si512(nbits, _mm512_set1_epi64(0xFFFFFFFF));
+ auto bits_hi32 = _mm512_srli_epi64(bits, 32);
+ auto bits_lo32 = _mm512_and_si512(bits, _mm512_set1_epi64(0xFFFFFFFF));
+
+ auto nbits64 = _mm512_add_epi64(nbits_hi32, nbits_lo32);
+ auto bits64 =
+ _mm512_or_si512(_mm512_sllv_epi64(bits_hi32, nbits_lo32), bits_lo32);
+ return Bits64{nbits64, bits64};
+ }
+
+ void Interleave(const Bits32& low) {
+ bits = _mm512_or_si512(_mm512_sllv_epi32(bits, low.nbits), low.bits);
+ nbits = _mm512_add_epi32(nbits, low.nbits);
+ }
+
+ void ClipTo(size_t n) {
+ n = std::min<size_t>(n, 16);
+ constexpr uint32_t kMask[32] = {
+ ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u,
+ ~0u, ~0u, ~0u, ~0u, ~0u, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+ __m512i mask = _mm512_loadu_si512((__m512i*)(kMask + 16 - n));
+ nbits = _mm512_and_si512(mask, nbits);
+ bits = _mm512_and_si512(mask, bits);
+ }
+ void Skip(size_t n) {
+ n = std::min<size_t>(n, 16);
+ constexpr uint32_t kMask[32] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u,
+ ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u,
+ };
+ __m512i mask = _mm512_loadu_si512((__m512i*)(kMask + 16 - n));
+ nbits = _mm512_and_si512(mask, nbits);
+ bits = _mm512_and_si512(mask, bits);
+ }
+};
+
+struct Bits16 {
+ __m512i nbits;
+ __m512i bits;
+
+ static Bits16 FromRaw(SIMDVec16 nbits, SIMDVec16 bits) {
+ return Bits16{nbits.vec, bits.vec};
+ }
+
+ Bits32 Merge() const {
+ auto nbits_hi16 = _mm512_srli_epi32(nbits, 16);
+ auto nbits_lo16 = _mm512_and_si512(nbits, _mm512_set1_epi32(0xFFFF));
+ auto bits_hi16 = _mm512_srli_epi32(bits, 16);
+ auto bits_lo16 = _mm512_and_si512(bits, _mm512_set1_epi32(0xFFFF));
+
+ auto nbits32 = _mm512_add_epi32(nbits_hi16, nbits_lo16);
+ auto bits32 =
+ _mm512_or_si512(_mm512_sllv_epi32(bits_hi16, nbits_lo16), bits_lo16);
+ return Bits32{nbits32, bits32};
+ }
+
+ void Interleave(const Bits16& low) {
+ bits = _mm512_or_si512(_mm512_sllv_epi16(bits, low.nbits), low.bits);
+ nbits = _mm512_add_epi16(nbits, low.nbits);
+ }
+
+ void ClipTo(size_t n) {
+ n = std::min<size_t>(n, 32);
+ constexpr uint16_t kMask[64] = {
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+ __m512i mask = _mm512_loadu_si512((__m512i*)(kMask + 32 - n));
+ nbits = _mm512_and_si512(mask, nbits);
+ bits = _mm512_and_si512(mask, bits);
+ }
+ void Skip(size_t n) {
+ n = std::min<size_t>(n, 32);
+ constexpr uint16_t kMask[64] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+ };
+ __m512i mask = _mm512_loadu_si512((__m512i*)(kMask + 32 - n));
+ nbits = _mm512_and_si512(mask, nbits);
+ bits = _mm512_and_si512(mask, bits);
+ }
+};
+
+#endif
+
+#ifdef FJXL_AVX2
+#define FJXL_GENERIC_SIMD
+
+struct SIMDVec32;
+
+struct Mask32 {
+ __m256i mask;
+ SIMDVec32 IfThenElse(const SIMDVec32& if_true, const SIMDVec32& if_false);
+ size_t CountPrefix() const {
+ return CtzNonZero(~static_cast<uint64_t>(
+ (uint8_t)_mm256_movemask_ps(_mm256_castsi256_ps(mask))));
+ }
+};
+
+struct SIMDVec32 {
+ __m256i vec;
+
+ static constexpr size_t kLanes = 8;
+
+ FJXL_INLINE static SIMDVec32 Load(const uint32_t* data) {
+ return SIMDVec32{_mm256_loadu_si256((__m256i*)data)};
+ }
+ FJXL_INLINE void Store(uint32_t* data) {
+ _mm256_storeu_si256((__m256i*)data, vec);
+ }
+ FJXL_INLINE static SIMDVec32 Val(uint32_t v) {
+ return SIMDVec32{_mm256_set1_epi32(v)};
+ }
+ FJXL_INLINE SIMDVec32 ValToToken() const {
+ // we know that each value has at most 20 bits, so we just need 5 nibbles
+ // and don't need to mask the fifth. However we do need to set the higher
+ // bytes to 0xFF, which will make table lookups return 0.
+ auto nibble0 =
+ _mm256_or_si256(_mm256_and_si256(vec, _mm256_set1_epi32(0xF)),
+ _mm256_set1_epi32(0xFFFFFF00));
+ auto nibble1 = _mm256_or_si256(
+ _mm256_and_si256(_mm256_srli_epi32(vec, 4), _mm256_set1_epi32(0xF)),
+ _mm256_set1_epi32(0xFFFFFF00));
+ auto nibble2 = _mm256_or_si256(
+ _mm256_and_si256(_mm256_srli_epi32(vec, 8), _mm256_set1_epi32(0xF)),
+ _mm256_set1_epi32(0xFFFFFF00));
+ auto nibble3 = _mm256_or_si256(
+ _mm256_and_si256(_mm256_srli_epi32(vec, 12), _mm256_set1_epi32(0xF)),
+ _mm256_set1_epi32(0xFFFFFF00));
+ auto nibble4 = _mm256_or_si256(_mm256_srli_epi32(vec, 16),
+ _mm256_set1_epi32(0xFFFFFF00));
+
+ auto lut0 = _mm256_broadcastsi128_si256(
+ _mm_setr_epi8(0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4));
+ auto lut1 = _mm256_broadcastsi128_si256(
+ _mm_setr_epi8(0, 5, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8));
+ auto lut2 = _mm256_broadcastsi128_si256(_mm_setr_epi8(
+ 0, 9, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12));
+ auto lut3 = _mm256_broadcastsi128_si256(_mm_setr_epi8(
+ 0, 13, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16));
+ auto lut4 = _mm256_broadcastsi128_si256(_mm_setr_epi8(
+ 0, 17, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20));
+
+ auto token0 = _mm256_shuffle_epi8(lut0, nibble0);
+ auto token1 = _mm256_shuffle_epi8(lut1, nibble1);
+ auto token2 = _mm256_shuffle_epi8(lut2, nibble2);
+ auto token3 = _mm256_shuffle_epi8(lut3, nibble3);
+ auto token4 = _mm256_shuffle_epi8(lut4, nibble4);
+
+ auto token =
+ _mm256_max_epi32(_mm256_max_epi32(_mm256_max_epi32(token0, token1),
+ _mm256_max_epi32(token2, token3)),
+ token4);
+ return SIMDVec32{token};
+ }
+ FJXL_INLINE SIMDVec32 SatSubU(const SIMDVec32& to_subtract) const {
+ return SIMDVec32{_mm256_sub_epi32(_mm256_max_epu32(vec, to_subtract.vec),
+ to_subtract.vec)};
+ }
+ FJXL_INLINE SIMDVec32 Sub(const SIMDVec32& to_subtract) const {
+ return SIMDVec32{_mm256_sub_epi32(vec, to_subtract.vec)};
+ }
+ FJXL_INLINE SIMDVec32 Add(const SIMDVec32& oth) const {
+ return SIMDVec32{_mm256_add_epi32(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec32 Xor(const SIMDVec32& oth) const {
+ return SIMDVec32{_mm256_xor_si256(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec32 Pow2() const {
+ return SIMDVec32{_mm256_sllv_epi32(_mm256_set1_epi32(1), vec)};
+ }
+ FJXL_INLINE Mask32 Eq(const SIMDVec32& oth) const {
+ return Mask32{_mm256_cmpeq_epi32(vec, oth.vec)};
+ }
+ FJXL_INLINE Mask32 Gt(const SIMDVec32& oth) const {
+ return Mask32{_mm256_cmpgt_epi32(vec, oth.vec)};
+ }
+ template <size_t i>
+ FJXL_INLINE SIMDVec32 SignedShiftRight() const {
+ return SIMDVec32{_mm256_srai_epi32(vec, i)};
+ }
+};
+
+struct SIMDVec16;
+
+struct Mask16 {
+ __m256i mask;
+ SIMDVec16 IfThenElse(const SIMDVec16& if_true, const SIMDVec16& if_false);
+ Mask16 And(const Mask16& oth) const {
+ return Mask16{_mm256_and_si256(mask, oth.mask)};
+ }
+ size_t CountPrefix() const {
+ return CtzNonZero(
+ ~static_cast<uint64_t>((uint32_t)_mm256_movemask_epi8(mask))) /
+ 2;
+ }
+};
+
+struct SIMDVec16 {
+ __m256i vec;
+
+ static constexpr size_t kLanes = 16;
+
+ FJXL_INLINE static SIMDVec16 Load(const uint16_t* data) {
+ return SIMDVec16{_mm256_loadu_si256((__m256i*)data)};
+ }
+ FJXL_INLINE void Store(uint16_t* data) {
+ _mm256_storeu_si256((__m256i*)data, vec);
+ }
+ FJXL_INLINE static SIMDVec16 Val(uint16_t v) {
+ return SIMDVec16{_mm256_set1_epi16(v)};
+ }
+ FJXL_INLINE static SIMDVec16 FromTwo32(const SIMDVec32& lo,
+ const SIMDVec32& hi) {
+ auto tmp = _mm256_packus_epi32(lo.vec, hi.vec);
+ return SIMDVec16{_mm256_permute4x64_epi64(tmp, 0b11011000)};
+ }
+
+ FJXL_INLINE SIMDVec16 ValToToken() const {
+ auto nibble0 =
+ _mm256_or_si256(_mm256_and_si256(vec, _mm256_set1_epi16(0xF)),
+ _mm256_set1_epi16(0xFF00));
+ auto nibble1 = _mm256_or_si256(
+ _mm256_and_si256(_mm256_srli_epi16(vec, 4), _mm256_set1_epi16(0xF)),
+ _mm256_set1_epi16(0xFF00));
+ auto nibble2 = _mm256_or_si256(
+ _mm256_and_si256(_mm256_srli_epi16(vec, 8), _mm256_set1_epi16(0xF)),
+ _mm256_set1_epi16(0xFF00));
+ auto nibble3 =
+ _mm256_or_si256(_mm256_srli_epi16(vec, 12), _mm256_set1_epi16(0xFF00));
+
+ auto lut0 = _mm256_broadcastsi128_si256(
+ _mm_setr_epi8(0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4));
+ auto lut1 = _mm256_broadcastsi128_si256(
+ _mm_setr_epi8(0, 5, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8));
+ auto lut2 = _mm256_broadcastsi128_si256(_mm_setr_epi8(
+ 0, 9, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12));
+ auto lut3 = _mm256_broadcastsi128_si256(_mm_setr_epi8(
+ 0, 13, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16));
+
+ auto token0 = _mm256_shuffle_epi8(lut0, nibble0);
+ auto token1 = _mm256_shuffle_epi8(lut1, nibble1);
+ auto token2 = _mm256_shuffle_epi8(lut2, nibble2);
+ auto token3 = _mm256_shuffle_epi8(lut3, nibble3);
+
+ auto token = _mm256_max_epi16(_mm256_max_epi16(token0, token1),
+ _mm256_max_epi16(token2, token3));
+ return SIMDVec16{token};
+ }
+
+ FJXL_INLINE SIMDVec16 SatSubU(const SIMDVec16& to_subtract) const {
+ return SIMDVec16{_mm256_subs_epu16(vec, to_subtract.vec)};
+ }
+ FJXL_INLINE SIMDVec16 Sub(const SIMDVec16& to_subtract) const {
+ return SIMDVec16{_mm256_sub_epi16(vec, to_subtract.vec)};
+ }
+ FJXL_INLINE SIMDVec16 Add(const SIMDVec16& oth) const {
+ return SIMDVec16{_mm256_add_epi16(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec16 Min(const SIMDVec16& oth) const {
+ return SIMDVec16{_mm256_min_epu16(vec, oth.vec)};
+ }
+ FJXL_INLINE Mask16 Eq(const SIMDVec16& oth) const {
+ return Mask16{_mm256_cmpeq_epi16(vec, oth.vec)};
+ }
+ FJXL_INLINE Mask16 Gt(const SIMDVec16& oth) const {
+ return Mask16{_mm256_cmpgt_epi16(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec16 Pow2() const {
+ auto pow2_lo_lut = _mm256_broadcastsi128_si256(
+ _mm_setr_epi8(1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6,
+ 1u << 7, 0, 0, 0, 0, 0, 0, 0, 0));
+ auto pow2_hi_lut = _mm256_broadcastsi128_si256(
+ _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1 << 0, 1 << 1, 1 << 2, 1 << 3,
+ 1 << 4, 1 << 5, 1 << 6, 1u << 7));
+
+ auto masked = _mm256_or_si256(vec, _mm256_set1_epi16(0xFF00));
+
+ auto pow2_lo = _mm256_shuffle_epi8(pow2_lo_lut, masked);
+ auto pow2_hi = _mm256_shuffle_epi8(pow2_hi_lut, masked);
+
+ auto pow2 = _mm256_or_si256(_mm256_slli_epi16(pow2_hi, 8), pow2_lo);
+ return SIMDVec16{pow2};
+ }
+ FJXL_INLINE SIMDVec16 Or(const SIMDVec16& oth) const {
+ return SIMDVec16{_mm256_or_si256(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec16 Xor(const SIMDVec16& oth) const {
+ return SIMDVec16{_mm256_xor_si256(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec16 And(const SIMDVec16& oth) const {
+ return SIMDVec16{_mm256_and_si256(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec16 HAdd(const SIMDVec16& oth) const {
+ return SIMDVec16{_mm256_srai_epi16(_mm256_add_epi16(vec, oth.vec), 1)};
+ }
+ FJXL_INLINE SIMDVec16 PrepareForU8Lookup() const {
+ return SIMDVec16{_mm256_or_si256(vec, _mm256_set1_epi16(0xFF00))};
+ }
+ FJXL_INLINE SIMDVec16 U8Lookup(const uint8_t* table) const {
+ return SIMDVec16{_mm256_shuffle_epi8(
+ _mm256_broadcastsi128_si256(_mm_loadu_si128((__m128i*)table)), vec)};
+ }
+ FJXL_INLINE VecPair<SIMDVec16> Interleave(const SIMDVec16& low) const {
+ auto v02 = _mm256_unpacklo_epi16(low.vec, vec);
+ auto v13 = _mm256_unpackhi_epi16(low.vec, vec);
+ return {SIMDVec16{_mm256_permute2x128_si256(v02, v13, 0x20)},
+ SIMDVec16{_mm256_permute2x128_si256(v02, v13, 0x31)}};
+ }
+ FJXL_INLINE VecPair<SIMDVec32> Upcast() const {
+ auto v02 = _mm256_unpacklo_epi16(vec, _mm256_setzero_si256());
+ auto v13 = _mm256_unpackhi_epi16(vec, _mm256_setzero_si256());
+ return {SIMDVec32{_mm256_permute2x128_si256(v02, v13, 0x20)},
+ SIMDVec32{_mm256_permute2x128_si256(v02, v13, 0x31)}};
+ }
+ template <size_t i>
+ FJXL_INLINE SIMDVec16 SignedShiftRight() const {
+ return SIMDVec16{_mm256_srai_epi16(vec, i)};
+ }
+
+ static std::array<SIMDVec16, 1> LoadG8(const unsigned char* data) {
+ __m128i bytes = _mm_loadu_si128((__m128i*)data);
+ return {SIMDVec16{_mm256_cvtepu8_epi16(bytes)}};
+ }
+ static std::array<SIMDVec16, 1> LoadG16(const unsigned char* data) {
+ return {Load((const uint16_t*)data)};
+ }
+
+ static std::array<SIMDVec16, 2> LoadGA8(const unsigned char* data) {
+ __m256i bytes = _mm256_loadu_si256((__m256i*)data);
+ __m256i gray = _mm256_and_si256(bytes, _mm256_set1_epi16(0xFF));
+ __m256i alpha = _mm256_srli_epi16(bytes, 8);
+ return {SIMDVec16{gray}, SIMDVec16{alpha}};
+ }
+ static std::array<SIMDVec16, 2> LoadGA16(const unsigned char* data) {
+ __m256i bytes1 = _mm256_loadu_si256((__m256i*)data);
+ __m256i bytes2 = _mm256_loadu_si256((__m256i*)(data + 32));
+ __m256i g_mask = _mm256_set1_epi32(0xFFFF);
+ __m256i g = _mm256_permute4x64_epi64(
+ _mm256_packus_epi32(_mm256_and_si256(bytes1, g_mask),
+ _mm256_and_si256(bytes2, g_mask)),
+ 0b11011000);
+ __m256i a = _mm256_permute4x64_epi64(
+ _mm256_packus_epi32(_mm256_srli_epi32(bytes1, 16),
+ _mm256_srli_epi32(bytes2, 16)),
+ 0b11011000);
+ return {SIMDVec16{g}, SIMDVec16{a}};
+ }
+
+ static std::array<SIMDVec16, 3> LoadRGB8(const unsigned char* data) {
+ __m128i bytes0 = _mm_loadu_si128((__m128i*)data);
+ __m128i bytes1 = _mm_loadu_si128((__m128i*)(data + 16));
+ __m128i bytes2 = _mm_loadu_si128((__m128i*)(data + 32));
+
+ __m128i idx =
+ _mm_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13);
+
+ __m128i r6b5g5_0 = _mm_shuffle_epi8(bytes0, idx);
+ __m128i g6r5b5_1 = _mm_shuffle_epi8(bytes1, idx);
+ __m128i b6g5r5_2 = _mm_shuffle_epi8(bytes2, idx);
+
+ __m128i mask010 = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0, 0, 0, 0, 0);
+ __m128i mask001 = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF);
+
+ __m128i b2g2b1 = _mm_blendv_epi8(b6g5r5_2, g6r5b5_1, mask001);
+ __m128i b2b0b1 = _mm_blendv_epi8(b2g2b1, r6b5g5_0, mask010);
+
+ __m128i r0r1b1 = _mm_blendv_epi8(r6b5g5_0, g6r5b5_1, mask010);
+ __m128i r0r1r2 = _mm_blendv_epi8(r0r1b1, b6g5r5_2, mask001);
+
+ __m128i g1r1g0 = _mm_blendv_epi8(g6r5b5_1, r6b5g5_0, mask001);
+ __m128i g1g2g0 = _mm_blendv_epi8(g1r1g0, b6g5r5_2, mask010);
+
+ __m128i g0g1g2 = _mm_alignr_epi8(g1g2g0, g1g2g0, 11);
+ __m128i b0b1b2 = _mm_alignr_epi8(b2b0b1, b2b0b1, 6);
+
+ return {SIMDVec16{_mm256_cvtepu8_epi16(r0r1r2)},
+ SIMDVec16{_mm256_cvtepu8_epi16(g0g1g2)},
+ SIMDVec16{_mm256_cvtepu8_epi16(b0b1b2)}};
+ }
+ static std::array<SIMDVec16, 3> LoadRGB16(const unsigned char* data) {
+ auto load_and_split_lohi = [](const unsigned char* data) {
+ // LHLHLH...
+ __m256i bytes = _mm256_loadu_si256((__m256i*)data);
+ // L0L0L0...
+ __m256i lo = _mm256_and_si256(bytes, _mm256_set1_epi16(0xFF));
+ // H0H0H0...
+ __m256i hi = _mm256_srli_epi16(bytes, 8);
+ // LLLLLLLLHHHHHHHHLLLLLLLLHHHHHHHH
+ __m256i packed = _mm256_packus_epi16(lo, hi);
+ return _mm256_permute4x64_epi64(packed, 0b11011000);
+ };
+ __m256i bytes0 = load_and_split_lohi(data);
+ __m256i bytes1 = load_and_split_lohi(data + 32);
+ __m256i bytes2 = load_and_split_lohi(data + 64);
+
+ __m256i idx = _mm256_broadcastsi128_si256(
+ _mm_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13));
+
+ __m256i r6b5g5_0 = _mm256_shuffle_epi8(bytes0, idx);
+ __m256i g6r5b5_1 = _mm256_shuffle_epi8(bytes1, idx);
+ __m256i b6g5r5_2 = _mm256_shuffle_epi8(bytes2, idx);
+
+ __m256i mask010 = _mm256_broadcastsi128_si256(_mm_setr_epi8(
+ 0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0));
+ __m256i mask001 = _mm256_broadcastsi128_si256(_mm_setr_epi8(
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF));
+
+ __m256i b2g2b1 = _mm256_blendv_epi8(b6g5r5_2, g6r5b5_1, mask001);
+ __m256i b2b0b1 = _mm256_blendv_epi8(b2g2b1, r6b5g5_0, mask010);
+
+ __m256i r0r1b1 = _mm256_blendv_epi8(r6b5g5_0, g6r5b5_1, mask010);
+ __m256i r0r1r2 = _mm256_blendv_epi8(r0r1b1, b6g5r5_2, mask001);
+
+ __m256i g1r1g0 = _mm256_blendv_epi8(g6r5b5_1, r6b5g5_0, mask001);
+ __m256i g1g2g0 = _mm256_blendv_epi8(g1r1g0, b6g5r5_2, mask010);
+
+ __m256i g0g1g2 = _mm256_alignr_epi8(g1g2g0, g1g2g0, 11);
+ __m256i b0b1b2 = _mm256_alignr_epi8(b2b0b1, b2b0b1, 6);
+
+ // Now r0r1r2, g0g1g2, b0b1b2 have the low bytes of the RGB pixels in their
+ // lower half, and the high bytes in their upper half.
+
+ auto combine_low_hi = [](__m256i v) {
+ __m128i low = _mm256_extracti128_si256(v, 0);
+ __m128i hi = _mm256_extracti128_si256(v, 1);
+ __m256i low16 = _mm256_cvtepu8_epi16(low);
+ __m256i hi16 = _mm256_cvtepu8_epi16(hi);
+ return _mm256_or_si256(_mm256_slli_epi16(hi16, 8), low16);
+ };
+
+ return {SIMDVec16{combine_low_hi(r0r1r2)},
+ SIMDVec16{combine_low_hi(g0g1g2)},
+ SIMDVec16{combine_low_hi(b0b1b2)}};
+ }
+
+ static std::array<SIMDVec16, 4> LoadRGBA8(const unsigned char* data) {
+ __m256i bytes1 = _mm256_loadu_si256((__m256i*)data);
+ __m256i bytes2 = _mm256_loadu_si256((__m256i*)(data + 32));
+ __m256i rg_mask = _mm256_set1_epi32(0xFFFF);
+ __m256i rg = _mm256_permute4x64_epi64(
+ _mm256_packus_epi32(_mm256_and_si256(bytes1, rg_mask),
+ _mm256_and_si256(bytes2, rg_mask)),
+ 0b11011000);
+ __m256i ba = _mm256_permute4x64_epi64(
+ _mm256_packus_epi32(_mm256_srli_epi32(bytes1, 16),
+ _mm256_srli_epi32(bytes2, 16)),
+ 0b11011000);
+ __m256i r = _mm256_and_si256(rg, _mm256_set1_epi16(0xFF));
+ __m256i g = _mm256_srli_epi16(rg, 8);
+ __m256i b = _mm256_and_si256(ba, _mm256_set1_epi16(0xFF));
+ __m256i a = _mm256_srli_epi16(ba, 8);
+ return {SIMDVec16{r}, SIMDVec16{g}, SIMDVec16{b}, SIMDVec16{a}};
+ }
+ static std::array<SIMDVec16, 4> LoadRGBA16(const unsigned char* data) {
+ __m256i bytes0 = _mm256_loadu_si256((__m256i*)data);
+ __m256i bytes1 = _mm256_loadu_si256((__m256i*)(data + 32));
+ __m256i bytes2 = _mm256_loadu_si256((__m256i*)(data + 64));
+ __m256i bytes3 = _mm256_loadu_si256((__m256i*)(data + 96));
+
+ auto pack32 = [](__m256i a, __m256i b) {
+ return _mm256_permute4x64_epi64(_mm256_packus_epi32(a, b), 0b11011000);
+ };
+ auto packlow32 = [&pack32](__m256i a, __m256i b) {
+ __m256i mask = _mm256_set1_epi32(0xFFFF);
+ return pack32(_mm256_and_si256(a, mask), _mm256_and_si256(b, mask));
+ };
+ auto packhi32 = [&pack32](__m256i a, __m256i b) {
+ return pack32(_mm256_srli_epi32(a, 16), _mm256_srli_epi32(b, 16));
+ };
+
+ __m256i rb0 = packlow32(bytes0, bytes1);
+ __m256i rb1 = packlow32(bytes2, bytes3);
+ __m256i ga0 = packhi32(bytes0, bytes1);
+ __m256i ga1 = packhi32(bytes2, bytes3);
+
+ __m256i r = packlow32(rb0, rb1);
+ __m256i g = packlow32(ga0, ga1);
+ __m256i b = packhi32(rb0, rb1);
+ __m256i a = packhi32(ga0, ga1);
+ return {SIMDVec16{r}, SIMDVec16{g}, SIMDVec16{b}, SIMDVec16{a}};
+ }
+
+ void SwapEndian() {
+ auto indices = _mm256_broadcastsi128_si256(
+ _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14));
+ vec = _mm256_shuffle_epi8(vec, indices);
+ }
+};
+
+SIMDVec16 Mask16::IfThenElse(const SIMDVec16& if_true,
+ const SIMDVec16& if_false) {
+ return SIMDVec16{_mm256_blendv_epi8(if_false.vec, if_true.vec, mask)};
+}
+
+SIMDVec32 Mask32::IfThenElse(const SIMDVec32& if_true,
+ const SIMDVec32& if_false) {
+ return SIMDVec32{_mm256_blendv_epi8(if_false.vec, if_true.vec, mask)};
+}
+
+struct Bits64 {
+ static constexpr size_t kLanes = 4;
+
+ __m256i nbits;
+ __m256i bits;
+
+ FJXL_INLINE void Store(uint64_t* nbits_out, uint64_t* bits_out) {
+ _mm256_storeu_si256((__m256i*)nbits_out, nbits);
+ _mm256_storeu_si256((__m256i*)bits_out, bits);
+ }
+};
+
+struct Bits32 {
+ __m256i nbits;
+ __m256i bits;
+
+ static Bits32 FromRaw(SIMDVec32 nbits, SIMDVec32 bits) {
+ return Bits32{nbits.vec, bits.vec};
+ }
+
+ Bits64 Merge() const {
+ auto nbits_hi32 = _mm256_srli_epi64(nbits, 32);
+ auto nbits_lo32 = _mm256_and_si256(nbits, _mm256_set1_epi64x(0xFFFFFFFF));
+ auto bits_hi32 = _mm256_srli_epi64(bits, 32);
+ auto bits_lo32 = _mm256_and_si256(bits, _mm256_set1_epi64x(0xFFFFFFFF));
+
+ auto nbits64 = _mm256_add_epi64(nbits_hi32, nbits_lo32);
+ auto bits64 =
+ _mm256_or_si256(_mm256_sllv_epi64(bits_hi32, nbits_lo32), bits_lo32);
+ return Bits64{nbits64, bits64};
+ }
+
+ void Interleave(const Bits32& low) {
+ bits = _mm256_or_si256(_mm256_sllv_epi32(bits, low.nbits), low.bits);
+ nbits = _mm256_add_epi32(nbits, low.nbits);
+ }
+
+ void ClipTo(size_t n) {
+ n = std::min<size_t>(n, 8);
+ constexpr uint32_t kMask[16] = {
+ ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+ __m256i mask = _mm256_loadu_si256((__m256i*)(kMask + 8 - n));
+ nbits = _mm256_and_si256(mask, nbits);
+ bits = _mm256_and_si256(mask, bits);
+ }
+ void Skip(size_t n) {
+ n = std::min<size_t>(n, 8);
+ constexpr uint32_t kMask[16] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u,
+ };
+ __m256i mask = _mm256_loadu_si256((__m256i*)(kMask + 8 - n));
+ nbits = _mm256_and_si256(mask, nbits);
+ bits = _mm256_and_si256(mask, bits);
+ }
+};
+
+struct Bits16 {
+ __m256i nbits;
+ __m256i bits;
+
+ static Bits16 FromRaw(SIMDVec16 nbits, SIMDVec16 bits) {
+ return Bits16{nbits.vec, bits.vec};
+ }
+
+ Bits32 Merge() const {
+ auto nbits_hi16 = _mm256_srli_epi32(nbits, 16);
+ auto nbits_lo16 = _mm256_and_si256(nbits, _mm256_set1_epi32(0xFFFF));
+ auto bits_hi16 = _mm256_srli_epi32(bits, 16);
+ auto bits_lo16 = _mm256_and_si256(bits, _mm256_set1_epi32(0xFFFF));
+
+ auto nbits32 = _mm256_add_epi32(nbits_hi16, nbits_lo16);
+ auto bits32 =
+ _mm256_or_si256(_mm256_sllv_epi32(bits_hi16, nbits_lo16), bits_lo16);
+ return Bits32{nbits32, bits32};
+ }
+
+ void Interleave(const Bits16& low) {
+ auto pow2_lo_lut = _mm256_broadcastsi128_si256(
+ _mm_setr_epi8(1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6,
+ 1u << 7, 0, 0, 0, 0, 0, 0, 0, 0));
+ auto low_nbits_masked =
+ _mm256_or_si256(low.nbits, _mm256_set1_epi16(0xFF00));
+
+ auto bits_shifted = _mm256_mullo_epi16(
+ bits, _mm256_shuffle_epi8(pow2_lo_lut, low_nbits_masked));
+
+ nbits = _mm256_add_epi16(nbits, low.nbits);
+ bits = _mm256_or_si256(bits_shifted, low.bits);
+ }
+
+ void ClipTo(size_t n) {
+ n = std::min<size_t>(n, 16);
+ constexpr uint16_t kMask[32] = {
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+ __m256i mask = _mm256_loadu_si256((__m256i*)(kMask + 16 - n));
+ nbits = _mm256_and_si256(mask, nbits);
+ bits = _mm256_and_si256(mask, bits);
+ }
+
+ void Skip(size_t n) {
+ n = std::min<size_t>(n, 16);
+ constexpr uint16_t kMask[32] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+ };
+ __m256i mask = _mm256_loadu_si256((__m256i*)(kMask + 16 - n));
+ nbits = _mm256_and_si256(mask, nbits);
+ bits = _mm256_and_si256(mask, bits);
+ }
+};
+
+#endif
+
+#ifdef FJXL_NEON
+#define FJXL_GENERIC_SIMD
+
+struct SIMDVec32;
+
+struct Mask32 {
+ uint32x4_t mask;
+ SIMDVec32 IfThenElse(const SIMDVec32& if_true, const SIMDVec32& if_false);
+ Mask32 And(const Mask32& oth) const {
+ return Mask32{vandq_u32(mask, oth.mask)};
+ }
+ size_t CountPrefix() const {
+ uint32_t val_unset[4] = {0, 1, 2, 3};
+ uint32_t val_set[4] = {4, 4, 4, 4};
+ uint32x4_t val = vbslq_u32(mask, vld1q_u32(val_set), vld1q_u32(val_unset));
+ return vminvq_u32(val);
+ }
+};
+
+struct SIMDVec32 {
+ uint32x4_t vec;
+
+ static constexpr size_t kLanes = 4;
+
+ FJXL_INLINE static SIMDVec32 Load(const uint32_t* data) {
+ return SIMDVec32{vld1q_u32(data)};
+ }
+ FJXL_INLINE void Store(uint32_t* data) { vst1q_u32(data, vec); }
+ FJXL_INLINE static SIMDVec32 Val(uint32_t v) {
+ return SIMDVec32{vdupq_n_u32(v)};
+ }
+ FJXL_INLINE SIMDVec32 ValToToken() const {
+ return SIMDVec32{vsubq_u32(vdupq_n_u32(32), vclzq_u32(vec))};
+ }
+ FJXL_INLINE SIMDVec32 SatSubU(const SIMDVec32& to_subtract) const {
+ return SIMDVec32{vqsubq_u32(vec, to_subtract.vec)};
+ }
+ FJXL_INLINE SIMDVec32 Sub(const SIMDVec32& to_subtract) const {
+ return SIMDVec32{vsubq_u32(vec, to_subtract.vec)};
+ }
+ FJXL_INLINE SIMDVec32 Add(const SIMDVec32& oth) const {
+ return SIMDVec32{vaddq_u32(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec32 Xor(const SIMDVec32& oth) const {
+ return SIMDVec32{veorq_u32(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec32 Pow2() const {
+ return SIMDVec32{vshlq_u32(vdupq_n_u32(1), vreinterpretq_s32_u32(vec))};
+ }
+ FJXL_INLINE Mask32 Eq(const SIMDVec32& oth) const {
+ return Mask32{vceqq_u32(vec, oth.vec)};
+ }
+ FJXL_INLINE Mask32 Gt(const SIMDVec32& oth) const {
+ return Mask32{
+ vcgtq_s32(vreinterpretq_s32_u32(vec), vreinterpretq_s32_u32(oth.vec))};
+ }
+ template <size_t i>
+ FJXL_INLINE SIMDVec32 SignedShiftRight() const {
+ return SIMDVec32{
+ vreinterpretq_u32_s32(vshrq_n_s32(vreinterpretq_s32_u32(vec), i))};
+ }
+};
+
+struct SIMDVec16;
+
+struct Mask16 {
+ uint16x8_t mask;
+ SIMDVec16 IfThenElse(const SIMDVec16& if_true, const SIMDVec16& if_false);
+ Mask16 And(const Mask16& oth) const {
+ return Mask16{vandq_u16(mask, oth.mask)};
+ }
+ size_t CountPrefix() const {
+ uint16_t val_unset[8] = {0, 1, 2, 3, 4, 5, 6, 7};
+ uint16_t val_set[8] = {8, 8, 8, 8, 8, 8, 8, 8};
+ uint16x8_t val = vbslq_u16(mask, vld1q_u16(val_set), vld1q_u16(val_unset));
+ return vminvq_u16(val);
+ }
+};
+
+struct SIMDVec16 {
+ uint16x8_t vec;
+
+ static constexpr size_t kLanes = 8;
+
+ FJXL_INLINE static SIMDVec16 Load(const uint16_t* data) {
+ return SIMDVec16{vld1q_u16(data)};
+ }
+ FJXL_INLINE void Store(uint16_t* data) { vst1q_u16(data, vec); }
+ FJXL_INLINE static SIMDVec16 Val(uint16_t v) {
+ return SIMDVec16{vdupq_n_u16(v)};
+ }
+ FJXL_INLINE static SIMDVec16 FromTwo32(const SIMDVec32& lo,
+ const SIMDVec32& hi) {
+ return SIMDVec16{vmovn_high_u32(vmovn_u32(lo.vec), hi.vec)};
+ }
+
+ FJXL_INLINE SIMDVec16 ValToToken() const {
+ return SIMDVec16{vsubq_u16(vdupq_n_u16(16), vclzq_u16(vec))};
+ }
+ FJXL_INLINE SIMDVec16 SatSubU(const SIMDVec16& to_subtract) const {
+ return SIMDVec16{vqsubq_u16(vec, to_subtract.vec)};
+ }
+ FJXL_INLINE SIMDVec16 Sub(const SIMDVec16& to_subtract) const {
+ return SIMDVec16{vsubq_u16(vec, to_subtract.vec)};
+ }
+ FJXL_INLINE SIMDVec16 Add(const SIMDVec16& oth) const {
+ return SIMDVec16{vaddq_u16(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec16 Min(const SIMDVec16& oth) const {
+ return SIMDVec16{vminq_u16(vec, oth.vec)};
+ }
+ FJXL_INLINE Mask16 Eq(const SIMDVec16& oth) const {
+ return Mask16{vceqq_u16(vec, oth.vec)};
+ }
+ FJXL_INLINE Mask16 Gt(const SIMDVec16& oth) const {
+ return Mask16{
+ vcgtq_s16(vreinterpretq_s16_u16(vec), vreinterpretq_s16_u16(oth.vec))};
+ }
+ FJXL_INLINE SIMDVec16 Pow2() const {
+ return SIMDVec16{vshlq_u16(vdupq_n_u16(1), vreinterpretq_s16_u16(vec))};
+ }
+ FJXL_INLINE SIMDVec16 Or(const SIMDVec16& oth) const {
+ return SIMDVec16{vorrq_u16(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec16 Xor(const SIMDVec16& oth) const {
+ return SIMDVec16{veorq_u16(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec16 And(const SIMDVec16& oth) const {
+ return SIMDVec16{vandq_u16(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec16 HAdd(const SIMDVec16& oth) const {
+ return SIMDVec16{vhaddq_u16(vec, oth.vec)};
+ }
+ FJXL_INLINE SIMDVec16 PrepareForU8Lookup() const {
+ return SIMDVec16{vorrq_u16(vec, vdupq_n_u16(0xFF00))};
+ }
+ FJXL_INLINE SIMDVec16 U8Lookup(const uint8_t* table) const {
+ uint8x16_t tbl = vld1q_u8(table);
+ uint8x16_t indices = vreinterpretq_u8_u16(vec);
+ return SIMDVec16{vreinterpretq_u16_u8(vqtbl1q_u8(tbl, indices))};
+ }
+ FJXL_INLINE VecPair<SIMDVec16> Interleave(const SIMDVec16& low) const {
+ return {SIMDVec16{vzip1q_u16(low.vec, vec)},
+ SIMDVec16{vzip2q_u16(low.vec, vec)}};
+ }
+ FJXL_INLINE VecPair<SIMDVec32> Upcast() const {
+ uint32x4_t lo = vmovl_u16(vget_low_u16(vec));
+ uint32x4_t hi = vmovl_high_u16(vec);
+ return {SIMDVec32{lo}, SIMDVec32{hi}};
+ }
+ template <size_t i>
+ FJXL_INLINE SIMDVec16 SignedShiftRight() const {
+ return SIMDVec16{
+ vreinterpretq_u16_s16(vshrq_n_s16(vreinterpretq_s16_u16(vec), i))};
+ }
+
+ static std::array<SIMDVec16, 1> LoadG8(const unsigned char* data) {
+ uint8x8_t v = vld1_u8(data);
+ return {SIMDVec16{vmovl_u8(v)}};
+ }
+ static std::array<SIMDVec16, 1> LoadG16(const unsigned char* data) {
+ return {Load((const uint16_t*)data)};
+ }
+
+ static std::array<SIMDVec16, 2> LoadGA8(const unsigned char* data) {
+ uint8x8x2_t v = vld2_u8(data);
+ return {SIMDVec16{vmovl_u8(v.val[0])}, SIMDVec16{vmovl_u8(v.val[1])}};
+ }
+ static std::array<SIMDVec16, 2> LoadGA16(const unsigned char* data) {
+ uint16x8x2_t v = vld2q_u16((const uint16_t*)data);
+ return {SIMDVec16{v.val[0]}, SIMDVec16{v.val[1]}};
+ }
+
+ static std::array<SIMDVec16, 3> LoadRGB8(const unsigned char* data) {
+ uint8x8x3_t v = vld3_u8(data);
+ return {SIMDVec16{vmovl_u8(v.val[0])}, SIMDVec16{vmovl_u8(v.val[1])},
+ SIMDVec16{vmovl_u8(v.val[2])}};
+ }
+ static std::array<SIMDVec16, 3> LoadRGB16(const unsigned char* data) {
+ uint16x8x3_t v = vld3q_u16((const uint16_t*)data);
+ return {SIMDVec16{v.val[0]}, SIMDVec16{v.val[1]}, SIMDVec16{v.val[2]}};
+ }
+
+ static std::array<SIMDVec16, 4> LoadRGBA8(const unsigned char* data) {
+ uint8x8x4_t v = vld4_u8(data);
+ return {SIMDVec16{vmovl_u8(v.val[0])}, SIMDVec16{vmovl_u8(v.val[1])},
+ SIMDVec16{vmovl_u8(v.val[2])}, SIMDVec16{vmovl_u8(v.val[3])}};
+ }
+ static std::array<SIMDVec16, 4> LoadRGBA16(const unsigned char* data) {
+ uint16x8x4_t v = vld4q_u16((const uint16_t*)data);
+ return {SIMDVec16{v.val[0]}, SIMDVec16{v.val[1]}, SIMDVec16{v.val[2]},
+ SIMDVec16{v.val[3]}};
+ }
+
+ void SwapEndian() {
+ vec = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(vec)));
+ }
+};
+
+SIMDVec16 Mask16::IfThenElse(const SIMDVec16& if_true,
+ const SIMDVec16& if_false) {
+ return SIMDVec16{vbslq_u16(mask, if_true.vec, if_false.vec)};
+}
+
+SIMDVec32 Mask32::IfThenElse(const SIMDVec32& if_true,
+ const SIMDVec32& if_false) {
+ return SIMDVec32{vbslq_u32(mask, if_true.vec, if_false.vec)};
+}
+
+struct Bits64 {
+ static constexpr size_t kLanes = 2;
+
+ uint64x2_t nbits;
+ uint64x2_t bits;
+
+ FJXL_INLINE void Store(uint64_t* nbits_out, uint64_t* bits_out) {
+ vst1q_u64(nbits_out, nbits);
+ vst1q_u64(bits_out, bits);
+ }
+};
+
+struct Bits32 {
+ uint32x4_t nbits;
+ uint32x4_t bits;
+
+ static Bits32 FromRaw(SIMDVec32 nbits, SIMDVec32 bits) {
+ return Bits32{nbits.vec, bits.vec};
+ }
+
+ Bits64 Merge() const {
+ // TODO(veluca): can probably be optimized.
+ uint64x2_t nbits_lo32 =
+ vandq_u64(vreinterpretq_u64_u32(nbits), vdupq_n_u64(0xFFFFFFFF));
+ uint64x2_t bits_hi32 =
+ vshlq_u64(vshrq_n_u64(vreinterpretq_u64_u32(bits), 32),
+ vreinterpretq_s64_u64(nbits_lo32));
+ uint64x2_t bits_lo32 =
+ vandq_u64(vreinterpretq_u64_u32(bits), vdupq_n_u64(0xFFFFFFFF));
+ uint64x2_t nbits64 =
+ vsraq_n_u64(nbits_lo32, vreinterpretq_u64_u32(nbits), 32);
+ uint64x2_t bits64 = vorrq_u64(bits_hi32, bits_lo32);
+ return Bits64{nbits64, bits64};
+ }
+
+ void Interleave(const Bits32& low) {
+ bits =
+ vorrq_u32(vshlq_u32(bits, vreinterpretq_s32_u32(low.nbits)), low.bits);
+ nbits = vaddq_u32(nbits, low.nbits);
+ }
+
+ void ClipTo(size_t n) {
+ n = std::min<size_t>(n, 4);
+ constexpr uint32_t kMask[8] = {
+ ~0u, ~0u, ~0u, ~0u, 0, 0, 0, 0,
+ };
+ uint32x4_t mask = vld1q_u32(kMask + 4 - n);
+ nbits = vandq_u32(mask, nbits);
+ bits = vandq_u32(mask, bits);
+ }
+ void Skip(size_t n) {
+ n = std::min<size_t>(n, 4);
+ constexpr uint32_t kMask[8] = {
+ 0, 0, 0, 0, ~0u, ~0u, ~0u, ~0u,
+ };
+ uint32x4_t mask = vld1q_u32(kMask + 4 - n);
+ nbits = vandq_u32(mask, nbits);
+ bits = vandq_u32(mask, bits);
+ }
+};
+
+struct Bits16 {
+ uint16x8_t nbits;
+ uint16x8_t bits;
+
+ static Bits16 FromRaw(SIMDVec16 nbits, SIMDVec16 bits) {
+ return Bits16{nbits.vec, bits.vec};
+ }
+
+ Bits32 Merge() const {
+ // TODO(veluca): can probably be optimized.
+ uint32x4_t nbits_lo16 =
+ vandq_u32(vreinterpretq_u32_u16(nbits), vdupq_n_u32(0xFFFF));
+ uint32x4_t bits_hi16 =
+ vshlq_u32(vshrq_n_u32(vreinterpretq_u32_u16(bits), 16),
+ vreinterpretq_s32_u32(nbits_lo16));
+ uint32x4_t bits_lo16 =
+ vandq_u32(vreinterpretq_u32_u16(bits), vdupq_n_u32(0xFFFF));
+ uint32x4_t nbits32 =
+ vsraq_n_u32(nbits_lo16, vreinterpretq_u32_u16(nbits), 16);
+ uint32x4_t bits32 = vorrq_u32(bits_hi16, bits_lo16);
+ return Bits32{nbits32, bits32};
+ }
+
+ void Interleave(const Bits16& low) {
+ bits =
+ vorrq_u16(vshlq_u16(bits, vreinterpretq_s16_u16(low.nbits)), low.bits);
+ nbits = vaddq_u16(nbits, low.nbits);
+ }
+
+ void ClipTo(size_t n) {
+ n = std::min<size_t>(n, 8);
+ constexpr uint16_t kMask[16] = {
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+ uint16x8_t mask = vld1q_u16(kMask + 8 - n);
+ nbits = vandq_u16(mask, nbits);
+ bits = vandq_u16(mask, bits);
+ }
+ void Skip(size_t n) {
+ n = std::min<size_t>(n, 8);
+ constexpr uint16_t kMask[16] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+ };
+ uint16x8_t mask = vld1q_u16(kMask + 8 - n);
+ nbits = vandq_u16(mask, nbits);
+ bits = vandq_u16(mask, bits);
+ }
+};
+
+#endif
+
+#ifdef FJXL_GENERIC_SIMD
+constexpr size_t SIMDVec32::kLanes;
+constexpr size_t SIMDVec16::kLanes;
+
+//  Each of these functions will process SIMDVec16::kLanes worth of values.
+
+FJXL_INLINE void TokenizeSIMD(const uint16_t* residuals, uint16_t* token_out,
+ uint16_t* nbits_out, uint16_t* bits_out) {
+ SIMDVec16 res = SIMDVec16::Load(residuals);
+ SIMDVec16 token = res.ValToToken();
+ SIMDVec16 nbits = token.SatSubU(SIMDVec16::Val(1));
+ SIMDVec16 bits = res.SatSubU(nbits.Pow2());
+ token.Store(token_out);
+ nbits.Store(nbits_out);
+ bits.Store(bits_out);
+}
+
+FJXL_INLINE void TokenizeSIMD(const uint32_t* residuals, uint16_t* token_out,
+ uint32_t* nbits_out, uint32_t* bits_out) {
+ static_assert(SIMDVec16::kLanes == 2 * SIMDVec32::kLanes, "");
+ SIMDVec32 res_lo = SIMDVec32::Load(residuals);
+ SIMDVec32 res_hi = SIMDVec32::Load(residuals + SIMDVec32::kLanes);
+ SIMDVec32 token_lo = res_lo.ValToToken();
+ SIMDVec32 token_hi = res_hi.ValToToken();
+ SIMDVec32 nbits_lo = token_lo.SatSubU(SIMDVec32::Val(1));
+ SIMDVec32 nbits_hi = token_hi.SatSubU(SIMDVec32::Val(1));
+ SIMDVec32 bits_lo = res_lo.SatSubU(nbits_lo.Pow2());
+ SIMDVec32 bits_hi = res_hi.SatSubU(nbits_hi.Pow2());
+ SIMDVec16 token = SIMDVec16::FromTwo32(token_lo, token_hi);
+ token.Store(token_out);
+ nbits_lo.Store(nbits_out);
+ nbits_hi.Store(nbits_out + SIMDVec32::kLanes);
+ bits_lo.Store(bits_out);
+ bits_hi.Store(bits_out + SIMDVec32::kLanes);
+}
+
+FJXL_INLINE void HuffmanSIMDUpTo13(const uint16_t* tokens,
+ const PrefixCode& code, uint16_t* nbits_out,
+ uint16_t* bits_out) {
+ SIMDVec16 tok = SIMDVec16::Load(tokens).PrepareForU8Lookup();
+ tok.U8Lookup(code.raw_nbits_simd).Store(nbits_out);
+ tok.U8Lookup(code.raw_bits_simd).Store(bits_out);
+}
+
+FJXL_INLINE void HuffmanSIMD14(const uint16_t* tokens, const PrefixCode& code,
+ uint16_t* nbits_out, uint16_t* bits_out) {
+ SIMDVec16 token_cap = SIMDVec16::Val(15);
+ SIMDVec16 tok = SIMDVec16::Load(tokens);
+ SIMDVec16 tok_index = tok.Min(token_cap).PrepareForU8Lookup();
+ SIMDVec16 huff_bits_pre = tok_index.U8Lookup(code.raw_bits_simd);
+ // Set the highest bit when token == 16; the Huffman code is constructed in
+ // such a way that the code for token 15 is the same as the code for 16,
+ // except for the highest bit.
+ Mask16 needs_high_bit = tok.Eq(SIMDVec16::Val(16));
+ SIMDVec16 huff_bits = needs_high_bit.IfThenElse(
+ huff_bits_pre.Or(SIMDVec16::Val(128)), huff_bits_pre);
+ huff_bits.Store(bits_out);
+ tok_index.U8Lookup(code.raw_nbits_simd).Store(nbits_out);
+}
+
+FJXL_INLINE void HuffmanSIMDAbove14(const uint16_t* tokens,
+ const PrefixCode& code, uint16_t* nbits_out,
+ uint16_t* bits_out) {
+ SIMDVec16 tok = SIMDVec16::Load(tokens);
+ // We assume `tok` fits in a *signed* 16-bit integer.
+ Mask16 above = tok.Gt(SIMDVec16::Val(12));
+ // 13, 14 -> 13
+ // 15, 16 -> 14
+ // 17, 18 -> 15
+ SIMDVec16 remap_tok = above.IfThenElse(tok.HAdd(SIMDVec16::Val(13)), tok);
+ SIMDVec16 tok_index = remap_tok.PrepareForU8Lookup();
+ SIMDVec16 huff_bits_pre = tok_index.U8Lookup(code.raw_bits_simd);
+ // Set the highest bit when token == 14, 16, 18.
+ Mask16 needs_high_bit = above.And(tok.Eq(tok.And(SIMDVec16::Val(0xFFFE))));
+ SIMDVec16 huff_bits = needs_high_bit.IfThenElse(
+ huff_bits_pre.Or(SIMDVec16::Val(128)), huff_bits_pre);
+ huff_bits.Store(bits_out);
+ tok_index.U8Lookup(code.raw_nbits_simd).Store(nbits_out);
+}
+
+FJXL_INLINE void StoreSIMDUpTo8(const uint16_t* nbits_tok,
+ const uint16_t* bits_tok,
+ const uint16_t* nbits_huff,
+ const uint16_t* bits_huff, size_t n,
+ size_t skip, Bits32* bits_out) {
+ Bits16 bits =
+ Bits16::FromRaw(SIMDVec16::Load(nbits_tok), SIMDVec16::Load(bits_tok));
+ Bits16 huff_bits =
+ Bits16::FromRaw(SIMDVec16::Load(nbits_huff), SIMDVec16::Load(bits_huff));
+ bits.Interleave(huff_bits);
+ bits.ClipTo(n);
+ bits.Skip(skip);
+ bits_out[0] = bits.Merge();
+}
+
+// Huffman and raw bits don't necessarily fit in a single u16 here.
+FJXL_INLINE void StoreSIMDUpTo14(const uint16_t* nbits_tok,
+ const uint16_t* bits_tok,
+ const uint16_t* nbits_huff,
+ const uint16_t* bits_huff, size_t n,
+ size_t skip, Bits32* bits_out) {
+ VecPair<SIMDVec16> bits =
+ SIMDVec16::Load(bits_tok).Interleave(SIMDVec16::Load(bits_huff));
+ VecPair<SIMDVec16> nbits =
+ SIMDVec16::Load(nbits_tok).Interleave(SIMDVec16::Load(nbits_huff));
+ Bits16 low = Bits16::FromRaw(nbits.low, bits.low);
+ Bits16 hi = Bits16::FromRaw(nbits.hi, bits.hi);
+ low.ClipTo(2 * n);
+ low.Skip(2 * skip);
+ hi.ClipTo(std::max(2 * n, SIMDVec16::kLanes) - SIMDVec16::kLanes);
+ hi.Skip(std::max(2 * skip, SIMDVec16::kLanes) - SIMDVec16::kLanes);
+
+ bits_out[0] = low.Merge();
+ bits_out[1] = hi.Merge();
+}
+
+FJXL_INLINE void StoreSIMDAbove14(const uint32_t* nbits_tok,
+ const uint32_t* bits_tok,
+ const uint16_t* nbits_huff,
+ const uint16_t* bits_huff, size_t n,
+ size_t skip, Bits32* bits_out) {
+ static_assert(SIMDVec16::kLanes == 2 * SIMDVec32::kLanes, "");
+ Bits32 bits_low =
+ Bits32::FromRaw(SIMDVec32::Load(nbits_tok), SIMDVec32::Load(bits_tok));
+ Bits32 bits_hi =
+ Bits32::FromRaw(SIMDVec32::Load(nbits_tok + SIMDVec32::kLanes),
+ SIMDVec32::Load(bits_tok + SIMDVec32::kLanes));
+
+ VecPair<SIMDVec32> huff_bits = SIMDVec16::Load(bits_huff).Upcast();
+ VecPair<SIMDVec32> huff_nbits = SIMDVec16::Load(nbits_huff).Upcast();
+
+ Bits32 huff_low = Bits32::FromRaw(huff_nbits.low, huff_bits.low);
+ Bits32 huff_hi = Bits32::FromRaw(huff_nbits.hi, huff_bits.hi);
+
+ bits_low.Interleave(huff_low);
+ bits_low.ClipTo(n);
+ bits_low.Skip(skip);
+ bits_out[0] = bits_low;
+ bits_hi.Interleave(huff_hi);
+ bits_hi.ClipTo(std::max(n, SIMDVec32::kLanes) - SIMDVec32::kLanes);
+ bits_hi.Skip(std::max(skip, SIMDVec32::kLanes) - SIMDVec32::kLanes);
+ bits_out[1] = bits_hi;
+}
+
+#ifdef FJXL_AVX512
+FJXL_INLINE void StoreToWriterAVX512(const Bits32& bits32, BitWriter& output) {
+ __m512i bits = bits32.bits;
+ __m512i nbits = bits32.nbits;
+
+ // Insert the leftover bits from the bit buffer at the bottom of the vector
+ // and extract the top of the vector.
+ uint64_t trail_bits =
+ _mm512_cvtsi512_si32(_mm512_alignr_epi32(bits, bits, 15));
+ uint64_t trail_nbits =
+ _mm512_cvtsi512_si32(_mm512_alignr_epi32(nbits, nbits, 15));
+ __m512i lead_bits = _mm512_set1_epi32(output.buffer);
+ __m512i lead_nbits = _mm512_set1_epi32(output.bits_in_buffer);
+ bits = _mm512_alignr_epi32(bits, lead_bits, 15);
+ nbits = _mm512_alignr_epi32(nbits, lead_nbits, 15);
+
+ // Merge 32 -> 64 bits.
+ Bits32 b{nbits, bits};
+ Bits64 b64 = b.Merge();
+ bits = b64.bits;
+ nbits = b64.nbits;
+
+ __m512i zero = _mm512_setzero_si512();
+
+ auto sh1 = [zero](__m512i vec) { return _mm512_alignr_epi64(vec, zero, 7); };
+ auto sh2 = [zero](__m512i vec) { return _mm512_alignr_epi64(vec, zero, 6); };
+ auto sh4 = [zero](__m512i vec) { return _mm512_alignr_epi64(vec, zero, 4); };
+
+ // Compute first-past-end-bit-position.
+ __m512i end_interm0 = _mm512_add_epi64(nbits, sh1(nbits));
+ __m512i end_interm1 = _mm512_add_epi64(end_interm0, sh2(end_interm0));
+ __m512i end = _mm512_add_epi64(end_interm1, sh4(end_interm1));
+
+ uint64_t simd_nbits = _mm512_cvtsi512_si32(_mm512_alignr_epi64(end, end, 7));
+
+ // Compute begin-bit-position.
+ __m512i begin = _mm512_sub_epi64(end, nbits);
+
+ // Index of the last bit in the chunk, or the end bit if nbits==0.
+ __m512i last = _mm512_mask_sub_epi64(
+ end, _mm512_cmpneq_epi64_mask(nbits, zero), end, _mm512_set1_epi64(1));
+
+ __m512i lane_offset_mask = _mm512_set1_epi64(63);
+
+ // Starting position of the chunk that each lane will ultimately belong to.
+ __m512i chunk_start = _mm512_andnot_si512(lane_offset_mask, last);
+
+ // For all lanes that contain bits belonging to two different 64-bit chunks,
+ // compute the number of bits that belong to the first chunk.
+ // total # of bits fit in a u16, so we can satsub_u16 here.
+ __m512i first_chunk_nbits = _mm512_subs_epu16(chunk_start, begin);
+
+ // Move all the previous-chunk-bits to the previous lane.
+ __m512i negnbits = _mm512_sub_epi64(_mm512_set1_epi64(64), first_chunk_nbits);
+ __m512i first_chunk_bits =
+ _mm512_srlv_epi64(_mm512_sllv_epi64(bits, negnbits), negnbits);
+ __m512i first_chunk_bits_down =
+ _mm512_alignr_epi32(zero, first_chunk_bits, 2);
+ bits = _mm512_srlv_epi64(bits, first_chunk_nbits);
+ nbits = _mm512_sub_epi64(nbits, first_chunk_nbits);
+ bits = _mm512_or_si512(bits, _mm512_sllv_epi64(first_chunk_bits_down, nbits));
+ begin = _mm512_add_epi64(begin, first_chunk_nbits);
+
+ // We now know that every lane should give bits to only one chunk. We can
+ // shift the bits and then horizontally-or-reduce them within the same chunk.
+ __m512i offset = _mm512_and_si512(begin, lane_offset_mask);
+ __m512i aligned_bits = _mm512_sllv_epi64(bits, offset);
+ // h-or-reduce within same chunk
+ __m512i red0 = _mm512_mask_or_epi64(
+ aligned_bits, _mm512_cmpeq_epi64_mask(sh1(chunk_start), chunk_start),
+ sh1(aligned_bits), aligned_bits);
+ __m512i red1 = _mm512_mask_or_epi64(
+ red0, _mm512_cmpeq_epi64_mask(sh2(chunk_start), chunk_start), sh2(red0),
+ red0);
+ __m512i reduced = _mm512_mask_or_epi64(
+ red1, _mm512_cmpeq_epi64_mask(sh4(chunk_start), chunk_start), sh4(red1),
+ red1);
+ // Extract the highest lane that belongs to each chunk (the lane that ends up
+ // with the OR-ed value of all the other lanes of that chunk).
+ __m512i next_chunk_start =
+ _mm512_alignr_epi32(_mm512_set1_epi64(~0), chunk_start, 2);
+ __m512i result = _mm512_maskz_compress_epi64(
+ _mm512_cmpneq_epi64_mask(chunk_start, next_chunk_start), reduced);
+
+ _mm512_storeu_si512((__m512i*)(output.data.get() + output.bytes_written),
+ result);
+
+ // Update the bit writer and add the last 32-bit lane.
+ // Note that since trail_nbits was at most 32 to begin with, operating on
+ // trail_bits does not risk overflowing.
+ output.bytes_written += simd_nbits / 8;
+ // Here we are implicitly relying on the fact that simd_nbits < 512 to know
+ // that the byte of bitreader data we access is initialized. This is
+ // guaranteed because the remaining bits in the bitreader buffer are at most
+ // 7, so simd_nbits <= 505 always.
+ trail_bits = (trail_bits << (simd_nbits % 8)) +
+ output.data.get()[output.bytes_written];
+ trail_nbits += simd_nbits % 8;
+ StoreLE64(output.data.get() + output.bytes_written, trail_bits);
+ size_t trail_bytes = trail_nbits / 8;
+ output.bits_in_buffer = trail_nbits % 8;
+ output.buffer = trail_bits >> (trail_bytes * 8);
+ output.bytes_written += trail_bytes;
+}
+
+#endif
+
+template <size_t n>
+FJXL_INLINE void StoreToWriter(const Bits32* bits, BitWriter& output) {
+#ifdef FJXL_AVX512
+ static_assert(n <= 2, "");
+ StoreToWriterAVX512(bits[0], output);
+ if (n == 2) {
+ StoreToWriterAVX512(bits[1], output);
+ }
+ return;
+#endif
+ static_assert(n <= 4, "");
+ alignas(64) uint64_t nbits64[Bits64::kLanes * n];
+ alignas(64) uint64_t bits64[Bits64::kLanes * n];
+ bits[0].Merge().Store(nbits64, bits64);
+ if (n > 1) {
+ bits[1].Merge().Store(nbits64 + Bits64::kLanes, bits64 + Bits64::kLanes);
+ }
+ if (n > 2) {
+ bits[2].Merge().Store(nbits64 + 2 * Bits64::kLanes,
+ bits64 + 2 * Bits64::kLanes);
+ }
+ if (n > 3) {
+ bits[3].Merge().Store(nbits64 + 3 * Bits64::kLanes,
+ bits64 + 3 * Bits64::kLanes);
+ }
+ output.WriteMultiple(nbits64, bits64, Bits64::kLanes * n);
+}
+
+namespace detail {
+template <typename T>
+struct IntegerTypes;
+
+template <>
+struct IntegerTypes<SIMDVec16> {
+ using signed_ = int16_t;
+ using unsigned_ = uint16_t;
+};
+
+template <>
+struct IntegerTypes<SIMDVec32> {
+ using signed_ = int32_t;
+ using unsigned_ = uint32_t;
+};
+
+template <typename T>
+struct SIMDType;
+
+template <>
+struct SIMDType<int16_t> {
+ using type = SIMDVec16;
+};
+
+template <>
+struct SIMDType<int32_t> {
+ using type = SIMDVec32;
+};
+
+} // namespace detail
+
+template <typename T>
+using signed_t = typename detail::IntegerTypes<T>::signed_;
+
+template <typename T>
+using unsigned_t = typename detail::IntegerTypes<T>::unsigned_;
+
+template <typename T>
+using simd_t = typename detail::SIMDType<T>::type;
+
+// This function will process exactly one vector worth of pixels.
+
+template <typename T>
+size_t PredictPixels(const signed_t<T>* pixels, const signed_t<T>* pixels_left,
+ const signed_t<T>* pixels_top,
+ const signed_t<T>* pixels_topleft,
+ unsigned_t<T>* residuals) {
+ T px = T::Load((unsigned_t<T>*)pixels);
+ T left = T::Load((unsigned_t<T>*)pixels_left);
+ T top = T::Load((unsigned_t<T>*)pixels_top);
+ T topleft = T::Load((unsigned_t<T>*)pixels_topleft);
+ T ac = left.Sub(topleft);
+ T ab = left.Sub(top);
+ T bc = top.Sub(topleft);
+ T grad = ac.Add(top);
+ T d = ab.Xor(bc);
+ T zero = T::Val(0);
+ T clamp = zero.Gt(d).IfThenElse(top, left);
+ T s = ac.Xor(bc);
+ T pred = zero.Gt(s).IfThenElse(grad, clamp);
+ T res = px.Sub(pred);
+ T res_times_2 = res.Add(res);
+ res = zero.Gt(res).IfThenElse(T::Val(-1).Sub(res_times_2), res_times_2);
+ res.Store(residuals);
+ return res.Eq(T::Val(0)).CountPrefix();
+}
+
+#endif
+
+void EncodeHybridUint000(uint32_t value, uint32_t* token, uint32_t* nbits,
+ uint32_t* bits) {
+ uint32_t n = FloorLog2(value);
+ *token = value ? n + 1 : 0;
+ *nbits = value ? n : 0;
+ *bits = value ? value - (1 << n) : 0;
+}
+
+#ifdef FJXL_AVX512
+constexpr static size_t kLogChunkSize = 5;
+#elif defined(FJXL_AVX2) || defined(FJXL_NEON)
+// Even if NEON only has 128-bit lanes, it is still significantly (~1.3x) faster
+// to process two vectors at a time.
+constexpr static size_t kLogChunkSize = 4;
+#else
+constexpr static size_t kLogChunkSize = 3;
+#endif
+
+constexpr static size_t kChunkSize = 1 << kLogChunkSize;
+
+template <typename Residual>
+void GenericEncodeChunk(const Residual* residuals, size_t n, size_t skip,
+ const PrefixCode& code, BitWriter& output) {
+ for (size_t ix = skip; ix < n; ix++) {
+ unsigned token, nbits, bits;
+ EncodeHybridUint000(residuals[ix], &token, &nbits, &bits);
+ output.Write(code.raw_nbits[token] + nbits,
+ code.raw_bits[token] | bits << code.raw_nbits[token]);
+ }
+}
+
+struct UpTo8Bits {
+ size_t bitdepth;
+ explicit UpTo8Bits(size_t bitdepth) : bitdepth(bitdepth) {
+ assert(bitdepth <= 8);
+ }
+ // Here we can fit up to 9 extra bits + 7 Huffman bits in a u16; for all other
+ // symbols, we could actually go up to 8 Huffman bits as we have at most 8
+ // extra bits; however, the SIMD bit merging logic for AVX2 assumes that no
+ // Huffman length is 8 or more, so we cap at 8 anyway. Last symbol is used for
+ // LZ77 lengths and has no limitations except allowing to represent 32 symbols
+ // in total.
+ static constexpr uint8_t kMinRawLength[12] = {};
+ static constexpr uint8_t kMaxRawLength[12] = {
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10,
+ };
+ static size_t MaxEncodedBitsPerSample() { return 16; }
+ static constexpr size_t kInputBytes = 1;
+ using pixel_t = int16_t;
+ using upixel_t = uint16_t;
+
+ static void PrepareForSimd(const uint8_t* nbits, const uint8_t* bits,
+ size_t n, uint8_t* nbits_simd,
+ uint8_t* bits_simd) {
+ assert(n <= 16);
+ memcpy(nbits_simd, nbits, 16);
+ memcpy(bits_simd, bits, 16);
+ }
+
+ static void EncodeChunk(upixel_t* residuals, size_t n, size_t skip,
+ const PrefixCode& code, BitWriter& output) {
+#ifdef FJXL_GENERIC_SIMD
+ Bits32 bits32[kChunkSize / SIMDVec16::kLanes];
+ alignas(64) uint16_t bits[SIMDVec16::kLanes];
+ alignas(64) uint16_t nbits[SIMDVec16::kLanes];
+ alignas(64) uint16_t bits_huff[SIMDVec16::kLanes];
+ alignas(64) uint16_t nbits_huff[SIMDVec16::kLanes];
+ alignas(64) uint16_t token[SIMDVec16::kLanes];
+ for (size_t i = 0; i < kChunkSize; i += SIMDVec16::kLanes) {
+ TokenizeSIMD(residuals + i, token, nbits, bits);
+ HuffmanSIMDUpTo13(token, code, nbits_huff, bits_huff);
+ StoreSIMDUpTo8(nbits, bits, nbits_huff, bits_huff, std::max(n, i) - i,
+ std::max(skip, i) - i, bits32 + i / SIMDVec16::kLanes);
+ }
+ StoreToWriter<kChunkSize / SIMDVec16::kLanes>(bits32, output);
+ return;
+#endif
+ GenericEncodeChunk(residuals, n, skip, code, output);
+ }
+
+ size_t NumSymbols(bool doing_ycocg) const {
+ // values gain 1 bit for YCoCg, 1 bit for prediction.
+ // Maximum symbol is 1 + effective bit depth of residuals.
+ if (doing_ycocg) {
+ return bitdepth + 3;
+ } else {
+ return bitdepth + 2;
+ }
+ }
+};
+constexpr uint8_t UpTo8Bits::kMinRawLength[];
+constexpr uint8_t UpTo8Bits::kMaxRawLength[];
+
+struct From9To13Bits {
+ size_t bitdepth;
+ explicit From9To13Bits(size_t bitdepth) : bitdepth(bitdepth) {
+ assert(bitdepth <= 13 && bitdepth >= 9);
+ }
+ // Last symbol is used for LZ77 lengths and has no limitations except allowing
+ // to represent 32 symbols in total.
+ // We cannot fit all the bits in a u16, so do not even try and use up to 8
+ // bits per raw symbol.
+ // There are at most 16 raw symbols, so Huffman coding can be SIMDfied without
+ // any special tricks.
+ static constexpr uint8_t kMinRawLength[17] = {};
+ static constexpr uint8_t kMaxRawLength[17] = {
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 10,
+ };
+ static size_t MaxEncodedBitsPerSample() { return 21; }
+ static constexpr size_t kInputBytes = 2;
+ using pixel_t = int16_t;
+ using upixel_t = uint16_t;
+
+ static void PrepareForSimd(const uint8_t* nbits, const uint8_t* bits,
+ size_t n, uint8_t* nbits_simd,
+ uint8_t* bits_simd) {
+ assert(n <= 16);
+ memcpy(nbits_simd, nbits, 16);
+ memcpy(bits_simd, bits, 16);
+ }
+
+ static void EncodeChunk(upixel_t* residuals, size_t n, size_t skip,
+ const PrefixCode& code, BitWriter& output) {
+#ifdef FJXL_GENERIC_SIMD
+ Bits32 bits32[2 * kChunkSize / SIMDVec16::kLanes];
+ alignas(64) uint16_t bits[SIMDVec16::kLanes];
+ alignas(64) uint16_t nbits[SIMDVec16::kLanes];
+ alignas(64) uint16_t bits_huff[SIMDVec16::kLanes];
+ alignas(64) uint16_t nbits_huff[SIMDVec16::kLanes];
+ alignas(64) uint16_t token[SIMDVec16::kLanes];
+ for (size_t i = 0; i < kChunkSize; i += SIMDVec16::kLanes) {
+ TokenizeSIMD(residuals + i, token, nbits, bits);
+ HuffmanSIMDUpTo13(token, code, nbits_huff, bits_huff);
+ StoreSIMDUpTo14(nbits, bits, nbits_huff, bits_huff, std::max(n, i) - i,
+ std::max(skip, i) - i,
+ bits32 + 2 * i / SIMDVec16::kLanes);
+ }
+ StoreToWriter<2 * kChunkSize / SIMDVec16::kLanes>(bits32, output);
+ return;
+#endif
+ GenericEncodeChunk(residuals, n, skip, code, output);
+ }
+
+ size_t NumSymbols(bool doing_ycocg) const {
+ // values gain 1 bit for YCoCg, 1 bit for prediction.
+ // Maximum symbol is 1 + effective bit depth of residuals.
+ if (doing_ycocg) {
+ return bitdepth + 3;
+ } else {
+ return bitdepth + 2;
+ }
+ }
+};
+constexpr uint8_t From9To13Bits::kMinRawLength[];
+constexpr uint8_t From9To13Bits::kMaxRawLength[];
+
+void CheckHuffmanBitsSIMD(int bits1, int nbits1, int bits2, int nbits2) {
+ assert(nbits1 == 8);
+ assert(nbits2 == 8);
+ assert(bits2 == (bits1 | 128));
+}
+
+struct Exactly14Bits {
+ explicit Exactly14Bits(size_t bitdepth) { assert(bitdepth == 14); }
+ // Force LZ77 symbols to have at least 8 bits, and raw symbols 15 and 16 to
+ // have exactly 8, and no other symbol to have 8 or more. This ensures that
+ // the representation for 15 and 16 is identical up to one bit.
+ static constexpr uint8_t kMinRawLength[18] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 7,
+ };
+ static constexpr uint8_t kMaxRawLength[18] = {
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 10,
+ };
+ static constexpr size_t bitdepth = 14;
+ static size_t MaxEncodedBitsPerSample() { return 22; }
+ static constexpr size_t kInputBytes = 2;
+ using pixel_t = int16_t;
+ using upixel_t = uint16_t;
+
+ static void PrepareForSimd(const uint8_t* nbits, const uint8_t* bits,
+ size_t n, uint8_t* nbits_simd,
+ uint8_t* bits_simd) {
+ assert(n == 17);
+ CheckHuffmanBitsSIMD(bits[15], nbits[15], bits[16], nbits[16]);
+ memcpy(nbits_simd, nbits, 16);
+ memcpy(bits_simd, bits, 16);
+ }
+
+ static void EncodeChunk(upixel_t* residuals, size_t n, size_t skip,
+ const PrefixCode& code, BitWriter& output) {
+#ifdef FJXL_GENERIC_SIMD
+ Bits32 bits32[2 * kChunkSize / SIMDVec16::kLanes];
+ alignas(64) uint16_t bits[SIMDVec16::kLanes];
+ alignas(64) uint16_t nbits[SIMDVec16::kLanes];
+ alignas(64) uint16_t bits_huff[SIMDVec16::kLanes];
+ alignas(64) uint16_t nbits_huff[SIMDVec16::kLanes];
+ alignas(64) uint16_t token[SIMDVec16::kLanes];
+ for (size_t i = 0; i < kChunkSize; i += SIMDVec16::kLanes) {
+ TokenizeSIMD(residuals + i, token, nbits, bits);
+ HuffmanSIMD14(token, code, nbits_huff, bits_huff);
+ StoreSIMDUpTo14(nbits, bits, nbits_huff, bits_huff, std::max(n, i) - i,
+ std::max(skip, i) - i,
+ bits32 + 2 * i / SIMDVec16::kLanes);
+ }
+ StoreToWriter<2 * kChunkSize / SIMDVec16::kLanes>(bits32, output);
+ return;
+#endif
+ GenericEncodeChunk(residuals, n, skip, code, output);
+ }
+
+ size_t NumSymbols(bool) const { return 17; }
+};
+constexpr uint8_t Exactly14Bits::kMinRawLength[];
+constexpr uint8_t Exactly14Bits::kMaxRawLength[];
+
+struct MoreThan14Bits {
+ size_t bitdepth;
+ explicit MoreThan14Bits(size_t bitdepth) : bitdepth(bitdepth) {
+ assert(bitdepth > 14);
+ assert(bitdepth <= 16);
+ }
+ // Force LZ77 symbols to have at least 8 bits, and raw symbols 13 to 18 to
+ // have exactly 8, and no other symbol to have 8 or more. This ensures that
+ // the representation for (13, 14), (15, 16), (17, 18) is identical up to one
+ // bit.
+ static constexpr uint8_t kMinRawLength[20] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 7,
+ };
+ static constexpr uint8_t kMaxRawLength[20] = {
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 10,
+ };
+ static size_t MaxEncodedBitsPerSample() { return 24; }
+ static constexpr size_t kInputBytes = 2;
+ using pixel_t = int32_t;
+ using upixel_t = uint32_t;
+
+ static void PrepareForSimd(const uint8_t* nbits, const uint8_t* bits,
+ size_t n, uint8_t* nbits_simd,
+ uint8_t* bits_simd) {
+ assert(n == 19);
+ CheckHuffmanBitsSIMD(bits[13], nbits[13], bits[14], nbits[14]);
+ CheckHuffmanBitsSIMD(bits[15], nbits[15], bits[16], nbits[16]);
+ CheckHuffmanBitsSIMD(bits[17], nbits[17], bits[18], nbits[18]);
+ for (size_t i = 0; i < 14; i++) {
+ nbits_simd[i] = nbits[i];
+ bits_simd[i] = bits[i];
+ }
+ nbits_simd[14] = nbits[15];
+ bits_simd[14] = bits[15];
+ nbits_simd[15] = nbits[17];
+ bits_simd[15] = bits[17];
+ }
+
+ static void EncodeChunk(upixel_t* residuals, size_t n, size_t skip,
+ const PrefixCode& code, BitWriter& output) {
+#ifdef FJXL_GENERIC_SIMD
+ Bits32 bits32[2 * kChunkSize / SIMDVec16::kLanes];
+ alignas(64) uint32_t bits[SIMDVec16::kLanes];
+ alignas(64) uint32_t nbits[SIMDVec16::kLanes];
+ alignas(64) uint16_t bits_huff[SIMDVec16::kLanes];
+ alignas(64) uint16_t nbits_huff[SIMDVec16::kLanes];
+ alignas(64) uint16_t token[SIMDVec16::kLanes];
+ for (size_t i = 0; i < kChunkSize; i += SIMDVec16::kLanes) {
+ TokenizeSIMD(residuals + i, token, nbits, bits);
+ HuffmanSIMDAbove14(token, code, nbits_huff, bits_huff);
+ StoreSIMDAbove14(nbits, bits, nbits_huff, bits_huff, std::max(n, i) - i,
+ std::max(skip, i) - i,
+ bits32 + 2 * i / SIMDVec16::kLanes);
+ }
+ StoreToWriter<2 * kChunkSize / SIMDVec16::kLanes>(bits32, output);
+ return;
+#endif
+ GenericEncodeChunk(residuals, n, skip, code, output);
+ }
+ size_t NumSymbols(bool) const { return 19; }
+};
+constexpr uint8_t MoreThan14Bits::kMinRawLength[];
+constexpr uint8_t MoreThan14Bits::kMaxRawLength[];
+
+void PrepareDCGlobalCommon(bool is_single_group, size_t width, size_t height,
+ const PrefixCode code[4], BitWriter* output) {
+ output->Allocate(100000 + (is_single_group ? width * height * 16 : 0));
+ // No patches, spline or noise.
+ output->Write(1, 1); // default DC dequantization factors (?)
+ output->Write(1, 1); // use global tree / histograms
+ output->Write(1, 0); // no lz77 for the tree
+
+ output->Write(1, 1); // simple code for the tree's context map
+ output->Write(2, 0); // all contexts clustered together
+ output->Write(1, 1); // use prefix code for tree
+ output->Write(4, 0); // 000 hybrid uint
+ output->Write(6, 0b100011); // Alphabet size is 4 (var16)
+ output->Write(2, 1); // simple prefix code
+ output->Write(2, 3); // with 4 symbols
+ output->Write(2, 0);
+ output->Write(2, 1);
+ output->Write(2, 2);
+ output->Write(2, 3);
+ output->Write(1, 0); // First tree encoding option
+ // Huffman table + extra bits for the tree.
+ uint8_t symbol_bits[6] = {0b00, 0b10, 0b001, 0b101, 0b0011, 0b0111};
+ uint8_t symbol_nbits[6] = {2, 2, 3, 3, 4, 4};
+ // Write a tree with a leaf per channel, and gradient predictor for every
+ // leaf.
+ for (auto v : {1, 2, 1, 4, 1, 0, 0, 5, 0, 0, 0, 0, 5,
+ 0, 0, 0, 0, 5, 0, 0, 0, 0, 5, 0, 0, 0}) {
+ output->Write(symbol_nbits[v], symbol_bits[v]);
+ }
+
+ output->Write(1, 1); // Enable lz77 for the main bitstream
+ output->Write(2, 0b00); // lz77 offset 224
+ static_assert(kLZ77Offset == 224, "");
+ output->Write(4, 0b1010); // lz77 min length 7
+ // 400 hybrid uint config for lz77
+ output->Write(4, 4);
+ output->Write(3, 0);
+ output->Write(3, 0);
+
+ output->Write(1, 1); // simple code for the context map
+ output->Write(2, 3); // 3 bits per entry
+ output->Write(3, 4); // channel 3
+ output->Write(3, 3); // channel 2
+ output->Write(3, 2); // channel 1
+ output->Write(3, 1); // channel 0
+ output->Write(3, 0); // distance histogram first
+
+ output->Write(1, 1); // use prefix codes
+ output->Write(4, 0); // 000 hybrid uint config for distances (only need 0)
+ for (size_t i = 0; i < 4; i++) {
+ output->Write(4, 0); // 000 hybrid uint config for symbols (only <= 10)
+ }
+
+ // Distance alphabet size:
+ output->Write(5, 0b00001); // 2: just need 1 for RLE (i.e. distance 1)
+ // Symbol + LZ77 alphabet size:
+ for (size_t i = 0; i < 4; i++) {
+ output->Write(1, 1); // > 1
+ output->Write(4, 8); // <= 512
+ output->Write(8, 256); // == 512
+ }
+
+ // Distance histogram:
+ output->Write(2, 1); // simple prefix code
+ output->Write(2, 0); // with one symbol
+ output->Write(1, 1); // 1
+
+ // Symbol + lz77 histogram:
+ for (size_t i = 0; i < 4; i++) {
+ code[i].WriteTo(output);
+ }
+
+ // Group header for global modular image.
+ output->Write(1, 1); // Global tree
+ output->Write(1, 1); // All default wp
+}
+
+void PrepareDCGlobal(bool is_single_group, size_t width, size_t height,
+ size_t nb_chans, const PrefixCode code[4],
+ BitWriter* output) {
+ PrepareDCGlobalCommon(is_single_group, width, height, code, output);
+ if (nb_chans > 2) {
+ output->Write(2, 0b01); // 1 transform
+ output->Write(2, 0b00); // RCT
+ output->Write(5, 0b00000); // Starting from ch 0
+ output->Write(2, 0b00); // YCoCg
+ } else {
+ output->Write(2, 0b00); // no transforms
+ }
+ if (!is_single_group) {
+ output->ZeroPadToByte();
+ }
+}
+
+template <typename BitDepth>
+struct ChunkEncoder {
+ FJXL_INLINE static void EncodeRle(size_t count, const PrefixCode& code,
+ BitWriter& output) {
+ if (count == 0) return;
+ count -= kLZ77MinLength + 1;
+ if (count < kLZ77CacheSize) {
+ output.Write(code.lz77_cache_nbits[count], code.lz77_cache_bits[count]);
+ } else {
+ unsigned token, nbits, bits;
+ EncodeHybridUintLZ77(count, &token, &nbits, &bits);
+ uint64_t wbits = bits;
+ wbits = (wbits << code.lz77_nbits[token]) | code.lz77_bits[token];
+ wbits = (wbits << code.raw_nbits[0]) | code.raw_bits[0];
+ output.Write(code.lz77_nbits[token] + nbits + code.raw_nbits[0], wbits);
+ }
+ }
+
+ FJXL_INLINE void Chunk(size_t run, typename BitDepth::upixel_t* residuals,
+ size_t skip, size_t n) {
+ EncodeRle(run, *code, *output);
+ BitDepth::EncodeChunk(residuals, n, skip, *code, *output);
+ }
+
+ inline void Finalize(size_t run) { EncodeRle(run, *code, *output); }
+
+ const PrefixCode* code;
+ BitWriter* output;
+};
+
+template <typename BitDepth>
+struct ChunkSampleCollector {
+ FJXL_INLINE void Rle(size_t count, uint64_t* lz77_counts) {
+ if (count == 0) return;
+ raw_counts[0] += 1;
+ count -= kLZ77MinLength + 1;
+ unsigned token, nbits, bits;
+ EncodeHybridUintLZ77(count, &token, &nbits, &bits);
+ lz77_counts[token]++;
+ }
+
+ FJXL_INLINE void Chunk(size_t run, typename BitDepth::upixel_t* residuals,
+ size_t skip, size_t n) {
+ // Run is broken. Encode the run and encode the individual vector.
+ Rle(run, lz77_counts);
+ for (size_t ix = skip; ix < n; ix++) {
+ unsigned token, nbits, bits;
+ EncodeHybridUint000(residuals[ix], &token, &nbits, &bits);
+ raw_counts[token]++;
+ }
+ }
+
+ // don't count final run since we don't know how long it really is
+ void Finalize(size_t run) {}
+
+ uint64_t* raw_counts;
+ uint64_t* lz77_counts;
+};
+
+constexpr uint32_t PackSigned(int32_t value) {
+ return (static_cast<uint32_t>(value) << 1) ^
+ ((static_cast<uint32_t>(~value) >> 31) - 1);
+}
+
+template <typename T, typename BitDepth>
+struct ChannelRowProcessor {
+ using upixel_t = typename BitDepth::upixel_t;
+ using pixel_t = typename BitDepth::pixel_t;
+ T* t;
+ void ProcessChunk(const pixel_t* row, const pixel_t* row_left,
+ const pixel_t* row_top, const pixel_t* row_topleft,
+ size_t n) {
+ alignas(64) upixel_t residuals[kChunkSize] = {};
+ size_t prefix_size = 0;
+ size_t required_prefix_size = 0;
+#ifdef FJXL_GENERIC_SIMD
+ constexpr size_t kNum =
+ sizeof(pixel_t) == 2 ? SIMDVec16::kLanes : SIMDVec32::kLanes;
+ for (size_t ix = 0; ix < kChunkSize; ix += kNum) {
+ size_t c =
+ PredictPixels<simd_t<pixel_t>>(row + ix, row_left + ix, row_top + ix,
+ row_topleft + ix, residuals + ix);
+ prefix_size =
+ prefix_size == required_prefix_size ? prefix_size + c : prefix_size;
+ required_prefix_size += kNum;
+ }
+#else
+ for (size_t ix = 0; ix < kChunkSize; ix++) {
+ pixel_t px = row[ix];
+ pixel_t left = row_left[ix];
+ pixel_t top = row_top[ix];
+ pixel_t topleft = row_topleft[ix];
+ pixel_t ac = left - topleft;
+ pixel_t ab = left - top;
+ pixel_t bc = top - topleft;
+ pixel_t grad = static_cast<pixel_t>(static_cast<upixel_t>(ac) +
+ static_cast<upixel_t>(top));
+ pixel_t d = ab ^ bc;
+ pixel_t clamp = d < 0 ? top : left;
+ pixel_t s = ac ^ bc;
+ pixel_t pred = s < 0 ? grad : clamp;
+ residuals[ix] = PackSigned(px - pred);
+ prefix_size = prefix_size == required_prefix_size
+ ? prefix_size + (residuals[ix] == 0)
+ : prefix_size;
+ required_prefix_size += 1;
+ }
+#endif
+ prefix_size = std::min(n, prefix_size);
+ if (prefix_size == n && (run > 0 || prefix_size > kLZ77MinLength)) {
+ // Run continues, nothing to do.
+ run += prefix_size;
+ } else if (prefix_size + run > kLZ77MinLength) {
+ // Run is broken. Encode the run and encode the individual vector.
+ t->Chunk(run + prefix_size, residuals, prefix_size, n);
+ run = 0;
+ } else {
+ // There was no run to begin with.
+ t->Chunk(0, residuals, 0, n);
+ }
+ }
+
+ void ProcessRow(const pixel_t* row, const pixel_t* row_left,
+ const pixel_t* row_top, const pixel_t* row_topleft,
+ size_t xs) {
+ for (size_t x = 0; x < xs; x += kChunkSize) {
+ ProcessChunk(row + x, row_left + x, row_top + x, row_topleft + x,
+ std::min(kChunkSize, xs - x));
+ }
+ }
+
+ void Finalize() { t->Finalize(run); }
+ // Invariant: run == 0 or run > kLZ77MinLength.
+ size_t run = 0;
+};
+
+uint16_t LoadLE16(const unsigned char* ptr) {
+ return uint16_t{ptr[0]} | (uint16_t{ptr[1]} << 8);
+}
+
+uint16_t SwapEndian(uint16_t in) { return (in >> 8) | (in << 8); }
+
+#ifdef FJXL_GENERIC_SIMD
+void StorePixels(SIMDVec16 p, int16_t* dest) { p.Store((uint16_t*)dest); }
+
+void StorePixels(SIMDVec16 p, int32_t* dest) {
+ VecPair<SIMDVec32> p_up = p.Upcast();
+ p_up.low.Store((uint32_t*)dest);
+ p_up.hi.Store((uint32_t*)dest + SIMDVec32::kLanes);
+}
+#endif
+
+template <typename pixel_t>
+void FillRowG8(const unsigned char* rgba, size_t oxs, pixel_t* luma) {
+ size_t x = 0;
+#ifdef FJXL_GENERIC_SIMD
+ for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) {
+ auto rgb = SIMDVec16::LoadG8(rgba + x);
+ StorePixels(rgb[0], luma + x);
+ }
+#endif
+ for (; x < oxs; x++) {
+ luma[x] = rgba[x];
+ }
+}
+
+template <bool big_endian, typename pixel_t>
+void FillRowG16(const unsigned char* rgba, size_t oxs, pixel_t* luma) {
+ size_t x = 0;
+#ifdef FJXL_GENERIC_SIMD
+ for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) {
+ auto rgb = SIMDVec16::LoadG16(rgba + 2 * x);
+ if (big_endian) {
+ rgb[0].SwapEndian();
+ }
+ StorePixels(rgb[0], luma + x);
+ }
+#endif
+ for (; x < oxs; x++) {
+ uint16_t val = LoadLE16(rgba + 2 * x);
+ if (big_endian) {
+ val = SwapEndian(val);
+ }
+ luma[x] = val;
+ }
+}
+
+template <typename pixel_t>
+void FillRowGA8(const unsigned char* rgba, size_t oxs, pixel_t* luma,
+ pixel_t* alpha) {
+ size_t x = 0;
+#ifdef FJXL_GENERIC_SIMD
+ for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) {
+ auto rgb = SIMDVec16::LoadGA8(rgba + 2 * x);
+ StorePixels(rgb[0], luma + x);
+ StorePixels(rgb[1], alpha + x);
+ }
+#endif
+ for (; x < oxs; x++) {
+ luma[x] = rgba[2 * x];
+ alpha[x] = rgba[2 * x + 1];
+ }
+}
+
+template <bool big_endian, typename pixel_t>
+void FillRowGA16(const unsigned char* rgba, size_t oxs, pixel_t* luma,
+ pixel_t* alpha) {
+ size_t x = 0;
+#ifdef FJXL_GENERIC_SIMD
+ for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) {
+ auto rgb = SIMDVec16::LoadGA16(rgba + 4 * x);
+ if (big_endian) {
+ rgb[0].SwapEndian();
+ rgb[1].SwapEndian();
+ }
+ StorePixels(rgb[0], luma + x);
+ StorePixels(rgb[1], alpha + x);
+ }
+#endif
+ for (; x < oxs; x++) {
+ uint16_t l = LoadLE16(rgba + 4 * x);
+ uint16_t a = LoadLE16(rgba + 4 * x + 2);
+ if (big_endian) {
+ l = SwapEndian(l);
+ a = SwapEndian(a);
+ }
+ luma[x] = l;
+ alpha[x] = a;
+ }
+}
+
+template <typename pixel_t>
+void StoreYCoCg(pixel_t r, pixel_t g, pixel_t b, pixel_t* y, pixel_t* co,
+ pixel_t* cg) {
+ *co = r - b;
+ pixel_t tmp = b + (*co >> 1);
+ *cg = g - tmp;
+ *y = tmp + (*cg >> 1);
+}
+
+#ifdef FJXL_GENERIC_SIMD
+void StoreYCoCg(SIMDVec16 r, SIMDVec16 g, SIMDVec16 b, int16_t* y, int16_t* co,
+ int16_t* cg) {
+ SIMDVec16 co_v = r.Sub(b);
+ SIMDVec16 tmp = b.Add(co_v.SignedShiftRight<1>());
+ SIMDVec16 cg_v = g.Sub(tmp);
+ SIMDVec16 y_v = tmp.Add(cg_v.SignedShiftRight<1>());
+ y_v.Store((uint16_t*)y);
+ co_v.Store((uint16_t*)co);
+ cg_v.Store((uint16_t*)cg);
+}
+
+void StoreYCoCg(SIMDVec16 r, SIMDVec16 g, SIMDVec16 b, int32_t* y, int32_t* co,
+ int32_t* cg) {
+ VecPair<SIMDVec32> r_up = r.Upcast();
+ VecPair<SIMDVec32> g_up = g.Upcast();
+ VecPair<SIMDVec32> b_up = b.Upcast();
+ SIMDVec32 co_lo_v = r_up.low.Sub(b_up.low);
+ SIMDVec32 tmp_lo = b_up.low.Add(co_lo_v.SignedShiftRight<1>());
+ SIMDVec32 cg_lo_v = g_up.low.Sub(tmp_lo);
+ SIMDVec32 y_lo_v = tmp_lo.Add(cg_lo_v.SignedShiftRight<1>());
+ SIMDVec32 co_hi_v = r_up.hi.Sub(b_up.hi);
+ SIMDVec32 tmp_hi = b_up.hi.Add(co_hi_v.SignedShiftRight<1>());
+ SIMDVec32 cg_hi_v = g_up.hi.Sub(tmp_hi);
+ SIMDVec32 y_hi_v = tmp_hi.Add(cg_hi_v.SignedShiftRight<1>());
+ y_lo_v.Store((uint32_t*)y);
+ co_lo_v.Store((uint32_t*)co);
+ cg_lo_v.Store((uint32_t*)cg);
+ y_hi_v.Store((uint32_t*)y + SIMDVec32::kLanes);
+ co_hi_v.Store((uint32_t*)co + SIMDVec32::kLanes);
+ cg_hi_v.Store((uint32_t*)cg + SIMDVec32::kLanes);
+}
+#endif
+
+template <typename pixel_t>
+void FillRowRGB8(const unsigned char* rgba, size_t oxs, pixel_t* y, pixel_t* co,
+ pixel_t* cg) {
+ size_t x = 0;
+#ifdef FJXL_GENERIC_SIMD
+ for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) {
+ auto rgb = SIMDVec16::LoadRGB8(rgba + 3 * x);
+ StoreYCoCg(rgb[0], rgb[1], rgb[2], y + x, co + x, cg + x);
+ }
+#endif
+ for (; x < oxs; x++) {
+ uint16_t r = rgba[3 * x];
+ uint16_t g = rgba[3 * x + 1];
+ uint16_t b = rgba[3 * x + 2];
+ StoreYCoCg<pixel_t>(r, g, b, y + x, co + x, cg + x);
+ }
+}
+
+template <bool big_endian, typename pixel_t>
+void FillRowRGB16(const unsigned char* rgba, size_t oxs, pixel_t* y,
+ pixel_t* co, pixel_t* cg) {
+ size_t x = 0;
+#ifdef FJXL_GENERIC_SIMD
+ for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) {
+ auto rgb = SIMDVec16::LoadRGB16(rgba + 6 * x);
+ if (big_endian) {
+ rgb[0].SwapEndian();
+ rgb[1].SwapEndian();
+ rgb[2].SwapEndian();
+ }
+ StoreYCoCg(rgb[0], rgb[1], rgb[2], y + x, co + x, cg + x);
+ }
+#endif
+ for (; x < oxs; x++) {
+ uint16_t r = LoadLE16(rgba + 6 * x);
+ uint16_t g = LoadLE16(rgba + 6 * x + 2);
+ uint16_t b = LoadLE16(rgba + 6 * x + 4);
+ if (big_endian) {
+ r = SwapEndian(r);
+ g = SwapEndian(g);
+ b = SwapEndian(b);
+ }
+ StoreYCoCg<pixel_t>(r, g, b, y + x, co + x, cg + x);
+ }
+}
+
+template <typename pixel_t>
+void FillRowRGBA8(const unsigned char* rgba, size_t oxs, pixel_t* y,
+ pixel_t* co, pixel_t* cg, pixel_t* alpha) {
+ size_t x = 0;
+#ifdef FJXL_GENERIC_SIMD
+ for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) {
+ auto rgb = SIMDVec16::LoadRGBA8(rgba + 4 * x);
+ StoreYCoCg(rgb[0], rgb[1], rgb[2], y + x, co + x, cg + x);
+ StorePixels(rgb[3], alpha + x);
+ }
+#endif
+ for (; x < oxs; x++) {
+ uint16_t r = rgba[4 * x];
+ uint16_t g = rgba[4 * x + 1];
+ uint16_t b = rgba[4 * x + 2];
+ uint16_t a = rgba[4 * x + 3];
+ StoreYCoCg<pixel_t>(r, g, b, y + x, co + x, cg + x);
+ alpha[x] = a;
+ }
+}
+
+template <bool big_endian, typename pixel_t>
+void FillRowRGBA16(const unsigned char* rgba, size_t oxs, pixel_t* y,
+ pixel_t* co, pixel_t* cg, pixel_t* alpha) {
+ size_t x = 0;
+#ifdef FJXL_GENERIC_SIMD
+ for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) {
+ auto rgb = SIMDVec16::LoadRGBA16(rgba + 8 * x);
+ if (big_endian) {
+ rgb[0].SwapEndian();
+ rgb[1].SwapEndian();
+ rgb[2].SwapEndian();
+ rgb[3].SwapEndian();
+ }
+ StoreYCoCg(rgb[0], rgb[1], rgb[2], y + x, co + x, cg + x);
+ StorePixels(rgb[3], alpha + x);
+ }
+#endif
+ for (; x < oxs; x++) {
+ uint16_t r = LoadLE16(rgba + 8 * x);
+ uint16_t g = LoadLE16(rgba + 8 * x + 2);
+ uint16_t b = LoadLE16(rgba + 8 * x + 4);
+ uint16_t a = LoadLE16(rgba + 8 * x + 6);
+ if (big_endian) {
+ r = SwapEndian(r);
+ g = SwapEndian(g);
+ b = SwapEndian(b);
+ a = SwapEndian(a);
+ }
+ StoreYCoCg<pixel_t>(r, g, b, y + x, co + x, cg + x);
+ alpha[x] = a;
+ }
+}
+
+template <typename Processor, typename BitDepth>
+void ProcessImageArea(const unsigned char* rgba, size_t x0, size_t y0,
+ size_t xs, size_t yskip, size_t ys, size_t row_stride,
+ BitDepth bitdepth, size_t nb_chans, bool big_endian,
+ Processor* processors) {
+ constexpr size_t kPadding = 32;
+
+ using pixel_t = typename BitDepth::pixel_t;
+
+ constexpr size_t kAlign = 64;
+ constexpr size_t kAlignPixels = kAlign / sizeof(pixel_t);
+
+ auto align = [=](pixel_t* ptr) {
+ size_t offset = reinterpret_cast<uintptr_t>(ptr) % kAlign;
+ if (offset) {
+ ptr += offset / sizeof(pixel_t);
+ }
+ return ptr;
+ };
+
+ constexpr size_t kNumPx =
+ (256 + kPadding * 2 + kAlignPixels + kAlignPixels - 1) / kAlignPixels *
+ kAlignPixels;
+
+ std::vector<std::array<std::array<pixel_t, kNumPx>, 2>> group_data(nb_chans);
+
+ for (size_t y = 0; y < ys; y++) {
+ const auto rgba_row =
+ rgba + row_stride * (y0 + y) + x0 * nb_chans * BitDepth::kInputBytes;
+ pixel_t* crow[4] = {};
+ pixel_t* prow[4] = {};
+ for (size_t i = 0; i < nb_chans; i++) {
+ crow[i] = align(&group_data[i][y & 1][kPadding]);
+ prow[i] = align(&group_data[i][(y - 1) & 1][kPadding]);
+ }
+
+ // Pre-fill rows with YCoCg converted pixels.
+ if (nb_chans == 1) {
+ if (BitDepth::kInputBytes == 1) {
+ FillRowG8(rgba_row, xs, crow[0]);
+ } else if (big_endian) {
+ FillRowG16</*big_endian=*/true>(rgba_row, xs, crow[0]);
+ } else {
+ FillRowG16</*big_endian=*/false>(rgba_row, xs, crow[0]);
+ }
+ } else if (nb_chans == 2) {
+ if (BitDepth::kInputBytes == 1) {
+ FillRowGA8(rgba_row, xs, crow[0], crow[1]);
+ } else if (big_endian) {
+ FillRowGA16</*big_endian=*/true>(rgba_row, xs, crow[0], crow[1]);
+ } else {
+ FillRowGA16</*big_endian=*/false>(rgba_row, xs, crow[0], crow[1]);
+ }
+ } else if (nb_chans == 3) {
+ if (BitDepth::kInputBytes == 1) {
+ FillRowRGB8(rgba_row, xs, crow[0], crow[1], crow[2]);
+ } else if (big_endian) {
+ FillRowRGB16</*big_endian=*/true>(rgba_row, xs, crow[0], crow[1],
+ crow[2]);
+ } else {
+ FillRowRGB16</*big_endian=*/false>(rgba_row, xs, crow[0], crow[1],
+ crow[2]);
+ }
+ } else {
+ if (BitDepth::kInputBytes == 1) {
+ FillRowRGBA8(rgba_row, xs, crow[0], crow[1], crow[2], crow[3]);
+ } else if (big_endian) {
+ FillRowRGBA16</*big_endian=*/true>(rgba_row, xs, crow[0], crow[1],
+ crow[2], crow[3]);
+ } else {
+ FillRowRGBA16</*big_endian=*/false>(rgba_row, xs, crow[0], crow[1],
+ crow[2], crow[3]);
+ }
+ }
+ // Deal with x == 0.
+ for (size_t c = 0; c < nb_chans; c++) {
+ *(crow[c] - 1) = y > 0 ? *(prow[c]) : 0;
+ // Fix topleft.
+ *(prow[c] - 1) = y > 0 ? *(prow[c]) : 0;
+ }
+ if (y < yskip) continue;
+ for (size_t c = 0; c < nb_chans; c++) {
+ // Get pointers to px/left/top/topleft data to speedup loop.
+ const pixel_t* row = crow[c];
+ const pixel_t* row_left = crow[c] - 1;
+ const pixel_t* row_top = y == 0 ? row_left : prow[c];
+ const pixel_t* row_topleft = y == 0 ? row_left : prow[c] - 1;
+
+ processors[c].ProcessRow(row, row_left, row_top, row_topleft, xs);
+ }
+ }
+ for (size_t c = 0; c < nb_chans; c++) {
+ processors[c].Finalize();
+ }
+}
+
+template <typename BitDepth>
+void WriteACSection(const unsigned char* rgba, size_t x0, size_t y0, size_t xs,
+ size_t ys, size_t row_stride, bool is_single_group,
+ BitDepth bitdepth, size_t nb_chans, bool big_endian,
+ const PrefixCode code[4],
+ std::array<BitWriter, 4>& output) {
+ for (size_t i = 0; i < nb_chans; i++) {
+ if (is_single_group && i == 0) continue;
+ output[i].Allocate(xs * ys * bitdepth.MaxEncodedBitsPerSample() + 4);
+ }
+ if (!is_single_group) {
+ // Group header for modular image.
+ // When the image is single-group, the global modular image is the one
+ // that contains the pixel data, and there is no group header.
+ output[0].Write(1, 1); // Global tree
+ output[0].Write(1, 1); // All default wp
+ output[0].Write(2, 0b00); // 0 transforms
+ }
+
+ ChunkEncoder<BitDepth> encoders[4];
+ ChannelRowProcessor<ChunkEncoder<BitDepth>, BitDepth> row_encoders[4];
+ for (size_t c = 0; c < nb_chans; c++) {
+ row_encoders[c].t = &encoders[c];
+ encoders[c].output = &output[c];
+ encoders[c].code = &code[c];
+ }
+ ProcessImageArea<ChannelRowProcessor<ChunkEncoder<BitDepth>, BitDepth>>(
+ rgba, x0, y0, xs, 0, ys, row_stride, bitdepth, nb_chans, big_endian,
+ row_encoders);
+}
+
+constexpr int kHashExp = 16;
+constexpr uint32_t kHashSize = 1 << kHashExp;
+constexpr uint32_t kHashMultiplier = 2654435761;
+constexpr int kMaxColors = 512;
+
+// can be any function that returns a value in 0 .. kHashSize-1
+// has to map 0 to 0
+inline uint32_t pixel_hash(uint32_t p) {
+ return (p * kHashMultiplier) >> (32 - kHashExp);
+}
+
+template <size_t nb_chans>
+void FillRowPalette(const unsigned char* inrow, size_t xs,
+ const int16_t* lookup, int16_t* out) {
+ for (size_t x = 0; x < xs; x++) {
+ uint32_t p = 0;
+ memcpy(&p, inrow + x * nb_chans, nb_chans);
+ out[x] = lookup[pixel_hash(p)];
+ }
+}
+
+template <typename Processor>
+void ProcessImageAreaPalette(const unsigned char* rgba, size_t x0, size_t y0,
+ size_t xs, size_t yskip, size_t ys,
+ size_t row_stride, const int16_t* lookup,
+ size_t nb_chans, Processor* processors) {
+ constexpr size_t kPadding = 32;
+
+ std::vector<std::array<int16_t, 256 + kPadding * 2>> group_data(2);
+ Processor& row_encoder = processors[0];
+
+ for (size_t y = 0; y < ys; y++) {
+ // Pre-fill rows with palette converted pixels.
+ const unsigned char* inrow = rgba + row_stride * (y0 + y) + x0 * nb_chans;
+ int16_t* outrow = &group_data[y & 1][kPadding];
+ if (nb_chans == 1) {
+ FillRowPalette<1>(inrow, xs, lookup, outrow);
+ } else if (nb_chans == 2) {
+ FillRowPalette<2>(inrow, xs, lookup, outrow);
+ } else if (nb_chans == 3) {
+ FillRowPalette<3>(inrow, xs, lookup, outrow);
+ } else if (nb_chans == 4) {
+ FillRowPalette<4>(inrow, xs, lookup, outrow);
+ }
+ // Deal with x == 0.
+ group_data[y & 1][kPadding - 1] =
+ y > 0 ? group_data[(y - 1) & 1][kPadding] : 0;
+ // Fix topleft.
+ group_data[(y - 1) & 1][kPadding - 1] =
+ y > 0 ? group_data[(y - 1) & 1][kPadding] : 0;
+ // Get pointers to px/left/top/topleft data to speedup loop.
+ const int16_t* row = &group_data[y & 1][kPadding];
+ const int16_t* row_left = &group_data[y & 1][kPadding - 1];
+ const int16_t* row_top =
+ y == 0 ? row_left : &group_data[(y - 1) & 1][kPadding];
+ const int16_t* row_topleft =
+ y == 0 ? row_left : &group_data[(y - 1) & 1][kPadding - 1];
+
+ row_encoder.ProcessRow(row, row_left, row_top, row_topleft, xs);
+ }
+ row_encoder.Finalize();
+}
+
+void WriteACSectionPalette(const unsigned char* rgba, size_t x0, size_t y0,
+ size_t xs, size_t ys, size_t row_stride,
+ bool is_single_group, const PrefixCode code[4],
+ const int16_t* lookup, size_t nb_chans,
+ BitWriter& output) {
+ if (!is_single_group) {
+ output.Allocate(16 * xs * ys + 4);
+ // Group header for modular image.
+ // When the image is single-group, the global modular image is the one
+ // that contains the pixel data, and there is no group header.
+ output.Write(1, 1); // Global tree
+ output.Write(1, 1); // All default wp
+ output.Write(2, 0b00); // 0 transforms
+ }
+
+ ChunkEncoder<UpTo8Bits> encoder;
+ ChannelRowProcessor<ChunkEncoder<UpTo8Bits>, UpTo8Bits> row_encoder;
+
+ row_encoder.t = &encoder;
+ encoder.output = &output;
+ encoder.code = &code[is_single_group ? 1 : 0];
+ ProcessImageAreaPalette<
+ ChannelRowProcessor<ChunkEncoder<UpTo8Bits>, UpTo8Bits>>(
+ rgba, x0, y0, xs, 0, ys, row_stride, lookup, nb_chans, &row_encoder);
+}
+
+template <typename BitDepth>
+void CollectSamples(const unsigned char* rgba, size_t x0, size_t y0, size_t xs,
+ size_t row_stride, size_t row_count,
+ uint64_t raw_counts[4][kNumRawSymbols],
+ uint64_t lz77_counts[4][kNumLZ77], bool is_single_group,
+ bool palette, BitDepth bitdepth, size_t nb_chans,
+ bool big_endian, const int16_t* lookup) {
+ if (palette) {
+ ChunkSampleCollector<UpTo8Bits> sample_collectors[4];
+ ChannelRowProcessor<ChunkSampleCollector<UpTo8Bits>, UpTo8Bits>
+ row_sample_collectors[4];
+ for (size_t c = 0; c < nb_chans; c++) {
+ row_sample_collectors[c].t = &sample_collectors[c];
+ sample_collectors[c].raw_counts = raw_counts[is_single_group ? 1 : 0];
+ sample_collectors[c].lz77_counts = lz77_counts[is_single_group ? 1 : 0];
+ }
+ ProcessImageAreaPalette<
+ ChannelRowProcessor<ChunkSampleCollector<UpTo8Bits>, UpTo8Bits>>(
+ rgba, x0, y0, xs, 1, 1 + row_count, row_stride, lookup, nb_chans,
+ row_sample_collectors);
+ } else {
+ ChunkSampleCollector<BitDepth> sample_collectors[4];
+ ChannelRowProcessor<ChunkSampleCollector<BitDepth>, BitDepth>
+ row_sample_collectors[4];
+ for (size_t c = 0; c < nb_chans; c++) {
+ row_sample_collectors[c].t = &sample_collectors[c];
+ sample_collectors[c].raw_counts = raw_counts[c];
+ sample_collectors[c].lz77_counts = lz77_counts[c];
+ }
+ ProcessImageArea<
+ ChannelRowProcessor<ChunkSampleCollector<BitDepth>, BitDepth>>(
+ rgba, x0, y0, xs, 1, 1 + row_count, row_stride, bitdepth, nb_chans,
+ big_endian, row_sample_collectors);
+ }
+}
+
+void PrepareDCGlobalPalette(bool is_single_group, size_t width, size_t height,
+ const PrefixCode code[4],
+ const std::vector<uint32_t>& palette,
+ size_t pcolors, BitWriter* output) {
+ PrepareDCGlobalCommon(is_single_group, width, height, code, output);
+ output->Write(2, 0b01); // 1 transform
+ output->Write(2, 0b01); // Palette
+ output->Write(5, 0b00000); // Starting from ch 0
+ output->Write(2, 0b10); // 4-channel palette (RGBA)
+ // pcolors <= kMaxColors + kChunkSize - 1
+ static_assert(kMaxColors + kChunkSize < 1281,
+ "add code to signal larger palette sizes");
+ if (pcolors < 256) {
+ output->Write(2, 0b00);
+ output->Write(8, pcolors);
+ } else {
+ output->Write(2, 0b01);
+ output->Write(10, pcolors - 256);
+ }
+
+ output->Write(2, 0b00); // nb_deltas == 0
+ output->Write(4, 0); // Zero predictor for delta palette
+ // Encode palette
+ ChunkEncoder<UpTo8Bits> encoder;
+ ChannelRowProcessor<ChunkEncoder<UpTo8Bits>, UpTo8Bits> row_encoder;
+ row_encoder.t = &encoder;
+ encoder.output = output;
+ encoder.code = &code[0];
+ int16_t p[4][32 + 1024] = {};
+ uint8_t prgba[4];
+ size_t i = 0;
+ size_t have_zero = 0;
+ if (palette[pcolors - 1] == 0) have_zero = 1;
+ for (; i < pcolors; i++) {
+ memcpy(prgba, &palette[i], 4);
+ p[0][16 + i + have_zero] = prgba[0];
+ p[1][16 + i + have_zero] = prgba[1];
+ p[2][16 + i + have_zero] = prgba[2];
+ p[3][16 + i + have_zero] = prgba[3];
+ }
+ p[0][15] = 0;
+ row_encoder.ProcessRow(p[0] + 16, p[0] + 15, p[0] + 15, p[0] + 15, pcolors);
+ p[1][15] = p[0][16];
+ p[0][15] = p[0][16];
+ row_encoder.ProcessRow(p[1] + 16, p[1] + 15, p[0] + 16, p[0] + 15, pcolors);
+ p[2][15] = p[1][16];
+ p[1][15] = p[1][16];
+ row_encoder.ProcessRow(p[2] + 16, p[2] + 15, p[1] + 16, p[1] + 15, pcolors);
+ p[3][15] = p[2][16];
+ p[2][15] = p[2][16];
+ row_encoder.ProcessRow(p[3] + 16, p[3] + 15, p[2] + 16, p[2] + 15, pcolors);
+ row_encoder.Finalize();
+
+ if (!is_single_group) {
+ output->ZeroPadToByte();
+ }
+}
+
+template <typename BitDepth>
+JxlFastLosslessFrameState* LLEnc(const unsigned char* rgba, size_t width,
+ size_t stride, size_t height,
+ BitDepth bitdepth, size_t nb_chans,
+ bool big_endian, int effort,
+ void* runner_opaque,
+ FJxlParallelRunner runner) {
+ assert(width != 0);
+ assert(height != 0);
+ assert(stride >= nb_chans * BitDepth::kInputBytes * width);
+
+ // Count colors to try palette
+ std::vector<uint32_t> palette(kHashSize);
+ palette[0] = 1;
+ std::vector<int16_t> lookup(kHashSize);
+ lookup[0] = 0;
+ int pcolors = 0;
+ bool collided = effort < 2 || bitdepth.bitdepth != 8 ||
+ nb_chans < 4; // todo: also do rgb palette
+ for (size_t y = 0; y < height && !collided; y++) {
+ const unsigned char* r = rgba + stride * y;
+ size_t x = 0;
+ if (nb_chans == 4) {
+ // this is just an unrolling of the next loop
+ for (; x + 7 < width; x += 8) {
+ uint32_t p[8], index[8];
+ memcpy(p, r + x * 4, 32);
+ for (int i = 0; i < 8; i++) index[i] = pixel_hash(p[i]);
+ for (int i = 0; i < 8; i++) {
+ uint32_t init_entry = index[i] ? 0 : 1;
+ if (init_entry != palette[index[i]] && p[i] != palette[index[i]]) {
+ collided = true;
+ }
+ }
+ for (int i = 0; i < 8; i++) palette[index[i]] = p[i];
+ }
+ for (; x < width; x++) {
+ uint32_t p;
+ memcpy(&p, r + x * 4, 4);
+ uint32_t index = pixel_hash(p);
+ uint32_t init_entry = index ? 0 : 1;
+ if (init_entry != palette[index] && p != palette[index]) {
+ collided = true;
+ }
+ palette[index] = p;
+ }
+ } else {
+ for (; x < width; x++) {
+ uint32_t p = 0;
+ memcpy(&p, r + x * nb_chans, nb_chans);
+ uint32_t index = pixel_hash(p);
+ uint32_t init_entry = index ? 0 : 1;
+ if (init_entry != palette[index] && p != palette[index]) {
+ collided = true;
+ }
+ palette[index] = p;
+ }
+ }
+ }
+
+ int nb_entries = 0;
+ if (!collided) {
+ if (palette[0] == 0) pcolors = 1;
+ if (palette[0] == 1) palette[0] = 0;
+ bool have_color = false;
+ uint8_t minG = 255, maxG = 0;
+ for (uint32_t k = 0; k < kHashSize; k++) {
+ if (palette[k] == 0) continue;
+ uint8_t p[4];
+ memcpy(p, &palette[k], 4);
+ // move entries to front so sort has less work
+ palette[nb_entries] = palette[k];
+ if (p[0] != p[1] || p[0] != p[2]) have_color = true;
+ if (p[1] < minG) minG = p[1];
+ if (p[1] > maxG) maxG = p[1];
+ nb_entries++;
+ // don't do palette if too many colors are needed
+ if (nb_entries + pcolors > kMaxColors) {
+ collided = true;
+ break;
+ }
+ }
+ if (!have_color) {
+ // don't do palette if it's just grayscale without many holes
+ if (maxG - minG < nb_entries * 1.4f) collided = true;
+ }
+ }
+ if (!collided) {
+ std::sort(
+ palette.begin(), palette.begin() + nb_entries,
+ [](uint32_t ap, uint32_t bp) {
+ if (ap == 0) return false;
+ if (bp == 0) return true;
+ uint8_t a[4], b[4];
+ memcpy(a, &ap, 4);
+ memcpy(b, &bp, 4);
+ float ay, by;
+ ay = (0.299f * a[0] + 0.587f * a[1] + 0.114f * a[2] + 0.01f) * a[3];
+ by = (0.299f * b[0] + 0.587f * b[1] + 0.114f * b[2] + 0.01f) * b[3];
+ return ay < by; // sort on alpha*luma
+ });
+ for (int k = 0; k < nb_entries; k++) {
+ if (palette[k] == 0) break;
+ lookup[pixel_hash(palette[k])] = pcolors++;
+ }
+ }
+
+ size_t num_groups_x = (width + 255) / 256;
+ size_t num_groups_y = (height + 255) / 256;
+ size_t num_dc_groups_x = (width + 2047) / 2048;
+ size_t num_dc_groups_y = (height + 2047) / 2048;
+
+ uint64_t raw_counts[4][kNumRawSymbols] = {};
+ uint64_t lz77_counts[4][kNumLZ77] = {};
+
+ bool onegroup = num_groups_x == 1 && num_groups_y == 1;
+
+ // sample the middle (effort * 2) rows of every group
+ for (size_t g = 0; g < num_groups_y * num_groups_x; g++) {
+ size_t xg = g % num_groups_x;
+ size_t yg = g / num_groups_x;
+ int y_offset = yg * 256;
+ int y_max = std::min<size_t>(height - yg * 256, 256);
+ int y_begin = y_offset + std::max<int>(0, y_max - 2 * effort) / 2;
+ int y_count =
+ std::min<int>(2 * effort * y_max / 256, y_offset + y_max - y_begin - 1);
+ int x_max =
+ std::min<size_t>(width - xg * 256, 256) / kChunkSize * kChunkSize;
+ CollectSamples(rgba, xg * 256, y_begin, x_max, stride, y_count, raw_counts,
+ lz77_counts, onegroup, !collided, bitdepth, nb_chans,
+ big_endian, lookup.data());
+ }
+
+ // TODO(veluca): can probably improve this and make it bitdepth-dependent.
+ uint64_t base_raw_counts[kNumRawSymbols] = {
+ 3843, 852, 1270, 1214, 1014, 727, 481, 300, 159, 51,
+ 5, 1, 1, 1, 1, 1, 1, 1, 1};
+
+ bool doing_ycocg = nb_chans > 2 && collided;
+ for (size_t i = bitdepth.NumSymbols(doing_ycocg); i < kNumRawSymbols; i++) {
+ base_raw_counts[i] = 0;
+ }
+
+ for (size_t c = 0; c < 4; c++) {
+ for (size_t i = 0; i < kNumRawSymbols; i++) {
+ raw_counts[c][i] = (raw_counts[c][i] << 8) + base_raw_counts[i];
+ }
+ }
+
+ if (!collided) {
+ unsigned token, nbits, bits;
+ EncodeHybridUint000(PackSigned(pcolors - 1), &token, &nbits, &bits);
+ // ensure all palette indices can actually be encoded
+ for (size_t i = 0; i < token + 1; i++)
+ raw_counts[0][i] = std::max<uint64_t>(raw_counts[0][i], 1);
+ // these tokens are only used for the palette itself so they can get a bad
+ // code
+ for (size_t i = token + 1; i < 10; i++) raw_counts[0][i] = 1;
+ }
+
+ uint64_t base_lz77_counts[kNumLZ77] = {
+ 29, 27, 25, 23, 21, 21, 19, 18, 21, 17, 16, 15, 15, 14,
+ 13, 13, 137, 98, 61, 34, 1, 1, 1, 1, 1, 1, 1, 1,
+ };
+
+ for (size_t c = 0; c < 4; c++) {
+ for (size_t i = 0; i < kNumLZ77; i++) {
+ lz77_counts[c][i] = (lz77_counts[c][i] << 8) + base_lz77_counts[i];
+ }
+ }
+
+ alignas(64) PrefixCode hcode[4];
+ for (size_t i = 0; i < 4; i++) {
+ hcode[i] = PrefixCode(bitdepth, raw_counts[i], lz77_counts[i]);
+ }
+
+ size_t num_groups = onegroup ? 1
+ : (2 + num_dc_groups_x * num_dc_groups_y +
+ num_groups_x * num_groups_y);
+
+ JxlFastLosslessFrameState* frame_state = new JxlFastLosslessFrameState();
+
+ frame_state->width = width;
+ frame_state->height = height;
+ frame_state->nb_chans = nb_chans;
+ frame_state->bitdepth = bitdepth.bitdepth;
+
+ frame_state->group_data = std::vector<std::array<BitWriter, 4>>(num_groups);
+ if (collided) {
+ PrepareDCGlobal(onegroup, width, height, nb_chans, hcode,
+ &frame_state->group_data[0][0]);
+ } else {
+ PrepareDCGlobalPalette(onegroup, width, height, hcode, palette, pcolors,
+ &frame_state->group_data[0][0]);
+ }
+
+ auto run_one = [&](size_t g) {
+ size_t xg = g % num_groups_x;
+ size_t yg = g / num_groups_x;
+ size_t group_id =
+ onegroup ? 0 : (2 + num_dc_groups_x * num_dc_groups_y + g);
+ size_t xs = std::min<size_t>(width - xg * 256, 256);
+ size_t ys = std::min<size_t>(height - yg * 256, 256);
+ size_t x0 = xg * 256;
+ size_t y0 = yg * 256;
+ auto& gd = frame_state->group_data[group_id];
+ if (collided) {
+ WriteACSection(rgba, x0, y0, xs, ys, stride, onegroup, bitdepth, nb_chans,
+ big_endian, hcode, gd);
+
+ } else {
+ WriteACSectionPalette(rgba, x0, y0, xs, ys, stride, onegroup, hcode,
+ lookup.data(), nb_chans, gd[0]);
+ }
+ };
+
+ runner(
+ runner_opaque, &run_one,
+ +[](void* r, size_t i) { (*reinterpret_cast<decltype(&run_one)>(r))(i); },
+ num_groups_x * num_groups_y);
+
+ return frame_state;
+}
+
+JxlFastLosslessFrameState* JxlFastLosslessEncodeImpl(
+ const unsigned char* rgba, size_t width, size_t stride, size_t height,
+ size_t nb_chans, size_t bitdepth, bool big_endian, int effort,
+ void* runner_opaque, FJxlParallelRunner runner) {
+ assert(bitdepth > 0);
+ assert(nb_chans <= 4);
+ assert(nb_chans != 0);
+ if (bitdepth <= 8) {
+ return LLEnc(rgba, width, stride, height, UpTo8Bits(bitdepth), nb_chans,
+ big_endian, effort, runner_opaque, runner);
+ }
+ if (bitdepth <= 13) {
+ return LLEnc(rgba, width, stride, height, From9To13Bits(bitdepth), nb_chans,
+ big_endian, effort, runner_opaque, runner);
+ }
+ if (bitdepth == 14) {
+ return LLEnc(rgba, width, stride, height, Exactly14Bits(bitdepth), nb_chans,
+ big_endian, effort, runner_opaque, runner);
+ }
+ return LLEnc(rgba, width, stride, height, MoreThan14Bits(bitdepth), nb_chans,
+ big_endian, effort, runner_opaque, runner);
+}
+
+} // namespace
+
+#endif // FJXL_SELF_INCLUDE
+
+#ifndef FJXL_SELF_INCLUDE
+
+#define FJXL_SELF_INCLUDE
+
+// If we have NEON enabled, it is the default target.
+#if FJXL_ENABLE_NEON
+
+namespace default_implementation {
+#define FJXL_NEON
+#include "lib/jxl/enc_fast_lossless.cc"
+#undef FJXL_NEON
+} // namespace default_implementation
+
+#else // FJXL_ENABLE_NEON
+
+namespace default_implementation {
+#include "lib/jxl/enc_fast_lossless.cc"
+}
+
+#if FJXL_ENABLE_AVX2
+#ifdef __clang__
+#pragma clang attribute push(__attribute__((target("avx,avx2"))), \
+ apply_to = function)
+// Causes spurious warnings on clang5.
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wmissing-braces"
+#elif defined(__GNUC__)
+#pragma GCC push_options
+// Seems to cause spurious errors on GCC8.
+#pragma GCC diagnostic ignored "-Wpsabi"
+#pragma GCC target "avx,avx2"
+#endif
+
+namespace AVX2 {
+#define FJXL_AVX2
+#include "lib/jxl/enc_fast_lossless.cc"
+#undef FJXL_AVX2
+} // namespace AVX2
+
+#ifdef __clang__
+#pragma clang attribute pop
+#pragma clang diagnostic pop
+#elif defined(__GNUC__)
+#pragma GCC pop_options
+#endif
+#endif // FJXL_ENABLE_AVX2
+
+#if FJXL_ENABLE_AVX512
+#ifdef __clang__
+#pragma clang attribute push( \
+ __attribute__((target("avx512cd,avx512bw,avx512vl,avx512f,avx512vbmi"))), \
+ apply_to = function)
+#elif defined(__GNUC__)
+#pragma GCC push_options
+#pragma GCC target "avx512cd,avx512bw,avx512vl,avx512f,avx512vbmi"
+#endif
+
+namespace AVX512 {
+#define FJXL_AVX512
+#include "lib/jxl/enc_fast_lossless.cc"
+#undef FJXL_AVX512
+} // namespace AVX512
+
+#ifdef __clang__
+#pragma clang attribute pop
+#elif defined(__GNUC__)
+#pragma GCC pop_options
+#endif
+#endif // FJXL_ENABLE_AVX512
+
+#endif
+
+extern "C" {
+
+size_t JxlFastLosslessEncode(const unsigned char* rgba, size_t width,
+ size_t row_stride, size_t height, size_t nb_chans,
+ size_t bitdepth, int big_endian, int effort,
+ unsigned char** output, void* runner_opaque,
+ FJxlParallelRunner runner) {
+ auto frame_state = JxlFastLosslessPrepareFrame(
+ rgba, width, row_stride, height, nb_chans, bitdepth, big_endian, effort,
+ runner_opaque, runner);
+ JxlFastLosslessPrepareHeader(frame_state, /*add_image_header=*/1,
+ /*is_last=*/1);
+ size_t output_size = JxlFastLosslessMaxRequiredOutput(frame_state);
+ *output = (unsigned char*)malloc(output_size);
+ size_t written = 0;
+ size_t total = 0;
+ while ((written = JxlFastLosslessWriteOutput(frame_state, *output + total,
+ output_size - total)) != 0) {
+ total += written;
+ }
+ return total;
+}
+
+JxlFastLosslessFrameState* JxlFastLosslessPrepareFrame(
+ const unsigned char* rgba, size_t width, size_t row_stride, size_t height,
+ size_t nb_chans, size_t bitdepth, int big_endian, int effort,
+ void* runner_opaque, FJxlParallelRunner runner) {
+ auto trivial_runner =
+ +[](void*, void* opaque, void fun(void*, size_t), size_t count) {
+ for (size_t i = 0; i < count; i++) {
+ fun(opaque, i);
+ }
+ };
+
+ if (runner == nullptr) {
+ runner = trivial_runner;
+ }
+
+#if FJXL_ENABLE_AVX512
+ if (__builtin_cpu_supports("avx512cd") &&
+ __builtin_cpu_supports("avx512vbmi") &&
+ __builtin_cpu_supports("avx512bw") && __builtin_cpu_supports("avx512f") &&
+ __builtin_cpu_supports("avx512vl")) {
+ return AVX512::JxlFastLosslessEncodeImpl(rgba, width, row_stride, height,
+ nb_chans, bitdepth, big_endian,
+ effort, runner_opaque, runner);
+ }
+#endif
+#if FJXL_ENABLE_AVX2
+ if (__builtin_cpu_supports("avx2")) {
+ return AVX2::JxlFastLosslessEncodeImpl(rgba, width, row_stride, height,
+ nb_chans, bitdepth, big_endian,
+ effort, runner_opaque, runner);
+ }
+#endif
+
+ return default_implementation::JxlFastLosslessEncodeImpl(
+ rgba, width, row_stride, height, nb_chans, bitdepth, big_endian, effort,
+ runner_opaque, runner);
+}
+
+} // extern "C"
+
+#endif // FJXL_SELF_INCLUDE
diff --git a/third_party/jpeg-xl/lib/jxl/enc_fast_lossless.h b/third_party/jpeg-xl/lib/jxl/enc_fast_lossless.h
new file mode 100644
index 0000000000..4ea1d4f69b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_fast_lossless.h
@@ -0,0 +1,72 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_FAST_LOSSLESS_H_
+#define LIB_JXL_ENC_FAST_LOSSLESS_H_
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Simple encoding API.
+
+// A FJxlParallelRunner must call fun(opaque, i) for all i from 0 to count. It
+// may do so in parallel.
+typedef void(FJxlParallelRunner)(void* runner_opaque, void* opaque,
+ void fun(void*, size_t), size_t count);
+
+// You may pass `nullptr` as a runner: encoding will be sequential.
+size_t JxlFastLosslessEncode(const unsigned char* rgba, size_t width,
+ size_t row_stride, size_t height, size_t nb_chans,
+ size_t bitdepth, int big_endian, int effort,
+ unsigned char** output, void* runner_opaque,
+ FJxlParallelRunner runner);
+
+// More complex API for cases in which you may want to allocate your own buffer
+// and other advanced use cases.
+
+// Opaque struct that represents an intermediate state of the computation.
+struct JxlFastLosslessFrameState;
+
+// Returned JxlFastLosslessFrameState must be freed by calling
+// JxlFastLosslessFreeFrameState.
+JxlFastLosslessFrameState* JxlFastLosslessPrepareFrame(
+ const unsigned char* rgba, size_t width, size_t row_stride, size_t height,
+ size_t nb_chans, size_t bitdepth, int big_endian, int effort,
+ void* runner_opaque, FJxlParallelRunner runner);
+
+// Prepare the (image/frame) header. You may encode animations by concatenating
+// the output of multiple frames, of which the first one has add_image_header =
+// 1 and subsequent ones have add_image_header = 0, and all frames but the last
+// one have is_last = 0.
+void JxlFastLosslessPrepareHeader(JxlFastLosslessFrameState* frame,
+ int add_image_header, int is_last);
+
+// Upper bound on the required output size, including any padding that may be
+// required by JxlFastLosslessWriteOutput. Cannot be called before
+// JxlFastLosslessPrepareHeader.
+size_t JxlFastLosslessMaxRequiredOutput(const JxlFastLosslessFrameState* frame);
+
+// Actual size of the frame once it is encoded. This is not identical to
+// JxlFastLosslessMaxRequiredOutput because JxlFastLosslessWriteOutput may
+// require extra padding.
+size_t JxlFastLosslessOutputSize(const JxlFastLosslessFrameState* frame);
+
+// Writes the frame to the given output buffer. Returns the number of bytes that
+// were written, which is at least 1 unless the entire output has been written
+// already. It is required that `output_size >= 32` when calling this function.
+// This function must be called repeatedly until it returns 0.
+size_t JxlFastLosslessWriteOutput(JxlFastLosslessFrameState* frame,
+ unsigned char* output, size_t output_size);
+
+// Frees the provided frame state.
+void JxlFastLosslessFreeFrameState(JxlFastLosslessFrameState* frame);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // LIB_JXL_ENC_FAST_LOSSLESS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_fields.cc b/third_party/jpeg-xl/lib/jxl/enc_fields.cc
new file mode 100644
index 0000000000..22c763e13f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_fields.cc
@@ -0,0 +1,239 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_fields.h"
+
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+
+namespace {
+using ::jxl::fields_internal::VisitorBase;
+class WriteVisitor : public VisitorBase {
+ public:
+ WriteVisitor(const size_t extension_bits, BitWriter* JXL_RESTRICT writer)
+ : extension_bits_(extension_bits), writer_(writer) {}
+
+ Status Bits(const size_t bits, const uint32_t /*default_value*/,
+ uint32_t* JXL_RESTRICT value) override {
+ ok_ &= BitsCoder::Write(bits, *value, writer_);
+ return true;
+ }
+ Status U32(const U32Enc enc, const uint32_t /*default_value*/,
+ uint32_t* JXL_RESTRICT value) override {
+ ok_ &= U32Coder::Write(enc, *value, writer_);
+ return true;
+ }
+
+ Status U64(const uint64_t /*default_value*/,
+ uint64_t* JXL_RESTRICT value) override {
+ ok_ &= U64Coder::Write(*value, writer_);
+ return true;
+ }
+
+ Status F16(const float /*default_value*/,
+ float* JXL_RESTRICT value) override {
+ ok_ &= F16Coder::Write(*value, writer_);
+ return true;
+ }
+
+ Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) override {
+ JXL_QUIET_RETURN_IF_ERROR(VisitorBase::BeginExtensions(extensions));
+ if (*extensions == 0) {
+ JXL_ASSERT(extension_bits_ == 0);
+ return true;
+ }
+ // TODO(janwas): extend API to pass in array of extension_bits, one per
+ // extension. We currently ascribe all bits to the first extension, but
+ // this is only an encoder limitation. NOTE: extension_bits_ can be zero
+ // if an extension does not require any additional fields.
+ ok_ &= U64Coder::Write(extension_bits_, writer_);
+ // For each nonzero bit except the lowest/first (already written):
+ for (uint64_t remaining_extensions = *extensions & (*extensions - 1);
+ remaining_extensions != 0;
+ remaining_extensions &= remaining_extensions - 1) {
+ ok_ &= U64Coder::Write(0, writer_);
+ }
+ return true;
+ }
+ // EndExtensions = default.
+
+ Status OK() const { return ok_; }
+
+ private:
+ const size_t extension_bits_;
+ BitWriter* JXL_RESTRICT writer_;
+ bool ok_ = true;
+};
+} // namespace
+
+Status Bundle::Write(const Fields& fields, BitWriter* writer, size_t layer,
+ AuxOut* aux_out) {
+ size_t extension_bits, total_bits;
+ JXL_RETURN_IF_ERROR(Bundle::CanEncode(fields, &extension_bits, &total_bits));
+
+ BitWriter::Allotment allotment(writer, total_bits);
+ WriteVisitor visitor(extension_bits, writer);
+ JXL_RETURN_IF_ERROR(visitor.VisitConst(fields));
+ JXL_RETURN_IF_ERROR(visitor.OK());
+ allotment.ReclaimAndCharge(writer, layer, aux_out);
+ return true;
+}
+
+// Returns false if the value is too large to encode.
+Status BitsCoder::Write(const size_t bits, const uint32_t value,
+ BitWriter* JXL_RESTRICT writer) {
+ if (value >= (1ULL << bits)) {
+ return JXL_FAILURE("Value %d too large to encode in %" PRIu64 " bits",
+ value, static_cast<uint64_t>(bits));
+ }
+ writer->Write(bits, value);
+ return true;
+}
+
+// Returns false if the value is too large to encode.
+Status U32Coder::Write(const U32Enc enc, const uint32_t value,
+ BitWriter* JXL_RESTRICT writer) {
+ uint32_t selector;
+ size_t total_bits;
+ JXL_RETURN_IF_ERROR(ChooseSelector(enc, value, &selector, &total_bits));
+
+ writer->Write(2, selector);
+
+ const U32Distr d = enc.GetDistr(selector);
+ if (!d.IsDirect()) { // Nothing more to write for direct encoding
+ const uint32_t offset = d.Offset();
+ JXL_ASSERT(value >= offset);
+ writer->Write(total_bits - 2, value - offset);
+ }
+
+ return true;
+}
+
+// Returns false if the value is too large to encode.
+Status U64Coder::Write(uint64_t value, BitWriter* JXL_RESTRICT writer) {
+ if (value == 0) {
+ // Selector: use 0 bits, value 0
+ writer->Write(2, 0);
+ } else if (value <= 16) {
+ // Selector: use 4 bits, value 1..16
+ writer->Write(2, 1);
+ writer->Write(4, value - 1);
+ } else if (value <= 272) {
+ // Selector: use 8 bits, value 17..272
+ writer->Write(2, 2);
+ writer->Write(8, value - 17);
+ } else {
+ // Selector: varint, first a 12-bit group, after that per 8-bit group.
+ writer->Write(2, 3);
+ writer->Write(12, value & 4095);
+ value >>= 12;
+ int shift = 12;
+ while (value > 0 && shift < 60) {
+ // Indicate varint not done
+ writer->Write(1, 1);
+ writer->Write(8, value & 255);
+ value >>= 8;
+ shift += 8;
+ }
+ if (value > 0) {
+ // This only could happen if shift == N - 4.
+ writer->Write(1, 1);
+ writer->Write(4, value & 15);
+ // Implicitly closed sequence, no extra stop bit is required.
+ } else {
+ // Indicate end of varint
+ writer->Write(1, 0);
+ }
+ }
+
+ return true;
+}
+
+Status F16Coder::Write(float value, BitWriter* JXL_RESTRICT writer) {
+ uint32_t bits32;
+ memcpy(&bits32, &value, sizeof(bits32));
+ const uint32_t sign = bits32 >> 31;
+ const uint32_t biased_exp32 = (bits32 >> 23) & 0xFF;
+ const uint32_t mantissa32 = bits32 & 0x7FFFFF;
+
+ const int32_t exp = static_cast<int32_t>(biased_exp32) - 127;
+ if (JXL_UNLIKELY(exp > 15)) {
+ return JXL_FAILURE("Too big to encode, CanEncode should return false");
+ }
+
+ // Tiny or zero => zero.
+ if (exp < -24) {
+ writer->Write(16, 0);
+ return true;
+ }
+
+ uint32_t biased_exp16, mantissa16;
+
+ // exp = [-24, -15] => subnormal
+ if (JXL_UNLIKELY(exp < -14)) {
+ biased_exp16 = 0;
+ const uint32_t sub_exp = static_cast<uint32_t>(-14 - exp);
+ JXL_ASSERT(1 <= sub_exp && sub_exp < 11);
+ mantissa16 = (1 << (10 - sub_exp)) + (mantissa32 >> (13 + sub_exp));
+ } else {
+ // exp = [-14, 15]
+ biased_exp16 = static_cast<uint32_t>(exp + 15);
+ JXL_ASSERT(1 <= biased_exp16 && biased_exp16 < 31);
+ mantissa16 = mantissa32 >> 13;
+ }
+
+ JXL_ASSERT(mantissa16 < 1024);
+ const uint32_t bits16 = (sign << 15) | (biased_exp16 << 10) | mantissa16;
+ JXL_ASSERT(bits16 < 0x10000);
+ writer->Write(16, bits16);
+ return true;
+}
+
+Status WriteCodestreamHeaders(CodecMetadata* metadata, BitWriter* writer,
+ AuxOut* aux_out) {
+ // Marker/signature
+ BitWriter::Allotment allotment(writer, 16);
+ writer->Write(8, 0xFF);
+ writer->Write(8, kCodestreamMarker);
+ allotment.ReclaimAndCharge(writer, kLayerHeader, aux_out);
+
+ JXL_RETURN_IF_ERROR(
+ WriteSizeHeader(metadata->size, writer, kLayerHeader, aux_out));
+
+ JXL_RETURN_IF_ERROR(
+ WriteImageMetadata(metadata->m, writer, kLayerHeader, aux_out));
+
+ metadata->transform_data.nonserialized_xyb_encoded = metadata->m.xyb_encoded;
+ JXL_RETURN_IF_ERROR(
+ Bundle::Write(metadata->transform_data, writer, kLayerHeader, aux_out));
+
+ return true;
+}
+
+Status WriteFrameHeader(const FrameHeader& frame,
+ BitWriter* JXL_RESTRICT writer, AuxOut* aux_out) {
+ return Bundle::Write(frame, writer, kLayerHeader, aux_out);
+}
+
+Status WriteImageMetadata(const ImageMetadata& metadata,
+ BitWriter* JXL_RESTRICT writer, size_t layer,
+ AuxOut* aux_out) {
+ return Bundle::Write(metadata, writer, layer, aux_out);
+}
+
+Status WriteQuantizerParams(const QuantizerParams& params,
+ BitWriter* JXL_RESTRICT writer, size_t layer,
+ AuxOut* aux_out) {
+ return Bundle::Write(params, writer, layer, aux_out);
+}
+
+Status WriteSizeHeader(const SizeHeader& size, BitWriter* JXL_RESTRICT writer,
+ size_t layer, AuxOut* aux_out) {
+ return Bundle::Write(size, writer, layer, aux_out);
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_fields.h b/third_party/jpeg-xl/lib/jxl/enc_fields.h
new file mode 100644
index 0000000000..5bb179a719
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_fields.h
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_FIELDS_H_
+#define LIB_JXL_ENC_FIELDS_H_
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image_metadata.h"
+#include "lib/jxl/quantizer.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Write headers from the CodecMetadata. Also may modify nonserialized_...
+// fields of the metadata.
+Status WriteCodestreamHeaders(CodecMetadata* metadata, BitWriter* writer,
+ AuxOut* aux_out);
+
+Status WriteFrameHeader(const FrameHeader& frame,
+ BitWriter* JXL_RESTRICT writer, AuxOut* aux_out);
+
+Status WriteQuantizerParams(const QuantizerParams& params,
+ BitWriter* JXL_RESTRICT writer, size_t layer,
+ AuxOut* aux_out);
+
+Status WriteSizeHeader(const SizeHeader& size, BitWriter* JXL_RESTRICT writer,
+ size_t layer, AuxOut* aux_out);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_FIELDS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_file.cc b/third_party/jpeg-xl/lib/jxl/enc_file.cc
new file mode 100644
index 0000000000..b1f1442cc2
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_file.cc
@@ -0,0 +1,141 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_file.h"
+
+#include <stddef.h>
+
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/enc_frame.h"
+#include "lib/jxl/enc_icc_codec.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+namespace {
+
+Status PrepareCodecMetadataFromIO(const CompressParams& cparams,
+ const CodecInOut* io,
+ CodecMetadata* metadata) {
+ *metadata = io->metadata;
+ size_t ups = 1;
+ if (cparams.already_downsampled) ups = cparams.resampling;
+
+ JXL_RETURN_IF_ERROR(metadata->size.Set(io->xsize() * ups, io->ysize() * ups));
+
+ // Keep ICC profile in lossless modes because a reconstructed profile may be
+ // slightly different (quantization).
+ // Also keep ICC in JPEG reconstruction mode as we need byte-exact profiles.
+ if (!cparams.IsLossless() && !io->Main().IsJPEG()) {
+ metadata->m.color_encoding.DecideIfWantICC();
+ }
+
+ metadata->m.xyb_encoded =
+ cparams.color_transform == ColorTransform::kXYB ? true : false;
+
+ // TODO(firsching): move this EncodeFile to test_utils / re-implement this
+ // using API functions
+ return true;
+}
+
+} // namespace
+
+Status EncodePreview(const CompressParams& cparams, const ImageBundle& ib,
+ const CodecMetadata* metadata, const JxlCmsInterface& cms,
+ ThreadPool* pool, BitWriter* JXL_RESTRICT writer) {
+ BitWriter preview_writer;
+ // TODO(janwas): also support generating preview by downsampling
+ if (ib.HasColor()) {
+ AuxOut aux_out;
+ PassesEncoderState passes_enc_state;
+ // TODO(lode): check if we want all extra channels and matching xyb_encoded
+ // for the preview, such that using the main ImageMetadata object for
+ // encoding this frame is warrented.
+ FrameInfo frame_info;
+ frame_info.is_preview = true;
+ JXL_RETURN_IF_ERROR(EncodeFrame(cparams, frame_info, metadata, ib,
+ &passes_enc_state, cms, pool,
+ &preview_writer, &aux_out));
+ preview_writer.ZeroPadToByte();
+ }
+
+ if (preview_writer.BitsWritten() != 0) {
+ writer->ZeroPadToByte();
+ writer->AppendByteAligned(preview_writer);
+ }
+
+ return true;
+}
+
+Status EncodeFile(const CompressParams& params, const CodecInOut* io,
+ PassesEncoderState* passes_enc_state, PaddedBytes* compressed,
+ const JxlCmsInterface& cms, AuxOut* aux_out,
+ ThreadPool* pool) {
+ io->CheckMetadata();
+ BitWriter writer;
+
+ CompressParams cparams = params;
+ if (io->Main().color_transform != ColorTransform::kNone) {
+ // Set the color transform to YCbCr or XYB if the original image is such.
+ cparams.color_transform = io->Main().color_transform;
+ }
+
+ JXL_RETURN_IF_ERROR(ParamsPostInit(&cparams));
+
+ std::unique_ptr<CodecMetadata> metadata = jxl::make_unique<CodecMetadata>();
+ JXL_RETURN_IF_ERROR(PrepareCodecMetadataFromIO(cparams, io, metadata.get()));
+ JXL_RETURN_IF_ERROR(WriteCodestreamHeaders(metadata.get(), &writer, aux_out));
+
+ // Only send ICC (at least several hundred bytes) if fields aren't enough.
+ if (metadata->m.color_encoding.WantICC()) {
+ JXL_RETURN_IF_ERROR(WriteICC(metadata->m.color_encoding.ICC(), &writer,
+ kLayerHeader, aux_out));
+ }
+
+ if (metadata->m.have_preview) {
+ JXL_RETURN_IF_ERROR(EncodePreview(cparams, io->preview_frame,
+ metadata.get(), cms, pool, &writer));
+ }
+
+ // Each frame should start on byte boundaries.
+ BitWriter::Allotment allotment(&writer, 8);
+ writer.ZeroPadToByte();
+ allotment.ReclaimAndCharge(&writer, kLayerHeader, aux_out);
+
+ for (size_t i = 0; i < io->frames.size(); i++) {
+ FrameInfo info;
+ info.is_last = i == io->frames.size() - 1;
+ if (io->frames[i].use_for_next_frame) {
+ info.save_as_reference = 1;
+ }
+ JXL_RETURN_IF_ERROR(EncodeFrame(cparams, info, metadata.get(),
+ io->frames[i], passes_enc_state, cms, pool,
+ &writer, aux_out));
+ }
+
+ // Clean up passes_enc_state in case it gets reused.
+ for (size_t i = 0; i < 4; i++) {
+ passes_enc_state->shared.dc_frames[i] = Image3F();
+ passes_enc_state->shared.reference_frames[i].frame = ImageBundle();
+ }
+
+ *compressed = std::move(writer).TakeBytes();
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_file.h b/third_party/jpeg-xl/lib/jxl/enc_file.h
new file mode 100644
index 0000000000..ff3ad1233d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_file.h
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_FILE_H_
+#define LIB_JXL_ENC_FILE_H_
+
+// Facade for JXL encoding.
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+
+namespace jxl {
+
+struct AuxOut;
+class CodecInOut;
+
+// Compresses pixels from `io` (given in any ColorEncoding).
+// `io->metadata.m.original` must be set.
+Status EncodeFile(const CompressParams& params, const CodecInOut* io,
+ PassesEncoderState* passes_enc_state, PaddedBytes* compressed,
+ const JxlCmsInterface& cms, AuxOut* aux_out = nullptr,
+ ThreadPool* pool = nullptr);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_FILE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_frame.cc b/third_party/jpeg-xl/lib/jxl/enc_frame.cc
new file mode 100644
index 0000000000..ed4088120e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_frame.cc
@@ -0,0 +1,1745 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_frame.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cmath>
+#include <limits>
+#include <numeric>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_chroma_from_luma.h"
+#include "lib/jxl/enc_coeff_order.h"
+#include "lib/jxl/enc_context_map.h"
+#include "lib/jxl/enc_entropy_coder.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/enc_gaborish.h"
+#include "lib/jxl/enc_group.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_noise.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_photon_noise.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/enc_toc.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/modular/options.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+namespace {
+
+PassDefinition progressive_passes_dc_vlf_lf_full_ac[] = {
+ {/*num_coefficients=*/2, /*shift=*/0,
+ /*suitable_for_downsampling_of_at_least=*/4},
+ {/*num_coefficients=*/3, /*shift=*/0,
+ /*suitable_for_downsampling_of_at_least=*/2},
+ {/*num_coefficients=*/8, /*shift=*/0,
+ /*suitable_for_downsampling_of_at_least=*/0},
+};
+
+PassDefinition progressive_passes_dc_quant_ac_full_ac[] = {
+ {/*num_coefficients=*/8, /*shift=*/1,
+ /*suitable_for_downsampling_of_at_least=*/2},
+ {/*num_coefficients=*/8, /*shift=*/0,
+ /*suitable_for_downsampling_of_at_least=*/0},
+};
+
+void ClusterGroups(PassesEncoderState* enc_state) {
+ if (enc_state->shared.frame_header.passes.num_passes > 1) {
+ // TODO(veluca): implement this for progressive modes.
+ return;
+ }
+ // This only considers pass 0 for now.
+ std::vector<uint8_t> context_map;
+ EntropyEncodingData codes;
+ auto& ac = enc_state->passes[0].ac_tokens;
+ size_t limit = std::ceil(std::sqrt(ac.size()));
+ if (limit == 1) return;
+ size_t num_contexts = enc_state->shared.block_ctx_map.NumACContexts();
+ std::vector<float> costs(ac.size());
+ HistogramParams params;
+ params.uint_method = HistogramParams::HybridUintMethod::kNone;
+ params.lz77_method = HistogramParams::LZ77Method::kNone;
+ params.ans_histogram_strategy =
+ HistogramParams::ANSHistogramStrategy::kApproximate;
+ size_t max = 0;
+ auto token_cost = [&](std::vector<std::vector<Token>>& tokens, size_t num_ctx,
+ bool estimate = true) {
+ // TODO(veluca): not estimating is very expensive.
+ BitWriter writer;
+ size_t c = BuildAndEncodeHistograms(
+ params, num_ctx, tokens, &codes, &context_map,
+ estimate ? nullptr : &writer, 0, /*aux_out=*/0);
+ if (estimate) return c;
+ for (size_t i = 0; i < tokens.size(); i++) {
+ WriteTokens(tokens[i], codes, context_map, &writer, 0, nullptr);
+ }
+ return writer.BitsWritten();
+ };
+ for (size_t i = 0; i < ac.size(); i++) {
+ std::vector<std::vector<Token>> tokens{ac[i]};
+ costs[i] =
+ token_cost(tokens, enc_state->shared.block_ctx_map.NumACContexts());
+ if (costs[i] > costs[max]) {
+ max = i;
+ }
+ }
+ auto dist = [&](int i, int j) {
+ std::vector<std::vector<Token>> tokens{ac[i], ac[j]};
+ return token_cost(tokens, num_contexts) - costs[i] - costs[j];
+ };
+ std::vector<size_t> out{max};
+ std::vector<float> dists(ac.size());
+ size_t farthest = 0;
+ for (size_t i = 0; i < ac.size(); i++) {
+ if (i == max) continue;
+ dists[i] = dist(max, i);
+ if (dists[i] > dists[farthest]) {
+ farthest = i;
+ }
+ }
+
+ while (dists[farthest] > 0 && out.size() < limit) {
+ out.push_back(farthest);
+ dists[farthest] = 0;
+ enc_state->histogram_idx[farthest] = out.size() - 1;
+ for (size_t i = 0; i < ac.size(); i++) {
+ float d = dist(out.back(), i);
+ if (d < dists[i]) {
+ dists[i] = d;
+ enc_state->histogram_idx[i] = out.size() - 1;
+ }
+ if (dists[i] > dists[farthest]) {
+ farthest = i;
+ }
+ }
+ }
+
+ std::vector<size_t> remap(out.size());
+ std::iota(remap.begin(), remap.end(), 0);
+ for (size_t i = 0; i < enc_state->histogram_idx.size(); i++) {
+ enc_state->histogram_idx[i] = remap[enc_state->histogram_idx[i]];
+ }
+ auto remap_cost = [&](std::vector<size_t> remap) {
+ std::vector<size_t> re_remap(remap.size(), remap.size());
+ size_t r = 0;
+ for (size_t i = 0; i < remap.size(); i++) {
+ if (re_remap[remap[i]] == remap.size()) {
+ re_remap[remap[i]] = r++;
+ }
+ remap[i] = re_remap[remap[i]];
+ }
+ auto tokens = ac;
+ size_t max_hist = 0;
+ for (size_t i = 0; i < tokens.size(); i++) {
+ for (size_t j = 0; j < tokens[i].size(); j++) {
+ size_t hist = remap[enc_state->histogram_idx[i]];
+ tokens[i][j].context += hist * num_contexts;
+ max_hist = std::max(hist + 1, max_hist);
+ }
+ }
+ return token_cost(tokens, max_hist * num_contexts, /*estimate=*/false);
+ };
+
+ for (size_t src = 0; src < out.size(); src++) {
+ float cost = remap_cost(remap);
+ size_t best = src;
+ for (size_t j = src + 1; j < out.size(); j++) {
+ if (remap[src] == remap[j]) continue;
+ auto remap_c = remap;
+ std::replace(remap_c.begin(), remap_c.end(), remap[src], remap[j]);
+ float c = remap_cost(remap_c);
+ if (c < cost) {
+ best = j;
+ cost = c;
+ }
+ }
+ if (src != best) {
+ std::replace(remap.begin(), remap.end(), remap[src], remap[best]);
+ }
+ }
+ std::vector<size_t> re_remap(remap.size(), remap.size());
+ size_t r = 0;
+ for (size_t i = 0; i < remap.size(); i++) {
+ if (re_remap[remap[i]] == remap.size()) {
+ re_remap[remap[i]] = r++;
+ }
+ remap[i] = re_remap[remap[i]];
+ }
+
+ enc_state->shared.num_histograms =
+ *std::max_element(remap.begin(), remap.end()) + 1;
+ for (size_t i = 0; i < enc_state->histogram_idx.size(); i++) {
+ enc_state->histogram_idx[i] = remap[enc_state->histogram_idx[i]];
+ }
+ for (size_t i = 0; i < ac.size(); i++) {
+ for (size_t j = 0; j < ac[i].size(); j++) {
+ ac[i][j].context += enc_state->histogram_idx[i] * num_contexts;
+ }
+ }
+}
+
+uint64_t FrameFlagsFromParams(const CompressParams& cparams) {
+ uint64_t flags = 0;
+
+ const float dist = cparams.butteraugli_distance;
+
+ // We don't add noise at low butteraugli distances because the original
+ // noise is stored within the compressed image and adding noise makes things
+ // worse.
+ if (ApplyOverride(cparams.noise, dist >= kMinButteraugliForNoise) ||
+ cparams.photon_noise_iso > 0 ||
+ cparams.manual_noise.size() == NoiseParams::kNumNoisePoints) {
+ flags |= FrameHeader::kNoise;
+ }
+
+ if (cparams.progressive_dc > 0 && cparams.modular_mode == false) {
+ flags |= FrameHeader::kUseDcFrame;
+ }
+
+ return flags;
+}
+
+Status LoopFilterFromParams(const CompressParams& cparams,
+ FrameHeader* JXL_RESTRICT frame_header) {
+ LoopFilter* loop_filter = &frame_header->loop_filter;
+
+ // Gaborish defaults to enabled in Hare or slower.
+ loop_filter->gab = ApplyOverride(
+ cparams.gaborish, cparams.speed_tier <= SpeedTier::kHare &&
+ frame_header->encoding == FrameEncoding::kVarDCT &&
+ cparams.decoding_speed_tier < 4);
+
+ if (cparams.epf != -1) {
+ loop_filter->epf_iters = cparams.epf;
+ } else {
+ if (frame_header->encoding == FrameEncoding::kModular) {
+ loop_filter->epf_iters = 0;
+ } else {
+ constexpr float kThresholds[3] = {0.7, 1.5, 4.0};
+ loop_filter->epf_iters = 0;
+ if (cparams.decoding_speed_tier < 3) {
+ for (size_t i = cparams.decoding_speed_tier == 2 ? 1 : 0; i < 3; i++) {
+ if (cparams.butteraugli_distance >= kThresholds[i]) {
+ loop_filter->epf_iters++;
+ }
+ }
+ }
+ }
+ }
+ // Strength of EPF in modular mode.
+ if (frame_header->encoding == FrameEncoding::kModular &&
+ !cparams.IsLossless()) {
+ // TODO(veluca): this formula is nonsense.
+ loop_filter->epf_sigma_for_modular = cparams.butteraugli_distance;
+ }
+ if (frame_header->encoding == FrameEncoding::kModular &&
+ cparams.lossy_palette) {
+ loop_filter->epf_sigma_for_modular = 1.0f;
+ }
+
+ return true;
+}
+
+Status MakeFrameHeader(const CompressParams& cparams,
+ const ProgressiveSplitter& progressive_splitter,
+ const FrameInfo& frame_info, const ImageBundle& ib,
+ FrameHeader* JXL_RESTRICT frame_header) {
+ frame_header->nonserialized_is_preview = frame_info.is_preview;
+ frame_header->is_last = frame_info.is_last;
+ frame_header->save_before_color_transform =
+ frame_info.save_before_color_transform;
+ frame_header->frame_type = frame_info.frame_type;
+ frame_header->name = ib.name;
+
+ progressive_splitter.InitPasses(&frame_header->passes);
+
+ if (cparams.modular_mode) {
+ frame_header->encoding = FrameEncoding::kModular;
+ frame_header->group_size_shift = cparams.modular_group_size_shift;
+ }
+
+ frame_header->chroma_subsampling = ib.chroma_subsampling;
+ if (ib.IsJPEG()) {
+ // we are transcoding a JPEG, so we don't get to choose
+ frame_header->encoding = FrameEncoding::kVarDCT;
+ frame_header->color_transform = ib.color_transform;
+ } else {
+ frame_header->color_transform = cparams.color_transform;
+ if (!cparams.modular_mode &&
+ (frame_header->chroma_subsampling.MaxHShift() != 0 ||
+ frame_header->chroma_subsampling.MaxVShift() != 0)) {
+ return JXL_FAILURE(
+ "Chroma subsampling is not supported in VarDCT mode when not "
+ "recompressing JPEGs");
+ }
+ }
+ if (frame_header->color_transform != ColorTransform::kYCbCr &&
+ (frame_header->chroma_subsampling.MaxHShift() != 0 ||
+ frame_header->chroma_subsampling.MaxVShift() != 0)) {
+ return JXL_FAILURE(
+ "Chroma subsampling is not supported when color transform is not "
+ "YCbCr");
+ }
+
+ frame_header->flags = FrameFlagsFromParams(cparams);
+ // Non-photon noise is not supported in the Modular encoder for now.
+ if (frame_header->encoding != FrameEncoding::kVarDCT &&
+ cparams.photon_noise_iso == 0 && cparams.manual_noise.empty()) {
+ frame_header->UpdateFlag(false, FrameHeader::Flags::kNoise);
+ }
+
+ JXL_RETURN_IF_ERROR(LoopFilterFromParams(cparams, frame_header));
+
+ frame_header->dc_level = frame_info.dc_level;
+ if (frame_header->dc_level > 2) {
+ // With 3 or more progressive_dc frames, the implementation does not yet
+ // work, see enc_cache.cc.
+ return JXL_FAILURE("progressive_dc > 2 is not yet supported");
+ }
+ if (cparams.progressive_dc > 0 &&
+ (cparams.ec_resampling != 1 || cparams.resampling != 1)) {
+ return JXL_FAILURE("Resampling not supported with DC frames");
+ }
+ if (cparams.resampling != 1 && cparams.resampling != 2 &&
+ cparams.resampling != 4 && cparams.resampling != 8) {
+ return JXL_FAILURE("Invalid resampling factor");
+ }
+ if (cparams.ec_resampling != 1 && cparams.ec_resampling != 2 &&
+ cparams.ec_resampling != 4 && cparams.ec_resampling != 8) {
+ return JXL_FAILURE("Invalid ec_resampling factor");
+ }
+ // Resized frames.
+ if (frame_info.frame_type != FrameType::kDCFrame) {
+ frame_header->frame_origin = ib.origin;
+ size_t ups = 1;
+ if (cparams.already_downsampled) ups = cparams.resampling;
+
+ // TODO(lode): this is not correct in case of odd original image sizes in
+ // combination with cparams.already_downsampled. Likely these values should
+ // be set to respectively frame_header->default_xsize() and
+ // frame_header->default_ysize() instead, the original (non downsampled)
+ // intended decoded image dimensions. But it may be more subtle than that
+ // if combined with crop. This issue causes custom_size_or_origin to be
+ // incorrectly set to true in case of already_downsampled with odd output
+ // image size when no cropping is used.
+ frame_header->frame_size.xsize = ib.xsize() * ups;
+ frame_header->frame_size.ysize = ib.ysize() * ups;
+ if (ib.origin.x0 != 0 || ib.origin.y0 != 0 ||
+ frame_header->frame_size.xsize != frame_header->default_xsize() ||
+ frame_header->frame_size.ysize != frame_header->default_ysize()) {
+ frame_header->custom_size_or_origin = true;
+ }
+ }
+ // Upsampling.
+ frame_header->upsampling = cparams.resampling;
+ const std::vector<ExtraChannelInfo>& extra_channels =
+ frame_header->nonserialized_metadata->m.extra_channel_info;
+ frame_header->extra_channel_upsampling.clear();
+ frame_header->extra_channel_upsampling.resize(extra_channels.size(),
+ cparams.ec_resampling);
+ frame_header->save_as_reference = frame_info.save_as_reference;
+
+ // Set blending-related information.
+ if (ib.blend || frame_header->custom_size_or_origin) {
+ // Set blend_channel to the first alpha channel. These values are only
+ // encoded in case a blend mode involving alpha is used and there are more
+ // than one extra channels.
+ size_t index = 0;
+ if (frame_info.alpha_channel == -1) {
+ if (extra_channels.size() > 1) {
+ for (size_t i = 0; i < extra_channels.size(); i++) {
+ if (extra_channels[i].type == ExtraChannel::kAlpha) {
+ index = i;
+ break;
+ }
+ }
+ }
+ } else {
+ index = static_cast<size_t>(frame_info.alpha_channel);
+ JXL_ASSERT(index == 0 || index < extra_channels.size());
+ }
+ frame_header->blending_info.alpha_channel = index;
+ frame_header->blending_info.mode =
+ ib.blend ? ib.blendmode : BlendMode::kReplace;
+ frame_header->blending_info.source = frame_info.source;
+ frame_header->blending_info.clamp = frame_info.clamp;
+ const auto& extra_channel_info = frame_info.extra_channel_blending_info;
+ for (size_t i = 0; i < extra_channels.size(); i++) {
+ if (i < extra_channel_info.size()) {
+ frame_header->extra_channel_blending_info[i] = extra_channel_info[i];
+ } else {
+ frame_header->extra_channel_blending_info[i].alpha_channel = index;
+ BlendMode default_blend = ib.blendmode;
+ if (extra_channels[i].type != ExtraChannel::kBlack && i != index) {
+ // K needs to be blended, spot colors and other stuff gets added
+ default_blend = BlendMode::kAdd;
+ }
+ frame_header->extra_channel_blending_info[i].mode =
+ ib.blend ? default_blend : BlendMode::kReplace;
+ frame_header->extra_channel_blending_info[i].source = 1;
+ }
+ }
+ }
+
+ frame_header->animation_frame.duration = ib.duration;
+ frame_header->animation_frame.timecode = ib.timecode;
+
+ return true;
+}
+
+// Invisible (alpha = 0) pixels tend to be a mess in optimized PNGs.
+// Since they have no visual impact whatsoever, we can replace them with
+// something that compresses better and reduces artifacts near the edges. This
+// does some kind of smooth stuff that seems to work.
+// Replace invisible pixels with a weighted average of the pixel to the left,
+// the pixel to the topright, and non-invisible neighbours.
+// Produces downward-blurry smears, with in the upwards direction only a 1px
+// edge duplication but not more. It would probably be better to smear in all
+// directions. That requires an alpha-weighed convolution with a large enough
+// kernel though, which might be overkill...
+void SimplifyInvisible(Image3F* image, const ImageF& alpha, bool lossless) {
+ for (size_t c = 0; c < 3; ++c) {
+ for (size_t y = 0; y < image->ysize(); ++y) {
+ float* JXL_RESTRICT row = image->PlaneRow(c, y);
+ const float* JXL_RESTRICT prow =
+ (y > 0 ? image->PlaneRow(c, y - 1) : nullptr);
+ const float* JXL_RESTRICT nrow =
+ (y + 1 < image->ysize() ? image->PlaneRow(c, y + 1) : nullptr);
+ const float* JXL_RESTRICT a = alpha.Row(y);
+ const float* JXL_RESTRICT pa = (y > 0 ? alpha.Row(y - 1) : nullptr);
+ const float* JXL_RESTRICT na =
+ (y + 1 < image->ysize() ? alpha.Row(y + 1) : nullptr);
+ for (size_t x = 0; x < image->xsize(); ++x) {
+ if (a[x] == 0) {
+ if (lossless) {
+ row[x] = 0;
+ continue;
+ }
+ float d = 0.f;
+ row[x] = 0;
+ if (x > 0) {
+ row[x] += row[x - 1];
+ d++;
+ if (a[x - 1] > 0.f) {
+ row[x] += row[x - 1];
+ d++;
+ }
+ }
+ if (x + 1 < image->xsize()) {
+ if (y > 0) {
+ row[x] += prow[x + 1];
+ d++;
+ }
+ if (a[x + 1] > 0.f) {
+ row[x] += 2.f * row[x + 1];
+ d += 2.f;
+ }
+ if (y > 0 && pa[x + 1] > 0.f) {
+ row[x] += 2.f * prow[x + 1];
+ d += 2.f;
+ }
+ if (y + 1 < image->ysize() && na[x + 1] > 0.f) {
+ row[x] += 2.f * nrow[x + 1];
+ d += 2.f;
+ }
+ }
+ if (y > 0 && pa[x] > 0.f) {
+ row[x] += 2.f * prow[x];
+ d += 2.f;
+ }
+ if (y + 1 < image->ysize() && na[x] > 0.f) {
+ row[x] += 2.f * nrow[x];
+ d += 2.f;
+ }
+ if (d > 1.f) row[x] /= d;
+ }
+ }
+ }
+ }
+}
+
+struct PixelStatsForChromacityAdjustment {
+ float dx = 0;
+ float db = 0;
+ float exposed_blue = 0;
+ float CalcPlane(const ImageF* JXL_RESTRICT plane) const {
+ float xmax = 0;
+ float ymax = 0;
+ for (size_t ty = 1; ty < plane->ysize(); ++ty) {
+ for (size_t tx = 1; tx < plane->xsize(); ++tx) {
+ float cur = plane->Row(ty)[tx];
+ float prev_row = plane->Row(ty - 1)[tx];
+ float prev = plane->Row(ty)[tx - 1];
+ xmax = std::max(xmax, std::abs(cur - prev));
+ ymax = std::max(ymax, std::abs(cur - prev_row));
+ }
+ }
+ return std::max(xmax, ymax);
+ }
+ void CalcExposedBlue(const ImageF* JXL_RESTRICT plane_y,
+ const ImageF* JXL_RESTRICT plane_b) {
+ float eb = 0;
+ float xmax = 0;
+ float ymax = 0;
+ for (size_t ty = 1; ty < plane_y->ysize(); ++ty) {
+ for (size_t tx = 1; tx < plane_y->xsize(); ++tx) {
+ float cur_y = plane_y->Row(ty)[tx];
+ float cur_b = plane_b->Row(ty)[tx];
+ float exposed_b = cur_b - cur_y * 1.2;
+ float diff_b = cur_b - cur_y;
+ float prev_row = plane_b->Row(ty - 1)[tx];
+ float prev = plane_b->Row(ty)[tx - 1];
+ float diff_prev_row = prev_row - plane_y->Row(ty - 1)[tx];
+ float diff_prev = prev - plane_y->Row(ty)[tx - 1];
+ xmax = std::max(xmax, std::abs(diff_b - diff_prev));
+ ymax = std::max(ymax, std::abs(diff_b - diff_prev_row));
+ if (exposed_b >= 0) {
+ exposed_b *= fabs(cur_b - prev) + fabs(cur_b - prev_row);
+ eb = std::max(eb, exposed_b);
+ }
+ }
+ }
+ exposed_blue = eb;
+ db = std::max(xmax, ymax);
+ }
+ void Calc(const Image3F* JXL_RESTRICT opsin) {
+ dx = CalcPlane(&opsin->Plane(0));
+ CalcExposedBlue(&opsin->Plane(1), &opsin->Plane(2));
+ }
+ int HowMuchIsXChannelPixelized() {
+ if (dx >= 0.03) {
+ return 2;
+ }
+ if (dx >= 0.017) {
+ return 1;
+ }
+ return 0;
+ }
+ int HowMuchIsBChannelPixelized() {
+ int add = exposed_blue >= 0.13 ? 1 : 0;
+ if (db > 0.38) {
+ return 2 + add;
+ }
+ if (db > 0.33) {
+ return 1 + add;
+ }
+ if (db > 0.28) {
+ return add;
+ }
+ return 0;
+ }
+};
+
+} // namespace
+
+class LossyFrameEncoder {
+ public:
+ LossyFrameEncoder(const CompressParams& cparams,
+ const FrameHeader& frame_header,
+ PassesEncoderState* JXL_RESTRICT enc_state,
+ const JxlCmsInterface& cms, ThreadPool* pool,
+ AuxOut* aux_out)
+ : enc_state_(enc_state), cms_(cms), pool_(pool), aux_out_(aux_out) {
+ JXL_CHECK(InitializePassesSharedState(frame_header, &enc_state_->shared,
+ /*encoder=*/true));
+ enc_state_->cparams = cparams;
+ enc_state_->passes.clear();
+ }
+
+ Status ComputeEncodingData(const ImageBundle* linear,
+ Image3F* JXL_RESTRICT opsin,
+ const JxlCmsInterface& cms, ThreadPool* pool,
+ ModularFrameEncoder* modular_frame_encoder,
+ FrameHeader* frame_header) {
+ PROFILER_ZONE("ComputeEncodingData uninstrumented");
+ JXL_ASSERT((opsin->xsize() % kBlockDim) == 0 &&
+ (opsin->ysize() % kBlockDim) == 0);
+ PassesSharedState& shared = enc_state_->shared;
+
+ if (!enc_state_->cparams.max_error_mode) {
+ // Compute chromacity adjustments using two approaches.
+ // 1) Distance based approach for chromacity adjustment:
+ float x_qm_scale_steps[4] = {1.25f, 7.0f, 15.0f, 24.0f};
+ shared.frame_header.x_qm_scale = 2;
+ for (float x_qm_scale_step : x_qm_scale_steps) {
+ if (enc_state_->cparams.original_butteraugli_distance >
+ x_qm_scale_step) {
+ shared.frame_header.x_qm_scale++;
+ }
+ }
+ if (enc_state_->cparams.butteraugli_distance < 0.299f) {
+ // Favor chromacity preservation for making images appear more
+ // faithful to original even with extreme (5-10x) zooming.
+ shared.frame_header.x_qm_scale++;
+ }
+ // 2) Pixel-based approach for chromacity adjustment:
+ // look at the individual pixels and make a guess how difficult
+ // the image would be based on the worst case pixel.
+ PixelStatsForChromacityAdjustment pixel_stats;
+ if (enc_state_->cparams.speed_tier <= SpeedTier::kWombat) {
+ pixel_stats.Calc(opsin);
+ }
+ // For X take the most severe adjustment.
+ shared.frame_header.x_qm_scale =
+ std::max<int>(shared.frame_header.x_qm_scale,
+ 2 + pixel_stats.HowMuchIsXChannelPixelized());
+ // B only ajudsted by pixel-based approach.
+ shared.frame_header.b_qm_scale =
+ 2 + pixel_stats.HowMuchIsBChannelPixelized();
+ }
+
+ JXL_RETURN_IF_ERROR(enc_state_->heuristics->LossyFrameHeuristics(
+ enc_state_, modular_frame_encoder, linear, opsin, cms_, pool_,
+ aux_out_));
+
+ JXL_RETURN_IF_ERROR(InitializePassesEncoder(
+ *opsin, cms, pool_, enc_state_, modular_frame_encoder, aux_out_));
+
+ enc_state_->passes.resize(enc_state_->progressive_splitter.GetNumPasses());
+ for (PassesEncoderState::PassData& pass : enc_state_->passes) {
+ pass.ac_tokens.resize(shared.frame_dim.num_groups);
+ }
+
+ ComputeAllCoeffOrders(shared.frame_dim);
+ shared.num_histograms = 1;
+
+ const auto tokenize_group_init = [&](const size_t num_threads) {
+ group_caches_.resize(num_threads);
+ return true;
+ };
+ const auto tokenize_group = [&](const uint32_t group_index,
+ const size_t thread) {
+ // Tokenize coefficients.
+ const Rect rect = shared.BlockGroupRect(group_index);
+ for (size_t idx_pass = 0; idx_pass < enc_state_->passes.size();
+ idx_pass++) {
+ JXL_ASSERT(enc_state_->coeffs[idx_pass]->Type() == ACType::k32);
+ const int32_t* JXL_RESTRICT ac_rows[3] = {
+ enc_state_->coeffs[idx_pass]->PlaneRow(0, group_index, 0).ptr32,
+ enc_state_->coeffs[idx_pass]->PlaneRow(1, group_index, 0).ptr32,
+ enc_state_->coeffs[idx_pass]->PlaneRow(2, group_index, 0).ptr32,
+ };
+ // Ensure group cache is initialized.
+ group_caches_[thread].InitOnce();
+ TokenizeCoefficients(
+ &shared.coeff_orders[idx_pass * shared.coeff_order_size], rect,
+ ac_rows, shared.ac_strategy, frame_header->chroma_subsampling,
+ &group_caches_[thread].num_nzeroes,
+ &enc_state_->passes[idx_pass].ac_tokens[group_index],
+ enc_state_->shared.quant_dc, enc_state_->shared.raw_quant_field,
+ enc_state_->shared.block_ctx_map);
+ }
+ };
+ JXL_RETURN_IF_ERROR(RunOnPool(pool_, 0, shared.frame_dim.num_groups,
+ tokenize_group_init, tokenize_group,
+ "TokenizeGroup"));
+
+ *frame_header = shared.frame_header;
+ return true;
+ }
+
+ Status ComputeJPEGTranscodingData(const jpeg::JPEGData& jpeg_data,
+ ModularFrameEncoder* modular_frame_encoder,
+ FrameHeader* frame_header) {
+ PROFILER_ZONE("ComputeJPEGTranscodingData uninstrumented");
+ PassesSharedState& shared = enc_state_->shared;
+
+ frame_header->x_qm_scale = 2;
+ frame_header->b_qm_scale = 2;
+
+ FrameDimensions frame_dim = frame_header->ToFrameDimensions();
+
+ const size_t xsize = frame_dim.xsize_padded;
+ const size_t ysize = frame_dim.ysize_padded;
+ const size_t xsize_blocks = frame_dim.xsize_blocks;
+ const size_t ysize_blocks = frame_dim.ysize_blocks;
+
+ // no-op chroma from luma
+ shared.cmap = ColorCorrelationMap(xsize, ysize, false);
+ shared.ac_strategy.FillDCT8();
+ FillImage(uint8_t(0), &shared.epf_sharpness);
+
+ enc_state_->passes.resize(enc_state_->progressive_splitter.GetNumPasses());
+ for (PassesEncoderState::PassData& pass : enc_state_->passes) {
+ pass.ac_tokens.resize(shared.frame_dim.num_groups);
+ }
+
+ enc_state_->coeffs.clear();
+ while (enc_state_->coeffs.size() < enc_state_->passes.size()) {
+ enc_state_->coeffs.emplace_back(make_unique<ACImageT<int32_t>>(
+ kGroupDim * kGroupDim, frame_dim.num_groups));
+ }
+
+ // convert JPEG quantization table to a Quantizer object
+ float dcquantization[3];
+ std::vector<QuantEncoding> qe(DequantMatrices::kNum,
+ QuantEncoding::Library(0));
+
+ auto jpeg_c_map = JpegOrder(frame_header->color_transform,
+ jpeg_data.components.size() == 1);
+
+ std::vector<int> qt(192);
+ for (size_t c = 0; c < 3; c++) {
+ size_t jpeg_c = jpeg_c_map[c];
+ const int32_t* quant =
+ jpeg_data.quant[jpeg_data.components[jpeg_c].quant_idx].values.data();
+
+ dcquantization[c] = 255 * 8.0f / quant[0];
+ for (size_t y = 0; y < 8; y++) {
+ for (size_t x = 0; x < 8; x++) {
+ // JPEG XL transposes the DCT, JPEG doesn't.
+ qt[c * 64 + 8 * x + y] = quant[8 * y + x];
+ }
+ }
+ }
+ DequantMatricesSetCustomDC(&shared.matrices, dcquantization);
+ float dcquantization_r[3] = {1.0f / dcquantization[0],
+ 1.0f / dcquantization[1],
+ 1.0f / dcquantization[2]};
+
+ qe[AcStrategy::Type::DCT] = QuantEncoding::RAW(qt);
+ DequantMatricesSetCustom(&shared.matrices, qe, modular_frame_encoder);
+
+ // Ensure that InvGlobalScale() is 1.
+ shared.quantizer = Quantizer(&shared.matrices, 1, kGlobalScaleDenom);
+ // Recompute MulDC() and InvMulDC().
+ shared.quantizer.RecomputeFromGlobalScale();
+
+ // Per-block dequant scaling should be 1.
+ FillImage(static_cast<int32_t>(shared.quantizer.InvGlobalScale()),
+ &shared.raw_quant_field);
+
+ std::vector<int32_t> scaled_qtable(192);
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t i = 0; i < 64; i++) {
+ scaled_qtable[64 * c + i] =
+ (1 << kCFLFixedPointPrecision) * qt[64 + i] / qt[64 * c + i];
+ }
+ }
+
+ auto jpeg_row = [&](size_t c, size_t y) {
+ return jpeg_data.components[jpeg_c_map[c]].coeffs.data() +
+ jpeg_data.components[jpeg_c_map[c]].width_in_blocks *
+ kDCTBlockSize * y;
+ };
+
+ Image3F dc = Image3F(xsize_blocks, ysize_blocks);
+ bool DCzero =
+ (shared.frame_header.color_transform == ColorTransform::kYCbCr);
+ // Compute chroma-from-luma for AC (doesn't seem to be useful for DC)
+ if (frame_header->chroma_subsampling.Is444() &&
+ enc_state_->cparams.force_cfl_jpeg_recompression &&
+ jpeg_data.components.size() == 3) {
+ for (size_t c : {0, 2}) {
+ ImageSB* map = (c == 0 ? &shared.cmap.ytox_map : &shared.cmap.ytob_map);
+ const float kScale = kDefaultColorFactor;
+ const int kOffset = 127;
+ const float kBase =
+ c == 0 ? shared.cmap.YtoXRatio(0) : shared.cmap.YtoBRatio(0);
+ const float kZeroThresh =
+ kScale * kZeroBiasDefault[c] *
+ 0.9999f; // just epsilon less for better rounding
+
+ auto process_row = [&](const uint32_t task, const size_t thread) {
+ size_t ty = task;
+ int8_t* JXL_RESTRICT row_out = map->Row(ty);
+ for (size_t tx = 0; tx < map->xsize(); ++tx) {
+ const size_t y0 = ty * kColorTileDimInBlocks;
+ const size_t x0 = tx * kColorTileDimInBlocks;
+ const size_t y1 = std::min(frame_dim.ysize_blocks,
+ (ty + 1) * kColorTileDimInBlocks);
+ const size_t x1 = std::min(frame_dim.xsize_blocks,
+ (tx + 1) * kColorTileDimInBlocks);
+ int32_t d_num_zeros[257] = {0};
+ // TODO(veluca): this needs SIMD + fixed point adaptation, and/or
+ // conversion to the new CfL algorithm.
+ for (size_t y = y0; y < y1; ++y) {
+ const int16_t* JXL_RESTRICT row_m = jpeg_row(1, y);
+ const int16_t* JXL_RESTRICT row_s = jpeg_row(c, y);
+ for (size_t x = x0; x < x1; ++x) {
+ for (size_t coeffpos = 1; coeffpos < kDCTBlockSize;
+ coeffpos++) {
+ const float scaled_m =
+ row_m[x * kDCTBlockSize + coeffpos] *
+ scaled_qtable[64 * c + coeffpos] *
+ (1.0f / (1 << kCFLFixedPointPrecision));
+ const float scaled_s =
+ kScale * row_s[x * kDCTBlockSize + coeffpos] +
+ (kOffset - kBase * kScale) * scaled_m;
+ if (std::abs(scaled_m) > 1e-8f) {
+ float from, to;
+ if (scaled_m > 0) {
+ from = (scaled_s - kZeroThresh) / scaled_m;
+ to = (scaled_s + kZeroThresh) / scaled_m;
+ } else {
+ from = (scaled_s + kZeroThresh) / scaled_m;
+ to = (scaled_s - kZeroThresh) / scaled_m;
+ }
+ if (from < 0.0f) {
+ from = 0.0f;
+ }
+ if (to > 255.0f) {
+ to = 255.0f;
+ }
+ // Instead of clamping the both values
+ // we just check that range is sane.
+ if (from <= to) {
+ d_num_zeros[static_cast<int>(std::ceil(from))]++;
+ d_num_zeros[static_cast<int>(std::floor(to + 1))]--;
+ }
+ }
+ }
+ }
+ }
+ int best = 0;
+ int32_t best_sum = 0;
+ FindIndexOfSumMaximum(d_num_zeros, 256, &best, &best_sum);
+ int32_t offset_sum = 0;
+ for (int i = 0; i < 256; ++i) {
+ if (i <= kOffset) {
+ offset_sum += d_num_zeros[i];
+ }
+ }
+ row_out[tx] = 0;
+ if (best_sum > offset_sum + 1) {
+ row_out[tx] = best - kOffset;
+ }
+ }
+ };
+
+ JXL_RETURN_IF_ERROR(RunOnPool(pool_, 0, map->ysize(),
+ ThreadPool::NoInit, process_row,
+ "FindCorrelation"));
+ }
+ }
+
+ if (!frame_header->chroma_subsampling.Is444()) {
+ ZeroFillImage(&dc);
+ for (auto& coeff : enc_state_->coeffs) {
+ coeff->ZeroFill();
+ }
+ }
+ // JPEG DC is from -1024 to 1023.
+ std::vector<size_t> dc_counts[3] = {};
+ dc_counts[0].resize(2048);
+ dc_counts[1].resize(2048);
+ dc_counts[2].resize(2048);
+ size_t total_dc[3] = {};
+ for (size_t c : {1, 0, 2}) {
+ if (jpeg_data.components.size() == 1 && c != 1) {
+ for (auto& coeff : enc_state_->coeffs) {
+ coeff->ZeroFillPlane(c);
+ }
+ ZeroFillImage(&dc.Plane(c));
+ // Ensure no division by 0.
+ dc_counts[c][1024] = 1;
+ total_dc[c] = 1;
+ continue;
+ }
+ size_t hshift = frame_header->chroma_subsampling.HShift(c);
+ size_t vshift = frame_header->chroma_subsampling.VShift(c);
+ ImageSB& map = (c == 0 ? shared.cmap.ytox_map : shared.cmap.ytob_map);
+ for (size_t group_index = 0; group_index < frame_dim.num_groups;
+ group_index++) {
+ const size_t gx = group_index % frame_dim.xsize_groups;
+ const size_t gy = group_index / frame_dim.xsize_groups;
+ int32_t* coeffs[kMaxNumPasses];
+ for (size_t i = 0; i < enc_state_->coeffs.size(); i++) {
+ coeffs[i] = enc_state_->coeffs[i]->PlaneRow(c, group_index, 0).ptr32;
+ }
+ int32_t block[64];
+ for (size_t by = gy * kGroupDimInBlocks;
+ by < ysize_blocks && by < (gy + 1) * kGroupDimInBlocks; ++by) {
+ if ((by >> vshift) << vshift != by) continue;
+ const int16_t* JXL_RESTRICT inputjpeg = jpeg_row(c, by >> vshift);
+ const int16_t* JXL_RESTRICT inputjpegY = jpeg_row(1, by);
+ float* JXL_RESTRICT fdc = dc.PlaneRow(c, by >> vshift);
+ const int8_t* JXL_RESTRICT cm =
+ map.ConstRow(by / kColorTileDimInBlocks);
+ for (size_t bx = gx * kGroupDimInBlocks;
+ bx < xsize_blocks && bx < (gx + 1) * kGroupDimInBlocks; ++bx) {
+ if ((bx >> hshift) << hshift != bx) continue;
+ size_t base = (bx >> hshift) * kDCTBlockSize;
+ int idc;
+ if (DCzero) {
+ idc = inputjpeg[base];
+ } else {
+ idc = inputjpeg[base] + 1024 / qt[c * 64];
+ }
+ dc_counts[c][std::min(static_cast<uint32_t>(idc + 1024),
+ uint32_t(2047))]++;
+ total_dc[c]++;
+ fdc[bx >> hshift] = idc * dcquantization_r[c];
+ if (c == 1 || !enc_state_->cparams.force_cfl_jpeg_recompression ||
+ !frame_header->chroma_subsampling.Is444()) {
+ for (size_t y = 0; y < 8; y++) {
+ for (size_t x = 0; x < 8; x++) {
+ block[y * 8 + x] = inputjpeg[base + x * 8 + y];
+ }
+ }
+ } else {
+ const int32_t scale =
+ shared.cmap.RatioJPEG(cm[bx / kColorTileDimInBlocks]);
+
+ for (size_t y = 0; y < 8; y++) {
+ for (size_t x = 0; x < 8; x++) {
+ int Y = inputjpegY[kDCTBlockSize * bx + x * 8 + y];
+ int QChroma = inputjpeg[kDCTBlockSize * bx + x * 8 + y];
+ // Fixed-point multiply of CfL scale with quant table ratio
+ // first, and Y value second.
+ int coeff_scale = (scale * scaled_qtable[64 * c + y * 8 + x] +
+ (1 << (kCFLFixedPointPrecision - 1))) >>
+ kCFLFixedPointPrecision;
+ int cfl_factor = (Y * coeff_scale +
+ (1 << (kCFLFixedPointPrecision - 1))) >>
+ kCFLFixedPointPrecision;
+ int QCR = QChroma - cfl_factor;
+ block[y * 8 + x] = QCR;
+ }
+ }
+ }
+ enc_state_->progressive_splitter.SplitACCoefficients(
+ block, AcStrategy::FromRawStrategy(AcStrategy::Type::DCT), bx,
+ by, coeffs);
+ for (size_t i = 0; i < enc_state_->coeffs.size(); i++) {
+ coeffs[i] += kDCTBlockSize;
+ }
+ }
+ }
+ }
+ }
+
+ auto& dct = enc_state_->shared.block_ctx_map.dc_thresholds;
+ auto& num_dc_ctxs = enc_state_->shared.block_ctx_map.num_dc_ctxs;
+ num_dc_ctxs = 1;
+ for (size_t i = 0; i < 3; i++) {
+ dct[i].clear();
+ int num_thresholds = (CeilLog2Nonzero(total_dc[i]) - 12) / 2;
+ // up to 3 buckets per channel:
+ // dark/medium/bright, yellow/unsat/blue, green/unsat/red
+ num_thresholds = std::min(std::max(num_thresholds, 0), 2);
+ size_t cumsum = 0;
+ size_t cut = total_dc[i] / (num_thresholds + 1);
+ for (int j = 0; j < 2048; j++) {
+ cumsum += dc_counts[i][j];
+ if (cumsum > cut) {
+ dct[i].push_back(j - 1025);
+ cut = total_dc[i] * (dct[i].size() + 1) / (num_thresholds + 1);
+ }
+ }
+ num_dc_ctxs *= dct[i].size() + 1;
+ }
+
+ auto& ctx_map = enc_state_->shared.block_ctx_map.ctx_map;
+ ctx_map.clear();
+ ctx_map.resize(3 * kNumOrders * num_dc_ctxs, 0);
+
+ int lbuckets = (dct[1].size() + 1);
+ for (size_t i = 0; i < num_dc_ctxs; i++) {
+ // up to 9 contexts for luma
+ ctx_map[i] = i / lbuckets;
+ // up to 3 contexts for chroma
+ ctx_map[kNumOrders * num_dc_ctxs + i] =
+ ctx_map[2 * kNumOrders * num_dc_ctxs + i] =
+ num_dc_ctxs / lbuckets + (i % lbuckets);
+ }
+ enc_state_->shared.block_ctx_map.num_ctxs =
+ *std::max_element(ctx_map.begin(), ctx_map.end()) + 1;
+
+ enc_state_->histogram_idx.resize(shared.frame_dim.num_groups);
+
+ // disable DC frame for now
+ shared.frame_header.UpdateFlag(false, FrameHeader::kUseDcFrame);
+ auto compute_dc_coeffs = [&](const uint32_t group_index,
+ size_t /* thread */) {
+ modular_frame_encoder->AddVarDCTDC(dc, group_index, /*nl_dc=*/false,
+ enc_state_, /*jpeg_transcode=*/true);
+ modular_frame_encoder->AddACMetadata(group_index, /*jpeg_transcode=*/true,
+ enc_state_);
+ };
+ JXL_RETURN_IF_ERROR(RunOnPool(pool_, 0, shared.frame_dim.num_dc_groups,
+ ThreadPool::NoInit, compute_dc_coeffs,
+ "Compute DC coeffs"));
+
+ // Must happen before WriteFrameHeader!
+ shared.frame_header.UpdateFlag(true, FrameHeader::kSkipAdaptiveDCSmoothing);
+
+ ComputeAllCoeffOrders(frame_dim);
+ shared.num_histograms = 1;
+
+ const auto tokenize_group_init = [&](const size_t num_threads) {
+ group_caches_.resize(num_threads);
+ return true;
+ };
+ const auto tokenize_group = [&](const uint32_t group_index,
+ const size_t thread) {
+ // Tokenize coefficients.
+ const Rect rect = shared.BlockGroupRect(group_index);
+ for (size_t idx_pass = 0; idx_pass < enc_state_->passes.size();
+ idx_pass++) {
+ JXL_ASSERT(enc_state_->coeffs[idx_pass]->Type() == ACType::k32);
+ const int32_t* JXL_RESTRICT ac_rows[3] = {
+ enc_state_->coeffs[idx_pass]->PlaneRow(0, group_index, 0).ptr32,
+ enc_state_->coeffs[idx_pass]->PlaneRow(1, group_index, 0).ptr32,
+ enc_state_->coeffs[idx_pass]->PlaneRow(2, group_index, 0).ptr32,
+ };
+ // Ensure group cache is initialized.
+ group_caches_[thread].InitOnce();
+ TokenizeCoefficients(
+ &shared.coeff_orders[idx_pass * shared.coeff_order_size], rect,
+ ac_rows, shared.ac_strategy, frame_header->chroma_subsampling,
+ &group_caches_[thread].num_nzeroes,
+ &enc_state_->passes[idx_pass].ac_tokens[group_index],
+ enc_state_->shared.quant_dc, enc_state_->shared.raw_quant_field,
+ enc_state_->shared.block_ctx_map);
+ }
+ };
+ JXL_RETURN_IF_ERROR(RunOnPool(pool_, 0, shared.frame_dim.num_groups,
+ tokenize_group_init, tokenize_group,
+ "TokenizeGroup"));
+ *frame_header = shared.frame_header;
+ doing_jpeg_recompression = true;
+ return true;
+ }
+
+ Status EncodeGlobalDCInfo(const FrameHeader& frame_header,
+ BitWriter* writer) const {
+ // Encode quantizer DC and global scale.
+ QuantizerParams params = enc_state_->shared.quantizer.GetParams();
+ JXL_RETURN_IF_ERROR(
+ WriteQuantizerParams(params, writer, kLayerQuant, aux_out_));
+ EncodeBlockCtxMap(enc_state_->shared.block_ctx_map, writer, aux_out_);
+ ColorCorrelationMapEncodeDC(&enc_state_->shared.cmap, writer, kLayerDC,
+ aux_out_);
+ return true;
+ }
+
+ Status EncodeGlobalACInfo(BitWriter* writer,
+ ModularFrameEncoder* modular_frame_encoder) {
+ JXL_RETURN_IF_ERROR(DequantMatricesEncode(&enc_state_->shared.matrices,
+ writer, kLayerQuant, aux_out_,
+ modular_frame_encoder));
+ if (enc_state_->cparams.speed_tier <= SpeedTier::kTortoise) {
+ if (!doing_jpeg_recompression) ClusterGroups(enc_state_);
+ }
+ size_t num_histo_bits =
+ CeilLog2Nonzero(enc_state_->shared.frame_dim.num_groups);
+ if (num_histo_bits != 0) {
+ BitWriter::Allotment allotment(writer, num_histo_bits);
+ writer->Write(num_histo_bits, enc_state_->shared.num_histograms - 1);
+ allotment.ReclaimAndCharge(writer, kLayerAC, aux_out_);
+ }
+
+ for (size_t i = 0; i < enc_state_->progressive_splitter.GetNumPasses();
+ i++) {
+ // Encode coefficient orders.
+ size_t order_bits = 0;
+ JXL_RETURN_IF_ERROR(U32Coder::CanEncode(
+ kOrderEnc, enc_state_->used_orders[i], &order_bits));
+ BitWriter::Allotment allotment(writer, order_bits);
+ JXL_CHECK(U32Coder::Write(kOrderEnc, enc_state_->used_orders[i], writer));
+ allotment.ReclaimAndCharge(writer, kLayerOrder, aux_out_);
+ EncodeCoeffOrders(
+ enc_state_->used_orders[i],
+ &enc_state_->shared
+ .coeff_orders[i * enc_state_->shared.coeff_order_size],
+ writer, kLayerOrder, aux_out_);
+
+ // Encode histograms.
+ HistogramParams hist_params(
+ enc_state_->cparams.speed_tier,
+ enc_state_->shared.block_ctx_map.NumACContexts());
+ if (enc_state_->cparams.speed_tier > SpeedTier::kTortoise) {
+ hist_params.lz77_method = HistogramParams::LZ77Method::kNone;
+ }
+ if (enc_state_->cparams.decoding_speed_tier >= 1) {
+ hist_params.max_histograms = 6;
+ }
+ BuildAndEncodeHistograms(
+ hist_params,
+ enc_state_->shared.num_histograms *
+ enc_state_->shared.block_ctx_map.NumACContexts(),
+ enc_state_->passes[i].ac_tokens, &enc_state_->passes[i].codes,
+ &enc_state_->passes[i].context_map, writer, kLayerAC, aux_out_);
+ }
+
+ return true;
+ }
+
+ Status EncodeACGroup(size_t pass, size_t group_index, BitWriter* group_code,
+ AuxOut* local_aux_out) {
+ return EncodeGroupTokenizedCoefficients(
+ group_index, pass, enc_state_->histogram_idx[group_index], *enc_state_,
+ group_code, local_aux_out);
+ }
+
+ PassesEncoderState* State() { return enc_state_; }
+
+ private:
+ void ComputeAllCoeffOrders(const FrameDimensions& frame_dim) {
+ PROFILER_FUNC;
+ // No coefficient reordering in Falcon or faster.
+ auto used_orders_info = ComputeUsedOrders(
+ enc_state_->cparams.speed_tier, enc_state_->shared.ac_strategy,
+ Rect(enc_state_->shared.raw_quant_field));
+ enc_state_->used_orders.clear();
+ enc_state_->used_orders.resize(
+ enc_state_->progressive_splitter.GetNumPasses(),
+ used_orders_info.second);
+ for (size_t i = 0; i < enc_state_->progressive_splitter.GetNumPasses();
+ i++) {
+ ComputeCoeffOrder(
+ enc_state_->cparams.speed_tier, *enc_state_->coeffs[i],
+ enc_state_->shared.ac_strategy, frame_dim, enc_state_->used_orders[i],
+ used_orders_info.first,
+ &enc_state_->shared
+ .coeff_orders[i * enc_state_->shared.coeff_order_size]);
+ }
+ }
+
+ template <typename V, typename R>
+ static inline void FindIndexOfSumMaximum(const V* array, const size_t len,
+ R* idx, V* sum) {
+ JXL_ASSERT(len > 0);
+ V maxval = 0;
+ V val = 0;
+ R maxidx = 0;
+ for (size_t i = 0; i < len; ++i) {
+ val += array[i];
+ if (val > maxval) {
+ maxval = val;
+ maxidx = i;
+ }
+ }
+ *idx = maxidx;
+ *sum = maxval;
+ }
+
+ PassesEncoderState* JXL_RESTRICT enc_state_;
+ JxlCmsInterface cms_;
+ ThreadPool* pool_;
+ AuxOut* aux_out_;
+ std::vector<EncCache> group_caches_;
+ bool doing_jpeg_recompression = false;
+};
+
+Status ParamsPostInit(CompressParams* p) {
+ if (!p->manual_noise.empty() &&
+ p->manual_noise.size() != NoiseParams::kNumNoisePoints) {
+ return JXL_FAILURE("Invalid number of noise lut entries");
+ }
+ if (!p->manual_xyb_factors.empty() && p->manual_xyb_factors.size() != 3) {
+ return JXL_FAILURE("Invalid number of XYB quantization factors");
+ }
+ if (!p->modular_mode && p->butteraugli_distance == 0.0) {
+ p->butteraugli_distance = kMinButteraugliDistance;
+ }
+ if (p->original_butteraugli_distance == -1.0) {
+ p->original_butteraugli_distance = p->butteraugli_distance;
+ }
+ if (p->resampling <= 0) {
+ p->resampling = 1;
+ // For very low bit rates, using 2x2 resampling gives better results on
+ // most photographic images, with an adjusted butteraugli score chosen to
+ // give roughly the same amount of bits per pixel.
+ if (!p->already_downsampled && p->butteraugli_distance >= 20) {
+ p->resampling = 2;
+ p->butteraugli_distance = 6 + ((p->butteraugli_distance - 20) * 0.25);
+ }
+ }
+ if (p->ec_resampling <= 0) {
+ p->ec_resampling = p->resampling;
+ }
+ return true;
+}
+
+Status EncodeFrame(const CompressParams& cparams_orig,
+ const FrameInfo& frame_info, const CodecMetadata* metadata,
+ const ImageBundle& ib, PassesEncoderState* passes_enc_state,
+ const JxlCmsInterface& cms, ThreadPool* pool,
+ BitWriter* writer, AuxOut* aux_out) {
+ CompressParams cparams = cparams_orig;
+ if (cparams.speed_tier == SpeedTier::kGlacier && !cparams.IsLossless()) {
+ cparams.speed_tier = SpeedTier::kTortoise;
+ }
+ if (cparams.speed_tier == SpeedTier::kGlacier) {
+ std::vector<CompressParams> all_params;
+ std::vector<size_t> size;
+
+ CompressParams cparams_attempt = cparams_orig;
+ cparams_attempt.speed_tier = SpeedTier::kTortoise;
+ cparams_attempt.options.max_properties = 4;
+
+ for (float x : {0.0f, 80.f}) {
+ cparams_attempt.channel_colors_percent = x;
+ for (float y : {0.0f, 95.0f}) {
+ cparams_attempt.channel_colors_pre_transform_percent = y;
+ // 70000 ensures that the number of palette colors is representable in
+ // modular headers.
+ for (int K : {0, 1 << 10, 70000}) {
+ cparams_attempt.palette_colors = K;
+ for (int tree_mode : {-1, (int)ModularOptions::TreeMode::kNoWP,
+ (int)ModularOptions::TreeMode::kDefault}) {
+ if (tree_mode == -1) {
+ // LZ77 only
+ cparams_attempt.options.nb_repeats = 0;
+ } else {
+ cparams_attempt.options.nb_repeats = 1;
+ cparams_attempt.options.wp_tree_mode =
+ static_cast<ModularOptions::TreeMode>(tree_mode);
+ }
+ for (Predictor pred : {Predictor::Zero, Predictor::Variable}) {
+ cparams_attempt.options.predictor = pred;
+ for (int g : {0, 1, 3}) {
+ cparams_attempt.modular_group_size_shift = g;
+ for (Override patches : {Override::kDefault, Override::kOff}) {
+ cparams_attempt.patches = patches;
+ all_params.push_back(cparams_attempt);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ size.resize(all_params.size());
+
+ std::atomic<int> num_errors{0};
+
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, all_params.size(), ThreadPool::NoInit,
+ [&](size_t task, size_t) {
+ BitWriter w;
+ PassesEncoderState state;
+ if (!EncodeFrame(all_params[task], frame_info, metadata, ib, &state,
+ cms, nullptr, &w, aux_out)) {
+ num_errors.fetch_add(1, std::memory_order_relaxed);
+ return;
+ }
+ size[task] = w.BitsWritten();
+ },
+ "Compress kGlacier"));
+ JXL_RETURN_IF_ERROR(num_errors.load(std::memory_order_relaxed) == 0);
+
+ size_t best_idx = 0;
+ for (size_t i = 1; i < all_params.size(); i++) {
+ if (size[best_idx] > size[i]) {
+ best_idx = i;
+ }
+ }
+ cparams = all_params[best_idx];
+ }
+
+ if (cparams_orig.target_bitrate > 0.0f &&
+ frame_info.frame_type == FrameType::kRegularFrame) {
+ cparams.target_bitrate = 0.0f;
+ const float target_bitrate = cparams_orig.target_bitrate;
+ float bitrate = 0.0f;
+ float prev_bitrate = 0.0f;
+ float rescale = 1.0f;
+ size_t prev_bits = 0;
+ float error = 0.0f;
+ float best_error = 100.0f;
+ float best_rescale = 1.0f;
+ for (size_t i = 0; i < 10; ++i) {
+ std::unique_ptr<PassesEncoderState> state =
+ jxl::make_unique<PassesEncoderState>();
+ BitWriter bw;
+ JXL_CHECK(EncodeFrame(cparams, frame_info, metadata, ib, state.get(), cms,
+ pool, &bw, nullptr));
+ bitrate = bw.BitsWritten() * 1.0 / (ib.xsize() * ib.ysize());
+ error = target_bitrate / bitrate - 1.0f;
+ if (std::abs(error) < std::abs(best_error)) {
+ best_error = error;
+ best_rescale = cparams.quant_ac_rescale;
+ }
+ if (bw.BitsWritten() == prev_bits || std::abs(error) < 0.0005f) {
+ break;
+ }
+ float lambda = 1.0f;
+ if (i > 0) {
+ lambda = (((bitrate / prev_bitrate) - 1.0f) / (rescale - 1.0f));
+ }
+ rescale = (1.0f + ((target_bitrate / bitrate) - 1.0f) / lambda);
+ if (rescale < 0.0f) {
+ break;
+ }
+ cparams.quant_ac_rescale *= rescale;
+ prev_bitrate = bitrate;
+ prev_bits = bw.BitsWritten();
+ }
+ if (aux_out) {
+ aux_out->max_quant_rescale = best_rescale;
+ aux_out->min_quant_rescale = best_rescale;
+ aux_out->min_bitrate_error = best_error;
+ aux_out->max_bitrate_error = best_error;
+ }
+ cparams.quant_ac_rescale = best_rescale;
+ }
+ ib.VerifyMetadata();
+
+ passes_enc_state->special_frames.clear();
+
+ if (cparams.qprogressive_mode) {
+ passes_enc_state->progressive_splitter.SetProgressiveMode(
+ ProgressiveMode{progressive_passes_dc_quant_ac_full_ac});
+ } else if (cparams.progressive_mode) {
+ passes_enc_state->progressive_splitter.SetProgressiveMode(
+ ProgressiveMode{progressive_passes_dc_vlf_lf_full_ac});
+ }
+
+ JXL_RETURN_IF_ERROR(ParamsPostInit(&cparams));
+
+ if (cparams.progressive_dc < 0) {
+ if (cparams.progressive_dc != -1) {
+ return JXL_FAILURE("Invalid progressive DC setting value (%d)",
+ cparams.progressive_dc);
+ }
+ cparams.progressive_dc = 0;
+ }
+ if (cparams.ec_resampling < cparams.resampling) {
+ cparams.ec_resampling = cparams.resampling;
+ }
+ if (cparams.resampling > 1 || frame_info.is_preview) {
+ cparams.progressive_dc = 0;
+ }
+
+ if (frame_info.dc_level + cparams.progressive_dc > 4) {
+ return JXL_FAILURE("Too many levels of progressive DC");
+ }
+
+ if (cparams.butteraugli_distance != 0 &&
+ cparams.butteraugli_distance < kMinButteraugliDistance) {
+ return JXL_FAILURE("Butteraugli distance is too low (%f)",
+ cparams.butteraugli_distance);
+ }
+
+ if (ib.IsJPEG()) {
+ cparams.gaborish = Override::kOff;
+ cparams.epf = 0;
+ cparams.modular_mode = false;
+ }
+
+ if (ib.xsize() == 0 || ib.ysize() == 0) return JXL_FAILURE("Empty image");
+
+ // Assert that this metadata is correctly set up for the compression params,
+ // this should have been done by enc_file.cc
+ JXL_ASSERT(metadata->m.xyb_encoded ==
+ (cparams.color_transform == ColorTransform::kXYB));
+ std::unique_ptr<FrameHeader> frame_header =
+ jxl::make_unique<FrameHeader>(metadata);
+ JXL_RETURN_IF_ERROR(MakeFrameHeader(cparams,
+ passes_enc_state->progressive_splitter,
+ frame_info, ib, frame_header.get()));
+ // Check that if the codestream header says xyb_encoded, the color_transform
+ // matches the requirement. This is checked from the cparams here, even though
+ // optimally we'd be able to check this against what has actually been written
+ // in the main codestream header, but since ib is a const object and the data
+ // written to the main codestream header is (in modified form) in ib, the
+ // encoder cannot indicate this fact in the ib's metadata.
+ if (cparams_orig.color_transform == ColorTransform::kXYB) {
+ if (frame_header->color_transform != ColorTransform::kXYB) {
+ return JXL_FAILURE(
+ "The color transform of frames must be xyb if the codestream is xyb "
+ "encoded");
+ }
+ } else {
+ if (frame_header->color_transform == ColorTransform::kXYB) {
+ return JXL_FAILURE(
+ "The color transform of frames cannot be xyb if the codestream is "
+ "not xyb encoded");
+ }
+ }
+
+ FrameDimensions frame_dim = frame_header->ToFrameDimensions();
+
+ const size_t num_groups = frame_dim.num_groups;
+
+ Image3F opsin;
+ const ColorEncoding& c_linear = ColorEncoding::LinearSRGB(ib.IsGray());
+ std::unique_ptr<ImageMetadata> metadata_linear =
+ jxl::make_unique<ImageMetadata>();
+ metadata_linear->xyb_encoded =
+ (cparams.color_transform == ColorTransform::kXYB);
+ metadata_linear->color_encoding = c_linear;
+ ImageBundle linear_storage(metadata_linear.get());
+
+ std::vector<AuxOut> aux_outs;
+ // LossyFrameEncoder stores a reference to a std::function<Status(size_t)>
+ // so we need to keep the std::function<Status(size_t)> being referenced
+ // alive while lossy_frame_encoder is used. We could make resize_aux_outs a
+ // lambda type by making LossyFrameEncoder a template instead, but this is
+ // simpler.
+ const std::function<Status(size_t)> resize_aux_outs =
+ [&aux_outs, aux_out](const size_t num_threads) -> Status {
+ if (aux_out != nullptr) {
+ size_t old_size = aux_outs.size();
+ for (size_t i = num_threads; i < old_size; i++) {
+ aux_out->Assimilate(aux_outs[i]);
+ }
+ aux_outs.resize(num_threads);
+ // Each thread needs these INPUTS. Don't copy the entire AuxOut
+ // because it may contain stats which would be Assimilated multiple
+ // times below.
+ for (size_t i = old_size; i < aux_outs.size(); i++) {
+ aux_outs[i].dump_image = aux_out->dump_image;
+ aux_outs[i].debug_prefix = aux_out->debug_prefix;
+ }
+ }
+ return true;
+ };
+
+ LossyFrameEncoder lossy_frame_encoder(cparams, *frame_header,
+ passes_enc_state, cms, pool, aux_out);
+ std::unique_ptr<ModularFrameEncoder> modular_frame_encoder =
+ jxl::make_unique<ModularFrameEncoder>(*frame_header, cparams);
+
+ const std::vector<ImageF>* extra_channels = &ib.extra_channels();
+ std::vector<ImageF> extra_channels_storage;
+ // Clear patches
+ passes_enc_state->shared.image_features.patches = PatchDictionary();
+ passes_enc_state->shared.image_features.patches.SetPassesSharedState(
+ &passes_enc_state->shared);
+
+ if (ib.IsJPEG()) {
+ JXL_RETURN_IF_ERROR(lossy_frame_encoder.ComputeJPEGTranscodingData(
+ *ib.jpeg_data, modular_frame_encoder.get(), frame_header.get()));
+ } else if (!lossy_frame_encoder.State()->heuristics->HandlesColorConversion(
+ cparams, ib) ||
+ frame_header->encoding != FrameEncoding::kVarDCT) {
+ // Allocating a large enough image avoids a copy when padding.
+ opsin =
+ Image3F(RoundUpToBlockDim(ib.xsize()), RoundUpToBlockDim(ib.ysize()));
+ opsin.ShrinkTo(ib.xsize(), ib.ysize());
+
+ const bool want_linear = frame_header->encoding == FrameEncoding::kVarDCT &&
+ cparams.speed_tier <= SpeedTier::kKitten;
+ const ImageBundle* JXL_RESTRICT ib_or_linear = &ib;
+
+ if (frame_header->color_transform == ColorTransform::kXYB &&
+ frame_info.ib_needs_color_transform) {
+ // linear_storage would only be used by the Butteraugli loop (passing
+ // linear sRGB avoids a color conversion there). Otherwise, don't
+ // fill it to reduce memory usage.
+ ib_or_linear =
+ ToXYB(ib, pool, &opsin, cms, want_linear ? &linear_storage : nullptr);
+ } else { // RGB or YCbCr: don't do anything (forward YCbCr is not
+ // implemented, this is only used when the input is already in
+ // YCbCr)
+ // If encoding a special DC or reference frame, don't do anything:
+ // input is already in XYB.
+ CopyImageTo(ib.color(), &opsin);
+ }
+ bool lossless = cparams.IsLossless();
+ if (ib.HasAlpha() && !ib.AlphaIsPremultiplied() &&
+ frame_header->frame_type == FrameType::kRegularFrame &&
+ !ApplyOverride(cparams.keep_invisible, lossless) &&
+ cparams.ec_resampling == cparams.resampling) {
+ // simplify invisible pixels
+ SimplifyInvisible(&opsin, ib.alpha(), lossless);
+ if (want_linear) {
+ SimplifyInvisible(const_cast<Image3F*>(&ib_or_linear->color()),
+ ib.alpha(), lossless);
+ }
+ }
+ if (aux_out != nullptr) {
+ JXL_RETURN_IF_ERROR(
+ aux_out->InspectImage3F("enc_frame:OpsinDynamicsImage", opsin));
+ }
+ if (frame_header->encoding == FrameEncoding::kVarDCT) {
+ PadImageToBlockMultipleInPlace(&opsin);
+ JXL_RETURN_IF_ERROR(lossy_frame_encoder.ComputeEncodingData(
+ ib_or_linear, &opsin, cms, pool, modular_frame_encoder.get(),
+ frame_header.get()));
+ } else if (frame_header->upsampling != 1 && !cparams.already_downsampled) {
+ // In VarDCT mode, LossyFrameHeuristics takes care of running downsampling
+ // after noise, if necessary.
+ DownsampleImage(&opsin, frame_header->upsampling);
+ }
+ } else {
+ JXL_RETURN_IF_ERROR(lossy_frame_encoder.ComputeEncodingData(
+ &ib, &opsin, cms, pool, modular_frame_encoder.get(),
+ frame_header.get()));
+ }
+ if (cparams.ec_resampling != 1 && !cparams.already_downsampled) {
+ extra_channels = &extra_channels_storage;
+ for (size_t i = 0; i < ib.extra_channels().size(); i++) {
+ extra_channels_storage.emplace_back(CopyImage(ib.extra_channels()[i]));
+ DownsampleImage(&extra_channels_storage.back(), cparams.ec_resampling);
+ }
+ }
+ // needs to happen *AFTER* VarDCT-ComputeEncodingData.
+ JXL_RETURN_IF_ERROR(modular_frame_encoder->ComputeEncodingData(
+ *frame_header, *ib.metadata(), &opsin, *extra_channels,
+ lossy_frame_encoder.State(), cms, pool, aux_out,
+ /* do_color=*/frame_header->encoding == FrameEncoding::kModular));
+
+ writer->AppendByteAligned(lossy_frame_encoder.State()->special_frames);
+ frame_header->UpdateFlag(
+ lossy_frame_encoder.State()->shared.image_features.patches.HasAny(),
+ FrameHeader::kPatches);
+ frame_header->UpdateFlag(
+ lossy_frame_encoder.State()->shared.image_features.splines.HasAny(),
+ FrameHeader::kSplines);
+ JXL_RETURN_IF_ERROR(WriteFrameHeader(*frame_header, writer, aux_out));
+
+ const size_t num_passes =
+ passes_enc_state->progressive_splitter.GetNumPasses();
+
+ // DC global info + DC groups + AC global info + AC groups *
+ // num_passes.
+ const bool has_ac_global = true;
+ std::vector<BitWriter> group_codes(NumTocEntries(frame_dim.num_groups,
+ frame_dim.num_dc_groups,
+ num_passes, has_ac_global));
+ const size_t global_ac_index = frame_dim.num_dc_groups + 1;
+ const bool is_small_image = frame_dim.num_groups == 1 && num_passes == 1;
+ const auto get_output = [&](const size_t index) {
+ return &group_codes[is_small_image ? 0 : index];
+ };
+ auto ac_group_code = [&](size_t pass, size_t group) {
+ return get_output(AcGroupIndex(pass, group, frame_dim.num_groups,
+ frame_dim.num_dc_groups, has_ac_global));
+ };
+
+ if (frame_header->flags & FrameHeader::kPatches) {
+ PatchDictionaryEncoder::Encode(
+ lossy_frame_encoder.State()->shared.image_features.patches,
+ get_output(0), kLayerDictionary, aux_out);
+ }
+
+ if (frame_header->flags & FrameHeader::kSplines) {
+ EncodeSplines(lossy_frame_encoder.State()->shared.image_features.splines,
+ get_output(0), kLayerSplines, HistogramParams(), aux_out);
+ }
+
+ if (cparams.photon_noise_iso > 0) {
+ lossy_frame_encoder.State()->shared.image_features.noise_params =
+ SimulatePhotonNoise(ib.xsize(), ib.ysize(), cparams.photon_noise_iso);
+ }
+ if (cparams.manual_noise.size() == NoiseParams::kNumNoisePoints) {
+ for (size_t i = 0; i < NoiseParams::kNumNoisePoints; i++) {
+ lossy_frame_encoder.State()->shared.image_features.noise_params.lut[i] =
+ cparams.manual_noise[i];
+ }
+ }
+ if (frame_header->flags & FrameHeader::kNoise) {
+ EncodeNoise(lossy_frame_encoder.State()->shared.image_features.noise_params,
+ get_output(0), kLayerNoise, aux_out);
+ }
+
+ JXL_RETURN_IF_ERROR(
+ DequantMatricesEncodeDC(&lossy_frame_encoder.State()->shared.matrices,
+ get_output(0), kLayerQuant, aux_out));
+ if (frame_header->encoding == FrameEncoding::kVarDCT) {
+ JXL_RETURN_IF_ERROR(
+ lossy_frame_encoder.EncodeGlobalDCInfo(*frame_header, get_output(0)));
+ }
+ JXL_RETURN_IF_ERROR(
+ modular_frame_encoder->EncodeGlobalInfo(get_output(0), aux_out));
+ JXL_RETURN_IF_ERROR(modular_frame_encoder->EncodeStream(
+ get_output(0), aux_out, kLayerModularGlobal, ModularStreamId::Global()));
+
+ const auto process_dc_group = [&](const uint32_t group_index,
+ const size_t thread) {
+ AuxOut* my_aux_out = aux_out ? &aux_outs[thread] : nullptr;
+ BitWriter* output = get_output(group_index + 1);
+ if (frame_header->encoding == FrameEncoding::kVarDCT &&
+ !(frame_header->flags & FrameHeader::kUseDcFrame)) {
+ BitWriter::Allotment allotment(output, 2);
+ output->Write(2, modular_frame_encoder->extra_dc_precision[group_index]);
+ allotment.ReclaimAndCharge(output, kLayerDC, my_aux_out);
+ JXL_CHECK(modular_frame_encoder->EncodeStream(
+ output, my_aux_out, kLayerDC,
+ ModularStreamId::VarDCTDC(group_index)));
+ }
+ JXL_CHECK(modular_frame_encoder->EncodeStream(
+ output, my_aux_out, kLayerModularDcGroup,
+ ModularStreamId::ModularDC(group_index)));
+ if (frame_header->encoding == FrameEncoding::kVarDCT) {
+ const Rect& rect =
+ lossy_frame_encoder.State()->shared.DCGroupRect(group_index);
+ size_t nb_bits = CeilLog2Nonzero(rect.xsize() * rect.ysize());
+ if (nb_bits != 0) {
+ BitWriter::Allotment allotment(output, nb_bits);
+ output->Write(nb_bits,
+ modular_frame_encoder->ac_metadata_size[group_index] - 1);
+ allotment.ReclaimAndCharge(output, kLayerControlFields, my_aux_out);
+ }
+ JXL_CHECK(modular_frame_encoder->EncodeStream(
+ output, my_aux_out, kLayerControlFields,
+ ModularStreamId::ACMetadata(group_index)));
+ }
+ };
+ JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, frame_dim.num_dc_groups,
+ resize_aux_outs, process_dc_group,
+ "EncodeDCGroup"));
+
+ if (frame_header->encoding == FrameEncoding::kVarDCT) {
+ JXL_RETURN_IF_ERROR(lossy_frame_encoder.EncodeGlobalACInfo(
+ get_output(global_ac_index), modular_frame_encoder.get()));
+ }
+
+ std::atomic<int> num_errors{0};
+ const auto process_group = [&](const uint32_t group_index,
+ const size_t thread) {
+ AuxOut* my_aux_out = aux_out ? &aux_outs[thread] : nullptr;
+
+ for (size_t i = 0; i < num_passes; i++) {
+ if (frame_header->encoding == FrameEncoding::kVarDCT) {
+ if (!lossy_frame_encoder.EncodeACGroup(
+ i, group_index, ac_group_code(i, group_index), my_aux_out)) {
+ num_errors.fetch_add(1, std::memory_order_relaxed);
+ return;
+ }
+ }
+ // Write all modular encoded data (color?, alpha, depth, extra channels)
+ if (!modular_frame_encoder->EncodeStream(
+ ac_group_code(i, group_index), my_aux_out, kLayerModularAcGroup,
+ ModularStreamId::ModularAC(group_index, i))) {
+ num_errors.fetch_add(1, std::memory_order_relaxed);
+ return;
+ }
+ }
+ };
+ JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, num_groups, resize_aux_outs,
+ process_group, "EncodeGroupCoefficients"));
+
+ // Resizing aux_outs to 0 also Assimilates the array.
+ static_cast<void>(resize_aux_outs(0));
+ JXL_RETURN_IF_ERROR(num_errors.load(std::memory_order_relaxed) == 0);
+
+ for (BitWriter& bw : group_codes) {
+ BitWriter::Allotment allotment(&bw, 8);
+ bw.ZeroPadToByte(); // end of group.
+ allotment.ReclaimAndCharge(&bw, kLayerAC, aux_out);
+ }
+
+ std::vector<coeff_order_t>* permutation_ptr = nullptr;
+ std::vector<coeff_order_t> permutation;
+ if (cparams.centerfirst && !(num_passes == 1 && num_groups == 1)) {
+ permutation_ptr = &permutation;
+ // Don't permute global DC/AC or DC.
+ permutation.resize(global_ac_index + 1);
+ std::iota(permutation.begin(), permutation.end(), 0);
+ std::vector<coeff_order_t> ac_group_order(num_groups);
+ std::iota(ac_group_order.begin(), ac_group_order.end(), 0);
+ size_t group_dim = frame_dim.group_dim;
+
+ // The center of the image is either given by parameters or chosen
+ // to be the middle of the image by default if center_x, center_y resp.
+ // are not provided.
+
+ int64_t imag_cx;
+ if (cparams.center_x != static_cast<size_t>(-1)) {
+ JXL_RETURN_IF_ERROR(cparams.center_x < ib.xsize());
+ imag_cx = cparams.center_x;
+ } else {
+ imag_cx = ib.xsize() / 2;
+ }
+
+ int64_t imag_cy;
+ if (cparams.center_y != static_cast<size_t>(-1)) {
+ JXL_RETURN_IF_ERROR(cparams.center_y < ib.ysize());
+ imag_cy = cparams.center_y;
+ } else {
+ imag_cy = ib.ysize() / 2;
+ }
+
+ // The center of the group containing the center of the image.
+ int64_t cx = (imag_cx / group_dim) * group_dim + group_dim / 2;
+ int64_t cy = (imag_cy / group_dim) * group_dim + group_dim / 2;
+ // This identifies in what area of the central group the center of the image
+ // lies in.
+ double direction = -std::atan2(imag_cy - cy, imag_cx - cx);
+ // This identifies the side of the central group the center of the image
+ // lies closest to. This can take values 0, 1, 2, 3 corresponding to left,
+ // bottom, right, top.
+ int64_t side = std::fmod((direction + 5 * kPi / 4), 2 * kPi) * 2 / kPi;
+ auto get_distance_from_center = [&](size_t gid) {
+ Rect r = passes_enc_state->shared.GroupRect(gid);
+ int64_t gcx = r.x0() + group_dim / 2;
+ int64_t gcy = r.y0() + group_dim / 2;
+ int64_t dx = gcx - cx;
+ int64_t dy = gcy - cy;
+ // The angle is determined by taking atan2 and adding an appropriate
+ // starting point depending on the side we want to start on.
+ double angle = std::remainder(
+ std::atan2(dy, dx) + kPi / 4 + side * (kPi / 2), 2 * kPi);
+ // Concentric squares in clockwise order.
+ return std::make_pair(std::max(std::abs(dx), std::abs(dy)), angle);
+ };
+ std::sort(ac_group_order.begin(), ac_group_order.end(),
+ [&](coeff_order_t a, coeff_order_t b) {
+ return get_distance_from_center(a) <
+ get_distance_from_center(b);
+ });
+ std::vector<coeff_order_t> inv_ac_group_order(ac_group_order.size(), 0);
+ for (size_t i = 0; i < ac_group_order.size(); i++) {
+ inv_ac_group_order[ac_group_order[i]] = i;
+ }
+ for (size_t i = 0; i < num_passes; i++) {
+ size_t pass_start = permutation.size();
+ for (coeff_order_t v : inv_ac_group_order) {
+ permutation.push_back(pass_start + v);
+ }
+ }
+ std::vector<BitWriter> new_group_codes(group_codes.size());
+ for (size_t i = 0; i < permutation.size(); i++) {
+ new_group_codes[permutation[i]] = std::move(group_codes[i]);
+ }
+ group_codes = std::move(new_group_codes);
+ }
+
+ JXL_RETURN_IF_ERROR(
+ WriteGroupOffsets(group_codes, permutation_ptr, writer, aux_out));
+ writer->AppendByteAligned(group_codes);
+
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_frame.h b/third_party/jpeg-xl/lib/jxl/enc_frame.h
new file mode 100644
index 0000000000..b1dc637eb0
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_frame.h
@@ -0,0 +1,78 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_FRAME_H_
+#define LIB_JXL_ENC_FRAME_H_
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Information needed for encoding a frame that is not contained elsewhere and
+// does not belong to `cparams`.
+// TODO(lode): if possible, it might be better to replace FrameInfo and several
+// fields from ImageBundle (such as frame name and duration) by direct usage of
+// jxl::FrameHeader itself.
+struct FrameInfo {
+ // TODO(veluca): consider adding more parameters, such as custom patches.
+ bool save_before_color_transform = false;
+ // Whether or not the input image bundle is already in the codestream
+ // colorspace (as deduced by cparams).
+ // TODO(veluca): this is a hack - ImageBundle doesn't have a simple way to say
+ // "this is already in XYB".
+ bool ib_needs_color_transform = true;
+ FrameType frame_type = FrameType::kRegularFrame;
+ size_t dc_level = 0;
+ // Only used for kRegularFrame.
+ bool is_last = true;
+ bool is_preview = false;
+ // Information for storing this frame for future use (only for non-DC frames).
+ size_t save_as_reference = 0;
+ // The source frame for blending of a next frame, matching the
+ // save_as_reference value of a previous frame. Animated frames can use
+ // save_as_reference values 1, 2 and 3, while composite still frames can use
+ // save_as_reference values 0, 1, 2 and 3. The current C++ encoder
+ // implementation is assuming and using 1 for all frames of animations, so
+ // using that as the default value here.
+ // Corresponds to BlendingInfo::source from the FrameHeader.
+ size_t source = 1;
+ // Corresponds to BlendingInfo::clamp from the FrameHeader.
+ size_t clamp = 1;
+ // Corresponds to BlendingInfo::alpha_channel from the FrameHeader, or set to
+ // -1 to automatically choose it as the index of the first extra channel of
+ // type alpha.
+ int alpha_channel = -1;
+
+ // If non-empty, uses this blending info for the extra channels, otherwise
+ // automatically chooses it. The encoder API will fill this vector with the
+ // extra channel info and allows more options. The non-API cjxl leaves it
+ // empty and relies on the default behavior.
+ std::vector<BlendingInfo> extra_channel_blending_info;
+};
+
+// Checks and adjusts CompressParams when they are all initialized.
+Status ParamsPostInit(CompressParams* p);
+
+// Encodes a single frame (including its header) into a byte stream. Groups may
+// be processed in parallel by `pool`. metadata is the ImageMetadata encoded in
+// the codestream, and must be used for the FrameHeaders, do not use
+// ib.metadata.
+Status EncodeFrame(const CompressParams& cparams_orig,
+ const FrameInfo& frame_info, const CodecMetadata* metadata,
+ const ImageBundle& ib, PassesEncoderState* passes_enc_state,
+ const JxlCmsInterface& cms, ThreadPool* pool,
+ BitWriter* writer, AuxOut* aux_out);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_FRAME_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_gaborish.cc b/third_party/jpeg-xl/lib/jxl/enc_gaborish.cc
new file mode 100644
index 0000000000..d57bb68b7f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_gaborish.cc
@@ -0,0 +1,61 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_gaborish.h"
+
+#include <stddef.h>
+
+#include <hwy/base.h>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+void GaborishInverse(Image3F* in_out, float mul[3], ThreadPool* pool) {
+ WeightsSymmetric5 weights[3];
+ // Only an approximation. One or even two 3x3, and rank-1 (separable) 5x5
+ // are insufficient. The numbers here have been obtained by butteraugli
+ // based optimizing the whole system and the errors produced are likely
+ // more favorable for good rate-distortion compromises rather than
+ // just using mathematical optimization to find the inverse.
+ static const float kGaborish[5] = {
+ -0.090881924078487886f, -0.043663953593472138f, 0.01392497846646211f,
+ 0.0036189602184591141f, 0.0030557936884763499f};
+ for (int i = 0; i < 3; ++i) {
+ double sum = 1.0 + mul[i] * 4 *
+ (kGaborish[0] + kGaborish[1] + kGaborish[2] +
+ kGaborish[4] + 2 * kGaborish[3]);
+ if (sum < 1e-5) {
+ sum = 1e-5;
+ }
+ const float normalize = static_cast<float>(1.0 / sum);
+ const float normalize_mul = mul[i] * normalize;
+ weights[i] = WeightsSymmetric5{{HWY_REP4(normalize)},
+ {HWY_REP4(normalize_mul * kGaborish[0])},
+ {HWY_REP4(normalize_mul * kGaborish[2])},
+ {HWY_REP4(normalize_mul * kGaborish[1])},
+ {HWY_REP4(normalize_mul * kGaborish[4])},
+ {HWY_REP4(normalize_mul * kGaborish[3])}};
+ }
+ // Reduce memory footprint by only allocating a single plane and swapping it
+ // into the output Image3F. Better still would be tiling.
+ // Note that we cannot *allocate* a plane, as doing so might cause Image3F to
+ // have planes of different stride. Instead, we copy one plane in a temporary
+ // image and reuse the existing planes of the in/out image.
+ ImageF temp = CopyImage(in_out->Plane(2));
+ Symmetric5(in_out->Plane(0), Rect(*in_out), weights[0], pool,
+ &in_out->Plane(2));
+ Symmetric5(in_out->Plane(1), Rect(*in_out), weights[1], pool,
+ &in_out->Plane(0));
+ Symmetric5(temp, Rect(*in_out), weights[2], pool, &in_out->Plane(1));
+ // Now planes are 1, 2, 0.
+ in_out->Plane(0).Swap(in_out->Plane(1));
+ // 2 1 0
+ in_out->Plane(0).Swap(in_out->Plane(2));
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_gaborish.h b/third_party/jpeg-xl/lib/jxl/enc_gaborish.h
new file mode 100644
index 0000000000..102064f9a2
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_gaborish.h
@@ -0,0 +1,26 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_GABORISH_H_
+#define LIB_JXL_GABORISH_H_
+
+// Linear smoothing (3x3 convolution) for deblocking without too much blur.
+
+#include <stdint.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// Used in encoder to reduce the impact of the decoder's smoothing.
+// This is not exact. Works in-place to reduce memory use.
+// The input is typically in XYB space.
+void GaborishInverse(Image3F* in_out, float mul[3], ThreadPool* pool);
+
+} // namespace jxl
+
+#endif // LIB_JXL_GABORISH_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_gaborish_test.cc b/third_party/jpeg-xl/lib/jxl/enc_gaborish_test.cc
new file mode 100644
index 0000000000..57a18e3338
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_gaborish_test.cc
@@ -0,0 +1,77 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_gaborish.h"
+
+#include <hwy/base.h>
+
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+// weight1,2 need not be normalized.
+WeightsSymmetric3 GaborishKernel(float weight1, float weight2) {
+ constexpr float weight0 = 1.0f;
+
+ // Normalize
+ const float mul = 1.0f / (weight0 + 4 * (weight1 + weight2));
+ const float w0 = weight0 * mul;
+ const float w1 = weight1 * mul;
+ const float w2 = weight2 * mul;
+
+ const WeightsSymmetric3 w = {{HWY_REP4(w0)}, {HWY_REP4(w1)}, {HWY_REP4(w2)}};
+ return w;
+}
+
+void ConvolveGaborish(const ImageF& in, float weight1, float weight2,
+ ThreadPool* pool, ImageF* JXL_RESTRICT out) {
+ JXL_CHECK(SameSize(in, *out));
+ Symmetric3(in, Rect(in), GaborishKernel(weight1, weight2), pool, out);
+}
+
+void TestRoundTrip(const Image3F& in, float max_l1) {
+ Image3F fwd(in.xsize(), in.ysize());
+ ThreadPool* null_pool = nullptr;
+ ConvolveGaborish(in.Plane(0), 0, 0, null_pool, &fwd.Plane(0));
+ ConvolveGaborish(in.Plane(1), 0, 0, null_pool, &fwd.Plane(1));
+ ConvolveGaborish(in.Plane(2), 0, 0, null_pool, &fwd.Plane(2));
+ float w = 0.92718927264540152f;
+ float weights[3] = {
+ w,
+ w,
+ w,
+ };
+ GaborishInverse(&fwd, weights, null_pool);
+ JXL_ASSERT_OK(VerifyRelativeError(in, fwd, max_l1, 1E-4f, _));
+}
+
+TEST(GaborishTest, TestZero) {
+ Image3F in(20, 20);
+ ZeroFillImage(&in);
+ TestRoundTrip(in, 0.0f);
+}
+
+// Disabled: large difference.
+#if 0
+TEST(GaborishTest, TestDirac) {
+ Image3F in(20, 20);
+ ZeroFillImage(&in);
+ in.PlaneRow(1, 10)[10] = 10.0f;
+ TestRoundTrip(in, 0.26f);
+}
+#endif
+
+TEST(GaborishTest, TestFlat) {
+ Image3F in(20, 20);
+ FillImage(1.0f, &in);
+ TestRoundTrip(in, 1E-5f);
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_gamma_correct.h b/third_party/jpeg-xl/lib/jxl/enc_gamma_correct.h
new file mode 100644
index 0000000000..0db7012bbe
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_gamma_correct.h
@@ -0,0 +1,36 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_GAMMA_CORRECT_H_
+#define LIB_JXL_ENC_GAMMA_CORRECT_H_
+
+// Deprecated: sRGB transfer function. Use color_management.h instead.
+
+#include <cmath>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+namespace jxl {
+
+// Values are in [0, 1].
+static JXL_INLINE double Srgb8ToLinearDirect(double srgb) {
+ if (srgb <= 0.0) return 0.0;
+ if (srgb <= 0.04045) return srgb / 12.92;
+ if (srgb >= 1.0) return 1.0;
+ return std::pow((srgb + 0.055) / 1.055, 2.4);
+}
+
+// Values are in [0, 1].
+static JXL_INLINE double LinearToSrgb8Direct(double linear) {
+ if (linear <= 0.0) return 0.0;
+ if (linear >= 1.0) return 1.0;
+ if (linear <= 0.0031308) return linear * 12.92;
+ return std::pow(linear, 1.0 / 2.4) * 1.055 - 0.055;
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_GAMMA_CORRECT_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_group.cc b/third_party/jpeg-xl/lib/jxl/enc_group.cc
new file mode 100644
index 0000000000..074cf1553a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_group.cc
@@ -0,0 +1,426 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_group.h"
+
+#include <hwy/aligned_allocator.h>
+#include <utility>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_group.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/dec_transforms-inl.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_transforms-inl.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/quantizer-inl.h"
+#include "lib/jxl/quantizer.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Ge;
+using hwy::HWY_NAMESPACE::IfThenElse;
+using hwy::HWY_NAMESPACE::IfThenElseZero;
+using hwy::HWY_NAMESPACE::MaskFromVec;
+using hwy::HWY_NAMESPACE::Round;
+
+// NOTE: caller takes care of extracting quant from rect of RawQuantField.
+void QuantizeBlockAC(const Quantizer& quantizer, const bool error_diffusion,
+ size_t c, float qm_multiplier, size_t quant_kind,
+ size_t xsize, size_t ysize, float* thresholds,
+ const float* JXL_RESTRICT block_in, int32_t* quant,
+ int32_t* JXL_RESTRICT block_out) {
+ PROFILER_FUNC;
+ const float* JXL_RESTRICT qm = quantizer.InvDequantMatrix(quant_kind, c);
+ float qac = quantizer.Scale() * (*quant);
+ // Not SIMD-fied for now.
+ if (c != 1 && (xsize > 1 || ysize > 1)) {
+ for (int i = 0; i < 4; ++i) {
+ thresholds[i] -= Clamp1(0.003f * xsize * ysize, 0.f, 0.08f);
+ if (thresholds[i] < 0.54) {
+ thresholds[i] = 0.54;
+ }
+ }
+ }
+ HWY_CAPPED(float, kBlockDim) df;
+ HWY_CAPPED(int32_t, kBlockDim) di;
+ HWY_CAPPED(uint32_t, kBlockDim) du;
+ const auto quantv = Set(df, qac * qm_multiplier);
+ for (size_t y = 0; y < ysize * kBlockDim; y++) {
+ size_t yfix = static_cast<size_t>(y >= ysize * kBlockDim / 2) * 2;
+ const size_t off = y * kBlockDim * xsize;
+ for (size_t x = 0; x < xsize * kBlockDim; x += Lanes(df)) {
+ auto thr = Zero(df);
+ if (xsize == 1) {
+ HWY_ALIGN uint32_t kMask[kBlockDim] = {0, 0, 0, 0, ~0u, ~0u, ~0u, ~0u};
+ const auto mask = MaskFromVec(BitCast(df, Load(du, kMask + x)));
+ thr = IfThenElse(mask, Set(df, thresholds[yfix + 1]),
+ Set(df, thresholds[yfix]));
+ } else {
+ // Same for all lanes in the vector.
+ thr = Set(
+ df,
+ thresholds[yfix + static_cast<size_t>(x >= xsize * kBlockDim / 2)]);
+ }
+ const auto q = Mul(Load(df, qm + off + x), quantv);
+ const auto in = Load(df, block_in + off + x);
+ const auto val = Mul(q, in);
+ const auto nzero_mask = Ge(Abs(val), thr);
+ const auto v = ConvertTo(di, IfThenElseZero(nzero_mask, Round(val)));
+ Store(v, di, block_out + off + x);
+ }
+ }
+}
+
+void AdjustQuantBlockAC(const Quantizer& quantizer, size_t c,
+ float qm_multiplier, size_t quant_kind, size_t xsize,
+ size_t ysize, float* thresholds,
+ const float* JXL_RESTRICT block_in, int32_t* quant) {
+ // No quantization adjusting for these small blocks.
+ // Quantization adjusting attempts to fix some known issues
+ // with larger blocks and on the 8x8 dct's emerging 8x8 blockiness
+ // when there are not many non-zeros.
+ constexpr size_t kPartialBlockKinds =
+ (1 << AcStrategy::Type::IDENTITY) | (1 << AcStrategy::Type::DCT2X2) |
+ (1 << AcStrategy::Type::DCT4X4) | (1 << AcStrategy::Type::DCT4X8) |
+ (1 << AcStrategy::Type::DCT8X4) | (1 << AcStrategy::Type::AFV0) |
+ (1 << AcStrategy::Type::AFV1) | (1 << AcStrategy::Type::AFV2) |
+ (1 << AcStrategy::Type::AFV3);
+ if ((1 << quant_kind) & kPartialBlockKinds) return;
+
+ const float* JXL_RESTRICT qm = quantizer.InvDequantMatrix(quant_kind, c);
+ float qac = quantizer.Scale() * (*quant);
+ if (xsize > 1 || ysize > 1) {
+ for (int i = 0; i < 4; ++i) {
+ thresholds[i] -= Clamp1(0.003f * xsize * ysize, 0.f, 0.08f);
+ if (thresholds[i] < 0.54) {
+ thresholds[i] = 0.54;
+ }
+ }
+ }
+ float sum_of_highest_freq_row_and_column = 0;
+ float hfNonZeros[4] = {};
+ float hfMaxError[4] = {};
+
+ for (size_t y = 0; y < ysize * kBlockDim; y++) {
+ for (size_t x = 0; x < xsize * kBlockDim; x++) {
+ const size_t pos = y * kBlockDim * xsize + x;
+ if (x < xsize && y < ysize) {
+ continue;
+ }
+ const size_t hfix = (static_cast<size_t>(y >= ysize * kBlockDim / 2) * 2 +
+ static_cast<size_t>(x >= xsize * kBlockDim / 2));
+ const float val = block_in[pos] * (qm[pos] * qac * qm_multiplier);
+ const float v = (std::abs(val) < thresholds[hfix]) ? 0 : rintf(val);
+ if (c == 1 && v == 0) {
+ const float error = std::abs(val);
+ if (hfMaxError[hfix] < error) {
+ hfMaxError[hfix] = error;
+ }
+ }
+ if (v != 0.0f) {
+ hfNonZeros[hfix] += std::abs(v);
+ if ((y == ysize * kBlockDim - 1 || x == xsize * kBlockDim - 1) &&
+ (x >= xsize * 4 && y >= ysize * 4)) {
+ sum_of_highest_freq_row_and_column += std::abs(val);
+ }
+ }
+ }
+ }
+ if (c == 1) {
+ static const double kLimit = 0.49f;
+ for (int i = 1; i < 4; ++i) {
+ if (hfNonZeros[i] == 0.0 && hfMaxError[i] > kLimit) {
+ thresholds[i] = 0.9999 * hfMaxError[i];
+ }
+ }
+ }
+ // Heuristic for improving accuracy of high-frequency patterns
+ // occurring in an environment with no medium-frequency masking
+ // patterns. This should be improved later to be done in X and B
+ // planes too as 32x32 and larger transforms become rather ugly
+ // when this is not compensated for.
+ if (15 * sum_of_highest_freq_row_and_column >= hfNonZeros[0] + 1) {
+ constexpr int inc = 5;
+ *quant += inc;
+ if (8 * sum_of_highest_freq_row_and_column >= hfNonZeros[0] + 1) {
+ *quant += inc;
+ }
+ if (5 * sum_of_highest_freq_row_and_column >= hfNonZeros[0] + 1) {
+ *quant += inc;
+ }
+ if (3 * sum_of_highest_freq_row_and_column >= hfNonZeros[0] + 1) {
+ *quant += inc;
+ }
+ if (*quant >= Quantizer::kQuantMax) {
+ *quant = Quantizer::kQuantMax - 1;
+ }
+ }
+ if (quant_kind == AcStrategy::Type::DCT) {
+ // If this 8x8 block is too flat, increase the adaptive quantization level
+ // a bit to reduce visible block boundaries and requantize the block.
+ if (hfNonZeros[0] + hfNonZeros[1] + hfNonZeros[2] + hfNonZeros[3] < 11) {
+ *quant += 1;
+ if (*quant >= Quantizer::kQuantMax) {
+ *quant = Quantizer::kQuantMax - 1;
+ }
+ }
+ }
+ {
+ // Reduce quant in highly active areas.
+ int32_t div = (xsize + ysize) / 2;
+ int32_t activity = (hfNonZeros[0] + div / 2) / div;
+ int32_t orig_qp_limit = std::max(4, *quant / 2);
+ for (int i = 1; i < 4; ++i) {
+ activity = std::min<int32_t>(activity, (hfNonZeros[i] + div / 2) / div);
+ }
+ if (activity >= 15) {
+ activity = 15;
+ }
+ int32_t qp = *quant - activity;
+ if (qp < orig_qp_limit) {
+ qp = orig_qp_limit;
+ }
+ *quant = qp;
+ }
+}
+
+// NOTE: caller takes care of extracting quant from rect of RawQuantField.
+void QuantizeRoundtripYBlockAC(PassesEncoderState* enc_state, const size_t size,
+ const Quantizer& quantizer,
+ const bool error_diffusion, size_t quant_kind,
+ size_t xsize, size_t ysize,
+ const float* JXL_RESTRICT biases, int32_t* quant,
+ float* JXL_RESTRICT inout,
+ int32_t* JXL_RESTRICT quantized) {
+ float thres_y[4] = {0.58f, 0.64f, 0.64f, 0.64f};
+ {
+ int32_t max_quant = 0;
+ int quant_orig = *quant;
+ float val[3] = {enc_state->x_qm_multiplier, 1.0f,
+ enc_state->b_qm_multiplier};
+ int clut[3] = {1, 0, 2};
+ for (int ii = 0; ii < 3; ++ii) {
+ float thres[4] = {0.58f, 0.64f, 0.64f, 0.64f};
+ int c = clut[ii];
+ *quant = quant_orig;
+ AdjustQuantBlockAC(quantizer, c, val[c], quant_kind, xsize, ysize,
+ &thres[0], inout + c * size, quant);
+ // Dead zone adjustment
+ if (c == 1) {
+ for (int k = 0; k < 4; ++k) {
+ thres_y[k] = thres[k];
+ }
+ }
+ max_quant = std::max(*quant, max_quant);
+ }
+ *quant = max_quant;
+ }
+
+ QuantizeBlockAC(quantizer, error_diffusion, 1, 1.0f, quant_kind, xsize, ysize,
+ &thres_y[0], inout + size, quant, quantized + size);
+
+ PROFILER_ZONE("enc quant adjust bias");
+ const float* JXL_RESTRICT dequant_matrix =
+ quantizer.DequantMatrix(quant_kind, 1);
+
+ HWY_CAPPED(float, kDCTBlockSize) df;
+ HWY_CAPPED(int32_t, kDCTBlockSize) di;
+ const auto inv_qac = Set(df, quantizer.inv_quant_ac(*quant));
+ for (size_t k = 0; k < kDCTBlockSize * xsize * ysize; k += Lanes(df)) {
+ const auto quant = Load(di, quantized + size + k);
+ const auto adj_quant = AdjustQuantBias(di, 1, quant, biases);
+ const auto dequantm = Load(df, dequant_matrix + k);
+ Store(Mul(Mul(adj_quant, dequantm), inv_qac), df, inout + size + k);
+ }
+}
+
+void ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
+ const Image3F& opsin, Image3F* dc) {
+ PROFILER_FUNC;
+ const Rect block_group_rect = enc_state->shared.BlockGroupRect(group_idx);
+ const Rect group_rect = enc_state->shared.GroupRect(group_idx);
+ const Rect cmap_rect(
+ block_group_rect.x0() / kColorTileDimInBlocks,
+ block_group_rect.y0() / kColorTileDimInBlocks,
+ DivCeil(block_group_rect.xsize(), kColorTileDimInBlocks),
+ DivCeil(block_group_rect.ysize(), kColorTileDimInBlocks));
+
+ const size_t xsize_blocks = block_group_rect.xsize();
+ const size_t ysize_blocks = block_group_rect.ysize();
+
+ const size_t dc_stride = static_cast<size_t>(dc->PixelsPerRow());
+ const size_t opsin_stride = static_cast<size_t>(opsin.PixelsPerRow());
+
+ ImageI& full_quant_field = enc_state->shared.raw_quant_field;
+ const CompressParams& cparams = enc_state->cparams;
+
+ // TODO(veluca): consider strategies to reduce this memory.
+ auto mem = hwy::AllocateAligned<int32_t>(3 * AcStrategy::kMaxCoeffArea);
+ auto fmem = hwy::AllocateAligned<float>(5 * AcStrategy::kMaxCoeffArea);
+ float* JXL_RESTRICT scratch_space =
+ fmem.get() + 3 * AcStrategy::kMaxCoeffArea;
+ {
+ // Only use error diffusion in Squirrel mode or slower.
+ const bool error_diffusion = cparams.speed_tier <= SpeedTier::kSquirrel;
+ constexpr HWY_CAPPED(float, kDCTBlockSize) d;
+
+ int32_t* JXL_RESTRICT coeffs[3][kMaxNumPasses] = {};
+ size_t num_passes = enc_state->progressive_splitter.GetNumPasses();
+ JXL_DASSERT(num_passes > 0);
+ for (size_t i = 0; i < num_passes; i++) {
+ // TODO(veluca): 16-bit quantized coeffs are not implemented yet.
+ JXL_ASSERT(enc_state->coeffs[i]->Type() == ACType::k32);
+ for (size_t c = 0; c < 3; c++) {
+ coeffs[c][i] = enc_state->coeffs[i]->PlaneRow(c, group_idx, 0).ptr32;
+ }
+ }
+
+ HWY_ALIGN float* coeffs_in = fmem.get();
+ HWY_ALIGN int32_t* quantized = mem.get();
+
+ for (size_t by = 0; by < ysize_blocks; ++by) {
+ int32_t* JXL_RESTRICT row_quant_ac =
+ block_group_rect.Row(&full_quant_field, by);
+ size_t ty = by / kColorTileDimInBlocks;
+ const int8_t* JXL_RESTRICT row_cmap[3] = {
+ cmap_rect.ConstRow(enc_state->shared.cmap.ytox_map, ty),
+ nullptr,
+ cmap_rect.ConstRow(enc_state->shared.cmap.ytob_map, ty),
+ };
+ const float* JXL_RESTRICT opsin_rows[3] = {
+ group_rect.ConstPlaneRow(opsin, 0, by * kBlockDim),
+ group_rect.ConstPlaneRow(opsin, 1, by * kBlockDim),
+ group_rect.ConstPlaneRow(opsin, 2, by * kBlockDim),
+ };
+ float* JXL_RESTRICT dc_rows[3] = {
+ block_group_rect.PlaneRow(dc, 0, by),
+ block_group_rect.PlaneRow(dc, 1, by),
+ block_group_rect.PlaneRow(dc, 2, by),
+ };
+ AcStrategyRow ac_strategy_row =
+ enc_state->shared.ac_strategy.ConstRow(block_group_rect, by);
+ for (size_t tx = 0; tx < DivCeil(xsize_blocks, kColorTileDimInBlocks);
+ tx++) {
+ const auto x_factor =
+ Set(d, enc_state->shared.cmap.YtoXRatio(row_cmap[0][tx]));
+ const auto b_factor =
+ Set(d, enc_state->shared.cmap.YtoBRatio(row_cmap[2][tx]));
+ for (size_t bx = tx * kColorTileDimInBlocks;
+ bx < xsize_blocks && bx < (tx + 1) * kColorTileDimInBlocks; ++bx) {
+ const AcStrategy acs = ac_strategy_row[bx];
+ if (!acs.IsFirstBlock()) continue;
+
+ size_t xblocks = acs.covered_blocks_x();
+ size_t yblocks = acs.covered_blocks_y();
+
+ CoefficientLayout(&yblocks, &xblocks);
+
+ size_t size = kDCTBlockSize * xblocks * yblocks;
+
+ // DCT Y channel, roundtrip-quantize it and set DC.
+ int32_t quant_ac = row_quant_ac[bx];
+ for (size_t c : {0, 1, 2}) {
+ TransformFromPixels(acs.Strategy(), opsin_rows[c] + bx * kBlockDim,
+ opsin_stride, coeffs_in + c * size,
+ scratch_space);
+ }
+ DCFromLowestFrequencies(acs.Strategy(), coeffs_in + size,
+ dc_rows[1] + bx, dc_stride);
+
+ QuantizeRoundtripYBlockAC(
+ enc_state, size, enc_state->shared.quantizer, error_diffusion,
+ acs.RawStrategy(), xblocks, yblocks, kDefaultQuantBias, &quant_ac,
+ coeffs_in, quantized);
+
+ // Unapply color correlation
+ for (size_t k = 0; k < size; k += Lanes(d)) {
+ const auto in_x = Load(d, coeffs_in + k);
+ const auto in_y = Load(d, coeffs_in + size + k);
+ const auto in_b = Load(d, coeffs_in + 2 * size + k);
+ const auto out_x = NegMulAdd(x_factor, in_y, in_x);
+ const auto out_b = NegMulAdd(b_factor, in_y, in_b);
+ Store(out_x, d, coeffs_in + k);
+ Store(out_b, d, coeffs_in + 2 * size + k);
+ }
+
+ // Quantize X and B channels and set DC.
+ for (size_t c : {0, 2}) {
+ float thres[4] = {0.58f, 0.62f, 0.62f, 0.62f};
+ QuantizeBlockAC(enc_state->shared.quantizer, error_diffusion, c,
+ c == 0 ? enc_state->x_qm_multiplier
+ : enc_state->b_qm_multiplier,
+ acs.RawStrategy(), xblocks, yblocks, &thres[0],
+ coeffs_in + c * size, &quant_ac,
+ quantized + c * size);
+ DCFromLowestFrequencies(acs.Strategy(), coeffs_in + c * size,
+ dc_rows[c] + bx, dc_stride);
+ }
+ row_quant_ac[bx] = quant_ac;
+ for (size_t c = 0; c < 3; c++) {
+ enc_state->progressive_splitter.SplitACCoefficients(
+ quantized + c * size, acs, bx, by, coeffs[c]);
+ for (size_t p = 0; p < num_passes; p++) {
+ coeffs[c][p] += size;
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(ComputeCoefficients);
+void ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
+ const Image3F& opsin, Image3F* dc) {
+ return HWY_DYNAMIC_DISPATCH(ComputeCoefficients)(group_idx, enc_state, opsin,
+ dc);
+}
+
+Status EncodeGroupTokenizedCoefficients(size_t group_idx, size_t pass_idx,
+ size_t histogram_idx,
+ const PassesEncoderState& enc_state,
+ BitWriter* writer, AuxOut* aux_out) {
+ // Select which histogram to use among those of the current pass.
+ const size_t num_histograms = enc_state.shared.num_histograms;
+ // num_histograms is 0 only for lossless.
+ JXL_ASSERT(num_histograms == 0 || histogram_idx < num_histograms);
+ size_t histo_selector_bits = CeilLog2Nonzero(num_histograms);
+
+ if (histo_selector_bits != 0) {
+ BitWriter::Allotment allotment(writer, histo_selector_bits);
+ writer->Write(histo_selector_bits, histogram_idx);
+ allotment.ReclaimAndCharge(writer, kLayerAC, aux_out);
+ }
+ WriteTokens(enc_state.passes[pass_idx].ac_tokens[group_idx],
+ enc_state.passes[pass_idx].codes,
+ enc_state.passes[pass_idx].context_map, writer, kLayerACTokens,
+ aux_out);
+
+ return true;
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/enc_group.h b/third_party/jpeg-xl/lib/jxl/enc_group.h
new file mode 100644
index 0000000000..0caf408a03
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_group.h
@@ -0,0 +1,32 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_GROUP_H_
+#define LIB_JXL_ENC_GROUP_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+struct AuxOut;
+struct PassesEncoderState;
+
+// Fills DC
+void ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state,
+ const Image3F& opsin, Image3F* dc);
+
+Status EncodeGroupTokenizedCoefficients(size_t group_idx, size_t pass_idx,
+ size_t histogram_idx,
+ const PassesEncoderState& enc_state,
+ BitWriter* writer, AuxOut* aux_out);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_GROUP_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_heuristics.cc b/third_party/jpeg-xl/lib/jxl/enc_heuristics.cc
new file mode 100644
index 0000000000..18122fa769
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_heuristics.cc
@@ -0,0 +1,948 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_heuristics.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <numeric>
+#include <string>
+
+#include "lib/jxl/enc_ac_strategy.h"
+#include "lib/jxl/enc_adaptive_quantization.h"
+#include "lib/jxl/enc_ar_control_field.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_chroma_from_luma.h"
+#include "lib/jxl/enc_gaborish.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_noise.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_photon_noise.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/enc_xyb.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+namespace {
+void FindBestBlockEntropyModel(PassesEncoderState& enc_state) {
+ if (enc_state.cparams.decoding_speed_tier >= 1) {
+ static constexpr uint8_t kSimpleCtxMap[] = {
+ // Cluster all blocks together
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //
+ };
+ static_assert(
+ 3 * kNumOrders == sizeof(kSimpleCtxMap) / sizeof *kSimpleCtxMap,
+ "Update simple context map");
+
+ auto bcm = enc_state.shared.block_ctx_map;
+ bcm.ctx_map.assign(std::begin(kSimpleCtxMap), std::end(kSimpleCtxMap));
+ bcm.num_ctxs = 2;
+ bcm.num_dc_ctxs = 1;
+ return;
+ }
+ if (enc_state.cparams.speed_tier >= SpeedTier::kFalcon) {
+ return;
+ }
+ const ImageI& rqf = enc_state.shared.raw_quant_field;
+ // No need to change context modeling for small images.
+ size_t tot = rqf.xsize() * rqf.ysize();
+ size_t size_for_ctx_model =
+ (1 << 10) * enc_state.cparams.butteraugli_distance;
+ if (tot < size_for_ctx_model) return;
+
+ struct OccCounters {
+ // count the occurrences of each qf value and each strategy type.
+ OccCounters(const ImageI& rqf, const AcStrategyImage& ac_strategy) {
+ for (size_t y = 0; y < rqf.ysize(); y++) {
+ const int32_t* qf_row = rqf.Row(y);
+ AcStrategyRow acs_row = ac_strategy.ConstRow(y);
+ for (size_t x = 0; x < rqf.xsize(); x++) {
+ int ord = kStrategyOrder[acs_row[x].RawStrategy()];
+ int qf = qf_row[x] - 1;
+ qf_counts[qf]++;
+ qf_ord_counts[ord][qf]++;
+ ord_counts[ord]++;
+ }
+ }
+ }
+
+ size_t qf_counts[256] = {};
+ size_t qf_ord_counts[kNumOrders][256] = {};
+ size_t ord_counts[kNumOrders] = {};
+ };
+ // The OccCounters struct is too big to allocate on the stack.
+ std::unique_ptr<OccCounters> counters(
+ new OccCounters(rqf, enc_state.shared.ac_strategy));
+
+ // Splitting the context model according to the quantization field seems to
+ // mostly benefit only large images.
+ size_t size_for_qf_split = (1 << 13) * enc_state.cparams.butteraugli_distance;
+ size_t num_qf_segments = tot < size_for_qf_split ? 1 : 2;
+ std::vector<uint32_t>& qft = enc_state.shared.block_ctx_map.qf_thresholds;
+ qft.clear();
+ // Divide the quant field in up to num_qf_segments segments.
+ size_t cumsum = 0;
+ size_t next = 1;
+ size_t last_cut = 256;
+ size_t cut = tot * next / num_qf_segments;
+ for (uint32_t j = 0; j < 256; j++) {
+ cumsum += counters->qf_counts[j];
+ if (cumsum > cut) {
+ if (j != 0) {
+ qft.push_back(j);
+ }
+ last_cut = j;
+ while (cumsum > cut) {
+ next++;
+ cut = tot * next / num_qf_segments;
+ }
+ } else if (next > qft.size() + 1) {
+ if (j - 1 == last_cut && j != 0) {
+ qft.push_back(j);
+ }
+ }
+ }
+
+ // Count the occurrences of each segment.
+ std::vector<size_t> counts(kNumOrders * (qft.size() + 1));
+ size_t qft_pos = 0;
+ for (size_t j = 0; j < 256; j++) {
+ if (qft_pos < qft.size() && j == qft[qft_pos]) {
+ qft_pos++;
+ }
+ for (size_t i = 0; i < kNumOrders; i++) {
+ counts[qft_pos + i * (qft.size() + 1)] += counters->qf_ord_counts[i][j];
+ }
+ }
+
+ // Repeatedly merge the lowest-count pair.
+ std::vector<uint8_t> remap((qft.size() + 1) * kNumOrders);
+ std::iota(remap.begin(), remap.end(), 0);
+ std::vector<uint8_t> clusters(remap);
+ size_t nb_clusters = Clamp1((int)(tot / size_for_ctx_model / 2), 2, 9);
+ size_t nb_clusters_chroma = Clamp1((int)(tot / size_for_ctx_model / 3), 1, 5);
+ // This is O(n^2 log n), but n is small.
+ while (clusters.size() > nb_clusters) {
+ std::sort(clusters.begin(), clusters.end(),
+ [&](int a, int b) { return counts[a] > counts[b]; });
+ counts[clusters[clusters.size() - 2]] += counts[clusters.back()];
+ counts[clusters.back()] = 0;
+ remap[clusters.back()] = clusters[clusters.size() - 2];
+ clusters.pop_back();
+ }
+ for (size_t i = 0; i < remap.size(); i++) {
+ while (remap[remap[i]] != remap[i]) {
+ remap[i] = remap[remap[i]];
+ }
+ }
+ // Relabel starting from 0.
+ std::vector<uint8_t> remap_remap(remap.size(), remap.size());
+ size_t num = 0;
+ for (size_t i = 0; i < remap.size(); i++) {
+ if (remap_remap[remap[i]] == remap.size()) {
+ remap_remap[remap[i]] = num++;
+ }
+ remap[i] = remap_remap[remap[i]];
+ }
+ // Write the block context map.
+ auto& ctx_map = enc_state.shared.block_ctx_map.ctx_map;
+ ctx_map = remap;
+ ctx_map.resize(remap.size() * 3);
+ // for chroma, only use up to nb_clusters_chroma separate block contexts
+ // (those for the biggest clusters)
+ for (size_t i = remap.size(); i < remap.size() * 3; i++) {
+ ctx_map[i] = num + Clamp1((int)remap[i % remap.size()], 0,
+ (int)nb_clusters_chroma - 1);
+ }
+ enc_state.shared.block_ctx_map.num_ctxs =
+ *std::max_element(ctx_map.begin(), ctx_map.end()) + 1;
+}
+
+} // namespace
+
+void FindBestDequantMatrices(const CompressParams& cparams,
+ const Image3F& opsin,
+ ModularFrameEncoder* modular_frame_encoder,
+ DequantMatrices* dequant_matrices) {
+ // TODO(veluca): quant matrices for no-gaborish.
+ // TODO(veluca): heuristics for in-bitstream quant tables.
+ *dequant_matrices = DequantMatrices();
+ if (cparams.max_error_mode) {
+ // Set numerators of all quantization matrices to constant values.
+ float weights[3][1] = {{1.0f / cparams.max_error[0]},
+ {1.0f / cparams.max_error[1]},
+ {1.0f / cparams.max_error[2]}};
+ DctQuantWeightParams dct_params(weights);
+ std::vector<QuantEncoding> encodings(DequantMatrices::kNum,
+ QuantEncoding::DCT(dct_params));
+ DequantMatricesSetCustom(dequant_matrices, encodings,
+ modular_frame_encoder);
+ float dc_weights[3] = {1.0f / cparams.max_error[0],
+ 1.0f / cparams.max_error[1],
+ 1.0f / cparams.max_error[2]};
+ DequantMatricesSetCustomDC(dequant_matrices, dc_weights);
+ }
+}
+
+bool DefaultEncoderHeuristics::HandlesColorConversion(
+ const CompressParams& cparams, const ImageBundle& ib) {
+ return cparams.noise != Override::kOn && cparams.patches != Override::kOn &&
+ cparams.speed_tier >= SpeedTier::kWombat && cparams.resampling == 1 &&
+ cparams.color_transform == ColorTransform::kXYB &&
+ !cparams.modular_mode && !ib.HasAlpha();
+}
+
+namespace {
+
+void StoreMin2(const float v, float& min1, float& min2) {
+ if (v < min2) {
+ if (v < min1) {
+ min2 = min1;
+ min1 = v;
+ } else {
+ min2 = v;
+ }
+ }
+}
+
+void CreateMask(const ImageF& image, ImageF& mask) {
+ for (size_t y = 0; y < image.ysize(); y++) {
+ auto* row_n = y > 0 ? image.Row(y - 1) : image.Row(y);
+ auto* row_in = image.Row(y);
+ auto* row_s = y + 1 < image.ysize() ? image.Row(y + 1) : image.Row(y);
+ auto* row_out = mask.Row(y);
+ for (size_t x = 0; x < image.xsize(); x++) {
+ // Center, west, east, north, south values and their absolute difference
+ float c = row_in[x];
+ float w = x > 0 ? row_in[x - 1] : row_in[x];
+ float e = x + 1 < image.xsize() ? row_in[x + 1] : row_in[x];
+ float n = row_n[x];
+ float s = row_s[x];
+ float dw = std::abs(c - w);
+ float de = std::abs(c - e);
+ float dn = std::abs(c - n);
+ float ds = std::abs(c - s);
+ float min = std::numeric_limits<float>::max();
+ float min2 = std::numeric_limits<float>::max();
+ StoreMin2(dw, min, min2);
+ StoreMin2(de, min, min2);
+ StoreMin2(dn, min, min2);
+ StoreMin2(ds, min, min2);
+ row_out[x] = min2;
+ }
+ }
+}
+
+// Downsamples the image by a factor of 2 with a kernel that's sharper than
+// the standard 2x2 box kernel used by DownsampleImage.
+// The kernel is optimized against the result of the 2x2 upsampling kernel used
+// by the decoder. Ringing is slightly reduced by clamping the values of the
+// resulting pixels within certain bounds of a small region in the original
+// image.
+void DownsampleImage2_Sharper(const ImageF& input, ImageF* output) {
+ const int64_t kernelx = 12;
+ const int64_t kernely = 12;
+
+ static const float kernel[144] = {
+ -0.000314256996835, -0.000314256996835, -0.000897597057705,
+ -0.000562751488849, -0.000176807273646, 0.001864627368902,
+ 0.001864627368902, -0.000176807273646, -0.000562751488849,
+ -0.000897597057705, -0.000314256996835, -0.000314256996835,
+ -0.000314256996835, -0.001527942804748, -0.000121760530512,
+ 0.000191123989093, 0.010193185932466, 0.058637519197110,
+ 0.058637519197110, 0.010193185932466, 0.000191123989093,
+ -0.000121760530512, -0.001527942804748, -0.000314256996835,
+ -0.000897597057705, -0.000121760530512, 0.000946363683751,
+ 0.007113577630288, 0.000437956841058, -0.000372823835211,
+ -0.000372823835211, 0.000437956841058, 0.007113577630288,
+ 0.000946363683751, -0.000121760530512, -0.000897597057705,
+ -0.000562751488849, 0.000191123989093, 0.007113577630288,
+ 0.044592622228814, 0.000222278879007, -0.162864473015945,
+ -0.162864473015945, 0.000222278879007, 0.044592622228814,
+ 0.007113577630288, 0.000191123989093, -0.000562751488849,
+ -0.000176807273646, 0.010193185932466, 0.000437956841058,
+ 0.000222278879007, -0.000913092543974, -0.017071696107902,
+ -0.017071696107902, -0.000913092543974, 0.000222278879007,
+ 0.000437956841058, 0.010193185932466, -0.000176807273646,
+ 0.001864627368902, 0.058637519197110, -0.000372823835211,
+ -0.162864473015945, -0.017071696107902, 0.414660099370354,
+ 0.414660099370354, -0.017071696107902, -0.162864473015945,
+ -0.000372823835211, 0.058637519197110, 0.001864627368902,
+ 0.001864627368902, 0.058637519197110, -0.000372823835211,
+ -0.162864473015945, -0.017071696107902, 0.414660099370354,
+ 0.414660099370354, -0.017071696107902, -0.162864473015945,
+ -0.000372823835211, 0.058637519197110, 0.001864627368902,
+ -0.000176807273646, 0.010193185932466, 0.000437956841058,
+ 0.000222278879007, -0.000913092543974, -0.017071696107902,
+ -0.017071696107902, -0.000913092543974, 0.000222278879007,
+ 0.000437956841058, 0.010193185932466, -0.000176807273646,
+ -0.000562751488849, 0.000191123989093, 0.007113577630288,
+ 0.044592622228814, 0.000222278879007, -0.162864473015945,
+ -0.162864473015945, 0.000222278879007, 0.044592622228814,
+ 0.007113577630288, 0.000191123989093, -0.000562751488849,
+ -0.000897597057705, -0.000121760530512, 0.000946363683751,
+ 0.007113577630288, 0.000437956841058, -0.000372823835211,
+ -0.000372823835211, 0.000437956841058, 0.007113577630288,
+ 0.000946363683751, -0.000121760530512, -0.000897597057705,
+ -0.000314256996835, -0.001527942804748, -0.000121760530512,
+ 0.000191123989093, 0.010193185932466, 0.058637519197110,
+ 0.058637519197110, 0.010193185932466, 0.000191123989093,
+ -0.000121760530512, -0.001527942804748, -0.000314256996835,
+ -0.000314256996835, -0.000314256996835, -0.000897597057705,
+ -0.000562751488849, -0.000176807273646, 0.001864627368902,
+ 0.001864627368902, -0.000176807273646, -0.000562751488849,
+ -0.000897597057705, -0.000314256996835, -0.000314256996835};
+
+ int64_t xsize = input.xsize();
+ int64_t ysize = input.ysize();
+
+ ImageF box_downsample = CopyImage(input);
+ DownsampleImage(&box_downsample, 2);
+
+ ImageF mask(box_downsample.xsize(), box_downsample.ysize());
+ CreateMask(box_downsample, mask);
+
+ for (size_t y = 0; y < output->ysize(); y++) {
+ float* row_out = output->Row(y);
+ const float* row_in[kernely];
+ const float* row_mask = mask.Row(y);
+ // get the rows in the support
+ for (size_t ky = 0; ky < kernely; ky++) {
+ int64_t iy = y * 2 + ky - (kernely - 1) / 2;
+ if (iy < 0) iy = 0;
+ if (iy >= ysize) iy = ysize - 1;
+ row_in[ky] = input.Row(iy);
+ }
+
+ for (size_t x = 0; x < output->xsize(); x++) {
+ // get min and max values of the original image in the support
+ float min = std::numeric_limits<float>::max();
+ float max = std::numeric_limits<float>::min();
+ // kernelx - R and kernely - R are the radius of a rectangular region in
+ // which the values of a pixel are bounded to reduce ringing.
+ static constexpr int64_t R = 5;
+ for (int64_t ky = R; ky + R < kernely; ky++) {
+ for (int64_t kx = R; kx + R < kernelx; kx++) {
+ int64_t ix = x * 2 + kx - (kernelx - 1) / 2;
+ if (ix < 0) ix = 0;
+ if (ix >= xsize) ix = xsize - 1;
+ min = std::min<float>(min, row_in[ky][ix]);
+ max = std::max<float>(max, row_in[ky][ix]);
+ }
+ }
+
+ float sum = 0;
+ for (int64_t ky = 0; ky < kernely; ky++) {
+ for (int64_t kx = 0; kx < kernelx; kx++) {
+ int64_t ix = x * 2 + kx - (kernelx - 1) / 2;
+ if (ix < 0) ix = 0;
+ if (ix >= xsize) ix = xsize - 1;
+ sum += row_in[ky][ix] * kernel[ky * kernelx + kx];
+ }
+ }
+
+ row_out[x] = sum;
+
+ // Clamp the pixel within the value of a small area to prevent ringning.
+ // The mask determines how much to clamp, clamp more to reduce more
+ // ringing in smooth areas, clamp less in noisy areas to get more
+ // sharpness. Higher mask_multiplier gives less clamping, so less
+ // ringing reduction.
+ const constexpr float mask_multiplier = 1;
+ float a = row_mask[x] * mask_multiplier;
+ float clip_min = min - a;
+ float clip_max = max + a;
+ if (row_out[x] < clip_min) {
+ row_out[x] = clip_min;
+ } else if (row_out[x] > clip_max) {
+ row_out[x] = clip_max;
+ }
+ }
+ }
+}
+
+void DownsampleImage2_Sharper(Image3F* opsin) {
+ // Allocate extra space to avoid a reallocation when padding.
+ Image3F downsampled(DivCeil(opsin->xsize(), 2) + kBlockDim,
+ DivCeil(opsin->ysize(), 2) + kBlockDim);
+ downsampled.ShrinkTo(downsampled.xsize() - kBlockDim,
+ downsampled.ysize() - kBlockDim);
+
+ for (size_t c = 0; c < 3; c++) {
+ DownsampleImage2_Sharper(opsin->Plane(c), &downsampled.Plane(c));
+ }
+ *opsin = std::move(downsampled);
+}
+
+// The default upsampling kernels used by Upsampler in the decoder.
+static const constexpr int64_t kSize = 5;
+
+static const float kernel00[25] = {
+ -0.01716200f, -0.03452303f, -0.04022174f, -0.02921014f, -0.00624645f,
+ -0.03452303f, 0.14111091f, 0.28896755f, 0.00278718f, -0.01610267f,
+ -0.04022174f, 0.28896755f, 0.56661550f, 0.03777607f, -0.01986694f,
+ -0.02921014f, 0.00278718f, 0.03777607f, -0.03144731f, -0.01185068f,
+ -0.00624645f, -0.01610267f, -0.01986694f, -0.01185068f, -0.00213539f,
+};
+static const float kernel01[25] = {
+ -0.00624645f, -0.01610267f, -0.01986694f, -0.01185068f, -0.00213539f,
+ -0.02921014f, 0.00278718f, 0.03777607f, -0.03144731f, -0.01185068f,
+ -0.04022174f, 0.28896755f, 0.56661550f, 0.03777607f, -0.01986694f,
+ -0.03452303f, 0.14111091f, 0.28896755f, 0.00278718f, -0.01610267f,
+ -0.01716200f, -0.03452303f, -0.04022174f, -0.02921014f, -0.00624645f,
+};
+static const float kernel10[25] = {
+ -0.00624645f, -0.02921014f, -0.04022174f, -0.03452303f, -0.01716200f,
+ -0.01610267f, 0.00278718f, 0.28896755f, 0.14111091f, -0.03452303f,
+ -0.01986694f, 0.03777607f, 0.56661550f, 0.28896755f, -0.04022174f,
+ -0.01185068f, -0.03144731f, 0.03777607f, 0.00278718f, -0.02921014f,
+ -0.00213539f, -0.01185068f, -0.01986694f, -0.01610267f, -0.00624645f,
+};
+static const float kernel11[25] = {
+ -0.00213539f, -0.01185068f, -0.01986694f, -0.01610267f, -0.00624645f,
+ -0.01185068f, -0.03144731f, 0.03777607f, 0.00278718f, -0.02921014f,
+ -0.01986694f, 0.03777607f, 0.56661550f, 0.28896755f, -0.04022174f,
+ -0.01610267f, 0.00278718f, 0.28896755f, 0.14111091f, -0.03452303f,
+ -0.00624645f, -0.02921014f, -0.04022174f, -0.03452303f, -0.01716200f,
+};
+
+// Does exactly the same as the Upsampler in dec_upsampler for 2x2 pixels, with
+// default CustomTransformData.
+// TODO(lode): use Upsampler instead. However, it requires pre-initialization
+// and padding on the left side of the image which requires refactoring the
+// other code using this.
+static void UpsampleImage(const ImageF& input, ImageF* output) {
+ int64_t xsize = input.xsize();
+ int64_t ysize = input.ysize();
+ int64_t xsize2 = output->xsize();
+ int64_t ysize2 = output->ysize();
+ for (int64_t y = 0; y < ysize2; y++) {
+ for (int64_t x = 0; x < xsize2; x++) {
+ auto kernel = kernel00;
+ if ((x & 1) && (y & 1)) {
+ kernel = kernel11;
+ } else if (x & 1) {
+ kernel = kernel10;
+ } else if (y & 1) {
+ kernel = kernel01;
+ }
+ float sum = 0;
+ int64_t x2 = x / 2;
+ int64_t y2 = y / 2;
+
+ // get min and max values of the original image in the support
+ float min = std::numeric_limits<float>::max();
+ float max = std::numeric_limits<float>::min();
+
+ for (int64_t ky = 0; ky < kSize; ky++) {
+ for (int64_t kx = 0; kx < kSize; kx++) {
+ int64_t xi = x2 - kSize / 2 + kx;
+ int64_t yi = y2 - kSize / 2 + ky;
+ if (xi < 0) xi = 0;
+ if (xi >= xsize) xi = input.xsize() - 1;
+ if (yi < 0) yi = 0;
+ if (yi >= ysize) yi = input.ysize() - 1;
+ min = std::min<float>(min, input.Row(yi)[xi]);
+ max = std::max<float>(max, input.Row(yi)[xi]);
+ }
+ }
+
+ for (int64_t ky = 0; ky < kSize; ky++) {
+ for (int64_t kx = 0; kx < kSize; kx++) {
+ int64_t xi = x2 - kSize / 2 + kx;
+ int64_t yi = y2 - kSize / 2 + ky;
+ if (xi < 0) xi = 0;
+ if (xi >= xsize) xi = input.xsize() - 1;
+ if (yi < 0) yi = 0;
+ if (yi >= ysize) yi = input.ysize() - 1;
+ sum += input.Row(yi)[xi] * kernel[ky * kSize + kx];
+ }
+ }
+ output->Row(y)[x] = sum;
+ if (output->Row(y)[x] < min) output->Row(y)[x] = min;
+ if (output->Row(y)[x] > max) output->Row(y)[x] = max;
+ }
+ }
+}
+
+// Returns the derivative of Upsampler, with respect to input pixel x2, y2, to
+// output pixel x, y (ignoring the clamping).
+float UpsamplerDeriv(int64_t x2, int64_t y2, int64_t x, int64_t y) {
+ auto kernel = kernel00;
+ if ((x & 1) && (y & 1)) {
+ kernel = kernel11;
+ } else if (x & 1) {
+ kernel = kernel10;
+ } else if (y & 1) {
+ kernel = kernel01;
+ }
+
+ int64_t ix = x / 2;
+ int64_t iy = y / 2;
+ int64_t kx = x2 - ix + kSize / 2;
+ int64_t ky = y2 - iy + kSize / 2;
+
+ // This should not happen.
+ if (kx < 0 || kx >= kSize || ky < 0 || ky >= kSize) return 0;
+
+ return kernel[ky * kSize + kx];
+}
+
+// Apply the derivative of the Upsampler to the input, reversing the effect of
+// its coefficients. The output image is 2x2 times smaller than the input.
+void AntiUpsample(const ImageF& input, ImageF* d) {
+ int64_t xsize = input.xsize();
+ int64_t ysize = input.ysize();
+ int64_t xsize2 = d->xsize();
+ int64_t ysize2 = d->ysize();
+ int64_t k0 = kSize - 1;
+ int64_t k1 = kSize;
+ for (int64_t y2 = 0; y2 < ysize2; ++y2) {
+ auto* row = d->Row(y2);
+ for (int64_t x2 = 0; x2 < xsize2; ++x2) {
+ int64_t x0 = x2 * 2 - k0;
+ if (x0 < 0) x0 = 0;
+ int64_t x1 = x2 * 2 + k1 + 1;
+ if (x1 > xsize) x1 = xsize;
+ int64_t y0 = y2 * 2 - k0;
+ if (y0 < 0) y0 = 0;
+ int64_t y1 = y2 * 2 + k1 + 1;
+ if (y1 > ysize) y1 = ysize;
+
+ float sum = 0;
+ for (int64_t y = y0; y < y1; ++y) {
+ const auto* row_in = input.Row(y);
+ for (int64_t x = x0; x < x1; ++x) {
+ double deriv = UpsamplerDeriv(x2, y2, x, y);
+ sum += deriv * row_in[x];
+ }
+ }
+ row[x2] = sum;
+ }
+ }
+}
+
+// Element-wise multiplies two images.
+template <typename T>
+void ElwiseMul(const Plane<T>& image1, const Plane<T>& image2, Plane<T>* out) {
+ const size_t xsize = image1.xsize();
+ const size_t ysize = image1.ysize();
+ JXL_CHECK(xsize == image2.xsize());
+ JXL_CHECK(ysize == image2.ysize());
+ JXL_CHECK(xsize == out->xsize());
+ JXL_CHECK(ysize == out->ysize());
+ for (size_t y = 0; y < ysize; ++y) {
+ const T* const JXL_RESTRICT row1 = image1.Row(y);
+ const T* const JXL_RESTRICT row2 = image2.Row(y);
+ T* const JXL_RESTRICT row_out = out->Row(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ row_out[x] = row1[x] * row2[x];
+ }
+ }
+}
+
+// Element-wise divides two images.
+template <typename T>
+void ElwiseDiv(const Plane<T>& image1, const Plane<T>& image2, Plane<T>* out) {
+ const size_t xsize = image1.xsize();
+ const size_t ysize = image1.ysize();
+ JXL_CHECK(xsize == image2.xsize());
+ JXL_CHECK(ysize == image2.ysize());
+ JXL_CHECK(xsize == out->xsize());
+ JXL_CHECK(ysize == out->ysize());
+ for (size_t y = 0; y < ysize; ++y) {
+ const T* const JXL_RESTRICT row1 = image1.Row(y);
+ const T* const JXL_RESTRICT row2 = image2.Row(y);
+ T* const JXL_RESTRICT row_out = out->Row(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ row_out[x] = row1[x] / row2[x];
+ }
+ }
+}
+
+void ReduceRinging(const ImageF& initial, const ImageF& mask, ImageF& down) {
+ int64_t xsize2 = down.xsize();
+ int64_t ysize2 = down.ysize();
+
+ for (size_t y = 0; y < down.ysize(); y++) {
+ const float* row_mask = mask.Row(y);
+ float* row_out = down.Row(y);
+ for (size_t x = 0; x < down.xsize(); x++) {
+ float v = down.Row(y)[x];
+ float min = initial.Row(y)[x];
+ float max = initial.Row(y)[x];
+ for (int64_t yi = -1; yi < 2; yi++) {
+ for (int64_t xi = -1; xi < 2; xi++) {
+ int64_t x2 = (int64_t)x + xi;
+ int64_t y2 = (int64_t)y + yi;
+ if (x2 < 0 || y2 < 0 || x2 >= (int64_t)xsize2 ||
+ y2 >= (int64_t)ysize2)
+ continue;
+ min = std::min<float>(min, initial.Row(y2)[x2]);
+ max = std::max<float>(max, initial.Row(y2)[x2]);
+ }
+ }
+
+ row_out[x] = v;
+
+ // Clamp the pixel within the value of a small area to prevent ringning.
+ // The mask determines how much to clamp, clamp more to reduce more
+ // ringing in smooth areas, clamp less in noisy areas to get more
+ // sharpness. Higher mask_multiplier gives less clamping, so less
+ // ringing reduction.
+ const constexpr float mask_multiplier = 2;
+ float a = row_mask[x] * mask_multiplier;
+ float clip_min = min - a;
+ float clip_max = max + a;
+ if (row_out[x] < clip_min) row_out[x] = clip_min;
+ if (row_out[x] > clip_max) row_out[x] = clip_max;
+ }
+ }
+}
+
+// TODO(lode): move this to a separate file enc_downsample.cc
+void DownsampleImage2_Iterative(const ImageF& orig, ImageF* output) {
+ int64_t xsize = orig.xsize();
+ int64_t ysize = orig.ysize();
+ int64_t xsize2 = DivCeil(orig.xsize(), 2);
+ int64_t ysize2 = DivCeil(orig.ysize(), 2);
+
+ ImageF box_downsample = CopyImage(orig);
+ DownsampleImage(&box_downsample, 2);
+ ImageF mask(box_downsample.xsize(), box_downsample.ysize());
+ CreateMask(box_downsample, mask);
+
+ output->ShrinkTo(xsize2, ysize2);
+
+ // Initial result image using the sharper downsampling.
+ // Allocate extra space to avoid a reallocation when padding.
+ ImageF initial(DivCeil(orig.xsize(), 2) + kBlockDim,
+ DivCeil(orig.ysize(), 2) + kBlockDim);
+ initial.ShrinkTo(initial.xsize() - kBlockDim, initial.ysize() - kBlockDim);
+ DownsampleImage2_Sharper(orig, &initial);
+
+ ImageF down = CopyImage(initial);
+ ImageF up(xsize, ysize);
+ ImageF corr(xsize, ysize);
+ ImageF corr2(xsize2, ysize2);
+
+ // In the weights map, relatively higher values will allow less ringing but
+ // also less sharpness. With all constant values, it optimizes equally
+ // everywhere. Even in this case, the weights2 computed from
+ // this is still used and differs at the borders of the image.
+ // TODO(lode): Make use of the weights field for anti-ringing and clamping,
+ // the values are all set to 1 for now, but it is intended to be used for
+ // reducing ringing based on the mask, and taking clamping into account.
+ ImageF weights(xsize, ysize);
+ for (size_t y = 0; y < weights.ysize(); y++) {
+ auto* row = weights.Row(y);
+ for (size_t x = 0; x < weights.xsize(); x++) {
+ row[x] = 1;
+ }
+ }
+ ImageF weights2(xsize2, ysize2);
+ AntiUpsample(weights, &weights2);
+
+ const size_t num_it = 3;
+ for (size_t it = 0; it < num_it; ++it) {
+ UpsampleImage(down, &up);
+ corr = LinComb<float>(1, orig, -1, up);
+ ElwiseMul(corr, weights, &corr);
+ AntiUpsample(corr, &corr2);
+ ElwiseDiv(corr2, weights2, &corr2);
+
+ down = LinComb<float>(1, down, 1, corr2);
+ }
+
+ ReduceRinging(initial, mask, down);
+
+ // can't just use CopyImage, because the output image was prepared with
+ // padding.
+ for (size_t y = 0; y < down.ysize(); y++) {
+ for (size_t x = 0; x < down.xsize(); x++) {
+ float v = down.Row(y)[x];
+ output->Row(y)[x] = v;
+ }
+ }
+}
+
+void DownsampleImage2_Iterative(Image3F* opsin) {
+ // Allocate extra space to avoid a reallocation when padding.
+ Image3F downsampled(DivCeil(opsin->xsize(), 2) + kBlockDim,
+ DivCeil(opsin->ysize(), 2) + kBlockDim);
+ downsampled.ShrinkTo(downsampled.xsize() - kBlockDim,
+ downsampled.ysize() - kBlockDim);
+
+ Image3F rgb(opsin->xsize(), opsin->ysize());
+ OpsinParams opsin_params; // TODO: use the ones that are actually used
+ opsin_params.Init(kDefaultIntensityTarget);
+ OpsinToLinear(*opsin, Rect(rgb), nullptr, &rgb, opsin_params);
+
+ ImageF mask(opsin->xsize(), opsin->ysize());
+ ButteraugliParams butter_params;
+ ButteraugliComparator butter(rgb, butter_params);
+ butter.Mask(&mask);
+ ImageF mask_fuzzy(opsin->xsize(), opsin->ysize());
+
+ for (size_t c = 0; c < 3; c++) {
+ DownsampleImage2_Iterative(opsin->Plane(c), &downsampled.Plane(c));
+ }
+ *opsin = std::move(downsampled);
+}
+} // namespace
+
+Status DefaultEncoderHeuristics::LossyFrameHeuristics(
+ PassesEncoderState* enc_state, ModularFrameEncoder* modular_frame_encoder,
+ const ImageBundle* original_pixels, Image3F* opsin,
+ const JxlCmsInterface& cms, ThreadPool* pool, AuxOut* aux_out) {
+ PROFILER_ZONE("JxlLossyFrameHeuristics uninstrumented");
+
+ CompressParams& cparams = enc_state->cparams;
+ PassesSharedState& shared = enc_state->shared;
+
+ // Compute parameters for noise synthesis.
+ if (shared.frame_header.flags & FrameHeader::kNoise) {
+ PROFILER_ZONE("enc GetNoiseParam");
+ if (cparams.photon_noise_iso == 0) {
+ // Don't start at zero amplitude since adding noise is expensive -- it
+ // significantly slows down decoding, and this is unlikely to
+ // completely go away even with advanced optimizations. After the
+ // kNoiseModelingRampUpDistanceRange we have reached the full level,
+ // i.e. noise is no longer represented by the compressed image, so we
+ // can add full noise by the noise modeling itself.
+ static const float kNoiseModelingRampUpDistanceRange = 0.6;
+ static const float kNoiseLevelAtStartOfRampUp = 0.25;
+ static const float kNoiseRampupStart = 1.0;
+ // TODO(user) test and properly select quality_coef with smooth
+ // filter
+ float quality_coef = 1.0f;
+ const float rampup = (cparams.butteraugli_distance - kNoiseRampupStart) /
+ kNoiseModelingRampUpDistanceRange;
+ if (rampup < 1.0f) {
+ quality_coef = kNoiseLevelAtStartOfRampUp +
+ (1.0f - kNoiseLevelAtStartOfRampUp) * rampup;
+ }
+ if (rampup < 0.0f) {
+ quality_coef = kNoiseRampupStart;
+ }
+ if (!GetNoiseParameter(*opsin, &shared.image_features.noise_params,
+ quality_coef)) {
+ shared.frame_header.flags &= ~FrameHeader::kNoise;
+ }
+ }
+ }
+ if (enc_state->shared.frame_header.upsampling != 1 &&
+ !cparams.already_downsampled) {
+ // In VarDCT mode, LossyFrameHeuristics takes care of running downsampling
+ // after noise, if necessary.
+ if (cparams.resampling == 2) {
+ // TODO(lode): use the regular DownsampleImage, or adapt to the custom
+ // coefficients, if there is are custom upscaling coefficients in
+ // CustomTransformData
+ if (cparams.speed_tier <= SpeedTier::kSquirrel) {
+ // TODO(lode): DownsampleImage2_Iterative is currently too slow to
+ // be used for squirrel, make it faster, and / or enable it only for
+ // kitten.
+ DownsampleImage2_Iterative(opsin);
+ } else {
+ DownsampleImage2_Sharper(opsin);
+ }
+ } else {
+ DownsampleImage(opsin, cparams.resampling);
+ }
+ PadImageToBlockMultipleInPlace(opsin);
+ }
+
+ if (cparams.butteraugli_distance < 0) {
+ return JXL_FAILURE("Expected non-negative distance");
+ }
+
+ // Find and subtract splines.
+ if (cparams.speed_tier <= SpeedTier::kSquirrel) {
+ // If we do already have them, they were passed upstream to EncodeFile.
+ if (!shared.image_features.splines.HasAny()) {
+ shared.image_features.splines = FindSplines(*opsin);
+ }
+ JXL_RETURN_IF_ERROR(shared.image_features.splines.InitializeDrawCache(
+ opsin->xsize(), opsin->ysize(), shared.cmap));
+ shared.image_features.splines.SubtractFrom(opsin);
+ }
+
+ // Find and subtract patches/dots.
+ if (ApplyOverride(cparams.patches,
+ cparams.speed_tier <= SpeedTier::kSquirrel)) {
+ FindBestPatchDictionary(*opsin, enc_state, cms, pool, aux_out);
+ PatchDictionaryEncoder::SubtractFrom(shared.image_features.patches, opsin);
+ }
+
+ static const float kAcQuant = 0.79f;
+ const float quant_dc = InitialQuantDC(cparams.butteraugli_distance);
+ Quantizer& quantizer = enc_state->shared.quantizer;
+ // We don't know the quant field yet, but for computing the global scale
+ // assuming that it will be the same as for Falcon mode is good enough.
+ quantizer.ComputeGlobalScaleAndQuant(
+ quant_dc, kAcQuant / cparams.butteraugli_distance, 0);
+
+ // TODO(veluca): we can now run all the code from here to FindBestQuantizer
+ // (excluded) one rect at a time. Do that.
+
+ // Dependency graph:
+ //
+ // input: either XYB or input image
+ //
+ // input image -> XYB [optional]
+ // XYB -> initial quant field
+ // XYB -> Gaborished XYB
+ // Gaborished XYB -> CfL1
+ // initial quant field, Gaborished XYB, CfL1 -> ACS
+ // initial quant field, ACS, Gaborished XYB -> EPF control field
+ // initial quant field -> adjusted initial quant field
+ // adjusted initial quant field, ACS -> raw quant field
+ // raw quant field, ACS, Gaborished XYB -> CfL2
+ //
+ // output: Gaborished XYB, CfL, ACS, raw quant field, EPF control field.
+
+ ArControlFieldHeuristics ar_heuristics;
+ AcStrategyHeuristics acs_heuristics;
+ CfLHeuristics cfl_heuristics;
+
+ if (!opsin->xsize()) {
+ JXL_ASSERT(HandlesColorConversion(cparams, *original_pixels));
+ *opsin = Image3F(RoundUpToBlockDim(original_pixels->xsize()),
+ RoundUpToBlockDim(original_pixels->ysize()));
+ opsin->ShrinkTo(original_pixels->xsize(), original_pixels->ysize());
+ ToXYB(*original_pixels, pool, opsin, cms, /*linear=*/nullptr);
+ PadImageToBlockMultipleInPlace(opsin);
+ }
+
+ // Compute an initial estimate of the quantization field.
+ // Call InitialQuantField only in Hare mode or slower. Otherwise, rely
+ // on simple heuristics in FindBestAcStrategy, or set a constant for Falcon
+ // mode.
+ if (cparams.speed_tier > SpeedTier::kHare || cparams.uniform_quant > 0) {
+ enc_state->initial_quant_field =
+ ImageF(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
+ enc_state->initial_quant_masking =
+ ImageF(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks);
+ float q = cparams.uniform_quant > 0
+ ? cparams.uniform_quant
+ : kAcQuant / cparams.butteraugli_distance;
+ FillImage(q, &enc_state->initial_quant_field);
+ FillImage(1.0f / (q + 0.001f), &enc_state->initial_quant_masking);
+ } else {
+ // Call this here, as it relies on pre-gaborish values.
+ float butteraugli_distance_for_iqf = cparams.butteraugli_distance;
+ if (!shared.frame_header.loop_filter.gab) {
+ butteraugli_distance_for_iqf *= 0.73f;
+ }
+ enc_state->initial_quant_field = InitialQuantField(
+ butteraugli_distance_for_iqf, *opsin, shared.frame_dim, pool, 1.0f,
+ &enc_state->initial_quant_masking);
+ quantizer.SetQuantField(quant_dc, enc_state->initial_quant_field, nullptr);
+ }
+
+ // TODO(veluca): do something about animations.
+
+ // Apply inverse-gaborish.
+ if (shared.frame_header.loop_filter.gab) {
+ // Unsure why better to do some more gaborish on X and B than Y.
+ float weight[3] = {
+ 1.0036278514398933f,
+ 0.99406123118127299f,
+ 0.99719338015886894f,
+ };
+ GaborishInverse(opsin, weight, pool);
+ }
+
+ FindBestDequantMatrices(cparams, *opsin, modular_frame_encoder,
+ &enc_state->shared.matrices);
+
+ cfl_heuristics.Init(*opsin);
+ acs_heuristics.Init(*opsin, enc_state);
+
+ auto process_tile = [&](const uint32_t tid, const size_t thread) {
+ size_t n_enc_tiles =
+ DivCeil(enc_state->shared.frame_dim.xsize_blocks, kEncTileDimInBlocks);
+ size_t tx = tid % n_enc_tiles;
+ size_t ty = tid / n_enc_tiles;
+ size_t by0 = ty * kEncTileDimInBlocks;
+ size_t by1 = std::min((ty + 1) * kEncTileDimInBlocks,
+ enc_state->shared.frame_dim.ysize_blocks);
+ size_t bx0 = tx * kEncTileDimInBlocks;
+ size_t bx1 = std::min((tx + 1) * kEncTileDimInBlocks,
+ enc_state->shared.frame_dim.xsize_blocks);
+ Rect r(bx0, by0, bx1 - bx0, by1 - by0);
+
+ // For speeds up to Wombat, we only compute the color correlation map
+ // once we know the transform type and the quantization map.
+ if (cparams.speed_tier <= SpeedTier::kSquirrel) {
+ cfl_heuristics.ComputeTile(r, *opsin, enc_state->shared.matrices,
+ /*ac_strategy=*/nullptr,
+ /*raw_quant_field=*/nullptr,
+ /*quantizer=*/nullptr, /*fast=*/false, thread,
+ &enc_state->shared.cmap);
+ }
+
+ // Choose block sizes.
+ acs_heuristics.ProcessRect(r);
+
+ // Choose amount of post-processing smoothing.
+ // TODO(veluca): should this go *after* AdjustQuantField?
+ ar_heuristics.RunRect(r, *opsin, enc_state, thread);
+
+ // Always set the initial quant field, so we can compute the CfL map with
+ // more accuracy. The initial quant field might change in slower modes, but
+ // adjusting the quant field with butteraugli when all the other encoding
+ // parameters are fixed is likely a more reliable choice anyway.
+ AdjustQuantField(enc_state->shared.ac_strategy, r,
+ &enc_state->initial_quant_field);
+ quantizer.SetQuantFieldRect(enc_state->initial_quant_field, r,
+ &enc_state->shared.raw_quant_field);
+
+ // Compute a non-default CfL map if we are at Hare speed, or slower.
+ if (cparams.speed_tier <= SpeedTier::kHare) {
+ cfl_heuristics.ComputeTile(
+ r, *opsin, enc_state->shared.matrices, &enc_state->shared.ac_strategy,
+ &enc_state->shared.raw_quant_field, &enc_state->shared.quantizer,
+ /*fast=*/cparams.speed_tier >= SpeedTier::kWombat, thread,
+ &enc_state->shared.cmap);
+ }
+ };
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0,
+ DivCeil(enc_state->shared.frame_dim.xsize_blocks, kEncTileDimInBlocks) *
+ DivCeil(enc_state->shared.frame_dim.ysize_blocks,
+ kEncTileDimInBlocks),
+ [&](const size_t num_threads) {
+ ar_heuristics.PrepareForThreads(num_threads);
+ cfl_heuristics.PrepareForThreads(num_threads);
+ return true;
+ },
+ process_tile, "Enc Heuristics"));
+
+ acs_heuristics.Finalize(aux_out);
+ if (cparams.speed_tier <= SpeedTier::kHare) {
+ cfl_heuristics.ComputeDC(/*fast=*/cparams.speed_tier >= SpeedTier::kWombat,
+ &enc_state->shared.cmap);
+ }
+
+ // Refine quantization levels.
+ FindBestQuantizer(original_pixels, *opsin, enc_state, cms, pool, aux_out);
+
+ // Choose a context model that depends on the amount of quantization for AC.
+ if (cparams.speed_tier < SpeedTier::kFalcon) {
+ FindBestBlockEntropyModel(*enc_state);
+ }
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_heuristics.h b/third_party/jpeg-xl/lib/jxl/enc_heuristics.h
new file mode 100644
index 0000000000..3cb9b506a6
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_heuristics.h
@@ -0,0 +1,81 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_HEURISTICS_H_
+#define LIB_JXL_ENC_HEURISTICS_H_
+
+// Hook for custom encoder heuristics (VarDCT only for now).
+
+#include <jxl/cms_interface.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/modular/encoding/enc_ma.h"
+
+namespace jxl {
+
+struct AuxOut;
+struct PassesEncoderState;
+class DequantMatrices;
+class ImageBundle;
+class ModularFrameEncoder;
+
+class EncoderHeuristics {
+ public:
+ virtual ~EncoderHeuristics() = default;
+ // Initializes encoder structures in `enc_state` using the original image data
+ // in `original_pixels`, and the XYB image data in `opsin`. Also modifies the
+ // `opsin` image by applying Gaborish, and doing other modifications if
+ // necessary. `pool` is used for running the computations on multiple threads.
+ // `aux_out` collects statistics and can be used to print debug images.
+ virtual Status LossyFrameHeuristics(
+ PassesEncoderState* enc_state, ModularFrameEncoder* modular_frame_encoder,
+ const ImageBundle* original_pixels, Image3F* opsin,
+ const JxlCmsInterface& cms, ThreadPool* pool, AuxOut* aux_out) = 0;
+
+ // Custom fixed tree for lossless mode. Must set `tree` to a valid tree if
+ // the function returns true.
+ virtual bool CustomFixedTreeLossless(const FrameDimensions& frame_dim,
+ Tree* tree) {
+ return false;
+ }
+
+ // If this method returns `true`, the `opsin` parameter to
+ // LossyFrameHeuristics will not be initialized, and should be initialized
+ // during the call. Moreover, `original_pixels` may not be in a linear
+ // colorspace (but will be the same as the `ib` value passed to this
+ // function).
+ virtual bool HandlesColorConversion(const CompressParams& cparams,
+ const ImageBundle& ib) {
+ return false;
+ }
+};
+
+class DefaultEncoderHeuristics : public EncoderHeuristics {
+ public:
+ Status LossyFrameHeuristics(PassesEncoderState* enc_state,
+ ModularFrameEncoder* modular_frame_encoder,
+ const ImageBundle* original_pixels,
+ Image3F* opsin, const JxlCmsInterface& cms,
+ ThreadPool* pool, AuxOut* aux_out) override;
+ bool HandlesColorConversion(const CompressParams& cparams,
+ const ImageBundle& ib) override;
+};
+
+// Exposed here since it may be used by other EncoderHeuristics implementations
+// outside this project.
+void FindBestDequantMatrices(const CompressParams& cparams,
+ const Image3F& opsin,
+ ModularFrameEncoder* modular_frame_encoder,
+ DequantMatrices* dequant_matrices);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_HEURISTICS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_huffman.cc b/third_party/jpeg-xl/lib/jxl/enc_huffman.cc
new file mode 100644
index 0000000000..3eab2c218a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_huffman.cc
@@ -0,0 +1,214 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_huffman.h"
+
+#include <algorithm>
+#include <memory>
+
+#include "lib/jxl/enc_huffman_tree.h"
+
+namespace jxl {
+
+namespace {
+
+constexpr int kCodeLengthCodes = 18;
+
+void StoreHuffmanTreeOfHuffmanTreeToBitMask(const int num_codes,
+ const uint8_t* code_length_bitdepth,
+ BitWriter* writer) {
+ static const uint8_t kStorageOrder[kCodeLengthCodes] = {
+ 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+ // The bit lengths of the Huffman code over the code length alphabet
+ // are compressed with the following static Huffman code:
+ // Symbol Code
+ // ------ ----
+ // 0 00
+ // 1 1110
+ // 2 110
+ // 3 01
+ // 4 10
+ // 5 1111
+ static const uint8_t kHuffmanBitLengthHuffmanCodeSymbols[6] = {0, 7, 3,
+ 2, 1, 15};
+ static const uint8_t kHuffmanBitLengthHuffmanCodeBitLengths[6] = {2, 4, 3,
+ 2, 2, 4};
+
+ // Throw away trailing zeros:
+ size_t codes_to_store = kCodeLengthCodes;
+ if (num_codes > 1) {
+ for (; codes_to_store > 0; --codes_to_store) {
+ if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
+ break;
+ }
+ }
+ }
+ size_t skip_some = 0; // skips none.
+ if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
+ code_length_bitdepth[kStorageOrder[1]] == 0) {
+ skip_some = 2; // skips two.
+ if (code_length_bitdepth[kStorageOrder[2]] == 0) {
+ skip_some = 3; // skips three.
+ }
+ }
+ writer->Write(2, skip_some);
+ for (size_t i = skip_some; i < codes_to_store; ++i) {
+ size_t l = code_length_bitdepth[kStorageOrder[i]];
+ writer->Write(kHuffmanBitLengthHuffmanCodeBitLengths[l],
+ kHuffmanBitLengthHuffmanCodeSymbols[l]);
+ }
+}
+
+void StoreHuffmanTreeToBitMask(const size_t huffman_tree_size,
+ const uint8_t* huffman_tree,
+ const uint8_t* huffman_tree_extra_bits,
+ const uint8_t* code_length_bitdepth,
+ const uint16_t* code_length_bitdepth_symbols,
+ BitWriter* writer) {
+ for (size_t i = 0; i < huffman_tree_size; ++i) {
+ size_t ix = huffman_tree[i];
+ writer->Write(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix]);
+ // Extra bits
+ switch (ix) {
+ case 16:
+ writer->Write(2, huffman_tree_extra_bits[i]);
+ break;
+ case 17:
+ writer->Write(3, huffman_tree_extra_bits[i]);
+ break;
+ }
+ }
+}
+
+void StoreSimpleHuffmanTree(const uint8_t* depths, size_t symbols[4],
+ size_t num_symbols, size_t max_bits,
+ BitWriter* writer) {
+ // value of 1 indicates a simple Huffman code
+ writer->Write(2, 1);
+ writer->Write(2, num_symbols - 1); // NSYM - 1
+
+ // Sort
+ for (size_t i = 0; i < num_symbols; i++) {
+ for (size_t j = i + 1; j < num_symbols; j++) {
+ if (depths[symbols[j]] < depths[symbols[i]]) {
+ std::swap(symbols[j], symbols[i]);
+ }
+ }
+ }
+
+ if (num_symbols == 2) {
+ writer->Write(max_bits, symbols[0]);
+ writer->Write(max_bits, symbols[1]);
+ } else if (num_symbols == 3) {
+ writer->Write(max_bits, symbols[0]);
+ writer->Write(max_bits, symbols[1]);
+ writer->Write(max_bits, symbols[2]);
+ } else {
+ writer->Write(max_bits, symbols[0]);
+ writer->Write(max_bits, symbols[1]);
+ writer->Write(max_bits, symbols[2]);
+ writer->Write(max_bits, symbols[3]);
+ // tree-select
+ writer->Write(1, depths[symbols[0]] == 1 ? 1 : 0);
+ }
+}
+
+// num = alphabet size
+// depths = symbol depths
+void StoreHuffmanTree(const uint8_t* depths, size_t num, BitWriter* writer) {
+ // Write the Huffman tree into the compact representation.
+ std::unique_ptr<uint8_t[]> arena(new uint8_t[2 * num]);
+ uint8_t* huffman_tree = arena.get();
+ uint8_t* huffman_tree_extra_bits = arena.get() + num;
+ size_t huffman_tree_size = 0;
+ WriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree,
+ huffman_tree_extra_bits);
+
+ // Calculate the statistics of the Huffman tree in the compact representation.
+ uint32_t huffman_tree_histogram[kCodeLengthCodes] = {0};
+ for (size_t i = 0; i < huffman_tree_size; ++i) {
+ ++huffman_tree_histogram[huffman_tree[i]];
+ }
+
+ int num_codes = 0;
+ int code = 0;
+ for (int i = 0; i < kCodeLengthCodes; ++i) {
+ if (huffman_tree_histogram[i]) {
+ if (num_codes == 0) {
+ code = i;
+ num_codes = 1;
+ } else if (num_codes == 1) {
+ num_codes = 2;
+ break;
+ }
+ }
+ }
+
+ // Calculate another Huffman tree to use for compressing both the
+ // earlier Huffman tree with.
+ uint8_t code_length_bitdepth[kCodeLengthCodes] = {0};
+ uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = {0};
+ CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes, 5,
+ &code_length_bitdepth[0]);
+ ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes,
+ &code_length_bitdepth_symbols[0]);
+
+ // Now, we have all the data, let's start storing it
+ StoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth,
+ writer);
+
+ if (num_codes == 1) {
+ code_length_bitdepth[code] = 0;
+ }
+
+ // Store the real huffman tree now.
+ StoreHuffmanTreeToBitMask(huffman_tree_size, huffman_tree,
+ huffman_tree_extra_bits, &code_length_bitdepth[0],
+ code_length_bitdepth_symbols, writer);
+}
+
+} // namespace
+
+void BuildAndStoreHuffmanTree(const uint32_t* histogram, const size_t length,
+ uint8_t* depth, uint16_t* bits,
+ BitWriter* writer) {
+ size_t count = 0;
+ size_t s4[4] = {0};
+ for (size_t i = 0; i < length; i++) {
+ if (histogram[i]) {
+ if (count < 4) {
+ s4[count] = i;
+ } else if (count > 4) {
+ break;
+ }
+ count++;
+ }
+ }
+
+ size_t max_bits_counter = length - 1;
+ size_t max_bits = 0;
+ while (max_bits_counter) {
+ max_bits_counter >>= 1;
+ ++max_bits;
+ }
+
+ if (count <= 1) {
+ // Output symbol bits and depths are initialized with 0, nothing to do.
+ writer->Write(4, 1);
+ writer->Write(max_bits, s4[0]);
+ return;
+ }
+
+ CreateHuffmanTree(histogram, length, 15, depth);
+ ConvertBitDepthsToSymbols(depth, length, bits);
+
+ if (count <= 4) {
+ StoreSimpleHuffmanTree(depth, s4, count, max_bits, writer);
+ } else {
+ StoreHuffmanTree(depth, length, writer);
+ }
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_huffman.h b/third_party/jpeg-xl/lib/jxl/enc_huffman.h
new file mode 100644
index 0000000000..d7a66584e8
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_huffman.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_HUFFMAN_H_
+#define LIB_JXL_ENC_HUFFMAN_H_
+
+#include "lib/jxl/enc_bit_writer.h"
+
+namespace jxl {
+
+// Builds a Huffman tree for the given histogram, and encodes it into writer
+// in a format that can be read by HuffmanDecodingData::ReadFromBitstream.
+// An allotment for `writer` must already have been created by the caller.
+void BuildAndStoreHuffmanTree(const uint32_t* histogram, size_t length,
+ uint8_t* depth, uint16_t* bits,
+ BitWriter* writer);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_HUFFMAN_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_huffman_tree.cc b/third_party/jpeg-xl/lib/jxl/enc_huffman_tree.cc
new file mode 100644
index 0000000000..5c40dea770
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_huffman_tree.cc
@@ -0,0 +1,328 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_huffman_tree.h"
+
+#include <algorithm>
+#include <limits>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth,
+ uint8_t level) {
+ if (p.index_left >= 0) {
+ ++level;
+ SetDepth(pool[p.index_left], pool, depth, level);
+ SetDepth(pool[p.index_right_or_value], pool, depth, level);
+ } else {
+ depth[p.index_right_or_value] = level;
+ }
+}
+
+// Sort the root nodes, least popular first.
+static JXL_INLINE bool Compare(const HuffmanTree& v0, const HuffmanTree& v1) {
+ return v0.total_count < v1.total_count;
+}
+
+// This function will create a Huffman tree.
+//
+// The catch here is that the tree cannot be arbitrarily deep.
+// Brotli specifies a maximum depth of 15 bits for "code trees"
+// and 7 bits for "code length code trees."
+//
+// count_limit is the value that is to be faked as the minimum value
+// and this minimum value is raised until the tree matches the
+// maximum length requirement.
+//
+// This algorithm is not of excellent performance for very long data blocks,
+// especially when population counts are longer than 2**tree_limit, but
+// we are not planning to use this with extremely long blocks.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+void CreateHuffmanTree(const uint32_t* data, const size_t length,
+ const int tree_limit, uint8_t* depth) {
+ // For block sizes below 64 kB, we never need to do a second iteration
+ // of this loop. Probably all of our block sizes will be smaller than
+ // that, so this loop is mostly of academic interest. If we actually
+ // would need this, we would be better off with the Katajainen algorithm.
+ for (uint32_t count_limit = 1;; count_limit *= 2) {
+ std::vector<HuffmanTree> tree;
+ tree.reserve(2 * length + 1);
+
+ for (size_t i = length; i != 0;) {
+ --i;
+ if (data[i]) {
+ const uint32_t count = std::max(data[i], count_limit - 1);
+ tree.emplace_back(count, -1, static_cast<int16_t>(i));
+ }
+ }
+
+ const size_t n = tree.size();
+ if (n == 1) {
+ // Fake value; will be fixed on upper level.
+ depth[tree[0].index_right_or_value] = 1;
+ break;
+ }
+
+ std::stable_sort(tree.begin(), tree.end(), Compare);
+
+ // The nodes are:
+ // [0, n): the sorted leaf nodes that we start with.
+ // [n]: we add a sentinel here.
+ // [n + 1, 2n): new parent nodes are added here, starting from
+ // (n+1). These are naturally in ascending order.
+ // [2n]: we add a sentinel at the end as well.
+ // There will be (2n+1) elements at the end.
+ const HuffmanTree sentinel(std::numeric_limits<uint32_t>::max(), -1, -1);
+ tree.push_back(sentinel);
+ tree.push_back(sentinel);
+
+ size_t i = 0; // Points to the next leaf node.
+ size_t j = n + 1; // Points to the next non-leaf node.
+ for (size_t k = n - 1; k != 0; --k) {
+ size_t left, right;
+ if (tree[i].total_count <= tree[j].total_count) {
+ left = i;
+ ++i;
+ } else {
+ left = j;
+ ++j;
+ }
+ if (tree[i].total_count <= tree[j].total_count) {
+ right = i;
+ ++i;
+ } else {
+ right = j;
+ ++j;
+ }
+
+ // The sentinel node becomes the parent node.
+ size_t j_end = tree.size() - 1;
+ tree[j_end].total_count =
+ tree[left].total_count + tree[right].total_count;
+ tree[j_end].index_left = static_cast<int16_t>(left);
+ tree[j_end].index_right_or_value = static_cast<int16_t>(right);
+
+ // Add back the last sentinel node.
+ tree.push_back(sentinel);
+ }
+ JXL_DASSERT(tree.size() == 2 * n + 1);
+ SetDepth(tree[2 * n - 1], &tree[0], depth, 0);
+
+ // We need to pack the Huffman tree in tree_limit bits.
+ // If this was not successful, add fake entities to the lowest values
+ // and retry.
+ if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) {
+ break;
+ }
+ }
+}
+
+void Reverse(uint8_t* v, size_t start, size_t end) {
+ --end;
+ while (start < end) {
+ uint8_t tmp = v[start];
+ v[start] = v[end];
+ v[end] = tmp;
+ ++start;
+ --end;
+ }
+}
+
+void WriteHuffmanTreeRepetitions(const uint8_t previous_value,
+ const uint8_t value, size_t repetitions,
+ size_t* tree_size, uint8_t* tree,
+ uint8_t* extra_bits_data) {
+ JXL_DASSERT(repetitions > 0);
+ if (previous_value != value) {
+ tree[*tree_size] = value;
+ extra_bits_data[*tree_size] = 0;
+ ++(*tree_size);
+ --repetitions;
+ }
+ if (repetitions == 7) {
+ tree[*tree_size] = value;
+ extra_bits_data[*tree_size] = 0;
+ ++(*tree_size);
+ --repetitions;
+ }
+ if (repetitions < 3) {
+ for (size_t i = 0; i < repetitions; ++i) {
+ tree[*tree_size] = value;
+ extra_bits_data[*tree_size] = 0;
+ ++(*tree_size);
+ }
+ } else {
+ repetitions -= 3;
+ size_t start = *tree_size;
+ while (true) {
+ tree[*tree_size] = 16;
+ extra_bits_data[*tree_size] = repetitions & 0x3;
+ ++(*tree_size);
+ repetitions >>= 2;
+ if (repetitions == 0) {
+ break;
+ }
+ --repetitions;
+ }
+ Reverse(tree, start, *tree_size);
+ Reverse(extra_bits_data, start, *tree_size);
+ }
+}
+
+void WriteHuffmanTreeRepetitionsZeros(size_t repetitions, size_t* tree_size,
+ uint8_t* tree, uint8_t* extra_bits_data) {
+ if (repetitions == 11) {
+ tree[*tree_size] = 0;
+ extra_bits_data[*tree_size] = 0;
+ ++(*tree_size);
+ --repetitions;
+ }
+ if (repetitions < 3) {
+ for (size_t i = 0; i < repetitions; ++i) {
+ tree[*tree_size] = 0;
+ extra_bits_data[*tree_size] = 0;
+ ++(*tree_size);
+ }
+ } else {
+ repetitions -= 3;
+ size_t start = *tree_size;
+ while (true) {
+ tree[*tree_size] = 17;
+ extra_bits_data[*tree_size] = repetitions & 0x7;
+ ++(*tree_size);
+ repetitions >>= 3;
+ if (repetitions == 0) {
+ break;
+ }
+ --repetitions;
+ }
+ Reverse(tree, start, *tree_size);
+ Reverse(extra_bits_data, start, *tree_size);
+ }
+}
+
+static void DecideOverRleUse(const uint8_t* depth, const size_t length,
+ bool* use_rle_for_non_zero,
+ bool* use_rle_for_zero) {
+ size_t total_reps_zero = 0;
+ size_t total_reps_non_zero = 0;
+ size_t count_reps_zero = 1;
+ size_t count_reps_non_zero = 1;
+ for (size_t i = 0; i < length;) {
+ const uint8_t value = depth[i];
+ size_t reps = 1;
+ for (size_t k = i + 1; k < length && depth[k] == value; ++k) {
+ ++reps;
+ }
+ if (reps >= 3 && value == 0) {
+ total_reps_zero += reps;
+ ++count_reps_zero;
+ }
+ if (reps >= 4 && value != 0) {
+ total_reps_non_zero += reps;
+ ++count_reps_non_zero;
+ }
+ i += reps;
+ }
+ *use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2;
+ *use_rle_for_zero = total_reps_zero > count_reps_zero * 2;
+}
+
+void WriteHuffmanTree(const uint8_t* depth, size_t length, size_t* tree_size,
+ uint8_t* tree, uint8_t* extra_bits_data) {
+ uint8_t previous_value = 8;
+
+ // Throw away trailing zeros.
+ size_t new_length = length;
+ for (size_t i = 0; i < length; ++i) {
+ if (depth[length - i - 1] == 0) {
+ --new_length;
+ } else {
+ break;
+ }
+ }
+
+ // First gather statistics on if it is a good idea to do rle.
+ bool use_rle_for_non_zero = false;
+ bool use_rle_for_zero = false;
+ if (length > 50) {
+ // Find rle coding for longer codes.
+ // Shorter codes seem not to benefit from rle.
+ DecideOverRleUse(depth, new_length, &use_rle_for_non_zero,
+ &use_rle_for_zero);
+ }
+
+ // Actual rle coding.
+ for (size_t i = 0; i < new_length;) {
+ const uint8_t value = depth[i];
+ size_t reps = 1;
+ if ((value != 0 && use_rle_for_non_zero) ||
+ (value == 0 && use_rle_for_zero)) {
+ for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) {
+ ++reps;
+ }
+ }
+ if (value == 0) {
+ WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data);
+ } else {
+ WriteHuffmanTreeRepetitions(previous_value, value, reps, tree_size, tree,
+ extra_bits_data);
+ previous_value = value;
+ }
+ i += reps;
+ }
+}
+
+namespace {
+
+uint16_t ReverseBits(int num_bits, uint16_t bits) {
+ static const size_t kLut[16] = {// Pre-reversed 4-bit values.
+ 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
+ 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf};
+ size_t retval = kLut[bits & 0xf];
+ for (int i = 4; i < num_bits; i += 4) {
+ retval <<= 4;
+ bits = static_cast<uint16_t>(bits >> 4);
+ retval |= kLut[bits & 0xf];
+ }
+ retval >>= (-num_bits & 0x3);
+ return static_cast<uint16_t>(retval);
+}
+
+} // namespace
+
+void ConvertBitDepthsToSymbols(const uint8_t* depth, size_t len,
+ uint16_t* bits) {
+ // In Brotli, all bit depths are [1..15]
+ // 0 bit depth means that the symbol does not exist.
+ const int kMaxBits = 16; // 0..15 are values for bits
+ uint16_t bl_count[kMaxBits] = {0};
+ {
+ for (size_t i = 0; i < len; ++i) {
+ ++bl_count[depth[i]];
+ }
+ bl_count[0] = 0;
+ }
+ uint16_t next_code[kMaxBits];
+ next_code[0] = 0;
+ {
+ int code = 0;
+ for (size_t i = 1; i < kMaxBits; ++i) {
+ code = (code + bl_count[i - 1]) << 1;
+ next_code[i] = static_cast<uint16_t>(code);
+ }
+ }
+ for (size_t i = 0; i < len; ++i) {
+ if (depth[i]) {
+ bits[i] = ReverseBits(depth[i], next_code[depth[i]]++);
+ }
+ }
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_huffman_tree.h b/third_party/jpeg-xl/lib/jxl/enc_huffman_tree.h
new file mode 100644
index 0000000000..7d716cd3b5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_huffman_tree.h
@@ -0,0 +1,52 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Library for creating Huffman codes from population counts.
+
+#ifndef LIB_JXL_HUFFMAN_TREE_H_
+#define LIB_JXL_HUFFMAN_TREE_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+namespace jxl {
+
+// A node of a Huffman tree.
+struct HuffmanTree {
+ HuffmanTree(uint32_t count, int16_t left, int16_t right)
+ : total_count(count), index_left(left), index_right_or_value(right) {}
+ uint32_t total_count;
+ int16_t index_left;
+ int16_t index_right_or_value;
+};
+
+void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth,
+ uint8_t level);
+
+// This function will create a Huffman tree.
+//
+// The (data,length) contains the population counts.
+// The tree_limit is the maximum bit depth of the Huffman codes.
+//
+// The depth contains the tree, i.e., how many bits are used for
+// the symbol.
+//
+// See http://en.wikipedia.org/wiki/Huffman_coding
+void CreateHuffmanTree(const uint32_t* data, size_t length, int tree_limit,
+ uint8_t* depth);
+
+// Write a Huffman tree from bit depths into the bitstream representation
+// of a Huffman tree. The generated Huffman tree is to be compressed once
+// more using a Huffman tree
+void WriteHuffmanTree(const uint8_t* depth, size_t length, size_t* tree_size,
+ uint8_t* tree, uint8_t* extra_bits_data);
+
+// Get the actual bit values for a tree of bit depths.
+void ConvertBitDepthsToSymbols(const uint8_t* depth, size_t len,
+ uint16_t* bits);
+
+} // namespace jxl
+
+#endif // LIB_JXL_HUFFMAN_TREE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_icc_codec.cc b/third_party/jpeg-xl/lib/jxl/enc_icc_codec.cc
new file mode 100644
index 0000000000..a6782f6a45
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_icc_codec.cc
@@ -0,0 +1,406 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_icc_codec.h"
+
+#include <stdint.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/icc_codec_common.h"
+
+namespace jxl {
+namespace {
+
+// Unshuffles or de-interleaves bytes, for example with width 2, turns
+// "AaBbCcDc" into "ABCDabcd", this for example de-interleaves UTF-16 bytes into
+// first all the high order bytes, then all the low order bytes.
+// Transposes a matrix of width columns and ceil(size / width) rows. There are
+// size elements, size may be < width * height, if so the
+// last elements of the bottom row are missing, the missing spots are
+// transposed along with the filled spots, and the result has the missing
+// elements at the bottom of the rightmost column. The input is the input matrix
+// in scanline order, the output is the result matrix in scanline order, with
+// missing elements skipped over (this may occur at multiple positions).
+void Unshuffle(uint8_t* data, size_t size, size_t width) {
+ size_t height = (size + width - 1) / width; // amount of rows of input
+ PaddedBytes result(size);
+ // i = input index, j output index
+ size_t s = 0, j = 0;
+ for (size_t i = 0; i < size; i++) {
+ result[j] = data[i];
+ j += height;
+ if (j >= size) j = ++s;
+ }
+
+ for (size_t i = 0; i < size; i++) {
+ data[i] = result[i];
+ }
+}
+
+// This is performed by the encoder, the encoder must be able to encode any
+// random byte stream (not just byte streams that are a valid ICC profile), so
+// an error returned by this function is an implementation error.
+Status PredictAndShuffle(size_t stride, size_t width, int order, size_t num,
+ const uint8_t* data, size_t size, size_t* pos,
+ PaddedBytes* result) {
+ JXL_RETURN_IF_ERROR(CheckOutOfBounds(*pos, num, size));
+ // Required by the specification, see decoder. stride * 4 must be < *pos.
+ if (!*pos || ((*pos - 1u) >> 2u) < stride) {
+ return JXL_FAILURE("Invalid stride");
+ }
+ if (*pos < stride * 4) return JXL_FAILURE("Too large stride");
+ size_t start = result->size();
+ for (size_t i = 0; i < num; i++) {
+ uint8_t predicted =
+ LinearPredictICCValue(data, *pos, i, stride, width, order);
+ result->push_back(data[*pos + i] - predicted);
+ }
+ *pos += num;
+ if (width > 1) Unshuffle(result->data() + start, num, width);
+ return true;
+}
+} // namespace
+
+// Outputs a transformed form of the given icc profile. The result itself is
+// not particularly smaller than the input data in bytes, but it will be in a
+// form that is easier to compress (more zeroes, ...) and will compress better
+// with brotli.
+Status PredictICC(const uint8_t* icc, size_t size, PaddedBytes* result) {
+ PaddedBytes commands;
+ PaddedBytes data;
+
+ EncodeVarInt(size, result);
+
+ // Header
+ PaddedBytes header = ICCInitialHeaderPrediction();
+ EncodeUint32(0, size, &header);
+ for (size_t i = 0; i < kICCHeaderSize && i < size; i++) {
+ ICCPredictHeader(icc, size, header.data(), i);
+ data.push_back(icc[i] - header[i]);
+ }
+ if (size <= kICCHeaderSize) {
+ EncodeVarInt(0, result); // 0 commands
+ for (size_t i = 0; i < data.size(); i++) {
+ result->push_back(data[i]);
+ }
+ return true;
+ }
+
+ std::vector<Tag> tags;
+ std::vector<size_t> tagstarts;
+ std::vector<size_t> tagsizes;
+ std::map<size_t, size_t> tagmap;
+
+ // Tag list
+ size_t pos = kICCHeaderSize;
+ if (pos + 4 <= size) {
+ uint64_t numtags = DecodeUint32(icc, size, pos);
+ pos += 4;
+ EncodeVarInt(numtags + 1, &commands);
+ uint64_t prevtagstart = kICCHeaderSize + numtags * 12;
+ uint32_t prevtagsize = 0;
+ for (size_t i = 0; i < numtags; i++) {
+ if (pos + 12 > size) break;
+
+ Tag tag = DecodeKeyword(icc, size, pos + 0);
+ uint32_t tagstart = DecodeUint32(icc, size, pos + 4);
+ uint32_t tagsize = DecodeUint32(icc, size, pos + 8);
+ pos += 12;
+
+ tags.push_back(tag);
+ tagstarts.push_back(tagstart);
+ tagsizes.push_back(tagsize);
+ tagmap[tagstart] = tags.size() - 1;
+
+ uint8_t tagcode = kCommandTagUnknown;
+ for (size_t j = 0; j < kNumTagStrings; j++) {
+ if (tag == *kTagStrings[j]) {
+ tagcode = j + kCommandTagStringFirst;
+ break;
+ }
+ }
+
+ if (tag == kRtrcTag && pos + 24 < size) {
+ bool ok = true;
+ ok &= DecodeKeyword(icc, size, pos + 0) == kGtrcTag;
+ ok &= DecodeKeyword(icc, size, pos + 12) == kBtrcTag;
+ if (ok) {
+ for (size_t kk = 0; kk < 8; kk++) {
+ if (icc[pos - 8 + kk] != icc[pos + 4 + kk]) ok = false;
+ if (icc[pos - 8 + kk] != icc[pos + 16 + kk]) ok = false;
+ }
+ }
+ if (ok) {
+ tagcode = kCommandTagTRC;
+ pos += 24;
+ i += 2;
+ }
+ }
+
+ if (tag == kRxyzTag && pos + 24 < size) {
+ bool ok = true;
+ ok &= DecodeKeyword(icc, size, pos + 0) == kGxyzTag;
+ ok &= DecodeKeyword(icc, size, pos + 12) == kBxyzTag;
+ uint32_t offsetr = tagstart;
+ uint32_t offsetg = DecodeUint32(icc, size, pos + 4);
+ uint32_t offsetb = DecodeUint32(icc, size, pos + 16);
+ uint32_t sizer = tagsize;
+ uint32_t sizeg = DecodeUint32(icc, size, pos + 8);
+ uint32_t sizeb = DecodeUint32(icc, size, pos + 20);
+ ok &= sizer == 20;
+ ok &= sizeg == 20;
+ ok &= sizeb == 20;
+ ok &= (offsetg == offsetr + 20);
+ ok &= (offsetb == offsetr + 40);
+ if (ok) {
+ tagcode = kCommandTagXYZ;
+ pos += 24;
+ i += 2;
+ }
+ }
+
+ uint8_t command = tagcode;
+ uint64_t predicted_tagstart = prevtagstart + prevtagsize;
+ if (predicted_tagstart != tagstart) command |= kFlagBitOffset;
+ size_t predicted_tagsize = prevtagsize;
+ if (tag == kRxyzTag || tag == kGxyzTag || tag == kBxyzTag ||
+ tag == kKxyzTag || tag == kWtptTag || tag == kBkptTag ||
+ tag == kLumiTag) {
+ predicted_tagsize = 20;
+ }
+ if (predicted_tagsize != tagsize) command |= kFlagBitSize;
+ commands.push_back(command);
+ if (tagcode == 1) {
+ AppendKeyword(tag, &data);
+ }
+ if (command & kFlagBitOffset) EncodeVarInt(tagstart, &commands);
+ if (command & kFlagBitSize) EncodeVarInt(tagsize, &commands);
+
+ prevtagstart = tagstart;
+ prevtagsize = tagsize;
+ }
+ }
+ // Indicate end of tag list or varint indicating there's none
+ commands.push_back(0);
+
+ // Main content
+ // The main content in a valid ICC profile contains tagged elements, with the
+ // tag types (4 letter names) given by the tag list above, and the tag list
+ // pointing to the start and indicating the size of each tagged element. It is
+ // allowed for tagged elements to overlap, e.g. the curve for R, G and B could
+ // all point to the same one.
+ Tag tag;
+ size_t tagstart = 0, tagsize = 0, clutstart = 0;
+
+ size_t last0 = pos;
+ // This loop appends commands to the output, processing some sub-section of a
+ // current tagged element each time. We need to keep track of the tagtype of
+ // the current element, and update it when we encounter the boundary of a
+ // next one.
+ // It is not required that the input data is a valid ICC profile, if the
+ // encoder does not recognize the data it will still be able to output bytes
+ // but will not predict as well.
+ while (pos <= size) {
+ size_t last1 = pos;
+ PaddedBytes commands_add;
+ PaddedBytes data_add;
+
+ // This means the loop brought the position beyond the tag end.
+ if (pos > tagstart + tagsize) {
+ tag = {{0, 0, 0, 0}}; // nonsensical value
+ }
+
+ if (commands_add.empty() && data_add.empty() && tagmap.count(pos) &&
+ pos + 4 <= size) {
+ size_t index = tagmap[pos];
+ tag = DecodeKeyword(icc, size, pos);
+ tagstart = tagstarts[index];
+ tagsize = tagsizes[index];
+
+ if (tag == kMlucTag && pos + tagsize <= size && tagsize > 8 &&
+ icc[pos + 4] == 0 && icc[pos + 5] == 0 && icc[pos + 6] == 0 &&
+ icc[pos + 7] == 0) {
+ size_t num = tagsize - 8;
+ commands_add.push_back(kCommandTypeStartFirst + 3);
+ pos += 8;
+ commands_add.push_back(kCommandShuffle2);
+ EncodeVarInt(num, &commands_add);
+ size_t start = data_add.size();
+ for (size_t i = 0; i < num; i++) {
+ data_add.push_back(icc[pos]);
+ pos++;
+ }
+ Unshuffle(data_add.data() + start, num, 2);
+ }
+
+ if (tag == kCurvTag && pos + tagsize <= size && tagsize > 8 &&
+ icc[pos + 4] == 0 && icc[pos + 5] == 0 && icc[pos + 6] == 0 &&
+ icc[pos + 7] == 0) {
+ size_t num = tagsize - 8;
+ if (num > 16 && num < (1 << 28) && pos + num <= size && pos > 0) {
+ commands_add.push_back(kCommandTypeStartFirst + 5);
+ pos += 8;
+ commands_add.push_back(kCommandPredict);
+ int order = 1, width = 2, stride = width;
+ commands_add.push_back((order << 2) | (width - 1));
+ EncodeVarInt(num, &commands_add);
+ JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc,
+ size, &pos, &data_add));
+ }
+ }
+ }
+
+ if (tag == kMab_Tag || tag == kMba_Tag) {
+ Tag subTag = DecodeKeyword(icc, size, pos);
+ if (pos + 12 < size && (subTag == kCurvTag || subTag == kVcgtTag) &&
+ DecodeUint32(icc, size, pos + 4) == 0) {
+ uint32_t num = DecodeUint32(icc, size, pos + 8) * 2;
+ if (num > 16 && num < (1 << 28) && pos + 12 + num <= size) {
+ pos += 12;
+ last1 = pos;
+ commands_add.push_back(kCommandPredict);
+ int order = 1, width = 2, stride = width;
+ commands_add.push_back((order << 2) | (width - 1));
+ EncodeVarInt(num, &commands_add);
+ JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc,
+ size, &pos, &data_add));
+ }
+ }
+
+ if (pos == tagstart + 24 && pos + 4 < size) {
+ // Note that this value can be remembered for next iterations of the
+ // loop, so the "pos == clutstart" if below can trigger during a later
+ // iteration.
+ clutstart = tagstart + DecodeUint32(icc, size, pos);
+ }
+
+ if (pos == clutstart && clutstart + 16 < size) {
+ size_t numi = icc[tagstart + 8];
+ size_t numo = icc[tagstart + 9];
+ size_t width = icc[clutstart + 16];
+ size_t stride = width * numo;
+ size_t num = width * numo;
+ for (size_t i = 0; i < numi && clutstart + i < size; i++) {
+ num *= icc[clutstart + i];
+ }
+ if ((width == 1 || width == 2) && num > 64 && num < (1 << 28) &&
+ pos + num <= size && pos > stride * 4) {
+ commands_add.push_back(kCommandPredict);
+ int order = 1;
+ uint8_t flags =
+ (order << 2) | (width - 1) | (stride == width ? 0 : 16);
+ commands_add.push_back(flags);
+ if (flags & 16) EncodeVarInt(stride, &commands_add);
+ EncodeVarInt(num, &commands_add);
+ JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc,
+ size, &pos, &data_add));
+ }
+ }
+ }
+
+ if (commands_add.empty() && data_add.empty() && tag == kGbd_Tag &&
+ pos == tagstart + 8 && pos + tagsize - 8 <= size && pos > 16 &&
+ tagsize > 8) {
+ size_t width = 4, order = 0, stride = width;
+ size_t num = tagsize - 8;
+ uint8_t flags = (order << 2) | (width - 1) | (stride == width ? 0 : 16);
+ commands_add.push_back(kCommandPredict);
+ commands_add.push_back(flags);
+ if (flags & 16) EncodeVarInt(stride, &commands_add);
+ EncodeVarInt(num, &commands_add);
+ JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc,
+ size, &pos, &data_add));
+ }
+
+ if (commands_add.empty() && data_add.empty() && pos + 20 <= size) {
+ Tag subTag = DecodeKeyword(icc, size, pos);
+ if (subTag == kXyz_Tag && DecodeUint32(icc, size, pos + 4) == 0) {
+ commands_add.push_back(kCommandXYZ);
+ pos += 8;
+ for (size_t j = 0; j < 12; j++) data_add.push_back(icc[pos++]);
+ }
+ }
+
+ if (commands_add.empty() && data_add.empty() && pos + 8 <= size) {
+ if (DecodeUint32(icc, size, pos + 4) == 0) {
+ Tag subTag = DecodeKeyword(icc, size, pos);
+ for (size_t i = 0; i < kNumTypeStrings; i++) {
+ if (subTag == *kTypeStrings[i]) {
+ commands_add.push_back(kCommandTypeStartFirst + i);
+ pos += 8;
+ break;
+ }
+ }
+ }
+ }
+
+ if (!(commands_add.empty() && data_add.empty()) || pos == size) {
+ if (last0 < last1) {
+ commands.push_back(kCommandInsert);
+ EncodeVarInt(last1 - last0, &commands);
+ while (last0 < last1) {
+ data.push_back(icc[last0++]);
+ }
+ }
+ for (size_t i = 0; i < commands_add.size(); i++) {
+ commands.push_back(commands_add[i]);
+ }
+ for (size_t i = 0; i < data_add.size(); i++) {
+ data.push_back(data_add[i]);
+ }
+ last0 = pos;
+ }
+ if (commands_add.empty() && data_add.empty()) {
+ pos++;
+ }
+ }
+
+ EncodeVarInt(commands.size(), result);
+ for (size_t i = 0; i < commands.size(); i++) {
+ result->push_back(commands[i]);
+ }
+ for (size_t i = 0; i < data.size(); i++) {
+ result->push_back(data[i]);
+ }
+
+ return true;
+}
+
+Status WriteICC(const PaddedBytes& icc, BitWriter* JXL_RESTRICT writer,
+ size_t layer, AuxOut* JXL_RESTRICT aux_out) {
+ if (icc.empty()) return JXL_FAILURE("ICC must be non-empty");
+ PaddedBytes enc;
+ JXL_RETURN_IF_ERROR(PredictICC(icc.data(), icc.size(), &enc));
+ std::vector<std::vector<Token>> tokens(1);
+ BitWriter::Allotment allotment(writer, 128);
+ JXL_RETURN_IF_ERROR(U64Coder::Write(enc.size(), writer));
+ allotment.ReclaimAndCharge(writer, layer, aux_out);
+
+ for (size_t i = 0; i < enc.size(); i++) {
+ tokens[0].emplace_back(
+ ICCANSContext(i, i > 0 ? enc[i - 1] : 0, i > 1 ? enc[i - 2] : 0),
+ enc[i]);
+ }
+ HistogramParams params;
+ params.lz77_method = enc.size() < 4096 ? HistogramParams::LZ77Method::kOptimal
+ : HistogramParams::LZ77Method::kLZ77;
+ EntropyEncodingData code;
+ std::vector<uint8_t> context_map;
+ params.force_huffman = true;
+ BuildAndEncodeHistograms(params, kNumICCContexts, tokens, &code, &context_map,
+ writer, layer, aux_out);
+ WriteTokens(tokens[0], code, context_map, writer, layer, aux_out);
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_icc_codec.h b/third_party/jpeg-xl/lib/jxl/enc_icc_codec.h
new file mode 100644
index 0000000000..c22cf5994e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_icc_codec.h
@@ -0,0 +1,33 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_ICC_CODEC_H_
+#define LIB_JXL_ENC_ICC_CODEC_H_
+
+// Compressed representation of ICC profiles.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_bit_writer.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Should still be called if `icc.empty()` - if so, writes only 1 bit.
+Status WriteICC(const PaddedBytes& icc, BitWriter* JXL_RESTRICT writer,
+ size_t layer, AuxOut* JXL_RESTRICT aux_out);
+
+// Exposed only for testing
+Status PredictICC(const uint8_t* icc, size_t size, PaddedBytes* result);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_ICC_CODEC_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_image_bundle.cc b/third_party/jpeg-xl/lib/jxl/enc_image_bundle.cc
new file mode 100644
index 0000000000..a77d3e0743
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_image_bundle.cc
@@ -0,0 +1,154 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_image_bundle.h"
+
+#include <jxl/cms_interface.h>
+
+#include <atomic>
+#include <limits>
+#include <utility>
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+namespace {
+
+// Copies ib:rect, converts, and copies into out.
+Status CopyToT(const ImageMetadata* metadata, const ImageBundle* ib,
+ const Rect& rect, const ColorEncoding& c_desired,
+ const JxlCmsInterface& cms, ThreadPool* pool, Image3F* out) {
+ PROFILER_FUNC;
+ ColorSpaceTransform c_transform(cms);
+ // Changing IsGray is probably a bug.
+ JXL_CHECK(ib->IsGray() == c_desired.IsGray());
+ bool is_gray = ib->IsGray();
+ if (out->xsize() < rect.xsize() || out->ysize() < rect.ysize()) {
+ *out = Image3F(rect.xsize(), rect.ysize());
+ } else {
+ out->ShrinkTo(rect.xsize(), rect.ysize());
+ }
+ std::atomic<bool> ok{true};
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, rect.ysize(),
+ [&](const size_t num_threads) {
+ return c_transform.Init(ib->c_current(), c_desired,
+ metadata->IntensityTarget(), rect.xsize(),
+ num_threads);
+ },
+ [&](const uint32_t y, const size_t thread) {
+ float* mutable_src_buf = c_transform.BufSrc(thread);
+ const float* src_buf = mutable_src_buf;
+ // Interleave input.
+ if (is_gray) {
+ src_buf = rect.ConstPlaneRow(ib->color(), 0, y);
+ } else if (ib->c_current().IsCMYK()) {
+ if (!ib->HasBlack()) {
+ ok.store(false);
+ return;
+ }
+ const float* JXL_RESTRICT row_in0 =
+ rect.ConstPlaneRow(ib->color(), 0, y);
+ const float* JXL_RESTRICT row_in1 =
+ rect.ConstPlaneRow(ib->color(), 1, y);
+ const float* JXL_RESTRICT row_in2 =
+ rect.ConstPlaneRow(ib->color(), 2, y);
+ const float* JXL_RESTRICT row_in3 = rect.ConstRow(ib->black(), y);
+ for (size_t x = 0; x < rect.xsize(); x++) {
+ // CMYK convention in JXL: 0 = max ink, 1 = white
+ mutable_src_buf[4 * x + 0] = row_in0[x];
+ mutable_src_buf[4 * x + 1] = row_in1[x];
+ mutable_src_buf[4 * x + 2] = row_in2[x];
+ mutable_src_buf[4 * x + 3] = row_in3[x];
+ }
+ } else {
+ const float* JXL_RESTRICT row_in0 =
+ rect.ConstPlaneRow(ib->color(), 0, y);
+ const float* JXL_RESTRICT row_in1 =
+ rect.ConstPlaneRow(ib->color(), 1, y);
+ const float* JXL_RESTRICT row_in2 =
+ rect.ConstPlaneRow(ib->color(), 2, y);
+ for (size_t x = 0; x < rect.xsize(); x++) {
+ mutable_src_buf[3 * x + 0] = row_in0[x];
+ mutable_src_buf[3 * x + 1] = row_in1[x];
+ mutable_src_buf[3 * x + 2] = row_in2[x];
+ }
+ }
+ float* JXL_RESTRICT dst_buf = c_transform.BufDst(thread);
+ if (!c_transform.Run(thread, src_buf, dst_buf)) {
+ ok.store(false);
+ return;
+ }
+ float* JXL_RESTRICT row_out0 = out->PlaneRow(0, y);
+ float* JXL_RESTRICT row_out1 = out->PlaneRow(1, y);
+ float* JXL_RESTRICT row_out2 = out->PlaneRow(2, y);
+ // De-interleave output and convert type.
+ if (is_gray) {
+ for (size_t x = 0; x < rect.xsize(); x++) {
+ row_out0[x] = dst_buf[x];
+ row_out1[x] = dst_buf[x];
+ row_out2[x] = dst_buf[x];
+ }
+ } else {
+ for (size_t x = 0; x < rect.xsize(); x++) {
+ row_out0[x] = dst_buf[3 * x + 0];
+ row_out1[x] = dst_buf[3 * x + 1];
+ row_out2[x] = dst_buf[3 * x + 2];
+ }
+ }
+ },
+ "Colorspace transform"));
+ return ok.load();
+}
+
+} // namespace
+
+Status ImageBundle::TransformTo(const ColorEncoding& c_desired,
+ const JxlCmsInterface& cms, ThreadPool* pool) {
+ PROFILER_FUNC;
+ JXL_RETURN_IF_ERROR(CopyTo(Rect(color_), c_desired, cms, &color_, pool));
+ c_current_ = c_desired;
+ return true;
+}
+Status ImageBundle::CopyTo(const Rect& rect, const ColorEncoding& c_desired,
+ const JxlCmsInterface& cms, Image3F* out,
+ ThreadPool* pool) const {
+ return CopyToT(metadata_, this, rect, c_desired, cms, pool, out);
+}
+Status TransformIfNeeded(const ImageBundle& in, const ColorEncoding& c_desired,
+ const JxlCmsInterface& cms, ThreadPool* pool,
+ ImageBundle* store, const ImageBundle** out) {
+ if (in.c_current().SameColorEncoding(c_desired) && !in.HasBlack()) {
+ *out = &in;
+ return true;
+ }
+ // TODO(janwas): avoid copying via createExternal+copyBackToIO
+ // instead of copy+createExternal+copyBackToIO
+ store->SetFromImage(CopyImage(in.color()), in.c_current());
+
+ // Must at least copy the alpha channel for use by external_image.
+ if (in.HasExtraChannels()) {
+ std::vector<ImageF> extra_channels;
+ for (const ImageF& extra_channel : in.extra_channels()) {
+ extra_channels.emplace_back(CopyImage(extra_channel));
+ }
+ store->SetExtraChannels(std::move(extra_channels));
+ }
+
+ if (!store->TransformTo(c_desired, cms, pool)) {
+ return false;
+ }
+ *out = store;
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_image_bundle.h b/third_party/jpeg-xl/lib/jxl/enc_image_bundle.h
new file mode 100644
index 0000000000..85f8e14e1c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_image_bundle.h
@@ -0,0 +1,25 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_IMAGE_BUNDLE_H_
+#define LIB_JXL_ENC_IMAGE_BUNDLE_H_
+
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+// Does color transformation from in.c_current() to c_desired if the color
+// encodings are different, or nothing if they are already the same.
+// If color transformation is done, stores the transformed values into store and
+// sets the out pointer to store, else leaves store untouched and sets the out
+// pointer to &in.
+// Returns false if color transform fails.
+Status TransformIfNeeded(const ImageBundle& in, const ColorEncoding& c_desired,
+ const JxlCmsInterface& cms, ThreadPool* pool,
+ ImageBundle* store, const ImageBundle** out);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_IMAGE_BUNDLE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_jxl_skcms.h b/third_party/jpeg-xl/lib/jxl/enc_jxl_skcms.h
new file mode 100644
index 0000000000..3c364e883d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_jxl_skcms.h
@@ -0,0 +1,54 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_JXL_SKCMS_H_
+#define LIB_JXL_ENC_JXL_SKCMS_H_
+
+// skcms wrapper to rename the skcms symbols to avoid conflicting names with
+// other projects using skcms as well. When using JPEGXL_BUNDLE_SKCMS the
+// bundled functions will be renamed from skcms_ to jxl_skcms_
+
+#ifdef SKCMS_API
+#error "Must include enc_jxl_skcms.h and not skcms.h directly"
+#endif // SKCMS_API
+
+#if JPEGXL_BUNDLE_SKCMS
+
+#define skcms_252_random_bytes jxl_skcms_252_random_bytes
+#define skcms_AdaptToXYZD50 jxl_skcms_AdaptToXYZD50
+#define skcms_ApproximateCurve jxl_skcms_ApproximateCurve
+#define skcms_ApproximatelyEqualProfiles jxl_skcms_ApproximatelyEqualProfiles
+#define skcms_AreApproximateInverses jxl_skcms_AreApproximateInverses
+#define skcms_GetCHAD jxl_skcms_GetCHAD
+#define skcms_GetTagByIndex jxl_skcms_GetTagByIndex
+#define skcms_GetTagBySignature jxl_skcms_GetTagBySignature
+#define skcms_GetWTPT jxl_skcms_GetWTPT
+#define skcms_Identity_TransferFunction jxl_skcms_Identity_TransferFunction
+#define skcms_MakeUsableAsDestination jxl_skcms_MakeUsableAsDestination
+#define skcms_MakeUsableAsDestinationWithSingleCurve \
+ jxl_skcms_MakeUsableAsDestinationWithSingleCurve
+#define skcms_Matrix3x3_concat jxl_skcms_Matrix3x3_concat
+#define skcms_Matrix3x3_invert jxl_skcms_Matrix3x3_invert
+#define skcms_MaxRoundtripError jxl_skcms_MaxRoundtripError
+#define skcms_Parse jxl_skcms_Parse
+#define skcms_PrimariesToXYZD50 jxl_skcms_PrimariesToXYZD50
+#define skcms_sRGB_Inverse_TransferFunction \
+ jxl_skcms_sRGB_Inverse_TransferFunction
+#define skcms_sRGB_profile jxl_skcms_sRGB_profile
+#define skcms_sRGB_TransferFunction jxl_skcms_sRGB_TransferFunction
+#define skcms_TransferFunction_eval jxl_skcms_TransferFunction_eval
+#define skcms_TransferFunction_invert jxl_skcms_TransferFunction_invert
+#define skcms_TransferFunction_makeHLGish jxl_skcms_TransferFunction_makeHLGish
+#define skcms_TransferFunction_makePQish jxl_skcms_TransferFunction_makePQish
+#define skcms_Transform jxl_skcms_Transform
+#define skcms_TransformWithPalette jxl_skcms_TransformWithPalette
+#define skcms_TRCs_AreApproximateInverse jxl_skcms_TRCs_AreApproximateInverse
+#define skcms_XYZD50_profile jxl_skcms_XYZD50_profile
+
+#endif // JPEGXL_BUNDLE_SKCMS
+
+#include "skcms.h"
+
+#endif // LIB_JXL_ENC_JXL_SKCMS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_linalg.cc b/third_party/jpeg-xl/lib/jxl/enc_linalg.cc
new file mode 100644
index 0000000000..fe2090a909
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_linalg.cc
@@ -0,0 +1,52 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_linalg.h"
+
+#include <cmath>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+void ConvertToDiagonal(const ImageD& A, ImageD* const JXL_RESTRICT diag,
+ ImageD* const JXL_RESTRICT U) {
+#if JXL_ENABLE_ASSERT
+ JXL_ASSERT(A.xsize() == 2);
+ JXL_ASSERT(A.ysize() == 2);
+ JXL_ASSERT(std::abs(A.Row(0)[1] - A.Row(1)[0]) < 1e-15);
+#endif
+
+ if (std::abs(A.ConstRow(0)[1]) < 1e-15) {
+ // Already diagonal.
+ diag->Row(0)[0] = A.ConstRow(0)[0];
+ diag->Row(0)[1] = A.ConstRow(1)[1];
+ U->Row(0)[0] = U->Row(1)[1] = 1.0;
+ U->Row(0)[1] = U->Row(1)[0] = 0.0;
+ return;
+ }
+ double b = -(A.Row(0)[0] + A.Row(1)[1]);
+ double c = A.Row(0)[0] * A.Row(1)[1] - A.Row(0)[1] * A.Row(0)[1];
+ double d = b * b - 4.0 * c;
+ double sqd = std::sqrt(d);
+ double l1 = (-b - sqd) * 0.5;
+ double l2 = (-b + sqd) * 0.5;
+
+ double v1[2] = {A.Row(0)[0] - l1, A.Row(1)[0]};
+ double v1n = 1.0 / std::hypot(v1[0], v1[1]);
+ v1[0] = v1[0] * v1n;
+ v1[1] = v1[1] * v1n;
+
+ diag->Row(0)[0] = l1;
+ diag->Row(0)[1] = l2;
+
+ U->Row(0)[0] = v1[1];
+ U->Row(0)[1] = -v1[0];
+ U->Row(1)[0] = v1[0];
+ U->Row(1)[1] = v1[1];
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_linalg.h b/third_party/jpeg-xl/lib/jxl/enc_linalg.h
new file mode 100644
index 0000000000..791770d5d4
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_linalg.h
@@ -0,0 +1,24 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_LINALG_H_
+#define LIB_JXL_LINALG_H_
+
+// Linear algebra.
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+using ImageD = Plane<double>;
+
+// A is symmetric, U is orthogonal, and A = U * Diagonal(diag) * Transpose(U).
+void ConvertToDiagonal(const ImageD& A, ImageD* JXL_RESTRICT diag,
+ ImageD* JXL_RESTRICT U);
+
+} // namespace jxl
+
+#endif // LIB_JXL_LINALG_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_linalg_test.cc b/third_party/jpeg-xl/lib/jxl/enc_linalg_test.cc
new file mode 100644
index 0000000000..967b9a3afb
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_linalg_test.cc
@@ -0,0 +1,118 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_linalg.h"
+
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+ImageD Identity(const size_t N) {
+ ImageD out(N, N);
+ for (size_t i = 0; i < N; ++i) {
+ double* JXL_RESTRICT row = out.Row(i);
+ std::fill(row, row + N, 0);
+ row[i] = 1.0;
+ }
+ return out;
+}
+
+ImageD Diagonal(const ImageD& d) {
+ JXL_ASSERT(d.ysize() == 1);
+ ImageD out(d.xsize(), d.xsize());
+ const double* JXL_RESTRICT row_diag = d.Row(0);
+ for (size_t k = 0; k < d.xsize(); ++k) {
+ double* JXL_RESTRICT row_out = out.Row(k);
+ std::fill(row_out, row_out + d.xsize(), 0.0);
+ row_out[k] = row_diag[k];
+ }
+ return out;
+}
+
+ImageD MatMul(const ImageD& A, const ImageD& B) {
+ JXL_ASSERT(A.ysize() == B.xsize());
+ ImageD out(A.xsize(), B.ysize());
+ for (size_t y = 0; y < B.ysize(); ++y) {
+ const double* const JXL_RESTRICT row_b = B.Row(y);
+ double* const JXL_RESTRICT row_out = out.Row(y);
+ for (size_t x = 0; x < A.xsize(); ++x) {
+ row_out[x] = 0.0;
+ for (size_t k = 0; k < B.xsize(); ++k) {
+ row_out[x] += A.Row(k)[x] * row_b[k];
+ }
+ }
+ }
+ return out;
+}
+
+ImageD Transpose(const ImageD& A) {
+ ImageD out(A.ysize(), A.xsize());
+ for (size_t x = 0; x < A.xsize(); ++x) {
+ double* const JXL_RESTRICT row_out = out.Row(x);
+ for (size_t y = 0; y < A.ysize(); ++y) {
+ row_out[y] = A.Row(y)[x];
+ }
+ }
+ return out;
+}
+
+ImageD RandomSymmetricMatrix(const size_t N, Rng& rng, const double vmin,
+ const double vmax) {
+ ImageD A(N, N);
+ GenerateImage(rng, &A, vmin, vmax);
+ for (size_t i = 0; i < N; ++i) {
+ for (size_t j = 0; j < i; ++j) {
+ A.Row(j)[i] = A.Row(i)[j];
+ }
+ }
+ return A;
+}
+
+void VerifyMatrixEqual(const ImageD& A, const ImageD& B, const double eps) {
+ ASSERT_EQ(A.xsize(), B.xsize());
+ ASSERT_EQ(A.ysize(), B.ysize());
+ for (size_t y = 0; y < A.ysize(); ++y) {
+ for (size_t x = 0; x < A.xsize(); ++x) {
+ ASSERT_NEAR(A.Row(y)[x], B.Row(y)[x], eps);
+ }
+ }
+}
+
+void VerifyOrthogonal(const ImageD& A, const double eps) {
+ VerifyMatrixEqual(Identity(A.xsize()), MatMul(Transpose(A), A), eps);
+}
+
+TEST(LinAlgTest, ConvertToDiagonal) {
+ {
+ ImageD I = Identity(2);
+ ImageD U(2, 2), d(2, 1);
+ ConvertToDiagonal(I, &d, &U);
+ VerifyMatrixEqual(I, U, 1e-15);
+ for (size_t k = 0; k < 2; ++k) {
+ ASSERT_NEAR(d.Row(0)[k], 1.0, 1e-15);
+ }
+ }
+ {
+ ImageD A = Identity(2);
+ A.Row(0)[1] = A.Row(1)[0] = 2.0;
+ ImageD U(2, 2), d(2, 1);
+ ConvertToDiagonal(A, &d, &U);
+ VerifyOrthogonal(U, 1e-12);
+ VerifyMatrixEqual(A, MatMul(U, MatMul(Diagonal(d), Transpose(U))), 1e-12);
+ }
+ Rng rng(0);
+ for (size_t i = 0; i < 100; ++i) {
+ ImageD A = RandomSymmetricMatrix(2, rng, -1.0, 1.0);
+ ImageD U(2, 2), d(2, 1);
+ ConvertToDiagonal(A, &d, &U);
+ VerifyOrthogonal(U, 1e-12);
+ VerifyMatrixEqual(A, MatMul(U, MatMul(Diagonal(d), Transpose(U))), 1e-12);
+ }
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_modular.cc b/third_party/jpeg-xl/lib/jxl/enc_modular.cc
new file mode 100644
index 0000000000..0453b34654
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_modular.cc
@@ -0,0 +1,1762 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_modular.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+#include <atomic>
+#include <limits>
+#include <queue>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/compressed_dc.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cluster.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/enc_gaborish.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_patch_dictionary.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/encoding/enc_debug_tree.h"
+#include "lib/jxl/modular/encoding/enc_encoding.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/encoding/ma_common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/options.h"
+#include "lib/jxl/modular/transform/enc_transform.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+
+namespace {
+constexpr bool kPrintTree = false;
+
+// Squeeze default quantization factors
+// these quantization factors are for -Q 50 (other qualities simply scale the
+// factors; things are rounded down and obviously cannot get below 1)
+static const float squeeze_quality_factor =
+ 0.35; // for easy tweaking of the quality range (decrease this number for
+ // higher quality)
+static const float squeeze_luma_factor =
+ 1.1; // for easy tweaking of the balance between luma (or anything
+ // non-chroma) and chroma (decrease this number for higher quality
+ // luma)
+static const float squeeze_quality_factor_xyb = 2.4f;
+static const float squeeze_xyb_qtable[3][16] = {
+ {163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 2.56, 1.28, 0.64, 0.32, 0.16,
+ 0.08, 0.04, 0.02, 0.01, 0.005}, // Y
+ {1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5,
+ 0.5}, // X
+ {2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5,
+ 0.5}, // B-Y
+};
+
+static const float squeeze_luma_qtable[16] = {
+ 163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 2.56, 1.28,
+ 0.64, 0.32, 0.16, 0.08, 0.04, 0.02, 0.01, 0.005};
+// for 8-bit input, the range of YCoCg chroma is -255..255 so basically this
+// does 4:2:0 subsampling (two most fine grained layers get quantized away)
+static const float squeeze_chroma_qtable[16] = {
+ 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5, 0.5};
+
+// `cutoffs` must be sorted.
+Tree MakeFixedTree(int property, const std::vector<int32_t>& cutoffs,
+ Predictor pred, size_t num_pixels) {
+ size_t log_px = CeilLog2Nonzero(num_pixels);
+ size_t min_gap = 0;
+ // Reduce fixed tree height when encoding small images.
+ if (log_px < 14) {
+ min_gap = 8 * (14 - log_px);
+ }
+ Tree tree;
+ struct NodeInfo {
+ size_t begin, end, pos;
+ };
+ std::queue<NodeInfo> q;
+ // Leaf IDs will be set by roundtrip decoding the tree.
+ tree.push_back(PropertyDecisionNode::Leaf(pred));
+ q.push(NodeInfo{0, cutoffs.size(), 0});
+ while (!q.empty()) {
+ NodeInfo info = q.front();
+ q.pop();
+ if (info.begin + min_gap >= info.end) continue;
+ uint32_t split = (info.begin + info.end) / 2;
+ tree[info.pos] =
+ PropertyDecisionNode::Split(property, cutoffs[split], tree.size());
+ q.push(NodeInfo{split + 1, info.end, tree.size()});
+ tree.push_back(PropertyDecisionNode::Leaf(pred));
+ q.push(NodeInfo{info.begin, split, tree.size()});
+ tree.push_back(PropertyDecisionNode::Leaf(pred));
+ }
+ return tree;
+}
+
+Tree PredefinedTree(ModularOptions::TreeKind tree_kind, size_t total_pixels) {
+ if (tree_kind == ModularOptions::TreeKind::kJpegTranscodeACMeta ||
+ tree_kind == ModularOptions::TreeKind::kTrivialTreeNoPredictor) {
+ // All the data is 0, so no need for a fancy tree.
+ return {PropertyDecisionNode::Leaf(Predictor::Zero)};
+ }
+ if (tree_kind == ModularOptions::TreeKind::kFalconACMeta) {
+ // All the data is 0 except the quant field. TODO(veluca): make that 0 too.
+ return {PropertyDecisionNode::Leaf(Predictor::Left)};
+ }
+ if (tree_kind == ModularOptions::TreeKind::kACMeta) {
+ // Small image.
+ if (total_pixels < 1024) {
+ return {PropertyDecisionNode::Leaf(Predictor::Left)};
+ }
+ Tree tree;
+ // 0: c > 1
+ tree.push_back(PropertyDecisionNode::Split(0, 1, 1));
+ // 1: c > 2
+ tree.push_back(PropertyDecisionNode::Split(0, 2, 3));
+ // 2: c > 0
+ tree.push_back(PropertyDecisionNode::Split(0, 0, 5));
+ // 3: EPF control field (all 0 or 4), top > 0
+ tree.push_back(PropertyDecisionNode::Split(6, 0, 21));
+ // 4: ACS+QF, y > 0
+ tree.push_back(PropertyDecisionNode::Split(2, 0, 7));
+ // 5: CfL x
+ tree.push_back(PropertyDecisionNode::Leaf(Predictor::Gradient));
+ // 6: CfL b
+ tree.push_back(PropertyDecisionNode::Leaf(Predictor::Gradient));
+ // 7: QF: split according to the left quant value.
+ tree.push_back(PropertyDecisionNode::Split(7, 5, 9));
+ // 8: ACS: split in 4 segments (8x8 from 0 to 3, large square 4-5, large
+ // rectangular 6-11, 8x8 12+), according to previous ACS value.
+ tree.push_back(PropertyDecisionNode::Split(7, 5, 15));
+ // QF
+ tree.push_back(PropertyDecisionNode::Split(7, 11, 11));
+ tree.push_back(PropertyDecisionNode::Split(7, 3, 13));
+ tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left));
+ tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left));
+ tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left));
+ tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left));
+ // ACS
+ tree.push_back(PropertyDecisionNode::Split(7, 11, 17));
+ tree.push_back(PropertyDecisionNode::Split(7, 3, 19));
+ tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+ tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+ tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+ tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+ // EPF, left > 0
+ tree.push_back(PropertyDecisionNode::Split(7, 0, 23));
+ tree.push_back(PropertyDecisionNode::Split(7, 0, 25));
+ tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+ tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+ tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+ tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero));
+ return tree;
+ }
+ if (tree_kind == ModularOptions::TreeKind::kWPFixedDC) {
+ std::vector<int32_t> cutoffs = {
+ -500, -392, -255, -191, -127, -95, -63, -47, -31, -23, -15,
+ -11, -7, -4, -3, -1, 0, 1, 3, 5, 7, 11,
+ 15, 23, 31, 47, 63, 95, 127, 191, 255, 392, 500};
+ return MakeFixedTree(kWPProp, cutoffs, Predictor::Weighted, total_pixels);
+ }
+ if (tree_kind == ModularOptions::TreeKind::kGradientFixedDC) {
+ std::vector<int32_t> cutoffs = {
+ -500, -392, -255, -191, -127, -95, -63, -47, -31, -23, -15,
+ -11, -7, -4, -3, -1, 0, 1, 3, 5, 7, 11,
+ 15, 23, 31, 47, 63, 95, 127, 191, 255, 392, 500};
+ return MakeFixedTree(kGradientProp, cutoffs, Predictor::Gradient,
+ total_pixels);
+ }
+ JXL_ABORT("Unreachable");
+ return {};
+}
+
+// Merges the trees in `trees` using nodes that decide on stream_id, as defined
+// by `tree_splits`.
+void MergeTrees(const std::vector<Tree>& trees,
+ const std::vector<size_t>& tree_splits, size_t begin,
+ size_t end, Tree* tree) {
+ JXL_ASSERT(trees.size() + 1 == tree_splits.size());
+ JXL_ASSERT(end > begin);
+ JXL_ASSERT(end <= trees.size());
+ if (end == begin + 1) {
+ // Insert the tree, adding the opportune offset to all child nodes.
+ // This will make the leaf IDs wrong, but subsequent roundtripping will fix
+ // them.
+ size_t sz = tree->size();
+ tree->insert(tree->end(), trees[begin].begin(), trees[begin].end());
+ for (size_t i = sz; i < tree->size(); i++) {
+ (*tree)[i].lchild += sz;
+ (*tree)[i].rchild += sz;
+ }
+ return;
+ }
+ size_t mid = (begin + end) / 2;
+ size_t splitval = tree_splits[mid] - 1;
+ size_t cur = tree->size();
+ tree->emplace_back(1 /*stream_id*/, splitval, 0, 0, Predictor::Zero, 0, 1);
+ (*tree)[cur].lchild = tree->size();
+ MergeTrees(trees, tree_splits, mid, end, tree);
+ (*tree)[cur].rchild = tree->size();
+ MergeTrees(trees, tree_splits, begin, mid, tree);
+}
+
+void QuantizeChannel(Channel& ch, const int q) {
+ if (q == 1) return;
+ for (size_t y = 0; y < ch.plane.ysize(); y++) {
+ pixel_type* row = ch.plane.Row(y);
+ for (size_t x = 0; x < ch.plane.xsize(); x++) {
+ if (row[x] < 0) {
+ row[x] = -((-row[x] + q / 2) / q) * q;
+ } else {
+ row[x] = ((row[x] + q / 2) / q) * q;
+ }
+ }
+ }
+}
+
+// convert binary32 float that corresponds to custom [bits]-bit float (with
+// [exp_bits] exponent bits) to a [bits]-bit integer representation that should
+// fit in pixel_type
+Status float_to_int(const float* const row_in, pixel_type* const row_out,
+ size_t xsize, unsigned int bits, unsigned int exp_bits,
+ bool fp, double dfactor) {
+ JXL_ASSERT(sizeof(pixel_type) * 8 >= bits);
+ if (!fp) {
+ if (bits > 22) {
+ for (size_t x = 0; x < xsize; ++x) {
+ row_out[x] = row_in[x] * dfactor + (row_in[x] < 0 ? -0.5 : 0.5);
+ }
+ } else {
+ float factor = dfactor;
+ for (size_t x = 0; x < xsize; ++x) {
+ row_out[x] = row_in[x] * factor + (row_in[x] < 0 ? -0.5f : 0.5f);
+ }
+ }
+ return true;
+ }
+ if (bits == 32 && fp) {
+ JXL_ASSERT(exp_bits == 8);
+ memcpy((void*)row_out, (const void*)row_in, 4 * xsize);
+ return true;
+ }
+
+ int exp_bias = (1 << (exp_bits - 1)) - 1;
+ int max_exp = (1 << exp_bits) - 1;
+ uint32_t sign = (1u << (bits - 1));
+ int mant_bits = bits - exp_bits - 1;
+ int mant_shift = 23 - mant_bits;
+ for (size_t x = 0; x < xsize; ++x) {
+ uint32_t f;
+ memcpy(&f, &row_in[x], 4);
+ int signbit = (f >> 31);
+ f &= 0x7fffffff;
+ if (f == 0) {
+ row_out[x] = (signbit ? sign : 0);
+ continue;
+ }
+ int exp = (f >> 23) - 127;
+ if (exp == 128) return JXL_FAILURE("Inf/NaN not allowed");
+ int mantissa = (f & 0x007fffff);
+ // broke up the binary32 into its parts, now reassemble into
+ // arbitrary float
+ exp += exp_bias;
+ if (exp < 0) { // will become a subnormal number
+ // add implicit leading 1 to mantissa
+ mantissa |= 0x00800000;
+ if (exp < -mant_bits) {
+ return JXL_FAILURE(
+ "Invalid float number: %g cannot be represented with %i "
+ "exp_bits and %i mant_bits (exp %i)",
+ row_in[x], exp_bits, mant_bits, exp);
+ }
+ mantissa >>= 1 - exp;
+ exp = 0;
+ }
+ // exp should be representable in exp_bits, otherwise input was
+ // invalid
+ if (exp > max_exp) return JXL_FAILURE("Invalid float exponent");
+ if (mantissa & ((1 << mant_shift) - 1)) {
+ return JXL_FAILURE("%g is losing precision (mant: %x)", row_in[x],
+ mantissa);
+ }
+ mantissa >>= mant_shift;
+ f = (signbit ? sign : 0);
+ f |= (exp << mant_bits);
+ f |= mantissa;
+ row_out[x] = (pixel_type)f;
+ }
+ return true;
+}
+} // namespace
+
+ModularFrameEncoder::ModularFrameEncoder(const FrameHeader& frame_header,
+ const CompressParams& cparams_orig)
+ : frame_dim_(frame_header.ToFrameDimensions()), cparams_(cparams_orig) {
+ size_t num_streams =
+ ModularStreamId::Num(frame_dim_, frame_header.passes.num_passes);
+ if (cparams_.ModularPartIsLossless()) {
+ switch (cparams_.decoding_speed_tier) {
+ case 0:
+ break;
+ case 1:
+ cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kWPOnly;
+ break;
+ case 2: {
+ cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kGradientOnly;
+ cparams_.options.predictor = Predictor::Gradient;
+ break;
+ }
+ case 3: { // LZ77, no Gradient.
+ cparams_.options.nb_repeats = 0;
+ cparams_.options.predictor = Predictor::Gradient;
+ break;
+ }
+ default: { // LZ77, no predictor.
+ cparams_.options.nb_repeats = 0;
+ cparams_.options.predictor = Predictor::Zero;
+ break;
+ }
+ }
+ }
+ if (cparams_.decoding_speed_tier >= 1 && cparams_.responsive &&
+ cparams_.ModularPartIsLossless()) {
+ cparams_.options.tree_kind =
+ ModularOptions::TreeKind::kTrivialTreeNoPredictor;
+ cparams_.options.nb_repeats = 0;
+ }
+ stream_images_.resize(num_streams);
+
+ // use a sensible default if nothing explicit is specified:
+ // Squeeze for lossy, no squeeze for lossless
+ if (cparams_.responsive < 0) {
+ if (cparams_.ModularPartIsLossless()) {
+ cparams_.responsive = 0;
+ } else {
+ cparams_.responsive = 1;
+ }
+ }
+
+ if (cparams_.speed_tier > SpeedTier::kWombat) {
+ cparams_.options.splitting_heuristics_node_threshold = 192;
+ } else {
+ cparams_.options.splitting_heuristics_node_threshold = 96;
+ }
+ {
+ // Set properties.
+ std::vector<uint32_t> prop_order;
+ if (cparams_.responsive) {
+ // Properties in order of their likelihood of being useful for Squeeze
+ // residuals.
+ prop_order = {0, 1, 4, 5, 6, 7, 8, 15, 9, 10, 11, 12, 13, 14, 2, 3};
+ } else {
+ // Same, but for the non-Squeeze case.
+ prop_order = {0, 1, 15, 9, 10, 11, 12, 13, 14, 2, 3, 4, 5, 6, 7, 8};
+ }
+ switch (cparams_.speed_tier) {
+ case SpeedTier::kSquirrel:
+ cparams_.options.splitting_heuristics_properties.assign(
+ prop_order.begin(), prop_order.begin() + 8);
+ cparams_.options.max_property_values = 32;
+ break;
+ case SpeedTier::kKitten:
+ cparams_.options.splitting_heuristics_properties.assign(
+ prop_order.begin(), prop_order.begin() + 10);
+ cparams_.options.max_property_values = 64;
+ break;
+ case SpeedTier::kTortoise:
+ cparams_.options.splitting_heuristics_properties = prop_order;
+ cparams_.options.max_property_values = 256;
+ break;
+ default:
+ cparams_.options.splitting_heuristics_properties.assign(
+ prop_order.begin(), prop_order.begin() + 6);
+ cparams_.options.max_property_values = 16;
+ break;
+ }
+ if (cparams_.speed_tier > SpeedTier::kTortoise) {
+ // Gradient in previous channels.
+ for (int i = 0; i < cparams_.options.max_properties; i++) {
+ cparams_.options.splitting_heuristics_properties.push_back(
+ kNumNonrefProperties + i * 4 + 3);
+ }
+ } else {
+ // All the extra properties in Tortoise mode.
+ for (int i = 0; i < cparams_.options.max_properties * 4; i++) {
+ cparams_.options.splitting_heuristics_properties.push_back(
+ kNumNonrefProperties + i);
+ }
+ }
+ }
+
+ if (cparams_.options.predictor == static_cast<Predictor>(-1)) {
+ // no explicit predictor(s) given, set a good default
+ if ((cparams_.speed_tier <= SpeedTier::kTortoise ||
+ cparams_.modular_mode == false) &&
+ cparams_.IsLossless() && cparams_.responsive == false) {
+ // TODO(veluca): allow all predictors that don't break residual
+ // multipliers in lossy mode.
+ cparams_.options.predictor = Predictor::Variable;
+ } else if (cparams_.responsive || cparams_.lossy_palette) {
+ // zero predictor for Squeeze residues and lossy palette
+ cparams_.options.predictor = Predictor::Zero;
+ } else if (!cparams_.IsLossless()) {
+ // If not responsive and lossy. TODO(veluca): use near_lossless instead?
+ cparams_.options.predictor = Predictor::Gradient;
+ } else if (cparams_.speed_tier < SpeedTier::kFalcon) {
+ // try median and weighted predictor for anything else
+ cparams_.options.predictor = Predictor::Best;
+ } else if (cparams_.speed_tier == SpeedTier::kFalcon) {
+ // just weighted predictor in falcon mode
+ cparams_.options.predictor = Predictor::Weighted;
+ } else if (cparams_.speed_tier > SpeedTier::kFalcon) {
+ // just gradient predictor in thunder mode
+ cparams_.options.predictor = Predictor::Gradient;
+ }
+ } else {
+ delta_pred_ = cparams_.options.predictor;
+ if (cparams_.lossy_palette) cparams_.options.predictor = Predictor::Zero;
+ }
+ if (!cparams_.ModularPartIsLossless()) {
+ if (cparams_.options.predictor == Predictor::Weighted ||
+ cparams_.options.predictor == Predictor::Variable ||
+ cparams_.options.predictor == Predictor::Best)
+ cparams_.options.predictor = Predictor::Zero;
+ }
+ tree_splits_.push_back(0);
+ if (cparams_.modular_mode == false) {
+ cparams_.options.fast_decode_multiplier = 1.0f;
+ tree_splits_.push_back(ModularStreamId::VarDCTDC(0).ID(frame_dim_));
+ tree_splits_.push_back(ModularStreamId::ModularDC(0).ID(frame_dim_));
+ tree_splits_.push_back(ModularStreamId::ACMetadata(0).ID(frame_dim_));
+ tree_splits_.push_back(ModularStreamId::QuantTable(0).ID(frame_dim_));
+ tree_splits_.push_back(ModularStreamId::ModularAC(0, 0).ID(frame_dim_));
+ ac_metadata_size.resize(frame_dim_.num_dc_groups);
+ extra_dc_precision.resize(frame_dim_.num_dc_groups);
+ }
+ tree_splits_.push_back(num_streams);
+ cparams_.options.max_chan_size = frame_dim_.group_dim;
+ cparams_.options.group_dim = frame_dim_.group_dim;
+
+ // TODO(veluca): figure out how to use different predictor sets per channel.
+ stream_options_.resize(num_streams, cparams_.options);
+}
+
+bool do_transform(Image& image, const Transform& tr,
+ const weighted::Header& wp_header,
+ jxl::ThreadPool* pool = nullptr, bool force_jxlart = false) {
+ Transform t = tr;
+ bool did_it = true;
+ if (force_jxlart) {
+ if (!t.MetaApply(image)) return false;
+ } else {
+ did_it = TransformForward(t, image, wp_header, pool);
+ }
+ if (did_it) image.transform.push_back(t);
+ return did_it;
+}
+
+Status ModularFrameEncoder::ComputeEncodingData(
+ const FrameHeader& frame_header, const ImageMetadata& metadata,
+ Image3F* JXL_RESTRICT color, const std::vector<ImageF>& extra_channels,
+ PassesEncoderState* JXL_RESTRICT enc_state, const JxlCmsInterface& cms,
+ ThreadPool* pool, AuxOut* aux_out, bool do_color) {
+ JXL_DEBUG_V(6, "Computing modular encoding data for frame %s",
+ frame_header.DebugString().c_str());
+
+ if (do_color && frame_header.loop_filter.gab) {
+ float w = 0.9908511000000001f;
+ float weights[3] = {w, w, w};
+ GaborishInverse(color, weights, pool);
+ }
+
+ if (do_color && metadata.bit_depth.bits_per_sample <= 16 &&
+ cparams_.speed_tier < SpeedTier::kCheetah &&
+ cparams_.decoding_speed_tier < 2) {
+ FindBestPatchDictionary(*color, enc_state, cms, nullptr, aux_out,
+ cparams_.color_transform == ColorTransform::kXYB);
+ PatchDictionaryEncoder::SubtractFrom(
+ enc_state->shared.image_features.patches, color);
+ }
+
+ // Convert ImageBundle to modular Image object
+ const size_t xsize = frame_dim_.xsize;
+ const size_t ysize = frame_dim_.ysize;
+
+ int nb_chans = 3;
+ if (metadata.color_encoding.IsGray() &&
+ cparams_.color_transform == ColorTransform::kNone) {
+ nb_chans = 1;
+ }
+ if (!do_color) nb_chans = 0;
+
+ nb_chans += extra_channels.size();
+
+ bool fp = metadata.bit_depth.floating_point_sample &&
+ cparams_.color_transform != ColorTransform::kXYB;
+
+ // bits_per_sample is just metadata for XYB images.
+ if (metadata.bit_depth.bits_per_sample >= 32 && do_color &&
+ cparams_.color_transform != ColorTransform::kXYB) {
+ if (metadata.bit_depth.bits_per_sample == 32 && fp == false) {
+ return JXL_FAILURE("uint32_t not supported in enc_modular");
+ } else if (metadata.bit_depth.bits_per_sample > 32) {
+ return JXL_FAILURE("bits_per_sample > 32 not supported");
+ }
+ }
+
+ // in the non-float case, there is an implicit 0 sign bit
+ int max_bitdepth =
+ do_color ? metadata.bit_depth.bits_per_sample + (fp ? 0 : 1) : 0;
+ Image& gi = stream_images_[0];
+ gi = Image(xsize, ysize, metadata.bit_depth.bits_per_sample, nb_chans);
+ int c = 0;
+ if (cparams_.color_transform == ColorTransform::kXYB &&
+ cparams_.modular_mode == true) {
+ float enc_factors[3] = {32768.0f, 2048.0f, 2048.0f};
+ if (cparams_.butteraugli_distance > 0 && !cparams_.responsive) {
+ // quantize XYB here and then treat it as a lossless image
+ enc_factors[0] *= 1.f / (1.f + 23.f * cparams_.butteraugli_distance);
+ enc_factors[1] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance);
+ enc_factors[2] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance);
+ cparams_.butteraugli_distance = 0;
+ }
+ if (cparams_.manual_xyb_factors.size() == 3) {
+ DequantMatricesSetCustomDC(&enc_state->shared.matrices,
+ cparams_.manual_xyb_factors.data());
+ // TODO(jon): update max_bitdepth in this case
+ } else {
+ DequantMatricesSetCustomDC(&enc_state->shared.matrices, enc_factors);
+ max_bitdepth = 12;
+ }
+ }
+ pixel_type maxval = gi.bitdepth < 32 ? (1u << gi.bitdepth) - 1 : 0;
+ if (do_color) {
+ for (; c < 3; c++) {
+ if (metadata.color_encoding.IsGray() &&
+ cparams_.color_transform == ColorTransform::kNone &&
+ c != (cparams_.color_transform == ColorTransform::kXYB ? 1 : 0))
+ continue;
+ int c_out = c;
+ // XYB is encoded as YX(B-Y)
+ if (cparams_.color_transform == ColorTransform::kXYB && c < 2)
+ c_out = 1 - c_out;
+ double factor = maxval;
+ if (cparams_.color_transform == ColorTransform::kXYB)
+ factor = enc_state->shared.matrices.InvDCQuant(c);
+ if (c == 2 && cparams_.color_transform == ColorTransform::kXYB) {
+ JXL_ASSERT(!fp);
+ for (size_t y = 0; y < ysize; ++y) {
+ const float* const JXL_RESTRICT row_in = color->PlaneRow(c, y);
+ pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y);
+ pixel_type* const JXL_RESTRICT row_Y = gi.channel[0].Row(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ row_out[x] = row_in[x] * factor + 0.5f;
+ row_out[x] -= row_Y[x];
+ // zero the lsb of B
+ row_out[x] = row_out[x] / 2 * 2;
+ }
+ }
+ } else {
+ int bits = metadata.bit_depth.bits_per_sample;
+ int exp_bits = metadata.bit_depth.exponent_bits_per_sample;
+ gi.channel[c_out].hshift =
+ enc_state->shared.frame_header.chroma_subsampling.HShift(c);
+ gi.channel[c_out].vshift =
+ enc_state->shared.frame_header.chroma_subsampling.VShift(c);
+ size_t xsize_shifted = DivCeil(xsize, 1 << gi.channel[c_out].hshift);
+ size_t ysize_shifted = DivCeil(ysize, 1 << gi.channel[c_out].vshift);
+ gi.channel[c_out].shrink(xsize_shifted, ysize_shifted);
+ std::atomic<bool> has_error{false};
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, ysize_shifted, ThreadPool::NoInit,
+ [&](const int task, const int thread) {
+ const size_t y = task;
+ const float* const JXL_RESTRICT row_in = color->PlaneRow(c, y);
+ pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y);
+ if (!float_to_int(row_in, row_out, xsize_shifted, bits, exp_bits,
+ fp, factor)) {
+ has_error = true;
+ };
+ },
+ "float2int"));
+ if (has_error) {
+ return JXL_FAILURE("Error in float to integer conversion");
+ }
+ }
+ }
+ if (metadata.color_encoding.IsGray() &&
+ cparams_.color_transform == ColorTransform::kNone)
+ c = 1;
+ }
+
+ for (size_t ec = 0; ec < extra_channels.size(); ec++, c++) {
+ const ExtraChannelInfo& eci = metadata.extra_channel_info[ec];
+ size_t ecups = frame_header.extra_channel_upsampling[ec];
+ gi.channel[c].shrink(DivCeil(frame_dim_.xsize_upsampled, ecups),
+ DivCeil(frame_dim_.ysize_upsampled, ecups));
+ gi.channel[c].hshift = gi.channel[c].vshift =
+ CeilLog2Nonzero(ecups) - CeilLog2Nonzero(frame_header.upsampling);
+
+ int bits = eci.bit_depth.bits_per_sample;
+ int exp_bits = eci.bit_depth.exponent_bits_per_sample;
+ bool fp = eci.bit_depth.floating_point_sample;
+ double factor = (fp ? 1 : ((1u << eci.bit_depth.bits_per_sample) - 1));
+ if (bits + (fp ? 0 : 1) > max_bitdepth) max_bitdepth = bits + (fp ? 0 : 1);
+ std::atomic<bool> has_error{false};
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, gi.channel[c].plane.ysize(), ThreadPool::NoInit,
+ [&](const int task, const int thread) {
+ const size_t y = task;
+ const float* const JXL_RESTRICT row_in = extra_channels[ec].Row(y);
+ pixel_type* const JXL_RESTRICT row_out = gi.channel[c].Row(y);
+ if (!float_to_int(row_in, row_out, gi.channel[c].plane.xsize(), bits,
+ exp_bits, fp, factor)) {
+ has_error = true;
+ };
+ },
+ "float2int"));
+ if (has_error) return JXL_FAILURE("Error in float to integer conversion");
+ }
+ JXL_ASSERT(c == nb_chans);
+
+ int level_max_bitdepth = (cparams_.level == 5 ? 16 : 32);
+ if (max_bitdepth > level_max_bitdepth)
+ return JXL_FAILURE(
+ "Bitdepth too high for level %i (need %i bits, have only %i in this "
+ "level)",
+ cparams_.level, max_bitdepth, level_max_bitdepth);
+
+ // Set options and apply transformations
+ if (!cparams_.ModularPartIsLossless()) {
+ if (cparams_.palette_colors != 0) {
+ JXL_DEBUG_V(3, "Lossy encode, not doing palette transforms");
+ }
+ if (cparams_.color_transform == ColorTransform::kXYB) {
+ cparams_.channel_colors_pre_transform_percent = 0;
+ }
+ cparams_.channel_colors_percent = 0;
+ cparams_.palette_colors = 0;
+ cparams_.lossy_palette = false;
+ }
+
+ // if few colors, do all-channel palette before trying channel palette
+ // Logic is as follows:
+ // - if you can make a palette with few colors (arbitrary threshold: 200),
+ // then you can also make channel palettes, but they will just be extra
+ // signaling cost for almost no benefit
+ // - if the palette needs more colors, then channel palette might help to
+ // reduce palette signaling cost
+ if (cparams_.palette_colors != 0 &&
+ cparams_.speed_tier < SpeedTier::kFalcon) {
+ // all-channel palette (e.g. RGBA)
+ if (gi.channel.size() > 1) {
+ Transform maybe_palette(TransformId::kPalette);
+ maybe_palette.begin_c = gi.nb_meta_channels;
+ maybe_palette.num_c = gi.channel.size() - gi.nb_meta_channels;
+ maybe_palette.nb_colors =
+ std::min(std::min(200, (int)(xsize * ysize / 8)),
+ std::abs(cparams_.palette_colors) / 16);
+ maybe_palette.ordered_palette = cparams_.palette_colors >= 0;
+ maybe_palette.lossy_palette = false;
+ do_transform(gi, maybe_palette, weighted::Header(), pool);
+ }
+ }
+
+ // Global channel palette
+ if (cparams_.channel_colors_pre_transform_percent > 0 &&
+ !cparams_.lossy_palette &&
+ (cparams_.speed_tier <= SpeedTier::kThunder ||
+ (do_color && metadata.bit_depth.bits_per_sample > 8))) {
+ // single channel palette (like FLIF's ChannelCompact)
+ size_t nb_channels = gi.channel.size() - gi.nb_meta_channels;
+ int orig_bitdepth = max_bitdepth;
+ max_bitdepth = 0;
+ for (size_t i = 0; i < nb_channels; i++) {
+ int32_t min, max;
+ compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max);
+ int64_t colors = (int64_t)max - min + 1;
+ JXL_DEBUG_V(10, "Channel %" PRIuS ": range=%i..%i", i, min, max);
+ Transform maybe_palette_1(TransformId::kPalette);
+ maybe_palette_1.begin_c = i + gi.nb_meta_channels;
+ maybe_palette_1.num_c = 1;
+ // simple heuristic: if less than X percent of the values in the range
+ // actually occur, it is probably worth it to do a compaction
+ // (but only if the channel palette is less than 6% the size of the
+ // image itself)
+ maybe_palette_1.nb_colors = std::min(
+ (int)(xsize * ysize / 16),
+ (int)(cparams_.channel_colors_pre_transform_percent / 100. * colors));
+ if (do_transform(gi, maybe_palette_1, weighted::Header(), pool)) {
+ // effective bit depth is lower, adjust quantization accordingly
+ compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max);
+ if (max < maxval) maxval = max;
+ int ch_bitdepth =
+ (max > 0 ? CeilLog2Nonzero(static_cast<uint32_t>(max)) : 0);
+ if (ch_bitdepth > max_bitdepth) max_bitdepth = ch_bitdepth;
+ } else
+ max_bitdepth = orig_bitdepth;
+ }
+ }
+
+ // Global palette
+ if ((cparams_.palette_colors != 0 || cparams_.lossy_palette) &&
+ cparams_.speed_tier < SpeedTier::kFalcon) {
+ // all-channel palette (e.g. RGBA)
+ if (gi.channel.size() - gi.nb_meta_channels > 1) {
+ Transform maybe_palette(TransformId::kPalette);
+ maybe_palette.begin_c = gi.nb_meta_channels;
+ maybe_palette.num_c = gi.channel.size() - gi.nb_meta_channels;
+ maybe_palette.nb_colors =
+ std::min((int)(xsize * ysize / 8), std::abs(cparams_.palette_colors));
+ maybe_palette.ordered_palette = cparams_.palette_colors >= 0;
+ maybe_palette.lossy_palette =
+ (cparams_.lossy_palette && maybe_palette.num_c == 3);
+ if (maybe_palette.lossy_palette) {
+ maybe_palette.predictor = delta_pred_;
+ }
+ // TODO(veluca): use a custom weighted header if using the weighted
+ // predictor.
+ do_transform(gi, maybe_palette, weighted::Header(), pool,
+ cparams_.options.zero_tokens);
+ }
+ // all-minus-one-channel palette (RGB with separate alpha, or CMY with
+ // separate K)
+ if (gi.channel.size() - gi.nb_meta_channels > 3) {
+ Transform maybe_palette_3(TransformId::kPalette);
+ maybe_palette_3.begin_c = gi.nb_meta_channels;
+ maybe_palette_3.num_c = gi.channel.size() - gi.nb_meta_channels - 1;
+ maybe_palette_3.nb_colors =
+ std::min((int)(xsize * ysize / 8), std::abs(cparams_.palette_colors));
+ maybe_palette_3.ordered_palette = cparams_.palette_colors >= 0;
+ maybe_palette_3.lossy_palette = cparams_.lossy_palette;
+ if (maybe_palette_3.lossy_palette) {
+ maybe_palette_3.predictor = delta_pred_;
+ }
+ do_transform(gi, maybe_palette_3, weighted::Header(), pool,
+ cparams_.options.zero_tokens);
+ }
+ }
+
+ // don't do an RCT if we're short on bits
+ if (cparams_.color_transform == ColorTransform::kNone && do_color &&
+ gi.channel.size() - gi.nb_meta_channels >= 3 &&
+ max_bitdepth + 1 < level_max_bitdepth) {
+ if (cparams_.colorspace < 0 && (!cparams_.ModularPartIsLossless() ||
+ cparams_.speed_tier > SpeedTier::kHare)) {
+ Transform ycocg{TransformId::kRCT};
+ ycocg.rct_type = 6;
+ ycocg.begin_c = gi.nb_meta_channels;
+ do_transform(gi, ycocg, weighted::Header(), pool);
+ max_bitdepth++;
+ } else if (cparams_.colorspace > 0) {
+ Transform sg(TransformId::kRCT);
+ sg.begin_c = gi.nb_meta_channels;
+ sg.rct_type = cparams_.colorspace;
+ do_transform(gi, sg, weighted::Header(), pool);
+ max_bitdepth++;
+ }
+ }
+
+ // don't do squeeze if we don't have some spare bits
+ if (cparams_.responsive && !gi.channel.empty() &&
+ max_bitdepth + 2 < level_max_bitdepth) {
+ Transform t(TransformId::kSqueeze);
+ t.squeezes = cparams_.squeezes;
+ do_transform(gi, t, weighted::Header(), pool);
+ max_bitdepth += 2;
+ }
+
+ if (max_bitdepth + 1 > level_max_bitdepth) {
+ // force no group RCTs if we don't have a spare bit
+ cparams_.colorspace = 0;
+ }
+ JXL_ASSERT(max_bitdepth <= level_max_bitdepth);
+
+ std::vector<uint32_t> quants;
+
+ if (!cparams_.ModularPartIsLossless()) {
+ quants.resize(gi.channel.size(), 1);
+ float quantizer = 0.25f;
+ if (!cparams_.responsive) {
+ JXL_DEBUG_V(1,
+ "Warning: lossy compression without Squeeze "
+ "transform is just color quantization.");
+ quantizer *= 0.1f;
+ }
+ float bitdepth_correction = 1.f;
+ if (cparams_.color_transform != ColorTransform::kXYB) {
+ bitdepth_correction = maxval / 255.f;
+ }
+ std::vector<float> quantizers;
+ float dist = cparams_.butteraugli_distance;
+ for (size_t i = 0; i < 3; i++) {
+ quantizers.push_back(quantizer * dist * bitdepth_correction);
+ }
+ for (size_t i = 0; i < extra_channels.size(); i++) {
+ int ec_bitdepth =
+ metadata.extra_channel_info[i].bit_depth.bits_per_sample;
+ pixel_type ec_maxval = ec_bitdepth < 32 ? (1u << ec_bitdepth) - 1 : 0;
+ bitdepth_correction = ec_maxval / 255.f;
+ if (i < cparams_.ec_distance.size()) dist = cparams_.ec_distance[i];
+ if (dist < 0) dist = cparams_.butteraugli_distance;
+ quantizers.push_back(quantizer * dist * bitdepth_correction);
+ }
+ if (cparams_.options.nb_repeats == 0) {
+ return JXL_FAILURE("nb_repeats = 0 not supported with modular lossy!");
+ }
+ for (uint32_t i = gi.nb_meta_channels; i < gi.channel.size(); i++) {
+ Channel& ch = gi.channel[i];
+ int shift = ch.hshift + ch.vshift; // number of pixel halvings
+ if (shift > 16) shift = 16;
+ if (shift > 0) shift--;
+ int q;
+ // assuming default Squeeze here
+ int component =
+ (do_color ? 0 : 3) + ((i - gi.nb_meta_channels) % nb_chans);
+ // last 4 channels are final chroma residuals
+ if (nb_chans > 2 && i >= gi.channel.size() - 4 && cparams_.responsive) {
+ component = 1;
+ }
+ if (cparams_.color_transform == ColorTransform::kXYB && component < 3) {
+ q = quantizers[component] * squeeze_quality_factor_xyb *
+ squeeze_xyb_qtable[component][shift];
+ } else {
+ if (cparams_.colorspace != 0 && component > 0 && component < 3) {
+ q = quantizers[component] * squeeze_quality_factor *
+ squeeze_chroma_qtable[shift];
+ } else {
+ q = quantizers[component] * squeeze_quality_factor *
+ squeeze_luma_factor * squeeze_luma_qtable[shift];
+ }
+ }
+ if (q < 1) q = 1;
+ QuantizeChannel(gi.channel[i], q);
+ quants[i] = q;
+ }
+ }
+
+ // Fill other groups.
+ struct GroupParams {
+ Rect rect;
+ int minShift;
+ int maxShift;
+ ModularStreamId id;
+ };
+ std::vector<GroupParams> stream_params;
+
+ stream_options_[0] = cparams_.options;
+
+ // DC
+ for (size_t group_id = 0; group_id < frame_dim_.num_dc_groups; group_id++) {
+ const size_t gx = group_id % frame_dim_.xsize_dc_groups;
+ const size_t gy = group_id / frame_dim_.xsize_dc_groups;
+ const Rect rect(gx * frame_dim_.dc_group_dim, gy * frame_dim_.dc_group_dim,
+ frame_dim_.dc_group_dim, frame_dim_.dc_group_dim);
+ // minShift==3 because (frame_dim.dc_group_dim >> 3) == frame_dim.group_dim
+ // maxShift==1000 is infinity
+ stream_params.push_back(
+ GroupParams{rect, 3, 1000, ModularStreamId::ModularDC(group_id)});
+ }
+ // AC global -> nothing.
+ // AC
+ for (size_t group_id = 0; group_id < frame_dim_.num_groups; group_id++) {
+ const size_t gx = group_id % frame_dim_.xsize_groups;
+ const size_t gy = group_id / frame_dim_.xsize_groups;
+ const Rect mrect(gx * frame_dim_.group_dim, gy * frame_dim_.group_dim,
+ frame_dim_.group_dim, frame_dim_.group_dim);
+ for (size_t i = 0; i < enc_state->progressive_splitter.GetNumPasses();
+ i++) {
+ int maxShift, minShift;
+ frame_header.passes.GetDownsamplingBracket(i, minShift, maxShift);
+ stream_params.push_back(GroupParams{
+ mrect, minShift, maxShift, ModularStreamId::ModularAC(group_id, i)});
+ }
+ }
+ // if there's only one group, everything ends up in GlobalModular
+ // in that case, also try RCTs/WP params for the one group
+ if (stream_params.size() == 2) {
+ stream_params.push_back(GroupParams{Rect(0, 0, xsize, ysize), 0, 1000,
+ ModularStreamId::Global()});
+ }
+ gi_channel_.resize(stream_images_.size());
+
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, stream_params.size(), ThreadPool::NoInit,
+ [&](const uint32_t i, size_t /* thread */) {
+ stream_options_[stream_params[i].id.ID(frame_dim_)] = cparams_.options;
+ JXL_CHECK(PrepareStreamParams(
+ stream_params[i].rect, cparams_, stream_params[i].minShift,
+ stream_params[i].maxShift, stream_params[i].id, do_color));
+ },
+ "ChooseParams"));
+ {
+ // Clear out channels that have been copied to groups.
+ Image& full_image = stream_images_[0];
+ size_t c = full_image.nb_meta_channels;
+ for (; c < full_image.channel.size(); c++) {
+ Channel& fc = full_image.channel[c];
+ if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break;
+ }
+ for (; c < full_image.channel.size(); c++) {
+ full_image.channel[c].plane = ImageI();
+ }
+ }
+
+ if (!quants.empty()) {
+ for (uint32_t stream_id = 0; stream_id < stream_images_.size();
+ stream_id++) {
+ // skip non-modular stream_ids
+ if (stream_id > 0 && gi_channel_[stream_id].empty()) continue;
+ const Image& image = stream_images_[stream_id];
+ const ModularOptions& options = stream_options_[stream_id];
+ for (uint32_t i = image.nb_meta_channels; i < image.channel.size(); i++) {
+ if (i >= image.nb_meta_channels &&
+ (image.channel[i].w > options.max_chan_size ||
+ image.channel[i].h > options.max_chan_size)) {
+ continue;
+ }
+ if (stream_id > 0 && gi_channel_[stream_id].empty()) continue;
+ size_t ch_id = stream_id == 0
+ ? i
+ : gi_channel_[stream_id][i - image.nb_meta_channels];
+ uint32_t q = quants[ch_id];
+ // Inform the tree splitting heuristics that each channel in each group
+ // used this quantization factor. This will produce a tree with the
+ // given multipliers.
+ if (multiplier_info_.empty() ||
+ multiplier_info_.back().range[1][0] != stream_id ||
+ multiplier_info_.back().multiplier != q) {
+ StaticPropRange range;
+ range[0] = {{i, i + 1}};
+ range[1] = {{stream_id, stream_id + 1}};
+ multiplier_info_.push_back({range, (uint32_t)q});
+ } else {
+ // Previous channel in the same group had the same quantization
+ // factor. Don't provide two different ranges, as that creates
+ // unnecessary nodes.
+ multiplier_info_.back().range[0][1] = i + 1;
+ }
+ }
+ }
+ // Merge group+channel settings that have the same channels and quantization
+ // factors, to avoid unnecessary nodes.
+ std::sort(multiplier_info_.begin(), multiplier_info_.end(),
+ [](ModularMultiplierInfo a, ModularMultiplierInfo b) {
+ return std::make_tuple(a.range, a.multiplier) <
+ std::make_tuple(b.range, b.multiplier);
+ });
+ size_t new_num = 1;
+ for (size_t i = 1; i < multiplier_info_.size(); i++) {
+ ModularMultiplierInfo& prev = multiplier_info_[new_num - 1];
+ ModularMultiplierInfo& cur = multiplier_info_[i];
+ if (prev.range[0] == cur.range[0] && prev.multiplier == cur.multiplier &&
+ prev.range[1][1] == cur.range[1][0]) {
+ prev.range[1][1] = cur.range[1][1];
+ } else {
+ multiplier_info_[new_num++] = multiplier_info_[i];
+ }
+ }
+ multiplier_info_.resize(new_num);
+ }
+
+ JXL_RETURN_IF_ERROR(ValidateChannelDimensions(gi, stream_options_[0]));
+
+ return PrepareEncoding(frame_header, pool, enc_state->heuristics.get(),
+ aux_out);
+}
+
+Status ModularFrameEncoder::PrepareEncoding(const FrameHeader& frame_header,
+ ThreadPool* pool,
+ EncoderHeuristics* heuristics,
+ AuxOut* aux_out) {
+ if (!tree_.empty()) return true;
+
+ // Compute tree.
+ size_t num_streams = stream_images_.size();
+ stream_headers_.resize(num_streams);
+ tokens_.resize(num_streams);
+
+ if (heuristics->CustomFixedTreeLossless(frame_dim_, &tree_)) {
+ // Using a fixed tree.
+ } else if (cparams_.speed_tier < SpeedTier::kFalcon ||
+ !cparams_.modular_mode) {
+ // Avoid creating a tree with leaves that don't correspond to any pixels.
+ std::vector<size_t> useful_splits;
+ useful_splits.reserve(tree_splits_.size());
+ for (size_t chunk = 0; chunk < tree_splits_.size() - 1; chunk++) {
+ bool has_pixels = false;
+ size_t start = tree_splits_[chunk];
+ size_t stop = tree_splits_[chunk + 1];
+ for (size_t i = start; i < stop; i++) {
+ if (!stream_images_[i].empty()) has_pixels = true;
+ }
+ if (has_pixels) {
+ useful_splits.push_back(tree_splits_[chunk]);
+ }
+ }
+ // Don't do anything if modular mode does not have any pixels in this image
+ if (useful_splits.empty()) return true;
+ useful_splits.push_back(tree_splits_.back());
+
+ std::atomic_flag invalid_force_wp = ATOMIC_FLAG_INIT;
+
+ std::vector<Tree> trees(useful_splits.size() - 1);
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, useful_splits.size() - 1, ThreadPool::NoInit,
+ [&](const uint32_t chunk, size_t /* thread */) {
+ // TODO(veluca): parallelize more.
+ size_t total_pixels = 0;
+ uint32_t start = useful_splits[chunk];
+ uint32_t stop = useful_splits[chunk + 1];
+ while (start < stop && stream_images_[start].empty()) ++start;
+ while (start < stop && stream_images_[stop - 1].empty()) --stop;
+ uint32_t max_c = 0;
+ if (stream_options_[start].tree_kind !=
+ ModularOptions::TreeKind::kLearn) {
+ for (size_t i = start; i < stop; i++) {
+ for (const Channel& ch : stream_images_[i].channel) {
+ total_pixels += ch.w * ch.h;
+ }
+ }
+ trees[chunk] =
+ PredefinedTree(stream_options_[start].tree_kind, total_pixels);
+ return;
+ }
+ TreeSamples tree_samples;
+ if (!tree_samples.SetPredictor(stream_options_[start].predictor,
+ stream_options_[start].wp_tree_mode)) {
+ invalid_force_wp.test_and_set(std::memory_order_acq_rel);
+ return;
+ }
+ if (!tree_samples.SetProperties(
+ stream_options_[start].splitting_heuristics_properties,
+ stream_options_[start].wp_tree_mode)) {
+ invalid_force_wp.test_and_set(std::memory_order_acq_rel);
+ return;
+ }
+ std::vector<pixel_type> pixel_samples;
+ std::vector<pixel_type> diff_samples;
+ std::vector<uint32_t> group_pixel_count;
+ std::vector<uint32_t> channel_pixel_count;
+ for (size_t i = start; i < stop; i++) {
+ max_c = std::max<uint32_t>(stream_images_[i].channel.size(), max_c);
+ CollectPixelSamples(stream_images_[i], stream_options_[i], i,
+ group_pixel_count, channel_pixel_count,
+ pixel_samples, diff_samples);
+ }
+ StaticPropRange range;
+ range[0] = {{0, max_c}};
+ range[1] = {{start, stop}};
+ auto local_multiplier_info = multiplier_info_;
+
+ tree_samples.PreQuantizeProperties(
+ range, local_multiplier_info, group_pixel_count,
+ channel_pixel_count, pixel_samples, diff_samples,
+ stream_options_[start].max_property_values);
+ for (size_t i = start; i < stop; i++) {
+ JXL_CHECK(ModularGenericCompress(
+ stream_images_[i], stream_options_[i], /*writer=*/nullptr,
+ /*aux_out=*/nullptr, 0, i, &tree_samples, &total_pixels));
+ }
+
+ // TODO(veluca): parallelize more.
+ trees[chunk] =
+ LearnTree(std::move(tree_samples), total_pixels,
+ stream_options_[start], local_multiplier_info, range);
+ },
+ "LearnTrees"));
+ if (invalid_force_wp.test_and_set(std::memory_order_acq_rel)) {
+ return JXL_FAILURE("PrepareEncoding: force_no_wp with {Weighted}");
+ }
+ tree_.clear();
+ MergeTrees(trees, useful_splits, 0, useful_splits.size() - 1, &tree_);
+ } else {
+ // Fixed tree.
+ size_t total_pixels = 0;
+ for (const Image& img : stream_images_) {
+ for (const Channel& ch : img.channel) {
+ total_pixels += ch.w * ch.h;
+ }
+ }
+ if (cparams_.speed_tier <= SpeedTier::kFalcon) {
+ tree_ =
+ PredefinedTree(ModularOptions::TreeKind::kWPFixedDC, total_pixels);
+ } else if (cparams_.speed_tier <= SpeedTier::kThunder) {
+ tree_ = PredefinedTree(ModularOptions::TreeKind::kGradientFixedDC,
+ total_pixels);
+ } else {
+ tree_ = {PropertyDecisionNode::Leaf(Predictor::Gradient)};
+ }
+ }
+ tree_tokens_.resize(1);
+ tree_tokens_[0].clear();
+ Tree decoded_tree;
+ TokenizeTree(tree_, &tree_tokens_[0], &decoded_tree);
+ JXL_ASSERT(tree_.size() == decoded_tree.size());
+ tree_ = std::move(decoded_tree);
+
+ if (kPrintTree && WantDebugOutput(aux_out)) {
+ if (frame_header.dc_level > 0) {
+ PrintTree(tree_, aux_out->debug_prefix + "/dc_frame_level" +
+ std::to_string(frame_header.dc_level) + "_tree");
+ } else {
+ PrintTree(tree_, aux_out->debug_prefix + "/global_tree");
+ }
+ }
+
+ image_widths_.resize(num_streams);
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, num_streams, ThreadPool::NoInit,
+ [&](const uint32_t stream_id, size_t /* thread */) {
+ AuxOut my_aux_out;
+ if (aux_out) {
+ my_aux_out.dump_image = aux_out->dump_image;
+ my_aux_out.debug_prefix = aux_out->debug_prefix;
+ }
+ tokens_[stream_id].clear();
+ JXL_CHECK(ModularGenericCompress(
+ stream_images_[stream_id], stream_options_[stream_id],
+ /*writer=*/nullptr, &my_aux_out, 0, stream_id,
+ /*tree_samples=*/nullptr,
+ /*total_pixels=*/nullptr,
+ /*tree=*/&tree_, /*header=*/&stream_headers_[stream_id],
+ /*tokens=*/&tokens_[stream_id],
+ /*widths=*/&image_widths_[stream_id]));
+ },
+ "ComputeTokens"));
+ return true;
+}
+
+Status ModularFrameEncoder::EncodeGlobalInfo(BitWriter* writer,
+ AuxOut* aux_out) {
+ BitWriter::Allotment allotment(writer, 1);
+ // If we are using brotli, or not using modular mode.
+ if (tree_tokens_.empty() || tree_tokens_[0].empty()) {
+ writer->Write(1, 0);
+ allotment.ReclaimAndCharge(writer, kLayerModularTree, aux_out);
+ return true;
+ }
+ writer->Write(1, 1);
+ allotment.ReclaimAndCharge(writer, kLayerModularTree, aux_out);
+
+ // Write tree
+ HistogramParams params;
+ if (cparams_.speed_tier > SpeedTier::kKitten) {
+ params.clustering = HistogramParams::ClusteringType::kFast;
+ params.ans_histogram_strategy =
+ cparams_.speed_tier > SpeedTier::kThunder
+ ? HistogramParams::ANSHistogramStrategy::kFast
+ : HistogramParams::ANSHistogramStrategy::kApproximate;
+ params.lz77_method =
+ cparams_.decoding_speed_tier >= 3 && cparams_.modular_mode
+ ? (cparams_.speed_tier >= SpeedTier::kFalcon
+ ? HistogramParams::LZ77Method::kRLE
+ : HistogramParams::LZ77Method::kLZ77)
+ : HistogramParams::LZ77Method::kNone;
+ // Near-lossless DC, as well as modular mode, require choosing hybrid uint
+ // more carefully.
+ if ((!extra_dc_precision.empty() && extra_dc_precision[0] != 0) ||
+ (cparams_.modular_mode && cparams_.speed_tier < SpeedTier::kCheetah)) {
+ params.uint_method = HistogramParams::HybridUintMethod::kFast;
+ } else {
+ params.uint_method = HistogramParams::HybridUintMethod::kNone;
+ }
+ } else if (cparams_.speed_tier <= SpeedTier::kTortoise) {
+ params.lz77_method = HistogramParams::LZ77Method::kOptimal;
+ } else {
+ params.lz77_method = HistogramParams::LZ77Method::kLZ77;
+ }
+ if (cparams_.decoding_speed_tier >= 1) {
+ params.max_histograms = 12;
+ }
+ if (cparams_.decoding_speed_tier >= 1 && cparams_.responsive) {
+ params.lz77_method = cparams_.speed_tier >= SpeedTier::kCheetah
+ ? HistogramParams::LZ77Method::kRLE
+ : cparams_.speed_tier >= SpeedTier::kKitten
+ ? HistogramParams::LZ77Method::kLZ77
+ : HistogramParams::LZ77Method::kOptimal;
+ }
+ if (cparams_.decoding_speed_tier >= 2 && cparams_.responsive) {
+ params.uint_method = HistogramParams::HybridUintMethod::k000;
+ params.force_huffman = true;
+ }
+ BuildAndEncodeHistograms(params, kNumTreeContexts, tree_tokens_, &code_,
+ &context_map_, writer, kLayerModularTree, aux_out);
+ WriteTokens(tree_tokens_[0], code_, context_map_, writer, kLayerModularTree,
+ aux_out);
+ params.image_widths = image_widths_;
+ // Write histograms.
+ BuildAndEncodeHistograms(params, (tree_.size() + 1) / 2, tokens_, &code_,
+ &context_map_, writer, kLayerModularGlobal, aux_out);
+ return true;
+}
+
+Status ModularFrameEncoder::EncodeStream(BitWriter* writer, AuxOut* aux_out,
+ size_t layer,
+ const ModularStreamId& stream) {
+ size_t stream_id = stream.ID(frame_dim_);
+ if (stream_images_[stream_id].channel.empty()) {
+ return true; // Image with no channels, header never gets decoded.
+ }
+ JXL_RETURN_IF_ERROR(
+ Bundle::Write(stream_headers_[stream_id], writer, layer, aux_out));
+ WriteTokens(tokens_[stream_id], code_, context_map_, writer, layer, aux_out);
+ return true;
+}
+
+namespace {
+float EstimateWPCost(const Image& img, size_t i) {
+ size_t extra_bits = 0;
+ float histo_cost = 0;
+ HybridUintConfig config;
+ int32_t cutoffs[] = {-500, -392, -255, -191, -127, -95, -63, -47, -31,
+ -23, -15, -11, -7, -4, -3, -1, 0, 1,
+ 3, 5, 7, 11, 15, 23, 31, 47, 63,
+ 95, 127, 191, 255, 392, 500};
+ constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
+ Histogram histo[nc] = {};
+ weighted::Header wp_header;
+ PredictorMode(i, &wp_header);
+ for (const Channel& ch : img.channel) {
+ const intptr_t onerow = ch.plane.PixelsPerRow();
+ weighted::State wp_state(wp_header, ch.w, ch.h);
+ Properties properties(1);
+ for (size_t y = 0; y < ch.h; y++) {
+ const pixel_type* JXL_RESTRICT r = ch.Row(y);
+ for (size_t x = 0; x < ch.w; x++) {
+ size_t offset = 0;
+ pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
+ pixel_type_w top = (y ? *(r + x - onerow) : left);
+ pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
+ pixel_type_w topright =
+ (x + 1 < ch.w && y ? *(r + x + 1 - onerow) : top);
+ pixel_type_w toptop = (y > 1 ? *(r + x - onerow - onerow) : top);
+ pixel_type guess = wp_state.Predict</*compute_properties=*/true>(
+ x, y, ch.w, top, left, topright, topleft, toptop, &properties,
+ offset);
+ size_t ctx = 0;
+ for (int c : cutoffs) {
+ ctx += c >= properties[0];
+ }
+ pixel_type res = r[x] - guess;
+ uint32_t token, nbits, bits;
+ config.Encode(PackSigned(res), &token, &nbits, &bits);
+ histo[ctx].Add(token);
+ extra_bits += nbits;
+ wp_state.UpdateErrors(r[x], x, y, ch.w);
+ }
+ }
+ for (size_t h = 0; h < nc; h++) {
+ histo_cost += histo[h].ShannonEntropy();
+ histo[h].Clear();
+ }
+ }
+ return histo_cost + extra_bits;
+}
+
+float EstimateCost(const Image& img) {
+ // TODO(veluca): consider SIMDfication of this code.
+ size_t extra_bits = 0;
+ float histo_cost = 0;
+ HybridUintConfig config;
+ uint32_t cutoffs[] = {0, 1, 3, 5, 7, 11, 15, 23, 31,
+ 47, 63, 95, 127, 191, 255, 392, 500};
+ constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1;
+ Histogram histo[nc] = {};
+ for (const Channel& ch : img.channel) {
+ const intptr_t onerow = ch.plane.PixelsPerRow();
+ for (size_t y = 0; y < ch.h; y++) {
+ const pixel_type* JXL_RESTRICT r = ch.Row(y);
+ for (size_t x = 0; x < ch.w; x++) {
+ pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
+ pixel_type_w top = (y ? *(r + x - onerow) : left);
+ pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
+ size_t maxdiff = std::max(std::max(left, top), topleft) -
+ std::min(std::min(left, top), topleft);
+ size_t ctx = 0;
+ for (uint32_t c : cutoffs) {
+ ctx += c > maxdiff;
+ }
+ pixel_type res = r[x] - ClampedGradient(top, left, topleft);
+ uint32_t token, nbits, bits;
+ config.Encode(PackSigned(res), &token, &nbits, &bits);
+ histo[ctx].Add(token);
+ extra_bits += nbits;
+ }
+ }
+ for (size_t h = 0; h < nc; h++) {
+ histo_cost += histo[h].ShannonEntropy();
+ histo[h].Clear();
+ }
+ }
+ return histo_cost + extra_bits;
+}
+
+} // namespace
+
+Status ModularFrameEncoder::PrepareStreamParams(const Rect& rect,
+ const CompressParams& cparams_,
+ int minShift, int maxShift,
+ const ModularStreamId& stream,
+ bool do_color) {
+ size_t stream_id = stream.ID(frame_dim_);
+ Image& full_image = stream_images_[0];
+ const size_t xsize = rect.xsize();
+ const size_t ysize = rect.ysize();
+ Image& gi = stream_images_[stream_id];
+ if (stream_id > 0) {
+ gi = Image(xsize, ysize, full_image.bitdepth, 0);
+ // start at the first bigger-than-frame_dim.group_dim non-metachannel
+ size_t c = full_image.nb_meta_channels;
+ for (; c < full_image.channel.size(); c++) {
+ Channel& fc = full_image.channel[c];
+ if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break;
+ }
+ for (; c < full_image.channel.size(); c++) {
+ Channel& fc = full_image.channel[c];
+ int shift = std::min(fc.hshift, fc.vshift);
+ if (shift > maxShift) continue;
+ if (shift < minShift) continue;
+ Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift,
+ rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h);
+ if (r.xsize() == 0 || r.ysize() == 0) continue;
+ gi_channel_[stream_id].push_back(c);
+ Channel gc(r.xsize(), r.ysize());
+ gc.hshift = fc.hshift;
+ gc.vshift = fc.vshift;
+ for (size_t y = 0; y < r.ysize(); ++y) {
+ memcpy(gc.Row(y), r.ConstRow(fc.plane, y),
+ r.xsize() * sizeof(pixel_type));
+ }
+ gi.channel.emplace_back(std::move(gc));
+ }
+
+ if (gi.channel.empty()) return true;
+ // Do some per-group transforms
+
+ // Local palette
+ // TODO(veluca): make this work with quantize-after-prediction in lossy
+ // mode.
+ if (cparams_.butteraugli_distance == 0.f && cparams_.palette_colors != 0 &&
+ cparams_.speed_tier < SpeedTier::kCheetah) {
+ // all-channel palette (e.g. RGBA)
+ if (gi.channel.size() - gi.nb_meta_channels > 1) {
+ Transform maybe_palette(TransformId::kPalette);
+ maybe_palette.begin_c = gi.nb_meta_channels;
+ maybe_palette.num_c = gi.channel.size() - gi.nb_meta_channels;
+ maybe_palette.nb_colors = std::abs(cparams_.palette_colors);
+ maybe_palette.ordered_palette = cparams_.palette_colors >= 0;
+ do_transform(gi, maybe_palette, weighted::Header());
+ }
+ // all-minus-one-channel palette (RGB with separate alpha, or CMY with
+ // separate K)
+ if (gi.channel.size() - gi.nb_meta_channels > 3) {
+ Transform maybe_palette_3(TransformId::kPalette);
+ maybe_palette_3.begin_c = gi.nb_meta_channels;
+ maybe_palette_3.num_c = gi.channel.size() - gi.nb_meta_channels - 1;
+ maybe_palette_3.nb_colors = std::abs(cparams_.palette_colors);
+ maybe_palette_3.ordered_palette = cparams_.palette_colors >= 0;
+ maybe_palette_3.lossy_palette = cparams_.lossy_palette;
+ if (maybe_palette_3.lossy_palette) {
+ maybe_palette_3.predictor = Predictor::Weighted;
+ }
+ do_transform(gi, maybe_palette_3, weighted::Header());
+ }
+ }
+
+ // Local channel palette
+ if (cparams_.channel_colors_percent > 0 &&
+ cparams_.butteraugli_distance == 0.f && !cparams_.lossy_palette &&
+ cparams_.speed_tier < SpeedTier::kCheetah &&
+ !(cparams_.responsive && cparams_.decoding_speed_tier >= 1)) {
+ // single channel palette (like FLIF's ChannelCompact)
+ size_t nb_channels = gi.channel.size() - gi.nb_meta_channels;
+ for (size_t i = 0; i < nb_channels; i++) {
+ int32_t min, max;
+ compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max);
+ int64_t colors = (int64_t)max - min + 1;
+ JXL_DEBUG_V(10, "Channel %" PRIuS ": range=%i..%i", i, min, max);
+ Transform maybe_palette_1(TransformId::kPalette);
+ maybe_palette_1.begin_c = i + gi.nb_meta_channels;
+ maybe_palette_1.num_c = 1;
+ // simple heuristic: if less than X percent of the values in the range
+ // actually occur, it is probably worth it to do a compaction
+ // (but only if the channel palette is less than 80% the size of the
+ // image itself)
+ maybe_palette_1.nb_colors =
+ std::min((int)(xsize * ysize * 0.8),
+ (int)(cparams_.channel_colors_percent / 100. * colors));
+ do_transform(gi, maybe_palette_1, weighted::Header());
+ }
+ }
+ }
+
+ // lossless and no specific color transform specified: try Nothing, YCoCg,
+ // and 17 RCTs
+ if (cparams_.color_transform == ColorTransform::kNone &&
+ cparams_.IsLossless() && cparams_.colorspace < 0 &&
+ gi.channel.size() - gi.nb_meta_channels >= 3 &&
+ cparams_.responsive == false && do_color &&
+ cparams_.speed_tier <= SpeedTier::kHare) {
+ Transform sg(TransformId::kRCT);
+ sg.begin_c = gi.nb_meta_channels;
+ size_t nb_rcts_to_try = 0;
+ switch (cparams_.speed_tier) {
+ case SpeedTier::kLightning:
+ case SpeedTier::kThunder:
+ case SpeedTier::kFalcon:
+ case SpeedTier::kCheetah:
+ nb_rcts_to_try = 0; // Just do global YCoCg
+ break;
+ case SpeedTier::kHare:
+ nb_rcts_to_try = 4;
+ break;
+ case SpeedTier::kWombat:
+ nb_rcts_to_try = 5;
+ break;
+ case SpeedTier::kSquirrel:
+ nb_rcts_to_try = 7;
+ break;
+ case SpeedTier::kKitten:
+ nb_rcts_to_try = 9;
+ break;
+ case SpeedTier::kGlacier:
+ case SpeedTier::kTortoise:
+ nb_rcts_to_try = 19;
+ break;
+ }
+ float best_cost = std::numeric_limits<float>::max();
+ size_t best_rct = 0;
+ // These should be 19 actually different transforms; the remaining ones
+ // are equivalent to one of these (note that the first two are do-nothing
+ // and YCoCg) modulo channel reordering (which only matters in the case of
+ // MA-with-prev-channels-properties) and/or sign (e.g. RmG vs GmR)
+ for (int i : {0 * 7 + 0, 0 * 7 + 6, 0 * 7 + 5, 1 * 7 + 3, 3 * 7 + 5,
+ 5 * 7 + 5, 1 * 7 + 5, 2 * 7 + 5, 1 * 7 + 1, 0 * 7 + 4,
+ 1 * 7 + 2, 2 * 7 + 1, 2 * 7 + 2, 2 * 7 + 3, 4 * 7 + 4,
+ 4 * 7 + 5, 0 * 7 + 2, 0 * 7 + 1, 0 * 7 + 3}) {
+ if (nb_rcts_to_try == 0) break;
+ sg.rct_type = i;
+ nb_rcts_to_try--;
+ if (do_transform(gi, sg, weighted::Header())) {
+ float cost = EstimateCost(gi);
+ if (cost < best_cost) {
+ best_rct = i;
+ best_cost = cost;
+ }
+ Transform t = gi.transform.back();
+ JXL_RETURN_IF_ERROR(t.Inverse(gi, weighted::Header(), nullptr));
+ gi.transform.pop_back();
+ }
+ }
+ // Apply the best RCT to the image for future encoding.
+ sg.rct_type = best_rct;
+ do_transform(gi, sg, weighted::Header());
+ } else {
+ // No need to try anything, just use the default options.
+ }
+ size_t nb_wp_modes = 1;
+ if (cparams_.speed_tier <= SpeedTier::kTortoise) {
+ nb_wp_modes = 5;
+ } else if (cparams_.speed_tier <= SpeedTier::kKitten) {
+ nb_wp_modes = 2;
+ }
+ if (nb_wp_modes > 1 &&
+ (stream_options_[stream_id].predictor == Predictor::Weighted ||
+ stream_options_[stream_id].predictor == Predictor::Best ||
+ stream_options_[stream_id].predictor == Predictor::Variable)) {
+ float best_cost = std::numeric_limits<float>::max();
+ stream_options_[stream_id].wp_mode = 0;
+ for (size_t i = 0; i < nb_wp_modes; i++) {
+ float cost = EstimateWPCost(gi, i);
+ if (cost < best_cost) {
+ best_cost = cost;
+ stream_options_[stream_id].wp_mode = i;
+ }
+ }
+ }
+ return true;
+}
+
+constexpr float q_deadzone = 0.62f;
+int QuantizeWP(const int32_t* qrow, size_t onerow, size_t c, size_t x, size_t y,
+ size_t w, weighted::State* wp_state, float value,
+ float inv_factor) {
+ float svalue = value * inv_factor;
+ PredictionResult pred =
+ PredictNoTreeWP(w, qrow + x, onerow, x, y, Predictor::Weighted, wp_state);
+ svalue -= pred.guess;
+ if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0;
+ int residual = roundf(svalue);
+ if (residual > 2 || residual < -2) residual = roundf(svalue * 0.5) * 2;
+ return residual + pred.guess;
+}
+
+int QuantizeGradient(const int32_t* qrow, size_t onerow, size_t c, size_t x,
+ size_t y, size_t w, float value, float inv_factor) {
+ float svalue = value * inv_factor;
+ PredictionResult pred =
+ PredictNoTreeNoWP(w, qrow + x, onerow, x, y, Predictor::Gradient);
+ svalue -= pred.guess;
+ if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0;
+ int residual = roundf(svalue);
+ if (residual > 2 || residual < -2) residual = roundf(svalue * 0.5) * 2;
+ return residual + pred.guess;
+}
+
+void ModularFrameEncoder::AddVarDCTDC(const Image3F& dc, size_t group_index,
+ bool nl_dc, PassesEncoderState* enc_state,
+ bool jpeg_transcode) {
+ const Rect r = enc_state->shared.DCGroupRect(group_index);
+ extra_dc_precision[group_index] = nl_dc ? 1 : 0;
+ float mul = 1 << extra_dc_precision[group_index];
+
+ size_t stream_id = ModularStreamId::VarDCTDC(group_index).ID(frame_dim_);
+ stream_options_[stream_id].max_chan_size = 0xFFFFFF;
+ stream_options_[stream_id].predictor = Predictor::Weighted;
+ stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kWPOnly;
+ if (cparams_.speed_tier >= SpeedTier::kSquirrel) {
+ stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kWPFixedDC;
+ }
+ if (cparams_.speed_tier < SpeedTier::kSquirrel && !nl_dc) {
+ stream_options_[stream_id].predictor =
+ (cparams_.speed_tier < SpeedTier::kKitten ? Predictor::Variable
+ : Predictor::Best);
+ stream_options_[stream_id].wp_tree_mode =
+ ModularOptions::TreeMode::kDefault;
+ stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn;
+ }
+ if (cparams_.decoding_speed_tier >= 1) {
+ stream_options_[stream_id].tree_kind =
+ ModularOptions::TreeKind::kGradientFixedDC;
+ }
+
+ stream_images_[stream_id] = Image(r.xsize(), r.ysize(), 8, 3);
+ if (nl_dc && stream_options_[stream_id].tree_kind ==
+ ModularOptions::TreeKind::kGradientFixedDC) {
+ JXL_ASSERT(enc_state->shared.frame_header.chroma_subsampling.Is444());
+ for (size_t c : {1, 0, 2}) {
+ float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
+ float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
+ float cfl_factor = enc_state->shared.cmap.DCFactors()[c];
+ for (size_t y = 0; y < r.ysize(); y++) {
+ int32_t* quant_row =
+ stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
+ size_t stride = stream_images_[stream_id]
+ .channel[c < 2 ? c ^ 1 : c]
+ .plane.PixelsPerRow();
+ const float* row = r.ConstPlaneRow(dc, c, y);
+ if (c == 1) {
+ for (size_t x = 0; x < r.xsize(); x++) {
+ quant_row[x] = QuantizeGradient(quant_row, stride, c, x, y,
+ r.xsize(), row[x], inv_factor);
+ }
+ } else {
+ int32_t* quant_row_y =
+ stream_images_[stream_id].channel[0].plane.Row(y);
+ for (size_t x = 0; x < r.xsize(); x++) {
+ quant_row[x] = QuantizeGradient(
+ quant_row, stride, c, x, y, r.xsize(),
+ row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor);
+ }
+ }
+ }
+ }
+ } else if (nl_dc) {
+ JXL_ASSERT(enc_state->shared.frame_header.chroma_subsampling.Is444());
+ for (size_t c : {1, 0, 2}) {
+ float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
+ float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
+ float cfl_factor = enc_state->shared.cmap.DCFactors()[c];
+ weighted::Header header;
+ weighted::State wp_state(header, r.xsize(), r.ysize());
+ for (size_t y = 0; y < r.ysize(); y++) {
+ int32_t* quant_row =
+ stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
+ size_t stride = stream_images_[stream_id]
+ .channel[c < 2 ? c ^ 1 : c]
+ .plane.PixelsPerRow();
+ const float* row = r.ConstPlaneRow(dc, c, y);
+ if (c == 1) {
+ for (size_t x = 0; x < r.xsize(); x++) {
+ quant_row[x] = QuantizeWP(quant_row, stride, c, x, y, r.xsize(),
+ &wp_state, row[x], inv_factor);
+ wp_state.UpdateErrors(quant_row[x], x, y, r.xsize());
+ }
+ } else {
+ int32_t* quant_row_y =
+ stream_images_[stream_id].channel[0].plane.Row(y);
+ for (size_t x = 0; x < r.xsize(); x++) {
+ quant_row[x] = QuantizeWP(
+ quant_row, stride, c, x, y, r.xsize(), &wp_state,
+ row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor);
+ wp_state.UpdateErrors(quant_row[x], x, y, r.xsize());
+ }
+ }
+ }
+ }
+ } else if (enc_state->shared.frame_header.chroma_subsampling.Is444()) {
+ for (size_t c : {1, 0, 2}) {
+ float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
+ float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul;
+ float cfl_factor = enc_state->shared.cmap.DCFactors()[c];
+ for (size_t y = 0; y < r.ysize(); y++) {
+ int32_t* quant_row =
+ stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y);
+ const float* row = r.ConstPlaneRow(dc, c, y);
+ if (c == 1) {
+ for (size_t x = 0; x < r.xsize(); x++) {
+ quant_row[x] = roundf(row[x] * inv_factor);
+ }
+ } else {
+ int32_t* quant_row_y =
+ stream_images_[stream_id].channel[0].plane.Row(y);
+ for (size_t x = 0; x < r.xsize(); x++) {
+ quant_row[x] =
+ roundf((row[x] - quant_row_y[x] * (y_factor * cfl_factor)) *
+ inv_factor);
+ }
+ }
+ }
+ }
+ } else {
+ for (size_t c : {1, 0, 2}) {
+ Rect rect(
+ r.x0() >> enc_state->shared.frame_header.chroma_subsampling.HShift(c),
+ r.y0() >> enc_state->shared.frame_header.chroma_subsampling.VShift(c),
+ r.xsize() >>
+ enc_state->shared.frame_header.chroma_subsampling.HShift(c),
+ r.ysize() >>
+ enc_state->shared.frame_header.chroma_subsampling.VShift(c));
+ float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul;
+ size_t ys = rect.ysize();
+ size_t xs = rect.xsize();
+ Channel& ch = stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c];
+ ch.w = xs;
+ ch.h = ys;
+ ch.shrink();
+ for (size_t y = 0; y < ys; y++) {
+ int32_t* quant_row = ch.plane.Row(y);
+ const float* row = rect.ConstPlaneRow(dc, c, y);
+ for (size_t x = 0; x < xs; x++) {
+ quant_row[x] = roundf(row[x] * inv_factor);
+ }
+ }
+ }
+ }
+
+ DequantDC(r, &enc_state->shared.dc_storage, &enc_state->shared.quant_dc,
+ stream_images_[stream_id], enc_state->shared.quantizer.MulDC(),
+ 1.0 / mul, enc_state->shared.cmap.DCFactors(),
+ enc_state->shared.frame_header.chroma_subsampling,
+ enc_state->shared.block_ctx_map);
+}
+
+void ModularFrameEncoder::AddACMetadata(size_t group_index, bool jpeg_transcode,
+ PassesEncoderState* enc_state) {
+ const Rect r = enc_state->shared.DCGroupRect(group_index);
+ size_t stream_id = ModularStreamId::ACMetadata(group_index).ID(frame_dim_);
+ stream_options_[stream_id].max_chan_size = 0xFFFFFF;
+ stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kNoWP;
+ if (jpeg_transcode) {
+ stream_options_[stream_id].tree_kind =
+ ModularOptions::TreeKind::kJpegTranscodeACMeta;
+ } else if (cparams_.speed_tier >= SpeedTier::kFalcon) {
+ stream_options_[stream_id].tree_kind =
+ ModularOptions::TreeKind::kFalconACMeta;
+ } else if (cparams_.speed_tier > SpeedTier::kKitten) {
+ stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kACMeta;
+ }
+ // If we are using a non-constant CfL field, and are in a slow enough mode,
+ // re-enable tree computation for it.
+ if (cparams_.speed_tier < SpeedTier::kSquirrel &&
+ cparams_.force_cfl_jpeg_recompression) {
+ stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn;
+ }
+ // YToX, YToB, ACS + QF, EPF
+ Image& image = stream_images_[stream_id];
+ image = Image(r.xsize(), r.ysize(), 8, 4);
+ static_assert(kColorTileDimInBlocks == 8, "Color tile size changed");
+ Rect cr(r.x0() >> 3, r.y0() >> 3, (r.xsize() + 7) >> 3, (r.ysize() + 7) >> 3);
+ image.channel[0] = Channel(cr.xsize(), cr.ysize(), 3, 3);
+ image.channel[1] = Channel(cr.xsize(), cr.ysize(), 3, 3);
+ image.channel[2] = Channel(r.xsize() * r.ysize(), 2, 0, 0);
+ ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytox_map,
+ Rect(image.channel[0].plane), &image.channel[0].plane);
+ ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytob_map,
+ Rect(image.channel[1].plane), &image.channel[1].plane);
+ size_t num = 0;
+ for (size_t y = 0; y < r.ysize(); y++) {
+ AcStrategyRow row_acs = enc_state->shared.ac_strategy.ConstRow(r, y);
+ const int32_t* row_qf = r.ConstRow(enc_state->shared.raw_quant_field, y);
+ const uint8_t* row_epf = r.ConstRow(enc_state->shared.epf_sharpness, y);
+ int32_t* out_acs = image.channel[2].plane.Row(0);
+ int32_t* out_qf = image.channel[2].plane.Row(1);
+ int32_t* row_out_epf = image.channel[3].plane.Row(y);
+ for (size_t x = 0; x < r.xsize(); x++) {
+ row_out_epf[x] = row_epf[x];
+ if (!row_acs[x].IsFirstBlock()) continue;
+ out_acs[num] = row_acs[x].RawStrategy();
+ out_qf[num] = row_qf[x] - 1;
+ num++;
+ }
+ }
+ image.channel[2].w = num;
+ ac_metadata_size[group_index] = num;
+}
+
+void ModularFrameEncoder::EncodeQuantTable(
+ size_t size_x, size_t size_y, BitWriter* writer,
+ const QuantEncoding& encoding, size_t idx,
+ ModularFrameEncoder* modular_frame_encoder) {
+ JXL_ASSERT(encoding.qraw.qtable != nullptr);
+ JXL_ASSERT(size_x * size_y * 3 == encoding.qraw.qtable->size());
+ JXL_CHECK(F16Coder::Write(encoding.qraw.qtable_den, writer));
+ if (modular_frame_encoder) {
+ JXL_CHECK(modular_frame_encoder->EncodeStream(
+ writer, nullptr, 0, ModularStreamId::QuantTable(idx)));
+ return;
+ }
+ Image image(size_x, size_y, 8, 3);
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t y = 0; y < size_y; y++) {
+ int32_t* JXL_RESTRICT row = image.channel[c].Row(y);
+ for (size_t x = 0; x < size_x; x++) {
+ row[x] = (*encoding.qraw.qtable)[c * size_x * size_y + y * size_x + x];
+ }
+ }
+ }
+ ModularOptions cfopts;
+ JXL_CHECK(ModularGenericCompress(image, cfopts, writer));
+}
+
+void ModularFrameEncoder::AddQuantTable(size_t size_x, size_t size_y,
+ const QuantEncoding& encoding,
+ size_t idx) {
+ size_t stream_id = ModularStreamId::QuantTable(idx).ID(frame_dim_);
+ JXL_ASSERT(encoding.qraw.qtable != nullptr);
+ JXL_ASSERT(size_x * size_y * 3 == encoding.qraw.qtable->size());
+ Image& image = stream_images_[stream_id];
+ image = Image(size_x, size_y, 8, 3);
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t y = 0; y < size_y; y++) {
+ int32_t* JXL_RESTRICT row = image.channel[c].Row(y);
+ for (size_t x = 0; x < size_x; x++) {
+ row[x] = (*encoding.qraw.qtable)[c * size_x * size_y + y * size_x + x];
+ }
+ }
+ }
+}
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_modular.h b/third_party/jpeg-xl/lib/jxl/enc_modular.h
new file mode 100644
index 0000000000..2af66e951f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_modular.h
@@ -0,0 +1,92 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_MODULAR_H_
+#define LIB_JXL_ENC_MODULAR_H_
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_modular.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+class ModularFrameEncoder {
+ public:
+ ModularFrameEncoder(const FrameHeader& frame_header,
+ const CompressParams& cparams_orig);
+ Status ComputeEncodingData(const FrameHeader& frame_header,
+ const ImageMetadata& metadata,
+ Image3F* JXL_RESTRICT color,
+ const std::vector<ImageF>& extra_channels,
+ PassesEncoderState* JXL_RESTRICT enc_state,
+ const JxlCmsInterface& cms, ThreadPool* pool,
+ AuxOut* aux_out, bool do_color);
+ // Encodes global info (tree + histograms) in the `writer`.
+ Status EncodeGlobalInfo(BitWriter* writer, AuxOut* aux_out);
+ // Encodes a specific modular image (identified by `stream`) in the `writer`,
+ // assigning bits to the provided `layer`.
+ Status EncodeStream(BitWriter* writer, AuxOut* aux_out, size_t layer,
+ const ModularStreamId& stream);
+ // Creates a modular image for a given DC group of VarDCT mode. `dc` is the
+ // input DC image, not quantized; the group is specified by `group_index`, and
+ // `nl_dc` decides whether to apply a near-lossless processing to the DC or
+ // not.
+ void AddVarDCTDC(const Image3F& dc, size_t group_index, bool nl_dc,
+ PassesEncoderState* enc_state, bool jpeg_transcode);
+ // Creates a modular image for the AC metadata of the given group
+ // (`group_index`).
+ void AddACMetadata(size_t group_index, bool jpeg_transcode,
+ PassesEncoderState* enc_state);
+ // Encodes a RAW quantization table in `writer`. If `modular_frame_encoder` is
+ // null, the quantization table in `encoding` is used, with dimensions `size_x
+ // x size_y`. Otherwise, the table with ID `idx` is encoded from the given
+ // `modular_frame_encoder`.
+ static void EncodeQuantTable(size_t size_x, size_t size_y, BitWriter* writer,
+ const QuantEncoding& encoding, size_t idx,
+ ModularFrameEncoder* modular_frame_encoder);
+ // Stores a quantization table for future usage with `EncodeQuantTable`.
+ void AddQuantTable(size_t size_x, size_t size_y,
+ const QuantEncoding& encoding, size_t idx);
+
+ std::vector<size_t> ac_metadata_size;
+ std::vector<uint8_t> extra_dc_precision;
+
+ private:
+ Status PrepareEncoding(const FrameHeader& frame_header, ThreadPool* pool,
+ EncoderHeuristics* heuristics,
+ AuxOut* aux_out = nullptr);
+ Status PrepareStreamParams(const Rect& rect, const CompressParams& cparams,
+ int minShift, int maxShift,
+ const ModularStreamId& stream, bool do_color);
+ std::vector<Image> stream_images_;
+ std::vector<ModularOptions> stream_options_;
+
+ Tree tree_;
+ std::vector<std::vector<Token>> tree_tokens_;
+ std::vector<GroupHeader> stream_headers_;
+ std::vector<std::vector<Token>> tokens_;
+ EntropyEncodingData code_;
+ std::vector<uint8_t> context_map_;
+ FrameDimensions frame_dim_;
+ CompressParams cparams_;
+ std::vector<size_t> tree_splits_;
+ std::vector<ModularMultiplierInfo> multiplier_info_;
+ std::vector<std::vector<uint32_t>> gi_channel_;
+ std::vector<size_t> image_widths_;
+ Predictor delta_pred_ = Predictor::Average4;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_MODULAR_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_noise.cc b/third_party/jpeg-xl/lib/jxl/enc_noise.cc
new file mode 100644
index 0000000000..54bb4482e8
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_noise.cc
@@ -0,0 +1,374 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_noise.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <numeric>
+#include <utility>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_optimize.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/opsin_params.h"
+
+namespace jxl {
+namespace {
+
+using OptimizeArray = optimize::Array<double, NoiseParams::kNumNoisePoints>;
+
+float GetScoreSumsOfAbsoluteDifferences(const Image3F& opsin, const int x,
+ const int y, const int block_size) {
+ const int small_bl_size_x = 3;
+ const int small_bl_size_y = 4;
+ const int kNumSAD =
+ (block_size - small_bl_size_x) * (block_size - small_bl_size_y);
+ // block_size x block_size reference pixels
+ int counter = 0;
+ const int offset = 2;
+
+ std::vector<float> sad(kNumSAD, 0);
+ for (int y_bl = 0; y_bl + small_bl_size_y < block_size; ++y_bl) {
+ for (int x_bl = 0; x_bl + small_bl_size_x < block_size; ++x_bl) {
+ float sad_sum = 0;
+ // size of the center patch, we compare all the patches inside window with
+ // the center one
+ for (int cy = 0; cy < small_bl_size_y; ++cy) {
+ for (int cx = 0; cx < small_bl_size_x; ++cx) {
+ float wnd = 0.5f * (opsin.PlaneRow(1, y + y_bl + cy)[x + x_bl + cx] +
+ opsin.PlaneRow(0, y + y_bl + cy)[x + x_bl + cx]);
+ float center =
+ 0.5f * (opsin.PlaneRow(1, y + offset + cy)[x + offset + cx] +
+ opsin.PlaneRow(0, y + offset + cy)[x + offset + cx]);
+ sad_sum += std::abs(center - wnd);
+ }
+ }
+ sad[counter++] = sad_sum;
+ }
+ }
+ const int kSamples = (kNumSAD) / 2;
+ // As with ROAD (rank order absolute distance), we keep the smallest half of
+ // the values in SAD (we use here the more robust patch SAD instead of
+ // absolute single-pixel differences).
+ std::sort(sad.begin(), sad.end());
+ const float total_sad_sum =
+ std::accumulate(sad.begin(), sad.begin() + kSamples, 0.0f);
+ return total_sad_sum / kSamples;
+}
+
+class NoiseHistogram {
+ public:
+ static constexpr int kBins = 256;
+
+ NoiseHistogram() { std::fill(bins, bins + kBins, 0); }
+
+ void Increment(const float x) { bins[Index(x)] += 1; }
+ int Get(const float x) const { return bins[Index(x)]; }
+ int Bin(const size_t bin) const { return bins[bin]; }
+
+ int Mode() const {
+ size_t max_idx = 0;
+ for (size_t i = 0; i < kBins; i++) {
+ if (bins[i] > bins[max_idx]) max_idx = i;
+ }
+ return max_idx;
+ }
+
+ double Quantile(double q01) const {
+ const int64_t total = std::accumulate(bins, bins + kBins, int64_t{1});
+ const int64_t target = static_cast<int64_t>(q01 * total);
+ // Until sum >= target:
+ int64_t sum = 0;
+ size_t i = 0;
+ for (; i < kBins; ++i) {
+ sum += bins[i];
+ // Exact match: assume middle of bin i
+ if (sum == target) {
+ return i + 0.5;
+ }
+ if (sum > target) break;
+ }
+
+ // Next non-empty bin (in case histogram is sparsely filled)
+ size_t next = i + 1;
+ while (next < kBins && bins[next] == 0) {
+ ++next;
+ }
+
+ // Linear interpolation according to how far into next we went
+ const double excess = target - sum;
+ const double weight_next = bins[Index(next)] / excess;
+ return ClampX(next * weight_next + i * (1.0 - weight_next));
+ }
+
+ // Inter-quartile range
+ double IQR() const { return Quantile(0.75) - Quantile(0.25); }
+
+ private:
+ template <typename T>
+ T ClampX(const T x) const {
+ return std::min(std::max(T(0), x), T(kBins - 1));
+ }
+ size_t Index(const float x) const { return ClampX(static_cast<int>(x)); }
+
+ uint32_t bins[kBins];
+};
+
+std::vector<float> GetSADScoresForPatches(const Image3F& opsin,
+ const size_t block_s,
+ const size_t num_bin,
+ NoiseHistogram* sad_histogram) {
+ std::vector<float> sad_scores(
+ (opsin.ysize() / block_s) * (opsin.xsize() / block_s), 0.0f);
+
+ int block_index = 0;
+
+ for (size_t y = 0; y + block_s <= opsin.ysize(); y += block_s) {
+ for (size_t x = 0; x + block_s <= opsin.xsize(); x += block_s) {
+ float sad_sc = GetScoreSumsOfAbsoluteDifferences(opsin, x, y, block_s);
+ sad_scores[block_index++] = sad_sc;
+ sad_histogram->Increment(sad_sc * num_bin);
+ }
+ }
+ return sad_scores;
+}
+
+float GetSADThreshold(const NoiseHistogram& histogram, const int num_bin) {
+ // Here we assume that the most patches with similar SAD value is a "flat"
+ // patches. However, some images might contain regular texture part and
+ // generate second strong peak at the histogram
+ // TODO(user) handle bimodal and heavy-tailed case
+ const int mode = histogram.Mode();
+ return static_cast<float>(mode) / NoiseHistogram::kBins;
+}
+
+// loss = sum asym * (F(x) - nl)^2 + kReg * num_points * sum (w[i] - w[i+1])^2
+// where asym = 1 if F(x) < nl, kAsym if F(x) > nl.
+struct LossFunction {
+ explicit LossFunction(std::vector<NoiseLevel> nl0) : nl(std::move(nl0)) {}
+
+ double Compute(const OptimizeArray& w, OptimizeArray* df,
+ bool skip_regularization = false) const {
+ constexpr double kReg = 0.005;
+ constexpr double kAsym = 1.1;
+ double loss_function = 0;
+ for (size_t i = 0; i < w.size(); i++) {
+ (*df)[i] = 0;
+ }
+ for (auto ind : nl) {
+ std::pair<int, float> pos = IndexAndFrac(ind.intensity);
+ JXL_DASSERT(pos.first >= 0 && static_cast<size_t>(pos.first) <
+ NoiseParams::kNumNoisePoints - 1);
+ double low = w[pos.first];
+ double hi = w[pos.first + 1];
+ double val = low * (1.0f - pos.second) + hi * pos.second;
+ double dist = val - ind.noise_level;
+ if (dist > 0) {
+ loss_function += kAsym * dist * dist;
+ (*df)[pos.first] -= kAsym * (1.0f - pos.second) * dist;
+ (*df)[pos.first + 1] -= kAsym * pos.second * dist;
+ } else {
+ loss_function += dist * dist;
+ (*df)[pos.first] -= (1.0f - pos.second) * dist;
+ (*df)[pos.first + 1] -= pos.second * dist;
+ }
+ }
+ if (skip_regularization) return loss_function;
+ for (size_t i = 0; i + 1 < w.size(); i++) {
+ double diff = w[i] - w[i + 1];
+ loss_function += kReg * nl.size() * diff * diff;
+ (*df)[i] -= kReg * diff * nl.size();
+ (*df)[i + 1] += kReg * diff * nl.size();
+ }
+ return loss_function;
+ }
+
+ std::vector<NoiseLevel> nl;
+};
+
+void OptimizeNoiseParameters(const std::vector<NoiseLevel>& noise_level,
+ NoiseParams* noise_params) {
+ constexpr double kMaxError = 1e-3;
+ static const double kPrecision = 1e-8;
+ static const int kMaxIter = 40;
+
+ float avg = 0;
+ for (const NoiseLevel& nl : noise_level) {
+ avg += nl.noise_level;
+ }
+ avg /= noise_level.size();
+
+ LossFunction loss_function(noise_level);
+ OptimizeArray parameter_vector;
+ for (size_t i = 0; i < parameter_vector.size(); i++) {
+ parameter_vector[i] = avg;
+ }
+
+ parameter_vector = optimize::OptimizeWithScaledConjugateGradientMethod(
+ loss_function, parameter_vector, kPrecision, kMaxIter);
+
+ OptimizeArray df = parameter_vector;
+ float loss = loss_function.Compute(parameter_vector, &df,
+ /*skip_regularization=*/true) /
+ noise_level.size();
+
+ // Approximation went too badly: escape with no noise at all.
+ if (loss > kMaxError) {
+ noise_params->Clear();
+ return;
+ }
+
+ for (size_t i = 0; i < parameter_vector.size(); i++) {
+ noise_params->lut[i] = std::max(parameter_vector[i], 0.0);
+ }
+}
+
+std::vector<NoiseLevel> GetNoiseLevel(
+ const Image3F& opsin, const std::vector<float>& texture_strength,
+ const float threshold, const size_t block_s) {
+ std::vector<NoiseLevel> noise_level_per_intensity;
+
+ const int filt_size = 1;
+ static const float kLaplFilter[filt_size * 2 + 1][filt_size * 2 + 1] = {
+ {-0.25f, -1.0f, -0.25f},
+ {-1.0f, 5.0f, -1.0f},
+ {-0.25f, -1.0f, -0.25f},
+ };
+
+ // The noise model is built based on channel 0.5 * (X+Y) as we notice that it
+ // is similar to the model 0.5 * (Y-X)
+ size_t patch_index = 0;
+
+ for (size_t y = 0; y + block_s <= opsin.ysize(); y += block_s) {
+ for (size_t x = 0; x + block_s <= opsin.xsize(); x += block_s) {
+ if (texture_strength[patch_index] <= threshold) {
+ // Calculate mean value
+ float mean_int = 0;
+ for (size_t y_bl = 0; y_bl < block_s; ++y_bl) {
+ for (size_t x_bl = 0; x_bl < block_s; ++x_bl) {
+ mean_int += 0.5f * (opsin.PlaneRow(1, y + y_bl)[x + x_bl] +
+ opsin.PlaneRow(0, y + y_bl)[x + x_bl]);
+ }
+ }
+ mean_int /= block_s * block_s;
+
+ // Calculate Noise level
+ float noise_level = 0;
+ size_t count = 0;
+ for (size_t y_bl = 0; y_bl < block_s; ++y_bl) {
+ for (size_t x_bl = 0; x_bl < block_s; ++x_bl) {
+ float filtered_value = 0;
+ for (int y_f = -1 * filt_size; y_f <= filt_size; ++y_f) {
+ if ((static_cast<ssize_t>(y_bl) + y_f) >= 0 &&
+ (y_bl + y_f) < block_s) {
+ for (int x_f = -1 * filt_size; x_f <= filt_size; ++x_f) {
+ if ((static_cast<ssize_t>(x_bl) + x_f) >= 0 &&
+ (x_bl + x_f) < block_s) {
+ filtered_value +=
+ 0.5f *
+ (opsin.PlaneRow(1, y + y_bl + y_f)[x + x_bl + x_f] +
+ opsin.PlaneRow(0, y + y_bl + y_f)[x + x_bl + x_f]) *
+ kLaplFilter[y_f + filt_size][x_f + filt_size];
+ } else {
+ filtered_value +=
+ 0.5f *
+ (opsin.PlaneRow(1, y + y_bl + y_f)[x + x_bl - x_f] +
+ opsin.PlaneRow(0, y + y_bl + y_f)[x + x_bl - x_f]) *
+ kLaplFilter[y_f + filt_size][x_f + filt_size];
+ }
+ }
+ } else {
+ for (int x_f = -1 * filt_size; x_f <= filt_size; ++x_f) {
+ if ((static_cast<ssize_t>(x_bl) + x_f) >= 0 &&
+ (x_bl + x_f) < block_s) {
+ filtered_value +=
+ 0.5f *
+ (opsin.PlaneRow(1, y + y_bl - y_f)[x + x_bl + x_f] +
+ opsin.PlaneRow(0, y + y_bl - y_f)[x + x_bl + x_f]) *
+ kLaplFilter[y_f + filt_size][x_f + filt_size];
+ } else {
+ filtered_value +=
+ 0.5f *
+ (opsin.PlaneRow(1, y + y_bl - y_f)[x + x_bl - x_f] +
+ opsin.PlaneRow(0, y + y_bl - y_f)[x + x_bl - x_f]) *
+ kLaplFilter[y_f + filt_size][x_f + filt_size];
+ }
+ }
+ }
+ }
+ noise_level += std::abs(filtered_value);
+ ++count;
+ }
+ }
+ noise_level /= count;
+ NoiseLevel nl;
+ nl.intensity = mean_int;
+ nl.noise_level = noise_level;
+ noise_level_per_intensity.push_back(nl);
+ }
+ ++patch_index;
+ }
+ }
+ return noise_level_per_intensity;
+}
+
+void EncodeFloatParam(float val, float precision, BitWriter* writer) {
+ JXL_ASSERT(val >= 0);
+ const int absval_quant = static_cast<int>(val * precision + 0.5f);
+ JXL_ASSERT(absval_quant < (1 << 10));
+ writer->Write(10, absval_quant);
+}
+
+} // namespace
+
+Status GetNoiseParameter(const Image3F& opsin, NoiseParams* noise_params,
+ float quality_coef) {
+ // The size of a patch in decoder might be different from encoder's patch
+ // size.
+ // For encoder: the patch size should be big enough to estimate
+ // noise level, but, at the same time, it should be not too big
+ // to be able to estimate intensity value of the patch
+ const size_t block_s = 8;
+ const size_t kNumBin = 256;
+ NoiseHistogram sad_histogram;
+ std::vector<float> sad_scores =
+ GetSADScoresForPatches(opsin, block_s, kNumBin, &sad_histogram);
+ float sad_threshold = GetSADThreshold(sad_histogram, kNumBin);
+ // If threshold is too large, the image has a strong pattern. This pattern
+ // fools our model and it will add too much noise. Therefore, we do not add
+ // noise for such images
+ if (sad_threshold > 0.15f || sad_threshold <= 0.0f) {
+ noise_params->Clear();
+ return false;
+ }
+ std::vector<NoiseLevel> nl =
+ GetNoiseLevel(opsin, sad_scores, sad_threshold, block_s);
+
+ OptimizeNoiseParameters(nl, noise_params);
+ for (float& i : noise_params->lut) {
+ i *= quality_coef * 1.4;
+ }
+ return noise_params->HasAny();
+}
+
+void EncodeNoise(const NoiseParams& noise_params, BitWriter* writer,
+ size_t layer, AuxOut* aux_out) {
+ JXL_ASSERT(noise_params.HasAny());
+
+ BitWriter::Allotment allotment(writer, NoiseParams::kNumNoisePoints * 16);
+ for (float i : noise_params.lut) {
+ EncodeFloatParam(i, kNoisePrecision, writer);
+ }
+ allotment.ReclaimAndCharge(writer, layer, aux_out);
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_noise.h b/third_party/jpeg-xl/lib/jxl/enc_noise.h
new file mode 100644
index 0000000000..851fdd12db
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_noise.h
@@ -0,0 +1,34 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_NOISE_H_
+#define LIB_JXL_ENC_NOISE_H_
+
+// Noise parameter estimation.
+
+#include <stddef.h>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/noise.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Get parameters of the noise for NoiseParams model
+// Returns whether a valid noise model (with HasAny()) is set.
+Status GetNoiseParameter(const Image3F& opsin, NoiseParams* noise_params,
+ float quality_coef);
+
+// Does not write anything if `noise_params` are empty. Otherwise, caller must
+// set FrameHeader.flags.kNoise.
+void EncodeNoise(const NoiseParams& noise_params, BitWriter* writer,
+ size_t layer, AuxOut* aux_out);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_NOISE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_optimize.cc b/third_party/jpeg-xl/lib/jxl/enc_optimize.cc
new file mode 100644
index 0000000000..6865ff67df
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_optimize.cc
@@ -0,0 +1,163 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_optimize.h"
+
+#include <algorithm>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+namespace optimize {
+
+namespace {
+
+// simplex vector must be sorted by first element of its elements
+std::vector<double> Midpoint(const std::vector<std::vector<double>>& simplex) {
+ JXL_CHECK(!simplex.empty());
+ JXL_CHECK(simplex.size() == simplex[0].size());
+ int dim = simplex.size() - 1;
+ std::vector<double> result(dim + 1, 0);
+ for (int i = 0; i < dim; i++) {
+ for (int k = 0; k < dim; k++) {
+ result[i + 1] += simplex[k][i + 1];
+ }
+ result[i + 1] /= dim;
+ }
+ return result;
+}
+
+// first element ignored
+std::vector<double> Subtract(const std::vector<double>& a,
+ const std::vector<double>& b) {
+ JXL_CHECK(a.size() == b.size());
+ std::vector<double> result(a.size());
+ result[0] = 0;
+ for (size_t i = 1; i < result.size(); i++) {
+ result[i] = a[i] - b[i];
+ }
+ return result;
+}
+
+// first element ignored
+std::vector<double> Add(const std::vector<double>& a,
+ const std::vector<double>& b) {
+ JXL_CHECK(a.size() == b.size());
+ std::vector<double> result(a.size());
+ result[0] = 0;
+ for (size_t i = 1; i < result.size(); i++) {
+ result[i] = a[i] + b[i];
+ }
+ return result;
+}
+
+// first element ignored
+std::vector<double> Average(const std::vector<double>& a,
+ const std::vector<double>& b) {
+ JXL_CHECK(a.size() == b.size());
+ std::vector<double> result(a.size());
+ result[0] = 0;
+ for (size_t i = 1; i < result.size(); i++) {
+ result[i] = 0.5 * (a[i] + b[i]);
+ }
+ return result;
+}
+
+// vec: [0] will contain the objective function, [1:] will
+// contain the vector position for the objective function.
+// fun: the function evaluates the value.
+void Eval(std::vector<double>* vec,
+ const std::function<double(const std::vector<double>&)>& fun) {
+ std::vector<double> args(vec->begin() + 1, vec->end());
+ (*vec)[0] = fun(args);
+}
+
+void Sort(std::vector<std::vector<double>>* simplex) {
+ std::sort(simplex->begin(), simplex->end());
+}
+
+// Main iteration step of Nelder-Mead like optimization.
+void Reflect(std::vector<std::vector<double>>* simplex,
+ const std::function<double(const std::vector<double>&)>& fun) {
+ Sort(simplex);
+ const std::vector<double>& last = simplex->back();
+ std::vector<double> mid = Midpoint(*simplex);
+ std::vector<double> diff = Subtract(mid, last);
+ std::vector<double> mirrored = Add(mid, diff);
+ Eval(&mirrored, fun);
+ if (mirrored[0] > (*simplex)[simplex->size() - 2][0]) {
+ // Still the worst, shrink towards the best.
+ std::vector<double> shrinking = Average(simplex->back(), (*simplex)[0]);
+ Eval(&shrinking, fun);
+ simplex->back() = shrinking;
+ } else if (mirrored[0] < (*simplex)[0][0]) {
+ // new best
+ std::vector<double> even_further = Add(mirrored, diff);
+ Eval(&even_further, fun);
+ if (even_further[0] < mirrored[0]) {
+ mirrored = even_further;
+ }
+ simplex->back() = mirrored;
+ } else {
+ // not a best, not a worst point
+ simplex->back() = mirrored;
+ }
+}
+
+// Initialize the simplex at origin.
+std::vector<std::vector<double>> InitialSimplex(
+ int dim, double amount, const std::vector<double>& init,
+ const std::function<double(const std::vector<double>&)>& fun) {
+ std::vector<double> best(1 + dim, 0);
+ std::copy(init.begin(), init.end(), best.begin() + 1);
+ Eval(&best, fun);
+ std::vector<std::vector<double>> result{best};
+ for (int i = 0; i < dim; i++) {
+ best = result[0];
+ best[i + 1] += amount;
+ Eval(&best, fun);
+ result.push_back(best);
+ Sort(&result);
+ }
+ return result;
+}
+
+// For comparing the same with the python tool
+/*void RunSimplexExternal(
+ int dim, double amount, int max_iterations,
+ const std::function<double((const vector<double>&))>& fun) {
+ vector<double> vars;
+ for (int i = 0; i < dim; i++) {
+ vars.push_back(atof(getenv(StrCat("VAR", i).c_str())));
+ }
+ double result = fun(vars);
+ std::cout << "Result=" << result;
+}*/
+
+} // namespace
+
+std::vector<double> RunSimplex(
+ int dim, double amount, int max_iterations, const std::vector<double>& init,
+ const std::function<double(const std::vector<double>&)>& fun) {
+ std::vector<std::vector<double>> simplex =
+ InitialSimplex(dim, amount, init, fun);
+ for (int i = 0; i < max_iterations; i++) {
+ Sort(&simplex);
+ Reflect(&simplex, fun);
+ }
+ return simplex[0];
+}
+
+std::vector<double> RunSimplex(
+ int dim, double amount, int max_iterations,
+ const std::function<double(const std::vector<double>&)>& fun) {
+ std::vector<double> init(dim, 0.0);
+ return RunSimplex(dim, amount, max_iterations, init, fun);
+}
+
+} // namespace optimize
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_optimize.h b/third_party/jpeg-xl/lib/jxl/enc_optimize.h
new file mode 100644
index 0000000000..0a60198214
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_optimize.h
@@ -0,0 +1,218 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Utility functions for optimizing multi-dimensional nonlinear functions.
+
+#ifndef LIB_JXL_OPTIMIZE_H_
+#define LIB_JXL_OPTIMIZE_H_
+
+#include <stdio.h>
+
+#include <cmath>
+#include <cstdio>
+#include <functional>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace optimize {
+
+// An array type of numeric values that supports math operations with operator-,
+// operator+, etc.
+template <typename T, size_t N>
+class Array {
+ public:
+ Array() = default;
+ explicit Array(T v) {
+ for (size_t i = 0; i < N; i++) v_[i] = v;
+ }
+
+ size_t size() const { return N; }
+
+ T& operator[](size_t index) {
+ JXL_DASSERT(index < N);
+ return v_[index];
+ }
+ T operator[](size_t index) const {
+ JXL_DASSERT(index < N);
+ return v_[index];
+ }
+
+ private:
+ // The values used by this Array.
+ T v_[N];
+};
+
+template <typename T, size_t N>
+Array<T, N> operator+(const Array<T, N>& x, const Array<T, N>& y) {
+ Array<T, N> z;
+ for (size_t i = 0; i < N; ++i) {
+ z[i] = x[i] + y[i];
+ }
+ return z;
+}
+
+template <typename T, size_t N>
+Array<T, N> operator-(const Array<T, N>& x, const Array<T, N>& y) {
+ Array<T, N> z;
+ for (size_t i = 0; i < N; ++i) {
+ z[i] = x[i] - y[i];
+ }
+ return z;
+}
+
+template <typename T, size_t N>
+Array<T, N> operator*(T v, const Array<T, N>& x) {
+ Array<T, N> y;
+ for (size_t i = 0; i < N; ++i) {
+ y[i] = v * x[i];
+ }
+ return y;
+}
+
+template <typename T, size_t N>
+T operator*(const Array<T, N>& x, const Array<T, N>& y) {
+ T r = 0.0;
+ for (size_t i = 0; i < N; ++i) {
+ r += x[i] * y[i];
+ }
+ return r;
+}
+
+// Runs Nelder-Mead like optimization. Runs for max_iterations times,
+// fun gets called with a vector of size dim as argument, and returns the score
+// based on those parameters (lower is better). Returns a vector of dim+1
+// dimensions, where the first value is the optimal value of the function and
+// the rest is the argmin value. Use init to pass an initial guess or where
+// the optimal value is.
+//
+// Usage example:
+//
+// RunSimplex(2, 0.1, 100, [](const vector<float>& v) {
+// return (v[0] - 5) * (v[0] - 5) + (v[1] - 7) * (v[1] - 7);
+// });
+//
+// Returns (0.0, 5, 7)
+std::vector<double> RunSimplex(
+ int dim, double amount, int max_iterations,
+ const std::function<double(const std::vector<double>&)>& fun);
+std::vector<double> RunSimplex(
+ int dim, double amount, int max_iterations, const std::vector<double>& init,
+ const std::function<double(const std::vector<double>&)>& fun);
+
+// Implementation of the Scaled Conjugate Gradient method described in the
+// following paper:
+// Moller, M. "A Scaled Conjugate Gradient Algorithm for Fast Supervised
+// Learning", Neural Networks, Vol. 6. pp. 525-533, 1993
+// http://sci2s.ugr.es/keel/pdf/algorithm/articulo/moller1990.pdf
+//
+// The Function template parameter is a class that has the following method:
+//
+// // Returns the value of the function at point w and sets *df to be the
+// // negative gradient vector of the function at point w.
+// double Compute(const optimize::Array<T, N>& w,
+// optimize::Array<T, N>* df) const;
+//
+// Returns a vector w, such that |df(w)| < grad_norm_threshold.
+template <typename T, size_t N, typename Function>
+Array<T, N> OptimizeWithScaledConjugateGradientMethod(
+ const Function& f, const Array<T, N>& w0, const T grad_norm_threshold,
+ size_t max_iters) {
+ const size_t n = w0.size();
+ const T rsq_threshold = grad_norm_threshold * grad_norm_threshold;
+ const T sigma0 = static_cast<T>(0.0001);
+ const T l_min = static_cast<T>(1.0e-15);
+ const T l_max = static_cast<T>(1.0e15);
+
+ Array<T, N> w = w0;
+ Array<T, N> wp;
+ Array<T, N> r;
+ Array<T, N> rt;
+ Array<T, N> e;
+ Array<T, N> p;
+ T psq;
+ T fp;
+ T D;
+ T d;
+ T m;
+ T a;
+ T b;
+ T s;
+ T t;
+
+ T fw = f.Compute(w, &r);
+ T rsq = r * r;
+ e = r;
+ p = r;
+ T l = static_cast<T>(1.0);
+ bool success = true;
+ size_t n_success = 0;
+ size_t k = 0;
+
+ while (k++ < max_iters) {
+ if (success) {
+ m = -(p * r);
+ if (m >= 0) {
+ p = r;
+ m = -(p * r);
+ }
+ psq = p * p;
+ s = sigma0 / std::sqrt(psq);
+ f.Compute(w + (s * p), &rt);
+ t = (p * (r - rt)) / s;
+ }
+
+ d = t + l * psq;
+ if (d <= 0) {
+ d = l * psq;
+ l = l - t / psq;
+ }
+
+ a = -m / d;
+ wp = w + a * p;
+ fp = f.Compute(wp, &rt);
+
+ D = 2.0 * (fp - fw) / (a * m);
+ if (D >= 0.0) {
+ success = true;
+ n_success++;
+ w = wp;
+ } else {
+ success = false;
+ }
+
+ if (success) {
+ e = r;
+ r = rt;
+ rsq = r * r;
+ fw = fp;
+ if (rsq <= rsq_threshold) {
+ break;
+ }
+ }
+
+ if (D < 0.25) {
+ l = std::min(4.0 * l, l_max);
+ } else if (D > 0.75) {
+ l = std::max(0.25 * l, l_min);
+ }
+
+ if ((n_success % n) == 0) {
+ p = r;
+ l = 1.0;
+ } else if (success) {
+ b = ((e - r) * r) / m;
+ p = b * p + r;
+ }
+ }
+
+ return w;
+}
+
+} // namespace optimize
+} // namespace jxl
+
+#endif // LIB_JXL_OPTIMIZE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_optimize_test.cc b/third_party/jpeg-xl/lib/jxl/enc_optimize_test.cc
new file mode 100644
index 0000000000..1c6699f99e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_optimize_test.cc
@@ -0,0 +1,109 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_optimize.h"
+
+#include <stdio.h>
+
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace optimize {
+namespace {
+
+// The maximum number of iterations for the test.
+static const size_t kMaxTestIter = 100000;
+
+// F(w) = (w - w_min)^2.
+struct SimpleQuadraticFunction {
+ typedef Array<double, 2> ArrayType;
+ explicit SimpleQuadraticFunction(const ArrayType& w0) : w_min(w0) {}
+
+ double Compute(const ArrayType& w, ArrayType* df) const {
+ ArrayType dw = w - w_min;
+ *df = -2.0 * dw;
+ return dw * dw;
+ }
+
+ ArrayType w_min;
+};
+
+// F(alpha, beta, gamma| x,y) = \sum_i(y_i - (alpha x_i ^ gamma + beta))^2.
+struct PowerFunction {
+ explicit PowerFunction(const std::vector<double>& x0,
+ const std::vector<double>& y0)
+ : x(x0), y(y0) {}
+
+ typedef Array<double, 3> ArrayType;
+ double Compute(const ArrayType& w, ArrayType* df) const {
+ double loss_function = 0;
+ (*df)[0] = 0;
+ (*df)[1] = 0;
+ (*df)[2] = 0;
+ for (size_t ind = 0; ind < y.size(); ++ind) {
+ if (x[ind] != 0) {
+ double l_f = y[ind] - (w[0] * pow(x[ind], w[1]) + w[2]);
+ (*df)[0] += 2.0 * l_f * pow(x[ind], w[1]);
+ (*df)[1] += 2.0 * l_f * w[0] * pow(x[ind], w[1]) * log(x[ind]);
+ (*df)[2] += 2.0 * l_f * 1;
+ loss_function += l_f * l_f;
+ }
+ }
+ return loss_function;
+ }
+
+ std::vector<double> x;
+ std::vector<double> y;
+};
+
+TEST(OptimizeTest, SimpleQuadraticFunction) {
+ SimpleQuadraticFunction::ArrayType w_min;
+ w_min[0] = 1.0;
+ w_min[1] = 2.0;
+ SimpleQuadraticFunction f(w_min);
+ SimpleQuadraticFunction::ArrayType w(0.);
+ static const double kPrecision = 1e-8;
+ w = optimize::OptimizeWithScaledConjugateGradientMethod(f, w, kPrecision,
+ kMaxTestIter);
+ EXPECT_NEAR(w[0], 1.0, kPrecision);
+ EXPECT_NEAR(w[1], 2.0, kPrecision);
+}
+
+TEST(OptimizeTest, PowerFunction) {
+ std::vector<double> x(10);
+ std::vector<double> y(10);
+ for (int ind = 0; ind < 10; ++ind) {
+ x[ind] = 1. * ind;
+ y[ind] = 2. * pow(x[ind], 3) + 5.;
+ }
+ PowerFunction f(x, y);
+ PowerFunction::ArrayType w(0.);
+
+ static const double kPrecision = 0.01;
+ w = optimize::OptimizeWithScaledConjugateGradientMethod(f, w, kPrecision,
+ kMaxTestIter);
+ EXPECT_NEAR(w[0], 2.0, kPrecision);
+ EXPECT_NEAR(w[1], 3.0, kPrecision);
+ EXPECT_NEAR(w[2], 5.0, kPrecision);
+}
+
+TEST(OptimizeTest, SimplexOptTest) {
+ auto f = [](const std::vector<double>& x) -> double {
+ double t1 = x[0] - 1.0;
+ double t2 = x[1] + 1.5;
+ return 2.0 + t1 * t1 + t2 * t2;
+ };
+ auto opt = RunSimplex(2, 0.01, 100, f);
+ EXPECT_EQ(opt.size(), 3u);
+
+ static const double kPrecision = 0.01;
+ EXPECT_NEAR(opt[0], 2.0, kPrecision);
+ EXPECT_NEAR(opt[1], 1.0, kPrecision);
+ EXPECT_NEAR(opt[2], -1.5, kPrecision);
+}
+
+} // namespace
+} // namespace optimize
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_params.h b/third_party/jpeg-xl/lib/jxl/enc_params.h
new file mode 100644
index 0000000000..737a951362
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_params.h
@@ -0,0 +1,225 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_PARAMS_H_
+#define LIB_JXL_ENC_PARAMS_H_
+
+// Parameters and flags that govern JXL compression.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/butteraugli/butteraugli.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/modular/options.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+enum class SpeedTier {
+ // Try multiple combinations of Tortoise flags for modular mode. Otherwise
+ // like kTortoise.
+ kGlacier = 0,
+ // Turns on FindBestQuantizationHQ loop. Equivalent to "guetzli" mode.
+ kTortoise = 1,
+ // Turns on FindBestQuantization butteraugli loop.
+ kKitten = 2,
+ // Turns on dots, patches, and spline detection by default, as well as full
+ // context clustering. Default.
+ kSquirrel = 3,
+ // Turns on error diffusion and full AC strategy heuristics. Equivalent to
+ // "fast" mode.
+ kWombat = 4,
+ // Turns on gaborish by default, non-default cmap, initial quant field.
+ kHare = 5,
+ // Turns on simple heuristics for AC strategy, quant field, and clustering;
+ // also enables coefficient reordering.
+ kCheetah = 6,
+ // Turns off most encoder features. Does context clustering.
+ // Modular: uses fixed tree with Weighted predictor.
+ kFalcon = 7,
+ // Currently fastest possible setting for VarDCT.
+ // Modular: uses fixed tree with Gradient predictor.
+ kThunder = 8,
+ // VarDCT: same as kThunder.
+ // Modular: no tree, Gradient predictor, fast histograms
+ kLightning = 9
+};
+
+// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
+struct CompressParams {
+ float butteraugli_distance = 1.0f;
+
+ // explicit distances for extra channels (defaults to butteraugli_distance
+ // when not set; value of -1 can be used to represent 'default')
+ std::vector<float> ec_distance;
+ size_t target_size = 0;
+ float target_bitrate = 0.0f;
+
+ // 0.0 means search for the adaptive quantization map that matches the
+ // butteraugli distance, positive values mean quantize everywhere with that
+ // value.
+ float uniform_quant = 0.0f;
+ float quant_border_bias = 0.0f;
+
+ // Try to achieve a maximum pixel-by-pixel error on each channel.
+ bool max_error_mode = false;
+ float max_error[3] = {0.0, 0.0, 0.0};
+
+ SpeedTier speed_tier = SpeedTier::kSquirrel;
+ int brotli_effort = -1;
+
+ // 0 = default.
+ // 1 = slightly worse quality.
+ // 4 = fastest speed, lowest quality
+ size_t decoding_speed_tier = 0;
+
+ int max_butteraugli_iters = 4;
+
+ int max_butteraugli_iters_guetzli_mode = 100;
+
+ ColorTransform color_transform = ColorTransform::kXYB;
+ YCbCrChromaSubsampling chroma_subsampling;
+
+ // If true, the "modular mode options" members below are used.
+ bool modular_mode = false;
+
+ // Change group size in modular mode (0=128, 1=256, 2=512, 3=1024).
+ size_t modular_group_size_shift = 1;
+
+ Override preview = Override::kDefault;
+ Override noise = Override::kDefault;
+ Override dots = Override::kDefault;
+ Override patches = Override::kDefault;
+ Override gaborish = Override::kDefault;
+ int epf = -1;
+
+ // Progressive mode.
+ bool progressive_mode = false;
+
+ // Quantized-progressive mode.
+ bool qprogressive_mode = false;
+
+ // Put center groups first in the bitstream.
+ bool centerfirst = false;
+
+ // Pixel coordinates of the center. First group will contain that center.
+ size_t center_x = static_cast<size_t>(-1);
+ size_t center_y = static_cast<size_t>(-1);
+
+ int progressive_dc = -1;
+
+ // If on: preserve color of invisible pixels (if off: don't care)
+ // Default: on for lossless, off for lossy
+ Override keep_invisible = Override::kDefault;
+
+ // Currently unused as of 2020-01.
+ bool clear_metadata = false;
+
+ // Prints extra information during/after encoding.
+ bool verbose = false;
+ bool log_search_state = false;
+
+ ButteraugliParams ba_params;
+
+ // Force usage of CfL when doing JPEG recompression. This can have unexpected
+ // effects on the decoded pixels, while still being JPEG-compliant and
+ // allowing reconstruction of the original JPEG.
+ bool force_cfl_jpeg_recompression = true;
+
+ // Use brotli compression for any boxes derived from a JPEG frame.
+ bool jpeg_compress_boxes = true;
+
+ // Set the noise to what it would approximately be if shooting at the nominal
+ // exposure for a given ISO setting on a 35mm camera.
+ float photon_noise_iso = 0;
+
+ // modular mode options below
+ ModularOptions options;
+ int responsive = -1;
+ // empty for default squeeze
+ std::vector<SqueezeParams> squeezes;
+ int colorspace = -1;
+ // Use Global channel palette if #colors < this percentage of range
+ float channel_colors_pre_transform_percent = 95.f;
+ // Use Local channel palette if #colors < this percentage of range
+ float channel_colors_percent = 80.f;
+ int palette_colors = 1 << 10; // up to 10-bit palette is probably worthwhile
+ bool lossy_palette = false;
+
+ // Returns whether these params are lossless as defined by SetLossless();
+ bool IsLossless() const { return modular_mode && ModularPartIsLossless(); }
+
+ bool ModularPartIsLossless() const {
+ if (modular_mode) {
+ // YCbCr is also considered lossless here since it's intended for
+ // source material that is already YCbCr (we don't do the fwd transform)
+ if (butteraugli_distance != 0 ||
+ color_transform == jxl::ColorTransform::kXYB)
+ return false;
+ }
+ for (float f : ec_distance) {
+ if (f > 0) return false;
+ if (f < 0 && butteraugli_distance != 0) return false;
+ }
+ // if no explicit ec_distance given, and using vardct, then the modular part
+ // is empty or not lossless
+ if (!modular_mode && ec_distance.empty()) return false;
+ // all modular channels are encoded at distance 0
+ return true;
+ }
+
+ // Sets the parameters required to make the codec lossless.
+ void SetLossless() {
+ modular_mode = true;
+ butteraugli_distance = 0.0f;
+ for (float &f : ec_distance) f = 0.0f;
+ color_transform = jxl::ColorTransform::kNone;
+ }
+
+ // Down/upsample the image before encoding / after decoding by this factor.
+ // The resampling value can also be set to <= 0 to automatically choose based
+ // on distance, however EncodeFrame doesn't support this, so it is
+ // required to call PostInit() to set a valid positive resampling
+ // value and altered butteraugli score if this is used.
+ int resampling = -1;
+ int ec_resampling = -1;
+ // Skip the downsampling before encoding if this is true.
+ bool already_downsampled = false;
+ // Butteraugli target distance on the original full size image, this can be
+ // different from butteraugli_distance if resampling was used.
+ float original_butteraugli_distance = -1.0f;
+
+ float quant_ac_rescale = 1.0;
+
+ // Codestream level to conform to.
+ // -1: don't care
+ int level = -1;
+
+ std::vector<float> manual_noise;
+ std::vector<float> manual_xyb_factors;
+};
+
+static constexpr float kMinButteraugliForDynamicAR = 0.5f;
+static constexpr float kMinButteraugliForDots = 3.0f;
+static constexpr float kMinButteraugliToSubtractOriginalPatches = 3.0f;
+
+// Always off
+static constexpr float kMinButteraugliForNoise = 99.0f;
+
+// Minimum butteraugli distance the encoder accepts.
+static constexpr float kMinButteraugliDistance = 0.001f;
+
+// Tile size for encoder-side processing. Must be equal to color tile dim in the
+// current implementation.
+static constexpr size_t kEncTileDim = 64;
+static constexpr size_t kEncTileDimInBlocks = kEncTileDim / kBlockDim;
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_PARAMS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_patch_dictionary.cc b/third_party/jpeg-xl/lib/jxl/enc_patch_dictionary.cc
new file mode 100644
index 0000000000..157e18c3a8
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_patch_dictionary.cc
@@ -0,0 +1,813 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_patch_dictionary.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include <algorithm>
+#include <atomic>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/dec_frame.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_dot_dictionary.h"
+#include "lib/jxl/enc_frame.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/patch_dictionary_internal.h"
+
+namespace jxl {
+
+static constexpr size_t kPatchFrameReferenceId = 3;
+
+// static
+void PatchDictionaryEncoder::Encode(const PatchDictionary& pdic,
+ BitWriter* writer, size_t layer,
+ AuxOut* aux_out) {
+ JXL_ASSERT(pdic.HasAny());
+ std::vector<std::vector<Token>> tokens(1);
+ size_t num_ec = pdic.shared_->metadata->m.num_extra_channels;
+
+ auto add_num = [&](int context, size_t num) {
+ tokens[0].emplace_back(context, num);
+ };
+ size_t num_ref_patch = 0;
+ for (size_t i = 0; i < pdic.positions_.size();) {
+ size_t ref_pos_idx = pdic.positions_[i].ref_pos_idx;
+ while (i < pdic.positions_.size() &&
+ pdic.positions_[i].ref_pos_idx == ref_pos_idx) {
+ i++;
+ }
+ num_ref_patch++;
+ }
+ add_num(kNumRefPatchContext, num_ref_patch);
+ size_t blend_pos = 0;
+ for (size_t i = 0; i < pdic.positions_.size();) {
+ size_t i_start = i;
+ size_t ref_pos_idx = pdic.positions_[i].ref_pos_idx;
+ const auto& ref_pos = pdic.ref_positions_[ref_pos_idx];
+ while (i < pdic.positions_.size() &&
+ pdic.positions_[i].ref_pos_idx == ref_pos_idx) {
+ i++;
+ }
+ size_t num = i - i_start;
+ JXL_ASSERT(num > 0);
+ add_num(kReferenceFrameContext, ref_pos.ref);
+ add_num(kPatchReferencePositionContext, ref_pos.x0);
+ add_num(kPatchReferencePositionContext, ref_pos.y0);
+ add_num(kPatchSizeContext, ref_pos.xsize - 1);
+ add_num(kPatchSizeContext, ref_pos.ysize - 1);
+ add_num(kPatchCountContext, num - 1);
+ for (size_t j = i_start; j < i; j++) {
+ const PatchPosition& pos = pdic.positions_[j];
+ if (j == i_start) {
+ add_num(kPatchPositionContext, pos.x);
+ add_num(kPatchPositionContext, pos.y);
+ } else {
+ add_num(kPatchOffsetContext,
+ PackSigned(pos.x - pdic.positions_[j - 1].x));
+ add_num(kPatchOffsetContext,
+ PackSigned(pos.y - pdic.positions_[j - 1].y));
+ }
+ for (size_t j = 0; j < num_ec + 1; ++j, ++blend_pos) {
+ const PatchBlending& info = pdic.blendings_[blend_pos];
+ add_num(kPatchBlendModeContext, static_cast<uint32_t>(info.mode));
+ if (UsesAlpha(info.mode) &&
+ pdic.shared_->metadata->m.extra_channel_info.size() > 1) {
+ add_num(kPatchAlphaChannelContext, info.alpha_channel);
+ }
+ if (UsesClamp(info.mode)) {
+ add_num(kPatchClampContext, info.clamp);
+ }
+ }
+ }
+ }
+
+ EntropyEncodingData codes;
+ std::vector<uint8_t> context_map;
+ BuildAndEncodeHistograms(HistogramParams(), kNumPatchDictionaryContexts,
+ tokens, &codes, &context_map, writer, layer,
+ aux_out);
+ WriteTokens(tokens[0], codes, context_map, writer, layer, aux_out);
+}
+
+// static
+void PatchDictionaryEncoder::SubtractFrom(const PatchDictionary& pdic,
+ Image3F* opsin) {
+ size_t num_ec = pdic.shared_->metadata->m.num_extra_channels;
+ // TODO(veluca): this can likely be optimized knowing it runs on full images.
+ for (size_t y = 0; y < opsin->ysize(); y++) {
+ float* JXL_RESTRICT rows[3] = {
+ opsin->PlaneRow(0, y),
+ opsin->PlaneRow(1, y),
+ opsin->PlaneRow(2, y),
+ };
+ for (size_t pos_idx : pdic.GetPatchesForRow(y)) {
+ const size_t blending_idx = pos_idx * (num_ec + 1);
+ const PatchPosition& pos = pdic.positions_[pos_idx];
+ const PatchReferencePosition& ref_pos =
+ pdic.ref_positions_[pos.ref_pos_idx];
+ const PatchBlendMode mode = pdic.blendings_[blending_idx].mode;
+ size_t by = pos.y;
+ size_t bx = pos.x;
+ size_t xsize = ref_pos.xsize;
+ JXL_DASSERT(y >= by);
+ JXL_DASSERT(y < by + ref_pos.ysize);
+ size_t iy = y - by;
+ size_t ref = ref_pos.ref;
+ const float* JXL_RESTRICT ref_rows[3] = {
+ pdic.shared_->reference_frames[ref].frame.color().ConstPlaneRow(
+ 0, ref_pos.y0 + iy) +
+ ref_pos.x0,
+ pdic.shared_->reference_frames[ref].frame.color().ConstPlaneRow(
+ 1, ref_pos.y0 + iy) +
+ ref_pos.x0,
+ pdic.shared_->reference_frames[ref].frame.color().ConstPlaneRow(
+ 2, ref_pos.y0 + iy) +
+ ref_pos.x0,
+ };
+ for (size_t ix = 0; ix < xsize; ix++) {
+ for (size_t c = 0; c < 3; c++) {
+ if (mode == PatchBlendMode::kAdd) {
+ rows[c][bx + ix] -= ref_rows[c][ix];
+ } else if (mode == PatchBlendMode::kReplace) {
+ rows[c][bx + ix] = 0;
+ } else if (mode == PatchBlendMode::kNone) {
+ // Nothing to do.
+ } else {
+ JXL_ABORT("Blending mode %u not yet implemented", (uint32_t)mode);
+ }
+ }
+ }
+ }
+ }
+}
+
+namespace {
+
+struct PatchColorspaceInfo {
+ float kChannelDequant[3];
+ float kChannelWeights[3];
+
+ explicit PatchColorspaceInfo(bool is_xyb) {
+ if (is_xyb) {
+ kChannelDequant[0] = 0.01615;
+ kChannelDequant[1] = 0.08875;
+ kChannelDequant[2] = 0.1922;
+ kChannelWeights[0] = 30.0;
+ kChannelWeights[1] = 3.0;
+ kChannelWeights[2] = 1.0;
+ } else {
+ kChannelDequant[0] = 20.0f / 255;
+ kChannelDequant[1] = 22.0f / 255;
+ kChannelDequant[2] = 20.0f / 255;
+ kChannelWeights[0] = 0.017 * 255;
+ kChannelWeights[1] = 0.02 * 255;
+ kChannelWeights[2] = 0.017 * 255;
+ }
+ }
+
+ float ScaleForQuantization(float val, size_t c) {
+ return val / kChannelDequant[c];
+ }
+
+ int Quantize(float val, size_t c) {
+ return truncf(ScaleForQuantization(val, c));
+ }
+
+ bool is_similar_v(const float v1[3], const float v2[3], float threshold) {
+ float distance = 0;
+ for (size_t c = 0; c < 3; c++) {
+ distance += std::fabs(v1[c] - v2[c]) * kChannelWeights[c];
+ }
+ return distance <= threshold;
+ }
+};
+
+std::vector<PatchInfo> FindTextLikePatches(
+ const Image3F& opsin, const PassesEncoderState* JXL_RESTRICT state,
+ ThreadPool* pool, AuxOut* aux_out, bool is_xyb) {
+ if (state->cparams.patches == Override::kOff) return {};
+
+ PatchColorspaceInfo pci(is_xyb);
+ float kSimilarThreshold = 0.8f;
+
+ auto is_similar_impl = [&pci](std::pair<uint32_t, uint32_t> p1,
+ std::pair<uint32_t, uint32_t> p2,
+ const float* JXL_RESTRICT rows[3],
+ size_t stride, float threshold) {
+ float v1[3], v2[3];
+ for (size_t c = 0; c < 3; c++) {
+ v1[c] = rows[c][p1.second * stride + p1.first];
+ v2[c] = rows[c][p2.second * stride + p2.first];
+ }
+ return pci.is_similar_v(v1, v2, threshold);
+ };
+
+ std::atomic<bool> has_screenshot_areas{false};
+ const size_t opsin_stride = opsin.PixelsPerRow();
+ const float* JXL_RESTRICT opsin_rows[3] = {opsin.ConstPlaneRow(0, 0),
+ opsin.ConstPlaneRow(1, 0),
+ opsin.ConstPlaneRow(2, 0)};
+
+ auto is_same = [&opsin_rows, opsin_stride](std::pair<uint32_t, uint32_t> p1,
+ std::pair<uint32_t, uint32_t> p2) {
+ for (size_t c = 0; c < 3; c++) {
+ float v1 = opsin_rows[c][p1.second * opsin_stride + p1.first];
+ float v2 = opsin_rows[c][p2.second * opsin_stride + p2.first];
+ if (std::fabs(v1 - v2) > 1e-4) {
+ return false;
+ }
+ }
+ return true;
+ };
+
+ auto is_similar = [&](std::pair<uint32_t, uint32_t> p1,
+ std::pair<uint32_t, uint32_t> p2) {
+ return is_similar_impl(p1, p2, opsin_rows, opsin_stride, kSimilarThreshold);
+ };
+
+ constexpr int64_t kPatchSide = 4;
+ constexpr int64_t kExtraSide = 4;
+
+ // Look for kPatchSide size squares, naturally aligned, that all have the same
+ // pixel values.
+ ImageB is_screenshot_like(DivCeil(opsin.xsize(), kPatchSide),
+ DivCeil(opsin.ysize(), kPatchSide));
+ ZeroFillImage(&is_screenshot_like);
+ uint8_t* JXL_RESTRICT screenshot_row = is_screenshot_like.Row(0);
+ const size_t screenshot_stride = is_screenshot_like.PixelsPerRow();
+ const auto process_row = [&](const uint32_t y, size_t /* thread */) {
+ for (uint64_t x = 0; x < opsin.xsize() / kPatchSide; x++) {
+ bool all_same = true;
+ for (size_t iy = 0; iy < static_cast<size_t>(kPatchSide); iy++) {
+ for (size_t ix = 0; ix < static_cast<size_t>(kPatchSide); ix++) {
+ size_t cx = x * kPatchSide + ix;
+ size_t cy = y * kPatchSide + iy;
+ if (!is_same({cx, cy}, {x * kPatchSide, y * kPatchSide})) {
+ all_same = false;
+ break;
+ }
+ }
+ }
+ if (!all_same) continue;
+ size_t num = 0;
+ size_t num_same = 0;
+ for (int64_t iy = -kExtraSide; iy < kExtraSide + kPatchSide; iy++) {
+ for (int64_t ix = -kExtraSide; ix < kExtraSide + kPatchSide; ix++) {
+ int64_t cx = x * kPatchSide + ix;
+ int64_t cy = y * kPatchSide + iy;
+ if (cx < 0 || static_cast<uint64_t>(cx) >= opsin.xsize() || //
+ cy < 0 || static_cast<uint64_t>(cy) >= opsin.ysize()) {
+ continue;
+ }
+ num++;
+ if (is_same({cx, cy}, {x * kPatchSide, y * kPatchSide})) num_same++;
+ }
+ }
+ // Too few equal pixels nearby.
+ if (num_same * 8 < num * 7) continue;
+ screenshot_row[y * screenshot_stride + x] = 1;
+ has_screenshot_areas = true;
+ }
+ };
+ JXL_CHECK(RunOnPool(pool, 0, opsin.ysize() / kPatchSide, ThreadPool::NoInit,
+ process_row, "IsScreenshotLike"));
+
+ // TODO(veluca): also parallelize the rest of this function.
+ if (WantDebugOutput(aux_out)) {
+ aux_out->DumpPlaneNormalized("screenshot_like", is_screenshot_like);
+ }
+
+ constexpr int kSearchRadius = 1;
+
+ if (!ApplyOverride(state->cparams.patches, has_screenshot_areas)) {
+ return {};
+ }
+
+ // Search for "similar enough" pixels near the screenshot-like areas.
+ ImageB is_background(opsin.xsize(), opsin.ysize());
+ ZeroFillImage(&is_background);
+ Image3F background(opsin.xsize(), opsin.ysize());
+ ZeroFillImage(&background);
+ constexpr size_t kDistanceLimit = 50;
+ float* JXL_RESTRICT background_rows[3] = {
+ background.PlaneRow(0, 0),
+ background.PlaneRow(1, 0),
+ background.PlaneRow(2, 0),
+ };
+ const size_t background_stride = background.PixelsPerRow();
+ uint8_t* JXL_RESTRICT is_background_row = is_background.Row(0);
+ const size_t is_background_stride = is_background.PixelsPerRow();
+ std::vector<
+ std::pair<std::pair<uint32_t, uint32_t>, std::pair<uint32_t, uint32_t>>>
+ queue;
+ size_t queue_front = 0;
+ for (size_t y = 0; y < opsin.ysize(); y++) {
+ for (size_t x = 0; x < opsin.xsize(); x++) {
+ if (!screenshot_row[screenshot_stride * (y / kPatchSide) +
+ (x / kPatchSide)])
+ continue;
+ queue.push_back({{x, y}, {x, y}});
+ }
+ }
+ while (queue.size() != queue_front) {
+ std::pair<uint32_t, uint32_t> cur = queue[queue_front].first;
+ std::pair<uint32_t, uint32_t> src = queue[queue_front].second;
+ queue_front++;
+ if (is_background_row[cur.second * is_background_stride + cur.first])
+ continue;
+ is_background_row[cur.second * is_background_stride + cur.first] = 1;
+ for (size_t c = 0; c < 3; c++) {
+ background_rows[c][cur.second * background_stride + cur.first] =
+ opsin_rows[c][src.second * opsin_stride + src.first];
+ }
+ for (int dx = -kSearchRadius; dx <= kSearchRadius; dx++) {
+ for (int dy = -kSearchRadius; dy <= kSearchRadius; dy++) {
+ if (dx == 0 && dy == 0) continue;
+ int next_first = cur.first + dx;
+ int next_second = cur.second + dy;
+ if (next_first < 0 || next_second < 0 ||
+ static_cast<uint32_t>(next_first) >= opsin.xsize() ||
+ static_cast<uint32_t>(next_second) >= opsin.ysize()) {
+ continue;
+ }
+ if (static_cast<uint32_t>(
+ std::abs(next_first - static_cast<int>(src.first)) +
+ std::abs(next_second - static_cast<int>(src.second))) >
+ kDistanceLimit) {
+ continue;
+ }
+ std::pair<uint32_t, uint32_t> next{next_first, next_second};
+ if (is_similar(src, next)) {
+ if (!screenshot_row[next.second / kPatchSide * screenshot_stride +
+ next.first / kPatchSide] ||
+ is_same(src, next)) {
+ if (!is_background_row[next.second * is_background_stride +
+ next.first])
+ queue.emplace_back(next, src);
+ }
+ }
+ }
+ }
+ }
+ queue.clear();
+
+ ImageF ccs;
+ Rng rng(0);
+ bool paint_ccs = false;
+ if (WantDebugOutput(aux_out)) {
+ aux_out->DumpPlaneNormalized("is_background", is_background);
+ if (is_xyb) {
+ aux_out->DumpXybImage("background", background);
+ } else {
+ aux_out->DumpImage("background", background);
+ }
+ ccs = ImageF(opsin.xsize(), opsin.ysize());
+ ZeroFillImage(&ccs);
+ paint_ccs = true;
+ }
+
+ constexpr float kVerySimilarThreshold = 0.03f;
+ constexpr float kHasSimilarThreshold = 0.03f;
+
+ const float* JXL_RESTRICT const_background_rows[3] = {
+ background_rows[0], background_rows[1], background_rows[2]};
+ auto is_similar_b = [&](std::pair<int, int> p1, std::pair<int, int> p2) {
+ return is_similar_impl(p1, p2, const_background_rows, background_stride,
+ kVerySimilarThreshold);
+ };
+
+ constexpr int kMinPeak = 2;
+ constexpr int kHasSimilarRadius = 2;
+
+ std::vector<PatchInfo> info;
+
+ // Find small CC outside the "similar enough" areas, compute bounding boxes,
+ // and run heuristics to exclude some patches.
+ ImageB visited(opsin.xsize(), opsin.ysize());
+ ZeroFillImage(&visited);
+ uint8_t* JXL_RESTRICT visited_row = visited.Row(0);
+ const size_t visited_stride = visited.PixelsPerRow();
+ std::vector<std::pair<uint32_t, uint32_t>> cc;
+ std::vector<std::pair<uint32_t, uint32_t>> stack;
+ for (size_t y = 0; y < opsin.ysize(); y++) {
+ for (size_t x = 0; x < opsin.xsize(); x++) {
+ if (is_background_row[y * is_background_stride + x]) continue;
+ cc.clear();
+ stack.clear();
+ stack.emplace_back(x, y);
+ size_t min_x = x;
+ size_t max_x = x;
+ size_t min_y = y;
+ size_t max_y = y;
+ std::pair<uint32_t, uint32_t> reference;
+ bool found_border = false;
+ bool all_similar = true;
+ while (!stack.empty()) {
+ std::pair<uint32_t, uint32_t> cur = stack.back();
+ stack.pop_back();
+ if (visited_row[cur.second * visited_stride + cur.first]) continue;
+ visited_row[cur.second * visited_stride + cur.first] = 1;
+ if (cur.first < min_x) min_x = cur.first;
+ if (cur.first > max_x) max_x = cur.first;
+ if (cur.second < min_y) min_y = cur.second;
+ if (cur.second > max_y) max_y = cur.second;
+ if (paint_ccs) {
+ cc.push_back(cur);
+ }
+ for (int dx = -kSearchRadius; dx <= kSearchRadius; dx++) {
+ for (int dy = -kSearchRadius; dy <= kSearchRadius; dy++) {
+ if (dx == 0 && dy == 0) continue;
+ int next_first = static_cast<int32_t>(cur.first) + dx;
+ int next_second = static_cast<int32_t>(cur.second) + dy;
+ if (next_first < 0 || next_second < 0 ||
+ static_cast<uint32_t>(next_first) >= opsin.xsize() ||
+ static_cast<uint32_t>(next_second) >= opsin.ysize()) {
+ continue;
+ }
+ std::pair<uint32_t, uint32_t> next{next_first, next_second};
+ if (!is_background_row[next.second * is_background_stride +
+ next.first]) {
+ stack.push_back(next);
+ } else {
+ if (!found_border) {
+ reference = next;
+ found_border = true;
+ } else {
+ if (!is_similar_b(next, reference)) all_similar = false;
+ }
+ }
+ }
+ }
+ }
+ if (!found_border || !all_similar || max_x - min_x >= kMaxPatchSize ||
+ max_y - min_y >= kMaxPatchSize) {
+ continue;
+ }
+ size_t bpos = background_stride * reference.second + reference.first;
+ float ref[3] = {background_rows[0][bpos], background_rows[1][bpos],
+ background_rows[2][bpos]};
+ bool has_similar = false;
+ for (size_t iy = std::max<int>(
+ static_cast<int32_t>(min_y) - kHasSimilarRadius, 0);
+ iy < std::min(max_y + kHasSimilarRadius + 1, opsin.ysize()); iy++) {
+ for (size_t ix = std::max<int>(
+ static_cast<int32_t>(min_x) - kHasSimilarRadius, 0);
+ ix < std::min(max_x + kHasSimilarRadius + 1, opsin.xsize());
+ ix++) {
+ size_t opos = opsin_stride * iy + ix;
+ float px[3] = {opsin_rows[0][opos], opsin_rows[1][opos],
+ opsin_rows[2][opos]};
+ if (pci.is_similar_v(ref, px, kHasSimilarThreshold)) {
+ has_similar = true;
+ }
+ }
+ }
+ if (!has_similar) continue;
+ info.emplace_back();
+ info.back().second.emplace_back(min_x, min_y);
+ QuantizedPatch& patch = info.back().first;
+ patch.xsize = max_x - min_x + 1;
+ patch.ysize = max_y - min_y + 1;
+ int max_value = 0;
+ for (size_t c : {1, 0, 2}) {
+ for (size_t iy = min_y; iy <= max_y; iy++) {
+ for (size_t ix = min_x; ix <= max_x; ix++) {
+ size_t offset = (iy - min_y) * patch.xsize + ix - min_x;
+ patch.fpixels[c][offset] =
+ opsin_rows[c][iy * opsin_stride + ix] - ref[c];
+ int val = pci.Quantize(patch.fpixels[c][offset], c);
+ patch.pixels[c][offset] = val;
+ if (std::abs(val) > max_value) max_value = std::abs(val);
+ }
+ }
+ }
+ if (max_value < kMinPeak) {
+ info.pop_back();
+ continue;
+ }
+ if (paint_ccs) {
+ float cc_color = rng.UniformF(0.5, 1.0);
+ for (std::pair<uint32_t, uint32_t> p : cc) {
+ ccs.Row(p.second)[p.first] = cc_color;
+ }
+ }
+ }
+ }
+
+ if (paint_ccs) {
+ JXL_ASSERT(WantDebugOutput(aux_out));
+ aux_out->DumpPlaneNormalized("ccs", ccs);
+ }
+ if (info.empty()) {
+ return {};
+ }
+
+ // Remove duplicates.
+ constexpr size_t kMinPatchOccurrences = 2;
+ std::sort(info.begin(), info.end());
+ size_t unique = 0;
+ for (size_t i = 1; i < info.size(); i++) {
+ if (info[i].first == info[unique].first) {
+ info[unique].second.insert(info[unique].second.end(),
+ info[i].second.begin(), info[i].second.end());
+ } else {
+ if (info[unique].second.size() >= kMinPatchOccurrences) {
+ unique++;
+ }
+ info[unique] = info[i];
+ }
+ }
+ if (info[unique].second.size() >= kMinPatchOccurrences) {
+ unique++;
+ }
+ info.resize(unique);
+
+ size_t max_patch_size = 0;
+
+ for (size_t i = 0; i < info.size(); i++) {
+ size_t pixels = info[i].first.xsize * info[i].first.ysize;
+ if (pixels > max_patch_size) max_patch_size = pixels;
+ }
+
+ // don't use patches if all patches are smaller than this
+ constexpr size_t kMinMaxPatchSize = 20;
+ if (max_patch_size < kMinMaxPatchSize) return {};
+
+ return info;
+}
+
+} // namespace
+
+void FindBestPatchDictionary(const Image3F& opsin,
+ PassesEncoderState* JXL_RESTRICT state,
+ const JxlCmsInterface& cms, ThreadPool* pool,
+ AuxOut* aux_out, bool is_xyb) {
+ std::vector<PatchInfo> info =
+ FindTextLikePatches(opsin, state, pool, aux_out, is_xyb);
+
+ // TODO(veluca): this doesn't work if both dots and patches are enabled.
+ // For now, since dots and patches are not likely to occur in the same kind of
+ // images, disable dots if some patches were found.
+ if (info.empty() &&
+ ApplyOverride(
+ state->cparams.dots,
+ state->cparams.speed_tier <= SpeedTier::kSquirrel &&
+ state->cparams.butteraugli_distance >= kMinButteraugliForDots)) {
+ info = FindDotDictionary(state->cparams, opsin, state->shared.cmap, pool);
+ }
+
+ if (info.empty()) return;
+
+ std::sort(
+ info.begin(), info.end(), [&](const PatchInfo& a, const PatchInfo& b) {
+ return a.first.xsize * a.first.ysize > b.first.xsize * b.first.ysize;
+ });
+
+ size_t max_x_size = 0;
+ size_t max_y_size = 0;
+ size_t total_pixels = 0;
+
+ for (size_t i = 0; i < info.size(); i++) {
+ size_t pixels = info[i].first.xsize * info[i].first.ysize;
+ if (max_x_size < info[i].first.xsize) max_x_size = info[i].first.xsize;
+ if (max_y_size < info[i].first.ysize) max_y_size = info[i].first.ysize;
+ total_pixels += pixels;
+ }
+
+ // Bin-packing & conversion of patches.
+ constexpr float kBinPackingSlackness = 1.05f;
+ size_t ref_xsize = std::max<float>(max_x_size, std::sqrt(total_pixels));
+ size_t ref_ysize = std::max<float>(max_y_size, std::sqrt(total_pixels));
+ std::vector<std::pair<size_t, size_t>> ref_positions(info.size());
+ // TODO(veluca): allow partial overlaps of patches that have the same pixels.
+ size_t max_y = 0;
+ do {
+ max_y = 0;
+ // Increase packed image size.
+ ref_xsize = ref_xsize * kBinPackingSlackness + 1;
+ ref_ysize = ref_ysize * kBinPackingSlackness + 1;
+
+ ImageB occupied(ref_xsize, ref_ysize);
+ ZeroFillImage(&occupied);
+ uint8_t* JXL_RESTRICT occupied_rows = occupied.Row(0);
+ size_t occupied_stride = occupied.PixelsPerRow();
+
+ bool success = true;
+ // For every patch...
+ for (size_t patch = 0; patch < info.size(); patch++) {
+ size_t x0 = 0;
+ size_t y0 = 0;
+ size_t xsize = info[patch].first.xsize;
+ size_t ysize = info[patch].first.ysize;
+ bool found = false;
+ // For every possible start position ...
+ for (; y0 + ysize <= ref_ysize; y0++) {
+ x0 = 0;
+ for (; x0 + xsize <= ref_xsize; x0++) {
+ bool has_occupied_pixel = false;
+ size_t x = x0;
+ // Check if it is possible to place the patch in this position in the
+ // reference frame.
+ for (size_t y = y0; y < y0 + ysize; y++) {
+ x = x0;
+ for (; x < x0 + xsize; x++) {
+ if (occupied_rows[y * occupied_stride + x]) {
+ has_occupied_pixel = true;
+ break;
+ }
+ }
+ } // end of positioning check
+ if (!has_occupied_pixel) {
+ found = true;
+ break;
+ }
+ x0 = x; // Jump to next pixel after the occupied one.
+ }
+ if (found) break;
+ } // end of start position checking
+
+ // We didn't find a possible position: repeat from the beginning with a
+ // larger reference frame size.
+ if (!found) {
+ success = false;
+ break;
+ }
+
+ // We found a position: mark the corresponding positions in the reference
+ // image as used.
+ ref_positions[patch] = {x0, y0};
+ for (size_t y = y0; y < y0 + ysize; y++) {
+ for (size_t x = x0; x < x0 + xsize; x++) {
+ occupied_rows[y * occupied_stride + x] = true;
+ }
+ }
+ max_y = std::max(max_y, y0 + ysize);
+ }
+
+ if (success) break;
+ } while (true);
+
+ JXL_ASSERT(ref_ysize >= max_y);
+
+ ref_ysize = max_y;
+
+ Image3F reference_frame(ref_xsize, ref_ysize);
+ // TODO(veluca): figure out a better way to fill the image.
+ ZeroFillImage(&reference_frame);
+ std::vector<PatchPosition> positions;
+ std::vector<PatchReferencePosition> pref_positions;
+ std::vector<PatchBlending> blendings;
+ float* JXL_RESTRICT ref_rows[3] = {
+ reference_frame.PlaneRow(0, 0),
+ reference_frame.PlaneRow(1, 0),
+ reference_frame.PlaneRow(2, 0),
+ };
+ size_t ref_stride = reference_frame.PixelsPerRow();
+ size_t num_ec = state->shared.metadata->m.num_extra_channels;
+
+ for (size_t i = 0; i < info.size(); i++) {
+ PatchReferencePosition ref_pos;
+ ref_pos.xsize = info[i].first.xsize;
+ ref_pos.ysize = info[i].first.ysize;
+ ref_pos.x0 = ref_positions[i].first;
+ ref_pos.y0 = ref_positions[i].second;
+ ref_pos.ref = kPatchFrameReferenceId;
+ for (size_t y = 0; y < ref_pos.ysize; y++) {
+ for (size_t x = 0; x < ref_pos.xsize; x++) {
+ for (size_t c = 0; c < 3; c++) {
+ ref_rows[c][(y + ref_pos.y0) * ref_stride + x + ref_pos.x0] =
+ info[i].first.fpixels[c][y * ref_pos.xsize + x];
+ }
+ }
+ }
+ for (const auto& pos : info[i].second) {
+ positions.emplace_back(
+ PatchPosition{pos.first, pos.second, pref_positions.size()});
+ // Add blending for color channels, ignore other channels.
+ blendings.push_back({PatchBlendMode::kAdd, 0, false});
+ for (size_t j = 0; j < num_ec; ++j) {
+ blendings.push_back({PatchBlendMode::kNone, 0, false});
+ }
+ }
+ pref_positions.emplace_back(std::move(ref_pos));
+ }
+
+ CompressParams cparams = state->cparams;
+ // Recursive application of patches could create very weird issues.
+ cparams.patches = Override::kOff;
+
+ RoundtripPatchFrame(&reference_frame, state, kPatchFrameReferenceId, cparams,
+ cms, pool, aux_out, /*subtract=*/true);
+
+ // TODO(veluca): this assumes that applying patches is commutative, which is
+ // not true for all blending modes. This code only produces kAdd patches, so
+ // this works out.
+ PatchDictionaryEncoder::SetPositions(
+ &state->shared.image_features.patches, std::move(positions),
+ std::move(pref_positions), std::move(blendings));
+}
+
+void RoundtripPatchFrame(Image3F* reference_frame,
+ PassesEncoderState* JXL_RESTRICT state, int idx,
+ CompressParams& cparams, const JxlCmsInterface& cms,
+ ThreadPool* pool, AuxOut* aux_out, bool subtract) {
+ FrameInfo patch_frame_info;
+ cparams.resampling = 1;
+ cparams.ec_resampling = 1;
+ cparams.dots = Override::kOff;
+ cparams.noise = Override::kOff;
+ cparams.modular_mode = true;
+ cparams.responsive = 0;
+ cparams.progressive_dc = 0;
+ cparams.progressive_mode = false;
+ cparams.qprogressive_mode = false;
+ // Use gradient predictor and not Predictor::Best.
+ cparams.options.predictor = Predictor::Gradient;
+ patch_frame_info.save_as_reference = idx; // always saved.
+ patch_frame_info.frame_type = FrameType::kReferenceOnly;
+ patch_frame_info.save_before_color_transform = true;
+ ImageBundle ib(&state->shared.metadata->m);
+ // TODO(veluca): metadata.color_encoding is a lie: ib is in XYB, but there is
+ // no simple way to express that yet.
+ patch_frame_info.ib_needs_color_transform = false;
+ ib.SetFromImage(std::move(*reference_frame),
+ state->shared.metadata->m.color_encoding);
+ if (!ib.metadata()->extra_channel_info.empty()) {
+ // Add dummy extra channels to the patch image: patch encoding does not yet
+ // support extra channels, but the codec expects that the amount of extra
+ // channels in frames matches that in the metadata of the codestream.
+ std::vector<ImageF> extra_channels;
+ extra_channels.reserve(ib.metadata()->extra_channel_info.size());
+ for (size_t i = 0; i < ib.metadata()->extra_channel_info.size(); i++) {
+ extra_channels.emplace_back(ib.xsize(), ib.ysize());
+ // Must initialize the image with data to not affect blending with
+ // uninitialized memory.
+ // TODO(lode): patches must copy and use the real extra channels instead.
+ ZeroFillImage(&extra_channels.back());
+ }
+ ib.SetExtraChannels(std::move(extra_channels));
+ }
+ PassesEncoderState roundtrip_state;
+ auto special_frame = std::unique_ptr<BitWriter>(new BitWriter());
+ AuxOut patch_aux_out;
+ JXL_CHECK(EncodeFrame(cparams, patch_frame_info, state->shared.metadata, ib,
+ &roundtrip_state, cms, pool, special_frame.get(),
+ aux_out ? &patch_aux_out : nullptr));
+ if (aux_out) {
+ for (const auto& l : patch_aux_out.layers) {
+ aux_out->layers[kLayerDictionary].Assimilate(l);
+ }
+ }
+ const Span<const uint8_t> encoded = special_frame->GetSpan();
+ state->special_frames.emplace_back(std::move(special_frame));
+ if (subtract) {
+ ImageBundle decoded(&state->shared.metadata->m);
+ PassesDecoderState dec_state;
+ JXL_CHECK(dec_state.output_encoding_info.SetFromMetadata(
+ *state->shared.metadata));
+ const uint8_t* frame_start = encoded.data();
+ size_t encoded_size = encoded.size();
+ JXL_CHECK(DecodeFrame(&dec_state, pool, frame_start, encoded_size, &decoded,
+ *state->shared.metadata));
+ frame_start += decoded.decoded_bytes();
+ encoded_size -= decoded.decoded_bytes();
+ size_t ref_xsize =
+ dec_state.shared_storage.reference_frames[idx].frame.color()->xsize();
+ // if the frame itself uses patches, we need to decode another frame
+ if (!ref_xsize) {
+ JXL_CHECK(DecodeFrame(&dec_state, pool, frame_start, encoded_size,
+ &decoded, *state->shared.metadata));
+ }
+ JXL_CHECK(encoded_size == 0);
+ state->shared.reference_frames[idx] =
+ std::move(dec_state.shared_storage.reference_frames[idx]);
+ } else {
+ state->shared.reference_frames[idx].frame = std::move(ib);
+ }
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_patch_dictionary.h b/third_party/jpeg-xl/lib/jxl/enc_patch_dictionary.h
new file mode 100644
index 0000000000..f30881b232
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_patch_dictionary.h
@@ -0,0 +1,109 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_PATCH_DICTIONARY_H_
+#define LIB_JXL_ENC_PATCH_DICTIONARY_H_
+
+// Chooses reference patches, and avoids encoding them once per occurrence.
+
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include <tuple>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/opsin_params.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+constexpr size_t kMaxPatchSize = 32;
+
+struct QuantizedPatch {
+ size_t xsize;
+ size_t ysize;
+ QuantizedPatch() {
+ for (size_t i = 0; i < 3; i++) {
+ pixels[i].resize(kMaxPatchSize * kMaxPatchSize);
+ fpixels[i].resize(kMaxPatchSize * kMaxPatchSize);
+ }
+ }
+ std::vector<int8_t> pixels[3] = {};
+ // Not compared. Used only to retrieve original pixels to construct the
+ // reference image.
+ std::vector<float> fpixels[3] = {};
+ bool operator==(const QuantizedPatch& other) const {
+ if (xsize != other.xsize) return false;
+ if (ysize != other.ysize) return false;
+ for (size_t c = 0; c < 3; c++) {
+ if (memcmp(pixels[c].data(), other.pixels[c].data(),
+ sizeof(int8_t) * xsize * ysize) != 0)
+ return false;
+ }
+ return true;
+ }
+
+ bool operator<(const QuantizedPatch& other) const {
+ if (xsize != other.xsize) return xsize < other.xsize;
+ if (ysize != other.ysize) return ysize < other.ysize;
+ for (size_t c = 0; c < 3; c++) {
+ int cmp = memcmp(pixels[c].data(), other.pixels[c].data(),
+ sizeof(int8_t) * xsize * ysize);
+ if (cmp > 0) return false;
+ if (cmp < 0) return true;
+ }
+ return false;
+ }
+};
+
+// Pair (patch, vector of occurrences).
+using PatchInfo =
+ std::pair<QuantizedPatch, std::vector<std::pair<uint32_t, uint32_t>>>;
+
+// Friend class of PatchDictionary.
+class PatchDictionaryEncoder {
+ public:
+ // Only call if HasAny().
+ static void Encode(const PatchDictionary& pdic, BitWriter* writer,
+ size_t layer, AuxOut* aux_out);
+
+ static void SetPositions(PatchDictionary* pdic,
+ std::vector<PatchPosition> positions,
+ std::vector<PatchReferencePosition> ref_positions,
+ std::vector<PatchBlending> blendings) {
+ pdic->positions_ = std::move(positions);
+ pdic->ref_positions_ = std::move(ref_positions);
+ pdic->blendings_ = std::move(blendings);
+ pdic->ComputePatchTree();
+ }
+
+ static void SubtractFrom(const PatchDictionary& pdic, Image3F* opsin);
+};
+
+void FindBestPatchDictionary(const Image3F& opsin,
+ PassesEncoderState* JXL_RESTRICT state,
+ const JxlCmsInterface& cms, ThreadPool* pool,
+ AuxOut* aux_out, bool is_xyb = true);
+
+void RoundtripPatchFrame(Image3F* reference_frame,
+ PassesEncoderState* JXL_RESTRICT state, int idx,
+ CompressParams& cparams, const JxlCmsInterface& cms,
+ ThreadPool* pool, AuxOut* aux_out, bool subtract);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_PATCH_DICTIONARY_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_photon_noise.cc b/third_party/jpeg-xl/lib/jxl/enc_photon_noise.cc
new file mode 100644
index 0000000000..3786ef5cf5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_photon_noise.cc
@@ -0,0 +1,89 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_photon_noise.h"
+
+namespace jxl {
+
+namespace {
+
+// Assumes a daylight-like spectrum.
+// https://www.strollswithmydog.com/effective-quantum-efficiency-of-sensor/#:~:text=11%2C260%20photons/um%5E2/lx-s
+constexpr float kPhotonsPerLxSPerUm2 = 11260;
+
+// Order of magnitude for cameras in the 2010-2020 decade, taking the CFA into
+// account.
+constexpr float kEffectiveQuantumEfficiency = 0.20;
+
+// TODO(sboukortt): reevaluate whether these are good defaults, notably whether
+// it would be worth making read noise higher at lower ISO settings.
+constexpr float kPhotoResponseNonUniformity = 0.005;
+constexpr float kInputReferredReadNoise = 3;
+
+// Assumes a 35mm sensor.
+constexpr float kSensorAreaUm2 = 36000.f * 24000;
+
+template <typename T>
+inline constexpr T Square(const T x) {
+ return x * x;
+}
+template <typename T>
+inline constexpr T Cube(const T x) {
+ return x * x * x;
+}
+
+} // namespace
+
+NoiseParams SimulatePhotonNoise(const size_t xsize, const size_t ysize,
+ const float iso) {
+ const float kOpsinAbsorbanceBiasCbrt = std::cbrt(kOpsinAbsorbanceBias[1]);
+
+ // Focal plane exposure for 18% of kDefaultIntensityTarget, in lx·s.
+ // (ISO = 10 lx·s ÷ H)
+ const float h_18 = 10 / iso;
+
+ const float pixel_area_um2 = kSensorAreaUm2 / (xsize * ysize);
+
+ const float electrons_per_pixel_18 = kEffectiveQuantumEfficiency *
+ kPhotonsPerLxSPerUm2 * h_18 *
+ pixel_area_um2;
+
+ NoiseParams params;
+
+ for (size_t i = 0; i < NoiseParams::kNumNoisePoints; ++i) {
+ const float scaled_index = i / (NoiseParams::kNumNoisePoints - 2.f);
+ // scaled_index is used for XYB = (0, 2·scaled_index, 2·scaled_index)
+ const float y = 2 * scaled_index;
+ // 1 = default intensity target
+ const float linear = std::max(
+ 0.f, Cube(y - kOpsinAbsorbanceBiasCbrt) + kOpsinAbsorbanceBias[1]);
+ const float electrons_per_pixel = electrons_per_pixel_18 * (linear / 0.18f);
+ // Quadrature sum of read noise, photon shot noise (sqrt(S) so simply not
+ // squared here) and photo response non-uniformity.
+ // https://doi.org/10.1117/3.725073
+ // Units are electrons rms.
+ const float noise =
+ std::sqrt(Square(kInputReferredReadNoise) + electrons_per_pixel +
+ Square(kPhotoResponseNonUniformity * electrons_per_pixel));
+ const float linear_noise = noise * (0.18f / electrons_per_pixel_18);
+ const float opsin_derivative =
+ (1.f / 3) / Square(std::cbrt(linear - kOpsinAbsorbanceBias[1]));
+ const float opsin_noise = linear_noise * opsin_derivative;
+
+ // TODO(sboukortt): verify more thoroughly whether the denominator is
+ // correct.
+ params.lut[i] =
+ Clamp1(opsin_noise /
+ (0.22f // norm_const
+ * std::sqrt(2.f) // red_noise + green_noise
+ * 1.13f // standard deviation of a plane of generated noise
+ ),
+ 0.f, 1.f);
+ }
+
+ return params;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_photon_noise.h b/third_party/jpeg-xl/lib/jxl/enc_photon_noise.h
new file mode 100644
index 0000000000..f43e14d560
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_photon_noise.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_PHOTON_NOISE_H_
+#define LIB_JXL_ENC_PHOTON_NOISE_H_
+
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/noise.h"
+
+namespace jxl {
+
+// Constructs a NoiseParams representing the noise that would be seen at the
+// selected nominal exposure on a last-decade (as of 2021) color camera with a
+// 36×24mm sensor (“35mm format”).
+NoiseParams SimulatePhotonNoise(size_t xsize, size_t ysize, float iso);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_PHOTON_NOISE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_photon_noise_test.cc b/third_party/jpeg-xl/lib/jxl/enc_photon_noise_test.cc
new file mode 100644
index 0000000000..be11b465ad
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_photon_noise_test.cc
@@ -0,0 +1,51 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_photon_noise.h"
+
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+using ::testing::FloatNear;
+using ::testing::Pointwise;
+
+MATCHER(AreApproximatelyEqual, "") {
+ constexpr float kTolerance = 1e-6;
+ const float actual = std::get<0>(arg);
+ const float expected = std::get<1>(arg);
+ return testing::ExplainMatchResult(FloatNear(expected, kTolerance), actual,
+ result_listener);
+}
+
+TEST(EncPhotonNoiseTest, LUTs) {
+ EXPECT_THAT(
+ SimulatePhotonNoise(/*xsize=*/6000, /*ysize=*/4000, /*iso=*/100).lut,
+ Pointwise(AreApproximatelyEqual(),
+ {0.00259652, 0.0139648, 0.00681551, 0.00632582, 0.00694917,
+ 0.00803922, 0.00934574, 0.0107607}));
+ EXPECT_THAT(
+ SimulatePhotonNoise(/*xsize=*/6000, /*ysize=*/4000, /*iso=*/800).lut,
+ Pointwise(AreApproximatelyEqual(),
+ {0.02077220, 0.0420923, 0.01820690, 0.01439020, 0.01293670,
+ 0.01254030, 0.01277390, 0.0134161}));
+ EXPECT_THAT(
+ SimulatePhotonNoise(/*xsize=*/6000, /*ysize=*/4000, /*iso=*/6400).lut,
+ Pointwise(AreApproximatelyEqual(),
+ {0.1661770, 0.1691120, 0.05309080, 0.03963960, 0.03357410,
+ 0.03001650, 0.02776740, 0.0263478}));
+
+ // Lower when measured on a per-pixel basis as there are fewer of them.
+ EXPECT_THAT(
+ SimulatePhotonNoise(/*xsize=*/4000, /*ysize=*/3000, /*iso=*/6400).lut,
+ Pointwise(AreApproximatelyEqual(),
+ {0.0830886, 0.1008720, 0.0367748, 0.0280305, 0.0240236,
+ 0.0218040, 0.0205771, 0.0200058}));
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_progressive_split.cc b/third_party/jpeg-xl/lib/jxl/enc_progressive_split.cc
new file mode 100644
index 0000000000..b65319f3fd
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_progressive_split.cc
@@ -0,0 +1,82 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_progressive_split.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <memory>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+template <typename T>
+void ProgressiveSplitter::SplitACCoefficients(
+ const T* JXL_RESTRICT block, const AcStrategy& acs, size_t bx, size_t by,
+ T* JXL_RESTRICT output[kMaxNumPasses]) {
+ size_t size = acs.covered_blocks_x() * acs.covered_blocks_y() * kDCTBlockSize;
+ auto shift_right_round0 = [&](T v, int shift) {
+ T one_if_negative = static_cast<uint32_t>(v) >> 31;
+ T add = (one_if_negative << shift) - one_if_negative;
+ return (v + add) >> shift;
+ };
+ // Early quit for the simple case of only one pass.
+ if (mode_.num_passes == 1) {
+ memcpy(output[0], block, sizeof(T) * size);
+ return;
+ }
+ size_t ncoeffs_all_done_from_earlier_passes = 1;
+
+ int previous_pass_shift = 0;
+ for (size_t num_pass = 0; num_pass < mode_.num_passes; num_pass++) { // pass
+ // Zero out output block.
+ memset(output[num_pass], 0, size * sizeof(T));
+ const int pass_shift = mode_.passes[num_pass].shift;
+ size_t frame_ncoeffs = mode_.passes[num_pass].num_coefficients;
+ size_t xsize = acs.covered_blocks_x();
+ size_t ysize = acs.covered_blocks_y();
+ CoefficientLayout(&ysize, &xsize);
+ for (size_t y = 0; y < ysize * frame_ncoeffs; y++) { // superblk-y
+ for (size_t x = 0; x < xsize * frame_ncoeffs; x++) { // superblk-x
+ size_t pos = y * xsize * kBlockDim + x;
+ if (x < xsize * ncoeffs_all_done_from_earlier_passes &&
+ y < ysize * ncoeffs_all_done_from_earlier_passes) {
+ // This coefficient was already included in an earlier pass,
+ // which included a genuinely smaller set of coefficients.
+ continue;
+ }
+ T v = block[pos];
+ // Previous pass discarded some bits: do not encode them again.
+ if (previous_pass_shift != 0) {
+ T previous_v = shift_right_round0(v, previous_pass_shift) *
+ (1 << previous_pass_shift);
+ v -= previous_v;
+ }
+ output[num_pass][pos] = shift_right_round0(v, pass_shift);
+ } // superblk-x
+ } // superblk-y
+ // We just finished a pass.
+ // Hence, we are now guaranteed to have included all coeffs up to
+ // frame_ncoeffs in every block, unless the current pass is shifted.
+ if (mode_.passes[num_pass].shift == 0) {
+ ncoeffs_all_done_from_earlier_passes = frame_ncoeffs;
+ }
+ previous_pass_shift = mode_.passes[num_pass].shift;
+ } // num_pass
+}
+
+template void ProgressiveSplitter::SplitACCoefficients<int32_t>(
+ const int32_t* JXL_RESTRICT, const AcStrategy&, size_t, size_t,
+ int32_t* JXL_RESTRICT[kMaxNumPasses]);
+
+template void ProgressiveSplitter::SplitACCoefficients<int16_t>(
+ const int16_t* JXL_RESTRICT, const AcStrategy&, size_t, size_t,
+ int16_t* JXL_RESTRICT[kMaxNumPasses]);
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_progressive_split.h b/third_party/jpeg-xl/lib/jxl/enc_progressive_split.h
new file mode 100644
index 0000000000..ef25944bb7
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_progressive_split.h
@@ -0,0 +1,131 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_PROGRESSIVE_SPLIT_H_
+#define LIB_JXL_PROGRESSIVE_SPLIT_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <limits>
+#include <memory>
+#include <vector>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/splines.h"
+
+// Functions to split DCT coefficients in multiple passes. All the passes of a
+// single frame are added together.
+
+namespace jxl {
+
+constexpr size_t kNoDownsamplingFactor = std::numeric_limits<size_t>::max();
+
+struct PassDefinition {
+ // Side of the square of the coefficients that should be kept in each 8x8
+ // block. Must be greater than 1, and at most 8. Should be in non-decreasing
+ // order.
+ size_t num_coefficients;
+
+ // How much to shift the encoded values by, with rounding.
+ size_t shift;
+
+ // If specified, this indicates that if the requested downsampling factor is
+ // sufficiently high, then it is fine to stop decoding after this pass.
+ // By default, passes are not marked as being suitable for any downsampling.
+ size_t suitable_for_downsampling_of_at_least;
+};
+
+struct ProgressiveMode {
+ size_t num_passes = 1;
+ PassDefinition passes[kMaxNumPasses] = {
+ PassDefinition{/*num_coefficients=*/8, /*shift=*/0,
+ /*suitable_for_downsampling_of_at_least=*/1}};
+
+ ProgressiveMode() = default;
+
+ template <size_t nump>
+ explicit ProgressiveMode(const PassDefinition (&p)[nump]) {
+ JXL_ASSERT(nump <= kMaxNumPasses);
+ num_passes = nump;
+ PassDefinition previous_pass{
+ /*num_coefficients=*/1, /*shift=*/0,
+ /*suitable_for_downsampling_of_at_least=*/kNoDownsamplingFactor};
+ size_t last_downsampling_factor = kNoDownsamplingFactor;
+ for (size_t i = 0; i < nump; i++) {
+ JXL_ASSERT(p[i].num_coefficients > previous_pass.num_coefficients ||
+ (p[i].num_coefficients == previous_pass.num_coefficients &&
+ p[i].shift < previous_pass.shift));
+ JXL_ASSERT(p[i].suitable_for_downsampling_of_at_least ==
+ kNoDownsamplingFactor ||
+ p[i].suitable_for_downsampling_of_at_least <=
+ last_downsampling_factor);
+ // Only used inside assert.
+ (void)last_downsampling_factor;
+ if (p[i].suitable_for_downsampling_of_at_least != kNoDownsamplingFactor) {
+ last_downsampling_factor = p[i].suitable_for_downsampling_of_at_least;
+ }
+ previous_pass = passes[i] = p[i];
+ }
+ }
+};
+
+class ProgressiveSplitter {
+ public:
+ void SetProgressiveMode(ProgressiveMode mode) { mode_ = mode; }
+
+ size_t GetNumPasses() const { return mode_.num_passes; }
+
+ void InitPasses(Passes* JXL_RESTRICT passes) const {
+ passes->num_passes = static_cast<uint32_t>(GetNumPasses());
+ passes->num_downsample = 0;
+ JXL_ASSERT(passes->num_passes != 0);
+ passes->shift[passes->num_passes - 1] = 0;
+ if (passes->num_passes == 1) return; // Done, arrays are empty
+
+ for (uint32_t i = 0; i < mode_.num_passes - 1; ++i) {
+ const size_t min_downsampling_factor =
+ mode_.passes[i].suitable_for_downsampling_of_at_least;
+ passes->shift[i] = mode_.passes[i].shift;
+ if (1 < min_downsampling_factor &&
+ min_downsampling_factor != kNoDownsamplingFactor) {
+ passes->downsample[passes->num_downsample] = min_downsampling_factor;
+ passes->last_pass[passes->num_downsample] = i;
+ if (mode_.passes[i + 1].suitable_for_downsampling_of_at_least <
+ min_downsampling_factor) {
+ passes->num_downsample += 1;
+ }
+ }
+ }
+ }
+
+ template <typename T>
+ void SplitACCoefficients(const T* JXL_RESTRICT block, const AcStrategy& acs,
+ size_t bx, size_t by,
+ T* JXL_RESTRICT output[kMaxNumPasses]);
+
+ private:
+ ProgressiveMode mode_;
+};
+
+extern template void ProgressiveSplitter::SplitACCoefficients<int32_t>(
+ const int32_t* JXL_RESTRICT, const AcStrategy&, size_t, size_t,
+ int32_t* JXL_RESTRICT[kMaxNumPasses]);
+
+extern template void ProgressiveSplitter::SplitACCoefficients<int16_t>(
+ const int16_t* JXL_RESTRICT, const AcStrategy&, size_t, size_t,
+ int16_t* JXL_RESTRICT[kMaxNumPasses]);
+
+} // namespace jxl
+
+#endif // LIB_JXL_PROGRESSIVE_SPLIT_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_quant_weights.cc b/third_party/jpeg-xl/lib/jxl/enc_quant_weights.cc
new file mode 100644
index 0000000000..848310e75d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_quant_weights.cc
@@ -0,0 +1,214 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_quant_weights.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <utility>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+namespace {
+
+Status EncodeDctParams(const DctQuantWeightParams& params, BitWriter* writer) {
+ JXL_ASSERT(params.num_distance_bands >= 1);
+ writer->Write(DctQuantWeightParams::kLog2MaxDistanceBands,
+ params.num_distance_bands - 1);
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t i = 0; i < params.num_distance_bands; i++) {
+ JXL_RETURN_IF_ERROR(F16Coder::Write(
+ params.distance_bands[c][i] * (i == 0 ? (1 / 64.0f) : 1.0f), writer));
+ }
+ }
+ return true;
+}
+
+Status EncodeQuant(const QuantEncoding& encoding, size_t idx, size_t size_x,
+ size_t size_y, BitWriter* writer,
+ ModularFrameEncoder* modular_frame_encoder) {
+ writer->Write(kLog2NumQuantModes, encoding.mode);
+ size_x *= kBlockDim;
+ size_y *= kBlockDim;
+ switch (encoding.mode) {
+ case QuantEncoding::kQuantModeLibrary: {
+ writer->Write(kCeilLog2NumPredefinedTables, encoding.predefined);
+ break;
+ }
+ case QuantEncoding::kQuantModeID: {
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t i = 0; i < 3; i++) {
+ JXL_RETURN_IF_ERROR(
+ F16Coder::Write(encoding.idweights[c][i] * (1.0f / 64), writer));
+ }
+ }
+ break;
+ }
+ case QuantEncoding::kQuantModeDCT2: {
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t i = 0; i < 6; i++) {
+ JXL_RETURN_IF_ERROR(F16Coder::Write(
+ encoding.dct2weights[c][i] * (1.0f / 64), writer));
+ }
+ }
+ break;
+ }
+ case QuantEncoding::kQuantModeDCT4X8: {
+ for (size_t c = 0; c < 3; c++) {
+ JXL_RETURN_IF_ERROR(
+ F16Coder::Write(encoding.dct4x8multipliers[c], writer));
+ }
+ JXL_RETURN_IF_ERROR(EncodeDctParams(encoding.dct_params, writer));
+ break;
+ }
+ case QuantEncoding::kQuantModeDCT4: {
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t i = 0; i < 2; i++) {
+ JXL_RETURN_IF_ERROR(
+ F16Coder::Write(encoding.dct4multipliers[c][i], writer));
+ }
+ }
+ JXL_RETURN_IF_ERROR(EncodeDctParams(encoding.dct_params, writer));
+ break;
+ }
+ case QuantEncoding::kQuantModeDCT: {
+ JXL_RETURN_IF_ERROR(EncodeDctParams(encoding.dct_params, writer));
+ break;
+ }
+ case QuantEncoding::kQuantModeRAW: {
+ ModularFrameEncoder::EncodeQuantTable(size_x, size_y, writer, encoding,
+ idx, modular_frame_encoder);
+ break;
+ }
+ case QuantEncoding::kQuantModeAFV: {
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t i = 0; i < 9; i++) {
+ JXL_RETURN_IF_ERROR(F16Coder::Write(
+ encoding.afv_weights[c][i] * (i < 6 ? 1.0f / 64 : 1.0f), writer));
+ }
+ }
+ JXL_RETURN_IF_ERROR(EncodeDctParams(encoding.dct_params, writer));
+ JXL_RETURN_IF_ERROR(EncodeDctParams(encoding.dct_params_afv_4x4, writer));
+ break;
+ }
+ }
+ return true;
+}
+
+} // namespace
+
+Status DequantMatricesEncode(const DequantMatrices* matrices, BitWriter* writer,
+ size_t layer, AuxOut* aux_out,
+ ModularFrameEncoder* modular_frame_encoder) {
+ bool all_default = true;
+ const std::vector<QuantEncoding>& encodings = matrices->encodings();
+
+ for (size_t i = 0; i < encodings.size(); i++) {
+ if (encodings[i].mode != QuantEncoding::kQuantModeLibrary ||
+ encodings[i].predefined != 0) {
+ all_default = false;
+ }
+ }
+ // TODO(janwas): better bound
+ BitWriter::Allotment allotment(writer, 512 * 1024);
+ writer->Write(1, all_default);
+ if (!all_default) {
+ for (size_t i = 0; i < encodings.size(); i++) {
+ JXL_RETURN_IF_ERROR(EncodeQuant(
+ encodings[i], i, DequantMatrices::required_size_x[i],
+ DequantMatrices::required_size_y[i], writer, modular_frame_encoder));
+ }
+ }
+ allotment.ReclaimAndCharge(writer, layer, aux_out);
+ return true;
+}
+
+Status DequantMatricesEncodeDC(const DequantMatrices* matrices,
+ BitWriter* writer, size_t layer,
+ AuxOut* aux_out) {
+ bool all_default = true;
+ const float* dc_quant = matrices->DCQuants();
+ for (size_t c = 0; c < 3; c++) {
+ if (dc_quant[c] != kDCQuant[c]) {
+ all_default = false;
+ }
+ }
+ BitWriter::Allotment allotment(writer, 1 + sizeof(float) * kBitsPerByte * 3);
+ writer->Write(1, all_default);
+ if (!all_default) {
+ for (size_t c = 0; c < 3; c++) {
+ JXL_RETURN_IF_ERROR(F16Coder::Write(dc_quant[c] * 128.0f, writer));
+ }
+ }
+ allotment.ReclaimAndCharge(writer, layer, aux_out);
+ return true;
+}
+
+void DequantMatricesSetCustomDC(DequantMatrices* matrices, const float* dc) {
+ matrices->SetDCQuant(dc);
+ // Roundtrip encode/decode DC to ensure same values as decoder.
+ BitWriter writer;
+ JXL_CHECK(DequantMatricesEncodeDC(matrices, &writer, 0, nullptr));
+ writer.ZeroPadToByte();
+ BitReader br(writer.GetSpan());
+ // Called only in the encoder: should fail only for programmer errors.
+ JXL_CHECK(matrices->DecodeDC(&br));
+ JXL_CHECK(br.Close());
+}
+
+void DequantMatricesScaleDC(DequantMatrices* matrices, const float scale) {
+ float dc[3];
+ for (size_t c = 0; c < 3; ++c) {
+ dc[c] = matrices->InvDCQuant(c) * (1.0f / scale);
+ }
+ DequantMatricesSetCustomDC(matrices, dc);
+}
+
+void DequantMatricesRoundtrip(DequantMatrices* matrices) {
+ // Do not pass modular en/decoder, as they only change entropy and not
+ // values.
+ BitWriter writer;
+ JXL_CHECK(DequantMatricesEncode(matrices, &writer, 0, nullptr));
+ writer.ZeroPadToByte();
+ BitReader br(writer.GetSpan());
+ // Called only in the encoder: should fail only for programmer errors.
+ JXL_CHECK(matrices->Decode(&br));
+ JXL_CHECK(br.Close());
+}
+
+void DequantMatricesSetCustom(DequantMatrices* matrices,
+ const std::vector<QuantEncoding>& encodings,
+ ModularFrameEncoder* encoder) {
+ JXL_ASSERT(encodings.size() == DequantMatrices::kNum);
+ matrices->SetEncodings(encodings);
+ for (size_t i = 0; i < encodings.size(); i++) {
+ if (encodings[i].mode == QuantEncodingInternal::kQuantModeRAW) {
+ encoder->AddQuantTable(DequantMatrices::required_size_x[i] * kBlockDim,
+ DequantMatrices::required_size_y[i] * kBlockDim,
+ encodings[i], i);
+ }
+ }
+ DequantMatricesRoundtrip(matrices);
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_quant_weights.h b/third_party/jpeg-xl/lib/jxl/enc_quant_weights.h
new file mode 100644
index 0000000000..e0a387fed5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_quant_weights.h
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_QUANT_WEIGHTS_H_
+#define LIB_JXL_ENC_QUANT_WEIGHTS_H_
+
+#include "lib/jxl/quant_weights.h"
+
+namespace jxl {
+
+struct AuxOut;
+struct BitWriter;
+
+Status DequantMatricesEncode(
+ const DequantMatrices* matrices, BitWriter* writer, size_t layer,
+ AuxOut* aux_out, ModularFrameEncoder* modular_frame_encoder = nullptr);
+Status DequantMatricesEncodeDC(const DequantMatrices* matrices,
+ BitWriter* writer, size_t layer,
+ AuxOut* aux_out);
+// For consistency with QuantEncoding, higher values correspond to more
+// precision.
+void DequantMatricesSetCustomDC(DequantMatrices* matrices, const float* dc);
+
+void DequantMatricesScaleDC(DequantMatrices* matrices, float scale);
+
+void DequantMatricesSetCustom(DequantMatrices* matrices,
+ const std::vector<QuantEncoding>& encodings,
+ ModularFrameEncoder* encoder);
+
+// Roundtrip encode/decode the matrices to ensure same values as decoder.
+void DequantMatricesRoundtrip(DequantMatrices* matrices);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_QUANT_WEIGHTS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_splines.cc b/third_party/jpeg-xl/lib/jxl/enc_splines.cc
new file mode 100644
index 0000000000..ddcd78a748
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_splines.cc
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <algorithm>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/splines.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+class QuantizedSplineEncoder {
+ public:
+ // Only call if HasAny().
+ static void Tokenize(const QuantizedSpline& spline,
+ std::vector<Token>* const tokens) {
+ tokens->emplace_back(kNumControlPointsContext,
+ spline.control_points_.size());
+ for (const auto& point : spline.control_points_) {
+ tokens->emplace_back(kControlPointsContext, PackSigned(point.first));
+ tokens->emplace_back(kControlPointsContext, PackSigned(point.second));
+ }
+ const auto encode_dct = [tokens](const int dct[32]) {
+ for (int i = 0; i < 32; ++i) {
+ tokens->emplace_back(kDCTContext, PackSigned(dct[i]));
+ }
+ };
+ for (int c = 0; c < 3; ++c) {
+ encode_dct(spline.color_dct_[c]);
+ }
+ encode_dct(spline.sigma_dct_);
+ }
+};
+
+namespace {
+
+void EncodeAllStartingPoints(const std::vector<Spline::Point>& points,
+ std::vector<Token>* tokens) {
+ int64_t last_x = 0;
+ int64_t last_y = 0;
+ for (size_t i = 0; i < points.size(); i++) {
+ const int64_t x = lroundf(points[i].x);
+ const int64_t y = lroundf(points[i].y);
+ if (i == 0) {
+ tokens->emplace_back(kStartingPositionContext, x);
+ tokens->emplace_back(kStartingPositionContext, y);
+ } else {
+ tokens->emplace_back(kStartingPositionContext, PackSigned(x - last_x));
+ tokens->emplace_back(kStartingPositionContext, PackSigned(y - last_y));
+ }
+ last_x = x;
+ last_y = y;
+ }
+}
+
+} // namespace
+
+void EncodeSplines(const Splines& splines, BitWriter* writer,
+ const size_t layer, const HistogramParams& histogram_params,
+ AuxOut* aux_out) {
+ JXL_ASSERT(splines.HasAny());
+
+ const std::vector<QuantizedSpline>& quantized_splines =
+ splines.QuantizedSplines();
+ std::vector<std::vector<Token>> tokens(1);
+ tokens[0].emplace_back(kNumSplinesContext, quantized_splines.size() - 1);
+ EncodeAllStartingPoints(splines.StartingPoints(), &tokens[0]);
+
+ tokens[0].emplace_back(kQuantizationAdjustmentContext,
+ PackSigned(splines.GetQuantizationAdjustment()));
+
+ for (const QuantizedSpline& spline : quantized_splines) {
+ QuantizedSplineEncoder::Tokenize(spline, &tokens[0]);
+ }
+
+ EntropyEncodingData codes;
+ std::vector<uint8_t> context_map;
+ BuildAndEncodeHistograms(histogram_params, kNumSplineContexts, tokens, &codes,
+ &context_map, writer, layer, aux_out);
+ WriteTokens(tokens[0], codes, context_map, writer, layer, aux_out);
+}
+
+Splines FindSplines(const Image3F& opsin) {
+ // TODO: implement spline detection.
+ return {};
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_splines.h b/third_party/jpeg-xl/lib/jxl/enc_splines.h
new file mode 100644
index 0000000000..be700dba75
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_splines.h
@@ -0,0 +1,38 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_SPLINES_H_
+#define LIB_JXL_ENC_SPLINES_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/splines.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Only call if splines.HasAny().
+void EncodeSplines(const Splines& splines, BitWriter* writer, size_t layer,
+ const HistogramParams& histogram_params, AuxOut* aux_out);
+
+Splines FindSplines(const Image3F& opsin);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_SPLINES_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_toc.cc b/third_party/jpeg-xl/lib/jxl/enc_toc.cc
new file mode 100644
index 0000000000..dc75fdd9ba
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_toc.cc
@@ -0,0 +1,45 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_toc.h"
+
+#include <stdint.h>
+
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_coeff_order.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+Status WriteGroupOffsets(const std::vector<BitWriter>& group_codes,
+ const std::vector<coeff_order_t>* permutation,
+ BitWriter* JXL_RESTRICT writer, AuxOut* aux_out) {
+ BitWriter::Allotment allotment(writer, MaxBits(group_codes.size()));
+ if (permutation && !group_codes.empty()) {
+ // Don't write a permutation at all for an empty group_codes.
+ writer->Write(1, 1); // permutation
+ JXL_DASSERT(permutation->size() == group_codes.size());
+ EncodePermutation(permutation->data(), /*skip=*/0, permutation->size(),
+ writer, /* layer= */ 0, aux_out);
+
+ } else {
+ writer->Write(1, 0); // no permutation
+ }
+ writer->ZeroPadToByte(); // before TOC entries
+
+ for (size_t i = 0; i < group_codes.size(); i++) {
+ JXL_ASSERT(group_codes[i].BitsWritten() % kBitsPerByte == 0);
+ const size_t group_size = group_codes[i].BitsWritten() / kBitsPerByte;
+ JXL_RETURN_IF_ERROR(U32Coder::Write(kTocDist, group_size, writer));
+ }
+ writer->ZeroPadToByte(); // before first group
+ allotment.ReclaimAndCharge(writer, kLayerTOC, aux_out);
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_toc.h b/third_party/jpeg-xl/lib/jxl/enc_toc.h
new file mode 100644
index 0000000000..242b3efccb
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_toc.h
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_TOC_H_
+#define LIB_JXL_ENC_TOC_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/enc_bit_writer.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// Writes the group offsets. If the permutation vector is nullptr, the identity
+// permutation will be used.
+Status WriteGroupOffsets(const std::vector<BitWriter>& group_codes,
+ const std::vector<coeff_order_t>* permutation,
+ BitWriter* JXL_RESTRICT writer, AuxOut* aux_out);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_TOC_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_transforms-inl.h b/third_party/jpeg-xl/lib/jxl/enc_transforms-inl.h
new file mode 100644
index 0000000000..ef6dc2bbd7
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_transforms-inl.h
@@ -0,0 +1,827 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
+#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
+#else
+#define LIB_JXL_ENC_TRANSFORMS_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dct-inl.h"
+#include "lib/jxl/dct_scales.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// Inverse of ReinterpretingDCT.
+template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
+ size_t ROWS, size_t COLS>
+HWY_INLINE void ReinterpretingIDCT(const float* input,
+ const size_t input_stride, float* output,
+ const size_t output_stride) {
+ HWY_ALIGN float block[ROWS * COLS] = {};
+ if (ROWS < COLS) {
+ for (size_t y = 0; y < LF_ROWS; y++) {
+ for (size_t x = 0; x < LF_COLS; x++) {
+ block[y * COLS + x] = input[y * input_stride + x] *
+ DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
+ DCTTotalResampleScale<DCT_COLS, COLS>(x);
+ }
+ }
+ } else {
+ for (size_t y = 0; y < LF_COLS; y++) {
+ for (size_t x = 0; x < LF_ROWS; x++) {
+ block[y * ROWS + x] = input[y * input_stride + x] *
+ DCTTotalResampleScale<DCT_COLS, COLS>(y) *
+ DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
+ }
+ }
+ }
+
+ // ROWS, COLS <= 8, so we can put scratch space on the stack.
+ HWY_ALIGN float scratch_space[ROWS * COLS];
+ ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
+ scratch_space);
+}
+
+template <size_t S>
+void DCT2TopBlock(const float* block, size_t stride, float* out) {
+ static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
+ static_assert(S % 2 == 0, "S should be even");
+ float temp[kDCTBlockSize];
+ constexpr size_t num_2x2 = S / 2;
+ for (size_t y = 0; y < num_2x2; y++) {
+ for (size_t x = 0; x < num_2x2; x++) {
+ float c00 = block[y * 2 * stride + x * 2];
+ float c01 = block[y * 2 * stride + x * 2 + 1];
+ float c10 = block[(y * 2 + 1) * stride + x * 2];
+ float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
+ float r00 = c00 + c01 + c10 + c11;
+ float r01 = c00 + c01 - c10 - c11;
+ float r10 = c00 - c01 + c10 - c11;
+ float r11 = c00 - c01 - c10 + c11;
+ r00 *= 0.25f;
+ r01 *= 0.25f;
+ r10 *= 0.25f;
+ r11 *= 0.25f;
+ temp[y * kBlockDim + x] = r00;
+ temp[y * kBlockDim + num_2x2 + x] = r01;
+ temp[(y + num_2x2) * kBlockDim + x] = r10;
+ temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
+ }
+ }
+ for (size_t y = 0; y < S; y++) {
+ for (size_t x = 0; x < S; x++) {
+ out[y * kBlockDim + x] = temp[y * kBlockDim + x];
+ }
+ }
+}
+
+void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
+ HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
+ {
+ 0.2500000000000000,
+ 0.8769029297991420f,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ -0.4105377591765233f,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ },
+ {
+ 0.2500000000000000,
+ 0.2206518106944235f,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ -0.7071067811865474f,
+ 0.6235485373547691f,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ },
+ {
+ 0.2500000000000000,
+ -0.1014005039375376f,
+ 0.4067007583026075f,
+ -0.2125574805828875f,
+ 0.0000000000000000,
+ -0.0643507165794627f,
+ -0.4517556589999482f,
+ -0.3046847507248690f,
+ 0.3017929516615495f,
+ 0.4082482904638627f,
+ 0.1747866975480809f,
+ -0.2110560104933578f,
+ -0.1426608480880726f,
+ -0.1381354035075859f,
+ -0.1743760259965107f,
+ 0.1135498731499434f,
+ },
+ {
+ 0.2500000000000000,
+ -0.1014005039375375f,
+ 0.4444481661973445f,
+ 0.3085497062849767f,
+ 0.0000000000000000f,
+ -0.0643507165794627f,
+ 0.1585450355184006f,
+ 0.5112616136591823f,
+ 0.2579236279634118f,
+ 0.0000000000000000,
+ 0.0812611176717539f,
+ 0.1856718091610980f,
+ -0.3416446842253372f,
+ 0.3302282550303788f,
+ 0.0702790691196284f,
+ -0.0741750459581035f,
+ },
+ {
+ 0.2500000000000000,
+ 0.2206518106944236f,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.7071067811865476f,
+ 0.6235485373547694f,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ },
+ {
+ 0.2500000000000000,
+ -0.1014005039375378f,
+ 0.0000000000000000,
+ 0.4706702258572536f,
+ 0.0000000000000000,
+ -0.0643507165794628f,
+ -0.0403851516082220f,
+ 0.0000000000000000,
+ 0.1627234014286620f,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.7367497537172237f,
+ 0.0875511500058708f,
+ -0.2921026642334881f,
+ 0.1940289303259434f,
+ },
+ {
+ 0.2500000000000000,
+ -0.1014005039375377f,
+ 0.1957439937204294f,
+ -0.1621205195722993f,
+ 0.0000000000000000,
+ -0.0643507165794628f,
+ 0.0074182263792424f,
+ -0.2904801297289980f,
+ 0.0952002265347504f,
+ 0.0000000000000000,
+ -0.3675398009862027f,
+ 0.4921585901373873f,
+ 0.2462710772207515f,
+ -0.0794670660590957f,
+ 0.3623817333531167f,
+ -0.4351904965232280f,
+ },
+ {
+ 0.2500000000000000,
+ -0.1014005039375376f,
+ 0.2929100136981264f,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ -0.0643507165794627f,
+ 0.3935103426921017f,
+ -0.0657870154914280f,
+ 0.0000000000000000,
+ -0.4082482904638628f,
+ -0.3078822139579090f,
+ -0.3852501370925192f,
+ -0.0857401903551931f,
+ -0.4613374887461511f,
+ 0.0000000000000000,
+ 0.2191868483885747f,
+ },
+ {
+ 0.2500000000000000,
+ -0.1014005039375376f,
+ -0.4067007583026072f,
+ -0.2125574805828705f,
+ 0.0000000000000000,
+ -0.0643507165794627f,
+ -0.4517556589999464f,
+ 0.3046847507248840f,
+ 0.3017929516615503f,
+ -0.4082482904638635f,
+ -0.1747866975480813f,
+ 0.2110560104933581f,
+ -0.1426608480880734f,
+ -0.1381354035075829f,
+ -0.1743760259965108f,
+ 0.1135498731499426f,
+ },
+ {
+ 0.2500000000000000,
+ -0.1014005039375377f,
+ -0.1957439937204287f,
+ -0.1621205195722833f,
+ 0.0000000000000000,
+ -0.0643507165794628f,
+ 0.0074182263792444f,
+ 0.2904801297290076f,
+ 0.0952002265347505f,
+ 0.0000000000000000,
+ 0.3675398009862011f,
+ -0.4921585901373891f,
+ 0.2462710772207514f,
+ -0.0794670660591026f,
+ 0.3623817333531165f,
+ -0.4351904965232251f,
+ },
+ {
+ 0.2500000000000000,
+ -0.1014005039375375f,
+ 0.0000000000000000,
+ -0.4706702258572528f,
+ 0.0000000000000000,
+ -0.0643507165794627f,
+ 0.1107416575309343f,
+ 0.0000000000000000,
+ -0.1627234014286617f,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.1488339922711357f,
+ 0.4972464710953509f,
+ 0.2921026642334879f,
+ 0.5550443808910661f,
+ },
+ {
+ 0.2500000000000000,
+ -0.1014005039375377f,
+ 0.1137907446044809f,
+ -0.1464291867126764f,
+ 0.0000000000000000,
+ -0.0643507165794628f,
+ 0.0829816309488205f,
+ -0.2388977352334460f,
+ -0.3531238544981630f,
+ -0.4082482904638630f,
+ 0.4826689115059883f,
+ 0.1741941265991622f,
+ -0.0476868035022925f,
+ 0.1253805944856366f,
+ -0.4326608024727445f,
+ -0.2546827712406646f,
+ },
+ {
+ 0.2500000000000000,
+ -0.1014005039375377f,
+ -0.4444481661973438f,
+ 0.3085497062849487f,
+ 0.0000000000000000,
+ -0.0643507165794628f,
+ 0.1585450355183970f,
+ -0.5112616136592012f,
+ 0.2579236279634129f,
+ 0.0000000000000000,
+ -0.0812611176717504f,
+ -0.1856718091610990f,
+ -0.3416446842253373f,
+ 0.3302282550303805f,
+ 0.0702790691196282f,
+ -0.0741750459581023f,
+ },
+ {
+ 0.2500000000000000,
+ -0.1014005039375376f,
+ -0.2929100136981264f,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ -0.0643507165794627f,
+ 0.3935103426921022f,
+ 0.0657870154914254f,
+ 0.0000000000000000,
+ 0.4082482904638634f,
+ 0.3078822139579031f,
+ 0.3852501370925211f,
+ -0.0857401903551927f,
+ -0.4613374887461554f,
+ 0.0000000000000000,
+ 0.2191868483885728f,
+ },
+ {
+ 0.2500000000000000,
+ -0.1014005039375376f,
+ -0.1137907446044814f,
+ -0.1464291867126654f,
+ 0.0000000000000000,
+ -0.0643507165794627f,
+ 0.0829816309488214f,
+ 0.2388977352334547f,
+ -0.3531238544981624f,
+ 0.4082482904638630f,
+ -0.4826689115059858f,
+ -0.1741941265991621f,
+ -0.0476868035022928f,
+ 0.1253805944856431f,
+ -0.4326608024727457f,
+ -0.2546827712406641f,
+ },
+ {
+ 0.2500000000000000,
+ -0.1014005039375374f,
+ 0.0000000000000000,
+ 0.4251149611657548f,
+ 0.0000000000000000,
+ -0.0643507165794626f,
+ -0.4517556589999480f,
+ 0.0000000000000000,
+ -0.6035859033230976f,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ 0.0000000000000000,
+ -0.1426608480880724f,
+ -0.1381354035075845f,
+ 0.3487520519930227f,
+ 0.1135498731499429f,
+ },
+ };
+
+ const HWY_CAPPED(float, 16) d;
+ for (size_t i = 0; i < 16; i += Lanes(d)) {
+ auto scalar = Zero(d);
+ for (size_t j = 0; j < 16; j++) {
+ auto px = Set(d, pixels[j]);
+ auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
+ scalar = MulAdd(px, basis, scalar);
+ }
+ Store(scalar, d, coeffs + i);
+ }
+}
+
+// Coefficient layout:
+// - (even, even) positions hold AFV coefficients
+// - (odd, even) positions hold DCT4x4 coefficients
+// - (any, odd) positions hold DCT4x8 coefficients
+template <size_t afv_kind>
+void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
+ size_t pixels_stride,
+ float* JXL_RESTRICT coefficients) {
+ HWY_ALIGN float scratch_space[4 * 8 * 2];
+ size_t afv_x = afv_kind & 1;
+ size_t afv_y = afv_kind / 2;
+ HWY_ALIGN float block[4 * 8];
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 4; ix++) {
+ block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
+ pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
+ }
+ }
+ // AFV coefficients in (even, even) positions.
+ HWY_ALIGN float coeff[4 * 4];
+ AFVDCT4x4(block, coeff);
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 4; ix++) {
+ coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
+ }
+ }
+ // 4x4 DCT of the block with same y and different x.
+ ComputeScaledDCT<4, 4>()(
+ DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
+ pixels_stride),
+ block, scratch_space);
+ // ... in (odd, even) positions.
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 8; ix++) {
+ coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
+ }
+ }
+ // 4x8 DCT of the other half of the block.
+ ComputeScaledDCT<4, 8>()(
+ DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
+ block, scratch_space);
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 8; ix++) {
+ coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
+ }
+ }
+ float block00 = coefficients[0] * 0.25f;
+ float block01 = coefficients[1];
+ float block10 = coefficients[8];
+ coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
+ coefficients[1] = (block00 - block01) * 0.5f;
+ coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
+}
+
+HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategy::Type strategy,
+ const float* JXL_RESTRICT pixels,
+ size_t pixels_stride,
+ float* JXL_RESTRICT coefficients,
+ float* JXL_RESTRICT scratch_space) {
+ using Type = AcStrategy::Type;
+ switch (strategy) {
+ case Type::IDENTITY: {
+ PROFILER_ZONE("DCT Identity");
+ for (size_t y = 0; y < 2; y++) {
+ for (size_t x = 0; x < 2; x++) {
+ float block_dc = 0;
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 4; ix++) {
+ block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
+ }
+ }
+ block_dc *= 1.0f / 16;
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 4; ix++) {
+ if (ix == 1 && iy == 1) continue;
+ coefficients[(y + iy * 2) * 8 + x + ix * 2] =
+ pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
+ pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
+ }
+ }
+ coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
+ coefficients[y * 8 + x] = block_dc;
+ }
+ }
+ float block00 = coefficients[0];
+ float block01 = coefficients[1];
+ float block10 = coefficients[8];
+ float block11 = coefficients[9];
+ coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
+ coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
+ coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
+ coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
+ break;
+ }
+ case Type::DCT8X4: {
+ PROFILER_ZONE("DCT 8x4");
+ for (size_t x = 0; x < 2; x++) {
+ HWY_ALIGN float block[4 * 8];
+ ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
+ scratch_space);
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 8; ix++) {
+ // Store transposed.
+ coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
+ }
+ }
+ }
+ float block0 = coefficients[0];
+ float block1 = coefficients[8];
+ coefficients[0] = (block0 + block1) * 0.5f;
+ coefficients[8] = (block0 - block1) * 0.5f;
+ break;
+ }
+ case Type::DCT4X8: {
+ PROFILER_ZONE("DCT 4x8");
+ for (size_t y = 0; y < 2; y++) {
+ HWY_ALIGN float block[4 * 8];
+ ComputeScaledDCT<4, 8>()(
+ DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
+ scratch_space);
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 8; ix++) {
+ coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
+ }
+ }
+ }
+ float block0 = coefficients[0];
+ float block1 = coefficients[8];
+ coefficients[0] = (block0 + block1) * 0.5f;
+ coefficients[8] = (block0 - block1) * 0.5f;
+ break;
+ }
+ case Type::DCT4X4: {
+ PROFILER_ZONE("DCT 4");
+ for (size_t y = 0; y < 2; y++) {
+ for (size_t x = 0; x < 2; x++) {
+ HWY_ALIGN float block[4 * 4];
+ ComputeScaledDCT<4, 4>()(
+ DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
+ block, scratch_space);
+ for (size_t iy = 0; iy < 4; iy++) {
+ for (size_t ix = 0; ix < 4; ix++) {
+ coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
+ }
+ }
+ }
+ }
+ float block00 = coefficients[0];
+ float block01 = coefficients[1];
+ float block10 = coefficients[8];
+ float block11 = coefficients[9];
+ coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
+ coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
+ coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
+ coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
+ break;
+ }
+ case Type::DCT2X2: {
+ PROFILER_ZONE("DCT 2");
+ DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
+ DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
+ DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
+ break;
+ }
+ case Type::DCT16X16: {
+ PROFILER_ZONE("DCT 16");
+ ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
+ scratch_space);
+ break;
+ }
+ case Type::DCT16X8: {
+ PROFILER_ZONE("DCT 16x8");
+ ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
+ scratch_space);
+ break;
+ }
+ case Type::DCT8X16: {
+ PROFILER_ZONE("DCT 8x16");
+ ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
+ scratch_space);
+ break;
+ }
+ case Type::DCT32X8: {
+ PROFILER_ZONE("DCT 32x8");
+ ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
+ scratch_space);
+ break;
+ }
+ case Type::DCT8X32: {
+ PROFILER_ZONE("DCT 8x32");
+ ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
+ scratch_space);
+ break;
+ }
+ case Type::DCT32X16: {
+ PROFILER_ZONE("DCT 32x16");
+ ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
+ scratch_space);
+ break;
+ }
+ case Type::DCT16X32: {
+ PROFILER_ZONE("DCT 16x32");
+ ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
+ scratch_space);
+ break;
+ }
+ case Type::DCT32X32: {
+ PROFILER_ZONE("DCT 32");
+ ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
+ scratch_space);
+ break;
+ }
+ case Type::DCT: {
+ PROFILER_ZONE("DCT 8");
+ ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
+ scratch_space);
+ break;
+ }
+ case Type::AFV0: {
+ PROFILER_ZONE("AFV0");
+ AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
+ break;
+ }
+ case Type::AFV1: {
+ PROFILER_ZONE("AFV1");
+ AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
+ break;
+ }
+ case Type::AFV2: {
+ PROFILER_ZONE("AFV2");
+ AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
+ break;
+ }
+ case Type::AFV3: {
+ PROFILER_ZONE("AFV3");
+ AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
+ break;
+ }
+ case Type::DCT64X64: {
+ PROFILER_ZONE("DCT 64x64");
+ ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
+ scratch_space);
+ break;
+ }
+ case Type::DCT64X32: {
+ PROFILER_ZONE("DCT 64x32");
+ ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
+ scratch_space);
+ break;
+ }
+ case Type::DCT32X64: {
+ PROFILER_ZONE("DCT 32x64");
+ ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
+ scratch_space);
+ break;
+ }
+ case Type::DCT128X128: {
+ PROFILER_ZONE("DCT 128x128");
+ ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
+ scratch_space);
+ break;
+ }
+ case Type::DCT128X64: {
+ PROFILER_ZONE("DCT 128x64");
+ ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
+ scratch_space);
+ break;
+ }
+ case Type::DCT64X128: {
+ PROFILER_ZONE("DCT 64x128");
+ ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
+ scratch_space);
+ break;
+ }
+ case Type::DCT256X256: {
+ PROFILER_ZONE("DCT 256x256");
+ ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
+ scratch_space);
+ break;
+ }
+ case Type::DCT256X128: {
+ PROFILER_ZONE("DCT 256x128");
+ ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
+ scratch_space);
+ break;
+ }
+ case Type::DCT128X256: {
+ PROFILER_ZONE("DCT 128x256");
+ ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
+ scratch_space);
+ break;
+ }
+ case Type::kNumValidStrategies:
+ JXL_ABORT("Invalid strategy");
+ }
+}
+
+HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategy::Type strategy,
+ const float* block, float* dc,
+ size_t dc_stride) {
+ using Type = AcStrategy::Type;
+ switch (strategy) {
+ case Type::DCT16X8: {
+ ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
+ /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
+ block, 2 * kBlockDim, dc, dc_stride);
+ break;
+ }
+ case Type::DCT8X16: {
+ ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
+ /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
+ block, 2 * kBlockDim, dc, dc_stride);
+ break;
+ }
+ case Type::DCT16X16: {
+ ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
+ /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
+ block, 2 * kBlockDim, dc, dc_stride);
+ break;
+ }
+ case Type::DCT32X8: {
+ ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
+ /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
+ block, 4 * kBlockDim, dc, dc_stride);
+ break;
+ }
+ case Type::DCT8X32: {
+ ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+ /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
+ block, 4 * kBlockDim, dc, dc_stride);
+ break;
+ }
+ case Type::DCT32X16: {
+ ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
+ /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
+ block, 4 * kBlockDim, dc, dc_stride);
+ break;
+ }
+ case Type::DCT16X32: {
+ ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+ /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
+ block, 4 * kBlockDim, dc, dc_stride);
+ break;
+ }
+ case Type::DCT32X32: {
+ ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+ /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
+ block, 4 * kBlockDim, dc, dc_stride);
+ break;
+ }
+ case Type::DCT64X32: {
+ ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
+ /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
+ block, 8 * kBlockDim, dc, dc_stride);
+ break;
+ }
+ case Type::DCT32X64: {
+ ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
+ /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
+ block, 8 * kBlockDim, dc, dc_stride);
+ break;
+ }
+ case Type::DCT64X64: {
+ ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
+ /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
+ block, 8 * kBlockDim, dc, dc_stride);
+ break;
+ }
+ case Type::DCT128X64: {
+ ReinterpretingIDCT<
+ /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
+ /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
+ block, 16 * kBlockDim, dc, dc_stride);
+ break;
+ }
+ case Type::DCT64X128: {
+ ReinterpretingIDCT<
+ /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
+ /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
+ block, 16 * kBlockDim, dc, dc_stride);
+ break;
+ }
+ case Type::DCT128X128: {
+ ReinterpretingIDCT<
+ /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
+ /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
+ block, 16 * kBlockDim, dc, dc_stride);
+ break;
+ }
+ case Type::DCT256X128: {
+ ReinterpretingIDCT<
+ /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
+ /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
+ block, 32 * kBlockDim, dc, dc_stride);
+ break;
+ }
+ case Type::DCT128X256: {
+ ReinterpretingIDCT<
+ /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
+ /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
+ block, 32 * kBlockDim, dc, dc_stride);
+ break;
+ }
+ case Type::DCT256X256: {
+ ReinterpretingIDCT<
+ /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
+ /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
+ block, 32 * kBlockDim, dc, dc_stride);
+ break;
+ }
+ case Type::DCT:
+ case Type::DCT2X2:
+ case Type::DCT4X4:
+ case Type::DCT4X8:
+ case Type::DCT8X4:
+ case Type::AFV0:
+ case Type::AFV1:
+ case Type::AFV2:
+ case Type::AFV3:
+ case Type::IDENTITY:
+ dc[0] = block[0];
+ break;
+ case Type::kNumValidStrategies:
+ JXL_ABORT("Invalid strategy");
+ }
+}
+
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif // LIB_JXL_ENC_TRANSFORMS_INL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_transforms.cc b/third_party/jpeg-xl/lib/jxl/enc_transforms.cc
new file mode 100644
index 0000000000..8978ba1dcb
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_transforms.cc
@@ -0,0 +1,41 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_transforms.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_transforms.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/enc_transforms-inl.h"
+
+namespace jxl {
+
+#if HWY_ONCE
+HWY_EXPORT(TransformFromPixels);
+void TransformFromPixels(const AcStrategy::Type strategy,
+ const float* JXL_RESTRICT pixels, size_t pixels_stride,
+ float* JXL_RESTRICT coefficients,
+ float* scratch_space) {
+ return HWY_DYNAMIC_DISPATCH(TransformFromPixels)(
+ strategy, pixels, pixels_stride, coefficients, scratch_space);
+}
+
+HWY_EXPORT(DCFromLowestFrequencies);
+void DCFromLowestFrequencies(AcStrategy::Type strategy, const float* block,
+ float* dc, size_t dc_stride) {
+ return HWY_DYNAMIC_DISPATCH(DCFromLowestFrequencies)(strategy, block, dc,
+ dc_stride);
+}
+
+HWY_EXPORT(AFVDCT4x4);
+void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
+ return HWY_DYNAMIC_DISPATCH(AFVDCT4x4)(pixels, coeffs);
+}
+#endif // HWY_ONCE
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/enc_transforms.h b/third_party/jpeg-xl/lib/jxl/enc_transforms.h
new file mode 100644
index 0000000000..039ccc3893
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_transforms.h
@@ -0,0 +1,32 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_TRANSFORMS_H_
+#define LIB_JXL_ENC_TRANSFORMS_H_
+
+// Facade for (non-inlined) integral transforms.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+void TransformFromPixels(const AcStrategy::Type strategy,
+ const float* JXL_RESTRICT pixels, size_t pixels_stride,
+ float* JXL_RESTRICT coefficients,
+ float* JXL_RESTRICT scratch_space);
+
+// Equivalent of the above for DC image.
+void DCFromLowestFrequencies(AcStrategy::Type strategy, const float* block,
+ float* dc, size_t dc_stride);
+
+void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_TRANSFORMS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/enc_xyb.cc b/third_party/jpeg-xl/lib/jxl/enc_xyb.cc
new file mode 100644
index 0000000000..2ee0abf821
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_xyb.cc
@@ -0,0 +1,520 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/enc_xyb.h"
+
+#include <algorithm>
+#include <atomic>
+#include <cstdlib>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/enc_xyb.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_image_bundle.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::ZeroIfNegative;
+
+// 4x3 matrix * 3x1 SIMD vectors
+template <class V>
+JXL_INLINE void OpsinAbsorbance(const V r, const V g, const V b,
+ const float* JXL_RESTRICT premul_absorb,
+ V* JXL_RESTRICT mixed0, V* JXL_RESTRICT mixed1,
+ V* JXL_RESTRICT mixed2) {
+ const float* bias = &kOpsinAbsorbanceBias[0];
+ const HWY_FULL(float) d;
+ const size_t N = Lanes(d);
+ const auto m0 = Load(d, premul_absorb + 0 * N);
+ const auto m1 = Load(d, premul_absorb + 1 * N);
+ const auto m2 = Load(d, premul_absorb + 2 * N);
+ const auto m3 = Load(d, premul_absorb + 3 * N);
+ const auto m4 = Load(d, premul_absorb + 4 * N);
+ const auto m5 = Load(d, premul_absorb + 5 * N);
+ const auto m6 = Load(d, premul_absorb + 6 * N);
+ const auto m7 = Load(d, premul_absorb + 7 * N);
+ const auto m8 = Load(d, premul_absorb + 8 * N);
+ *mixed0 = MulAdd(m0, r, MulAdd(m1, g, MulAdd(m2, b, Set(d, bias[0]))));
+ *mixed1 = MulAdd(m3, r, MulAdd(m4, g, MulAdd(m5, b, Set(d, bias[1]))));
+ *mixed2 = MulAdd(m6, r, MulAdd(m7, g, MulAdd(m8, b, Set(d, bias[2]))));
+}
+
+template <class V>
+void StoreXYB(const V r, V g, const V b, float* JXL_RESTRICT valx,
+ float* JXL_RESTRICT valy, float* JXL_RESTRICT valz) {
+ const HWY_FULL(float) d;
+ const V half = Set(d, 0.5f);
+ Store(Mul(half, Sub(r, g)), d, valx);
+ Store(Mul(half, Add(r, g)), d, valy);
+ Store(b, d, valz);
+}
+
+// Converts one RGB vector to XYB.
+template <class V>
+void LinearRGBToXYB(const V r, const V g, const V b,
+ const float* JXL_RESTRICT premul_absorb,
+ float* JXL_RESTRICT valx, float* JXL_RESTRICT valy,
+ float* JXL_RESTRICT valz) {
+ V mixed0, mixed1, mixed2;
+ OpsinAbsorbance(r, g, b, premul_absorb, &mixed0, &mixed1, &mixed2);
+
+ // mixed* should be non-negative even for wide-gamut, so clamp to zero.
+ mixed0 = ZeroIfNegative(mixed0);
+ mixed1 = ZeroIfNegative(mixed1);
+ mixed2 = ZeroIfNegative(mixed2);
+
+ const HWY_FULL(float) d;
+ const size_t N = Lanes(d);
+ mixed0 = CubeRootAndAdd(mixed0, Load(d, premul_absorb + 9 * N));
+ mixed1 = CubeRootAndAdd(mixed1, Load(d, premul_absorb + 10 * N));
+ mixed2 = CubeRootAndAdd(mixed2, Load(d, premul_absorb + 11 * N));
+ StoreXYB(mixed0, mixed1, mixed2, valx, valy, valz);
+
+ // For wide-gamut inputs, r/g/b and valx (but not y/z) are often negative.
+}
+
+void LinearRGBRowToXYB(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1,
+ float* JXL_RESTRICT row2,
+ const float* JXL_RESTRICT premul_absorb, size_t xsize) {
+ const HWY_FULL(float) d;
+ for (size_t x = 0; x < xsize; x += Lanes(d)) {
+ const auto r = Load(d, row0 + x);
+ const auto g = Load(d, row1 + x);
+ const auto b = Load(d, row2 + x);
+ LinearRGBToXYB(r, g, b, premul_absorb, row0 + x, row1 + x, row2 + x);
+ }
+}
+
+// Input/output uses the codec.h scaling: nominally 0-1 if in-gamut.
+template <class V>
+V LinearFromSRGB(V encoded) {
+ return TF_SRGB().DisplayFromEncoded(encoded);
+}
+
+Status LinearSRGBToXYB(const Image3F& linear,
+ const float* JXL_RESTRICT premul_absorb,
+ ThreadPool* pool, Image3F* JXL_RESTRICT xyb) {
+ const size_t xsize = linear.xsize();
+
+ const HWY_FULL(float) d;
+ return RunOnPool(
+ pool, 0, static_cast<uint32_t>(linear.ysize()), ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /*thread*/) {
+ const size_t y = static_cast<size_t>(task);
+ const float* JXL_RESTRICT row_in0 = linear.ConstPlaneRow(0, y);
+ const float* JXL_RESTRICT row_in1 = linear.ConstPlaneRow(1, y);
+ const float* JXL_RESTRICT row_in2 = linear.ConstPlaneRow(2, y);
+ float* JXL_RESTRICT row_xyb0 = xyb->PlaneRow(0, y);
+ float* JXL_RESTRICT row_xyb1 = xyb->PlaneRow(1, y);
+ float* JXL_RESTRICT row_xyb2 = xyb->PlaneRow(2, y);
+
+ for (size_t x = 0; x < xsize; x += Lanes(d)) {
+ const auto in_r = Load(d, row_in0 + x);
+ const auto in_g = Load(d, row_in1 + x);
+ const auto in_b = Load(d, row_in2 + x);
+ LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row_xyb0 + x,
+ row_xyb1 + x, row_xyb2 + x);
+ }
+ },
+ "LinearToXYB");
+}
+
+Status SRGBToXYB(const Image3F& srgb, const float* JXL_RESTRICT premul_absorb,
+ ThreadPool* pool, Image3F* JXL_RESTRICT xyb) {
+ const size_t xsize = srgb.xsize();
+
+ const HWY_FULL(float) d;
+ return RunOnPool(
+ pool, 0, static_cast<uint32_t>(srgb.ysize()), ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /*thread*/) {
+ const size_t y = static_cast<size_t>(task);
+ const float* JXL_RESTRICT row_srgb0 = srgb.ConstPlaneRow(0, y);
+ const float* JXL_RESTRICT row_srgb1 = srgb.ConstPlaneRow(1, y);
+ const float* JXL_RESTRICT row_srgb2 = srgb.ConstPlaneRow(2, y);
+ float* JXL_RESTRICT row_xyb0 = xyb->PlaneRow(0, y);
+ float* JXL_RESTRICT row_xyb1 = xyb->PlaneRow(1, y);
+ float* JXL_RESTRICT row_xyb2 = xyb->PlaneRow(2, y);
+
+ for (size_t x = 0; x < xsize; x += Lanes(d)) {
+ const auto in_r = LinearFromSRGB(Load(d, row_srgb0 + x));
+ const auto in_g = LinearFromSRGB(Load(d, row_srgb1 + x));
+ const auto in_b = LinearFromSRGB(Load(d, row_srgb2 + x));
+ LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row_xyb0 + x,
+ row_xyb1 + x, row_xyb2 + x);
+ }
+ },
+ "SRGBToXYB");
+}
+
+Status SRGBToXYBAndLinear(const Image3F& srgb,
+ const float* JXL_RESTRICT premul_absorb,
+ ThreadPool* pool, Image3F* JXL_RESTRICT xyb,
+ Image3F* JXL_RESTRICT linear) {
+ const size_t xsize = srgb.xsize();
+
+ const HWY_FULL(float) d;
+ return RunOnPool(
+ pool, 0, static_cast<uint32_t>(srgb.ysize()), ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /*thread*/) {
+ const size_t y = static_cast<size_t>(task);
+ const float* JXL_RESTRICT row_srgb0 = srgb.ConstPlaneRow(0, y);
+ const float* JXL_RESTRICT row_srgb1 = srgb.ConstPlaneRow(1, y);
+ const float* JXL_RESTRICT row_srgb2 = srgb.ConstPlaneRow(2, y);
+
+ float* JXL_RESTRICT row_linear0 = linear->PlaneRow(0, y);
+ float* JXL_RESTRICT row_linear1 = linear->PlaneRow(1, y);
+ float* JXL_RESTRICT row_linear2 = linear->PlaneRow(2, y);
+
+ float* JXL_RESTRICT row_xyb0 = xyb->PlaneRow(0, y);
+ float* JXL_RESTRICT row_xyb1 = xyb->PlaneRow(1, y);
+ float* JXL_RESTRICT row_xyb2 = xyb->PlaneRow(2, y);
+
+ for (size_t x = 0; x < xsize; x += Lanes(d)) {
+ const auto in_r = LinearFromSRGB(Load(d, row_srgb0 + x));
+ const auto in_g = LinearFromSRGB(Load(d, row_srgb1 + x));
+ const auto in_b = LinearFromSRGB(Load(d, row_srgb2 + x));
+
+ Store(in_r, d, row_linear0 + x);
+ Store(in_g, d, row_linear1 + x);
+ Store(in_b, d, row_linear2 + x);
+
+ LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row_xyb0 + x,
+ row_xyb1 + x, row_xyb2 + x);
+ }
+ },
+ "SRGBToXYBAndLinear");
+}
+
+void ComputePremulAbsorb(float intensity_target, float* premul_absorb) {
+ const HWY_FULL(float) d;
+ const size_t N = Lanes(d);
+ const float mul = intensity_target / 255.0f;
+ for (size_t i = 0; i < 9; ++i) {
+ const auto absorb = Set(d, kOpsinAbsorbanceMatrix[i] * mul);
+ Store(absorb, d, premul_absorb + i * N);
+ }
+ for (size_t i = 0; i < 3; ++i) {
+ const auto neg_bias_cbrt = Set(d, -cbrtf(kOpsinAbsorbanceBias[i]));
+ Store(neg_bias_cbrt, d, premul_absorb + (9 + i) * N);
+ }
+}
+
+Image3F TransformToLinearRGB(const Image3F& in,
+ const ColorEncoding& color_encoding,
+ float intensity_target, const JxlCmsInterface& cms,
+ ThreadPool* pool) {
+ ColorSpaceTransform c_transform(cms);
+ bool is_gray = color_encoding.IsGray();
+ const ColorEncoding& c_desired = ColorEncoding::LinearSRGB(is_gray);
+ Image3F out(in.xsize(), in.ysize());
+ std::atomic<bool> ok{true};
+ JXL_CHECK(RunOnPool(
+ pool, 0, in.ysize(),
+ [&](const size_t num_threads) {
+ return c_transform.Init(color_encoding, c_desired, intensity_target,
+ in.xsize(), num_threads);
+ },
+ [&](const uint32_t y, const size_t thread) {
+ float* mutable_src_buf = c_transform.BufSrc(thread);
+ const float* src_buf = mutable_src_buf;
+ // Interleave input.
+ if (is_gray) {
+ src_buf = in.ConstPlaneRow(0, y);
+ } else {
+ const float* JXL_RESTRICT row_in0 = in.ConstPlaneRow(0, y);
+ const float* JXL_RESTRICT row_in1 = in.ConstPlaneRow(1, y);
+ const float* JXL_RESTRICT row_in2 = in.ConstPlaneRow(2, y);
+ for (size_t x = 0; x < in.xsize(); x++) {
+ mutable_src_buf[3 * x + 0] = row_in0[x];
+ mutable_src_buf[3 * x + 1] = row_in1[x];
+ mutable_src_buf[3 * x + 2] = row_in2[x];
+ }
+ }
+ float* JXL_RESTRICT dst_buf = c_transform.BufDst(thread);
+ if (!c_transform.Run(thread, src_buf, dst_buf)) {
+ ok.store(false);
+ return;
+ }
+ float* JXL_RESTRICT row_out0 = out.PlaneRow(0, y);
+ float* JXL_RESTRICT row_out1 = out.PlaneRow(1, y);
+ float* JXL_RESTRICT row_out2 = out.PlaneRow(2, y);
+ // De-interleave output and convert type.
+ if (is_gray) {
+ for (size_t x = 0; x < in.xsize(); x++) {
+ row_out0[x] = dst_buf[x];
+ row_out1[x] = dst_buf[x];
+ row_out2[x] = dst_buf[x];
+ }
+ } else {
+ for (size_t x = 0; x < in.xsize(); x++) {
+ row_out0[x] = dst_buf[3 * x + 0];
+ row_out1[x] = dst_buf[3 * x + 1];
+ row_out2[x] = dst_buf[3 * x + 2];
+ }
+ }
+ },
+ "Colorspace transform"));
+ JXL_CHECK(ok.load());
+ return out;
+}
+
+void Image3FToXYB(const Image3F& in, const ColorEncoding& color_encoding,
+ float intensity_target, ThreadPool* pool,
+ Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms) {
+ JXL_ASSERT(SameSize(in, *xyb));
+
+ const HWY_FULL(float) d;
+ // Pre-broadcasted constants
+ HWY_ALIGN float premul_absorb[MaxLanes(d) * 12];
+ ComputePremulAbsorb(intensity_target, premul_absorb);
+
+ bool is_gray = color_encoding.IsGray();
+ const ColorEncoding& c_linear_srgb = ColorEncoding::LinearSRGB(is_gray);
+ if (c_linear_srgb.SameColorEncoding(color_encoding)) {
+ JXL_CHECK(LinearSRGBToXYB(in, premul_absorb, pool, xyb));
+ } else if (color_encoding.IsSRGB()) {
+ JXL_CHECK(SRGBToXYB(in, premul_absorb, pool, xyb));
+ } else {
+ Image3F linear =
+ TransformToLinearRGB(in, color_encoding, intensity_target, cms, pool);
+ JXL_CHECK(LinearSRGBToXYB(linear, premul_absorb, pool, xyb));
+ }
+}
+
+// This is different from Butteraugli's OpsinDynamicsImage() in the sense that
+// it does not contain a sensitivity multiplier based on the blurred image.
+const ImageBundle* ToXYB(const ImageBundle& in, ThreadPool* pool,
+ Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms,
+ ImageBundle* const JXL_RESTRICT linear) {
+ PROFILER_FUNC;
+
+ const size_t xsize = in.xsize();
+ const size_t ysize = in.ysize();
+ JXL_ASSERT(SameSize(in, *xyb));
+
+ const HWY_FULL(float) d;
+ // Pre-broadcasted constants
+ HWY_ALIGN float premul_absorb[MaxLanes(d) * 12];
+ ComputePremulAbsorb(in.metadata()->IntensityTarget(), premul_absorb);
+
+ const bool want_linear = linear != nullptr;
+
+ const ColorEncoding& c_linear_srgb = ColorEncoding::LinearSRGB(in.IsGray());
+ // Linear sRGB inputs are rare but can be useful for the fastest encoders, for
+ // which undoing the sRGB transfer function would be a large part of the cost.
+ if (c_linear_srgb.SameColorEncoding(in.c_current())) {
+ JXL_CHECK(LinearSRGBToXYB(in.color(), premul_absorb, pool, xyb));
+ // This only happens if kitten or slower, moving ImageBundle might be
+ // possible but the encoder is much slower than this copy.
+ if (want_linear) {
+ *linear = in.Copy();
+ return linear;
+ }
+ return &in;
+ }
+
+ // Common case: already sRGB, can avoid the color transform
+ if (in.IsSRGB()) {
+ // Common case: can avoid allocating/copying
+ if (!want_linear) {
+ JXL_CHECK(SRGBToXYB(in.color(), premul_absorb, pool, xyb));
+ return &in;
+ }
+
+ // Slow encoder also wants linear sRGB.
+ linear->SetFromImage(Image3F(xsize, ysize), c_linear_srgb);
+ JXL_CHECK(SRGBToXYBAndLinear(in.color(), premul_absorb, pool, xyb,
+ linear->color()));
+ return linear;
+ }
+
+ // General case: not sRGB, need color transform.
+ ImageBundle linear_storage; // Local storage only used if !want_linear.
+
+ ImageBundle* linear_storage_ptr;
+ if (want_linear) {
+ // Caller asked for linear, use that storage directly.
+ linear_storage_ptr = linear;
+ } else {
+ // Caller didn't ask for linear, create our own local storage
+ // OK to reuse metadata, it will not be changed.
+ linear_storage = ImageBundle(const_cast<ImageMetadata*>(in.metadata()));
+ linear_storage_ptr = &linear_storage;
+ }
+
+ const ImageBundle* ptr;
+ JXL_CHECK(TransformIfNeeded(in, c_linear_srgb, cms, pool, linear_storage_ptr,
+ &ptr));
+ // If no transform was necessary, should have taken the above codepath.
+ JXL_ASSERT(ptr == linear_storage_ptr);
+
+ JXL_CHECK(
+ LinearSRGBToXYB(*linear_storage_ptr->color(), premul_absorb, pool, xyb));
+ return want_linear ? linear : &in;
+}
+
+// Transform RGB to YCbCr.
+// Could be performed in-place (i.e. Y, Cb and Cr could alias R, B and B).
+Status RgbToYcbcr(const ImageF& r_plane, const ImageF& g_plane,
+ const ImageF& b_plane, ImageF* y_plane, ImageF* cb_plane,
+ ImageF* cr_plane, ThreadPool* pool) {
+ const HWY_FULL(float) df;
+ const size_t S = Lanes(df); // Step.
+
+ const size_t xsize = r_plane.xsize();
+ const size_t ysize = r_plane.ysize();
+ if ((xsize == 0) || (ysize == 0)) return true;
+
+ // Full-range BT.601 as defined by JFIF Clause 7:
+ // https://www.itu.int/rec/T-REC-T.871-201105-I/en
+ const auto k128 = Set(df, 128.0f / 255);
+ const auto kR = Set(df, 0.299f); // NTSC luma
+ const auto kG = Set(df, 0.587f);
+ const auto kB = Set(df, 0.114f);
+ const auto kAmpR = Set(df, 0.701f);
+ const auto kAmpB = Set(df, 0.886f);
+ const auto kDiffR = Add(kAmpR, kR);
+ const auto kDiffB = Add(kAmpB, kB);
+ const auto kNormR = Div(Set(df, 1.0f), (Add(kAmpR, Add(kG, kB))));
+ const auto kNormB = Div(Set(df, 1.0f), (Add(kR, Add(kG, kAmpB))));
+
+ constexpr size_t kGroupArea = kGroupDim * kGroupDim;
+ const size_t lines_per_group = DivCeil(kGroupArea, xsize);
+ const size_t num_stripes = DivCeil(ysize, lines_per_group);
+ const auto transform = [&](int idx, int /* thread*/) {
+ const size_t y0 = idx * lines_per_group;
+ const size_t y1 = std::min<size_t>(y0 + lines_per_group, ysize);
+ for (size_t y = y0; y < y1; ++y) {
+ const float* r_row = r_plane.ConstRow(y);
+ const float* g_row = g_plane.ConstRow(y);
+ const float* b_row = b_plane.ConstRow(y);
+ float* y_row = y_plane->Row(y);
+ float* cb_row = cb_plane->Row(y);
+ float* cr_row = cr_plane->Row(y);
+ for (size_t x = 0; x < xsize; x += S) {
+ const auto r = Load(df, r_row + x);
+ const auto g = Load(df, g_row + x);
+ const auto b = Load(df, b_row + x);
+ const auto r_base = Mul(r, kR);
+ const auto r_diff = Mul(r, kDiffR);
+ const auto g_base = Mul(g, kG);
+ const auto b_base = Mul(b, kB);
+ const auto b_diff = Mul(b, kDiffB);
+ const auto y_base = Add(r_base, Add(g_base, b_base));
+ const auto y_vec = Sub(y_base, k128);
+ const auto cb_vec = Mul(Sub(b_diff, y_base), kNormB);
+ const auto cr_vec = Mul(Sub(r_diff, y_base), kNormR);
+ Store(y_vec, df, y_row + x);
+ Store(cb_vec, df, cb_row + x);
+ Store(cr_vec, df, cr_row + x);
+ }
+ }
+ };
+ return RunOnPool(pool, 0, static_cast<int>(num_stripes), ThreadPool::NoInit,
+ transform, "RgbToYcbCr");
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(ToXYB);
+const ImageBundle* ToXYB(const ImageBundle& in, ThreadPool* pool,
+ Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms,
+ ImageBundle* JXL_RESTRICT linear_storage) {
+ return HWY_DYNAMIC_DISPATCH(ToXYB)(in, pool, xyb, cms, linear_storage);
+}
+
+HWY_EXPORT(LinearRGBRowToXYB);
+void LinearRGBRowToXYB(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1,
+ float* JXL_RESTRICT row2,
+ const float* JXL_RESTRICT premul_absorb, size_t xsize) {
+ HWY_DYNAMIC_DISPATCH(LinearRGBRowToXYB)
+ (row0, row1, row2, premul_absorb, xsize);
+}
+
+HWY_EXPORT(ComputePremulAbsorb);
+void ComputePremulAbsorb(float intensity_target, float* premul_absorb) {
+ HWY_DYNAMIC_DISPATCH(ComputePremulAbsorb)(intensity_target, premul_absorb);
+}
+
+void ScaleXYBRow(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1,
+ float* JXL_RESTRICT row2, size_t xsize) {
+ for (size_t x = 0; x < xsize; x++) {
+ row2[x] = (row2[x] - row1[x] + kScaledXYBOffset[2]) * kScaledXYBScale[2];
+ row0[x] = (row0[x] + kScaledXYBOffset[0]) * kScaledXYBScale[0];
+ row1[x] = (row1[x] + kScaledXYBOffset[1]) * kScaledXYBScale[1];
+ }
+}
+
+void ScaleXYB(Image3F* opsin) {
+ for (size_t y = 0; y < opsin->ysize(); y++) {
+ float* row0 = opsin->PlaneRow(0, y);
+ float* row1 = opsin->PlaneRow(1, y);
+ float* row2 = opsin->PlaneRow(2, y);
+ ScaleXYBRow(row0, row1, row2, opsin->xsize());
+ }
+}
+
+HWY_EXPORT(Image3FToXYB);
+void Image3FToXYB(const Image3F& in, const ColorEncoding& color_encoding,
+ float intensity_target, ThreadPool* pool,
+ Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms) {
+ return HWY_DYNAMIC_DISPATCH(Image3FToXYB)(in, color_encoding,
+ intensity_target, pool, xyb, cms);
+}
+
+HWY_EXPORT(RgbToYcbcr);
+Status RgbToYcbcr(const ImageF& r_plane, const ImageF& g_plane,
+ const ImageF& b_plane, ImageF* y_plane, ImageF* cb_plane,
+ ImageF* cr_plane, ThreadPool* pool) {
+ return HWY_DYNAMIC_DISPATCH(RgbToYcbcr)(r_plane, g_plane, b_plane, y_plane,
+ cb_plane, cr_plane, pool);
+}
+
+// DEPRECATED
+Image3F OpsinDynamicsImage(const Image3B& srgb8, const JxlCmsInterface& cms) {
+ ImageMetadata metadata;
+ metadata.SetUintSamples(8);
+ metadata.color_encoding = ColorEncoding::SRGB();
+ ImageBundle ib(&metadata);
+ ib.SetFromImage(ConvertToFloat(srgb8), metadata.color_encoding);
+ JXL_CHECK(ib.TransformTo(ColorEncoding::LinearSRGB(ib.IsGray()), cms));
+ ThreadPool* null_pool = nullptr;
+ Image3F xyb(srgb8.xsize(), srgb8.ysize());
+
+ ImageBundle linear_storage(&metadata);
+ (void)ToXYB(ib, null_pool, &xyb, cms, &linear_storage);
+ return xyb;
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/enc_xyb.h b/third_party/jpeg-xl/lib/jxl/enc_xyb.h
new file mode 100644
index 0000000000..fc902848ee
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/enc_xyb.h
@@ -0,0 +1,56 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENC_XYB_H_
+#define LIB_JXL_ENC_XYB_H_
+
+// Converts to XYB color space.
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+
+namespace jxl {
+
+// Converts any color space to XYB. If `linear` is not null, returns `linear`
+// after filling it with a linear sRGB copy of `in`. Otherwise, returns `&in`.
+//
+// NOTE this return value can avoid an extra color conversion if `in` would
+// later be passed to JxlButteraugliComparator.
+const ImageBundle* ToXYB(const ImageBundle& in, ThreadPool* pool,
+ Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms,
+ ImageBundle* JXL_RESTRICT linear = nullptr);
+
+void Image3FToXYB(const Image3F& in, const ColorEncoding& color_encoding,
+ float intensity_target, ThreadPool* pool,
+ Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms);
+
+void LinearRGBRowToXYB(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1,
+ float* JXL_RESTRICT row2,
+ const float* JXL_RESTRICT premul_absorb, size_t xsize);
+
+void ComputePremulAbsorb(float intensity_target, float* premul_absorb);
+
+// Transforms each color component of the given XYB image into the [0.0, 1.0]
+// interval with an affine transform.
+void ScaleXYB(Image3F* opsin);
+void ScaleXYBRow(float* row0, float* row1, float* row2, size_t xsize);
+
+// Bt.601 to match JPEG/JFIF. Outputs _signed_ YCbCr values suitable for DCT,
+// see F.1.1.3 of T.81 (because our data type is float, there is no need to add
+// a bias to make the values unsigned).
+Status RgbToYcbcr(const ImageF& r_plane, const ImageF& g_plane,
+ const ImageF& b_plane, ImageF* y_plane, ImageF* cb_plane,
+ ImageF* cr_plane, ThreadPool* pool);
+
+// DEPRECATED, used by opsin_image_wrapper.
+Image3F OpsinDynamicsImage(const Image3B& srgb8, const JxlCmsInterface& cms);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENC_XYB_H_
diff --git a/third_party/jpeg-xl/lib/jxl/encode.cc b/third_party/jpeg-xl/lib/jxl/encode.cc
new file mode 100644
index 0000000000..fbd5133ae5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/encode.cc
@@ -0,0 +1,2128 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <brotli/encode.h>
+#include <jxl/codestream_header.h>
+#include <jxl/encode.h>
+#include <jxl/types.h>
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/enc_fast_lossless.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_icc_codec.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/encode_internal.h"
+#include "lib/jxl/exif.h"
+#include "lib/jxl/jpeg/enc_jpeg_data.h"
+#include "lib/jxl/luminance.h"
+#include "lib/jxl/sanitizers.h"
+
+// Debug-printing failure macro similar to JXL_FAILURE, but for the status code
+// JXL_ENC_ERROR
+#ifdef JXL_CRASH_ON_ERROR
+#define JXL_API_ERROR(enc, error_code, format, ...) \
+ (enc->error = error_code, \
+ ::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__), \
+ ::jxl::Abort(), JXL_ENC_ERROR)
+#define JXL_API_ERROR_NOSET(format, ...) \
+ (::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__), \
+ ::jxl::Abort(), JXL_ENC_ERROR)
+#else // JXL_CRASH_ON_ERROR
+#define JXL_API_ERROR(enc, error_code, format, ...) \
+ (enc->error = error_code, \
+ ((JXL_DEBUG_ON_ERROR) && \
+ ::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__)), \
+ JXL_ENC_ERROR)
+#define JXL_API_ERROR_NOSET(format, ...) \
+ (::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__), \
+ JXL_ENC_ERROR)
+#endif // JXL_CRASH_ON_ERROR
+
+namespace jxl {} // namespace jxl
+
+uint32_t JxlEncoderVersion(void) {
+ return JPEGXL_MAJOR_VERSION * 1000000 + JPEGXL_MINOR_VERSION * 1000 +
+ JPEGXL_PATCH_VERSION;
+}
+
+namespace {
+template <typename T>
+void AppendJxlpBoxCounter(uint32_t counter, bool last, T* output) {
+ if (last) counter |= 0x80000000;
+ for (size_t i = 0; i < 4; i++) {
+ output->push_back(counter >> (8 * (3 - i)) & 0xff);
+ }
+}
+
+void QueueFrame(
+ const JxlEncoderFrameSettings* frame_settings,
+ jxl::MemoryManagerUniquePtr<jxl::JxlEncoderQueuedFrame>& frame) {
+ if (frame_settings->values.lossless) {
+ frame->option_values.cparams.SetLossless();
+ }
+
+ jxl::JxlEncoderQueuedInput queued_input(frame_settings->enc->memory_manager);
+ queued_input.frame = std::move(frame);
+ frame_settings->enc->input_queue.emplace_back(std::move(queued_input));
+ frame_settings->enc->num_queued_frames++;
+}
+
+void QueueFastLosslessFrame(const JxlEncoderFrameSettings* frame_settings,
+ JxlFastLosslessFrameState* fast_lossless_frame) {
+ jxl::JxlEncoderQueuedInput queued_input(frame_settings->enc->memory_manager);
+ queued_input.fast_lossless_frame.reset(fast_lossless_frame);
+ frame_settings->enc->input_queue.emplace_back(std::move(queued_input));
+ frame_settings->enc->num_queued_frames++;
+}
+
+void QueueBox(JxlEncoder* enc,
+ jxl::MemoryManagerUniquePtr<jxl::JxlEncoderQueuedBox>& box) {
+ jxl::JxlEncoderQueuedInput queued_input(enc->memory_manager);
+ queued_input.box = std::move(box);
+ enc->input_queue.emplace_back(std::move(queued_input));
+ enc->num_queued_boxes++;
+}
+
+// TODO(lode): share this code and the Brotli compression code in enc_jpeg_data
+JxlEncoderStatus BrotliCompress(int quality, const uint8_t* in, size_t in_size,
+ jxl::PaddedBytes* out) {
+ std::unique_ptr<BrotliEncoderState, decltype(BrotliEncoderDestroyInstance)*>
+ enc(BrotliEncoderCreateInstance(nullptr, nullptr, nullptr),
+ BrotliEncoderDestroyInstance);
+ if (!enc) return JXL_API_ERROR_NOSET("BrotliEncoderCreateInstance failed");
+
+ BrotliEncoderSetParameter(enc.get(), BROTLI_PARAM_QUALITY, quality);
+ BrotliEncoderSetParameter(enc.get(), BROTLI_PARAM_SIZE_HINT, in_size);
+
+ constexpr size_t kBufferSize = 128 * 1024;
+ jxl::PaddedBytes temp_buffer(kBufferSize);
+
+ size_t avail_in = in_size;
+ const uint8_t* next_in = in;
+
+ size_t total_out = 0;
+
+ for (;;) {
+ size_t avail_out = kBufferSize;
+ uint8_t* next_out = temp_buffer.data();
+ jxl::msan::MemoryIsInitialized(next_in, avail_in);
+ if (!BrotliEncoderCompressStream(enc.get(), BROTLI_OPERATION_FINISH,
+ &avail_in, &next_in, &avail_out, &next_out,
+ &total_out)) {
+ return JXL_API_ERROR_NOSET("Brotli compression failed");
+ }
+ size_t out_size = next_out - temp_buffer.data();
+ jxl::msan::UnpoisonMemory(next_out - out_size, out_size);
+ out->resize(out->size() + out_size);
+ memcpy(out->data() + out->size() - out_size, temp_buffer.data(), out_size);
+ if (BrotliEncoderIsFinished(enc.get())) break;
+ }
+
+ return JXL_ENC_SUCCESS;
+}
+
+// The JXL codestream can have level 5 or level 10. Levels have certain
+// restrictions such as max allowed image dimensions. This function checks the
+// level required to support the current encoder settings. The debug_string is
+// intended to be used for developer API error messages, and may be set to
+// nullptr.
+int VerifyLevelSettings(const JxlEncoder* enc, std::string* debug_string) {
+ const auto& m = enc->metadata.m;
+
+ uint64_t xsize = enc->metadata.size.xsize();
+ uint64_t ysize = enc->metadata.size.ysize();
+ // The uncompressed ICC size, if it is used.
+ size_t icc_size = 0;
+ if (m.color_encoding.WantICC()) {
+ icc_size = m.color_encoding.ICC().size();
+ }
+
+ // Level 10 checks
+
+ if (xsize > (1ull << 30ull) || ysize > (1ull << 30ull) ||
+ xsize * ysize > (1ull << 40ull)) {
+ if (debug_string) *debug_string = "Too large image dimensions";
+ return -1;
+ }
+ if (icc_size > (1ull << 28)) {
+ if (debug_string) *debug_string = "Too large ICC profile size";
+ return -1;
+ }
+ if (m.num_extra_channels > 256) {
+ if (debug_string) *debug_string = "Too many extra channels";
+ return -1;
+ }
+
+ // Level 5 checks
+
+ if (!m.modular_16_bit_buffer_sufficient) {
+ if (debug_string) *debug_string = "Too high modular bit depth";
+ return 10;
+ }
+ if (xsize > (1ull << 18ull) || ysize > (1ull << 18ull) ||
+ xsize * ysize > (1ull << 28ull)) {
+ if (debug_string) *debug_string = "Too large image dimensions";
+ return 10;
+ }
+ if (icc_size > (1ull << 22)) {
+ if (debug_string) *debug_string = "Too large ICC profile";
+ return 10;
+ }
+ if (m.num_extra_channels > 4) {
+ if (debug_string) *debug_string = "Too many extra channels";
+ return 10;
+ }
+ for (size_t i = 0; i < m.extra_channel_info.size(); ++i) {
+ if (m.extra_channel_info[i].type == jxl::ExtraChannel::kBlack) {
+ if (debug_string) *debug_string = "CMYK channel not allowed";
+ return 10;
+ }
+ }
+
+ // TODO(lode): also need to check if consecutive composite-still frames total
+ // pixel amount doesn't exceed 2**28 in the case of level 5. This should be
+ // done when adding frame and requires ability to add composite still frames
+ // to be added first.
+
+ // TODO(lode): also need to check animation duration of a frame. This should
+ // be done when adding frame, but first requires implementing setting the
+ // JxlFrameHeader for a frame.
+
+ // TODO(lode): also need to check properties such as num_splines, num_patches,
+ // modular_16bit_buffers and multiple properties of modular trees. However
+ // these are not user-set properties so cannot be checked here, but decisions
+ // the C++ encoder should be able to make based on the level.
+
+ // All level 5 checks passes, so can return the more compatible level 5
+ return 5;
+}
+
+size_t BitsPerChannel(JxlDataType data_type) {
+ switch (data_type) {
+ case JXL_TYPE_UINT8:
+ return 8;
+ case JXL_TYPE_UINT16:
+ return 16;
+ case JXL_TYPE_FLOAT:
+ return 32;
+ case JXL_TYPE_FLOAT16:
+ return 16;
+ default:
+ return 0; // signals unhandled JxlDataType
+ }
+}
+
+template <typename T>
+uint32_t GetBitDepth(JxlBitDepth bit_depth, const T& metadata,
+ JxlPixelFormat format) {
+ if (bit_depth.type == JXL_BIT_DEPTH_FROM_PIXEL_FORMAT) {
+ return BitsPerChannel(format.data_type);
+ } else if (bit_depth.type == JXL_BIT_DEPTH_FROM_CODESTREAM) {
+ return metadata.bit_depth.bits_per_sample;
+ } else if (bit_depth.type == JXL_BIT_DEPTH_CUSTOM) {
+ return bit_depth.bits_per_sample;
+ } else {
+ return 0;
+ }
+}
+
+JxlEncoderStatus CheckValidBitdepth(uint32_t bits_per_sample,
+ uint32_t exponent_bits_per_sample) {
+ if (!exponent_bits_per_sample) {
+ // The spec allows up to 31 for bits_per_sample here, but
+ // the code does not (yet) support it.
+ if (!(bits_per_sample > 0 && bits_per_sample <= 24)) {
+ return JXL_API_ERROR_NOSET("Invalid value for bits_per_sample");
+ }
+ } else if ((exponent_bits_per_sample > 8) ||
+ (bits_per_sample > 24 + exponent_bits_per_sample) ||
+ (bits_per_sample < 3 + exponent_bits_per_sample)) {
+ return JXL_API_ERROR_NOSET("Invalid float description");
+ }
+ return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus VerifyInputBitDepth(JxlBitDepth bit_depth,
+ JxlPixelFormat format) {
+ if ((format.data_type == JXL_TYPE_FLOAT ||
+ format.data_type == JXL_TYPE_FLOAT16) &&
+ bit_depth.type != JXL_BIT_DEPTH_FROM_PIXEL_FORMAT) {
+ return JXL_API_ERROR_NOSET(
+ "Only JXL_BIT_DEPTH_FROM_PIXEL_FORMAT is "
+ "implemented for float types.");
+ }
+ return JXL_ENC_SUCCESS;
+}
+
+bool EncodeFrameIndexBox(const jxl::JxlEncoderFrameIndexBox& frame_index_box,
+ jxl::BitWriter& writer) {
+ bool ok = true;
+ int NF = 0;
+ for (size_t i = 0; i < frame_index_box.entries.size(); ++i) {
+ if (i == 0 || frame_index_box.entries[i].to_be_indexed) {
+ ++NF;
+ }
+ }
+ // Frame index box contents varint + 8 bytes
+ // continue with NF * 3 * varint
+ // varint max length is 10 for 64 bit numbers, and these numbers
+ // are limited to 63 bits.
+ static const int kVarintMaxLength = 10;
+ static const int kFrameIndexBoxHeaderLength = kVarintMaxLength + 8;
+ static const int kFrameIndexBoxElementLength = 3 * kVarintMaxLength;
+ const int buffer_size =
+ kFrameIndexBoxHeaderLength + NF * kFrameIndexBoxElementLength;
+ std::vector<uint8_t> buffer_vec(buffer_size);
+ uint8_t* buffer = buffer_vec.data();
+ size_t output_pos = 0;
+ ok &= jxl::EncodeVarInt(NF, buffer_vec.size(), &output_pos, buffer);
+ StoreBE32(frame_index_box.TNUM, &buffer[output_pos]);
+ output_pos += 4;
+ StoreBE32(frame_index_box.TDEN, &buffer[output_pos]);
+ output_pos += 4;
+ // When we record a frame in the index, the record needs to know
+ // how many frames until the next indexed frame. That is why
+ // we store the 'prev' record. That 'prev' record needs to store
+ // the offset byte position to previously recorded indexed frame,
+ // that's why we also trace previous to the previous frame.
+ int prev_prev_ix = -1; // For position offset (OFFi) delta coding.
+ int prev_ix = 0;
+ int T_prev = 0;
+ int T = 0;
+ for (size_t i = 1; i < frame_index_box.entries.size(); ++i) {
+ if (frame_index_box.entries[i].to_be_indexed) {
+ // Now we can record the previous entry, since we need to store
+ // there how many frames until the next one.
+ int64_t OFFi = frame_index_box.entries[prev_ix].OFFi;
+ if (prev_prev_ix != -1) {
+ // Offi needs to be offset of start byte of this frame compared to start
+ // byte of previous frame from this index in the JPEG XL codestream. For
+ // the first frame, this is the offset from the first byte of the JPEG
+ // XL codestream.
+ OFFi -= frame_index_box.entries[prev_prev_ix].OFFi;
+ }
+ int32_t Ti = T_prev;
+ int32_t Fi = i - prev_ix;
+ ok &= jxl::EncodeVarInt(OFFi, buffer_vec.size(), &output_pos, buffer);
+ ok &= jxl::EncodeVarInt(Ti, buffer_vec.size(), &output_pos, buffer);
+ ok &= jxl::EncodeVarInt(Fi, buffer_vec.size(), &output_pos, buffer);
+ prev_prev_ix = prev_ix;
+ prev_ix = i;
+ T_prev = T;
+ T += frame_index_box.entries[i].duration;
+ }
+ }
+ {
+ // Last frame.
+ size_t i = frame_index_box.entries.size();
+ int64_t OFFi = frame_index_box.entries[prev_ix].OFFi;
+ if (prev_prev_ix != -1) {
+ OFFi -= frame_index_box.entries[prev_prev_ix].OFFi;
+ }
+ int32_t Ti = T_prev;
+ int32_t Fi = i - prev_ix;
+ ok &= jxl::EncodeVarInt(OFFi, buffer_vec.size(), &output_pos, buffer);
+ ok &= jxl::EncodeVarInt(Ti, buffer_vec.size(), &output_pos, buffer);
+ ok &= jxl::EncodeVarInt(Fi, buffer_vec.size(), &output_pos, buffer);
+ }
+ // Enough buffer has been allocated, this function should never fail in
+ // writing.
+ JXL_ASSERT(ok);
+ return ok;
+}
+
+} // namespace
+
+JxlEncoderStatus JxlEncoderStruct::RefillOutputByteQueue() {
+ jxl::PaddedBytes bytes;
+
+ jxl::JxlEncoderQueuedInput& input = input_queue[0];
+
+ // TODO(lode): split this into 3 functions: for adding the signature and other
+ // initial headers (jbrd, ...), one for adding frame, and one for adding user
+ // box.
+
+ if (!wrote_bytes) {
+ // First time encoding any data, verify the level 5 vs level 10 settings
+ std::string level_message;
+ int required_level = VerifyLevelSettings(this, &level_message);
+ // Only level 5 and 10 are defined, and the function can return -1 to
+ // indicate full incompatibility.
+ JXL_ASSERT(required_level == -1 || required_level == 5 ||
+ required_level == 10);
+ // codestream_level == -1 means auto-set to the required level
+ if (codestream_level == -1) codestream_level = required_level;
+ if (codestream_level == 5 && required_level != 5) {
+ // If the required level is 10, return error rather than automatically
+ // setting the level to 10, to avoid inadvertently creating a level 10
+ // JXL file while intending to target a level 5 decoder.
+ return JXL_API_ERROR(
+ this, JXL_ENC_ERR_API_USAGE, "%s",
+ ("Codestream level verification for level 5 failed: " + level_message)
+ .c_str());
+ }
+ if (required_level == -1) {
+ return JXL_API_ERROR(
+ this, JXL_ENC_ERR_API_USAGE, "%s",
+ ("Codestream level verification for level 10 failed: " +
+ level_message)
+ .c_str());
+ }
+
+ jxl::BitWriter writer;
+ if (!WriteCodestreamHeaders(&metadata, &writer, nullptr)) {
+ return JXL_API_ERROR(this, JXL_ENC_ERR_GENERIC,
+ "Failed to write codestream header");
+ }
+ // Only send ICC (at least several hundred bytes) if fields aren't enough.
+ if (metadata.m.color_encoding.WantICC()) {
+ if (!jxl::WriteICC(metadata.m.color_encoding.ICC(), &writer,
+ jxl::kLayerHeader, nullptr)) {
+ return JXL_API_ERROR(this, JXL_ENC_ERR_GENERIC,
+ "Failed to write ICC profile");
+ }
+ }
+ // TODO(lode): preview should be added here if a preview image is added
+
+ writer.ZeroPadToByte();
+
+ // Not actually the end of frame, but the end of metadata/ICC, but helps
+ // the next frame to start here for indexing purposes.
+ codestream_bytes_written_end_of_frame +=
+ jxl::DivCeil(writer.BitsWritten(), 8);
+
+ bytes = std::move(writer).TakeBytes();
+
+ if (MustUseContainer()) {
+ // Add "JXL " and ftyp box.
+ output_byte_queue.insert(
+ output_byte_queue.end(), jxl::kContainerHeader,
+ jxl::kContainerHeader + sizeof(jxl::kContainerHeader));
+ if (codestream_level != 5) {
+ // Add jxll box directly after the ftyp box to indicate the codestream
+ // level.
+ output_byte_queue.insert(
+ output_byte_queue.end(), jxl::kLevelBoxHeader,
+ jxl::kLevelBoxHeader + sizeof(jxl::kLevelBoxHeader));
+ output_byte_queue.push_back(codestream_level);
+ }
+
+ // Whether to write the basic info and color profile header of the
+ // codestream into an early separate jxlp box, so that it comes before
+ // metadata or jpeg reconstruction boxes. In theory this could simply
+ // always be done, but there's no reason to add an extra box with box
+ // header overhead if the codestream will already come immediately after
+ // the signature and level boxes.
+ bool partial_header =
+ store_jpeg_metadata ||
+ (use_boxes && (!input.frame && !input.fast_lossless_frame));
+
+ if (partial_header) {
+ jxl::AppendBoxHeader(jxl::MakeBoxType("jxlp"), bytes.size() + 4,
+ /*unbounded=*/false, &output_byte_queue);
+ AppendJxlpBoxCounter(jxlp_counter++, /*last=*/false,
+ &output_byte_queue);
+ output_byte_queue.insert(output_byte_queue.end(), bytes.data(),
+ bytes.data() + bytes.size());
+ bytes.clear();
+ }
+
+ if (store_jpeg_metadata && !jpeg_metadata.empty()) {
+ jxl::AppendBoxHeader(jxl::MakeBoxType("jbrd"), jpeg_metadata.size(),
+ false, &output_byte_queue);
+ output_byte_queue.insert(output_byte_queue.end(), jpeg_metadata.begin(),
+ jpeg_metadata.end());
+ }
+ }
+ wrote_bytes = true;
+ }
+
+ // Choose frame or box processing: exactly one of the two unique pointers (box
+ // or frame) in the input queue item is non-null.
+ if (input.frame || input.fast_lossless_frame) {
+ jxl::MemoryManagerUniquePtr<jxl::JxlEncoderQueuedFrame> input_frame =
+ std::move(input.frame);
+ if (input.fast_lossless_frame) {
+ output_fast_frame_queue.push_back(std::move(input.fast_lossless_frame));
+ }
+ input_queue.erase(input_queue.begin());
+ num_queued_frames--;
+ if (input_frame) {
+ for (unsigned idx = 0; idx < input_frame->ec_initialized.size(); idx++) {
+ if (!input_frame->ec_initialized[idx]) {
+ return JXL_API_ERROR(this, JXL_ENC_ERR_API_USAGE,
+ "Extra channel %u is not initialized", idx);
+ }
+ }
+
+ // TODO(zond): If the input queue is empty and the frames_closed is true,
+ // then mark this frame as the last.
+
+ // TODO(zond): Handle progressive mode like EncodeFile does it.
+ // TODO(zond): Handle animation like EncodeFile does it, by checking if
+ // JxlEncoderCloseFrames has been called and if the frame
+ // queue is empty (to see if it's the last animation frame).
+
+ if (metadata.m.xyb_encoded) {
+ input_frame->option_values.cparams.color_transform =
+ jxl::ColorTransform::kXYB;
+ } else {
+ // TODO(zond): Figure out when to use kYCbCr instead.
+ input_frame->option_values.cparams.color_transform =
+ jxl::ColorTransform::kNone;
+ }
+ }
+
+ uint32_t duration;
+ uint32_t timecode;
+ if (input_frame && metadata.m.have_animation) {
+ duration = input_frame->option_values.header.duration;
+ timecode = input_frame->option_values.header.timecode;
+ } else {
+ // If have_animation is false, the encoder should ignore the duration and
+ // timecode values. However, assigning them to ib will cause the encoder
+ // to write an invalid frame header that can't be decoded so ensure
+ // they're the default value of 0 here.
+ duration = 0;
+ timecode = 0;
+ }
+
+ bool last_frame = frames_closed && !num_queued_frames;
+
+ size_t codestream_byte_size = 0;
+
+ jxl::BitWriter writer;
+
+ if (input_frame) {
+ jxl::PassesEncoderState enc_state;
+
+ frame_index_box.AddFrame(codestream_bytes_written_end_of_frame, duration,
+ input_frame->option_values.frame_index_box);
+
+ // EncodeFrame creates jxl::FrameHeader object internally based on the
+ // FrameInfo, imagebundle, cparams and metadata. Copy the information to
+ // these.
+ jxl::ImageBundle& ib = input_frame->frame;
+ ib.duration = duration;
+ ib.timecode = timecode;
+ ib.name = input_frame->option_values.frame_name;
+ ib.blendmode = static_cast<jxl::BlendMode>(
+ input_frame->option_values.header.layer_info.blend_info.blendmode);
+ ib.blend =
+ input_frame->option_values.header.layer_info.blend_info.blendmode !=
+ JXL_BLEND_REPLACE;
+
+ size_t save_as_reference =
+ input_frame->option_values.header.layer_info.save_as_reference;
+ if (save_as_reference >= 3) {
+ return JXL_API_ERROR(
+ this, JXL_ENC_ERR_API_USAGE,
+ "Cannot use save_as_reference values >=3 (found: %d)",
+ (int)save_as_reference);
+ }
+ ib.use_for_next_frame = !!save_as_reference;
+
+ jxl::FrameInfo frame_info;
+ frame_info.is_last = last_frame;
+ frame_info.save_as_reference = save_as_reference;
+ frame_info.source =
+ input_frame->option_values.header.layer_info.blend_info.source;
+ frame_info.clamp =
+ input_frame->option_values.header.layer_info.blend_info.clamp;
+ frame_info.alpha_channel =
+ input_frame->option_values.header.layer_info.blend_info.alpha;
+ frame_info.extra_channel_blending_info.resize(
+ metadata.m.num_extra_channels);
+ // If extra channel blend info has not been set, use the blend mode from
+ // the layer_info.
+ JxlBlendInfo default_blend_info =
+ input_frame->option_values.header.layer_info.blend_info;
+ for (size_t i = 0; i < metadata.m.num_extra_channels; ++i) {
+ auto& to = frame_info.extra_channel_blending_info[i];
+ const auto& from =
+ i < input_frame->option_values.extra_channel_blend_info.size()
+ ? input_frame->option_values.extra_channel_blend_info[i]
+ : default_blend_info;
+ to.mode = static_cast<jxl::BlendMode>(from.blendmode);
+ to.source = from.source;
+ to.alpha_channel = from.alpha;
+ to.clamp = (from.clamp != 0);
+ }
+
+ if (input_frame->option_values.header.layer_info.have_crop) {
+ ib.origin.x0 = input_frame->option_values.header.layer_info.crop_x0;
+ ib.origin.y0 = input_frame->option_values.header.layer_info.crop_y0;
+ }
+ JXL_ASSERT(writer.BitsWritten() == 0);
+ if (!jxl::EncodeFrame(input_frame->option_values.cparams, frame_info,
+ &metadata, input_frame->frame, &enc_state, cms,
+ thread_pool.get(), &writer,
+ /*aux_out=*/nullptr)) {
+ return JXL_API_ERROR(this, JXL_ENC_ERR_GENERIC,
+ "Failed to encode frame");
+ }
+ codestream_bytes_written_beginning_of_frame =
+ codestream_bytes_written_end_of_frame;
+ codestream_bytes_written_end_of_frame +=
+ jxl::DivCeil(writer.BitsWritten(), 8);
+
+ // Possibly bytes already contains the codestream header: in case this is
+ // the first frame, and the codestream header was not encoded as jxlp
+ // above.
+ bytes.append(std::move(writer).TakeBytes());
+ codestream_byte_size = bytes.size();
+ } else {
+ JXL_CHECK(!output_fast_frame_queue.empty());
+ JxlFastLosslessPrepareHeader(output_fast_frame_queue.front().get(),
+ /*add_image_header=*/0, last_frame);
+ codestream_byte_size =
+ JxlFastLosslessOutputSize(output_fast_frame_queue.front().get()) +
+ bytes.size();
+ }
+
+ if (MustUseContainer()) {
+ if (last_frame && jxlp_counter == 0) {
+ // If this is the last frame and no jxlp boxes were used yet, it's
+ // slighly more efficient to write a jxlc box since it has 4 bytes
+ // less overhead.
+ jxl::AppendBoxHeader(jxl::MakeBoxType("jxlc"), codestream_byte_size,
+ /*unbounded=*/false, &output_byte_queue);
+ } else {
+ jxl::AppendBoxHeader(jxl::MakeBoxType("jxlp"), codestream_byte_size + 4,
+ /*unbounded=*/false, &output_byte_queue);
+ AppendJxlpBoxCounter(jxlp_counter++, last_frame, &output_byte_queue);
+ }
+ }
+
+ output_byte_queue.insert(output_byte_queue.end(), bytes.data(),
+ bytes.data() + bytes.size());
+
+ if (input_frame) {
+ last_used_cparams = input_frame->option_values.cparams;
+ }
+ if (last_frame && frame_index_box.StoreFrameIndexBox()) {
+ bytes.clear();
+ EncodeFrameIndexBox(frame_index_box, writer);
+ jxl::AppendBoxHeader(jxl::MakeBoxType("jxli"), bytes.size(),
+ /*unbounded=*/false, &output_byte_queue);
+ }
+ } else {
+ // Not a frame, so is a box instead
+ jxl::MemoryManagerUniquePtr<jxl::JxlEncoderQueuedBox> box =
+ std::move(input.box);
+ input_queue.erase(input_queue.begin());
+ num_queued_boxes--;
+
+ if (box->compress_box) {
+ jxl::PaddedBytes compressed(4);
+ // Prepend the original box type in the brob box contents
+ for (size_t i = 0; i < 4; i++) {
+ compressed[i] = static_cast<uint8_t>(box->type[i]);
+ }
+ if (JXL_ENC_SUCCESS !=
+ BrotliCompress((brotli_effort >= 0 ? brotli_effort : 4),
+ box->contents.data(), box->contents.size(),
+ &compressed)) {
+ return JXL_API_ERROR(this, JXL_ENC_ERR_GENERIC,
+ "Brotli compression for brob box failed");
+ }
+ jxl::AppendBoxHeader(jxl::MakeBoxType("brob"), compressed.size(), false,
+ &output_byte_queue);
+ output_byte_queue.insert(output_byte_queue.end(), compressed.data(),
+ compressed.data() + compressed.size());
+ } else {
+ jxl::AppendBoxHeader(box->type, box->contents.size(), false,
+ &output_byte_queue);
+ output_byte_queue.insert(output_byte_queue.end(), box->contents.data(),
+ box->contents.data() + box->contents.size());
+ }
+ }
+
+ return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderSetColorEncoding(JxlEncoder* enc,
+ const JxlColorEncoding* color) {
+ if (!enc->basic_info_set) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, "Basic info not yet set");
+ }
+ if (enc->color_encoding_set) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+ "Color encoding is already set");
+ }
+ if (!jxl::ConvertExternalToInternalColorEncoding(
+ *color, &enc->metadata.m.color_encoding)) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_GENERIC, "Error in color conversion");
+ }
+ if (enc->metadata.m.color_encoding.GetColorSpace() ==
+ jxl::ColorSpace::kGray) {
+ if (enc->basic_info.num_color_channels != 1)
+ return JXL_API_ERROR(
+ enc, JXL_ENC_ERR_API_USAGE,
+ "Cannot use grayscale color encoding with num_color_channels != 1");
+ } else {
+ if (enc->basic_info.num_color_channels != 3)
+ return JXL_API_ERROR(
+ enc, JXL_ENC_ERR_API_USAGE,
+ "Cannot use RGB color encoding with num_color_channels != 3");
+ }
+ enc->color_encoding_set = true;
+ if (!enc->intensity_target_set) {
+ jxl::SetIntensityTarget(&enc->metadata.m);
+ }
+ return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderSetICCProfile(JxlEncoder* enc,
+ const uint8_t* icc_profile,
+ size_t size) {
+ if (!enc->basic_info_set) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, "Basic info not yet set");
+ }
+ if (enc->color_encoding_set) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+ "ICC profile is already set");
+ }
+ jxl::PaddedBytes icc;
+ icc.assign(icc_profile, icc_profile + size);
+ if (!enc->metadata.m.color_encoding.SetICC(std::move(icc))) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_BAD_INPUT,
+ "ICC profile could not be set");
+ }
+ if (enc->metadata.m.color_encoding.GetColorSpace() ==
+ jxl::ColorSpace::kGray) {
+ if (enc->basic_info.num_color_channels != 1)
+ return JXL_API_ERROR(
+ enc, JXL_ENC_ERR_BAD_INPUT,
+ "Cannot use grayscale ICC profile with num_color_channels != 1");
+ } else {
+ if (enc->basic_info.num_color_channels != 3)
+ return JXL_API_ERROR(
+ enc, JXL_ENC_ERR_BAD_INPUT,
+ "Cannot use RGB ICC profile with num_color_channels != 3");
+ // TODO(jon): also check that a kBlack extra channel is provided in the CMYK
+ // case
+ }
+ enc->color_encoding_set = true;
+ if (!enc->intensity_target_set) {
+ jxl::SetIntensityTarget(&enc->metadata.m);
+ }
+
+ if (!enc->basic_info.uses_original_profile) {
+ enc->metadata.m.color_encoding.DecideIfWantICC();
+ }
+
+ return JXL_ENC_SUCCESS;
+}
+
+void JxlEncoderInitBasicInfo(JxlBasicInfo* info) {
+ info->have_container = JXL_FALSE;
+ info->xsize = 0;
+ info->ysize = 0;
+ info->bits_per_sample = 8;
+ info->exponent_bits_per_sample = 0;
+ info->intensity_target = 0.f;
+ info->min_nits = 0.f;
+ info->relative_to_max_display = JXL_FALSE;
+ info->linear_below = 0.f;
+ info->uses_original_profile = JXL_FALSE;
+ info->have_preview = JXL_FALSE;
+ info->have_animation = JXL_FALSE;
+ info->orientation = JXL_ORIENT_IDENTITY;
+ info->num_color_channels = 3;
+ info->num_extra_channels = 0;
+ info->alpha_bits = 0;
+ info->alpha_exponent_bits = 0;
+ info->alpha_premultiplied = JXL_FALSE;
+ info->preview.xsize = 0;
+ info->preview.ysize = 0;
+ info->intrinsic_xsize = 0;
+ info->intrinsic_ysize = 0;
+ info->animation.tps_numerator = 10;
+ info->animation.tps_denominator = 1;
+ info->animation.num_loops = 0;
+ info->animation.have_timecodes = JXL_FALSE;
+}
+
+void JxlEncoderInitFrameHeader(JxlFrameHeader* frame_header) {
+ // For each field, the default value of the specification is used. Depending
+ // on whether an animation frame, or a composite still blending frame,
+ // is used, different fields have to be set up by the user after initing
+ // the frame header.
+ frame_header->duration = 0;
+ frame_header->timecode = 0;
+ frame_header->name_length = 0;
+ // In the specification, the default value of is_last is !frame_type, and the
+ // default frame_type is kRegularFrame which has value 0, so is_last is true
+ // by default. However, the encoder does not use this value (the field exists
+ // for the decoder to set) since last frame is determined by usage of
+ // JxlEncoderCloseFrames instead.
+ frame_header->is_last = JXL_TRUE;
+ frame_header->layer_info.have_crop = JXL_FALSE;
+ frame_header->layer_info.crop_x0 = 0;
+ frame_header->layer_info.crop_y0 = 0;
+ // These must be set if have_crop is enabled, but the default value has
+ // have_crop false, and these dimensions 0. The user must set these to the
+ // desired size after enabling have_crop (which is not yet implemented).
+ frame_header->layer_info.xsize = 0;
+ frame_header->layer_info.ysize = 0;
+ JxlEncoderInitBlendInfo(&frame_header->layer_info.blend_info);
+ frame_header->layer_info.save_as_reference = 0;
+}
+
+void JxlEncoderInitBlendInfo(JxlBlendInfo* blend_info) {
+ // Default blend mode in the specification is 0. Note that combining
+ // blend mode of replace with a duration is not useful, but the user has to
+ // manually set duration in case of animation, or manually change the blend
+ // mode in case of composite stills, so initing to a combination that is not
+ // useful on its own is not an issue.
+ blend_info->blendmode = JXL_BLEND_REPLACE;
+ blend_info->source = 0;
+ blend_info->alpha = 0;
+ blend_info->clamp = 0;
+}
+
+JxlEncoderStatus JxlEncoderSetBasicInfo(JxlEncoder* enc,
+ const JxlBasicInfo* info) {
+ if (!enc->metadata.size.Set(info->xsize, info->ysize)) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, "Invalid dimensions");
+ }
+ if (JXL_ENC_SUCCESS != CheckValidBitdepth(info->bits_per_sample,
+ info->exponent_bits_per_sample)) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, "Invalid bit depth");
+ }
+ enc->metadata.m.bit_depth.bits_per_sample = info->bits_per_sample;
+ enc->metadata.m.bit_depth.exponent_bits_per_sample =
+ info->exponent_bits_per_sample;
+ enc->metadata.m.bit_depth.floating_point_sample =
+ (info->exponent_bits_per_sample != 0u);
+ enc->metadata.m.modular_16_bit_buffer_sufficient =
+ (!info->uses_original_profile || info->bits_per_sample <= 12) &&
+ info->alpha_bits <= 12;
+ if ((info->intrinsic_xsize > 0 || info->intrinsic_ysize > 0) &&
+ (info->intrinsic_xsize != info->xsize ||
+ info->intrinsic_ysize != info->ysize)) {
+ if (info->intrinsic_xsize > (1ull << 30ull) ||
+ info->intrinsic_ysize > (1ull << 30ull) ||
+ !enc->metadata.m.intrinsic_size.Set(info->intrinsic_xsize,
+ info->intrinsic_ysize)) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+ "Invalid intrinsic dimensions");
+ }
+ enc->metadata.m.have_intrinsic_size = true;
+ }
+
+ // The number of extra channels includes the alpha channel, so for example and
+ // RGBA with no other extra channels, has exactly num_extra_channels == 1
+ enc->metadata.m.num_extra_channels = info->num_extra_channels;
+ enc->metadata.m.extra_channel_info.resize(enc->metadata.m.num_extra_channels);
+ if (info->num_extra_channels == 0 && info->alpha_bits) {
+ return JXL_API_ERROR(
+ enc, JXL_ENC_ERR_API_USAGE,
+ "when alpha_bits is non-zero, the number of channels must be at least "
+ "1");
+ }
+ // If the user provides non-zero alpha_bits, we make the channel info at index
+ // zero the appropriate alpha channel.
+ if (info->alpha_bits) {
+ JxlExtraChannelInfo channel_info;
+ JxlEncoderInitExtraChannelInfo(JXL_CHANNEL_ALPHA, &channel_info);
+ channel_info.bits_per_sample = info->alpha_bits;
+ channel_info.exponent_bits_per_sample = info->alpha_exponent_bits;
+ if (JxlEncoderSetExtraChannelInfo(enc, 0, &channel_info)) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+ "Problem setting extra channel info for alpha");
+ }
+ }
+
+ enc->metadata.m.xyb_encoded = !info->uses_original_profile;
+ if (info->orientation > 0 && info->orientation <= 8) {
+ enc->metadata.m.orientation = info->orientation;
+ } else {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+ "Invalid value for orientation field");
+ }
+ if (info->num_color_channels != 1 && info->num_color_channels != 3) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+ "Invalid number of color channels");
+ }
+ if (info->intensity_target != 0) {
+ enc->metadata.m.SetIntensityTarget(info->intensity_target);
+ enc->intensity_target_set = true;
+ } else if (enc->color_encoding_set) {
+ // If this is false, JxlEncoderSetColorEncoding will be called later and we
+ // will get one more chance to call jxl::SetIntensityTarget, after the color
+ // encoding is indeed set.
+ jxl::SetIntensityTarget(&enc->metadata.m);
+ enc->intensity_target_set = true;
+ }
+ enc->metadata.m.tone_mapping.min_nits = info->min_nits;
+ enc->metadata.m.tone_mapping.relative_to_max_display =
+ info->relative_to_max_display;
+ enc->metadata.m.tone_mapping.linear_below = info->linear_below;
+ enc->basic_info = *info;
+ enc->basic_info_set = true;
+
+ enc->metadata.m.have_animation = info->have_animation;
+ if (info->have_animation) {
+ if (info->animation.tps_denominator < 1) {
+ return JXL_API_ERROR(
+ enc, JXL_ENC_ERR_API_USAGE,
+ "If animation is used, tps_denominator must be >= 1");
+ }
+ if (info->animation.tps_numerator < 1) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+ "If animation is used, tps_numerator must be >= 1");
+ }
+ enc->metadata.m.animation.tps_numerator = info->animation.tps_numerator;
+ enc->metadata.m.animation.tps_denominator = info->animation.tps_denominator;
+ enc->metadata.m.animation.num_loops = info->animation.num_loops;
+ enc->metadata.m.animation.have_timecodes = info->animation.have_timecodes;
+ }
+ std::string level_message;
+ int required_level = VerifyLevelSettings(enc, &level_message);
+ if (required_level == -1 ||
+ (static_cast<int>(enc->codestream_level) < required_level &&
+ enc->codestream_level != -1)) {
+ return JXL_API_ERROR(
+ enc, JXL_ENC_ERR_API_USAGE, "%s",
+ ("Codestream level verification for level " +
+ std::to_string(enc->codestream_level) + " failed: " + level_message)
+ .c_str());
+ }
+ return JXL_ENC_SUCCESS;
+}
+
+void JxlEncoderInitExtraChannelInfo(JxlExtraChannelType type,
+ JxlExtraChannelInfo* info) {
+ info->type = type;
+ info->bits_per_sample = 8;
+ info->exponent_bits_per_sample = 0;
+ info->dim_shift = 0;
+ info->name_length = 0;
+ info->alpha_premultiplied = JXL_FALSE;
+ info->spot_color[0] = 0;
+ info->spot_color[1] = 0;
+ info->spot_color[2] = 0;
+ info->spot_color[3] = 0;
+ info->cfa_channel = 0;
+}
+
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelInfo(
+ JxlEncoder* enc, size_t index, const JxlExtraChannelInfo* info) {
+ if (index >= enc->metadata.m.num_extra_channels) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+ "Invalid value for the index of extra channel");
+ }
+ if (JXL_ENC_SUCCESS != CheckValidBitdepth(info->bits_per_sample,
+ info->exponent_bits_per_sample)) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, "Invalid bit depth");
+ }
+
+ jxl::ExtraChannelInfo& channel = enc->metadata.m.extra_channel_info[index];
+ channel.type = static_cast<jxl::ExtraChannel>(info->type);
+ channel.bit_depth.bits_per_sample = info->bits_per_sample;
+ enc->metadata.m.modular_16_bit_buffer_sufficient &=
+ info->bits_per_sample <= 12;
+ channel.bit_depth.exponent_bits_per_sample = info->exponent_bits_per_sample;
+ channel.bit_depth.floating_point_sample = info->exponent_bits_per_sample != 0;
+ channel.dim_shift = info->dim_shift;
+ channel.name = "";
+ channel.alpha_associated = (info->alpha_premultiplied != 0);
+ channel.cfa_channel = info->cfa_channel;
+ channel.spot_color[0] = info->spot_color[0];
+ channel.spot_color[1] = info->spot_color[1];
+ channel.spot_color[2] = info->spot_color[2];
+ channel.spot_color[3] = info->spot_color[3];
+ std::string level_message;
+ int required_level = VerifyLevelSettings(enc, &level_message);
+ if (required_level == -1 ||
+ (static_cast<int>(enc->codestream_level) < required_level &&
+ enc->codestream_level != -1)) {
+ return JXL_API_ERROR(
+ enc, JXL_ENC_ERR_API_USAGE, "%s",
+ ("Codestream level verification for level " +
+ std::to_string(enc->codestream_level) + " failed: " + level_message)
+ .c_str());
+ }
+ return JXL_ENC_SUCCESS;
+}
+
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelName(JxlEncoder* enc,
+ size_t index,
+ const char* name,
+ size_t size) {
+ if (index >= enc->metadata.m.num_extra_channels) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+ "Invalid value for the index of extra channel");
+ }
+ enc->metadata.m.extra_channel_info[index].name =
+ std::string(name, name + size);
+ return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderFrameSettings* JxlEncoderFrameSettingsCreate(
+ JxlEncoder* enc, const JxlEncoderFrameSettings* source) {
+ auto opts = jxl::MemoryManagerMakeUnique<JxlEncoderFrameSettings>(
+ &enc->memory_manager);
+ if (!opts) return nullptr;
+ opts->enc = enc;
+ if (source != nullptr) {
+ opts->values = source->values;
+ } else {
+ opts->values.lossless = false;
+ }
+ opts->values.cparams.level = enc->codestream_level;
+ opts->values.cparams.ec_distance.resize(enc->metadata.m.num_extra_channels,
+ -1);
+
+ JxlEncoderFrameSettings* ret = opts.get();
+ enc->encoder_options.emplace_back(std::move(opts));
+ return ret;
+}
+
+JxlEncoderFrameSettings* JxlEncoderOptionsCreate(
+ JxlEncoder* enc, const JxlEncoderFrameSettings* source) {
+ // Deprecated function name, call the non-deprecated function
+ return JxlEncoderFrameSettingsCreate(enc, source);
+}
+
+JxlEncoderStatus JxlEncoderSetFrameLossless(
+ JxlEncoderFrameSettings* frame_settings, const JXL_BOOL lossless) {
+ if (lossless && frame_settings->enc->basic_info_set &&
+ frame_settings->enc->metadata.m.xyb_encoded) {
+ return JXL_API_ERROR(
+ frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Set uses_original_profile=true for lossless encoding");
+ }
+ frame_settings->values.lossless = lossless;
+ return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderOptionsSetLossless(
+ JxlEncoderFrameSettings* frame_settings, JXL_BOOL lossless) {
+ // Deprecated function name, call the non-deprecated function
+ return JxlEncoderSetFrameLossless(frame_settings, lossless);
+}
+
+JxlEncoderStatus JxlEncoderOptionsSetEffort(
+ JxlEncoderFrameSettings* frame_settings, const int effort) {
+ return JxlEncoderFrameSettingsSetOption(frame_settings,
+ JXL_ENC_FRAME_SETTING_EFFORT, effort);
+}
+
+JxlEncoderStatus JxlEncoderSetFrameDistance(
+ JxlEncoderFrameSettings* frame_settings, float distance) {
+ if (distance < 0.f || distance > 25.f) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Distance has to be in [0.0..25.0] (corresponding to "
+ "quality in [0.0..100.0])");
+ }
+ if (distance > 0.f && distance < 0.01f) {
+ distance = 0.01f;
+ }
+ frame_settings->values.cparams.butteraugli_distance = distance;
+ return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderSetExtraChannelDistance(
+ JxlEncoderFrameSettings* frame_settings, size_t index, float distance) {
+ if (index >= frame_settings->enc->metadata.m.num_extra_channels) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Invalid value for the index of extra channel");
+ }
+ if (distance != -1.f && (distance < 0.f || distance > 25.f)) {
+ return JXL_API_ERROR(
+ frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Distance has to be -1 or in [0.0..25.0] (corresponding to "
+ "quality in [0.0..100.0])");
+ }
+ if (distance > 0.f && distance < 0.01f) {
+ distance = 0.01f;
+ }
+
+ if (index >= frame_settings->values.cparams.ec_distance.size()) {
+ // This can only happen if JxlEncoderFrameSettingsCreate() was called before
+ // JxlEncoderSetBasicInfo().
+ frame_settings->values.cparams.ec_distance.resize(
+ frame_settings->enc->metadata.m.num_extra_channels, -1);
+ }
+
+ frame_settings->values.cparams.ec_distance[index] = distance;
+ return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderOptionsSetDistance(
+ JxlEncoderFrameSettings* frame_settings, float distance) {
+ // Deprecated function name, call the non-deprecated function
+ return JxlEncoderSetFrameDistance(frame_settings, distance);
+}
+
+JxlEncoderStatus JxlEncoderOptionsSetDecodingSpeed(
+ JxlEncoderFrameSettings* frame_settings, int tier) {
+ return JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_DECODING_SPEED, tier);
+}
+
+JxlEncoderStatus JxlEncoderFrameSettingsSetOption(
+ JxlEncoderFrameSettings* frame_settings, JxlEncoderFrameSettingId option,
+ int64_t value) {
+ // check if value is -1, 0 or 1 for Override-type options
+ switch (option) {
+ case JXL_ENC_FRAME_SETTING_NOISE:
+ case JXL_ENC_FRAME_SETTING_DOTS:
+ case JXL_ENC_FRAME_SETTING_PATCHES:
+ case JXL_ENC_FRAME_SETTING_GABORISH:
+ case JXL_ENC_FRAME_SETTING_MODULAR:
+ case JXL_ENC_FRAME_SETTING_KEEP_INVISIBLE:
+ case JXL_ENC_FRAME_SETTING_GROUP_ORDER:
+ case JXL_ENC_FRAME_SETTING_RESPONSIVE:
+ case JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC:
+ case JXL_ENC_FRAME_SETTING_QPROGRESSIVE_AC:
+ case JXL_ENC_FRAME_SETTING_LOSSY_PALETTE:
+ case JXL_ENC_FRAME_SETTING_JPEG_RECON_CFL:
+ case JXL_ENC_FRAME_SETTING_JPEG_COMPRESS_BOXES:
+ if (value < -1 || value > 1) {
+ return JXL_API_ERROR(
+ frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Option value has to be -1 (default), 0 (off) or 1 (on)");
+ }
+ break;
+ default:
+ break;
+ }
+
+ switch (option) {
+ case JXL_ENC_FRAME_SETTING_EFFORT:
+ if (frame_settings->enc->allow_expert_options) {
+ if (value < 1 || value > 10) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED,
+ "Encode effort has to be in [1..10]");
+ }
+ } else {
+ if (value < 1 || value > 9) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED,
+ "Encode effort has to be in [1..9]");
+ }
+ }
+ frame_settings->values.cparams.speed_tier =
+ static_cast<jxl::SpeedTier>(10 - value);
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_BROTLI_EFFORT:
+ if (value < -1 || value > 11) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Brotli effort has to be in [-1..11]");
+ }
+ // set cparams for brotli use in JPEG frames
+ frame_settings->values.cparams.brotli_effort = value;
+ // set enc option for brotli use in brob boxes
+ frame_settings->enc->brotli_effort = value;
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_DECODING_SPEED:
+ if (value < 0 || value > 4) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED,
+ "Decoding speed has to be in [0..4]");
+ }
+ frame_settings->values.cparams.decoding_speed_tier = value;
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_RESAMPLING:
+ if (value != -1 && value != 1 && value != 2 && value != 4 && value != 8) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Resampling factor has to be 1, 2, 4 or 8");
+ }
+ frame_settings->values.cparams.resampling = value;
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_EXTRA_CHANNEL_RESAMPLING:
+ // TODO(lode): the jxl codestream allows choosing a different resampling
+ // factor for each extra channel, independently per frame. Move this
+ // option to a JxlEncoderFrameSettings-option that can be set per extra
+ // channel, so needs its own function rather than
+ // JxlEncoderFrameSettingsSetOption due to the extra channel index
+ // argument required.
+ if (value != -1 && value != 1 && value != 2 && value != 4 && value != 8) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Resampling factor has to be 1, 2, 4 or 8");
+ }
+ frame_settings->values.cparams.ec_resampling = value;
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_ALREADY_DOWNSAMPLED:
+ if (value < 0 || value > 1) {
+ return JXL_ENC_ERROR;
+ }
+ frame_settings->values.cparams.already_downsampled = (value == 1);
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_NOISE:
+ frame_settings->values.cparams.noise = static_cast<jxl::Override>(value);
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_DOTS:
+ frame_settings->values.cparams.dots = static_cast<jxl::Override>(value);
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_PATCHES:
+ frame_settings->values.cparams.patches =
+ static_cast<jxl::Override>(value);
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_EPF:
+ if (value < -1 || value > 3) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "EPF value has to be in [-1..3]");
+ }
+ frame_settings->values.cparams.epf = static_cast<int>(value);
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_GABORISH:
+ frame_settings->values.cparams.gaborish =
+ static_cast<jxl::Override>(value);
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_MODULAR:
+ frame_settings->values.cparams.modular_mode = (value == 1);
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_KEEP_INVISIBLE:
+ frame_settings->values.cparams.keep_invisible =
+ static_cast<jxl::Override>(value);
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_GROUP_ORDER:
+ frame_settings->values.cparams.centerfirst = (value == 1);
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_X:
+ if (value < -1) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Center x coordinate has to be -1 or positive");
+ }
+ frame_settings->values.cparams.center_x = static_cast<size_t>(value);
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_Y:
+ if (value < -1) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Center y coordinate has to be -1 or positive");
+ }
+ frame_settings->values.cparams.center_y = static_cast<size_t>(value);
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_RESPONSIVE:
+ frame_settings->values.cparams.responsive = value;
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC:
+ frame_settings->values.cparams.progressive_mode = value;
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_QPROGRESSIVE_AC:
+ frame_settings->values.cparams.qprogressive_mode = value;
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC:
+ if (value < -1 || value > 2) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Progressive DC has to be in [-1..2]");
+ }
+ frame_settings->values.cparams.progressive_dc = value;
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_PALETTE_COLORS:
+ if (value < -1 || value > 70913) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Option value has to be in [-1..70913]");
+ }
+ if (value == -1) {
+ frame_settings->values.cparams.palette_colors = 1 << 10;
+ } else {
+ frame_settings->values.cparams.palette_colors = value;
+ }
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_LOSSY_PALETTE:
+ // TODO(lode): the defaults of some palette settings depend on others.
+ // See the logic in cjxl. Similar for other settings. This should be
+ // handled in the encoder during JxlEncoderProcessOutput (or,
+ // alternatively, in the cjxl binary like now)
+ frame_settings->values.cparams.lossy_palette = (value == 1);
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_COLOR_TRANSFORM:
+ if (value < -1 || value > 2) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Option value has to be in [-1..2]");
+ }
+ if (value == -1) {
+ frame_settings->values.cparams.color_transform =
+ jxl::ColorTransform::kXYB;
+ } else {
+ frame_settings->values.cparams.color_transform =
+ static_cast<jxl::ColorTransform>(value);
+ }
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_MODULAR_COLOR_SPACE:
+ if (value < -1 || value > 41) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Option value has to be in [-1..41]");
+ }
+ frame_settings->values.cparams.colorspace = value;
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_MODULAR_GROUP_SIZE:
+ if (value < -1 || value > 3) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Option value has to be in [-1..3]");
+ }
+ // TODO(lode): the default behavior of this parameter for cjxl is
+ // to choose 1 or 2 depending on the situation. This behavior needs to be
+ // implemented either in the C++ library by allowing to set this to -1, or
+ // kept in cjxl and set it to 1 or 2 using this API.
+ if (value == -1) {
+ frame_settings->values.cparams.modular_group_size_shift = 1;
+ } else {
+ frame_settings->values.cparams.modular_group_size_shift = value;
+ }
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR:
+ if (value < -1 || value > 15) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Option value has to be in [-1..15]");
+ }
+ frame_settings->values.cparams.options.predictor =
+ static_cast<jxl::Predictor>(value);
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_MODULAR_NB_PREV_CHANNELS:
+ // The max allowed value can in theory be higher. However, it depends on
+ // the effort setting. 11 is the highest safe value that doesn't cause
+ // tree_samples to be >= 64 in the encoder. The specification may allow
+ // more than this. With more fine tuning higher values could be allowed.
+ // For N-channel images, the largest useful value is N-1.
+ if (value < -1 || value > 11) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Option value has to be in [-1..11]");
+ }
+ if (value == -1) {
+ frame_settings->values.cparams.options.max_properties = 0;
+ } else {
+ frame_settings->values.cparams.options.max_properties = value;
+ }
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_JPEG_RECON_CFL:
+ if (value == -1) {
+ frame_settings->values.cparams.force_cfl_jpeg_recompression = true;
+ } else {
+ frame_settings->values.cparams.force_cfl_jpeg_recompression = value;
+ }
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_INDEX_BOX:
+ frame_settings->values.frame_index_box = true;
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_PHOTON_NOISE:
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED,
+ "Float option, try setting it with "
+ "JxlEncoderFrameSettingsSetFloatOption");
+ case JXL_ENC_FRAME_SETTING_JPEG_COMPRESS_BOXES:
+ frame_settings->values.cparams.jpeg_compress_boxes = value;
+ return JXL_ENC_SUCCESS;
+ default:
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED,
+ "Unknown option");
+ }
+}
+
+JxlEncoderStatus JxlEncoderFrameSettingsSetFloatOption(
+ JxlEncoderFrameSettings* frame_settings, JxlEncoderFrameSettingId option,
+ float value) {
+ switch (option) {
+ case JXL_ENC_FRAME_SETTING_PHOTON_NOISE:
+ if (value < 0) return JXL_ENC_ERROR;
+ // TODO(lode): add encoder setting to set the 8 floating point values of
+ // the noise synthesis parameters per frame for more fine grained control.
+ frame_settings->values.cparams.photon_noise_iso = value;
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_MODULAR_MA_TREE_LEARNING_PERCENT:
+ if (value < -1.f || value > 100.f) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Option value has to be smaller than 100");
+ }
+ // This value is called "iterations" or "nb_repeats" in cjxl, but is in
+ // fact a fraction in range 0.0-1.0, with the default value 0.5.
+ // Convert from floating point percentage to floating point fraction here.
+ if (value < -.5f) {
+ // TODO(lode): for this and many other settings (also in
+ // JxlEncoderFrameSettingsSetOption), avoid duplicating the default
+ // values here and in enc_params.h and options.h, have one location
+ // where the defaults are specified.
+ frame_settings->values.cparams.options.nb_repeats = 0.5f;
+ } else {
+ frame_settings->values.cparams.options.nb_repeats = value * 0.01f;
+ }
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GLOBAL_PERCENT:
+ if (value < -1.f || value > 100.f) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Option value has to be in [-1..100]");
+ }
+ if (value < -.5f) {
+ frame_settings->values.cparams.channel_colors_pre_transform_percent =
+ 95.0f;
+ } else {
+ frame_settings->values.cparams.channel_colors_pre_transform_percent =
+ value;
+ }
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GROUP_PERCENT:
+ if (value < -1.f || value > 100.f) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Option value has to be in [-1..100]");
+ }
+ if (value < -.5f) {
+ frame_settings->values.cparams.channel_colors_percent = 80.0f;
+ } else {
+ frame_settings->values.cparams.channel_colors_percent = value;
+ }
+ return JXL_ENC_SUCCESS;
+ case JXL_ENC_FRAME_SETTING_EFFORT:
+ case JXL_ENC_FRAME_SETTING_DECODING_SPEED:
+ case JXL_ENC_FRAME_SETTING_RESAMPLING:
+ case JXL_ENC_FRAME_SETTING_EXTRA_CHANNEL_RESAMPLING:
+ case JXL_ENC_FRAME_SETTING_ALREADY_DOWNSAMPLED:
+ case JXL_ENC_FRAME_SETTING_NOISE:
+ case JXL_ENC_FRAME_SETTING_DOTS:
+ case JXL_ENC_FRAME_SETTING_PATCHES:
+ case JXL_ENC_FRAME_SETTING_EPF:
+ case JXL_ENC_FRAME_SETTING_GABORISH:
+ case JXL_ENC_FRAME_SETTING_MODULAR:
+ case JXL_ENC_FRAME_SETTING_KEEP_INVISIBLE:
+ case JXL_ENC_FRAME_SETTING_GROUP_ORDER:
+ case JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_X:
+ case JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_Y:
+ case JXL_ENC_FRAME_SETTING_RESPONSIVE:
+ case JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC:
+ case JXL_ENC_FRAME_SETTING_QPROGRESSIVE_AC:
+ case JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC:
+ case JXL_ENC_FRAME_SETTING_PALETTE_COLORS:
+ case JXL_ENC_FRAME_SETTING_LOSSY_PALETTE:
+ case JXL_ENC_FRAME_SETTING_COLOR_TRANSFORM:
+ case JXL_ENC_FRAME_SETTING_MODULAR_COLOR_SPACE:
+ case JXL_ENC_FRAME_SETTING_MODULAR_GROUP_SIZE:
+ case JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR:
+ case JXL_ENC_FRAME_SETTING_MODULAR_NB_PREV_CHANNELS:
+ case JXL_ENC_FRAME_SETTING_JPEG_RECON_CFL:
+ case JXL_ENC_FRAME_INDEX_BOX:
+ case JXL_ENC_FRAME_SETTING_BROTLI_EFFORT:
+ case JXL_ENC_FRAME_SETTING_FILL_ENUM:
+ case JXL_ENC_FRAME_SETTING_JPEG_COMPRESS_BOXES:
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED,
+ "Int option, try setting it with "
+ "JxlEncoderFrameSettingsSetOption");
+ default:
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED,
+ "Unknown option");
+ }
+}
+JxlEncoder* JxlEncoderCreate(const JxlMemoryManager* memory_manager) {
+ JxlMemoryManager local_memory_manager;
+ if (!jxl::MemoryManagerInit(&local_memory_manager, memory_manager)) {
+ return nullptr;
+ }
+
+ void* alloc =
+ jxl::MemoryManagerAlloc(&local_memory_manager, sizeof(JxlEncoder));
+ if (!alloc) return nullptr;
+ JxlEncoder* enc = new (alloc) JxlEncoder();
+ enc->memory_manager = local_memory_manager;
+ // TODO(sboukortt): add an API function to set this.
+ enc->cms = jxl::GetJxlCms();
+
+ // Initialize all the field values.
+ JxlEncoderReset(enc);
+
+ return enc;
+}
+
+void JxlEncoderReset(JxlEncoder* enc) {
+ enc->thread_pool.reset();
+ enc->input_queue.clear();
+ enc->num_queued_frames = 0;
+ enc->num_queued_boxes = 0;
+ enc->encoder_options.clear();
+ enc->output_byte_queue.clear();
+ enc->output_fast_frame_queue.clear();
+ enc->codestream_bytes_written_beginning_of_frame = 0;
+ enc->codestream_bytes_written_end_of_frame = 0;
+ enc->wrote_bytes = false;
+ enc->jxlp_counter = 0;
+ enc->metadata = jxl::CodecMetadata();
+ enc->last_used_cparams = jxl::CompressParams();
+ enc->frames_closed = false;
+ enc->boxes_closed = false;
+ enc->basic_info_set = false;
+ enc->color_encoding_set = false;
+ enc->intensity_target_set = false;
+ enc->use_container = false;
+ enc->use_boxes = false;
+ enc->codestream_level = -1;
+ JxlEncoderInitBasicInfo(&enc->basic_info);
+}
+
+void JxlEncoderDestroy(JxlEncoder* enc) {
+ if (enc) {
+ JxlMemoryManager local_memory_manager = enc->memory_manager;
+ // Call destructor directly since custom free function is used.
+ enc->~JxlEncoder();
+ jxl::MemoryManagerFree(&local_memory_manager, enc);
+ }
+}
+
+JxlEncoderError JxlEncoderGetError(JxlEncoder* enc) { return enc->error; }
+
+JxlEncoderStatus JxlEncoderUseContainer(JxlEncoder* enc,
+ JXL_BOOL use_container) {
+ if (enc->wrote_bytes) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+ "this setting can only be set at the beginning");
+ }
+ enc->use_container = static_cast<bool>(use_container);
+ return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderStoreJPEGMetadata(JxlEncoder* enc,
+ JXL_BOOL store_jpeg_metadata) {
+ if (enc->wrote_bytes) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+ "this setting can only be set at the beginning");
+ }
+ enc->store_jpeg_metadata = static_cast<bool>(store_jpeg_metadata);
+ return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderSetCodestreamLevel(JxlEncoder* enc, int level) {
+ if (level != -1 && level != 5 && level != 10) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_NOT_SUPPORTED, "invalid level");
+ }
+ if (enc->wrote_bytes) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+ "this setting can only be set at the beginning");
+ }
+ enc->codestream_level = level;
+ return JXL_ENC_SUCCESS;
+}
+
+int JxlEncoderGetRequiredCodestreamLevel(const JxlEncoder* enc) {
+ return VerifyLevelSettings(enc, nullptr);
+}
+
+void JxlEncoderSetCms(JxlEncoder* enc, JxlCmsInterface cms) {
+ jxl::msan::MemoryIsInitialized(&cms, sizeof(cms));
+ enc->cms = cms;
+}
+
+JxlEncoderStatus JxlEncoderSetParallelRunner(JxlEncoder* enc,
+ JxlParallelRunner parallel_runner,
+ void* parallel_runner_opaque) {
+ if (enc->thread_pool) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+ "parallel runner already set");
+ }
+ enc->thread_pool = jxl::MemoryManagerMakeUnique<jxl::ThreadPool>(
+ &enc->memory_manager, parallel_runner, parallel_runner_opaque);
+ if (!enc->thread_pool) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_GENERIC,
+ "error setting parallel runner");
+ }
+ return JXL_ENC_SUCCESS;
+}
+
+namespace {
+JxlEncoderStatus GetCurrentDimensions(
+ const JxlEncoderFrameSettings* frame_settings, size_t& xsize,
+ size_t& ysize) {
+ xsize = frame_settings->enc->metadata.xsize();
+ ysize = frame_settings->enc->metadata.ysize();
+ if (frame_settings->values.header.layer_info.have_crop) {
+ xsize = frame_settings->values.header.layer_info.xsize;
+ ysize = frame_settings->values.header.layer_info.ysize;
+ }
+ if (frame_settings->values.cparams.already_downsampled) {
+ size_t factor = frame_settings->values.cparams.resampling;
+ xsize = jxl::DivCeil(xsize, factor);
+ ysize = jxl::DivCeil(ysize, factor);
+ }
+ if (xsize == 0 || ysize == 0) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "zero-sized frame is not allowed");
+ }
+ return JXL_ENC_SUCCESS;
+}
+} // namespace
+
+JxlEncoderStatus JxlEncoderAddJPEGFrame(
+ const JxlEncoderFrameSettings* frame_settings, const uint8_t* buffer,
+ size_t size) {
+ if (frame_settings->enc->frames_closed) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Frame input is already closed");
+ }
+
+ jxl::CodecInOut io;
+ if (!jxl::jpeg::DecodeImageJPG(jxl::Span<const uint8_t>(buffer, size), &io)) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_BAD_INPUT,
+ "Error during decode of input JPEG");
+ }
+
+ if (!frame_settings->enc->color_encoding_set) {
+ if (!SetColorEncodingFromJpegData(
+ *io.Main().jpeg_data,
+ &frame_settings->enc->metadata.m.color_encoding)) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_BAD_INPUT,
+ "Error in input JPEG color space");
+ }
+ }
+
+ if (!frame_settings->enc->basic_info_set) {
+ JxlBasicInfo basic_info;
+ JxlEncoderInitBasicInfo(&basic_info);
+ basic_info.xsize = io.Main().jpeg_data->width;
+ basic_info.ysize = io.Main().jpeg_data->height;
+ basic_info.uses_original_profile = true;
+ if (JxlEncoderSetBasicInfo(frame_settings->enc, &basic_info) !=
+ JXL_ENC_SUCCESS) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC,
+ "Error setting basic info");
+ }
+ }
+
+ if (frame_settings->enc->metadata.m.xyb_encoded) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Can't XYB encode a lossless JPEG");
+ }
+ if (!io.blobs.exif.empty()) {
+ JxlOrientation orientation = static_cast<JxlOrientation>(
+ frame_settings->enc->metadata.m.orientation);
+ jxl::InterpretExif(io.blobs.exif, &orientation);
+ frame_settings->enc->metadata.m.orientation = orientation;
+
+ size_t exif_size = io.blobs.exif.size();
+ // Exif data in JPEG is limited to 64k
+ if (exif_size > 0xFFFF) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC,
+ "Exif larger than possible in JPEG?");
+ }
+ exif_size += 4; // prefix 4 zero bytes for tiff offset
+ std::vector<uint8_t> exif(exif_size);
+ memcpy(exif.data() + 4, io.blobs.exif.data(), io.blobs.exif.size());
+ JxlEncoderUseBoxes(frame_settings->enc);
+ JxlEncoderAddBox(frame_settings->enc, "Exif", exif.data(), exif_size,
+ frame_settings->values.cparams.jpeg_compress_boxes);
+ }
+ if (!io.blobs.xmp.empty()) {
+ JxlEncoderUseBoxes(frame_settings->enc);
+ JxlEncoderAddBox(frame_settings->enc, "xml ", io.blobs.xmp.data(),
+ io.blobs.xmp.size(),
+ frame_settings->values.cparams.jpeg_compress_boxes);
+ }
+ if (!io.blobs.jumbf.empty()) {
+ JxlEncoderUseBoxes(frame_settings->enc);
+ JxlEncoderAddBox(frame_settings->enc, "jumb", io.blobs.jumbf.data(),
+ io.blobs.jumbf.size(),
+ frame_settings->values.cparams.jpeg_compress_boxes);
+ }
+ if (frame_settings->enc->store_jpeg_metadata) {
+ jxl::jpeg::JPEGData data_in = *io.Main().jpeg_data;
+ jxl::PaddedBytes jpeg_data;
+ if (!jxl::jpeg::EncodeJPEGData(data_in, &jpeg_data,
+ frame_settings->values.cparams)) {
+ return JXL_API_ERROR(
+ frame_settings->enc, JXL_ENC_ERR_JBRD,
+ "JPEG bitstream reconstruction data cannot be encoded");
+ }
+ frame_settings->enc->jpeg_metadata = std::vector<uint8_t>(
+ jpeg_data.data(), jpeg_data.data() + jpeg_data.size());
+ }
+
+ auto queued_frame = jxl::MemoryManagerMakeUnique<jxl::JxlEncoderQueuedFrame>(
+ &frame_settings->enc->memory_manager,
+ // JxlEncoderQueuedFrame is a struct with no constructors, so we use the
+ // default move constructor there.
+ jxl::JxlEncoderQueuedFrame{
+ frame_settings->values,
+ jxl::ImageBundle(&frame_settings->enc->metadata.m),
+ {}});
+ if (!queued_frame) {
+ // TODO(jon): when can this happen? is this an API usage error?
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC,
+ "No frame queued?");
+ }
+ queued_frame->frame.SetFromImage(std::move(*io.Main().color()),
+ io.Main().c_current());
+ size_t xsize, ysize;
+ if (GetCurrentDimensions(frame_settings, xsize, ysize) != JXL_ENC_SUCCESS) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC,
+ "bad dimensions");
+ }
+ if (xsize != static_cast<size_t>(io.Main().jpeg_data->width) ||
+ ysize != static_cast<size_t>(io.Main().jpeg_data->height)) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC,
+ "JPEG dimensions don't match frame dimensions");
+ }
+ std::vector<jxl::ImageF> extra_channels(
+ frame_settings->enc->metadata.m.num_extra_channels);
+ for (auto& extra_channel : extra_channels) {
+ extra_channel = jxl::ImageF(xsize, ysize);
+ queued_frame->ec_initialized.push_back(0);
+ }
+ queued_frame->frame.SetExtraChannels(std::move(extra_channels));
+ queued_frame->frame.jpeg_data = std::move(io.Main().jpeg_data);
+ queued_frame->frame.color_transform = io.Main().color_transform;
+ queued_frame->frame.chroma_subsampling = io.Main().chroma_subsampling;
+
+ QueueFrame(frame_settings, queued_frame);
+ return JXL_ENC_SUCCESS;
+}
+
+static bool CanDoFastLossless(const JxlEncoderFrameSettings* frame_settings,
+ const JxlPixelFormat* pixel_format,
+ bool has_alpha) {
+ if (!frame_settings->values.lossless) {
+ return false;
+ }
+ // TODO(veluca): many of the following options could be made to work, but are
+ // just not implemented in FJXL's frame header handling yet.
+ if (frame_settings->values.frame_index_box) {
+ return false;
+ }
+ if (frame_settings->values.header.layer_info.have_crop) {
+ return false;
+ }
+ if (frame_settings->enc->metadata.m.have_animation) {
+ return false;
+ }
+ if (frame_settings->values.cparams.speed_tier != jxl::SpeedTier::kLightning) {
+ return false;
+ }
+ if (frame_settings->values.image_bit_depth.type ==
+ JxlBitDepthType::JXL_BIT_DEPTH_CUSTOM &&
+ frame_settings->values.image_bit_depth.bits_per_sample !=
+ frame_settings->enc->metadata.m.bit_depth.bits_per_sample) {
+ return false;
+ }
+ // TODO(veluca): implement support for LSB-padded input in fast_lossless.
+ if (frame_settings->values.image_bit_depth.type ==
+ JxlBitDepthType::JXL_BIT_DEPTH_FROM_PIXEL_FORMAT &&
+ frame_settings->values.image_bit_depth.bits_per_sample % 8 != 0) {
+ return false;
+ }
+ if (!frame_settings->values.frame_name.empty()) {
+ return false;
+ }
+ // No extra channels other than alpha.
+ if (!(has_alpha && frame_settings->enc->metadata.m.num_extra_channels == 1) &&
+ frame_settings->enc->metadata.m.num_extra_channels != 0) {
+ return false;
+ }
+ if (frame_settings->enc->metadata.m.bit_depth.bits_per_sample > 16) {
+ return false;
+ }
+ if (pixel_format->data_type != JxlDataType::JXL_TYPE_FLOAT16 &&
+ pixel_format->data_type != JxlDataType::JXL_TYPE_UINT16 &&
+ pixel_format->data_type != JxlDataType::JXL_TYPE_UINT8) {
+ return false;
+ }
+ if ((frame_settings->enc->metadata.m.bit_depth.bits_per_sample > 8) !=
+ (pixel_format->data_type == JxlDataType::JXL_TYPE_UINT16 ||
+ pixel_format->data_type == JxlDataType::JXL_TYPE_FLOAT16)) {
+ return false;
+ }
+ if (!((pixel_format->num_channels == 1 || pixel_format->num_channels == 3) &&
+ !has_alpha) &&
+ !((pixel_format->num_channels == 2 || pixel_format->num_channels == 4) &&
+ has_alpha)) {
+ return false;
+ }
+
+ return true;
+}
+
+JxlEncoderStatus JxlEncoderAddImageFrame(
+ const JxlEncoderFrameSettings* frame_settings,
+ const JxlPixelFormat* pixel_format, const void* buffer, size_t size) {
+ if (!frame_settings->enc->basic_info_set ||
+ (!frame_settings->enc->color_encoding_set &&
+ !frame_settings->enc->metadata.m.xyb_encoded)) {
+ // Basic Info must be set, and color encoding must be set directly,
+ // or set to XYB via JxlBasicInfo.uses_original_profile = JXL_FALSE
+ // Otherwise, this is an API misuse.
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Basic info or color encoding not set yet");
+ }
+
+ if (frame_settings->enc->frames_closed) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Frame input already closed");
+ }
+ if (pixel_format->num_channels < 3) {
+ if (frame_settings->enc->basic_info.num_color_channels != 1) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Grayscale pixel format input for an RGB image");
+ }
+ } else {
+ if (frame_settings->enc->basic_info.num_color_channels != 3) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "RGB pixel format input for a grayscale image");
+ }
+ }
+
+ bool has_alpha = frame_settings->enc->metadata.m.HasAlpha();
+
+ size_t xsize, ysize;
+ if (GetCurrentDimensions(frame_settings, xsize, ysize) != JXL_ENC_SUCCESS) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC,
+ "bad dimensions");
+ }
+
+ // All required conditions to do fast-lossless.
+ if (CanDoFastLossless(frame_settings, pixel_format, has_alpha)) {
+ const size_t bytes_per_pixel =
+ pixel_format->data_type == JxlDataType::JXL_TYPE_UINT8
+ ? pixel_format->num_channels
+ : pixel_format->num_channels * 2;
+ const size_t last_row_size = xsize * bytes_per_pixel;
+ const size_t align = pixel_format->align;
+ const size_t row_size =
+ (align > 1 ? jxl::DivCeil(last_row_size, align) * align
+ : last_row_size);
+ const size_t bytes_to_read = row_size * (ysize - 1) + last_row_size;
+ if (bytes_to_read > size) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "provided image buffer too small");
+ }
+ const bool big_endian =
+ pixel_format->endianness == JXL_BIG_ENDIAN ||
+ (pixel_format->endianness == JXL_NATIVE_ENDIAN && !IsLittleEndian());
+
+ auto runner = +[](void* void_pool, void* opaque, void fun(void*, size_t),
+ size_t count) {
+ auto* pool = reinterpret_cast<jxl::ThreadPool*>(void_pool);
+ JXL_CHECK(jxl::RunOnPool(
+ pool, 0, count, jxl::ThreadPool::NoInit,
+ [&](size_t i, size_t) { fun(opaque, i); }, "Encode fast lossless"));
+ };
+ QueueFastLosslessFrame(
+ frame_settings,
+ JxlFastLosslessPrepareFrame(
+ reinterpret_cast<const unsigned char*>(buffer), xsize, row_size,
+ ysize, pixel_format->num_channels,
+ frame_settings->enc->metadata.m.bit_depth.bits_per_sample,
+ big_endian, /*effort=*/2, frame_settings->enc->thread_pool.get(),
+ runner));
+ return JXL_ENC_SUCCESS;
+ }
+
+ auto queued_frame = jxl::MemoryManagerMakeUnique<jxl::JxlEncoderQueuedFrame>(
+ &frame_settings->enc->memory_manager,
+ // JxlEncoderQueuedFrame is a struct with no constructors, so we use the
+ // default move constructor there.
+ jxl::JxlEncoderQueuedFrame{
+ frame_settings->values,
+ jxl::ImageBundle(&frame_settings->enc->metadata.m),
+ {}});
+
+ if (!queued_frame) {
+ // TODO(jon): when can this happen? is this an API usage error?
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC,
+ "No frame queued?");
+ }
+
+ jxl::ColorEncoding c_current;
+ if (!frame_settings->enc->color_encoding_set) {
+ if ((pixel_format->data_type == JXL_TYPE_FLOAT) ||
+ (pixel_format->data_type == JXL_TYPE_FLOAT16)) {
+ c_current =
+ jxl::ColorEncoding::LinearSRGB(pixel_format->num_channels < 3);
+ } else {
+ c_current = jxl::ColorEncoding::SRGB(pixel_format->num_channels < 3);
+ }
+ } else {
+ c_current = frame_settings->enc->metadata.m.color_encoding;
+ }
+ uint32_t num_channels = pixel_format->num_channels;
+ size_t has_interleaved_alpha =
+ static_cast<size_t>(num_channels == 2 || num_channels == 4);
+ if (has_interleaved_alpha >
+ frame_settings->enc->metadata.m.num_extra_channels) {
+ return JXL_API_ERROR(
+ frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "number of extra channels mismatch (need 1 extra channel for alpha)");
+ }
+ std::vector<jxl::ImageF> extra_channels(
+ frame_settings->enc->metadata.m.num_extra_channels);
+ for (auto& extra_channel : extra_channels) {
+ extra_channel = jxl::ImageF(xsize, ysize);
+ }
+ queued_frame->frame.SetExtraChannels(std::move(extra_channels));
+ for (auto& ec_info : frame_settings->enc->metadata.m.extra_channel_info) {
+ if (has_interleaved_alpha && ec_info.type == jxl::ExtraChannel::kAlpha) {
+ queued_frame->ec_initialized.push_back(1);
+ has_interleaved_alpha = 0; // only first Alpha is initialized
+ } else {
+ queued_frame->ec_initialized.push_back(0);
+ }
+ }
+ queued_frame->frame.origin.x0 =
+ frame_settings->values.header.layer_info.crop_x0;
+ queued_frame->frame.origin.y0 =
+ frame_settings->values.header.layer_info.crop_y0;
+ queued_frame->frame.use_for_next_frame =
+ (frame_settings->values.header.layer_info.save_as_reference != 0u);
+ queued_frame->frame.blendmode =
+ frame_settings->values.header.layer_info.blend_info.blendmode ==
+ JXL_BLEND_REPLACE
+ ? jxl::BlendMode::kReplace
+ : jxl::BlendMode::kBlend;
+ queued_frame->frame.blend =
+ frame_settings->values.header.layer_info.blend_info.source > 0;
+
+ if (JXL_ENC_SUCCESS !=
+ VerifyInputBitDepth(frame_settings->values.image_bit_depth,
+ *pixel_format)) {
+ return JXL_API_ERROR_NOSET("Invalid input bit depth");
+ }
+ size_t bits_per_sample =
+ GetBitDepth(frame_settings->values.image_bit_depth,
+ frame_settings->enc->metadata.m, *pixel_format);
+ const uint8_t* uint8_buffer = reinterpret_cast<const uint8_t*>(buffer);
+ if (!jxl::ConvertFromExternal(
+ jxl::Span<const uint8_t>(uint8_buffer, size), xsize, ysize, c_current,
+ bits_per_sample, *pixel_format,
+ frame_settings->enc->thread_pool.get(), &(queued_frame->frame))) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Invalid input buffer");
+ }
+ if (frame_settings->values.lossless &&
+ frame_settings->enc->metadata.m.xyb_encoded) {
+ return JXL_API_ERROR(
+ frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Set uses_original_profile=true for lossless encoding");
+ }
+ queued_frame->option_values.cparams.level =
+ frame_settings->enc->codestream_level;
+
+ QueueFrame(frame_settings, queued_frame);
+ return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderUseBoxes(JxlEncoder* enc) {
+ if (enc->wrote_bytes) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+ "this setting can only be set at the beginning");
+ }
+ enc->use_boxes = true;
+ return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderAddBox(JxlEncoder* enc, const JxlBoxType type,
+ const uint8_t* contents, size_t size,
+ JXL_BOOL compress_box) {
+ if (!enc->use_boxes) {
+ return JXL_API_ERROR(
+ enc, JXL_ENC_ERR_API_USAGE,
+ "must set JxlEncoderUseBoxes at the beginning to add boxes");
+ }
+ if (compress_box) {
+ if (memcmp("jxl", type, 3) == 0) {
+ return JXL_API_ERROR(
+ enc, JXL_ENC_ERR_API_USAGE,
+ "brob box may not contain a type starting with \"jxl\"");
+ }
+ if (memcmp("jbrd", type, 4) == 0) {
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+ "jbrd box may not be brob compressed");
+ }
+ if (memcmp("brob", type, 4) == 0) {
+ // The compress_box will compress an existing non-brob box into a brob
+ // box. If already giving a valid brotli-compressed brob box, set
+ // compress_box to false since it is already compressed.
+ return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE,
+ "a brob box cannot contain another brob box");
+ }
+ }
+
+ auto box = jxl::MemoryManagerMakeUnique<jxl::JxlEncoderQueuedBox>(
+ &enc->memory_manager);
+
+ box->type = jxl::MakeBoxType(type);
+ box->contents.assign(contents, contents + size);
+ box->compress_box = !!compress_box;
+ QueueBox(enc, box);
+ return JXL_ENC_SUCCESS;
+}
+
+JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelBuffer(
+ const JxlEncoderOptions* frame_settings, const JxlPixelFormat* pixel_format,
+ const void* buffer, size_t size, uint32_t index) {
+ if (index >= frame_settings->enc->metadata.m.num_extra_channels) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Invalid value for the index of extra channel");
+ }
+ if (!frame_settings->enc->basic_info_set ||
+ !frame_settings->enc->color_encoding_set) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Basic info has to be set first");
+ }
+ if (frame_settings->enc->input_queue.empty()) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "First add image frame, then extra channels");
+ }
+ if (frame_settings->enc->frames_closed) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Frame input already closed");
+ }
+ size_t xsize, ysize;
+ if (GetCurrentDimensions(frame_settings, xsize, ysize) != JXL_ENC_SUCCESS) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC,
+ "bad dimensions");
+ }
+ JxlPixelFormat ec_format = *pixel_format;
+ ec_format.num_channels = 1;
+ if (JXL_ENC_SUCCESS !=
+ VerifyInputBitDepth(frame_settings->values.image_bit_depth, ec_format)) {
+ return JXL_API_ERROR_NOSET("Invalid input bit depth");
+ }
+ size_t bits_per_sample = GetBitDepth(
+ frame_settings->values.image_bit_depth,
+ frame_settings->enc->metadata.m.extra_channel_info[index], ec_format);
+ const uint8_t* uint8_buffer = reinterpret_cast<const uint8_t*>(buffer);
+ auto queued_frame = frame_settings->enc->input_queue.back().frame.get();
+ if (!jxl::ConvertFromExternal(jxl::Span<const uint8_t>(uint8_buffer, size),
+ xsize, ysize, bits_per_sample, ec_format, 0,
+ frame_settings->enc->thread_pool.get(),
+ &queued_frame->frame.extra_channels()[index])) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Failed to set buffer for extra channel");
+ }
+ queued_frame->ec_initialized[index] = 1;
+
+ return JXL_ENC_SUCCESS;
+}
+
+void JxlEncoderCloseFrames(JxlEncoder* enc) { enc->frames_closed = true; }
+
+void JxlEncoderCloseBoxes(JxlEncoder* enc) { enc->boxes_closed = true; }
+
+void JxlEncoderCloseInput(JxlEncoder* enc) {
+ JxlEncoderCloseFrames(enc);
+ JxlEncoderCloseBoxes(enc);
+}
+JxlEncoderStatus JxlEncoderProcessOutput(JxlEncoder* enc, uint8_t** next_out,
+ size_t* avail_out) {
+ while (*avail_out >= 32 &&
+ (!enc->output_byte_queue.empty() ||
+ !enc->output_fast_frame_queue.empty() || !enc->input_queue.empty())) {
+ if (!enc->output_byte_queue.empty()) {
+ size_t to_copy = std::min(*avail_out, enc->output_byte_queue.size());
+ std::copy_n(enc->output_byte_queue.begin(), to_copy, *next_out);
+ *next_out += to_copy;
+ *avail_out -= to_copy;
+ enc->output_byte_queue.erase(enc->output_byte_queue.begin(),
+ enc->output_byte_queue.begin() + to_copy);
+ } else if (!enc->output_fast_frame_queue.empty()) {
+ size_t count = JxlFastLosslessWriteOutput(
+ enc->output_fast_frame_queue.front().get(), *next_out, *avail_out);
+ *next_out += count;
+ *avail_out -= count;
+ if (count == 0) {
+ enc->output_fast_frame_queue.pop_front();
+ }
+
+ } else if (!enc->input_queue.empty()) {
+ if (enc->RefillOutputByteQueue() != JXL_ENC_SUCCESS) {
+ return JXL_ENC_ERROR;
+ }
+ }
+ }
+
+ if (!enc->output_byte_queue.empty() ||
+ !enc->output_fast_frame_queue.empty() || !enc->input_queue.empty()) {
+ return JXL_ENC_NEED_MORE_OUTPUT;
+ }
+ return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderSetFrameHeader(JxlEncoderOptions* frame_settings,
+ const JxlFrameHeader* frame_header) {
+ if (frame_header->layer_info.blend_info.source > 3) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "invalid blending source index");
+ }
+ // If there are no extra channels, it's ok for the value to be 0.
+ if (frame_header->layer_info.blend_info.alpha != 0 &&
+ frame_header->layer_info.blend_info.alpha >=
+ frame_settings->enc->metadata.m.extra_channel_info.size()) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "alpha blend channel index out of bounds");
+ }
+
+ frame_settings->values.header = *frame_header;
+ // Setting the frame header resets the frame name, it must be set again with
+ // JxlEncoderSetFrameName if desired.
+ frame_settings->values.frame_name = "";
+
+ return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderSetExtraChannelBlendInfo(
+ JxlEncoderOptions* frame_settings, size_t index,
+ const JxlBlendInfo* blend_info) {
+ if (index >= frame_settings->enc->metadata.m.num_extra_channels) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "Invalid value for the index of extra channel");
+ }
+
+ if (frame_settings->values.extra_channel_blend_info.size() !=
+ frame_settings->enc->metadata.m.num_extra_channels) {
+ JxlBlendInfo default_blend_info;
+ JxlEncoderInitBlendInfo(&default_blend_info);
+ frame_settings->values.extra_channel_blend_info.resize(
+ frame_settings->enc->metadata.m.num_extra_channels, default_blend_info);
+ }
+ frame_settings->values.extra_channel_blend_info[index] = *blend_info;
+ return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderSetFrameName(JxlEncoderFrameSettings* frame_settings,
+ const char* frame_name) {
+ std::string str = frame_name ? frame_name : "";
+ if (str.size() > 1071) {
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE,
+ "frame name can be max 1071 bytes long");
+ }
+ frame_settings->values.frame_name = str;
+ frame_settings->values.header.name_length = str.size();
+ return JXL_ENC_SUCCESS;
+}
+
+JxlEncoderStatus JxlEncoderSetFrameBitDepth(
+ JxlEncoderFrameSettings* frame_settings, const JxlBitDepth* bit_depth) {
+ if (bit_depth->type != JXL_BIT_DEPTH_FROM_PIXEL_FORMAT &&
+ bit_depth->type != JXL_BIT_DEPTH_FROM_CODESTREAM) {
+ return JXL_API_ERROR_NOSET(
+ "Only JXL_BIT_DEPTH_FROM_PIXEL_FORMAT and "
+ "JXL_BIT_DEPTH_FROM_CODESTREAM is implemented "
+ "for input buffers.");
+ }
+ frame_settings->values.image_bit_depth = *bit_depth;
+ return JXL_ENC_SUCCESS;
+}
+
+void JxlColorEncodingSetToSRGB(JxlColorEncoding* color_encoding,
+ JXL_BOOL is_gray) {
+ ConvertInternalToExternalColorEncoding(jxl::ColorEncoding::SRGB(is_gray),
+ color_encoding);
+}
+
+void JxlColorEncodingSetToLinearSRGB(JxlColorEncoding* color_encoding,
+ JXL_BOOL is_gray) {
+ ConvertInternalToExternalColorEncoding(
+ jxl::ColorEncoding::LinearSRGB(is_gray), color_encoding);
+}
+
+void JxlEncoderAllowExpertOptions(JxlEncoder* enc) {
+ enc->allow_expert_options = true;
+}
diff --git a/third_party/jpeg-xl/lib/jxl/encode_internal.h b/third_party/jpeg-xl/lib/jxl/encode_internal.h
new file mode 100644
index 0000000000..7713c5cab6
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/encode_internal.h
@@ -0,0 +1,275 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+#ifndef LIB_JXL_ENCODE_INTERNAL_H_
+#define LIB_JXL_ENCODE_INTERNAL_H_
+
+#include <jxl/encode.h>
+#include <jxl/memory_manager.h>
+#include <jxl/parallel_runner.h>
+#include <jxl/types.h>
+
+#include <deque>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/enc_fast_lossless.h"
+#include "lib/jxl/enc_frame.h"
+#include "lib/jxl/memory_manager_internal.h"
+
+namespace jxl {
+
+/* Frame index box 'jxli' will start with Varint() for
+NF: has type Varint(): number of frames listed in the index.
+TNUM: has type u32: numerator of tick unit.
+TDEN: has type u32: denominator of tick unit. Value 0 means the file is
+ill-formed. per frame i listed: OFFi: has type Varint(): offset of start byte of
+this frame compared to start byte of previous frame from this index in the JPEG
+XL codestream. For the first frame, this is the offset from the first byte of
+the JPEG XL codestream. Ti: has type Varint(): duration in ticks between the
+start of this frame and the start of the next frame in the index. If this is the
+last frame in the index, this is the duration in ticks between the start of this
+frame and the end of the stream. A tick lasts TNUM / TDEN seconds. Fi: has type
+Varint(): amount of frames the next frame in the index occurs after this frame.
+If this is the last frame in the index, this is the amount of frames after this
+frame in the remainder of the stream. Only frames that are presented by the
+decoder are counted for this purpose, this excludes frames that are not intended
+for display but for compositing with other frames, such as frames that aren't
+the last frame with a duration of 0 ticks.
+
+All the frames listed in jxli are keyframes and the first frame is
+present in the list.
+There shall be either zero or one Frame Index boxes in a JPEG XL file.
+The offsets OFFi per frame are given as bytes in the codestream, not as
+bytes in the file format using the box structure. This means if JPEG XL Partial
+Codestream boxes are used, the offset is counted within the concatenated
+codestream, bytes from box headers or non-codestream boxes are not counted.
+*/
+
+typedef struct JxlEncoderFrameIndexBoxEntryStruct {
+ bool to_be_indexed;
+ uint32_t duration;
+ uint64_t OFFi;
+} JxlEncoderFrameIndexBoxEntry;
+
+typedef struct JxlEncoderFrameIndexBoxStruct {
+ // We always need to record the first frame entry, so presence of the
+ // first entry alone is not an indication if it was requested to be
+ // stored.
+ bool index_box_requested_through_api = false;
+
+ int64_t NF() const { return entries.size(); }
+ bool StoreFrameIndexBox() {
+ for (auto e : entries) {
+ if (e.to_be_indexed) {
+ return true;
+ }
+ }
+ return false;
+ }
+ int32_t TNUM = 1;
+ int32_t TDEN = 1000;
+
+ std::vector<JxlEncoderFrameIndexBoxEntry> entries;
+
+ // That way we can ensure that every index box will have the first frame.
+ // If the API user decides to mark it as an indexed frame, we call
+ // the AddFrame again, this time with requested.
+ void AddFrame(uint64_t OFFi, uint32_t duration, bool to_be_indexed) {
+ // We call AddFrame to every frame.
+ // Recording the first frame is required by the standard.
+ // Knowing the last frame is required, since the last indexed frame
+ // needs to know how many frames until the end.
+ // To be able to tell how many frames there are between each index
+ // entry we just record every frame here.
+ if (entries.size() == 1) {
+ if (OFFi == entries[0].OFFi) {
+ // API use for the first frame, let's clear the already recorded first
+ // frame.
+ entries.clear();
+ }
+ }
+ JxlEncoderFrameIndexBoxEntry e;
+ e.to_be_indexed = to_be_indexed;
+ e.OFFi = OFFi;
+ e.duration = duration;
+ entries.push_back(e);
+ }
+} JxlEncoderFrameIndexBox;
+
+// The encoder options (such as quality, compression speed, ...) for a single
+// frame, but not encoder-wide options such as box-related options.
+typedef struct JxlEncoderFrameSettingsValuesStruct {
+ // lossless is a separate setting from cparams because it is a combination
+ // setting that overrides multiple settings inside of cparams.
+ bool lossless;
+ CompressParams cparams;
+ JxlFrameHeader header;
+ std::vector<JxlBlendInfo> extra_channel_blend_info;
+ std::string frame_name;
+ JxlBitDepth image_bit_depth;
+ bool frame_index_box = false;
+} JxlEncoderFrameSettingsValues;
+
+typedef std::array<uint8_t, 4> BoxType;
+
+// Utility function that makes a BoxType from a string literal. The string must
+// have 4 characters, a 5th null termination character is optional.
+constexpr BoxType MakeBoxType(const char* type) {
+ return BoxType(
+ {{static_cast<uint8_t>(type[0]), static_cast<uint8_t>(type[1]),
+ static_cast<uint8_t>(type[2]), static_cast<uint8_t>(type[3])}});
+}
+
+constexpr unsigned char kContainerHeader[] = {
+ 0, 0, 0, 0xc, 'J', 'X', 'L', ' ', 0xd, 0xa, 0x87,
+ 0xa, 0, 0, 0, 0x14, 'f', 't', 'y', 'p', 'j', 'x',
+ 'l', ' ', 0, 0, 0, 0, 'j', 'x', 'l', ' '};
+
+constexpr unsigned char kLevelBoxHeader[] = {0, 0, 0, 0x9, 'j', 'x', 'l', 'l'};
+
+struct JxlEncoderQueuedFrame {
+ JxlEncoderFrameSettingsValues option_values;
+ ImageBundle frame;
+ std::vector<uint8_t> ec_initialized;
+};
+
+struct JxlEncoderQueuedBox {
+ BoxType type;
+ std::vector<uint8_t> contents;
+ bool compress_box;
+};
+
+using FJXLFrameUniquePtr =
+ std::unique_ptr<JxlFastLosslessFrameState,
+ decltype(&JxlFastLosslessFreeFrameState)>;
+
+// Either a frame, or a box, not both.
+// Can also be a FJXL frame.
+struct JxlEncoderQueuedInput {
+ explicit JxlEncoderQueuedInput(const JxlMemoryManager& memory_manager)
+ : frame(nullptr, jxl::MemoryManagerDeleteHelper(&memory_manager)),
+ box(nullptr, jxl::MemoryManagerDeleteHelper(&memory_manager)) {}
+ MemoryManagerUniquePtr<JxlEncoderQueuedFrame> frame;
+ MemoryManagerUniquePtr<JxlEncoderQueuedBox> box;
+ FJXLFrameUniquePtr fast_lossless_frame = {nullptr,
+ JxlFastLosslessFreeFrameState};
+};
+
+// Appends a JXL container box header with given type, size, and unbounded
+// properties to output.
+template <typename T>
+void AppendBoxHeader(const jxl::BoxType& type, size_t size, bool unbounded,
+ T* output) {
+ uint64_t box_size = 0;
+ bool large_size = false;
+ if (!unbounded) {
+ box_size = size + 8;
+ if (box_size >= 0x100000000ull) {
+ large_size = true;
+ }
+ }
+
+ {
+ const uint64_t store = large_size ? 1 : box_size;
+ for (size_t i = 0; i < 4; i++) {
+ output->push_back(store >> (8 * (3 - i)) & 0xff);
+ }
+ }
+ for (size_t i = 0; i < 4; i++) {
+ output->push_back(type[i]);
+ }
+
+ if (large_size) {
+ for (size_t i = 0; i < 8; i++) {
+ output->push_back(box_size >> (8 * (7 - i)) & 0xff);
+ }
+ }
+}
+
+} // namespace jxl
+
+// Internal use only struct, can only be initialized correctly by
+// JxlEncoderCreate.
+struct JxlEncoderStruct {
+ JxlEncoderError error = JxlEncoderError::JXL_ENC_ERR_OK;
+ JxlMemoryManager memory_manager;
+ jxl::MemoryManagerUniquePtr<jxl::ThreadPool> thread_pool{
+ nullptr, jxl::MemoryManagerDeleteHelper(&memory_manager)};
+ JxlCmsInterface cms;
+ std::vector<jxl::MemoryManagerUniquePtr<JxlEncoderFrameSettings>>
+ encoder_options;
+
+ size_t num_queued_frames;
+ size_t num_queued_boxes;
+ std::vector<jxl::JxlEncoderQueuedInput> input_queue;
+ std::deque<uint8_t> output_byte_queue;
+ std::deque<jxl::FJXLFrameUniquePtr> output_fast_frame_queue;
+
+ // How many codestream bytes have been written, i.e.,
+ // content of jxlc and jxlp boxes. Frame index box jxli
+ // requires position indices to point to codestream bytes,
+ // so we need to keep track of the total of flushed or queue
+ // codestream bytes. These bytes may be in a single jxlc box
+ // or across multiple jxlp boxes.
+ size_t codestream_bytes_written_beginning_of_frame;
+ size_t codestream_bytes_written_end_of_frame;
+ jxl::JxlEncoderFrameIndexBox frame_index_box;
+
+ // Force using the container even if not needed
+ bool use_container;
+ // User declared they will add metadata boxes
+ bool use_boxes;
+
+ // TODO(lode): move level into jxl::CompressParams since some C++
+ // implementation decisions should be based on it: level 10 allows more
+ // features to be used.
+ int32_t codestream_level;
+ bool store_jpeg_metadata;
+ jxl::CodecMetadata metadata;
+ std::vector<uint8_t> jpeg_metadata;
+
+ // Wrote any output at all, so wrote the data before the first user added
+ // frame or box, such as signature, basic info, ICC profile or jpeg
+ // reconstruction box.
+ bool wrote_bytes;
+ jxl::CompressParams last_used_cparams;
+ JxlBasicInfo basic_info;
+
+ // Encoder wrote a jxlp (partial codestream) box, so any next codestream
+ // parts must also be written in jxlp boxes, a single jxlc box cannot be
+ // used. The counter is used for the 4-byte jxlp box index header.
+ size_t jxlp_counter;
+
+ bool frames_closed;
+ bool boxes_closed;
+ bool basic_info_set;
+ bool color_encoding_set;
+ bool intensity_target_set;
+ bool allow_expert_options = false;
+ int brotli_effort = -1;
+
+ // Takes the first frame in the input_queue, encodes it, and appends
+ // the bytes to the output_byte_queue.
+ JxlEncoderStatus RefillOutputByteQueue();
+
+ bool MustUseContainer() const {
+ return use_container || (codestream_level != 5 && codestream_level != -1) ||
+ store_jpeg_metadata || use_boxes;
+ }
+
+ // Appends the bytes of a JXL box header with the provided type and size to
+ // the end of the output_byte_queue. If unbounded is true, the size won't be
+ // added to the header and the box will be assumed to continue until EOF.
+ void AppendBoxHeader(const jxl::BoxType& type, size_t size, bool unbounded);
+};
+
+struct JxlEncoderFrameSettingsStruct {
+ JxlEncoder* enc;
+ jxl::JxlEncoderFrameSettingsValues values;
+};
+
+#endif // LIB_JXL_ENCODE_INTERNAL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/encode_test.cc b/third_party/jpeg-xl/lib/jxl/encode_test.cc
new file mode 100644
index 0000000000..3f1d77fd62
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/encode_test.cc
@@ -0,0 +1,1405 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+#include <jxl/encode.h>
+#include <jxl/encode_cxx.h>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/dec/jxl.h"
+#include "lib/jxl/enc_butteraugli_pnorm.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/encode_internal.h"
+#include "lib/jxl/jpeg/dec_jpeg_data.h"
+#include "lib/jxl/jpeg/dec_jpeg_data_writer.h"
+#include "lib/jxl/test_image.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+TEST(EncodeTest, AddFrameAfterCloseInputTest) {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+
+ JxlEncoderCloseInput(enc.get());
+
+ size_t xsize = 64;
+ size_t ysize = 64;
+ JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+
+ jxl::CodecInOut input_io =
+ jxl::test::SomeTestImageToCodecInOut(pixels, 4, xsize, ysize);
+
+ JxlBasicInfo basic_info;
+ jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+ basic_info.xsize = xsize;
+ basic_info.ysize = ysize;
+ basic_info.uses_original_profile = false;
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10));
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+ JxlColorEncoding color_encoding;
+ JxlColorEncodingSetToSRGB(&color_encoding,
+ /*is_gray=*/pixel_format.num_channels < 3);
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ EXPECT_EQ(JXL_ENC_ERROR,
+ JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+ pixels.data(), pixels.size()));
+}
+
+TEST(EncodeTest, AddJPEGAfterCloseTest) {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+
+ JxlEncoderCloseInput(enc.get());
+
+ const std::string jpeg_path = "jxl/flower/flower.png.im_q85_420.jpg";
+ const jxl::PaddedBytes orig = jxl::test::ReadTestData(jpeg_path);
+
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+
+ EXPECT_EQ(JXL_ENC_ERROR,
+ JxlEncoderAddJPEGFrame(frame_settings, orig.data(), orig.size()));
+}
+
+TEST(EncodeTest, AddFrameBeforeColorEncodingTest) {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+
+ size_t xsize = 64;
+ size_t ysize = 64;
+ JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+
+ jxl::CodecInOut input_io =
+ jxl::test::SomeTestImageToCodecInOut(pixels, 4, xsize, ysize);
+
+ JxlBasicInfo basic_info;
+ jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+ basic_info.xsize = xsize;
+ basic_info.ysize = ysize;
+ basic_info.uses_original_profile = true;
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10));
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ EXPECT_EQ(JXL_ENC_ERROR,
+ JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+ pixels.data(), pixels.size()));
+}
+
+TEST(EncodeTest, AddFrameBeforeBasicInfoTest) {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+
+ size_t xsize = 64;
+ size_t ysize = 64;
+ JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+
+ jxl::CodecInOut input_io =
+ jxl::test::SomeTestImageToCodecInOut(pixels, 4, xsize, ysize);
+
+ JxlColorEncoding color_encoding;
+ JxlColorEncodingSetToSRGB(&color_encoding,
+ /*is_gray=*/pixel_format.num_channels < 3);
+ EXPECT_EQ(JXL_ENC_ERROR,
+ JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ EXPECT_EQ(JXL_ENC_ERROR,
+ JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+ pixels.data(), pixels.size()));
+}
+
+TEST(EncodeTest, DefaultAllocTest) {
+ JxlEncoder* enc = JxlEncoderCreate(nullptr);
+ EXPECT_NE(nullptr, enc);
+ JxlEncoderDestroy(enc);
+}
+
+TEST(EncodeTest, CustomAllocTest) {
+ struct CalledCounters {
+ int allocs = 0;
+ int frees = 0;
+ } counters;
+
+ JxlMemoryManager mm;
+ mm.opaque = &counters;
+ mm.alloc = [](void* opaque, size_t size) {
+ reinterpret_cast<CalledCounters*>(opaque)->allocs++;
+ return malloc(size);
+ };
+ mm.free = [](void* opaque, void* address) {
+ reinterpret_cast<CalledCounters*>(opaque)->frees++;
+ free(address);
+ };
+
+ {
+ JxlEncoderPtr enc = JxlEncoderMake(&mm);
+ EXPECT_NE(nullptr, enc.get());
+ EXPECT_LE(1, counters.allocs);
+ EXPECT_EQ(0, counters.frees);
+ }
+ EXPECT_LE(1, counters.frees);
+}
+
+TEST(EncodeTest, DefaultParallelRunnerTest) {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderSetParallelRunner(enc.get(), nullptr, nullptr));
+}
+
+void VerifyFrameEncoding(size_t xsize, size_t ysize, JxlEncoder* enc,
+ const JxlEncoderFrameSettings* frame_settings,
+ size_t max_compressed_size,
+ bool lossy_use_original_profile) {
+ JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+
+ jxl::CodecInOut input_io =
+ jxl::test::SomeTestImageToCodecInOut(pixels, 4, xsize, ysize);
+
+ JxlBasicInfo basic_info;
+ jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+ basic_info.xsize = xsize;
+ basic_info.ysize = ysize;
+ if (frame_settings->values.lossless || lossy_use_original_profile) {
+ basic_info.uses_original_profile = true;
+ } else {
+ basic_info.uses_original_profile = false;
+ }
+ // 16-bit alpha means this requires level 10
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc, 10));
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc, &basic_info));
+ JxlColorEncoding color_encoding;
+ JxlColorEncodingSetToSRGB(&color_encoding, true);
+ EXPECT_EQ(JXL_ENC_ERROR, JxlEncoderSetColorEncoding(enc, &color_encoding));
+ JxlColorEncodingSetToSRGB(&color_encoding, false);
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetColorEncoding(enc, &color_encoding));
+ pixel_format.num_channels = 1;
+ EXPECT_EQ(JXL_ENC_ERROR,
+ JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+ pixels.data(), pixels.size()));
+ pixel_format.num_channels = 4;
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+ pixels.data(), pixels.size()));
+ JxlEncoderCloseInput(enc);
+
+ std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+ uint8_t* next_out = compressed.data();
+ size_t avail_out = compressed.size() - (next_out - compressed.data());
+ JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+ while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ process_result = JxlEncoderProcessOutput(enc, &next_out, &avail_out);
+ if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ size_t offset = next_out - compressed.data();
+ compressed.resize(compressed.size() * 2);
+ next_out = compressed.data() + offset;
+ avail_out = compressed.size() - offset;
+ }
+ }
+ compressed.resize(next_out - compressed.data());
+ EXPECT_LE(compressed.size(), max_compressed_size);
+ EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+ jxl::CodecInOut decoded_io;
+ EXPECT_TRUE(jxl::test::DecodeFile(
+ {}, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+ &decoded_io));
+
+ EXPECT_LE(
+ ComputeDistance2(input_io.Main(), decoded_io.Main(), jxl::GetJxlCms()),
+#if JXL_HIGH_PRECISION
+ 1.84);
+#else
+ 8.7);
+#endif
+}
+
+void VerifyFrameEncoding(JxlEncoder* enc,
+ const JxlEncoderFrameSettings* frame_settings) {
+ VerifyFrameEncoding(63, 129, enc, frame_settings, 2700,
+ /*lossy_use_original_profile=*/false);
+}
+
+TEST(EncodeTest, FrameEncodingTest) {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+ VerifyFrameEncoding(enc.get(),
+ JxlEncoderFrameSettingsCreate(enc.get(), nullptr));
+}
+
+TEST(EncodeTest, EncoderResetTest) {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+ VerifyFrameEncoding(50, 200, enc.get(),
+ JxlEncoderFrameSettingsCreate(enc.get(), nullptr), 4300,
+ false);
+ // Encoder should become reusable for a new image from scratch after using
+ // reset.
+ JxlEncoderReset(enc.get());
+ VerifyFrameEncoding(157, 77, enc.get(),
+ JxlEncoderFrameSettingsCreate(enc.get(), nullptr), 2300,
+ false);
+}
+
+TEST(EncodeTest, CmsTest) {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+ bool cms_called = false;
+ JxlCmsInterface cms = jxl::GetJxlCms();
+ struct InitData {
+ void* original_init_data;
+ jpegxl_cms_init_func original_init;
+ bool* cms_called;
+ };
+ InitData init_data = {/*original_init_data=*/cms.init_data,
+ /*original_init=*/cms.init,
+ /*cms_called=*/&cms_called};
+ cms.init_data = &init_data;
+ cms.init = +[](void* raw_init_data, size_t num_threads,
+ size_t pixels_per_thread, const JxlColorProfile* input_profile,
+ const JxlColorProfile* output_profile,
+ float intensity_target) {
+ const InitData* init_data = static_cast<const InitData*>(raw_init_data);
+ *init_data->cms_called = true;
+ return init_data->original_init(init_data->original_init_data, num_threads,
+ pixels_per_thread, input_profile,
+ output_profile, intensity_target);
+ };
+ JxlEncoderSetCms(enc.get(), cms);
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), nullptr);
+ JxlEncoderSetFrameLossless(frame_settings, false);
+ ASSERT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(frame_settings,
+ JXL_ENC_FRAME_SETTING_EFFORT, 8));
+ VerifyFrameEncoding(enc.get(), frame_settings);
+ EXPECT_TRUE(cms_called);
+}
+
+TEST(EncodeTest, frame_settingsTest) {
+ {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_EFFORT, 5));
+ VerifyFrameEncoding(enc.get(), frame_settings);
+ EXPECT_EQ(jxl::SpeedTier::kHare, enc->last_used_cparams.speed_tier);
+ }
+
+ {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ // Lower than currently supported values
+ EXPECT_EQ(JXL_ENC_ERROR,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_EFFORT, 0));
+ // Higher than currently supported values
+ EXPECT_EQ(JXL_ENC_ERROR,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_EFFORT, 11));
+ }
+
+ {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderSetFrameLossless(frame_settings, JXL_TRUE));
+ VerifyFrameEncoding(63, 129, enc.get(), frame_settings, 3600, false);
+ EXPECT_EQ(true, enc->last_used_cparams.IsLossless());
+ }
+
+ {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetFrameDistance(frame_settings, 0.5));
+ VerifyFrameEncoding(63, 129, enc.get(), frame_settings, 3030, false);
+ EXPECT_EQ(0.5, enc->last_used_cparams.butteraugli_distance);
+ }
+
+ {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ // Disallowed negative distance
+ EXPECT_EQ(JXL_ENC_ERROR, JxlEncoderSetFrameDistance(frame_settings, -1));
+ }
+
+ {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_DECODING_SPEED, 2));
+ VerifyFrameEncoding(enc.get(), frame_settings);
+ EXPECT_EQ(2u, enc->last_used_cparams.decoding_speed_tier);
+ }
+
+ {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ EXPECT_EQ(JXL_ENC_ERROR,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_GROUP_ORDER, 100));
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_GROUP_ORDER, 1));
+ EXPECT_EQ(
+ JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_X, 5));
+ VerifyFrameEncoding(enc.get(), frame_settings);
+ EXPECT_EQ(true, enc->last_used_cparams.centerfirst);
+ EXPECT_EQ(5, enc->last_used_cparams.center_x);
+ }
+
+ {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_RESPONSIVE, 0));
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC, 1));
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_QPROGRESSIVE_AC, -1));
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC, 2));
+ VerifyFrameEncoding(63, 129, enc.get(), frame_settings, 2830,
+ /*lossy_use_original_profile=*/false);
+ EXPECT_EQ(false, enc->last_used_cparams.responsive);
+ EXPECT_EQ(true, enc->last_used_cparams.progressive_mode);
+ EXPECT_EQ(2, enc->last_used_cparams.progressive_dc);
+ }
+
+ {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ EXPECT_EQ(
+ JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetFloatOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_PHOTON_NOISE, 1777.777));
+ VerifyFrameEncoding(enc.get(), frame_settings);
+ EXPECT_NEAR(1777.777f, enc->last_used_cparams.photon_noise_iso, 1E-4);
+ }
+
+ {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetFloatOption(
+ frame_settings,
+ JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GLOBAL_PERCENT, 55.0f));
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetFloatOption(
+ frame_settings,
+ JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GROUP_PERCENT, 25.0f));
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_PALETTE_COLORS, 70000));
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_LOSSY_PALETTE, 1));
+ VerifyFrameEncoding(enc.get(), frame_settings);
+ EXPECT_NEAR(55.0f,
+ enc->last_used_cparams.channel_colors_pre_transform_percent,
+ 1E-6);
+ EXPECT_NEAR(25.0f, enc->last_used_cparams.channel_colors_percent, 1E-6);
+ EXPECT_EQ(70000, enc->last_used_cparams.palette_colors);
+ EXPECT_EQ(true, enc->last_used_cparams.lossy_palette);
+ }
+
+ {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ EXPECT_EQ(
+ JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_MODULAR_COLOR_SPACE, 30));
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_MODULAR_GROUP_SIZE, 2));
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR, 14));
+ EXPECT_EQ(
+ JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetFloatOption(
+ frame_settings,
+ JXL_ENC_FRAME_SETTING_MODULAR_MA_TREE_LEARNING_PERCENT, 77.0f));
+ EXPECT_EQ(
+ JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_MODULAR_NB_PREV_CHANNELS, 7));
+ VerifyFrameEncoding(enc.get(), frame_settings);
+ EXPECT_EQ(30, enc->last_used_cparams.colorspace);
+ EXPECT_EQ(2, enc->last_used_cparams.modular_group_size_shift);
+ EXPECT_EQ(jxl::Predictor::Best, enc->last_used_cparams.options.predictor);
+ EXPECT_NEAR(0.77f, enc->last_used_cparams.options.nb_repeats, 1E-6);
+ EXPECT_EQ(7, enc->last_used_cparams.options.max_properties);
+ }
+
+ {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_JPEG_RECON_CFL, 0));
+ VerifyFrameEncoding(enc.get(), frame_settings);
+ EXPECT_EQ(false, enc->last_used_cparams.force_cfl_jpeg_recompression);
+ }
+
+ {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_JPEG_RECON_CFL, 1));
+ VerifyFrameEncoding(enc.get(), frame_settings);
+ EXPECT_EQ(true, enc->last_used_cparams.force_cfl_jpeg_recompression);
+ }
+}
+
+TEST(EncodeTest, LossyEncoderUseOriginalProfileTest) {
+ {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ ASSERT_NE(nullptr, enc.get());
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ VerifyFrameEncoding(63, 129, enc.get(), frame_settings, 7897, true);
+ }
+ {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ ASSERT_NE(nullptr, enc.get());
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC, 2));
+ VerifyFrameEncoding(63, 129, enc.get(), frame_settings, 8310, true);
+ }
+ {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ ASSERT_NE(nullptr, enc.get());
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ ASSERT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_EFFORT, 8));
+ VerifyFrameEncoding(63, 129, enc.get(), frame_settings, 7173, true);
+ }
+}
+
+namespace {
+// Returns a copy of buf from offset to offset+size, or a new zeroed vector if
+// the result would have been out of bounds taking integer overflow into
+// account.
+std::vector<uint8_t> SliceSpan(const jxl::Span<const uint8_t>& buf,
+ size_t offset, size_t size) {
+ if (offset + size >= buf.size()) {
+ return std::vector<uint8_t>(size, 0);
+ }
+ if (offset + size < offset) {
+ return std::vector<uint8_t>(size, 0);
+ }
+ return std::vector<uint8_t>(buf.data() + offset, buf.data() + offset + size);
+}
+
+struct Box {
+ // The type of the box.
+ // If "uuid", use extended_type instead
+ char type[4] = {0, 0, 0, 0};
+
+ // The extended_type is only used when type == "uuid".
+ // Extended types are not used in JXL. However, the box format itself
+ // supports this so they are handled correctly.
+ char extended_type[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+ // Box data.
+ jxl::Span<const uint8_t> data = jxl::Span<const uint8_t>(nullptr, 0);
+
+ // If the size is not given, the datasize extends to the end of the file.
+ // If this field is false, the size field is not encoded when the box is
+ // serialized.
+ bool data_size_given = true;
+
+ // If successful, returns true and sets `in` to be the rest data (if any).
+ // If `in` contains a box with a size larger than `in.size()`, will not
+ // modify `in`, and will return true but the data `Span<uint8_t>` will
+ // remain set to nullptr.
+ // If unsuccessful, returns error and doesn't modify `in`.
+ jxl::Status Decode(jxl::Span<const uint8_t>* in) {
+ // Total box_size including this header itself.
+ uint64_t box_size = LoadBE32(SliceSpan(*in, 0, 4).data());
+ size_t pos = 4;
+
+ memcpy(type, SliceSpan(*in, pos, 4).data(), 4);
+ pos += 4;
+
+ if (box_size == 1) {
+ // If the size is 1, it indicates extended size read from 64-bit integer.
+ box_size = LoadBE64(SliceSpan(*in, pos, 8).data());
+ pos += 8;
+ }
+
+ if (!memcmp("uuid", type, 4)) {
+ memcpy(extended_type, SliceSpan(*in, pos, 16).data(), 16);
+ pos += 16;
+ }
+
+ // This is the end of the box header, the box data begins here. Handle
+ // the data size now.
+ const size_t header_size = pos;
+
+ if (box_size != 0) {
+ if (box_size < header_size) {
+ return JXL_FAILURE("Invalid box size");
+ }
+ if (box_size > in->size()) {
+ // The box is fine, but the input is too short.
+ return true;
+ }
+ data_size_given = true;
+ data = jxl::Span<const uint8_t>(in->data() + header_size,
+ box_size - header_size);
+ } else {
+ data_size_given = false;
+ data = jxl::Span<const uint8_t>(in->data() + header_size,
+ in->size() - header_size);
+ }
+
+ *in = jxl::Span<const uint8_t>(in->data() + header_size + data.size(),
+ in->size() - header_size - data.size());
+ return true;
+ }
+};
+
+struct Container {
+ std::vector<Box> boxes;
+
+ // If successful, returns true and sets `in` to be the rest data (if any).
+ // If unsuccessful, returns error and doesn't modify `in`.
+ jxl::Status Decode(jxl::Span<const uint8_t>* in) {
+ boxes.clear();
+
+ Box signature_box;
+ JXL_RETURN_IF_ERROR(signature_box.Decode(in));
+ if (memcmp("JXL ", signature_box.type, 4) != 0) {
+ return JXL_FAILURE("Invalid magic signature");
+ }
+ if (signature_box.data.size() != 4)
+ return JXL_FAILURE("Invalid magic signature");
+ if (signature_box.data[0] != 0xd || signature_box.data[1] != 0xa ||
+ signature_box.data[2] != 0x87 || signature_box.data[3] != 0xa) {
+ return JXL_FAILURE("Invalid magic signature");
+ }
+
+ Box ftyp_box;
+ JXL_RETURN_IF_ERROR(ftyp_box.Decode(in));
+ if (memcmp("ftyp", ftyp_box.type, 4) != 0) {
+ return JXL_FAILURE("Invalid ftyp");
+ }
+ if (ftyp_box.data.size() != 12) return JXL_FAILURE("Invalid ftyp");
+ const char* expected = "jxl \0\0\0\0jxl ";
+ if (memcmp(expected, ftyp_box.data.data(), 12) != 0)
+ return JXL_FAILURE("Invalid ftyp");
+
+ while (!in->empty()) {
+ Box box = {};
+ JXL_RETURN_IF_ERROR(box.Decode(in));
+ if (box.data.data() == nullptr) {
+ // The decoding encountered a box, but not enough data yet.
+ return true;
+ }
+ boxes.emplace_back(box);
+ }
+
+ return true;
+ }
+};
+
+} // namespace
+
+TEST(EncodeTest, SingleFrameBoundedJXLCTest) {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseContainer(enc.get(), true));
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+
+ size_t xsize = 71;
+ size_t ysize = 23;
+ JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+
+ JxlBasicInfo basic_info;
+ jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+ basic_info.xsize = xsize;
+ basic_info.ysize = ysize;
+ basic_info.uses_original_profile = false;
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10));
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+ JxlColorEncoding color_encoding;
+ JxlColorEncodingSetToSRGB(&color_encoding,
+ /*is_gray=*/false);
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+ pixels.data(), pixels.size()));
+ JxlEncoderCloseInput(enc.get());
+
+ std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+ uint8_t* next_out = compressed.data();
+ size_t avail_out = compressed.size() - (next_out - compressed.data());
+ JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+ while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out);
+ if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ size_t offset = next_out - compressed.data();
+ compressed.resize(compressed.size() * 2);
+ next_out = compressed.data() + offset;
+ avail_out = compressed.size() - offset;
+ }
+ }
+ compressed.resize(next_out - compressed.data());
+ EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+
+ Container container = {};
+ jxl::Span<const uint8_t> encoded_span =
+ jxl::Span<const uint8_t>(compressed.data(), compressed.size());
+ EXPECT_TRUE(container.Decode(&encoded_span));
+ EXPECT_EQ(0u, encoded_span.size());
+ bool found_jxlc = false;
+ bool found_jxlp = false;
+ // The encoder is allowed to either emit a jxlc or one or more jxlp.
+ for (size_t i = 0; i < container.boxes.size(); ++i) {
+ if (memcmp("jxlc", container.boxes[i].type, 4) == 0) {
+ EXPECT_EQ(false, found_jxlc); // Max 1 jxlc
+ EXPECT_EQ(false, found_jxlp); // Can't mix jxlc and jxlp
+ found_jxlc = true;
+ }
+ if (memcmp("jxlp", container.boxes[i].type, 4) == 0) {
+ EXPECT_EQ(false, found_jxlc); // Can't mix jxlc and jxlp
+ found_jxlp = true;
+ }
+ // The encoder shouldn't create an unbounded box in this case, with the
+ // single frame it knows the full size in time, so can help make decoding
+ // more efficient by giving the full box size of the final box.
+ EXPECT_EQ(true, container.boxes[i].data_size_given);
+ }
+ EXPECT_EQ(true, found_jxlc || found_jxlp);
+}
+
+TEST(EncodeTest, CodestreamLevelTest) {
+ size_t xsize = 64;
+ size_t ysize = 64;
+ JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+
+ jxl::CodecInOut input_io =
+ jxl::test::SomeTestImageToCodecInOut(pixels, 4, xsize, ysize);
+
+ JxlBasicInfo basic_info;
+ jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+ basic_info.xsize = xsize;
+ basic_info.ysize = ysize;
+ basic_info.uses_original_profile = false;
+
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10));
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+ JxlColorEncoding color_encoding;
+ JxlColorEncodingSetToSRGB(&color_encoding,
+ /*is_gray=*/pixel_format.num_channels < 3);
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+ pixels.data(), pixels.size()));
+ JxlEncoderCloseInput(enc.get());
+
+ std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+ uint8_t* next_out = compressed.data();
+ size_t avail_out = compressed.size() - (next_out - compressed.data());
+ JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+ while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out);
+ if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ size_t offset = next_out - compressed.data();
+ compressed.resize(compressed.size() * 2);
+ next_out = compressed.data() + offset;
+ avail_out = compressed.size() - offset;
+ }
+ }
+ compressed.resize(next_out - compressed.data());
+ EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+
+ Container container = {};
+ jxl::Span<const uint8_t> encoded_span =
+ jxl::Span<const uint8_t>(compressed.data(), compressed.size());
+ EXPECT_TRUE(container.Decode(&encoded_span));
+ EXPECT_EQ(0u, encoded_span.size());
+ EXPECT_EQ(0, memcmp("jxll", container.boxes[0].type, 4));
+}
+
+TEST(EncodeTest, CodestreamLevelVerificationTest) {
+ JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT8, JXL_BIG_ENDIAN, 0};
+
+ JxlBasicInfo basic_info;
+ jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+ basic_info.xsize = 64;
+ basic_info.ysize = 64;
+ basic_info.uses_original_profile = false;
+
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+
+ EXPECT_EQ(5, JxlEncoderGetRequiredCodestreamLevel(enc.get()));
+
+ // Set an image dimension that is too large for level 5, but fits in level 10
+
+ basic_info.xsize = 1ull << 30ull;
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 5));
+ EXPECT_EQ(JXL_ENC_ERROR, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10));
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+ EXPECT_EQ(10, JxlEncoderGetRequiredCodestreamLevel(enc.get()));
+
+ // Set an image dimension that is too large even for level 10
+
+ basic_info.xsize = 1ull << 31ull;
+ EXPECT_EQ(JXL_ENC_ERROR, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+}
+
+TEST(EncodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGReconstructionTest)) {
+ const std::string jpeg_path = "jxl/flower/flower.png.im_q85_420.jpg";
+ const jxl::PaddedBytes orig = jxl::test::ReadTestData(jpeg_path);
+
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderStoreJPEGMetadata(enc.get(), JXL_TRUE));
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderAddJPEGFrame(frame_settings, orig.data(), orig.size()));
+ JxlEncoderCloseInput(enc.get());
+
+ std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+ uint8_t* next_out = compressed.data();
+ size_t avail_out = compressed.size() - (next_out - compressed.data());
+ JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+ while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out);
+ if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ size_t offset = next_out - compressed.data();
+ compressed.resize(compressed.size() * 2);
+ next_out = compressed.data() + offset;
+ avail_out = compressed.size() - offset;
+ }
+ }
+ compressed.resize(next_out - compressed.data());
+ EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+
+ jxl::extras::JXLDecompressParams dparams;
+ std::vector<uint8_t> decoded_jpeg_bytes;
+ jxl::extras::PackedPixelFile ppf;
+ EXPECT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), dparams,
+ nullptr, &ppf, &decoded_jpeg_bytes));
+
+ EXPECT_EQ(decoded_jpeg_bytes.size(), orig.size());
+ EXPECT_EQ(0, memcmp(decoded_jpeg_bytes.data(), orig.data(), orig.size()));
+}
+
+TEST(EncodeTest, JXL_TRANSCODE_JPEG_TEST(ProgressiveJPEGReconstructionTest)) {
+ const std::string jpeg_path = "jxl/flower/flower.png.im_q85_420.jpg";
+ const jxl::PaddedBytes orig = jxl::test::ReadTestData(jpeg_path);
+
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+
+ frame_settings->values.cparams.progressive_mode = true;
+
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderStoreJPEGMetadata(enc.get(), JXL_TRUE));
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderAddJPEGFrame(frame_settings, orig.data(), orig.size()));
+ JxlEncoderCloseInput(enc.get());
+
+ std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+ uint8_t* next_out = compressed.data();
+ size_t avail_out = compressed.size() - (next_out - compressed.data());
+ JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+ while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out);
+ if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ size_t offset = next_out - compressed.data();
+ compressed.resize(compressed.size() * 2);
+ next_out = compressed.data() + offset;
+ avail_out = compressed.size() - offset;
+ }
+ }
+ compressed.resize(next_out - compressed.data());
+ EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+
+ jxl::extras::JXLDecompressParams dparams;
+ std::vector<uint8_t> decoded_jpeg_bytes;
+ jxl::extras::PackedPixelFile ppf;
+ EXPECT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), dparams,
+ nullptr, &ppf, &decoded_jpeg_bytes));
+
+ EXPECT_EQ(decoded_jpeg_bytes.size(), orig.size());
+ EXPECT_EQ(0, memcmp(decoded_jpeg_bytes.data(), orig.data(), orig.size()));
+}
+
+static void ProcessEncoder(JxlEncoder* enc, std::vector<uint8_t>& compressed,
+ uint8_t*& next_out, size_t& avail_out) {
+ JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+ while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ process_result = JxlEncoderProcessOutput(enc, &next_out, &avail_out);
+ if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ size_t offset = next_out - compressed.data();
+ compressed.resize(compressed.size() * 2);
+ next_out = compressed.data() + offset;
+ avail_out = compressed.size() - offset;
+ }
+ }
+ size_t offset = next_out - compressed.data();
+ compressed.resize(next_out - compressed.data());
+ next_out = compressed.data() + offset;
+ avail_out = compressed.size() - offset;
+ EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+}
+
+TEST(EncodeTest, BasicInfoTest) {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ size_t xsize = 1;
+ size_t ysize = 1;
+ JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+ JxlBasicInfo basic_info;
+ jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+ basic_info.xsize = xsize;
+ basic_info.ysize = ysize;
+ basic_info.uses_original_profile = false;
+ basic_info.have_animation = true;
+ basic_info.intensity_target = 123.4;
+ basic_info.min_nits = 5.0;
+ basic_info.linear_below = 12.7;
+ basic_info.orientation = JXL_ORIENT_ROTATE_90_CW;
+ basic_info.intrinsic_xsize = 88;
+ basic_info.intrinsic_ysize = 99;
+ basic_info.animation.tps_numerator = 55;
+ basic_info.animation.tps_denominator = 77;
+ basic_info.animation.num_loops = 10;
+ basic_info.animation.have_timecodes = JXL_TRUE;
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10));
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+ JxlColorEncoding color_encoding;
+ JxlColorEncodingSetToSRGB(&color_encoding, /*is_gray=*/false);
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+
+ std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+ uint8_t* next_out = compressed.data();
+ size_t avail_out = compressed.size() - (next_out - compressed.data());
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+ pixels.data(), pixels.size()));
+ JxlEncoderCloseFrames(enc.get());
+ ProcessEncoder(enc.get(), compressed, next_out, avail_out);
+
+ // Decode to verify the boxes, we don't decode to pixels, only the boxes.
+ JxlDecoderPtr dec = JxlDecoderMake(nullptr);
+ EXPECT_NE(nullptr, dec.get());
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_BASIC_INFO));
+ // Allow testing the orientation field, without this setting it will be
+ // overridden to identity.
+ JxlDecoderSetKeepOrientation(dec.get(), JXL_TRUE);
+ JxlDecoderSetInput(dec.get(), compressed.data(), compressed.size());
+ JxlDecoderCloseInput(dec.get());
+
+ for (;;) {
+ JxlDecoderStatus status = JxlDecoderProcessInput(dec.get());
+ if (status == JXL_DEC_ERROR) {
+ FAIL();
+ } else if (status == JXL_DEC_SUCCESS) {
+ break;
+ } else if (status == JXL_DEC_BASIC_INFO) {
+ JxlBasicInfo basic_info2;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetBasicInfo(dec.get(), &basic_info2));
+ EXPECT_EQ(basic_info.xsize, basic_info2.xsize);
+ EXPECT_EQ(basic_info.ysize, basic_info2.ysize);
+ EXPECT_EQ(basic_info.bits_per_sample, basic_info2.bits_per_sample);
+ EXPECT_EQ(basic_info.exponent_bits_per_sample,
+ basic_info2.exponent_bits_per_sample);
+ EXPECT_NEAR(basic_info.intensity_target, basic_info2.intensity_target,
+ 0.5);
+ EXPECT_NEAR(basic_info.min_nits, basic_info2.min_nits, 0.5);
+ EXPECT_NEAR(basic_info.linear_below, basic_info2.linear_below, 0.5);
+ EXPECT_EQ(basic_info.relative_to_max_display,
+ basic_info2.relative_to_max_display);
+ EXPECT_EQ(basic_info.uses_original_profile,
+ basic_info2.uses_original_profile);
+ EXPECT_EQ(basic_info.orientation, basic_info2.orientation);
+ EXPECT_EQ(basic_info.intrinsic_xsize, basic_info2.intrinsic_xsize);
+ EXPECT_EQ(basic_info.intrinsic_ysize, basic_info2.intrinsic_ysize);
+ EXPECT_EQ(basic_info.num_color_channels, basic_info2.num_color_channels);
+ // TODO(lode): also test num_extra_channels, but currently there may be a
+ // mismatch between 0 and 1 if there is alpha, until encoder support for
+ // extra channels is fully implemented.
+ EXPECT_EQ(basic_info.alpha_bits, basic_info2.alpha_bits);
+ EXPECT_EQ(basic_info.alpha_exponent_bits,
+ basic_info2.alpha_exponent_bits);
+ EXPECT_EQ(basic_info.alpha_premultiplied,
+ basic_info2.alpha_premultiplied);
+
+ EXPECT_EQ(basic_info.have_preview, basic_info2.have_preview);
+ if (basic_info.have_preview) {
+ EXPECT_EQ(basic_info.preview.xsize, basic_info2.preview.xsize);
+ EXPECT_EQ(basic_info.preview.ysize, basic_info2.preview.ysize);
+ }
+
+ EXPECT_EQ(basic_info.have_animation, basic_info2.have_animation);
+ if (basic_info.have_animation) {
+ EXPECT_EQ(basic_info.animation.tps_numerator,
+ basic_info2.animation.tps_numerator);
+ EXPECT_EQ(basic_info.animation.tps_denominator,
+ basic_info2.animation.tps_denominator);
+ EXPECT_EQ(basic_info.animation.num_loops,
+ basic_info2.animation.num_loops);
+ EXPECT_EQ(basic_info.animation.have_timecodes,
+ basic_info2.animation.have_timecodes);
+ }
+ } else {
+ FAIL(); // unexpected status
+ }
+ }
+}
+
+TEST(EncodeTest, AnimationHeaderTest) {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ size_t xsize = 1;
+ size_t ysize = 1;
+ JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+ JxlBasicInfo basic_info;
+ jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+ basic_info.xsize = xsize;
+ basic_info.ysize = ysize;
+ basic_info.have_animation = true;
+ basic_info.animation.tps_numerator = 1000;
+ basic_info.animation.tps_denominator = 1;
+ basic_info.animation.have_timecodes = JXL_TRUE;
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10));
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+ JxlColorEncoding color_encoding;
+ JxlColorEncodingSetToSRGB(&color_encoding, /*is_gray=*/false);
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+
+ std::string frame_name = "test frame";
+ JxlFrameHeader header;
+ JxlEncoderInitFrameHeader(&header);
+ header.duration = 50;
+ header.timecode = 800;
+ header.layer_info.blend_info.blendmode = JXL_BLEND_BLEND;
+ header.layer_info.blend_info.source = 2;
+ header.layer_info.blend_info.clamp = 1;
+ JxlBlendInfo extra_channel_blend_info;
+ JxlEncoderInitBlendInfo(&extra_channel_blend_info);
+ extra_channel_blend_info.blendmode = JXL_BLEND_MULADD;
+ JxlEncoderSetFrameHeader(frame_settings, &header);
+ JxlEncoderSetExtraChannelBlendInfo(frame_settings, 0,
+ &extra_channel_blend_info);
+ JxlEncoderSetFrameName(frame_settings, frame_name.c_str());
+
+ std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+ uint8_t* next_out = compressed.data();
+ size_t avail_out = compressed.size() - (next_out - compressed.data());
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+ pixels.data(), pixels.size()));
+ JxlEncoderCloseFrames(enc.get());
+ ProcessEncoder(enc.get(), compressed, next_out, avail_out);
+
+ // Decode to verify the boxes, we don't decode to pixels, only the boxes.
+ JxlDecoderPtr dec = JxlDecoderMake(nullptr);
+ EXPECT_NE(nullptr, dec.get());
+
+ // To test the blend_info fields, coalescing must be set to false in the
+ // decoder.
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetCoalescing(dec.get(), JXL_FALSE));
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_FRAME));
+ JxlDecoderSetInput(dec.get(), compressed.data(), compressed.size());
+ JxlDecoderCloseInput(dec.get());
+
+ bool seen_frame = false;
+
+ for (;;) {
+ JxlDecoderStatus status = JxlDecoderProcessInput(dec.get());
+ if (status == JXL_DEC_ERROR) {
+ FAIL();
+ } else if (status == JXL_DEC_SUCCESS) {
+ break;
+ } else if (status == JXL_DEC_FRAME) {
+ seen_frame = true;
+ JxlFrameHeader header2;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec.get(), &header2));
+ EXPECT_EQ(header.duration, header2.duration);
+ EXPECT_EQ(header.timecode, header2.timecode);
+ EXPECT_EQ(header.layer_info.blend_info.blendmode,
+ header2.layer_info.blend_info.blendmode);
+ EXPECT_EQ(header.layer_info.blend_info.clamp,
+ header2.layer_info.blend_info.clamp);
+ EXPECT_EQ(header.layer_info.blend_info.source,
+ header2.layer_info.blend_info.source);
+ EXPECT_EQ(frame_name.size(), header2.name_length);
+ JxlBlendInfo extra_channel_blend_info2;
+ JxlDecoderGetExtraChannelBlendInfo(dec.get(), 0,
+ &extra_channel_blend_info2);
+ EXPECT_EQ(extra_channel_blend_info.blendmode,
+ extra_channel_blend_info2.blendmode);
+ if (header2.name_length > 0) {
+ std::string frame_name2(header2.name_length + 1, '\0');
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetFrameName(dec.get(), &frame_name2.front(),
+ frame_name2.size()));
+ frame_name2.resize(header2.name_length);
+ EXPECT_EQ(frame_name, frame_name2);
+ }
+ } else {
+ FAIL(); // unexpected status
+ }
+ }
+
+ EXPECT_EQ(true, seen_frame);
+}
+TEST(EncodeTest, CroppedFrameTest) {
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ size_t xsize = 300;
+ size_t ysize = 300;
+ JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ std::vector<uint8_t> pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+ std::vector<uint8_t> pixels2(pixels.size());
+ JxlBasicInfo basic_info;
+ jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+ // Encoding a 300x300 frame in an image that is only 100x100
+ basic_info.xsize = 100;
+ basic_info.ysize = 100;
+ basic_info.uses_original_profile = JXL_TRUE;
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10));
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+ JxlColorEncoding color_encoding;
+ JxlColorEncodingSetToSRGB(&color_encoding, /*is_gray=*/false);
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+
+ JxlFrameHeader header;
+ JxlEncoderInitFrameHeader(&header);
+ header.layer_info.have_crop = JXL_TRUE;
+ header.layer_info.xsize = xsize;
+ header.layer_info.ysize = ysize;
+ header.layer_info.crop_x0 = -50;
+ header.layer_info.crop_y0 = -250;
+ JxlEncoderSetFrameLossless(frame_settings, JXL_TRUE);
+ JxlEncoderSetFrameHeader(frame_settings, &header);
+ JxlEncoderFrameSettingsSetOption(frame_settings, JXL_ENC_FRAME_SETTING_EFFORT,
+ 1);
+
+ std::vector<uint8_t> compressed = std::vector<uint8_t>(100);
+ uint8_t* next_out = compressed.data();
+ size_t avail_out = compressed.size() - (next_out - compressed.data());
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+ pixels.data(), pixels.size()));
+ JxlEncoderCloseFrames(enc.get());
+ ProcessEncoder(enc.get(), compressed, next_out, avail_out);
+
+ JxlDecoderPtr dec = JxlDecoderMake(nullptr);
+ EXPECT_NE(nullptr, dec.get());
+ // Non-coalesced decoding so we can get the full uncropped frame
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetCoalescing(dec.get(), JXL_FALSE));
+ EXPECT_EQ(
+ JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE));
+ JxlDecoderSetInput(dec.get(), compressed.data(), compressed.size());
+ JxlDecoderCloseInput(dec.get());
+
+ bool seen_frame = false;
+ bool checked_frame = false;
+ for (;;) {
+ JxlDecoderStatus status = JxlDecoderProcessInput(dec.get());
+ if (status == JXL_DEC_ERROR) {
+ FAIL();
+ } else if (status == JXL_DEC_SUCCESS) {
+ break;
+ } else if (status == JXL_DEC_FRAME) {
+ seen_frame = true;
+ JxlFrameHeader header2;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec.get(), &header2));
+ EXPECT_EQ(header.layer_info.xsize, header2.layer_info.xsize);
+ EXPECT_EQ(header.layer_info.ysize, header2.layer_info.ysize);
+ EXPECT_EQ(header.layer_info.crop_x0, header2.layer_info.crop_x0);
+ EXPECT_EQ(header.layer_info.crop_y0, header2.layer_info.crop_y0);
+ } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetImageOutBuffer(dec.get(), &pixel_format,
+ pixels2.data(), pixels2.size()));
+ } else if (status == JXL_DEC_FULL_IMAGE) {
+ EXPECT_EQ(0, memcmp(pixels.data(), pixels2.data(), pixels.size()));
+ checked_frame = true;
+ } else {
+ FAIL(); // unexpected status
+ }
+ }
+ EXPECT_EQ(true, checked_frame);
+ EXPECT_EQ(true, seen_frame);
+}
+
+TEST(EncodeTest, JXL_BOXES_TEST(BoxTest)) {
+ // Test with uncompressed boxes and with brob boxes
+ for (int compress_box = 0; compress_box <= 1; ++compress_box) {
+ // Tests adding two metadata boxes with the encoder: an exif box before the
+ // image frame, and an xml box after the image frame. Then verifies the
+ // decoder can decode them, they are in the expected place, and have the
+ // correct content after decoding.
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ EXPECT_NE(nullptr, enc.get());
+
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseBoxes(enc.get()));
+
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ size_t xsize = 50;
+ size_t ysize = 17;
+ JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0};
+ std::vector<uint8_t> pixels =
+ jxl::test::GetSomeTestImage(xsize, ysize, 4, 0);
+ JxlBasicInfo basic_info;
+ jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+ basic_info.xsize = xsize;
+ basic_info.ysize = ysize;
+ basic_info.uses_original_profile = false;
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10));
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+ JxlColorEncoding color_encoding;
+ JxlColorEncodingSetToSRGB(&color_encoding,
+ /*is_gray=*/false);
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+
+ std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+ uint8_t* next_out = compressed.data();
+ size_t avail_out = compressed.size() - (next_out - compressed.data());
+
+ // Add an early metadata box. Also add a valid 4-byte TIFF offset header
+ // before the fake exif data of these box contents.
+ constexpr const char* exif_test_string = "\0\0\0\0exif test data";
+ const uint8_t* exif_data =
+ reinterpret_cast<const uint8_t*>(exif_test_string);
+ // Skip the 4 zeroes for strlen
+ const size_t exif_size = 4 + strlen(exif_test_string + 4);
+ JxlEncoderAddBox(enc.get(), "Exif", exif_data, exif_size, compress_box);
+
+ // Write to output
+ ProcessEncoder(enc.get(), compressed, next_out, avail_out);
+
+ // Add image frame
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+ pixels.data(), pixels.size()));
+ // Indicate this is the last frame
+ JxlEncoderCloseFrames(enc.get());
+
+ // Write to output
+ ProcessEncoder(enc.get(), compressed, next_out, avail_out);
+
+ // Add a late metadata box
+ constexpr const char* xml_test_string = "<some random xml data>";
+ const uint8_t* xml_data = reinterpret_cast<const uint8_t*>(xml_test_string);
+ size_t xml_size = strlen(xml_test_string);
+ JxlEncoderAddBox(enc.get(), "XML ", xml_data, xml_size, compress_box);
+
+ // Indicate this is the last box
+ JxlEncoderCloseBoxes(enc.get());
+
+ // Write to output
+ ProcessEncoder(enc.get(), compressed, next_out, avail_out);
+
+ // Decode to verify the boxes, we don't decode to pixels, only the boxes.
+ JxlDecoderPtr dec = JxlDecoderMake(nullptr);
+ EXPECT_NE(nullptr, dec.get());
+
+ if (compress_box) {
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetDecompressBoxes(dec.get(), JXL_TRUE));
+ }
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(
+ dec.get(), JXL_DEC_FRAME | JXL_DEC_BOX));
+
+ JxlDecoderSetInput(dec.get(), compressed.data(), compressed.size());
+ JxlDecoderCloseInput(dec.get());
+
+ std::vector<uint8_t> dec_exif_box(exif_size);
+ std::vector<uint8_t> dec_xml_box(xml_size);
+
+ for (bool post_frame = false;;) {
+ JxlDecoderStatus status = JxlDecoderProcessInput(dec.get());
+ if (status == JXL_DEC_ERROR) {
+ FAIL();
+ } else if (status == JXL_DEC_SUCCESS) {
+ EXPECT_EQ(0, JxlDecoderReleaseBoxBuffer(dec.get()));
+ break;
+ } else if (status == JXL_DEC_FRAME) {
+ post_frame = true;
+ } else if (status == JXL_DEC_BOX) {
+ // Since we gave the exif/xml box output buffer of the exact known
+ // correct size, 0 bytes should be released. Same when no buffer was
+ // set.
+ EXPECT_EQ(0, JxlDecoderReleaseBoxBuffer(dec.get()));
+ JxlBoxType type;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec.get(), type, true));
+ if (!memcmp(type, "Exif", 4)) {
+ // This box should have been encoded before the image frame
+ EXPECT_EQ(false, post_frame);
+ JxlDecoderSetBoxBuffer(dec.get(), dec_exif_box.data(),
+ dec_exif_box.size());
+ } else if (!memcmp(type, "XML ", 4)) {
+ // This box should have been encoded after the image frame
+ EXPECT_EQ(true, post_frame);
+ JxlDecoderSetBoxBuffer(dec.get(), dec_xml_box.data(),
+ dec_xml_box.size());
+ }
+ } else {
+ FAIL(); // unexpected status
+ }
+ }
+
+ EXPECT_EQ(0, memcmp(exif_data, dec_exif_box.data(), exif_size));
+ EXPECT_EQ(0, memcmp(xml_data, dec_xml_box.data(), xml_size));
+ }
+}
+
+#if JPEGXL_ENABLE_JPEG // Loading .jpg files requires libjpeg support.
+TEST(EncodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGFrameTest)) {
+ for (int skip_basic_info = 0; skip_basic_info < 2; skip_basic_info++) {
+ for (int skip_color_encoding = 0; skip_color_encoding < 2;
+ skip_color_encoding++) {
+ // cannot set color encoding if basic info is not set
+ if (skip_basic_info && !skip_color_encoding) continue;
+ const std::string jpeg_path = "jxl/flower/flower_cropped.jpg";
+ const jxl::PaddedBytes orig = jxl::test::ReadTestData(jpeg_path);
+ jxl::CodecInOut orig_io;
+ ASSERT_TRUE(SetFromBytes(jxl::Span<const uint8_t>(orig), &orig_io,
+ /*pool=*/nullptr));
+
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+ JxlEncoderFrameSettingsSetOption(frame_settings,
+ JXL_ENC_FRAME_SETTING_EFFORT, 1);
+ if (!skip_basic_info) {
+ JxlBasicInfo basic_info;
+ JxlEncoderInitBasicInfo(&basic_info);
+ basic_info.xsize = orig_io.xsize();
+ basic_info.ysize = orig_io.ysize();
+ basic_info.uses_original_profile = true;
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderSetBasicInfo(enc.get(), &basic_info));
+ }
+ if (!skip_color_encoding) {
+ JxlColorEncoding color_encoding;
+ JxlColorEncodingSetToSRGB(&color_encoding, /*is_gray=*/false);
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderSetColorEncoding(enc.get(), &color_encoding));
+ }
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderAddJPEGFrame(
+ frame_settings, orig.data(), orig.size()));
+ JxlEncoderCloseInput(enc.get());
+
+ std::vector<uint8_t> compressed = std::vector<uint8_t>(64);
+ uint8_t* next_out = compressed.data();
+ size_t avail_out = compressed.size() - (next_out - compressed.data());
+ JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+ while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ process_result =
+ JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out);
+ if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ size_t offset = next_out - compressed.data();
+ compressed.resize(compressed.size() * 2);
+ next_out = compressed.data() + offset;
+ avail_out = compressed.size() - offset;
+ }
+ }
+ compressed.resize(next_out - compressed.data());
+ EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+
+ jxl::CodecInOut decoded_io;
+ EXPECT_TRUE(jxl::test::DecodeFile(
+ {}, jxl::Span<const uint8_t>(compressed.data(), compressed.size()),
+ &decoded_io));
+
+ EXPECT_LE(
+ ComputeDistance2(orig_io.Main(), decoded_io.Main(), jxl::GetJxlCms()),
+ 3.5);
+ }
+ }
+}
+#endif // JPEGXL_ENABLE_JPEG
diff --git a/third_party/jpeg-xl/lib/jxl/entropy_coder.cc b/third_party/jpeg-xl/lib/jxl/entropy_coder.cc
new file mode 100644
index 0000000000..0043c2d31e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/entropy_coder.cc
@@ -0,0 +1,70 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/entropy_coder.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_context_map.h"
+#include "lib/jxl/epf.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+Status DecodeBlockCtxMap(BitReader* br, BlockCtxMap* block_ctx_map) {
+ auto& dct = block_ctx_map->dc_thresholds;
+ auto& qft = block_ctx_map->qf_thresholds;
+ auto& ctx_map = block_ctx_map->ctx_map;
+ bool is_default = br->ReadFixedBits<1>();
+ if (is_default) {
+ *block_ctx_map = BlockCtxMap();
+ return true;
+ }
+ block_ctx_map->num_dc_ctxs = 1;
+ for (int j : {0, 1, 2}) {
+ dct[j].resize(br->ReadFixedBits<4>());
+ block_ctx_map->num_dc_ctxs *= dct[j].size() + 1;
+ for (int& i : dct[j]) {
+ i = UnpackSigned(U32Coder::Read(kDCThresholdDist, br));
+ }
+ }
+ qft.resize(br->ReadFixedBits<4>());
+ for (uint32_t& i : qft) {
+ i = U32Coder::Read(kQFThresholdDist, br) + 1;
+ }
+
+ if (block_ctx_map->num_dc_ctxs * (qft.size() + 1) > 64) {
+ return JXL_FAILURE("Invalid block context map: too big");
+ }
+
+ ctx_map.resize(3 * kNumOrders * block_ctx_map->num_dc_ctxs *
+ (qft.size() + 1));
+ JXL_RETURN_IF_ERROR(DecodeContextMap(&ctx_map, &block_ctx_map->num_ctxs, br));
+ if (block_ctx_map->num_ctxs > 16) {
+ return JXL_FAILURE("Invalid block context map: too many distinct contexts");
+ }
+ return true;
+}
+
+constexpr uint8_t BlockCtxMap::kDefaultCtxMap[]; // from ac_context.h
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/entropy_coder.h b/third_party/jpeg-xl/lib/jxl/entropy_coder.h
new file mode 100644
index 0000000000..e4afa7a631
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/entropy_coder.h
@@ -0,0 +1,45 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ENTROPY_CODER_H_
+#define LIB_JXL_ENTROPY_CODER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/field_encodings.h"
+
+// Entropy coding and context modeling of DC and AC coefficients, as well as AC
+// strategy and quantization field.
+
+namespace jxl {
+
+static JXL_INLINE int32_t PredictFromTopAndLeft(
+ const int32_t* const JXL_RESTRICT row_top,
+ const int32_t* const JXL_RESTRICT row, size_t x, int32_t default_val) {
+ if (x == 0) {
+ return row_top == nullptr ? default_val : row_top[x];
+ }
+ if (row_top == nullptr) {
+ return row[x - 1];
+ }
+ return (row_top[x] + row[x - 1] + 1) / 2;
+}
+
+static constexpr U32Enc kDCThresholdDist(Bits(4), BitsOffset(8, 16),
+ BitsOffset(16, 272),
+ BitsOffset(32, 65808));
+
+static constexpr U32Enc kQFThresholdDist(Bits(2), BitsOffset(3, 4),
+ BitsOffset(5, 12), BitsOffset(8, 44));
+
+Status DecodeBlockCtxMap(BitReader* br, BlockCtxMap* block_ctx_map);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ENTROPY_CODER_H_
diff --git a/third_party/jpeg-xl/lib/jxl/entropy_coder_test.cc b/third_party/jpeg-xl/lib/jxl/entropy_coder_test.cc
new file mode 100644
index 0000000000..9dbeb137af
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/entropy_coder_test.cc
@@ -0,0 +1,68 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// TODO(deymo): Move these tests to dec_ans.h and common.h
+
+#include <stdint.h>
+
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(EntropyCoderTest, PackUnpack) {
+ for (int32_t i = -31; i < 32; ++i) {
+ uint32_t packed = PackSigned(i);
+ EXPECT_LT(packed, 63u);
+ int32_t unpacked = UnpackSigned(packed);
+ EXPECT_EQ(i, unpacked);
+ }
+}
+
+struct DummyBitReader {
+ uint32_t nbits, bits;
+ void Consume(uint32_t nbits) {}
+ uint32_t PeekBits(uint32_t n) {
+ EXPECT_EQ(n, nbits);
+ return bits;
+ }
+};
+
+void HybridUintRoundtrip(HybridUintConfig config, size_t limit = 1 << 24) {
+ Rng rng(0);
+ constexpr size_t kNumIntegers = 1 << 20;
+ std::vector<uint32_t> integers(kNumIntegers);
+ std::vector<uint32_t> token(kNumIntegers);
+ std::vector<uint32_t> nbits(kNumIntegers);
+ std::vector<uint32_t> bits(kNumIntegers);
+ for (size_t i = 0; i < kNumIntegers; i++) {
+ integers[i] = rng.UniformU(0, limit + 1);
+ config.Encode(integers[i], &token[i], &nbits[i], &bits[i]);
+ }
+ for (size_t i = 0; i < kNumIntegers; i++) {
+ DummyBitReader br{nbits[i], bits[i]};
+ EXPECT_EQ(integers[i],
+ ANSSymbolReader::ReadHybridUintConfig(config, token[i], &br));
+ }
+}
+
+TEST(HybridUintTest, Test000) {
+ HybridUintRoundtrip(HybridUintConfig{0, 0, 0});
+}
+TEST(HybridUintTest, Test411) {
+ HybridUintRoundtrip(HybridUintConfig{4, 1, 1});
+}
+TEST(HybridUintTest, Test420) {
+ HybridUintRoundtrip(HybridUintConfig{4, 2, 0});
+}
+TEST(HybridUintTest, Test421) {
+ HybridUintRoundtrip(HybridUintConfig{4, 2, 1}, 256);
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/epf.cc b/third_party/jpeg-xl/lib/jxl/epf.cc
new file mode 100644
index 0000000000..7288ed9ca6
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/epf.cc
@@ -0,0 +1,146 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Edge-preserving smoothing: weighted average based on L1 patch similarity.
+
+#include "lib/jxl/epf.h"
+
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <atomic>
+#include <numeric> // std::accumulate
+#include <vector>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+
+namespace jxl {
+
+// Mirror n floats starting at *p and store them before p.
+JXL_INLINE void LeftMirror(float* p, size_t n) {
+ for (size_t i = 0; i < n; i++) {
+ *(p - 1 - i) = p[i];
+ }
+}
+
+// Mirror n floats starting at *(p - n) and store them at *p.
+JXL_INLINE void RightMirror(float* p, size_t n) {
+ for (size_t i = 0; i < n; i++) {
+ p[i] = *(p - 1 - i);
+ }
+}
+
+void ComputeSigma(const Rect& block_rect, PassesDecoderState* state) {
+ const LoopFilter& lf = state->shared->frame_header.loop_filter;
+ JXL_CHECK(lf.epf_iters > 0);
+ const AcStrategyImage& ac_strategy = state->shared->ac_strategy;
+ const float quant_scale = state->shared->quantizer.Scale();
+
+ const size_t sigma_stride = state->sigma.PixelsPerRow();
+ const size_t sharpness_stride = state->shared->epf_sharpness.PixelsPerRow();
+
+ for (size_t by = 0; by < block_rect.ysize(); ++by) {
+ float* JXL_RESTRICT sigma_row = block_rect.Row(&state->sigma, by);
+ const uint8_t* JXL_RESTRICT sharpness_row =
+ block_rect.ConstRow(state->shared->epf_sharpness, by);
+ AcStrategyRow acs_row = ac_strategy.ConstRow(block_rect, by);
+ const int32_t* const JXL_RESTRICT row_quant =
+ block_rect.ConstRow(state->shared->raw_quant_field, by);
+
+ for (size_t bx = 0; bx < block_rect.xsize(); bx++) {
+ AcStrategy acs = acs_row[bx];
+ size_t llf_x = acs.covered_blocks_x();
+ if (!acs.IsFirstBlock()) continue;
+ // quant_scale is smaller for low quality.
+ // quant_scale is roughly 0.08 / butteraugli score.
+ //
+ // row_quant is smaller for low quality.
+ // row_quant is a quantization multiplier of form 1.0 /
+ // row_quant[bx]
+ //
+ // lf.epf_quant_mul is a parameter in the format
+ // kInvSigmaNum is a constant
+ float sigma_quant =
+ lf.epf_quant_mul / (quant_scale * row_quant[bx] * kInvSigmaNum);
+ for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+ for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+ float sigma =
+ sigma_quant *
+ lf.epf_sharp_lut[sharpness_row[bx + ix + iy * sharpness_stride]];
+ // Avoid infinities.
+ sigma = std::min(-1e-4f, sigma); // TODO(veluca): remove this.
+ sigma_row[bx + ix + kSigmaPadding +
+ (iy + kSigmaPadding) * sigma_stride] = 1.0f / sigma;
+ }
+ }
+ // TODO(veluca): remove this padding.
+ // Left padding with mirroring.
+ if (bx + block_rect.x0() == 0) {
+ for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+ LeftMirror(
+ sigma_row + kSigmaPadding + (iy + kSigmaPadding) * sigma_stride,
+ kSigmaBorder);
+ }
+ }
+ // Right padding with mirroring.
+ if (bx + block_rect.x0() + llf_x ==
+ state->shared->frame_dim.xsize_blocks) {
+ for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+ RightMirror(sigma_row + kSigmaPadding + bx + llf_x +
+ (iy + kSigmaPadding) * sigma_stride,
+ kSigmaBorder);
+ }
+ }
+ // Offsets for row copying, in blocks.
+ size_t offset_before = bx + block_rect.x0() == 0 ? 1 : bx + kSigmaPadding;
+ size_t offset_after =
+ bx + block_rect.x0() + llf_x == state->shared->frame_dim.xsize_blocks
+ ? kSigmaPadding + llf_x + bx + kSigmaBorder
+ : kSigmaPadding + llf_x + bx;
+ size_t num = offset_after - offset_before;
+ // Above
+ if (by + block_rect.y0() == 0) {
+ for (size_t iy = 0; iy < kSigmaBorder; iy++) {
+ memcpy(
+ sigma_row + offset_before +
+ (kSigmaPadding - 1 - iy) * sigma_stride,
+ sigma_row + offset_before + (kSigmaPadding + iy) * sigma_stride,
+ num * sizeof(*sigma_row));
+ }
+ }
+ // Below
+ if (by + block_rect.y0() + acs.covered_blocks_y() ==
+ state->shared->frame_dim.ysize_blocks) {
+ for (size_t iy = 0; iy < kSigmaBorder; iy++) {
+ memcpy(
+ sigma_row + offset_before +
+ sigma_stride * (acs.covered_blocks_y() + kSigmaPadding + iy),
+ sigma_row + offset_before +
+ sigma_stride *
+ (acs.covered_blocks_y() + kSigmaPadding - 1 - iy),
+ num * sizeof(*sigma_row));
+ }
+ }
+ }
+ }
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/epf.h b/third_party/jpeg-xl/lib/jxl/epf.h
new file mode 100644
index 0000000000..7a0834ed97
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/epf.h
@@ -0,0 +1,33 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_EPF_H_
+#define LIB_JXL_EPF_H_
+
+// Fast SIMD "in-loop" edge preserving filter (adaptive, nonlinear).
+
+#include <stddef.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/passes_state.h"
+
+namespace jxl {
+
+// 4 * (sqrt(0.5)-1), so that Weight(sigma) = 0.5.
+static constexpr float kInvSigmaNum = -1.1715728752538099024f;
+
+// kInvSigmaNum / 0.3
+constexpr float kMinSigma = -3.90524291751269967465540850526868f;
+
+// Fills the `state->filter_weights.sigma` image with the precomputed sigma
+// values in the area inside `block_rect`. Accesses the AC strategy, quant field
+// and epf_sharpness fields in the corresponding positions.
+void ComputeSigma(const Rect& block_rect, PassesDecoderState* state);
+
+} // namespace jxl
+
+#endif // LIB_JXL_EPF_H_
diff --git a/third_party/jpeg-xl/lib/jxl/exif.h b/third_party/jpeg-xl/lib/jxl/exif.h
new file mode 100644
index 0000000000..0cf493fc71
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/exif.h
@@ -0,0 +1,87 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_EXIF_H_
+#define LIB_JXL_EXIF_H_
+
+// Basic parsing of Exif (just enough for the render-impacting things
+// like orientation)
+
+#include <jxl/codestream_header.h>
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/image_metadata.h"
+
+namespace jxl {
+
+constexpr uint16_t kExifOrientationTag = 274;
+
+// Checks if a blob looks like Exif, and if so, sets bigendian
+// according to the tiff endianness
+inline bool IsExif(const std::vector<uint8_t>& exif, bool* bigendian) {
+ if (exif.size() < 12) return false; // not enough bytes for a valid exif blob
+ const uint8_t* t = exif.data();
+ if (LoadLE32(t) == 0x2A004D4D) {
+ *bigendian = true;
+ return true;
+ } else if (LoadLE32(t) == 0x002A4949) {
+ *bigendian = false;
+ return true;
+ }
+ return false; // not a valid tiff header
+}
+
+// Finds the position of an Exif tag, or 0 if it is not found
+inline size_t FindExifTagPosition(const std::vector<uint8_t>& exif,
+ uint16_t tagname) {
+ bool bigendian;
+ if (!IsExif(exif, &bigendian)) return 0;
+ const uint8_t* t = exif.data() + 4;
+ uint64_t offset = (bigendian ? LoadBE32(t) : LoadLE32(t));
+ if (exif.size() < 12 + offset + 2 || offset < 8) return 0;
+ t += offset - 4;
+ if (offset + 2 >= exif.size()) return 0;
+ uint16_t nb_tags = (bigendian ? LoadBE16(t) : LoadLE16(t));
+ t += 2;
+ while (nb_tags > 0) {
+ if (t + 12 >= exif.data() + exif.size()) return 0;
+ uint16_t tag = (bigendian ? LoadBE16(t) : LoadLE16(t));
+ t += 2;
+ if (tag == tagname) return static_cast<size_t>(t - exif.data());
+ t += 10;
+ nb_tags--;
+ }
+ return 0;
+}
+
+// TODO (jon): tag 1 can be used to represent Adobe RGB 1998 if it has value
+// "R03"
+// TODO (jon): set intrinsic dimensions according to
+// https://discourse.wicg.io/t/proposal-exif-image-resolution-auto-and-from-image/4326/24
+// Parses the Exif data just enough to extract any render-impacting info.
+// If the Exif data is invalid or could not be parsed, then it is treated
+// as a no-op.
+inline void InterpretExif(const std::vector<uint8_t>& exif,
+ JxlOrientation* orientation) {
+ bool bigendian;
+ if (!IsExif(exif, &bigendian)) return;
+ size_t o_pos = FindExifTagPosition(exif, kExifOrientationTag);
+ if (o_pos) {
+ const uint8_t* t = exif.data() + o_pos;
+ uint16_t type = (bigendian ? LoadBE16(t) : LoadLE16(t));
+ t += 2;
+ uint32_t count = (bigendian ? LoadBE32(t) : LoadLE32(t));
+ t += 4;
+ uint16_t value = (bigendian ? LoadBE16(t) : LoadLE16(t));
+ t += 4;
+ if (type == 3 && count == 1 && value >= 1 && value <= 8) {
+ *orientation = static_cast<JxlOrientation>(value);
+ }
+ }
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_EXIF_H_
diff --git a/third_party/jpeg-xl/lib/jxl/fake_parallel_runner_testonly.h b/third_party/jpeg-xl/lib/jxl/fake_parallel_runner_testonly.h
new file mode 100644
index 0000000000..508d808cc5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/fake_parallel_runner_testonly.h
@@ -0,0 +1,79 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_FAKE_PARALLEL_RUNNER_TESTONLY_H_
+#define LIB_JXL_FAKE_PARALLEL_RUNNER_TESTONLY_H_
+
+#include <jxl/parallel_runner.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/random.h"
+
+namespace jxl {
+
+// A parallel runner implementation that runs all the jobs in a single thread
+// (the caller thread) but runs them pretending to use multiple threads and
+// potentially out of order. This is useful for testing conditions that only
+// occur under heavy load where the order of operations is different.
+class FakeParallelRunner {
+ public:
+ FakeParallelRunner(uint32_t order_seed, uint32_t num_threads)
+ : order_seed_(order_seed), rng_(order_seed), num_threads_(num_threads) {
+ if (num_threads_ < 1) num_threads_ = 1;
+ }
+
+ JxlParallelRetCode Run(void* jxl_opaque, JxlParallelRunInit init,
+ JxlParallelRunFunction func, uint32_t start,
+ uint32_t end) {
+ JxlParallelRetCode ret = init(jxl_opaque, num_threads_);
+ if (ret != 0) return ret;
+
+ if (order_seed_ == 0) {
+ for (uint32_t i = start; i < end; i++) {
+ func(jxl_opaque, i, i % num_threads_);
+ }
+ } else {
+ std::vector<uint32_t> order(end - start);
+ for (uint32_t i = start; i < end; i++) {
+ order[i - start] = i;
+ }
+ rng_.Shuffle(order.data(), order.size());
+ for (uint32_t i = start; i < end; i++) {
+ func(jxl_opaque, order[i - start], i % num_threads_);
+ }
+ }
+ return ret;
+ }
+
+ private:
+ // Seed for the RNG for defining the execution order. A value of 0 means
+ // sequential order from start to end.
+ uint32_t order_seed_;
+
+ // The PRNG object, initialized with the order_seed_. Only used if the seed is
+ // not 0.
+ Rng rng_;
+
+ // Number of fake threads. All the tasks are run on the same thread, but using
+ // different thread_id values based on this num_threads.
+ uint32_t num_threads_;
+};
+
+} // namespace jxl
+
+extern "C" {
+// Function to pass as the parallel runner.
+JXL_INLINE JxlParallelRetCode JxlFakeParallelRunner(
+ void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+ JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range) {
+ return static_cast<jxl::FakeParallelRunner*>(runner_opaque)
+ ->Run(jpegxl_opaque, init, func, start_range, end_range);
+}
+}
+
+#endif // LIB_JXL_FAKE_PARALLEL_RUNNER_TESTONLY_H_
diff --git a/third_party/jpeg-xl/lib/jxl/fast_dct-inl.h b/third_party/jpeg-xl/lib/jxl/fast_dct-inl.h
new file mode 100644
index 0000000000..d2453b0e10
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/fast_dct-inl.h
@@ -0,0 +1,238 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JXL_FAST_DCT_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_FAST_DCT_INL_H_
+#undef LIB_JXL_FAST_DCT_INL_H_
+#else
+#define LIB_JXL_FAST_DCT_INL_H_
+#endif
+
+#include <cmath>
+
+#include <hwy/aligned_allocator.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/status.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+#if HWY_TARGET == HWY_NEON
+HWY_NOINLINE void FastTransposeBlock(const int16_t* JXL_RESTRICT data_in,
+ size_t stride_in, size_t N, size_t M,
+ int16_t* JXL_RESTRICT data_out,
+ size_t stride_out) {
+ JXL_DASSERT(N % 8 == 0);
+ JXL_DASSERT(M % 8 == 0);
+ for (size_t i = 0; i < N; i += 8) {
+ for (size_t j = 0; j < M; j += 8) {
+ // TODO(veluca): one could optimize the M==8, stride_in==8 case further
+ // with vld4.
+ // This code is about 40% faster for N == M == stride_in ==
+ // stride_out == 8
+ // Using loads + stores to reshuffle things to be able to
+ // use vld4 doesn't help.
+ /*
+ auto a0 = vld4q_s16(data_in); auto a1 = vld4q_s16(data_in + 32);
+ int16x8x4_t out0;
+ int16x8x4_t out1;
+ out0.val[0] = vuzp1q_s16(a0.val[0], a1.val[0]);
+ out0.val[1] = vuzp1q_s16(a0.val[1], a1.val[1]);
+ out0.val[2] = vuzp1q_s16(a0.val[2], a1.val[2]);
+ out0.val[3] = vuzp1q_s16(a0.val[3], a1.val[3]);
+ out1.val[0] = vuzp2q_s16(a0.val[0], a1.val[0]);
+ out1.val[1] = vuzp2q_s16(a0.val[1], a1.val[1]);
+ out1.val[2] = vuzp2q_s16(a0.val[2], a1.val[2]);
+ out1.val[3] = vuzp2q_s16(a0.val[3], a1.val[3]);
+ vst1q_s16_x4(data_out, out0);
+ vst1q_s16_x4(data_out + 32, out1);
+ */
+ auto a0 = vld1q_s16(data_in + i * stride_in + j);
+ auto a1 = vld1q_s16(data_in + (i + 1) * stride_in + j);
+ auto a2 = vld1q_s16(data_in + (i + 2) * stride_in + j);
+ auto a3 = vld1q_s16(data_in + (i + 3) * stride_in + j);
+
+ auto a01 = vtrnq_s16(a0, a1);
+ auto a23 = vtrnq_s16(a2, a3);
+
+ auto four0 = vtrnq_s32(vreinterpretq_s32_s16(a01.val[0]),
+ vreinterpretq_s32_s16(a23.val[0]));
+ auto four1 = vtrnq_s32(vreinterpretq_s32_s16(a01.val[1]),
+ vreinterpretq_s32_s16(a23.val[1]));
+
+ auto a4 = vld1q_s16(data_in + (i + 4) * stride_in + j);
+ auto a5 = vld1q_s16(data_in + (i + 5) * stride_in + j);
+ auto a6 = vld1q_s16(data_in + (i + 6) * stride_in + j);
+ auto a7 = vld1q_s16(data_in + (i + 7) * stride_in + j);
+
+ auto a45 = vtrnq_s16(a4, a5);
+ auto a67 = vtrnq_s16(a6, a7);
+
+ auto four2 = vtrnq_s32(vreinterpretq_s32_s16(a45.val[0]),
+ vreinterpretq_s32_s16(a67.val[0]));
+ auto four3 = vtrnq_s32(vreinterpretq_s32_s16(a45.val[1]),
+ vreinterpretq_s32_s16(a67.val[1]));
+
+ auto out0 =
+ vcombine_s32(vget_low_s32(four0.val[0]), vget_low_s32(four2.val[0]));
+ auto out1 =
+ vcombine_s32(vget_low_s32(four1.val[0]), vget_low_s32(four3.val[0]));
+ auto out2 =
+ vcombine_s32(vget_low_s32(four0.val[1]), vget_low_s32(four2.val[1]));
+ auto out3 =
+ vcombine_s32(vget_low_s32(four1.val[1]), vget_low_s32(four3.val[1]));
+ auto out4 = vcombine_s32(vget_high_s32(four0.val[0]),
+ vget_high_s32(four2.val[0]));
+ auto out5 = vcombine_s32(vget_high_s32(four1.val[0]),
+ vget_high_s32(four3.val[0]));
+ auto out6 = vcombine_s32(vget_high_s32(four0.val[1]),
+ vget_high_s32(four2.val[1]));
+ auto out7 = vcombine_s32(vget_high_s32(four1.val[1]),
+ vget_high_s32(four3.val[1]));
+ vst1q_s16(data_out + j * stride_out + i, vreinterpretq_s16_s32(out0));
+ vst1q_s16(data_out + (j + 1) * stride_out + i,
+ vreinterpretq_s16_s32(out1));
+ vst1q_s16(data_out + (j + 2) * stride_out + i,
+ vreinterpretq_s16_s32(out2));
+ vst1q_s16(data_out + (j + 3) * stride_out + i,
+ vreinterpretq_s16_s32(out3));
+ vst1q_s16(data_out + (j + 4) * stride_out + i,
+ vreinterpretq_s16_s32(out4));
+ vst1q_s16(data_out + (j + 5) * stride_out + i,
+ vreinterpretq_s16_s32(out5));
+ vst1q_s16(data_out + (j + 6) * stride_out + i,
+ vreinterpretq_s16_s32(out6));
+ vst1q_s16(data_out + (j + 7) * stride_out + i,
+ vreinterpretq_s16_s32(out7));
+ }
+ }
+}
+
+template <size_t N>
+struct FastDCTTag {};
+
+#include "lib/jxl/fast_dct128-inl.h"
+#include "lib/jxl/fast_dct16-inl.h"
+#include "lib/jxl/fast_dct256-inl.h"
+#include "lib/jxl/fast_dct32-inl.h"
+#include "lib/jxl/fast_dct64-inl.h"
+#include "lib/jxl/fast_dct8-inl.h"
+
+template <size_t ROWS, size_t COLS>
+struct ComputeFastScaledIDCT {
+ // scratch_space must be aligned, and should have space for ROWS*COLS
+ // int16_ts.
+ HWY_MAYBE_UNUSED void operator()(int16_t* JXL_RESTRICT from, int16_t* to,
+ size_t to_stride,
+ int16_t* JXL_RESTRICT scratch_space) {
+ // Reverse the steps done in ComputeScaledDCT.
+ if (ROWS < COLS) {
+ FastTransposeBlock(from, COLS, ROWS, COLS, scratch_space, ROWS);
+ FastIDCT(FastDCTTag<COLS>(), scratch_space, ROWS, from, ROWS, ROWS);
+ FastTransposeBlock(from, ROWS, COLS, ROWS, scratch_space, COLS);
+ FastIDCT(FastDCTTag<ROWS>(), scratch_space, COLS, to, to_stride, COLS);
+ } else {
+ FastIDCT(FastDCTTag<COLS>(), from, ROWS, scratch_space, ROWS, ROWS);
+ FastTransposeBlock(scratch_space, ROWS, COLS, ROWS, from, COLS);
+ FastIDCT(FastDCTTag<ROWS>(), from, COLS, to, to_stride, COLS);
+ }
+ }
+};
+#endif
+
+template <size_t N, size_t M>
+HWY_NOINLINE void TestFastIDCT() {
+#if HWY_TARGET == HWY_NEON
+ auto pixels_mem = hwy::AllocateAligned<float>(N * M);
+ float* pixels = pixels_mem.get();
+ auto dct_mem = hwy::AllocateAligned<float>(N * M);
+ float* dct = dct_mem.get();
+ auto dct_i_mem = hwy::AllocateAligned<int16_t>(N * M);
+ int16_t* dct_i = dct_i_mem.get();
+ auto dct_in_mem = hwy::AllocateAligned<int16_t>(N * M);
+ int16_t* dct_in = dct_in_mem.get();
+ auto idct_mem = hwy::AllocateAligned<int16_t>(N * M);
+ int16_t* idct = idct_mem.get();
+
+ auto scratch_space_mem = hwy::AllocateAligned<float>(N * M * 2);
+ float* scratch_space = scratch_space_mem.get();
+ auto scratch_space_i_mem = hwy::AllocateAligned<int16_t>(N * M * 2);
+ int16_t* scratch_space_i = scratch_space_i_mem.get();
+
+ Rng rng(0);
+ for (size_t i = 0; i < N * M; i++) {
+ pixels[i] = rng.UniformF(-1, 1);
+ }
+ ComputeScaledDCT<M, N>()(DCTFrom(pixels, N), dct, scratch_space);
+ size_t integer_bits = std::max(FastIDCTIntegerBits(FastDCTTag<N>()),
+ FastIDCTIntegerBits(FastDCTTag<M>()));
+ // Enough range for [-2, 2] output values.
+ JXL_ASSERT(integer_bits <= 14);
+ float scale = (1 << (14 - integer_bits));
+ for (size_t i = 0; i < N * M; i++) {
+ dct_i[i] = std::round(dct[i] * scale);
+ }
+
+ for (size_t j = 0; j < 40000000 / (M * N); j++) {
+ memcpy(dct_in, dct_i, sizeof(*dct_i) * N * M);
+ ComputeFastScaledIDCT<M, N>()(dct_in, idct, N, scratch_space_i);
+ }
+ float max_error = 0;
+ for (size_t i = 0; i < M * N; i++) {
+ float err = std::abs(idct[i] * (1.0f / scale) - pixels[i]);
+ if (std::abs(err) > max_error) {
+ max_error = std::abs(err);
+ }
+ }
+ printf("max error: %f mantissa bits: %d\n", max_error,
+ 14 - (int)integer_bits);
+#endif
+}
+
+template <size_t N, size_t M>
+HWY_NOINLINE void TestFloatIDCT() {
+ auto pixels_mem = hwy::AllocateAligned<float>(N * M);
+ float* pixels = pixels_mem.get();
+ auto dct_mem = hwy::AllocateAligned<float>(N * M);
+ float* dct = dct_mem.get();
+ auto idct_mem = hwy::AllocateAligned<float>(N * M);
+ float* idct = idct_mem.get();
+
+ auto dct_in_mem = hwy::AllocateAligned<float>(N * M);
+ float* dct_in = dct_mem.get();
+
+ auto scratch_space_mem = hwy::AllocateAligned<float>(N * M * 2);
+ float* scratch_space = scratch_space_mem.get();
+
+ Rng rng(0);
+ for (size_t i = 0; i < N * M; i++) {
+ pixels[i] = rng.UniformF(-1, 1);
+ }
+ ComputeScaledDCT<M, N>()(DCTFrom(pixels, N), dct, scratch_space);
+
+ for (size_t j = 0; j < 40000000 / (M * N); j++) {
+ memcpy(dct_in, dct, sizeof(*dct) * N * M);
+ ComputeScaledIDCT<M, N>()(dct_in, DCTTo(idct, N), scratch_space);
+ }
+ float max_error = 0;
+ for (size_t i = 0; i < M * N; i++) {
+ float err = std::abs(idct[i] - pixels[i]);
+ if (std::abs(err) > max_error) {
+ max_error = std::abs(err);
+ }
+ }
+ printf("max error: %e\n", max_error);
+}
+
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif // LIB_JXL_FAST_DCT_INL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/fast_dct.cc b/third_party/jpeg-xl/lib/jxl/fast_dct.cc
new file mode 100644
index 0000000000..d796018fd0
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/fast_dct.cc
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/fast_dct.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/dct-inl.h"
+#include "lib/jxl/fast_dct-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+void BenchmarkFloatIDCT32x32() { TestFloatIDCT<32, 32>(); }
+void BenchmarkFastIDCT32x32() { TestFastIDCT<32, 32>(); }
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(BenchmarkFloatIDCT32x32);
+HWY_EXPORT(BenchmarkFastIDCT32x32);
+void BenchmarkFloatIDCT32x32() {
+ HWY_DYNAMIC_DISPATCH(BenchmarkFloatIDCT32x32)();
+}
+void BenchmarkFastIDCT32x32() {
+ HWY_DYNAMIC_DISPATCH(BenchmarkFastIDCT32x32)();
+}
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/fast_dct.h b/third_party/jpeg-xl/lib/jxl/fast_dct.h
new file mode 100644
index 0000000000..641933d8a0
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/fast_dct.h
@@ -0,0 +1,9 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+namespace jxl {
+void BenchmarkFloatIDCT32x32();
+void BenchmarkFastIDCT32x32();
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/fast_dct128-inl.h b/third_party/jpeg-xl/lib/jxl/fast_dct128-inl.h
new file mode 100644
index 0000000000..1a94d3ee92
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/fast_dct128-inl.h
@@ -0,0 +1,2137 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/* This file is automatically generated. Do not modify it directly. */
+#if HWY_TARGET != HWY_NEON
+#error "only include this file from fast_dct-inl.h"
+#endif
+
+constexpr size_t FastIDCTIntegerBits(FastDCTTag<128>) { return 2; }
+
+void FastIDCT(FastDCTTag<128>, const int16_t* in, size_t in_stride,
+ int16_t* out, size_t out_stride, size_t count) {
+ JXL_ASSERT(count % 8 == 0);
+ for (size_t i = 0; i < count; i += 8) {
+ int16x8_t v0 = vld1q_s16(in + in_stride * 0 + i);
+ int16x8_t v1 = vld1q_s16(in + in_stride * 64 + i);
+ int16x8_t v2 = vaddq_s16(v0, v1);
+ int16x8_t v3 = vld1q_s16(in + in_stride * 32 + i);
+ int16x8_t v4_tmp = vqrdmulhq_n_s16(v3, 13573);
+ int16x8_t v4 = vaddq_s16(v4_tmp, v3);
+ int16x8_t v5 = vld1q_s16(in + in_stride * 96 + i);
+ int16x8_t v6 = vaddq_s16(v5, v3);
+ int16x8_t v7 = vaddq_s16(v4, v6);
+ int16x8_t v8 = vqrdmulhq_n_s16(v7, 17734);
+ int16x8_t v9 = vaddq_s16(v2, v8);
+ int16x8_t v10 = vld1q_s16(in + in_stride * 16 + i);
+ int16x8_t v11_tmp = vqrdmulhq_n_s16(v10, 13573);
+ int16x8_t v11 = vaddq_s16(v11_tmp, v10);
+ int16x8_t v12 = vld1q_s16(in + in_stride * 80 + i);
+ int16x8_t v13 = vld1q_s16(in + in_stride * 48 + i);
+ int16x8_t v14 = vaddq_s16(v12, v13);
+ int16x8_t v15 = vaddq_s16(v11, v14);
+ int16x8_t v16 = vaddq_s16(v13, v10);
+ int16x8_t v17_tmp = vqrdmulhq_n_s16(v16, 13573);
+ int16x8_t v17 = vaddq_s16(v17_tmp, v16);
+ int16x8_t v18 = vld1q_s16(in + in_stride * 112 + i);
+ int16x8_t v19 = vaddq_s16(v18, v12);
+ int16x8_t v20 = vaddq_s16(v19, v16);
+ int16x8_t v21 = vaddq_s16(v17, v20);
+ int16x8_t v22 = vqrdmulhq_n_s16(v21, 17734);
+ int16x8_t v23 = vaddq_s16(v15, v22);
+ int16x8_t v24 = vqrdmulhq_n_s16(v23, 16705);
+ int16x8_t v25 = vaddq_s16(v9, v24);
+ int16x8_t v26 = vld1q_s16(in + in_stride * 8 + i);
+ int16x8_t v27_tmp = vqrdmulhq_n_s16(v26, 13573);
+ int16x8_t v27 = vaddq_s16(v27_tmp, v26);
+ int16x8_t v28 = vld1q_s16(in + in_stride * 72 + i);
+ int16x8_t v29 = vld1q_s16(in + in_stride * 56 + i);
+ int16x8_t v30 = vaddq_s16(v28, v29);
+ int16x8_t v31 = vaddq_s16(v27, v30);
+ int16x8_t v32 = vld1q_s16(in + in_stride * 40 + i);
+ int16x8_t v33 = vld1q_s16(in + in_stride * 24 + i);
+ int16x8_t v34 = vaddq_s16(v32, v33);
+ int16x8_t v35_tmp = vqrdmulhq_n_s16(v34, 13573);
+ int16x8_t v35 = vaddq_s16(v35_tmp, v34);
+ int16x8_t v36 = vld1q_s16(in + in_stride * 104 + i);
+ int16x8_t v37 = vld1q_s16(in + in_stride * 88 + i);
+ int16x8_t v38 = vaddq_s16(v36, v37);
+ int16x8_t v39 = vaddq_s16(v38, v34);
+ int16x8_t v40 = vaddq_s16(v35, v39);
+ int16x8_t v41 = vqrdmulhq_n_s16(v40, 17734);
+ int16x8_t v42 = vaddq_s16(v31, v41);
+ int16x8_t v43 = vaddq_s16(v33, v26);
+ int16x8_t v44_tmp = vqrdmulhq_n_s16(v43, 13573);
+ int16x8_t v44 = vaddq_s16(v44_tmp, v43);
+ int16x8_t v45 = vaddq_s16(v37, v28);
+ int16x8_t v46 = vaddq_s16(v29, v32);
+ int16x8_t v47 = vaddq_s16(v45, v46);
+ int16x8_t v48 = vaddq_s16(v44, v47);
+ int16x8_t v49 = vaddq_s16(v46, v43);
+ int16x8_t v50_tmp = vqrdmulhq_n_s16(v49, 13573);
+ int16x8_t v50 = vaddq_s16(v50_tmp, v49);
+ int16x8_t v51 = vld1q_s16(in + in_stride * 120 + i);
+ int16x8_t v52 = vaddq_s16(v51, v36);
+ int16x8_t v53 = vaddq_s16(v52, v45);
+ int16x8_t v54 = vaddq_s16(v53, v49);
+ int16x8_t v55 = vaddq_s16(v50, v54);
+ int16x8_t v56 = vqrdmulhq_n_s16(v55, 17734);
+ int16x8_t v57 = vaddq_s16(v48, v56);
+ int16x8_t v58 = vqrdmulhq_n_s16(v57, 16705);
+ int16x8_t v59 = vaddq_s16(v42, v58);
+ int16x8_t v60 = vqrdmulhq_n_s16(v59, 16463);
+ int16x8_t v61 = vaddq_s16(v25, v60);
+ int16x8_t v62 = vld1q_s16(in + in_stride * 4 + i);
+ int16x8_t v63_tmp = vqrdmulhq_n_s16(v62, 13573);
+ int16x8_t v63 = vaddq_s16(v63_tmp, v62);
+ int16x8_t v64 = vld1q_s16(in + in_stride * 68 + i);
+ int16x8_t v65 = vld1q_s16(in + in_stride * 60 + i);
+ int16x8_t v66 = vaddq_s16(v64, v65);
+ int16x8_t v67 = vaddq_s16(v63, v66);
+ int16x8_t v68 = vld1q_s16(in + in_stride * 36 + i);
+ int16x8_t v69 = vld1q_s16(in + in_stride * 28 + i);
+ int16x8_t v70 = vaddq_s16(v68, v69);
+ int16x8_t v71_tmp = vqrdmulhq_n_s16(v70, 13573);
+ int16x8_t v71 = vaddq_s16(v71_tmp, v70);
+ int16x8_t v72 = vld1q_s16(in + in_stride * 100 + i);
+ int16x8_t v73 = vld1q_s16(in + in_stride * 92 + i);
+ int16x8_t v74 = vaddq_s16(v72, v73);
+ int16x8_t v75 = vaddq_s16(v74, v70);
+ int16x8_t v76 = vaddq_s16(v71, v75);
+ int16x8_t v77 = vqrdmulhq_n_s16(v76, 17734);
+ int16x8_t v78 = vaddq_s16(v67, v77);
+ int16x8_t v79 = vld1q_s16(in + in_stride * 20 + i);
+ int16x8_t v80 = vld1q_s16(in + in_stride * 12 + i);
+ int16x8_t v81 = vaddq_s16(v79, v80);
+ int16x8_t v82_tmp = vqrdmulhq_n_s16(v81, 13573);
+ int16x8_t v82 = vaddq_s16(v82_tmp, v81);
+ int16x8_t v83 = vld1q_s16(in + in_stride * 84 + i);
+ int16x8_t v84 = vld1q_s16(in + in_stride * 76 + i);
+ int16x8_t v85 = vaddq_s16(v83, v84);
+ int16x8_t v86 = vld1q_s16(in + in_stride * 52 + i);
+ int16x8_t v87 = vld1q_s16(in + in_stride * 44 + i);
+ int16x8_t v88 = vaddq_s16(v86, v87);
+ int16x8_t v89 = vaddq_s16(v85, v88);
+ int16x8_t v90 = vaddq_s16(v82, v89);
+ int16x8_t v91 = vaddq_s16(v88, v81);
+ int16x8_t v92_tmp = vqrdmulhq_n_s16(v91, 13573);
+ int16x8_t v92 = vaddq_s16(v92_tmp, v91);
+ int16x8_t v93 = vld1q_s16(in + in_stride * 116 + i);
+ int16x8_t v94 = vld1q_s16(in + in_stride * 108 + i);
+ int16x8_t v95 = vaddq_s16(v93, v94);
+ int16x8_t v96 = vaddq_s16(v95, v85);
+ int16x8_t v97 = vaddq_s16(v96, v91);
+ int16x8_t v98 = vaddq_s16(v92, v97);
+ int16x8_t v99 = vqrdmulhq_n_s16(v98, 17734);
+ int16x8_t v100 = vaddq_s16(v90, v99);
+ int16x8_t v101 = vqrdmulhq_n_s16(v100, 16705);
+ int16x8_t v102 = vaddq_s16(v78, v101);
+ int16x8_t v103 = vaddq_s16(v80, v62);
+ int16x8_t v104_tmp = vqrdmulhq_n_s16(v103, 13573);
+ int16x8_t v104 = vaddq_s16(v104_tmp, v103);
+ int16x8_t v105 = vaddq_s16(v84, v64);
+ int16x8_t v106 = vaddq_s16(v65, v86);
+ int16x8_t v107 = vaddq_s16(v105, v106);
+ int16x8_t v108 = vaddq_s16(v104, v107);
+ int16x8_t v109 = vaddq_s16(v87, v68);
+ int16x8_t v110 = vaddq_s16(v69, v79);
+ int16x8_t v111 = vaddq_s16(v109, v110);
+ int16x8_t v112_tmp = vqrdmulhq_n_s16(v111, 13573);
+ int16x8_t v112 = vaddq_s16(v112_tmp, v111);
+ int16x8_t v113 = vaddq_s16(v94, v72);
+ int16x8_t v114 = vaddq_s16(v73, v83);
+ int16x8_t v115 = vaddq_s16(v113, v114);
+ int16x8_t v116 = vaddq_s16(v115, v111);
+ int16x8_t v117 = vaddq_s16(v112, v116);
+ int16x8_t v118 = vqrdmulhq_n_s16(v117, 17734);
+ int16x8_t v119 = vaddq_s16(v108, v118);
+ int16x8_t v120 = vaddq_s16(v110, v103);
+ int16x8_t v121_tmp = vqrdmulhq_n_s16(v120, 13573);
+ int16x8_t v121 = vaddq_s16(v121_tmp, v120);
+ int16x8_t v122 = vaddq_s16(v114, v105);
+ int16x8_t v123 = vaddq_s16(v106, v109);
+ int16x8_t v124 = vaddq_s16(v122, v123);
+ int16x8_t v125 = vaddq_s16(v121, v124);
+ int16x8_t v126 = vaddq_s16(v123, v120);
+ int16x8_t v127_tmp = vqrdmulhq_n_s16(v126, 13573);
+ int16x8_t v127 = vaddq_s16(v127_tmp, v126);
+ int16x8_t v128 = vld1q_s16(in + in_stride * 124 + i);
+ int16x8_t v129 = vaddq_s16(v128, v93);
+ int16x8_t v130 = vaddq_s16(v129, v113);
+ int16x8_t v131 = vaddq_s16(v130, v122);
+ int16x8_t v132 = vaddq_s16(v131, v126);
+ int16x8_t v133 = vaddq_s16(v127, v132);
+ int16x8_t v134 = vqrdmulhq_n_s16(v133, 17734);
+ int16x8_t v135 = vaddq_s16(v125, v134);
+ int16x8_t v136 = vqrdmulhq_n_s16(v135, 16705);
+ int16x8_t v137 = vaddq_s16(v119, v136);
+ int16x8_t v138 = vqrdmulhq_n_s16(v137, 16463);
+ int16x8_t v139 = vaddq_s16(v102, v138);
+ int16x8_t v140 = vqrdmulhq_n_s16(v139, 16404);
+ int16x8_t v141 = vaddq_s16(v61, v140);
+ int16x8_t v142 = vld1q_s16(in + in_stride * 2 + i);
+ int16x8_t v143_tmp = vqrdmulhq_n_s16(v142, 13573);
+ int16x8_t v143 = vaddq_s16(v143_tmp, v142);
+ int16x8_t v144 = vld1q_s16(in + in_stride * 66 + i);
+ int16x8_t v145 = vld1q_s16(in + in_stride * 62 + i);
+ int16x8_t v146 = vaddq_s16(v144, v145);
+ int16x8_t v147 = vaddq_s16(v143, v146);
+ int16x8_t v148 = vld1q_s16(in + in_stride * 34 + i);
+ int16x8_t v149 = vld1q_s16(in + in_stride * 30 + i);
+ int16x8_t v150 = vaddq_s16(v148, v149);
+ int16x8_t v151_tmp = vqrdmulhq_n_s16(v150, 13573);
+ int16x8_t v151 = vaddq_s16(v151_tmp, v150);
+ int16x8_t v152 = vld1q_s16(in + in_stride * 98 + i);
+ int16x8_t v153 = vld1q_s16(in + in_stride * 94 + i);
+ int16x8_t v154 = vaddq_s16(v152, v153);
+ int16x8_t v155 = vaddq_s16(v154, v150);
+ int16x8_t v156 = vaddq_s16(v151, v155);
+ int16x8_t v157 = vqrdmulhq_n_s16(v156, 17734);
+ int16x8_t v158 = vaddq_s16(v147, v157);
+ int16x8_t v159 = vld1q_s16(in + in_stride * 18 + i);
+ int16x8_t v160 = vld1q_s16(in + in_stride * 14 + i);
+ int16x8_t v161 = vaddq_s16(v159, v160);
+ int16x8_t v162_tmp = vqrdmulhq_n_s16(v161, 13573);
+ int16x8_t v162 = vaddq_s16(v162_tmp, v161);
+ int16x8_t v163 = vld1q_s16(in + in_stride * 82 + i);
+ int16x8_t v164 = vld1q_s16(in + in_stride * 78 + i);
+ int16x8_t v165 = vaddq_s16(v163, v164);
+ int16x8_t v166 = vld1q_s16(in + in_stride * 50 + i);
+ int16x8_t v167 = vld1q_s16(in + in_stride * 46 + i);
+ int16x8_t v168 = vaddq_s16(v166, v167);
+ int16x8_t v169 = vaddq_s16(v165, v168);
+ int16x8_t v170 = vaddq_s16(v162, v169);
+ int16x8_t v171 = vaddq_s16(v168, v161);
+ int16x8_t v172_tmp = vqrdmulhq_n_s16(v171, 13573);
+ int16x8_t v172 = vaddq_s16(v172_tmp, v171);
+ int16x8_t v173 = vld1q_s16(in + in_stride * 114 + i);
+ int16x8_t v174 = vld1q_s16(in + in_stride * 110 + i);
+ int16x8_t v175 = vaddq_s16(v173, v174);
+ int16x8_t v176 = vaddq_s16(v175, v165);
+ int16x8_t v177 = vaddq_s16(v176, v171);
+ int16x8_t v178 = vaddq_s16(v172, v177);
+ int16x8_t v179 = vqrdmulhq_n_s16(v178, 17734);
+ int16x8_t v180 = vaddq_s16(v170, v179);
+ int16x8_t v181 = vqrdmulhq_n_s16(v180, 16705);
+ int16x8_t v182 = vaddq_s16(v158, v181);
+ int16x8_t v183 = vld1q_s16(in + in_stride * 10 + i);
+ int16x8_t v184 = vld1q_s16(in + in_stride * 6 + i);
+ int16x8_t v185 = vaddq_s16(v183, v184);
+ int16x8_t v186_tmp = vqrdmulhq_n_s16(v185, 13573);
+ int16x8_t v186 = vaddq_s16(v186_tmp, v185);
+ int16x8_t v187 = vld1q_s16(in + in_stride * 74 + i);
+ int16x8_t v188 = vld1q_s16(in + in_stride * 70 + i);
+ int16x8_t v189 = vaddq_s16(v187, v188);
+ int16x8_t v190 = vld1q_s16(in + in_stride * 58 + i);
+ int16x8_t v191 = vld1q_s16(in + in_stride * 54 + i);
+ int16x8_t v192 = vaddq_s16(v190, v191);
+ int16x8_t v193 = vaddq_s16(v189, v192);
+ int16x8_t v194 = vaddq_s16(v186, v193);
+ int16x8_t v195 = vld1q_s16(in + in_stride * 42 + i);
+ int16x8_t v196 = vld1q_s16(in + in_stride * 38 + i);
+ int16x8_t v197 = vaddq_s16(v195, v196);
+ int16x8_t v198 = vld1q_s16(in + in_stride * 26 + i);
+ int16x8_t v199 = vld1q_s16(in + in_stride * 22 + i);
+ int16x8_t v200 = vaddq_s16(v198, v199);
+ int16x8_t v201 = vaddq_s16(v197, v200);
+ int16x8_t v202_tmp = vqrdmulhq_n_s16(v201, 13573);
+ int16x8_t v202 = vaddq_s16(v202_tmp, v201);
+ int16x8_t v203 = vld1q_s16(in + in_stride * 106 + i);
+ int16x8_t v204 = vld1q_s16(in + in_stride * 102 + i);
+ int16x8_t v205 = vaddq_s16(v203, v204);
+ int16x8_t v206 = vld1q_s16(in + in_stride * 90 + i);
+ int16x8_t v207 = vld1q_s16(in + in_stride * 86 + i);
+ int16x8_t v208 = vaddq_s16(v206, v207);
+ int16x8_t v209 = vaddq_s16(v205, v208);
+ int16x8_t v210 = vaddq_s16(v209, v201);
+ int16x8_t v211 = vaddq_s16(v202, v210);
+ int16x8_t v212 = vqrdmulhq_n_s16(v211, 17734);
+ int16x8_t v213 = vaddq_s16(v194, v212);
+ int16x8_t v214 = vaddq_s16(v200, v185);
+ int16x8_t v215_tmp = vqrdmulhq_n_s16(v214, 13573);
+ int16x8_t v215 = vaddq_s16(v215_tmp, v214);
+ int16x8_t v216 = vaddq_s16(v208, v189);
+ int16x8_t v217 = vaddq_s16(v192, v197);
+ int16x8_t v218 = vaddq_s16(v216, v217);
+ int16x8_t v219 = vaddq_s16(v215, v218);
+ int16x8_t v220 = vaddq_s16(v217, v214);
+ int16x8_t v221_tmp = vqrdmulhq_n_s16(v220, 13573);
+ int16x8_t v221 = vaddq_s16(v221_tmp, v220);
+ int16x8_t v222 = vld1q_s16(in + in_stride * 122 + i);
+ int16x8_t v223 = vld1q_s16(in + in_stride * 118 + i);
+ int16x8_t v224 = vaddq_s16(v222, v223);
+ int16x8_t v225 = vaddq_s16(v224, v205);
+ int16x8_t v226 = vaddq_s16(v225, v216);
+ int16x8_t v227 = vaddq_s16(v226, v220);
+ int16x8_t v228 = vaddq_s16(v221, v227);
+ int16x8_t v229 = vqrdmulhq_n_s16(v228, 17734);
+ int16x8_t v230 = vaddq_s16(v219, v229);
+ int16x8_t v231 = vqrdmulhq_n_s16(v230, 16705);
+ int16x8_t v232 = vaddq_s16(v213, v231);
+ int16x8_t v233 = vqrdmulhq_n_s16(v232, 16463);
+ int16x8_t v234 = vaddq_s16(v182, v233);
+ int16x8_t v235 = vaddq_s16(v184, v142);
+ int16x8_t v236_tmp = vqrdmulhq_n_s16(v235, 13573);
+ int16x8_t v236 = vaddq_s16(v236_tmp, v235);
+ int16x8_t v237 = vaddq_s16(v188, v144);
+ int16x8_t v238 = vaddq_s16(v145, v190);
+ int16x8_t v239 = vaddq_s16(v237, v238);
+ int16x8_t v240 = vaddq_s16(v236, v239);
+ int16x8_t v241 = vaddq_s16(v196, v148);
+ int16x8_t v242 = vaddq_s16(v149, v198);
+ int16x8_t v243 = vaddq_s16(v241, v242);
+ int16x8_t v244_tmp = vqrdmulhq_n_s16(v243, 13573);
+ int16x8_t v244 = vaddq_s16(v244_tmp, v243);
+ int16x8_t v245 = vaddq_s16(v204, v152);
+ int16x8_t v246 = vaddq_s16(v153, v206);
+ int16x8_t v247 = vaddq_s16(v245, v246);
+ int16x8_t v248 = vaddq_s16(v247, v243);
+ int16x8_t v249 = vaddq_s16(v244, v248);
+ int16x8_t v250 = vqrdmulhq_n_s16(v249, 17734);
+ int16x8_t v251 = vaddq_s16(v240, v250);
+ int16x8_t v252 = vaddq_s16(v199, v159);
+ int16x8_t v253 = vaddq_s16(v160, v183);
+ int16x8_t v254 = vaddq_s16(v252, v253);
+ int16x8_t v255_tmp = vqrdmulhq_n_s16(v254, 13573);
+ int16x8_t v255 = vaddq_s16(v255_tmp, v254);
+ int16x8_t v256 = vaddq_s16(v207, v163);
+ int16x8_t v257 = vaddq_s16(v164, v187);
+ int16x8_t v258 = vaddq_s16(v256, v257);
+ int16x8_t v259 = vaddq_s16(v191, v166);
+ int16x8_t v260 = vaddq_s16(v167, v195);
+ int16x8_t v261 = vaddq_s16(v259, v260);
+ int16x8_t v262 = vaddq_s16(v258, v261);
+ int16x8_t v263 = vaddq_s16(v255, v262);
+ int16x8_t v264 = vaddq_s16(v261, v254);
+ int16x8_t v265_tmp = vqrdmulhq_n_s16(v264, 13573);
+ int16x8_t v265 = vaddq_s16(v265_tmp, v264);
+ int16x8_t v266 = vaddq_s16(v223, v173);
+ int16x8_t v267 = vaddq_s16(v174, v203);
+ int16x8_t v268 = vaddq_s16(v266, v267);
+ int16x8_t v269 = vaddq_s16(v268, v258);
+ int16x8_t v270 = vaddq_s16(v269, v264);
+ int16x8_t v271 = vaddq_s16(v265, v270);
+ int16x8_t v272 = vqrdmulhq_n_s16(v271, 17734);
+ int16x8_t v273 = vaddq_s16(v263, v272);
+ int16x8_t v274 = vqrdmulhq_n_s16(v273, 16705);
+ int16x8_t v275 = vaddq_s16(v251, v274);
+ int16x8_t v276 = vaddq_s16(v253, v235);
+ int16x8_t v277_tmp = vqrdmulhq_n_s16(v276, 13573);
+ int16x8_t v277 = vaddq_s16(v277_tmp, v276);
+ int16x8_t v278 = vaddq_s16(v257, v237);
+ int16x8_t v279 = vaddq_s16(v238, v259);
+ int16x8_t v280 = vaddq_s16(v278, v279);
+ int16x8_t v281 = vaddq_s16(v277, v280);
+ int16x8_t v282 = vaddq_s16(v260, v241);
+ int16x8_t v283 = vaddq_s16(v242, v252);
+ int16x8_t v284 = vaddq_s16(v282, v283);
+ int16x8_t v285_tmp = vqrdmulhq_n_s16(v284, 13573);
+ int16x8_t v285 = vaddq_s16(v285_tmp, v284);
+ int16x8_t v286 = vaddq_s16(v267, v245);
+ int16x8_t v287 = vaddq_s16(v246, v256);
+ int16x8_t v288 = vaddq_s16(v286, v287);
+ int16x8_t v289 = vaddq_s16(v288, v284);
+ int16x8_t v290 = vaddq_s16(v285, v289);
+ int16x8_t v291 = vqrdmulhq_n_s16(v290, 17734);
+ int16x8_t v292 = vaddq_s16(v281, v291);
+ int16x8_t v293 = vaddq_s16(v283, v276);
+ int16x8_t v294_tmp = vqrdmulhq_n_s16(v293, 13573);
+ int16x8_t v294 = vaddq_s16(v294_tmp, v293);
+ int16x8_t v295 = vaddq_s16(v287, v278);
+ int16x8_t v296 = vaddq_s16(v279, v282);
+ int16x8_t v297 = vaddq_s16(v295, v296);
+ int16x8_t v298 = vaddq_s16(v294, v297);
+ int16x8_t v299 = vaddq_s16(v296, v293);
+ int16x8_t v300_tmp = vqrdmulhq_n_s16(v299, 13573);
+ int16x8_t v300 = vaddq_s16(v300_tmp, v299);
+ int16x8_t v301 = vld1q_s16(in + in_stride * 126 + i);
+ int16x8_t v302 = vaddq_s16(v301, v222);
+ int16x8_t v303 = vaddq_s16(v302, v266);
+ int16x8_t v304 = vaddq_s16(v303, v286);
+ int16x8_t v305 = vaddq_s16(v304, v295);
+ int16x8_t v306 = vaddq_s16(v305, v299);
+ int16x8_t v307 = vaddq_s16(v300, v306);
+ int16x8_t v308 = vqrdmulhq_n_s16(v307, 17734);
+ int16x8_t v309 = vaddq_s16(v298, v308);
+ int16x8_t v310 = vqrdmulhq_n_s16(v309, 16705);
+ int16x8_t v311 = vaddq_s16(v292, v310);
+ int16x8_t v312 = vqrdmulhq_n_s16(v311, 16463);
+ int16x8_t v313 = vaddq_s16(v275, v312);
+ int16x8_t v314 = vqrdmulhq_n_s16(v313, 16404);
+ int16x8_t v315 = vaddq_s16(v234, v314);
+ int16x8_t v316 = vqrdmulhq_n_s16(v315, 16389);
+ int16x8_t v317 = vaddq_s16(v141, v316);
+ int16x8_t v318 = vld1q_s16(in + in_stride * 1 + i);
+ int16x8_t v319_tmp = vqrdmulhq_n_s16(v318, 13573);
+ int16x8_t v319 = vaddq_s16(v319_tmp, v318);
+ int16x8_t v320 = vld1q_s16(in + in_stride * 65 + i);
+ int16x8_t v321 = vld1q_s16(in + in_stride * 63 + i);
+ int16x8_t v322 = vaddq_s16(v320, v321);
+ int16x8_t v323 = vaddq_s16(v319, v322);
+ int16x8_t v324 = vld1q_s16(in + in_stride * 33 + i);
+ int16x8_t v325 = vld1q_s16(in + in_stride * 31 + i);
+ int16x8_t v326 = vaddq_s16(v324, v325);
+ int16x8_t v327_tmp = vqrdmulhq_n_s16(v326, 13573);
+ int16x8_t v327 = vaddq_s16(v327_tmp, v326);
+ int16x8_t v328 = vld1q_s16(in + in_stride * 97 + i);
+ int16x8_t v329 = vld1q_s16(in + in_stride * 95 + i);
+ int16x8_t v330 = vaddq_s16(v328, v329);
+ int16x8_t v331 = vaddq_s16(v330, v326);
+ int16x8_t v332 = vaddq_s16(v327, v331);
+ int16x8_t v333 = vqrdmulhq_n_s16(v332, 17734);
+ int16x8_t v334 = vaddq_s16(v323, v333);
+ int16x8_t v335 = vld1q_s16(in + in_stride * 17 + i);
+ int16x8_t v336 = vld1q_s16(in + in_stride * 15 + i);
+ int16x8_t v337 = vaddq_s16(v335, v336);
+ int16x8_t v338_tmp = vqrdmulhq_n_s16(v337, 13573);
+ int16x8_t v338 = vaddq_s16(v338_tmp, v337);
+ int16x8_t v339 = vld1q_s16(in + in_stride * 81 + i);
+ int16x8_t v340 = vld1q_s16(in + in_stride * 79 + i);
+ int16x8_t v341 = vaddq_s16(v339, v340);
+ int16x8_t v342 = vld1q_s16(in + in_stride * 49 + i);
+ int16x8_t v343 = vld1q_s16(in + in_stride * 47 + i);
+ int16x8_t v344 = vaddq_s16(v342, v343);
+ int16x8_t v345 = vaddq_s16(v341, v344);
+ int16x8_t v346 = vaddq_s16(v338, v345);
+ int16x8_t v347 = vaddq_s16(v344, v337);
+ int16x8_t v348_tmp = vqrdmulhq_n_s16(v347, 13573);
+ int16x8_t v348 = vaddq_s16(v348_tmp, v347);
+ int16x8_t v349 = vld1q_s16(in + in_stride * 113 + i);
+ int16x8_t v350 = vld1q_s16(in + in_stride * 111 + i);
+ int16x8_t v351 = vaddq_s16(v349, v350);
+ int16x8_t v352 = vaddq_s16(v351, v341);
+ int16x8_t v353 = vaddq_s16(v352, v347);
+ int16x8_t v354 = vaddq_s16(v348, v353);
+ int16x8_t v355 = vqrdmulhq_n_s16(v354, 17734);
+ int16x8_t v356 = vaddq_s16(v346, v355);
+ int16x8_t v357 = vqrdmulhq_n_s16(v356, 16705);
+ int16x8_t v358 = vaddq_s16(v334, v357);
+ int16x8_t v359 = vld1q_s16(in + in_stride * 9 + i);
+ int16x8_t v360 = vld1q_s16(in + in_stride * 7 + i);
+ int16x8_t v361 = vaddq_s16(v359, v360);
+ int16x8_t v362_tmp = vqrdmulhq_n_s16(v361, 13573);
+ int16x8_t v362 = vaddq_s16(v362_tmp, v361);
+ int16x8_t v363 = vld1q_s16(in + in_stride * 73 + i);
+ int16x8_t v364 = vld1q_s16(in + in_stride * 71 + i);
+ int16x8_t v365 = vaddq_s16(v363, v364);
+ int16x8_t v366 = vld1q_s16(in + in_stride * 57 + i);
+ int16x8_t v367 = vld1q_s16(in + in_stride * 55 + i);
+ int16x8_t v368 = vaddq_s16(v366, v367);
+ int16x8_t v369 = vaddq_s16(v365, v368);
+ int16x8_t v370 = vaddq_s16(v362, v369);
+ int16x8_t v371 = vld1q_s16(in + in_stride * 41 + i);
+ int16x8_t v372 = vld1q_s16(in + in_stride * 39 + i);
+ int16x8_t v373 = vaddq_s16(v371, v372);
+ int16x8_t v374 = vld1q_s16(in + in_stride * 25 + i);
+ int16x8_t v375 = vld1q_s16(in + in_stride * 23 + i);
+ int16x8_t v376 = vaddq_s16(v374, v375);
+ int16x8_t v377 = vaddq_s16(v373, v376);
+ int16x8_t v378_tmp = vqrdmulhq_n_s16(v377, 13573);
+ int16x8_t v378 = vaddq_s16(v378_tmp, v377);
+ int16x8_t v379 = vld1q_s16(in + in_stride * 105 + i);
+ int16x8_t v380 = vld1q_s16(in + in_stride * 103 + i);
+ int16x8_t v381 = vaddq_s16(v379, v380);
+ int16x8_t v382 = vld1q_s16(in + in_stride * 89 + i);
+ int16x8_t v383 = vld1q_s16(in + in_stride * 87 + i);
+ int16x8_t v384 = vaddq_s16(v382, v383);
+ int16x8_t v385 = vaddq_s16(v381, v384);
+ int16x8_t v386 = vaddq_s16(v385, v377);
+ int16x8_t v387 = vaddq_s16(v378, v386);
+ int16x8_t v388 = vqrdmulhq_n_s16(v387, 17734);
+ int16x8_t v389 = vaddq_s16(v370, v388);
+ int16x8_t v390 = vaddq_s16(v376, v361);
+ int16x8_t v391_tmp = vqrdmulhq_n_s16(v390, 13573);
+ int16x8_t v391 = vaddq_s16(v391_tmp, v390);
+ int16x8_t v392 = vaddq_s16(v384, v365);
+ int16x8_t v393 = vaddq_s16(v368, v373);
+ int16x8_t v394 = vaddq_s16(v392, v393);
+ int16x8_t v395 = vaddq_s16(v391, v394);
+ int16x8_t v396 = vaddq_s16(v393, v390);
+ int16x8_t v397_tmp = vqrdmulhq_n_s16(v396, 13573);
+ int16x8_t v397 = vaddq_s16(v397_tmp, v396);
+ int16x8_t v398 = vld1q_s16(in + in_stride * 121 + i);
+ int16x8_t v399 = vld1q_s16(in + in_stride * 119 + i);
+ int16x8_t v400 = vaddq_s16(v398, v399);
+ int16x8_t v401 = vaddq_s16(v400, v381);
+ int16x8_t v402 = vaddq_s16(v401, v392);
+ int16x8_t v403 = vaddq_s16(v402, v396);
+ int16x8_t v404 = vaddq_s16(v397, v403);
+ int16x8_t v405 = vqrdmulhq_n_s16(v404, 17734);
+ int16x8_t v406 = vaddq_s16(v395, v405);
+ int16x8_t v407 = vqrdmulhq_n_s16(v406, 16705);
+ int16x8_t v408 = vaddq_s16(v389, v407);
+ int16x8_t v409 = vqrdmulhq_n_s16(v408, 16463);
+ int16x8_t v410 = vaddq_s16(v358, v409);
+ int16x8_t v411 = vld1q_s16(in + in_stride * 5 + i);
+ int16x8_t v412 = vld1q_s16(in + in_stride * 3 + i);
+ int16x8_t v413 = vaddq_s16(v411, v412);
+ int16x8_t v414_tmp = vqrdmulhq_n_s16(v413, 13573);
+ int16x8_t v414 = vaddq_s16(v414_tmp, v413);
+ int16x8_t v415 = vld1q_s16(in + in_stride * 69 + i);
+ int16x8_t v416 = vld1q_s16(in + in_stride * 67 + i);
+ int16x8_t v417 = vaddq_s16(v415, v416);
+ int16x8_t v418 = vld1q_s16(in + in_stride * 61 + i);
+ int16x8_t v419 = vld1q_s16(in + in_stride * 59 + i);
+ int16x8_t v420 = vaddq_s16(v418, v419);
+ int16x8_t v421 = vaddq_s16(v417, v420);
+ int16x8_t v422 = vaddq_s16(v414, v421);
+ int16x8_t v423 = vld1q_s16(in + in_stride * 37 + i);
+ int16x8_t v424 = vld1q_s16(in + in_stride * 35 + i);
+ int16x8_t v425 = vaddq_s16(v423, v424);
+ int16x8_t v426 = vld1q_s16(in + in_stride * 29 + i);
+ int16x8_t v427 = vld1q_s16(in + in_stride * 27 + i);
+ int16x8_t v428 = vaddq_s16(v426, v427);
+ int16x8_t v429 = vaddq_s16(v425, v428);
+ int16x8_t v430_tmp = vqrdmulhq_n_s16(v429, 13573);
+ int16x8_t v430 = vaddq_s16(v430_tmp, v429);
+ int16x8_t v431 = vld1q_s16(in + in_stride * 101 + i);
+ int16x8_t v432 = vld1q_s16(in + in_stride * 99 + i);
+ int16x8_t v433 = vaddq_s16(v431, v432);
+ int16x8_t v434 = vld1q_s16(in + in_stride * 93 + i);
+ int16x8_t v435 = vld1q_s16(in + in_stride * 91 + i);
+ int16x8_t v436 = vaddq_s16(v434, v435);
+ int16x8_t v437 = vaddq_s16(v433, v436);
+ int16x8_t v438 = vaddq_s16(v437, v429);
+ int16x8_t v439 = vaddq_s16(v430, v438);
+ int16x8_t v440 = vqrdmulhq_n_s16(v439, 17734);
+ int16x8_t v441 = vaddq_s16(v422, v440);
+ int16x8_t v442 = vld1q_s16(in + in_stride * 21 + i);
+ int16x8_t v443 = vld1q_s16(in + in_stride * 19 + i);
+ int16x8_t v444 = vaddq_s16(v442, v443);
+ int16x8_t v445 = vld1q_s16(in + in_stride * 13 + i);
+ int16x8_t v446 = vld1q_s16(in + in_stride * 11 + i);
+ int16x8_t v447 = vaddq_s16(v445, v446);
+ int16x8_t v448 = vaddq_s16(v444, v447);
+ int16x8_t v449_tmp = vqrdmulhq_n_s16(v448, 13573);
+ int16x8_t v449 = vaddq_s16(v449_tmp, v448);
+ int16x8_t v450 = vld1q_s16(in + in_stride * 85 + i);
+ int16x8_t v451 = vld1q_s16(in + in_stride * 83 + i);
+ int16x8_t v452 = vaddq_s16(v450, v451);
+ int16x8_t v453 = vld1q_s16(in + in_stride * 77 + i);
+ int16x8_t v454 = vld1q_s16(in + in_stride * 75 + i);
+ int16x8_t v455 = vaddq_s16(v453, v454);
+ int16x8_t v456 = vaddq_s16(v452, v455);
+ int16x8_t v457 = vld1q_s16(in + in_stride * 53 + i);
+ int16x8_t v458 = vld1q_s16(in + in_stride * 51 + i);
+ int16x8_t v459 = vaddq_s16(v457, v458);
+ int16x8_t v460 = vld1q_s16(in + in_stride * 45 + i);
+ int16x8_t v461 = vld1q_s16(in + in_stride * 43 + i);
+ int16x8_t v462 = vaddq_s16(v460, v461);
+ int16x8_t v463 = vaddq_s16(v459, v462);
+ int16x8_t v464 = vaddq_s16(v456, v463);
+ int16x8_t v465 = vaddq_s16(v449, v464);
+ int16x8_t v466 = vaddq_s16(v463, v448);
+ int16x8_t v467_tmp = vqrdmulhq_n_s16(v466, 13573);
+ int16x8_t v467 = vaddq_s16(v467_tmp, v466);
+ int16x8_t v468 = vld1q_s16(in + in_stride * 117 + i);
+ int16x8_t v469 = vld1q_s16(in + in_stride * 115 + i);
+ int16x8_t v470 = vaddq_s16(v468, v469);
+ int16x8_t v471 = vld1q_s16(in + in_stride * 109 + i);
+ int16x8_t v472 = vld1q_s16(in + in_stride * 107 + i);
+ int16x8_t v473 = vaddq_s16(v471, v472);
+ int16x8_t v474 = vaddq_s16(v470, v473);
+ int16x8_t v475 = vaddq_s16(v474, v456);
+ int16x8_t v476 = vaddq_s16(v475, v466);
+ int16x8_t v477 = vaddq_s16(v467, v476);
+ int16x8_t v478 = vqrdmulhq_n_s16(v477, 17734);
+ int16x8_t v479 = vaddq_s16(v465, v478);
+ int16x8_t v480 = vqrdmulhq_n_s16(v479, 16705);
+ int16x8_t v481 = vaddq_s16(v441, v480);
+ int16x8_t v482 = vaddq_s16(v447, v413);
+ int16x8_t v483_tmp = vqrdmulhq_n_s16(v482, 13573);
+ int16x8_t v483 = vaddq_s16(v483_tmp, v482);
+ int16x8_t v484 = vaddq_s16(v455, v417);
+ int16x8_t v485 = vaddq_s16(v420, v459);
+ int16x8_t v486 = vaddq_s16(v484, v485);
+ int16x8_t v487 = vaddq_s16(v483, v486);
+ int16x8_t v488 = vaddq_s16(v462, v425);
+ int16x8_t v489 = vaddq_s16(v428, v444);
+ int16x8_t v490 = vaddq_s16(v488, v489);
+ int16x8_t v491_tmp = vqrdmulhq_n_s16(v490, 13573);
+ int16x8_t v491 = vaddq_s16(v491_tmp, v490);
+ int16x8_t v492 = vaddq_s16(v473, v433);
+ int16x8_t v493 = vaddq_s16(v436, v452);
+ int16x8_t v494 = vaddq_s16(v492, v493);
+ int16x8_t v495 = vaddq_s16(v494, v490);
+ int16x8_t v496 = vaddq_s16(v491, v495);
+ int16x8_t v497 = vqrdmulhq_n_s16(v496, 17734);
+ int16x8_t v498 = vaddq_s16(v487, v497);
+ int16x8_t v499 = vaddq_s16(v489, v482);
+ int16x8_t v500_tmp = vqrdmulhq_n_s16(v499, 13573);
+ int16x8_t v500 = vaddq_s16(v500_tmp, v499);
+ int16x8_t v501 = vaddq_s16(v493, v484);
+ int16x8_t v502 = vaddq_s16(v485, v488);
+ int16x8_t v503 = vaddq_s16(v501, v502);
+ int16x8_t v504 = vaddq_s16(v500, v503);
+ int16x8_t v505 = vaddq_s16(v502, v499);
+ int16x8_t v506_tmp = vqrdmulhq_n_s16(v505, 13573);
+ int16x8_t v506 = vaddq_s16(v506_tmp, v505);
+ int16x8_t v507 = vld1q_s16(in + in_stride * 125 + i);
+ int16x8_t v508 = vld1q_s16(in + in_stride * 123 + i);
+ int16x8_t v509 = vaddq_s16(v507, v508);
+ int16x8_t v510 = vaddq_s16(v509, v470);
+ int16x8_t v511 = vaddq_s16(v510, v492);
+ int16x8_t v512 = vaddq_s16(v511, v501);
+ int16x8_t v513 = vaddq_s16(v512, v505);
+ int16x8_t v514 = vaddq_s16(v506, v513);
+ int16x8_t v515 = vqrdmulhq_n_s16(v514, 17734);
+ int16x8_t v516 = vaddq_s16(v504, v515);
+ int16x8_t v517 = vqrdmulhq_n_s16(v516, 16705);
+ int16x8_t v518 = vaddq_s16(v498, v517);
+ int16x8_t v519 = vqrdmulhq_n_s16(v518, 16463);
+ int16x8_t v520 = vaddq_s16(v481, v519);
+ int16x8_t v521 = vqrdmulhq_n_s16(v520, 16404);
+ int16x8_t v522 = vaddq_s16(v410, v521);
+ int16x8_t v523 = vaddq_s16(v412, v318);
+ int16x8_t v524_tmp = vqrdmulhq_n_s16(v523, 13573);
+ int16x8_t v524 = vaddq_s16(v524_tmp, v523);
+ int16x8_t v525 = vaddq_s16(v416, v320);
+ int16x8_t v526 = vaddq_s16(v321, v418);
+ int16x8_t v527 = vaddq_s16(v525, v526);
+ int16x8_t v528 = vaddq_s16(v524, v527);
+ int16x8_t v529 = vaddq_s16(v424, v324);
+ int16x8_t v530 = vaddq_s16(v325, v426);
+ int16x8_t v531 = vaddq_s16(v529, v530);
+ int16x8_t v532_tmp = vqrdmulhq_n_s16(v531, 13573);
+ int16x8_t v532 = vaddq_s16(v532_tmp, v531);
+ int16x8_t v533 = vaddq_s16(v432, v328);
+ int16x8_t v534 = vaddq_s16(v329, v434);
+ int16x8_t v535 = vaddq_s16(v533, v534);
+ int16x8_t v536 = vaddq_s16(v535, v531);
+ int16x8_t v537 = vaddq_s16(v532, v536);
+ int16x8_t v538 = vqrdmulhq_n_s16(v537, 17734);
+ int16x8_t v539 = vaddq_s16(v528, v538);
+ int16x8_t v540 = vaddq_s16(v443, v335);
+ int16x8_t v541 = vaddq_s16(v336, v445);
+ int16x8_t v542 = vaddq_s16(v540, v541);
+ int16x8_t v543_tmp = vqrdmulhq_n_s16(v542, 13573);
+ int16x8_t v543 = vaddq_s16(v543_tmp, v542);
+ int16x8_t v544 = vaddq_s16(v451, v339);
+ int16x8_t v545 = vaddq_s16(v340, v453);
+ int16x8_t v546 = vaddq_s16(v544, v545);
+ int16x8_t v547 = vaddq_s16(v458, v342);
+ int16x8_t v548 = vaddq_s16(v343, v460);
+ int16x8_t v549 = vaddq_s16(v547, v548);
+ int16x8_t v550 = vaddq_s16(v546, v549);
+ int16x8_t v551 = vaddq_s16(v543, v550);
+ int16x8_t v552 = vaddq_s16(v549, v542);
+ int16x8_t v553_tmp = vqrdmulhq_n_s16(v552, 13573);
+ int16x8_t v553 = vaddq_s16(v553_tmp, v552);
+ int16x8_t v554 = vaddq_s16(v469, v349);
+ int16x8_t v555 = vaddq_s16(v350, v471);
+ int16x8_t v556 = vaddq_s16(v554, v555);
+ int16x8_t v557 = vaddq_s16(v556, v546);
+ int16x8_t v558 = vaddq_s16(v557, v552);
+ int16x8_t v559 = vaddq_s16(v553, v558);
+ int16x8_t v560 = vqrdmulhq_n_s16(v559, 17734);
+ int16x8_t v561 = vaddq_s16(v551, v560);
+ int16x8_t v562 = vqrdmulhq_n_s16(v561, 16705);
+ int16x8_t v563 = vaddq_s16(v539, v562);
+ int16x8_t v564 = vaddq_s16(v446, v359);
+ int16x8_t v565 = vaddq_s16(v360, v411);
+ int16x8_t v566 = vaddq_s16(v564, v565);
+ int16x8_t v567_tmp = vqrdmulhq_n_s16(v566, 13573);
+ int16x8_t v567 = vaddq_s16(v567_tmp, v566);
+ int16x8_t v568 = vaddq_s16(v454, v363);
+ int16x8_t v569 = vaddq_s16(v364, v415);
+ int16x8_t v570 = vaddq_s16(v568, v569);
+ int16x8_t v571 = vaddq_s16(v419, v366);
+ int16x8_t v572 = vaddq_s16(v367, v457);
+ int16x8_t v573 = vaddq_s16(v571, v572);
+ int16x8_t v574 = vaddq_s16(v570, v573);
+ int16x8_t v575 = vaddq_s16(v567, v574);
+ int16x8_t v576 = vaddq_s16(v461, v371);
+ int16x8_t v577 = vaddq_s16(v372, v423);
+ int16x8_t v578 = vaddq_s16(v576, v577);
+ int16x8_t v579 = vaddq_s16(v427, v374);
+ int16x8_t v580 = vaddq_s16(v375, v442);
+ int16x8_t v581 = vaddq_s16(v579, v580);
+ int16x8_t v582 = vaddq_s16(v578, v581);
+ int16x8_t v583_tmp = vqrdmulhq_n_s16(v582, 13573);
+ int16x8_t v583 = vaddq_s16(v583_tmp, v582);
+ int16x8_t v584 = vaddq_s16(v472, v379);
+ int16x8_t v585 = vaddq_s16(v380, v431);
+ int16x8_t v586 = vaddq_s16(v584, v585);
+ int16x8_t v587 = vaddq_s16(v435, v382);
+ int16x8_t v588 = vaddq_s16(v383, v450);
+ int16x8_t v589 = vaddq_s16(v587, v588);
+ int16x8_t v590 = vaddq_s16(v586, v589);
+ int16x8_t v591 = vaddq_s16(v590, v582);
+ int16x8_t v592 = vaddq_s16(v583, v591);
+ int16x8_t v593 = vqrdmulhq_n_s16(v592, 17734);
+ int16x8_t v594 = vaddq_s16(v575, v593);
+ int16x8_t v595 = vaddq_s16(v581, v566);
+ int16x8_t v596_tmp = vqrdmulhq_n_s16(v595, 13573);
+ int16x8_t v596 = vaddq_s16(v596_tmp, v595);
+ int16x8_t v597 = vaddq_s16(v589, v570);
+ int16x8_t v598 = vaddq_s16(v573, v578);
+ int16x8_t v599 = vaddq_s16(v597, v598);
+ int16x8_t v600 = vaddq_s16(v596, v599);
+ int16x8_t v601 = vaddq_s16(v598, v595);
+ int16x8_t v602_tmp = vqrdmulhq_n_s16(v601, 13573);
+ int16x8_t v602 = vaddq_s16(v602_tmp, v601);
+ int16x8_t v603 = vaddq_s16(v508, v398);
+ int16x8_t v604 = vaddq_s16(v399, v468);
+ int16x8_t v605 = vaddq_s16(v603, v604);
+ int16x8_t v606 = vaddq_s16(v605, v586);
+ int16x8_t v607 = vaddq_s16(v606, v597);
+ int16x8_t v608 = vaddq_s16(v607, v601);
+ int16x8_t v609 = vaddq_s16(v602, v608);
+ int16x8_t v610 = vqrdmulhq_n_s16(v609, 17734);
+ int16x8_t v611 = vaddq_s16(v600, v610);
+ int16x8_t v612 = vqrdmulhq_n_s16(v611, 16705);
+ int16x8_t v613 = vaddq_s16(v594, v612);
+ int16x8_t v614 = vqrdmulhq_n_s16(v613, 16463);
+ int16x8_t v615 = vaddq_s16(v563, v614);
+ int16x8_t v616 = vaddq_s16(v565, v523);
+ int16x8_t v617_tmp = vqrdmulhq_n_s16(v616, 13573);
+ int16x8_t v617 = vaddq_s16(v617_tmp, v616);
+ int16x8_t v618 = vaddq_s16(v569, v525);
+ int16x8_t v619 = vaddq_s16(v526, v571);
+ int16x8_t v620 = vaddq_s16(v618, v619);
+ int16x8_t v621 = vaddq_s16(v617, v620);
+ int16x8_t v622 = vaddq_s16(v577, v529);
+ int16x8_t v623 = vaddq_s16(v530, v579);
+ int16x8_t v624 = vaddq_s16(v622, v623);
+ int16x8_t v625_tmp = vqrdmulhq_n_s16(v624, 13573);
+ int16x8_t v625 = vaddq_s16(v625_tmp, v624);
+ int16x8_t v626 = vaddq_s16(v585, v533);
+ int16x8_t v627 = vaddq_s16(v534, v587);
+ int16x8_t v628 = vaddq_s16(v626, v627);
+ int16x8_t v629 = vaddq_s16(v628, v624);
+ int16x8_t v630 = vaddq_s16(v625, v629);
+ int16x8_t v631 = vqrdmulhq_n_s16(v630, 17734);
+ int16x8_t v632 = vaddq_s16(v621, v631);
+ int16x8_t v633 = vaddq_s16(v580, v540);
+ int16x8_t v634 = vaddq_s16(v541, v564);
+ int16x8_t v635 = vaddq_s16(v633, v634);
+ int16x8_t v636_tmp = vqrdmulhq_n_s16(v635, 13573);
+ int16x8_t v636 = vaddq_s16(v636_tmp, v635);
+ int16x8_t v637 = vaddq_s16(v588, v544);
+ int16x8_t v638 = vaddq_s16(v545, v568);
+ int16x8_t v639 = vaddq_s16(v637, v638);
+ int16x8_t v640 = vaddq_s16(v572, v547);
+ int16x8_t v641 = vaddq_s16(v548, v576);
+ int16x8_t v642 = vaddq_s16(v640, v641);
+ int16x8_t v643 = vaddq_s16(v639, v642);
+ int16x8_t v644 = vaddq_s16(v636, v643);
+ int16x8_t v645 = vaddq_s16(v642, v635);
+ int16x8_t v646_tmp = vqrdmulhq_n_s16(v645, 13573);
+ int16x8_t v646 = vaddq_s16(v646_tmp, v645);
+ int16x8_t v647 = vaddq_s16(v604, v554);
+ int16x8_t v648 = vaddq_s16(v555, v584);
+ int16x8_t v649 = vaddq_s16(v647, v648);
+ int16x8_t v650 = vaddq_s16(v649, v639);
+ int16x8_t v651 = vaddq_s16(v650, v645);
+ int16x8_t v652 = vaddq_s16(v646, v651);
+ int16x8_t v653 = vqrdmulhq_n_s16(v652, 17734);
+ int16x8_t v654 = vaddq_s16(v644, v653);
+ int16x8_t v655 = vqrdmulhq_n_s16(v654, 16705);
+ int16x8_t v656 = vaddq_s16(v632, v655);
+ int16x8_t v657 = vaddq_s16(v634, v616);
+ int16x8_t v658_tmp = vqrdmulhq_n_s16(v657, 13573);
+ int16x8_t v658 = vaddq_s16(v658_tmp, v657);
+ int16x8_t v659 = vaddq_s16(v638, v618);
+ int16x8_t v660 = vaddq_s16(v619, v640);
+ int16x8_t v661 = vaddq_s16(v659, v660);
+ int16x8_t v662 = vaddq_s16(v658, v661);
+ int16x8_t v663 = vaddq_s16(v641, v622);
+ int16x8_t v664 = vaddq_s16(v623, v633);
+ int16x8_t v665 = vaddq_s16(v663, v664);
+ int16x8_t v666_tmp = vqrdmulhq_n_s16(v665, 13573);
+ int16x8_t v666 = vaddq_s16(v666_tmp, v665);
+ int16x8_t v667 = vaddq_s16(v648, v626);
+ int16x8_t v668 = vaddq_s16(v627, v637);
+ int16x8_t v669 = vaddq_s16(v667, v668);
+ int16x8_t v670 = vaddq_s16(v669, v665);
+ int16x8_t v671 = vaddq_s16(v666, v670);
+ int16x8_t v672 = vqrdmulhq_n_s16(v671, 17734);
+ int16x8_t v673 = vaddq_s16(v662, v672);
+ int16x8_t v674 = vaddq_s16(v664, v657);
+ int16x8_t v675_tmp = vqrdmulhq_n_s16(v674, 13573);
+ int16x8_t v675 = vaddq_s16(v675_tmp, v674);
+ int16x8_t v676 = vaddq_s16(v668, v659);
+ int16x8_t v677 = vaddq_s16(v660, v663);
+ int16x8_t v678 = vaddq_s16(v676, v677);
+ int16x8_t v679 = vaddq_s16(v675, v678);
+ int16x8_t v680 = vaddq_s16(v677, v674);
+ int16x8_t v681_tmp = vqrdmulhq_n_s16(v680, 13573);
+ int16x8_t v681 = vaddq_s16(v681_tmp, v680);
+ int16x8_t v682 = vld1q_s16(in + in_stride * 127 + i);
+ int16x8_t v683 = vaddq_s16(v682, v507);
+ int16x8_t v684 = vaddq_s16(v683, v603);
+ int16x8_t v685 = vaddq_s16(v684, v647);
+ int16x8_t v686 = vaddq_s16(v685, v667);
+ int16x8_t v687 = vaddq_s16(v686, v676);
+ int16x8_t v688 = vaddq_s16(v687, v680);
+ int16x8_t v689 = vaddq_s16(v681, v688);
+ int16x8_t v690 = vqrdmulhq_n_s16(v689, 17734);
+ int16x8_t v691 = vaddq_s16(v679, v690);
+ int16x8_t v692 = vqrdmulhq_n_s16(v691, 16705);
+ int16x8_t v693 = vaddq_s16(v673, v692);
+ int16x8_t v694 = vqrdmulhq_n_s16(v693, 16463);
+ int16x8_t v695 = vaddq_s16(v656, v694);
+ int16x8_t v696 = vqrdmulhq_n_s16(v695, 16404);
+ int16x8_t v697 = vaddq_s16(v615, v696);
+ int16x8_t v698 = vqrdmulhq_n_s16(v697, 16389);
+ int16x8_t v699 = vaddq_s16(v522, v698);
+ int16x8_t v700 = vqrdmulhq_n_s16(v699, 16385);
+ int16x8_t v701 = vaddq_s16(v317, v700);
+ int16x8_t v702 = vsubq_s16(v0, v1);
+ int16x8_t v703 = vsubq_s16(v4, v6);
+ int16x8_t v704_tmp = vqrdmulhq_n_s16(v703, 10045);
+ int16x8_t v704 = vaddq_s16(v704_tmp, v703);
+ int16x8_t v705 = vaddq_s16(v702, v704);
+ int16x8_t v706 = vsubq_s16(v11, v14);
+ int16x8_t v707 = vsubq_s16(v17, v20);
+ int16x8_t v708_tmp = vqrdmulhq_n_s16(v707, 10045);
+ int16x8_t v708 = vaddq_s16(v708_tmp, v707);
+ int16x8_t v709 = vaddq_s16(v706, v708);
+ int16x8_t v710 = vqrdmulhq_n_s16(v709, 19705);
+ int16x8_t v711 = vaddq_s16(v705, v710);
+ int16x8_t v712 = vsubq_s16(v27, v30);
+ int16x8_t v713 = vsubq_s16(v35, v39);
+ int16x8_t v714_tmp = vqrdmulhq_n_s16(v713, 10045);
+ int16x8_t v714 = vaddq_s16(v714_tmp, v713);
+ int16x8_t v715 = vaddq_s16(v712, v714);
+ int16x8_t v716 = vsubq_s16(v44, v47);
+ int16x8_t v717 = vsubq_s16(v50, v54);
+ int16x8_t v718_tmp = vqrdmulhq_n_s16(v717, 10045);
+ int16x8_t v718 = vaddq_s16(v718_tmp, v717);
+ int16x8_t v719 = vaddq_s16(v716, v718);
+ int16x8_t v720 = vqrdmulhq_n_s16(v719, 19705);
+ int16x8_t v721 = vaddq_s16(v715, v720);
+ int16x8_t v722 = vqrdmulhq_n_s16(v721, 17121);
+ int16x8_t v723 = vaddq_s16(v711, v722);
+ int16x8_t v724 = vsubq_s16(v63, v66);
+ int16x8_t v725 = vsubq_s16(v71, v75);
+ int16x8_t v726_tmp = vqrdmulhq_n_s16(v725, 10045);
+ int16x8_t v726 = vaddq_s16(v726_tmp, v725);
+ int16x8_t v727 = vaddq_s16(v724, v726);
+ int16x8_t v728 = vsubq_s16(v82, v89);
+ int16x8_t v729 = vsubq_s16(v92, v97);
+ int16x8_t v730_tmp = vqrdmulhq_n_s16(v729, 10045);
+ int16x8_t v730 = vaddq_s16(v730_tmp, v729);
+ int16x8_t v731 = vaddq_s16(v728, v730);
+ int16x8_t v732 = vqrdmulhq_n_s16(v731, 19705);
+ int16x8_t v733 = vaddq_s16(v727, v732);
+ int16x8_t v734 = vsubq_s16(v104, v107);
+ int16x8_t v735 = vsubq_s16(v112, v116);
+ int16x8_t v736_tmp = vqrdmulhq_n_s16(v735, 10045);
+ int16x8_t v736 = vaddq_s16(v736_tmp, v735);
+ int16x8_t v737 = vaddq_s16(v734, v736);
+ int16x8_t v738 = vsubq_s16(v121, v124);
+ int16x8_t v739 = vsubq_s16(v127, v132);
+ int16x8_t v740_tmp = vqrdmulhq_n_s16(v739, 10045);
+ int16x8_t v740 = vaddq_s16(v740_tmp, v739);
+ int16x8_t v741 = vaddq_s16(v738, v740);
+ int16x8_t v742 = vqrdmulhq_n_s16(v741, 19705);
+ int16x8_t v743 = vaddq_s16(v737, v742);
+ int16x8_t v744 = vqrdmulhq_n_s16(v743, 17121);
+ int16x8_t v745 = vaddq_s16(v733, v744);
+ int16x8_t v746 = vqrdmulhq_n_s16(v745, 16563);
+ int16x8_t v747 = vaddq_s16(v723, v746);
+ int16x8_t v748 = vsubq_s16(v143, v146);
+ int16x8_t v749 = vsubq_s16(v151, v155);
+ int16x8_t v750_tmp = vqrdmulhq_n_s16(v749, 10045);
+ int16x8_t v750 = vaddq_s16(v750_tmp, v749);
+ int16x8_t v751 = vaddq_s16(v748, v750);
+ int16x8_t v752 = vsubq_s16(v162, v169);
+ int16x8_t v753 = vqrdmulhq_n_s16(v752, 19705);
+ int16x8_t v754 = vsubq_s16(v172, v177);
+ int16x8_t v755 = vqrdmulhq_n_s16(v754, 25746);
+ int16x8_t v756 = vaddq_s16(v753, v755);
+ int16x8_t v757 = vaddq_s16(v751, v756);
+ int16x8_t v758 = vsubq_s16(v186, v193);
+ int16x8_t v759 = vsubq_s16(v202, v210);
+ int16x8_t v760_tmp = vqrdmulhq_n_s16(v759, 10045);
+ int16x8_t v760 = vaddq_s16(v760_tmp, v759);
+ int16x8_t v761 = vaddq_s16(v758, v760);
+ int16x8_t v762 = vsubq_s16(v215, v218);
+ int16x8_t v763 = vsubq_s16(v221, v227);
+ int16x8_t v764_tmp = vqrdmulhq_n_s16(v763, 10045);
+ int16x8_t v764 = vaddq_s16(v764_tmp, v763);
+ int16x8_t v765 = vaddq_s16(v762, v764);
+ int16x8_t v766 = vqrdmulhq_n_s16(v765, 19705);
+ int16x8_t v767 = vaddq_s16(v761, v766);
+ int16x8_t v768 = vqrdmulhq_n_s16(v767, 17121);
+ int16x8_t v769 = vaddq_s16(v757, v768);
+ int16x8_t v770 = vsubq_s16(v236, v239);
+ int16x8_t v771 = vsubq_s16(v244, v248);
+ int16x8_t v772_tmp = vqrdmulhq_n_s16(v771, 10045);
+ int16x8_t v772 = vaddq_s16(v772_tmp, v771);
+ int16x8_t v773 = vaddq_s16(v770, v772);
+ int16x8_t v774 = vsubq_s16(v255, v262);
+ int16x8_t v775 = vsubq_s16(v265, v270);
+ int16x8_t v776_tmp = vqrdmulhq_n_s16(v775, 10045);
+ int16x8_t v776 = vaddq_s16(v776_tmp, v775);
+ int16x8_t v777 = vaddq_s16(v774, v776);
+ int16x8_t v778 = vqrdmulhq_n_s16(v777, 19705);
+ int16x8_t v779 = vaddq_s16(v773, v778);
+ int16x8_t v780 = vsubq_s16(v277, v280);
+ int16x8_t v781 = vsubq_s16(v285, v289);
+ int16x8_t v782_tmp = vqrdmulhq_n_s16(v781, 10045);
+ int16x8_t v782 = vaddq_s16(v782_tmp, v781);
+ int16x8_t v783 = vaddq_s16(v780, v782);
+ int16x8_t v784 = vsubq_s16(v294, v297);
+ int16x8_t v785 = vsubq_s16(v300, v306);
+ int16x8_t v786_tmp = vqrdmulhq_n_s16(v785, 10045);
+ int16x8_t v786 = vaddq_s16(v786_tmp, v785);
+ int16x8_t v787 = vaddq_s16(v784, v786);
+ int16x8_t v788 = vqrdmulhq_n_s16(v787, 19705);
+ int16x8_t v789 = vaddq_s16(v783, v788);
+ int16x8_t v790 = vqrdmulhq_n_s16(v789, 17121);
+ int16x8_t v791 = vaddq_s16(v779, v790);
+ int16x8_t v792 = vqrdmulhq_n_s16(v791, 16563);
+ int16x8_t v793 = vaddq_s16(v769, v792);
+ int16x8_t v794 = vqrdmulhq_n_s16(v793, 16429);
+ int16x8_t v795 = vaddq_s16(v747, v794);
+ int16x8_t v796 = vsubq_s16(v319, v322);
+ int16x8_t v797 = vsubq_s16(v327, v331);
+ int16x8_t v798_tmp = vqrdmulhq_n_s16(v797, 10045);
+ int16x8_t v798 = vaddq_s16(v798_tmp, v797);
+ int16x8_t v799 = vaddq_s16(v796, v798);
+ int16x8_t v800 = vsubq_s16(v338, v345);
+ int16x8_t v801 = vsubq_s16(v348, v353);
+ int16x8_t v802_tmp = vqrdmulhq_n_s16(v801, 10045);
+ int16x8_t v802 = vaddq_s16(v802_tmp, v801);
+ int16x8_t v803 = vaddq_s16(v800, v802);
+ int16x8_t v804 = vqrdmulhq_n_s16(v803, 19705);
+ int16x8_t v805 = vaddq_s16(v799, v804);
+ int16x8_t v806 = vsubq_s16(v362, v369);
+ int16x8_t v807 = vsubq_s16(v378, v386);
+ int16x8_t v808_tmp = vqrdmulhq_n_s16(v807, 10045);
+ int16x8_t v808 = vaddq_s16(v808_tmp, v807);
+ int16x8_t v809 = vaddq_s16(v806, v808);
+ int16x8_t v810 = vsubq_s16(v391, v394);
+ int16x8_t v811 = vsubq_s16(v397, v403);
+ int16x8_t v812_tmp = vqrdmulhq_n_s16(v811, 10045);
+ int16x8_t v812 = vaddq_s16(v812_tmp, v811);
+ int16x8_t v813 = vaddq_s16(v810, v812);
+ int16x8_t v814 = vqrdmulhq_n_s16(v813, 19705);
+ int16x8_t v815 = vaddq_s16(v809, v814);
+ int16x8_t v816 = vqrdmulhq_n_s16(v815, 17121);
+ int16x8_t v817 = vaddq_s16(v805, v816);
+ int16x8_t v818 = vsubq_s16(v414, v421);
+ int16x8_t v819 = vsubq_s16(v430, v438);
+ int16x8_t v820_tmp = vqrdmulhq_n_s16(v819, 10045);
+ int16x8_t v820 = vaddq_s16(v820_tmp, v819);
+ int16x8_t v821 = vaddq_s16(v818, v820);
+ int16x8_t v822 = vsubq_s16(v449, v464);
+ int16x8_t v823 = vsubq_s16(v467, v476);
+ int16x8_t v824_tmp = vqrdmulhq_n_s16(v823, 10045);
+ int16x8_t v824 = vaddq_s16(v824_tmp, v823);
+ int16x8_t v825 = vaddq_s16(v822, v824);
+ int16x8_t v826 = vqrdmulhq_n_s16(v825, 19705);
+ int16x8_t v827 = vaddq_s16(v821, v826);
+ int16x8_t v828 = vsubq_s16(v483, v486);
+ int16x8_t v829 = vsubq_s16(v491, v495);
+ int16x8_t v830_tmp = vqrdmulhq_n_s16(v829, 10045);
+ int16x8_t v830 = vaddq_s16(v830_tmp, v829);
+ int16x8_t v831 = vaddq_s16(v828, v830);
+ int16x8_t v832 = vsubq_s16(v500, v503);
+ int16x8_t v833 = vsubq_s16(v506, v513);
+ int16x8_t v834_tmp = vqrdmulhq_n_s16(v833, 10045);
+ int16x8_t v834 = vaddq_s16(v834_tmp, v833);
+ int16x8_t v835 = vaddq_s16(v832, v834);
+ int16x8_t v836 = vqrdmulhq_n_s16(v835, 19705);
+ int16x8_t v837 = vaddq_s16(v831, v836);
+ int16x8_t v838 = vqrdmulhq_n_s16(v837, 17121);
+ int16x8_t v839 = vaddq_s16(v827, v838);
+ int16x8_t v840 = vqrdmulhq_n_s16(v839, 16563);
+ int16x8_t v841 = vaddq_s16(v817, v840);
+ int16x8_t v842 = vsubq_s16(v524, v527);
+ int16x8_t v843 = vsubq_s16(v532, v536);
+ int16x8_t v844_tmp = vqrdmulhq_n_s16(v843, 10045);
+ int16x8_t v844 = vaddq_s16(v844_tmp, v843);
+ int16x8_t v845 = vaddq_s16(v842, v844);
+ int16x8_t v846 = vsubq_s16(v543, v550);
+ int16x8_t v847 = vsubq_s16(v553, v558);
+ int16x8_t v848_tmp = vqrdmulhq_n_s16(v847, 10045);
+ int16x8_t v848 = vaddq_s16(v848_tmp, v847);
+ int16x8_t v849 = vaddq_s16(v846, v848);
+ int16x8_t v850 = vqrdmulhq_n_s16(v849, 19705);
+ int16x8_t v851 = vaddq_s16(v845, v850);
+ int16x8_t v852 = vsubq_s16(v567, v574);
+ int16x8_t v853 = vsubq_s16(v583, v591);
+ int16x8_t v854_tmp = vqrdmulhq_n_s16(v853, 10045);
+ int16x8_t v854 = vaddq_s16(v854_tmp, v853);
+ int16x8_t v855 = vaddq_s16(v852, v854);
+ int16x8_t v856 = vsubq_s16(v596, v599);
+ int16x8_t v857 = vsubq_s16(v602, v608);
+ int16x8_t v858_tmp = vqrdmulhq_n_s16(v857, 10045);
+ int16x8_t v858 = vaddq_s16(v858_tmp, v857);
+ int16x8_t v859 = vaddq_s16(v856, v858);
+ int16x8_t v860 = vqrdmulhq_n_s16(v859, 19705);
+ int16x8_t v861 = vaddq_s16(v855, v860);
+ int16x8_t v862 = vqrdmulhq_n_s16(v861, 17121);
+ int16x8_t v863 = vaddq_s16(v851, v862);
+ int16x8_t v864 = vsubq_s16(v617, v620);
+ int16x8_t v865 = vsubq_s16(v625, v629);
+ int16x8_t v866_tmp = vqrdmulhq_n_s16(v865, 10045);
+ int16x8_t v866 = vaddq_s16(v866_tmp, v865);
+ int16x8_t v867 = vaddq_s16(v864, v866);
+ int16x8_t v868 = vsubq_s16(v636, v643);
+ int16x8_t v869 = vsubq_s16(v646, v651);
+ int16x8_t v870_tmp = vqrdmulhq_n_s16(v869, 10045);
+ int16x8_t v870 = vaddq_s16(v870_tmp, v869);
+ int16x8_t v871 = vaddq_s16(v868, v870);
+ int16x8_t v872 = vqrdmulhq_n_s16(v871, 19705);
+ int16x8_t v873 = vaddq_s16(v867, v872);
+ int16x8_t v874 = vsubq_s16(v658, v661);
+ int16x8_t v875 = vsubq_s16(v666, v670);
+ int16x8_t v876_tmp = vqrdmulhq_n_s16(v875, 10045);
+ int16x8_t v876 = vaddq_s16(v876_tmp, v875);
+ int16x8_t v877 = vaddq_s16(v874, v876);
+ int16x8_t v878 = vsubq_s16(v675, v678);
+ int16x8_t v879 = vsubq_s16(v681, v688);
+ int16x8_t v880_tmp = vqrdmulhq_n_s16(v879, 10045);
+ int16x8_t v880 = vaddq_s16(v880_tmp, v879);
+ int16x8_t v881 = vaddq_s16(v878, v880);
+ int16x8_t v882 = vqrdmulhq_n_s16(v881, 19705);
+ int16x8_t v883 = vaddq_s16(v877, v882);
+ int16x8_t v884 = vqrdmulhq_n_s16(v883, 17121);
+ int16x8_t v885 = vaddq_s16(v873, v884);
+ int16x8_t v886 = vqrdmulhq_n_s16(v885, 16563);
+ int16x8_t v887 = vaddq_s16(v863, v886);
+ int16x8_t v888 = vqrdmulhq_n_s16(v887, 16429);
+ int16x8_t v889 = vaddq_s16(v841, v888);
+ int16x8_t v890 = vqrdmulhq_n_s16(v889, 16395);
+ int16x8_t v891 = vaddq_s16(v795, v890);
+ int16x8_t v892 = vsubq_s16(v702, v704);
+ int16x8_t v893 = vsubq_s16(v706, v708);
+ int16x8_t v894 = vqrdmulhq_n_s16(v893, 29490);
+ int16x8_t v895 = vaddq_s16(v892, v894);
+ int16x8_t v896 = vsubq_s16(v712, v714);
+ int16x8_t v897 = vsubq_s16(v716, v718);
+ int16x8_t v898 = vqrdmulhq_n_s16(v897, 29490);
+ int16x8_t v899 = vaddq_s16(v896, v898);
+ int16x8_t v900 = vqrdmulhq_n_s16(v899, 18578);
+ int16x8_t v901 = vaddq_s16(v895, v900);
+ int16x8_t v902 = vsubq_s16(v724, v726);
+ int16x8_t v903 = vsubq_s16(v728, v730);
+ int16x8_t v904 = vqrdmulhq_n_s16(v903, 29490);
+ int16x8_t v905 = vaddq_s16(v902, v904);
+ int16x8_t v906 = vsubq_s16(v734, v736);
+ int16x8_t v907 = vsubq_s16(v738, v740);
+ int16x8_t v908 = vqrdmulhq_n_s16(v907, 29490);
+ int16x8_t v909 = vaddq_s16(v906, v908);
+ int16x8_t v910 = vqrdmulhq_n_s16(v909, 18578);
+ int16x8_t v911 = vaddq_s16(v905, v910);
+ int16x8_t v912 = vqrdmulhq_n_s16(v911, 16890);
+ int16x8_t v913 = vaddq_s16(v901, v912);
+ int16x8_t v914 = vsubq_s16(v748, v750);
+ int16x8_t v915_tmp = vqrdmulhq_n_s16(v754, 10045);
+ int16x8_t v915 = vaddq_s16(v915_tmp, v754);
+ int16x8_t v916 = vsubq_s16(v752, v915);
+ int16x8_t v917 = vqrdmulhq_n_s16(v916, 29490);
+ int16x8_t v918 = vaddq_s16(v914, v917);
+ int16x8_t v919 = vsubq_s16(v758, v760);
+ int16x8_t v920 = vsubq_s16(v762, v764);
+ int16x8_t v921 = vqrdmulhq_n_s16(v920, 29490);
+ int16x8_t v922 = vaddq_s16(v919, v921);
+ int16x8_t v923 = vqrdmulhq_n_s16(v922, 18578);
+ int16x8_t v924 = vaddq_s16(v918, v923);
+ int16x8_t v925 = vsubq_s16(v770, v772);
+ int16x8_t v926 = vsubq_s16(v774, v776);
+ int16x8_t v927 = vqrdmulhq_n_s16(v926, 29490);
+ int16x8_t v928 = vaddq_s16(v925, v927);
+ int16x8_t v929 = vsubq_s16(v780, v782);
+ int16x8_t v930 = vsubq_s16(v784, v786);
+ int16x8_t v931 = vqrdmulhq_n_s16(v930, 29490);
+ int16x8_t v932 = vaddq_s16(v929, v931);
+ int16x8_t v933 = vqrdmulhq_n_s16(v932, 18578);
+ int16x8_t v934 = vaddq_s16(v928, v933);
+ int16x8_t v935 = vqrdmulhq_n_s16(v934, 16890);
+ int16x8_t v936 = vaddq_s16(v924, v935);
+ int16x8_t v937 = vqrdmulhq_n_s16(v936, 16508);
+ int16x8_t v938 = vaddq_s16(v913, v937);
+ int16x8_t v939 = vsubq_s16(v796, v798);
+ int16x8_t v940 = vsubq_s16(v800, v802);
+ int16x8_t v941 = vqrdmulhq_n_s16(v940, 29490);
+ int16x8_t v942 = vaddq_s16(v939, v941);
+ int16x8_t v943 = vsubq_s16(v806, v808);
+ int16x8_t v944 = vsubq_s16(v810, v812);
+ int16x8_t v945 = vqrdmulhq_n_s16(v944, 29490);
+ int16x8_t v946 = vaddq_s16(v943, v945);
+ int16x8_t v947 = vqrdmulhq_n_s16(v946, 18578);
+ int16x8_t v948 = vaddq_s16(v942, v947);
+ int16x8_t v949 = vsubq_s16(v818, v820);
+ int16x8_t v950 = vsubq_s16(v822, v824);
+ int16x8_t v951 = vqrdmulhq_n_s16(v950, 29490);
+ int16x8_t v952 = vaddq_s16(v949, v951);
+ int16x8_t v953 = vsubq_s16(v828, v830);
+ int16x8_t v954 = vsubq_s16(v832, v834);
+ int16x8_t v955 = vqrdmulhq_n_s16(v954, 29490);
+ int16x8_t v956 = vaddq_s16(v953, v955);
+ int16x8_t v957 = vqrdmulhq_n_s16(v956, 18578);
+ int16x8_t v958 = vaddq_s16(v952, v957);
+ int16x8_t v959 = vqrdmulhq_n_s16(v958, 16890);
+ int16x8_t v960 = vaddq_s16(v948, v959);
+ int16x8_t v961 = vsubq_s16(v842, v844);
+ int16x8_t v962 = vsubq_s16(v846, v848);
+ int16x8_t v963 = vqrdmulhq_n_s16(v962, 29490);
+ int16x8_t v964 = vaddq_s16(v961, v963);
+ int16x8_t v965 = vsubq_s16(v852, v854);
+ int16x8_t v966 = vsubq_s16(v856, v858);
+ int16x8_t v967 = vqrdmulhq_n_s16(v966, 29490);
+ int16x8_t v968 = vaddq_s16(v965, v967);
+ int16x8_t v969 = vqrdmulhq_n_s16(v968, 18578);
+ int16x8_t v970 = vaddq_s16(v964, v969);
+ int16x8_t v971 = vsubq_s16(v864, v866);
+ int16x8_t v972 = vsubq_s16(v868, v870);
+ int16x8_t v973 = vqrdmulhq_n_s16(v972, 29490);
+ int16x8_t v974 = vaddq_s16(v971, v973);
+ int16x8_t v975 = vsubq_s16(v874, v876);
+ int16x8_t v976 = vsubq_s16(v878, v880);
+ int16x8_t v977 = vqrdmulhq_n_s16(v976, 29490);
+ int16x8_t v978 = vaddq_s16(v975, v977);
+ int16x8_t v979 = vqrdmulhq_n_s16(v978, 18578);
+ int16x8_t v980 = vaddq_s16(v974, v979);
+ int16x8_t v981 = vqrdmulhq_n_s16(v980, 16890);
+ int16x8_t v982 = vaddq_s16(v970, v981);
+ int16x8_t v983 = vqrdmulhq_n_s16(v982, 16508);
+ int16x8_t v984 = vaddq_s16(v960, v983);
+ int16x8_t v985 = vqrdmulhq_n_s16(v984, 16415);
+ int16x8_t v986 = vaddq_s16(v938, v985);
+ int16x8_t v987 = vsubq_s16(v2, v8);
+ int16x8_t v988 = vsubq_s16(v15, v22);
+ int16x8_t v989_tmp = vqrdmulhq_n_s16(v988, 18446);
+ int16x8_t v989 = vmlaq_n_s16(v989_tmp, v988, 2);
+ int16x8_t v990 = vaddq_s16(v987, v989);
+ int16x8_t v991 = vsubq_s16(v31, v41);
+ int16x8_t v992 = vsubq_s16(v48, v56);
+ int16x8_t v993_tmp = vqrdmulhq_n_s16(v992, 18446);
+ int16x8_t v993 = vmlaq_n_s16(v993_tmp, v992, 2);
+ int16x8_t v994 = vaddq_s16(v991, v993);
+ int16x8_t v995 = vqrdmulhq_n_s16(v994, 21195);
+ int16x8_t v996 = vaddq_s16(v990, v995);
+ int16x8_t v997 = vsubq_s16(v67, v77);
+ int16x8_t v998 = vsubq_s16(v90, v99);
+ int16x8_t v999_tmp = vqrdmulhq_n_s16(v998, 18446);
+ int16x8_t v999 = vmlaq_n_s16(v999_tmp, v998, 2);
+ int16x8_t v1000 = vaddq_s16(v997, v999);
+ int16x8_t v1001 = vsubq_s16(v108, v118);
+ int16x8_t v1002 = vsubq_s16(v125, v134);
+ int16x8_t v1003_tmp = vqrdmulhq_n_s16(v1002, 18446);
+ int16x8_t v1003 = vmlaq_n_s16(v1003_tmp, v1002, 2);
+ int16x8_t v1004 = vaddq_s16(v1001, v1003);
+ int16x8_t v1005 = vqrdmulhq_n_s16(v1004, 21195);
+ int16x8_t v1006 = vaddq_s16(v1000, v1005);
+ int16x8_t v1007 = vqrdmulhq_n_s16(v1006, 17401);
+ int16x8_t v1008 = vaddq_s16(v996, v1007);
+ int16x8_t v1009 = vsubq_s16(v147, v157);
+ int16x8_t v1010 = vsubq_s16(v170, v179);
+ int16x8_t v1011_tmp = vqrdmulhq_n_s16(v1010, 18446);
+ int16x8_t v1011 = vmlaq_n_s16(v1011_tmp, v1010, 2);
+ int16x8_t v1012 = vaddq_s16(v1009, v1011);
+ int16x8_t v1013 = vsubq_s16(v194, v212);
+ int16x8_t v1014 = vsubq_s16(v219, v229);
+ int16x8_t v1015_tmp = vqrdmulhq_n_s16(v1014, 18446);
+ int16x8_t v1015 = vmlaq_n_s16(v1015_tmp, v1014, 2);
+ int16x8_t v1016 = vaddq_s16(v1013, v1015);
+ int16x8_t v1017 = vqrdmulhq_n_s16(v1016, 21195);
+ int16x8_t v1018 = vaddq_s16(v1012, v1017);
+ int16x8_t v1019 = vsubq_s16(v240, v250);
+ int16x8_t v1020 = vsubq_s16(v263, v272);
+ int16x8_t v1021_tmp = vqrdmulhq_n_s16(v1020, 18446);
+ int16x8_t v1021 = vmlaq_n_s16(v1021_tmp, v1020, 2);
+ int16x8_t v1022 = vaddq_s16(v1019, v1021);
+ int16x8_t v1023 = vsubq_s16(v281, v291);
+ int16x8_t v1024 = vsubq_s16(v298, v308);
+ int16x8_t v1025_tmp = vqrdmulhq_n_s16(v1024, 18446);
+ int16x8_t v1025 = vmlaq_n_s16(v1025_tmp, v1024, 2);
+ int16x8_t v1026 = vaddq_s16(v1023, v1025);
+ int16x8_t v1027 = vqrdmulhq_n_s16(v1026, 21195);
+ int16x8_t v1028 = vaddq_s16(v1022, v1027);
+ int16x8_t v1029 = vqrdmulhq_n_s16(v1028, 17401);
+ int16x8_t v1030 = vaddq_s16(v1018, v1029);
+ int16x8_t v1031 = vqrdmulhq_n_s16(v1030, 16629);
+ int16x8_t v1032 = vaddq_s16(v1008, v1031);
+ int16x8_t v1033 = vsubq_s16(v323, v333);
+ int16x8_t v1034 = vsubq_s16(v346, v355);
+ int16x8_t v1035_tmp = vqrdmulhq_n_s16(v1034, 18446);
+ int16x8_t v1035 = vmlaq_n_s16(v1035_tmp, v1034, 2);
+ int16x8_t v1036 = vaddq_s16(v1033, v1035);
+ int16x8_t v1037 = vsubq_s16(v370, v388);
+ int16x8_t v1038 = vsubq_s16(v395, v405);
+ int16x8_t v1039_tmp = vqrdmulhq_n_s16(v1038, 18446);
+ int16x8_t v1039 = vmlaq_n_s16(v1039_tmp, v1038, 2);
+ int16x8_t v1040 = vaddq_s16(v1037, v1039);
+ int16x8_t v1041 = vqrdmulhq_n_s16(v1040, 21195);
+ int16x8_t v1042 = vaddq_s16(v1036, v1041);
+ int16x8_t v1043 = vsubq_s16(v422, v440);
+ int16x8_t v1044 = vsubq_s16(v465, v478);
+ int16x8_t v1045_tmp = vqrdmulhq_n_s16(v1044, 18446);
+ int16x8_t v1045 = vmlaq_n_s16(v1045_tmp, v1044, 2);
+ int16x8_t v1046 = vaddq_s16(v1043, v1045);
+ int16x8_t v1047 = vsubq_s16(v487, v497);
+ int16x8_t v1048 = vsubq_s16(v504, v515);
+ int16x8_t v1049_tmp = vqrdmulhq_n_s16(v1048, 18446);
+ int16x8_t v1049 = vmlaq_n_s16(v1049_tmp, v1048, 2);
+ int16x8_t v1050 = vaddq_s16(v1047, v1049);
+ int16x8_t v1051 = vqrdmulhq_n_s16(v1050, 21195);
+ int16x8_t v1052 = vaddq_s16(v1046, v1051);
+ int16x8_t v1053 = vqrdmulhq_n_s16(v1052, 17401);
+ int16x8_t v1054 = vaddq_s16(v1042, v1053);
+ int16x8_t v1055 = vsubq_s16(v528, v538);
+ int16x8_t v1056 = vsubq_s16(v551, v560);
+ int16x8_t v1057_tmp = vqrdmulhq_n_s16(v1056, 18446);
+ int16x8_t v1057 = vmlaq_n_s16(v1057_tmp, v1056, 2);
+ int16x8_t v1058 = vaddq_s16(v1055, v1057);
+ int16x8_t v1059 = vsubq_s16(v575, v593);
+ int16x8_t v1060 = vsubq_s16(v600, v610);
+ int16x8_t v1061_tmp = vqrdmulhq_n_s16(v1060, 18446);
+ int16x8_t v1061 = vmlaq_n_s16(v1061_tmp, v1060, 2);
+ int16x8_t v1062 = vaddq_s16(v1059, v1061);
+ int16x8_t v1063 = vqrdmulhq_n_s16(v1062, 21195);
+ int16x8_t v1064 = vaddq_s16(v1058, v1063);
+ int16x8_t v1065 = vsubq_s16(v621, v631);
+ int16x8_t v1066 = vsubq_s16(v644, v653);
+ int16x8_t v1067_tmp = vqrdmulhq_n_s16(v1066, 18446);
+ int16x8_t v1067 = vmlaq_n_s16(v1067_tmp, v1066, 2);
+ int16x8_t v1068 = vaddq_s16(v1065, v1067);
+ int16x8_t v1069 = vsubq_s16(v662, v672);
+ int16x8_t v1070 = vsubq_s16(v679, v690);
+ int16x8_t v1071_tmp = vqrdmulhq_n_s16(v1070, 18446);
+ int16x8_t v1071 = vmlaq_n_s16(v1071_tmp, v1070, 2);
+ int16x8_t v1072 = vaddq_s16(v1069, v1071);
+ int16x8_t v1073 = vqrdmulhq_n_s16(v1072, 21195);
+ int16x8_t v1074 = vaddq_s16(v1068, v1073);
+ int16x8_t v1075 = vqrdmulhq_n_s16(v1074, 17401);
+ int16x8_t v1076 = vaddq_s16(v1064, v1075);
+ int16x8_t v1077 = vqrdmulhq_n_s16(v1076, 16629);
+ int16x8_t v1078 = vaddq_s16(v1054, v1077);
+ int16x8_t v1079 = vqrdmulhq_n_s16(v1078, 16445);
+ int16x8_t v1080 = vaddq_s16(v1032, v1079);
+ int16x8_t v1081 = vsubq_s16(v987, v989);
+ int16x8_t v1082 = vsubq_s16(v991, v993);
+ int16x8_t v1083 = vqrdmulhq_n_s16(v1082, 25826);
+ int16x8_t v1084 = vaddq_s16(v1081, v1083);
+ int16x8_t v1085 = vsubq_s16(v997, v999);
+ int16x8_t v1086 = vsubq_s16(v1001, v1003);
+ int16x8_t v1087 = vqrdmulhq_n_s16(v1086, 25826);
+ int16x8_t v1088 = vaddq_s16(v1085, v1087);
+ int16x8_t v1089 = vqrdmulhq_n_s16(v1088, 18124);
+ int16x8_t v1090 = vaddq_s16(v1084, v1089);
+ int16x8_t v1091 = vsubq_s16(v1009, v1011);
+ int16x8_t v1092 = vsubq_s16(v1013, v1015);
+ int16x8_t v1093 = vqrdmulhq_n_s16(v1092, 25826);
+ int16x8_t v1094 = vaddq_s16(v1091, v1093);
+ int16x8_t v1095 = vsubq_s16(v1019, v1021);
+ int16x8_t v1096 = vsubq_s16(v1023, v1025);
+ int16x8_t v1097 = vqrdmulhq_n_s16(v1096, 25826);
+ int16x8_t v1098 = vaddq_s16(v1095, v1097);
+ int16x8_t v1099 = vqrdmulhq_n_s16(v1098, 18124);
+ int16x8_t v1100 = vaddq_s16(v1094, v1099);
+ int16x8_t v1101 = vqrdmulhq_n_s16(v1100, 16792);
+ int16x8_t v1102 = vaddq_s16(v1090, v1101);
+ int16x8_t v1103 = vsubq_s16(v1033, v1035);
+ int16x8_t v1104 = vsubq_s16(v1037, v1039);
+ int16x8_t v1105 = vqrdmulhq_n_s16(v1104, 25826);
+ int16x8_t v1106 = vaddq_s16(v1103, v1105);
+ int16x8_t v1107 = vsubq_s16(v1043, v1045);
+ int16x8_t v1108 = vsubq_s16(v1047, v1049);
+ int16x8_t v1109 = vqrdmulhq_n_s16(v1108, 25826);
+ int16x8_t v1110 = vaddq_s16(v1107, v1109);
+ int16x8_t v1111 = vqrdmulhq_n_s16(v1110, 18124);
+ int16x8_t v1112 = vaddq_s16(v1106, v1111);
+ int16x8_t v1113 = vsubq_s16(v1055, v1057);
+ int16x8_t v1114 = vsubq_s16(v1059, v1061);
+ int16x8_t v1115 = vqrdmulhq_n_s16(v1114, 25826);
+ int16x8_t v1116 = vaddq_s16(v1113, v1115);
+ int16x8_t v1117 = vsubq_s16(v1065, v1067);
+ int16x8_t v1118 = vsubq_s16(v1069, v1071);
+ int16x8_t v1119 = vqrdmulhq_n_s16(v1118, 25826);
+ int16x8_t v1120 = vaddq_s16(v1117, v1119);
+ int16x8_t v1121 = vqrdmulhq_n_s16(v1120, 18124);
+ int16x8_t v1122 = vaddq_s16(v1116, v1121);
+ int16x8_t v1123 = vqrdmulhq_n_s16(v1122, 16792);
+ int16x8_t v1124 = vaddq_s16(v1112, v1123);
+ int16x8_t v1125 = vqrdmulhq_n_s16(v1124, 16484);
+ int16x8_t v1126 = vaddq_s16(v1102, v1125);
+ int16x8_t v1127 = vsubq_s16(v892, v894);
+ int16x8_t v1128 = vsubq_s16(v896, v898);
+ int16x8_t v1129_tmp = vqrdmulhq_n_s16(v1128, 1988);
+ int16x8_t v1129 = vaddq_s16(v1129_tmp, v1128);
+ int16x8_t v1130 = vaddq_s16(v1127, v1129);
+ int16x8_t v1131 = vsubq_s16(v902, v904);
+ int16x8_t v1132 = vsubq_s16(v906, v908);
+ int16x8_t v1133_tmp = vqrdmulhq_n_s16(v1132, 1988);
+ int16x8_t v1133 = vaddq_s16(v1133_tmp, v1132);
+ int16x8_t v1134 = vaddq_s16(v1131, v1133);
+ int16x8_t v1135 = vqrdmulhq_n_s16(v1134, 19102);
+ int16x8_t v1136 = vaddq_s16(v1130, v1135);
+ int16x8_t v1137 = vsubq_s16(v914, v917);
+ int16x8_t v1138 = vsubq_s16(v919, v921);
+ int16x8_t v1139_tmp = vqrdmulhq_n_s16(v1138, 1988);
+ int16x8_t v1139 = vaddq_s16(v1139_tmp, v1138);
+ int16x8_t v1140 = vaddq_s16(v1137, v1139);
+ int16x8_t v1141 = vsubq_s16(v925, v927);
+ int16x8_t v1142 = vsubq_s16(v929, v931);
+ int16x8_t v1143_tmp = vqrdmulhq_n_s16(v1142, 1988);
+ int16x8_t v1143 = vaddq_s16(v1143_tmp, v1142);
+ int16x8_t v1144 = vaddq_s16(v1141, v1143);
+ int16x8_t v1145 = vqrdmulhq_n_s16(v1144, 19102);
+ int16x8_t v1146 = vaddq_s16(v1140, v1145);
+ int16x8_t v1147 = vqrdmulhq_n_s16(v1146, 17000);
+ int16x8_t v1148 = vaddq_s16(v1136, v1147);
+ int16x8_t v1149 = vsubq_s16(v939, v941);
+ int16x8_t v1150 = vsubq_s16(v943, v945);
+ int16x8_t v1151_tmp = vqrdmulhq_n_s16(v1150, 1988);
+ int16x8_t v1151 = vaddq_s16(v1151_tmp, v1150);
+ int16x8_t v1152 = vaddq_s16(v1149, v1151);
+ int16x8_t v1153 = vsubq_s16(v949, v951);
+ int16x8_t v1154 = vsubq_s16(v953, v955);
+ int16x8_t v1155_tmp = vqrdmulhq_n_s16(v1154, 1988);
+ int16x8_t v1155 = vaddq_s16(v1155_tmp, v1154);
+ int16x8_t v1156 = vaddq_s16(v1153, v1155);
+ int16x8_t v1157 = vqrdmulhq_n_s16(v1156, 19102);
+ int16x8_t v1158 = vaddq_s16(v1152, v1157);
+ int16x8_t v1159 = vsubq_s16(v961, v963);
+ int16x8_t v1160 = vsubq_s16(v965, v967);
+ int16x8_t v1161_tmp = vqrdmulhq_n_s16(v1160, 1988);
+ int16x8_t v1161 = vaddq_s16(v1161_tmp, v1160);
+ int16x8_t v1162 = vaddq_s16(v1159, v1161);
+ int16x8_t v1163 = vsubq_s16(v971, v973);
+ int16x8_t v1164 = vsubq_s16(v975, v977);
+ int16x8_t v1165_tmp = vqrdmulhq_n_s16(v1164, 1988);
+ int16x8_t v1165 = vaddq_s16(v1165_tmp, v1164);
+ int16x8_t v1166 = vaddq_s16(v1163, v1165);
+ int16x8_t v1167 = vqrdmulhq_n_s16(v1166, 19102);
+ int16x8_t v1168 = vaddq_s16(v1162, v1167);
+ int16x8_t v1169 = vqrdmulhq_n_s16(v1168, 17000);
+ int16x8_t v1170 = vaddq_s16(v1158, v1169);
+ int16x8_t v1171 = vqrdmulhq_n_s16(v1170, 16534);
+ int16x8_t v1172 = vaddq_s16(v1148, v1171);
+ int16x8_t v1173 = vsubq_s16(v705, v710);
+ int16x8_t v1174 = vsubq_s16(v715, v720);
+ int16x8_t v1175_tmp = vqrdmulhq_n_s16(v1174, 23673);
+ int16x8_t v1175 = vaddq_s16(v1175_tmp, v1174);
+ int16x8_t v1176 = vaddq_s16(v1173, v1175);
+ int16x8_t v1177 = vsubq_s16(v727, v732);
+ int16x8_t v1178 = vsubq_s16(v737, v742);
+ int16x8_t v1179_tmp = vqrdmulhq_n_s16(v1178, 23673);
+ int16x8_t v1179 = vaddq_s16(v1179_tmp, v1178);
+ int16x8_t v1180 = vaddq_s16(v1177, v1179);
+ int16x8_t v1181 = vqrdmulhq_n_s16(v1180, 20398);
+ int16x8_t v1182 = vaddq_s16(v1176, v1181);
+ int16x8_t v1183 = vsubq_s16(v751, v756);
+ int16x8_t v1184 = vsubq_s16(v761, v766);
+ int16x8_t v1185_tmp = vqrdmulhq_n_s16(v1184, 23673);
+ int16x8_t v1185 = vaddq_s16(v1185_tmp, v1184);
+ int16x8_t v1186 = vaddq_s16(v1183, v1185);
+ int16x8_t v1187 = vsubq_s16(v773, v778);
+ int16x8_t v1188 = vsubq_s16(v783, v788);
+ int16x8_t v1189_tmp = vqrdmulhq_n_s16(v1188, 23673);
+ int16x8_t v1189 = vaddq_s16(v1189_tmp, v1188);
+ int16x8_t v1190 = vaddq_s16(v1187, v1189);
+ int16x8_t v1191 = vqrdmulhq_n_s16(v1190, 20398);
+ int16x8_t v1192 = vaddq_s16(v1186, v1191);
+ int16x8_t v1193 = vqrdmulhq_n_s16(v1192, 17255);
+ int16x8_t v1194 = vaddq_s16(v1182, v1193);
+ int16x8_t v1195 = vsubq_s16(v799, v804);
+ int16x8_t v1196 = vsubq_s16(v809, v814);
+ int16x8_t v1197_tmp = vqrdmulhq_n_s16(v1196, 23673);
+ int16x8_t v1197 = vaddq_s16(v1197_tmp, v1196);
+ int16x8_t v1198 = vaddq_s16(v1195, v1197);
+ int16x8_t v1199 = vsubq_s16(v821, v826);
+ int16x8_t v1200 = vsubq_s16(v831, v836);
+ int16x8_t v1201_tmp = vqrdmulhq_n_s16(v1200, 23673);
+ int16x8_t v1201 = vaddq_s16(v1201_tmp, v1200);
+ int16x8_t v1202 = vaddq_s16(v1199, v1201);
+ int16x8_t v1203 = vqrdmulhq_n_s16(v1202, 20398);
+ int16x8_t v1204 = vaddq_s16(v1198, v1203);
+ int16x8_t v1205 = vsubq_s16(v845, v850);
+ int16x8_t v1206 = vsubq_s16(v855, v860);
+ int16x8_t v1207_tmp = vqrdmulhq_n_s16(v1206, 23673);
+ int16x8_t v1207 = vaddq_s16(v1207_tmp, v1206);
+ int16x8_t v1208 = vaddq_s16(v1205, v1207);
+ int16x8_t v1209 = vsubq_s16(v867, v872);
+ int16x8_t v1210 = vsubq_s16(v877, v882);
+ int16x8_t v1211_tmp = vqrdmulhq_n_s16(v1210, 23673);
+ int16x8_t v1211 = vaddq_s16(v1211_tmp, v1210);
+ int16x8_t v1212 = vaddq_s16(v1209, v1211);
+ int16x8_t v1213 = vqrdmulhq_n_s16(v1212, 20398);
+ int16x8_t v1214 = vaddq_s16(v1208, v1213);
+ int16x8_t v1215 = vqrdmulhq_n_s16(v1214, 17255);
+ int16x8_t v1216 = vaddq_s16(v1204, v1215);
+ int16x8_t v1217 = vqrdmulhq_n_s16(v1216, 16595);
+ int16x8_t v1218 = vaddq_s16(v1194, v1217);
+ int16x8_t v1219 = vsubq_s16(v9, v24);
+ int16x8_t v1220 = vsubq_s16(v42, v58);
+ int16x8_t v1221_tmp = vqrdmulhq_n_s16(v1220, 3314);
+ int16x8_t v1221 = vmlaq_n_s16(v1221_tmp, v1220, 5);
+ int16x8_t v1222 = vaddq_s16(v1219, v1221);
+ int16x8_t v1223 = vsubq_s16(v78, v101);
+ int16x8_t v1224 = vsubq_s16(v119, v136);
+ int16x8_t v1225_tmp = vqrdmulhq_n_s16(v1224, 3314);
+ int16x8_t v1225 = vmlaq_n_s16(v1225_tmp, v1224, 5);
+ int16x8_t v1226 = vaddq_s16(v1223, v1225);
+ int16x8_t v1227 = vqrdmulhq_n_s16(v1226, 22112);
+ int16x8_t v1228 = vaddq_s16(v1222, v1227);
+ int16x8_t v1229 = vsubq_s16(v158, v181);
+ int16x8_t v1230 = vsubq_s16(v213, v231);
+ int16x8_t v1231_tmp = vqrdmulhq_n_s16(v1230, 3314);
+ int16x8_t v1231 = vmlaq_n_s16(v1231_tmp, v1230, 5);
+ int16x8_t v1232 = vaddq_s16(v1229, v1231);
+ int16x8_t v1233 = vsubq_s16(v251, v274);
+ int16x8_t v1234 = vsubq_s16(v292, v310);
+ int16x8_t v1235_tmp = vqrdmulhq_n_s16(v1234, 3314);
+ int16x8_t v1235 = vmlaq_n_s16(v1235_tmp, v1234, 5);
+ int16x8_t v1236 = vaddq_s16(v1233, v1235);
+ int16x8_t v1237 = vqrdmulhq_n_s16(v1236, 22112);
+ int16x8_t v1238 = vaddq_s16(v1232, v1237);
+ int16x8_t v1239 = vqrdmulhq_n_s16(v1238, 17561);
+ int16x8_t v1240 = vaddq_s16(v1228, v1239);
+ int16x8_t v1241 = vsubq_s16(v334, v357);
+ int16x8_t v1242 = vsubq_s16(v389, v407);
+ int16x8_t v1243_tmp = vqrdmulhq_n_s16(v1242, 3314);
+ int16x8_t v1243 = vmlaq_n_s16(v1243_tmp, v1242, 5);
+ int16x8_t v1244 = vaddq_s16(v1241, v1243);
+ int16x8_t v1245 = vsubq_s16(v441, v480);
+ int16x8_t v1246 = vsubq_s16(v498, v517);
+ int16x8_t v1247_tmp = vqrdmulhq_n_s16(v1246, 3314);
+ int16x8_t v1247 = vmlaq_n_s16(v1247_tmp, v1246, 5);
+ int16x8_t v1248 = vaddq_s16(v1245, v1247);
+ int16x8_t v1249 = vqrdmulhq_n_s16(v1248, 22112);
+ int16x8_t v1250 = vaddq_s16(v1244, v1249);
+ int16x8_t v1251 = vsubq_s16(v539, v562);
+ int16x8_t v1252 = vsubq_s16(v594, v612);
+ int16x8_t v1253_tmp = vqrdmulhq_n_s16(v1252, 3314);
+ int16x8_t v1253 = vmlaq_n_s16(v1253_tmp, v1252, 5);
+ int16x8_t v1254 = vaddq_s16(v1251, v1253);
+ int16x8_t v1255 = vsubq_s16(v632, v655);
+ int16x8_t v1256 = vsubq_s16(v673, v692);
+ int16x8_t v1257_tmp = vqrdmulhq_n_s16(v1256, 3314);
+ int16x8_t v1257 = vmlaq_n_s16(v1257_tmp, v1256, 5);
+ int16x8_t v1258 = vaddq_s16(v1255, v1257);
+ int16x8_t v1259 = vqrdmulhq_n_s16(v1258, 22112);
+ int16x8_t v1260 = vaddq_s16(v1254, v1259);
+ int16x8_t v1261 = vqrdmulhq_n_s16(v1260, 17561);
+ int16x8_t v1262 = vaddq_s16(v1250, v1261);
+ int16x8_t v1263 = vqrdmulhq_n_s16(v1262, 16666);
+ int16x8_t v1264 = vaddq_s16(v1240, v1263);
+ int16x8_t v1265 = vsubq_s16(v1219, v1221);
+ int16x8_t v1266 = vsubq_s16(v1223, v1225);
+ int16x8_t v1267 = vqrdmulhq_n_s16(v1266, 24397);
+ int16x8_t v1268 = vaddq_s16(v1265, v1267);
+ int16x8_t v1269 = vsubq_s16(v1229, v1231);
+ int16x8_t v1270 = vsubq_s16(v1233, v1235);
+ int16x8_t v1271 = vqrdmulhq_n_s16(v1270, 24397);
+ int16x8_t v1272 = vaddq_s16(v1269, v1271);
+ int16x8_t v1273 = vqrdmulhq_n_s16(v1272, 17921);
+ int16x8_t v1274 = vaddq_s16(v1268, v1273);
+ int16x8_t v1275 = vsubq_s16(v1241, v1243);
+ int16x8_t v1276 = vsubq_s16(v1245, v1247);
+ int16x8_t v1277 = vqrdmulhq_n_s16(v1276, 24397);
+ int16x8_t v1278 = vaddq_s16(v1275, v1277);
+ int16x8_t v1279 = vsubq_s16(v1251, v1253);
+ int16x8_t v1280 = vsubq_s16(v1255, v1257);
+ int16x8_t v1281 = vqrdmulhq_n_s16(v1280, 24397);
+ int16x8_t v1282 = vaddq_s16(v1279, v1281);
+ int16x8_t v1283 = vqrdmulhq_n_s16(v1282, 17921);
+ int16x8_t v1284 = vaddq_s16(v1278, v1283);
+ int16x8_t v1285 = vqrdmulhq_n_s16(v1284, 16747);
+ int16x8_t v1286 = vaddq_s16(v1274, v1285);
+ int16x8_t v1287 = vsubq_s16(v1173, v1175);
+ int16x8_t v1288 = vsubq_s16(v1177, v1179);
+ int16x8_t v1289 = vqrdmulhq_n_s16(v1288, 27504);
+ int16x8_t v1290 = vaddq_s16(v1287, v1289);
+ int16x8_t v1291 = vsubq_s16(v1183, v1185);
+ int16x8_t v1292 = vsubq_s16(v1187, v1189);
+ int16x8_t v1293 = vqrdmulhq_n_s16(v1292, 27504);
+ int16x8_t v1294 = vaddq_s16(v1291, v1293);
+ int16x8_t v1295 = vqrdmulhq_n_s16(v1294, 18343);
+ int16x8_t v1296 = vaddq_s16(v1290, v1295);
+ int16x8_t v1297 = vsubq_s16(v1195, v1197);
+ int16x8_t v1298 = vsubq_s16(v1199, v1201);
+ int16x8_t v1299 = vqrdmulhq_n_s16(v1298, 27504);
+ int16x8_t v1300 = vaddq_s16(v1297, v1299);
+ int16x8_t v1301 = vsubq_s16(v1205, v1207);
+ int16x8_t v1302 = vsubq_s16(v1209, v1211);
+ int16x8_t v1303 = vqrdmulhq_n_s16(v1302, 27504);
+ int16x8_t v1304 = vaddq_s16(v1301, v1303);
+ int16x8_t v1305 = vqrdmulhq_n_s16(v1304, 18343);
+ int16x8_t v1306 = vaddq_s16(v1300, v1305);
+ int16x8_t v1307 = vqrdmulhq_n_s16(v1306, 16840);
+ int16x8_t v1308 = vaddq_s16(v1296, v1307);
+ int16x8_t v1309 = vsubq_s16(v1127, v1129);
+ int16x8_t v1310 = vsubq_s16(v1131, v1133);
+ int16x8_t v1311 = vqrdmulhq_n_s16(v1310, 31869);
+ int16x8_t v1312 = vaddq_s16(v1309, v1311);
+ int16x8_t v1313 = vsubq_s16(v1137, v1139);
+ int16x8_t v1314 = vsubq_s16(v1141, v1143);
+ int16x8_t v1315 = vqrdmulhq_n_s16(v1314, 31869);
+ int16x8_t v1316 = vaddq_s16(v1313, v1315);
+ int16x8_t v1317 = vqrdmulhq_n_s16(v1316, 18830);
+ int16x8_t v1318 = vaddq_s16(v1312, v1317);
+ int16x8_t v1319 = vsubq_s16(v1149, v1151);
+ int16x8_t v1320 = vsubq_s16(v1153, v1155);
+ int16x8_t v1321 = vqrdmulhq_n_s16(v1320, 31869);
+ int16x8_t v1322 = vaddq_s16(v1319, v1321);
+ int16x8_t v1323 = vsubq_s16(v1159, v1161);
+ int16x8_t v1324 = vsubq_s16(v1163, v1165);
+ int16x8_t v1325 = vqrdmulhq_n_s16(v1324, 31869);
+ int16x8_t v1326 = vaddq_s16(v1323, v1325);
+ int16x8_t v1327 = vqrdmulhq_n_s16(v1326, 18830);
+ int16x8_t v1328 = vaddq_s16(v1322, v1327);
+ int16x8_t v1329 = vqrdmulhq_n_s16(v1328, 16944);
+ int16x8_t v1330 = vaddq_s16(v1318, v1329);
+ int16x8_t v1331 = vsubq_s16(v1081, v1083);
+ int16x8_t v1332 = vsubq_s16(v1085, v1087);
+ int16x8_t v1333_tmp = vqrdmulhq_n_s16(v1332, 5552);
+ int16x8_t v1333 = vaddq_s16(v1333_tmp, v1332);
+ int16x8_t v1334 = vaddq_s16(v1331, v1333);
+ int16x8_t v1335 = vsubq_s16(v1091, v1093);
+ int16x8_t v1336 = vsubq_s16(v1095, v1097);
+ int16x8_t v1337_tmp = vqrdmulhq_n_s16(v1336, 5552);
+ int16x8_t v1337 = vaddq_s16(v1337_tmp, v1336);
+ int16x8_t v1338 = vaddq_s16(v1335, v1337);
+ int16x8_t v1339 = vqrdmulhq_n_s16(v1338, 19393);
+ int16x8_t v1340 = vaddq_s16(v1334, v1339);
+ int16x8_t v1341 = vsubq_s16(v1103, v1105);
+ int16x8_t v1342 = vsubq_s16(v1107, v1109);
+ int16x8_t v1343_tmp = vqrdmulhq_n_s16(v1342, 5552);
+ int16x8_t v1343 = vaddq_s16(v1343_tmp, v1342);
+ int16x8_t v1344 = vaddq_s16(v1341, v1343);
+ int16x8_t v1345 = vsubq_s16(v1113, v1115);
+ int16x8_t v1346 = vsubq_s16(v1117, v1119);
+ int16x8_t v1347_tmp = vqrdmulhq_n_s16(v1346, 5552);
+ int16x8_t v1347 = vaddq_s16(v1347_tmp, v1346);
+ int16x8_t v1348 = vaddq_s16(v1345, v1347);
+ int16x8_t v1349 = vqrdmulhq_n_s16(v1348, 19393);
+ int16x8_t v1350 = vaddq_s16(v1344, v1349);
+ int16x8_t v1351 = vqrdmulhq_n_s16(v1350, 17059);
+ int16x8_t v1352 = vaddq_s16(v1340, v1351);
+ int16x8_t v1353 = vsubq_s16(v990, v995);
+ int16x8_t v1354 = vsubq_s16(v1000, v1005);
+ int16x8_t v1355_tmp = vqrdmulhq_n_s16(v1354, 15865);
+ int16x8_t v1355 = vaddq_s16(v1355_tmp, v1354);
+ int16x8_t v1356 = vaddq_s16(v1353, v1355);
+ int16x8_t v1357 = vsubq_s16(v1012, v1017);
+ int16x8_t v1358 = vsubq_s16(v1022, v1027);
+ int16x8_t v1359_tmp = vqrdmulhq_n_s16(v1358, 15865);
+ int16x8_t v1359 = vaddq_s16(v1359_tmp, v1358);
+ int16x8_t v1360 = vaddq_s16(v1357, v1359);
+ int16x8_t v1361 = vqrdmulhq_n_s16(v1360, 20040);
+ int16x8_t v1362 = vaddq_s16(v1356, v1361);
+ int16x8_t v1363 = vsubq_s16(v1036, v1041);
+ int16x8_t v1364 = vsubq_s16(v1046, v1051);
+ int16x8_t v1365_tmp = vqrdmulhq_n_s16(v1364, 15865);
+ int16x8_t v1365 = vaddq_s16(v1365_tmp, v1364);
+ int16x8_t v1366 = vaddq_s16(v1363, v1365);
+ int16x8_t v1367 = vsubq_s16(v1058, v1063);
+ int16x8_t v1368 = vsubq_s16(v1068, v1073);
+ int16x8_t v1369_tmp = vqrdmulhq_n_s16(v1368, 15865);
+ int16x8_t v1369 = vaddq_s16(v1369_tmp, v1368);
+ int16x8_t v1370 = vaddq_s16(v1367, v1369);
+ int16x8_t v1371 = vqrdmulhq_n_s16(v1370, 20040);
+ int16x8_t v1372 = vaddq_s16(v1366, v1371);
+ int16x8_t v1373 = vqrdmulhq_n_s16(v1372, 17187);
+ int16x8_t v1374 = vaddq_s16(v1362, v1373);
+ int16x8_t v1375 = vsubq_s16(v895, v900);
+ int16x8_t v1376 = vsubq_s16(v905, v910);
+ int16x8_t v1377_tmp = vqrdmulhq_n_s16(v1376, 1893);
+ int16x8_t v1377 = vmlaq_n_s16(v1377_tmp, v1376, 2);
+ int16x8_t v1378 = vaddq_s16(v1375, v1377);
+ int16x8_t v1379 = vsubq_s16(v918, v923);
+ int16x8_t v1380 = vsubq_s16(v928, v933);
+ int16x8_t v1381_tmp = vqrdmulhq_n_s16(v1380, 1893);
+ int16x8_t v1381 = vmlaq_n_s16(v1381_tmp, v1380, 2);
+ int16x8_t v1382 = vaddq_s16(v1379, v1381);
+ int16x8_t v1383 = vqrdmulhq_n_s16(v1382, 20783);
+ int16x8_t v1384 = vaddq_s16(v1378, v1383);
+ int16x8_t v1385 = vsubq_s16(v942, v947);
+ int16x8_t v1386 = vsubq_s16(v952, v957);
+ int16x8_t v1387_tmp = vqrdmulhq_n_s16(v1386, 1893);
+ int16x8_t v1387 = vmlaq_n_s16(v1387_tmp, v1386, 2);
+ int16x8_t v1388 = vaddq_s16(v1385, v1387);
+ int16x8_t v1389 = vsubq_s16(v964, v969);
+ int16x8_t v1390 = vsubq_s16(v974, v979);
+ int16x8_t v1391_tmp = vqrdmulhq_n_s16(v1390, 1893);
+ int16x8_t v1391 = vmlaq_n_s16(v1391_tmp, v1390, 2);
+ int16x8_t v1392 = vaddq_s16(v1389, v1391);
+ int16x8_t v1393 = vqrdmulhq_n_s16(v1392, 20783);
+ int16x8_t v1394 = vaddq_s16(v1388, v1393);
+ int16x8_t v1395 = vqrdmulhq_n_s16(v1394, 17326);
+ int16x8_t v1396 = vaddq_s16(v1384, v1395);
+ int16x8_t v1397 = vsubq_s16(v711, v722);
+ int16x8_t v1398 = vsubq_s16(v733, v744);
+ int16x8_t v1399_tmp = vqrdmulhq_n_s16(v1398, 13357);
+ int16x8_t v1399 = vmlaq_n_s16(v1399_tmp, v1398, 3);
+ int16x8_t v1400 = vaddq_s16(v1397, v1399);
+ int16x8_t v1401 = vsubq_s16(v757, v768);
+ int16x8_t v1402 = vsubq_s16(v779, v790);
+ int16x8_t v1403_tmp = vqrdmulhq_n_s16(v1402, 13357);
+ int16x8_t v1403 = vmlaq_n_s16(v1403_tmp, v1402, 3);
+ int16x8_t v1404 = vaddq_s16(v1401, v1403);
+ int16x8_t v1405 = vqrdmulhq_n_s16(v1404, 21637);
+ int16x8_t v1406 = vaddq_s16(v1400, v1405);
+ int16x8_t v1407 = vsubq_s16(v805, v816);
+ int16x8_t v1408 = vsubq_s16(v827, v838);
+ int16x8_t v1409_tmp = vqrdmulhq_n_s16(v1408, 13357);
+ int16x8_t v1409 = vmlaq_n_s16(v1409_tmp, v1408, 3);
+ int16x8_t v1410 = vaddq_s16(v1407, v1409);
+ int16x8_t v1411 = vsubq_s16(v851, v862);
+ int16x8_t v1412 = vsubq_s16(v873, v884);
+ int16x8_t v1413_tmp = vqrdmulhq_n_s16(v1412, 13357);
+ int16x8_t v1413 = vmlaq_n_s16(v1413_tmp, v1412, 3);
+ int16x8_t v1414 = vaddq_s16(v1411, v1413);
+ int16x8_t v1415 = vqrdmulhq_n_s16(v1414, 21637);
+ int16x8_t v1416 = vaddq_s16(v1410, v1415);
+ int16x8_t v1417 = vqrdmulhq_n_s16(v1416, 17479);
+ int16x8_t v1418 = vaddq_s16(v1406, v1417);
+ int16x8_t v1419 = vsubq_s16(v25, v60);
+ int16x8_t v1420 = vsubq_s16(v102, v138);
+ int16x8_t v1421_tmp = vqrdmulhq_n_s16(v1420, 6226);
+ int16x8_t v1421 = vmlaq_n_s16(v1421_tmp, v1420, 10);
+ int16x8_t v1422 = vaddq_s16(v1419, v1421);
+ int16x8_t v1423 = vsubq_s16(v182, v233);
+ int16x8_t v1424 = vsubq_s16(v275, v312);
+ int16x8_t v1425_tmp = vqrdmulhq_n_s16(v1424, 6226);
+ int16x8_t v1425 = vmlaq_n_s16(v1425_tmp, v1424, 10);
+ int16x8_t v1426 = vaddq_s16(v1423, v1425);
+ int16x8_t v1427 = vqrdmulhq_n_s16(v1426, 22622);
+ int16x8_t v1428 = vaddq_s16(v1422, v1427);
+ int16x8_t v1429 = vsubq_s16(v358, v409);
+ int16x8_t v1430 = vsubq_s16(v481, v519);
+ int16x8_t v1431_tmp = vqrdmulhq_n_s16(v1430, 6226);
+ int16x8_t v1431 = vmlaq_n_s16(v1431_tmp, v1430, 10);
+ int16x8_t v1432 = vaddq_s16(v1429, v1431);
+ int16x8_t v1433 = vsubq_s16(v563, v614);
+ int16x8_t v1434 = vsubq_s16(v656, v694);
+ int16x8_t v1435_tmp = vqrdmulhq_n_s16(v1434, 6226);
+ int16x8_t v1435 = vmlaq_n_s16(v1435_tmp, v1434, 10);
+ int16x8_t v1436 = vaddq_s16(v1433, v1435);
+ int16x8_t v1437 = vqrdmulhq_n_s16(v1436, 22622);
+ int16x8_t v1438 = vaddq_s16(v1432, v1437);
+ int16x8_t v1439 = vqrdmulhq_n_s16(v1438, 17646);
+ int16x8_t v1440 = vaddq_s16(v1428, v1439);
+ int16x8_t v1441 = vsubq_s16(v1419, v1421);
+ int16x8_t v1442 = vsubq_s16(v1423, v1425);
+ int16x8_t v1443 = vqrdmulhq_n_s16(v1442, 23761);
+ int16x8_t v1444 = vaddq_s16(v1441, v1443);
+ int16x8_t v1445 = vsubq_s16(v1429, v1431);
+ int16x8_t v1446 = vsubq_s16(v1433, v1435);
+ int16x8_t v1447 = vqrdmulhq_n_s16(v1446, 23761);
+ int16x8_t v1448 = vaddq_s16(v1445, v1447);
+ int16x8_t v1449 = vqrdmulhq_n_s16(v1448, 17826);
+ int16x8_t v1450 = vaddq_s16(v1444, v1449);
+ int16x8_t v1451 = vsubq_s16(v1397, v1399);
+ int16x8_t v1452 = vsubq_s16(v1401, v1403);
+ int16x8_t v1453 = vqrdmulhq_n_s16(v1452, 25084);
+ int16x8_t v1454 = vaddq_s16(v1451, v1453);
+ int16x8_t v1455 = vsubq_s16(v1407, v1409);
+ int16x8_t v1456 = vsubq_s16(v1411, v1413);
+ int16x8_t v1457 = vqrdmulhq_n_s16(v1456, 25084);
+ int16x8_t v1458 = vaddq_s16(v1455, v1457);
+ int16x8_t v1459 = vqrdmulhq_n_s16(v1458, 18021);
+ int16x8_t v1460 = vaddq_s16(v1454, v1459);
+ int16x8_t v1461 = vsubq_s16(v1375, v1377);
+ int16x8_t v1462 = vsubq_s16(v1379, v1381);
+ int16x8_t v1463 = vqrdmulhq_n_s16(v1462, 26631);
+ int16x8_t v1464 = vaddq_s16(v1461, v1463);
+ int16x8_t v1465 = vsubq_s16(v1385, v1387);
+ int16x8_t v1466 = vsubq_s16(v1389, v1391);
+ int16x8_t v1467 = vqrdmulhq_n_s16(v1466, 26631);
+ int16x8_t v1468 = vaddq_s16(v1465, v1467);
+ int16x8_t v1469 = vqrdmulhq_n_s16(v1468, 18231);
+ int16x8_t v1470 = vaddq_s16(v1464, v1469);
+ int16x8_t v1471 = vsubq_s16(v1353, v1355);
+ int16x8_t v1472 = vsubq_s16(v1357, v1359);
+ int16x8_t v1473 = vqrdmulhq_n_s16(v1472, 28454);
+ int16x8_t v1474 = vaddq_s16(v1471, v1473);
+ int16x8_t v1475 = vsubq_s16(v1363, v1365);
+ int16x8_t v1476 = vsubq_s16(v1367, v1369);
+ int16x8_t v1477 = vqrdmulhq_n_s16(v1476, 28454);
+ int16x8_t v1478 = vaddq_s16(v1475, v1477);
+ int16x8_t v1479 = vqrdmulhq_n_s16(v1478, 18458);
+ int16x8_t v1480 = vaddq_s16(v1474, v1479);
+ int16x8_t v1481 = vsubq_s16(v1331, v1333);
+ int16x8_t v1482 = vsubq_s16(v1335, v1337);
+ int16x8_t v1483 = vqrdmulhq_n_s16(v1482, 30624);
+ int16x8_t v1484 = vaddq_s16(v1481, v1483);
+ int16x8_t v1485 = vsubq_s16(v1341, v1343);
+ int16x8_t v1486 = vsubq_s16(v1345, v1347);
+ int16x8_t v1487 = vqrdmulhq_n_s16(v1486, 30624);
+ int16x8_t v1488 = vaddq_s16(v1485, v1487);
+ int16x8_t v1489 = vqrdmulhq_n_s16(v1488, 18702);
+ int16x8_t v1490 = vaddq_s16(v1484, v1489);
+ int16x8_t v1491 = vsubq_s16(v1309, v1311);
+ int16x8_t v1492 = vsubq_s16(v1313, v1315);
+ int16x8_t v1493_tmp = vqrdmulhq_n_s16(v1492, 472);
+ int16x8_t v1493 = vaddq_s16(v1493_tmp, v1492);
+ int16x8_t v1494 = vaddq_s16(v1491, v1493);
+ int16x8_t v1495 = vsubq_s16(v1319, v1321);
+ int16x8_t v1496 = vsubq_s16(v1323, v1325);
+ int16x8_t v1497_tmp = vqrdmulhq_n_s16(v1496, 472);
+ int16x8_t v1497 = vaddq_s16(v1497_tmp, v1496);
+ int16x8_t v1498 = vaddq_s16(v1495, v1497);
+ int16x8_t v1499 = vqrdmulhq_n_s16(v1498, 18964);
+ int16x8_t v1500 = vaddq_s16(v1494, v1499);
+ int16x8_t v1501 = vsubq_s16(v1287, v1289);
+ int16x8_t v1502 = vsubq_s16(v1291, v1293);
+ int16x8_t v1503_tmp = vqrdmulhq_n_s16(v1502, 3672);
+ int16x8_t v1503 = vaddq_s16(v1503_tmp, v1502);
+ int16x8_t v1504 = vaddq_s16(v1501, v1503);
+ int16x8_t v1505 = vsubq_s16(v1297, v1299);
+ int16x8_t v1506 = vsubq_s16(v1301, v1303);
+ int16x8_t v1507_tmp = vqrdmulhq_n_s16(v1506, 3672);
+ int16x8_t v1507 = vaddq_s16(v1507_tmp, v1506);
+ int16x8_t v1508 = vaddq_s16(v1505, v1507);
+ int16x8_t v1509 = vqrdmulhq_n_s16(v1508, 19245);
+ int16x8_t v1510 = vaddq_s16(v1504, v1509);
+ int16x8_t v1511 = vsubq_s16(v1265, v1267);
+ int16x8_t v1512 = vsubq_s16(v1269, v1271);
+ int16x8_t v1513_tmp = vqrdmulhq_n_s16(v1512, 7662);
+ int16x8_t v1513 = vaddq_s16(v1513_tmp, v1512);
+ int16x8_t v1514 = vaddq_s16(v1511, v1513);
+ int16x8_t v1515 = vsubq_s16(v1275, v1277);
+ int16x8_t v1516 = vsubq_s16(v1279, v1281);
+ int16x8_t v1517_tmp = vqrdmulhq_n_s16(v1516, 7662);
+ int16x8_t v1517 = vaddq_s16(v1517_tmp, v1516);
+ int16x8_t v1518 = vaddq_s16(v1515, v1517);
+ int16x8_t v1519 = vqrdmulhq_n_s16(v1518, 19546);
+ int16x8_t v1520 = vaddq_s16(v1514, v1519);
+ int16x8_t v1521 = vsubq_s16(v1222, v1227);
+ int16x8_t v1522 = vsubq_s16(v1232, v1237);
+ int16x8_t v1523_tmp = vqrdmulhq_n_s16(v1522, 12756);
+ int16x8_t v1523 = vaddq_s16(v1523_tmp, v1522);
+ int16x8_t v1524 = vaddq_s16(v1521, v1523);
+ int16x8_t v1525 = vsubq_s16(v1244, v1249);
+ int16x8_t v1526 = vsubq_s16(v1254, v1259);
+ int16x8_t v1527_tmp = vqrdmulhq_n_s16(v1526, 12756);
+ int16x8_t v1527 = vaddq_s16(v1527_tmp, v1526);
+ int16x8_t v1528 = vaddq_s16(v1525, v1527);
+ int16x8_t v1529 = vqrdmulhq_n_s16(v1528, 19869);
+ int16x8_t v1530 = vaddq_s16(v1524, v1529);
+ int16x8_t v1531 = vsubq_s16(v1176, v1181);
+ int16x8_t v1532 = vsubq_s16(v1186, v1191);
+ int16x8_t v1533_tmp = vqrdmulhq_n_s16(v1532, 19463);
+ int16x8_t v1533 = vaddq_s16(v1533_tmp, v1532);
+ int16x8_t v1534 = vaddq_s16(v1531, v1533);
+ int16x8_t v1535 = vsubq_s16(v1198, v1203);
+ int16x8_t v1536 = vsubq_s16(v1208, v1213);
+ int16x8_t v1537_tmp = vqrdmulhq_n_s16(v1536, 19463);
+ int16x8_t v1537 = vaddq_s16(v1537_tmp, v1536);
+ int16x8_t v1538 = vaddq_s16(v1535, v1537);
+ int16x8_t v1539 = vqrdmulhq_n_s16(v1538, 20216);
+ int16x8_t v1540 = vaddq_s16(v1534, v1539);
+ int16x8_t v1541 = vsubq_s16(v1130, v1135);
+ int16x8_t v1542 = vsubq_s16(v1140, v1145);
+ int16x8_t v1543_tmp = vqrdmulhq_n_s16(v1542, 28661);
+ int16x8_t v1543 = vaddq_s16(v1543_tmp, v1542);
+ int16x8_t v1544 = vaddq_s16(v1541, v1543);
+ int16x8_t v1545 = vsubq_s16(v1152, v1157);
+ int16x8_t v1546 = vsubq_s16(v1162, v1167);
+ int16x8_t v1547_tmp = vqrdmulhq_n_s16(v1546, 28661);
+ int16x8_t v1547 = vaddq_s16(v1547_tmp, v1546);
+ int16x8_t v1548 = vaddq_s16(v1545, v1547);
+ int16x8_t v1549 = vqrdmulhq_n_s16(v1548, 20587);
+ int16x8_t v1550 = vaddq_s16(v1544, v1549);
+ int16x8_t v1551 = vsubq_s16(v1084, v1089);
+ int16x8_t v1552 = vsubq_s16(v1094, v1099);
+ int16x8_t v1553_tmp = vqrdmulhq_n_s16(v1552, 9242);
+ int16x8_t v1553 = vmlaq_n_s16(v1553_tmp, v1552, 2);
+ int16x8_t v1554 = vaddq_s16(v1551, v1553);
+ int16x8_t v1555 = vsubq_s16(v1106, v1111);
+ int16x8_t v1556 = vsubq_s16(v1116, v1121);
+ int16x8_t v1557_tmp = vqrdmulhq_n_s16(v1556, 9242);
+ int16x8_t v1557 = vmlaq_n_s16(v1557_tmp, v1556, 2);
+ int16x8_t v1558 = vaddq_s16(v1555, v1557);
+ int16x8_t v1559 = vqrdmulhq_n_s16(v1558, 20985);
+ int16x8_t v1560 = vaddq_s16(v1554, v1559);
+ int16x8_t v1561 = vsubq_s16(v996, v1007);
+ int16x8_t v1562 = vsubq_s16(v1018, v1029);
+ int16x8_t v1563_tmp = vqrdmulhq_n_s16(v1562, 30298);
+ int16x8_t v1563 = vmlaq_n_s16(v1563_tmp, v1562, 2);
+ int16x8_t v1564 = vaddq_s16(v1561, v1563);
+ int16x8_t v1565 = vsubq_s16(v1042, v1053);
+ int16x8_t v1566 = vsubq_s16(v1064, v1075);
+ int16x8_t v1567_tmp = vqrdmulhq_n_s16(v1566, 30298);
+ int16x8_t v1567 = vmlaq_n_s16(v1567_tmp, v1566, 2);
+ int16x8_t v1568 = vaddq_s16(v1565, v1567);
+ int16x8_t v1569 = vqrdmulhq_n_s16(v1568, 21412);
+ int16x8_t v1570 = vaddq_s16(v1564, v1569);
+ int16x8_t v1571 = vsubq_s16(v901, v912);
+ int16x8_t v1572 = vsubq_s16(v924, v935);
+ int16x8_t v1573_tmp = vqrdmulhq_n_s16(v1572, 2773);
+ int16x8_t v1573 = vmlaq_n_s16(v1573_tmp, v1572, 4);
+ int16x8_t v1574 = vaddq_s16(v1571, v1573);
+ int16x8_t v1575 = vsubq_s16(v948, v959);
+ int16x8_t v1576 = vsubq_s16(v970, v981);
+ int16x8_t v1577_tmp = vqrdmulhq_n_s16(v1576, 2773);
+ int16x8_t v1577 = vmlaq_n_s16(v1577_tmp, v1576, 4);
+ int16x8_t v1578 = vaddq_s16(v1575, v1577);
+ int16x8_t v1579 = vqrdmulhq_n_s16(v1578, 21871);
+ int16x8_t v1580 = vaddq_s16(v1574, v1579);
+ int16x8_t v1581 = vsubq_s16(v723, v746);
+ int16x8_t v1582 = vsubq_s16(v769, v792);
+ int16x8_t v1583_tmp = vqrdmulhq_n_s16(v1582, 26108);
+ int16x8_t v1583 = vmlaq_n_s16(v1583_tmp, v1582, 6);
+ int16x8_t v1584 = vaddq_s16(v1581, v1583);
+ int16x8_t v1585 = vsubq_s16(v817, v840);
+ int16x8_t v1586 = vsubq_s16(v863, v886);
+ int16x8_t v1587_tmp = vqrdmulhq_n_s16(v1586, 26108);
+ int16x8_t v1587 = vmlaq_n_s16(v1587_tmp, v1586, 6);
+ int16x8_t v1588 = vaddq_s16(v1585, v1587);
+ int16x8_t v1589 = vqrdmulhq_n_s16(v1588, 22363);
+ int16x8_t v1590 = vaddq_s16(v1584, v1589);
+ int16x8_t v1591 = vsubq_s16(v61, v140);
+ int16x8_t v1592 = vsubq_s16(v234, v314);
+ int16x8_t v1593_tmp = vqrdmulhq_n_s16(v1592, 12251);
+ int16x8_t v1593 = vmlaq_n_s16(v1593_tmp, v1592, 20);
+ int16x8_t v1594 = vaddq_s16(v1591, v1593);
+ int16x8_t v1595 = vsubq_s16(v410, v521);
+ int16x8_t v1596 = vsubq_s16(v615, v696);
+ int16x8_t v1597_tmp = vqrdmulhq_n_s16(v1596, 12251);
+ int16x8_t v1597 = vmlaq_n_s16(v1597_tmp, v1596, 20);
+ int16x8_t v1598 = vaddq_s16(v1595, v1597);
+ int16x8_t v1599 = vqrdmulhq_n_s16(v1598, 22891);
+ int16x8_t v1600 = vaddq_s16(v1594, v1599);
+ int16x8_t v1601 = vsubq_s16(v1591, v1593);
+ int16x8_t v1602 = vsubq_s16(v1595, v1597);
+ int16x8_t v1603 = vqrdmulhq_n_s16(v1602, 23460);
+ int16x8_t v1604 = vaddq_s16(v1601, v1603);
+ int16x8_t v1605 = vsubq_s16(v1581, v1583);
+ int16x8_t v1606 = vsubq_s16(v1585, v1587);
+ int16x8_t v1607 = vqrdmulhq_n_s16(v1606, 24073);
+ int16x8_t v1608 = vaddq_s16(v1605, v1607);
+ int16x8_t v1609 = vsubq_s16(v1571, v1573);
+ int16x8_t v1610 = vsubq_s16(v1575, v1577);
+ int16x8_t v1611 = vqrdmulhq_n_s16(v1610, 24734);
+ int16x8_t v1612 = vaddq_s16(v1609, v1611);
+ int16x8_t v1613 = vsubq_s16(v1561, v1563);
+ int16x8_t v1614 = vsubq_s16(v1565, v1567);
+ int16x8_t v1615 = vqrdmulhq_n_s16(v1614, 25448);
+ int16x8_t v1616 = vaddq_s16(v1613, v1615);
+ int16x8_t v1617 = vsubq_s16(v1551, v1553);
+ int16x8_t v1618 = vsubq_s16(v1555, v1557);
+ int16x8_t v1619 = vqrdmulhq_n_s16(v1618, 26220);
+ int16x8_t v1620 = vaddq_s16(v1617, v1619);
+ int16x8_t v1621 = vsubq_s16(v1541, v1543);
+ int16x8_t v1622 = vsubq_s16(v1545, v1547);
+ int16x8_t v1623 = vqrdmulhq_n_s16(v1622, 27058);
+ int16x8_t v1624 = vaddq_s16(v1621, v1623);
+ int16x8_t v1625 = vsubq_s16(v1531, v1533);
+ int16x8_t v1626 = vsubq_s16(v1535, v1537);
+ int16x8_t v1627 = vqrdmulhq_n_s16(v1626, 27969);
+ int16x8_t v1628 = vaddq_s16(v1625, v1627);
+ int16x8_t v1629 = vsubq_s16(v1521, v1523);
+ int16x8_t v1630 = vsubq_s16(v1525, v1527);
+ int16x8_t v1631 = vqrdmulhq_n_s16(v1630, 28961);
+ int16x8_t v1632 = vaddq_s16(v1629, v1631);
+ int16x8_t v1633 = vsubq_s16(v1511, v1513);
+ int16x8_t v1634 = vsubq_s16(v1515, v1517);
+ int16x8_t v1635 = vqrdmulhq_n_s16(v1634, 30044);
+ int16x8_t v1636 = vaddq_s16(v1633, v1635);
+ int16x8_t v1637 = vsubq_s16(v1501, v1503);
+ int16x8_t v1638 = vsubq_s16(v1505, v1507);
+ int16x8_t v1639 = vqrdmulhq_n_s16(v1638, 31232);
+ int16x8_t v1640 = vaddq_s16(v1637, v1639);
+ int16x8_t v1641 = vsubq_s16(v1491, v1493);
+ int16x8_t v1642 = vsubq_s16(v1495, v1497);
+ int16x8_t v1643 = vqrdmulhq_n_s16(v1642, 32538);
+ int16x8_t v1644 = vaddq_s16(v1641, v1643);
+ int16x8_t v1645 = vsubq_s16(v1481, v1483);
+ int16x8_t v1646 = vsubq_s16(v1485, v1487);
+ int16x8_t v1647_tmp = vqrdmulhq_n_s16(v1646, 1211);
+ int16x8_t v1647 = vaddq_s16(v1647_tmp, v1646);
+ int16x8_t v1648 = vaddq_s16(v1645, v1647);
+ int16x8_t v1649 = vsubq_s16(v1471, v1473);
+ int16x8_t v1650 = vsubq_s16(v1475, v1477);
+ int16x8_t v1651_tmp = vqrdmulhq_n_s16(v1650, 2808);
+ int16x8_t v1651 = vaddq_s16(v1651_tmp, v1650);
+ int16x8_t v1652 = vaddq_s16(v1649, v1651);
+ int16x8_t v1653 = vsubq_s16(v1461, v1463);
+ int16x8_t v1654 = vsubq_s16(v1465, v1467);
+ int16x8_t v1655_tmp = vqrdmulhq_n_s16(v1654, 4586);
+ int16x8_t v1655 = vaddq_s16(v1655_tmp, v1654);
+ int16x8_t v1656 = vaddq_s16(v1653, v1655);
+ int16x8_t v1657 = vsubq_s16(v1451, v1453);
+ int16x8_t v1658 = vsubq_s16(v1455, v1457);
+ int16x8_t v1659_tmp = vqrdmulhq_n_s16(v1658, 6576);
+ int16x8_t v1659 = vaddq_s16(v1659_tmp, v1658);
+ int16x8_t v1660 = vaddq_s16(v1657, v1659);
+ int16x8_t v1661 = vsubq_s16(v1441, v1443);
+ int16x8_t v1662 = vsubq_s16(v1445, v1447);
+ int16x8_t v1663_tmp = vqrdmulhq_n_s16(v1662, 8817);
+ int16x8_t v1663 = vaddq_s16(v1663_tmp, v1662);
+ int16x8_t v1664 = vaddq_s16(v1661, v1663);
+ int16x8_t v1665 = vsubq_s16(v1422, v1427);
+ int16x8_t v1666 = vsubq_s16(v1432, v1437);
+ int16x8_t v1667_tmp = vqrdmulhq_n_s16(v1666, 11356);
+ int16x8_t v1667 = vaddq_s16(v1667_tmp, v1666);
+ int16x8_t v1668 = vaddq_s16(v1665, v1667);
+ int16x8_t v1669 = vsubq_s16(v1400, v1405);
+ int16x8_t v1670 = vsubq_s16(v1410, v1415);
+ int16x8_t v1671_tmp = vqrdmulhq_n_s16(v1670, 14256);
+ int16x8_t v1671 = vaddq_s16(v1671_tmp, v1670);
+ int16x8_t v1672 = vaddq_s16(v1669, v1671);
+ int16x8_t v1673 = vsubq_s16(v1378, v1383);
+ int16x8_t v1674 = vsubq_s16(v1388, v1393);
+ int16x8_t v1675_tmp = vqrdmulhq_n_s16(v1674, 17596);
+ int16x8_t v1675 = vaddq_s16(v1675_tmp, v1674);
+ int16x8_t v1676 = vaddq_s16(v1673, v1675);
+ int16x8_t v1677 = vsubq_s16(v1356, v1361);
+ int16x8_t v1678 = vsubq_s16(v1366, v1371);
+ int16x8_t v1679_tmp = vqrdmulhq_n_s16(v1678, 21483);
+ int16x8_t v1679 = vaddq_s16(v1679_tmp, v1678);
+ int16x8_t v1680 = vaddq_s16(v1677, v1679);
+ int16x8_t v1681 = vsubq_s16(v1334, v1339);
+ int16x8_t v1682 = vsubq_s16(v1344, v1349);
+ int16x8_t v1683_tmp = vqrdmulhq_n_s16(v1682, 26057);
+ int16x8_t v1683 = vaddq_s16(v1683_tmp, v1682);
+ int16x8_t v1684 = vaddq_s16(v1681, v1683);
+ int16x8_t v1685 = vsubq_s16(v1312, v1317);
+ int16x8_t v1686 = vsubq_s16(v1322, v1327);
+ int16x8_t v1687_tmp = vqrdmulhq_n_s16(v1686, 31517);
+ int16x8_t v1687 = vaddq_s16(v1687_tmp, v1686);
+ int16x8_t v1688 = vaddq_s16(v1685, v1687);
+ int16x8_t v1689 = vsubq_s16(v1290, v1295);
+ int16x8_t v1690 = vsubq_s16(v1300, v1305);
+ int16x8_t v1691_tmp = vqrdmulhq_n_s16(v1690, 5373);
+ int16x8_t v1691 = vmlaq_n_s16(v1691_tmp, v1690, 2);
+ int16x8_t v1692 = vaddq_s16(v1689, v1691);
+ int16x8_t v1693 = vsubq_s16(v1268, v1273);
+ int16x8_t v1694 = vsubq_s16(v1278, v1283);
+ int16x8_t v1695_tmp = vqrdmulhq_n_s16(v1694, 13571);
+ int16x8_t v1695 = vmlaq_n_s16(v1695_tmp, v1694, 2);
+ int16x8_t v1696 = vaddq_s16(v1693, v1695);
+ int16x8_t v1697 = vsubq_s16(v1228, v1239);
+ int16x8_t v1698 = vsubq_s16(v1250, v1261);
+ int16x8_t v1699_tmp = vqrdmulhq_n_s16(v1698, 23975);
+ int16x8_t v1699 = vmlaq_n_s16(v1699_tmp, v1698, 2);
+ int16x8_t v1700 = vaddq_s16(v1697, v1699);
+ int16x8_t v1701 = vsubq_s16(v1182, v1193);
+ int16x8_t v1702 = vsubq_s16(v1204, v1215);
+ int16x8_t v1703_tmp = vqrdmulhq_n_s16(v1702, 4832);
+ int16x8_t v1703 = vmlaq_n_s16(v1703_tmp, v1702, 3);
+ int16x8_t v1704 = vaddq_s16(v1701, v1703);
+ int16x8_t v1705 = vsubq_s16(v1136, v1147);
+ int16x8_t v1706 = vsubq_s16(v1158, v1169);
+ int16x8_t v1707_tmp = vqrdmulhq_n_s16(v1706, 23437);
+ int16x8_t v1707 = vmlaq_n_s16(v1707_tmp, v1706, 3);
+ int16x8_t v1708 = vaddq_s16(v1705, v1707);
+ int16x8_t v1709 = vsubq_s16(v1090, v1101);
+ int16x8_t v1710 = vsubq_s16(v1112, v1123);
+ int16x8_t v1711_tmp = vqrdmulhq_n_s16(v1710, 17573);
+ int16x8_t v1711 = vmlaq_n_s16(v1711_tmp, v1710, 4);
+ int16x8_t v1712 = vaddq_s16(v1709, v1711);
+ int16x8_t v1713 = vsubq_s16(v1008, v1031);
+ int16x8_t v1714 = vsubq_s16(v1054, v1077);
+ int16x8_t v1715_tmp = vqrdmulhq_n_s16(v1714, 27122);
+ int16x8_t v1715 = vmlaq_n_s16(v1715_tmp, v1714, 5);
+ int16x8_t v1716 = vaddq_s16(v1713, v1715);
+ int16x8_t v1717 = vsubq_s16(v913, v937);
+ int16x8_t v1718 = vsubq_s16(v960, v983);
+ int16x8_t v1719_tmp = vqrdmulhq_n_s16(v1718, 5041);
+ int16x8_t v1719 = vmlaq_n_s16(v1719_tmp, v1718, 8);
+ int16x8_t v1720 = vaddq_s16(v1717, v1719);
+ int16x8_t v1721 = vsubq_s16(v747, v794);
+ int16x8_t v1722 = vsubq_s16(v841, v888);
+ int16x8_t v1723_tmp = vqrdmulhq_n_s16(v1722, 19146);
+ int16x8_t v1723 = vmlaq_n_s16(v1723_tmp, v1722, 13);
+ int16x8_t v1724 = vaddq_s16(v1721, v1723);
+ int16x8_t v1725 = vsubq_s16(v141, v316);
+ int16x8_t v1726 = vsubq_s16(v522, v698);
+ int16x8_t v1727_tmp = vqrdmulhq_n_s16(v1726, 24402);
+ int16x8_t v1727 = vmlaq_n_s16(v1727_tmp, v1726, 40);
+ int16x8_t v1728 = vaddq_s16(v1725, v1727);
+ int16x8_t v1729 = vsubq_s16(v1725, v1727);
+ int16x8_t v1730 = vsubq_s16(v1721, v1723);
+ int16x8_t v1731 = vsubq_s16(v1717, v1719);
+ int16x8_t v1732 = vsubq_s16(v1713, v1715);
+ int16x8_t v1733 = vsubq_s16(v1709, v1711);
+ int16x8_t v1734 = vsubq_s16(v1705, v1707);
+ int16x8_t v1735 = vsubq_s16(v1701, v1703);
+ int16x8_t v1736 = vsubq_s16(v1697, v1699);
+ int16x8_t v1737 = vsubq_s16(v1693, v1695);
+ int16x8_t v1738 = vsubq_s16(v1689, v1691);
+ int16x8_t v1739 = vsubq_s16(v1685, v1687);
+ int16x8_t v1740 = vsubq_s16(v1681, v1683);
+ int16x8_t v1741 = vsubq_s16(v1677, v1679);
+ int16x8_t v1742 = vsubq_s16(v1673, v1675);
+ int16x8_t v1743 = vsubq_s16(v1669, v1671);
+ int16x8_t v1744 = vsubq_s16(v1665, v1667);
+ int16x8_t v1745 = vsubq_s16(v1661, v1663);
+ int16x8_t v1746 = vsubq_s16(v1657, v1659);
+ int16x8_t v1747 = vsubq_s16(v1653, v1655);
+ int16x8_t v1748 = vsubq_s16(v1649, v1651);
+ int16x8_t v1749 = vsubq_s16(v1645, v1647);
+ int16x8_t v1750 = vsubq_s16(v1641, v1643);
+ int16x8_t v1751 = vsubq_s16(v1637, v1639);
+ int16x8_t v1752 = vsubq_s16(v1633, v1635);
+ int16x8_t v1753 = vsubq_s16(v1629, v1631);
+ int16x8_t v1754 = vsubq_s16(v1625, v1627);
+ int16x8_t v1755 = vsubq_s16(v1621, v1623);
+ int16x8_t v1756 = vsubq_s16(v1617, v1619);
+ int16x8_t v1757 = vsubq_s16(v1613, v1615);
+ int16x8_t v1758 = vsubq_s16(v1609, v1611);
+ int16x8_t v1759 = vsubq_s16(v1605, v1607);
+ int16x8_t v1760 = vsubq_s16(v1601, v1603);
+ int16x8_t v1761 = vsubq_s16(v1594, v1599);
+ int16x8_t v1762 = vsubq_s16(v1584, v1589);
+ int16x8_t v1763 = vsubq_s16(v1574, v1579);
+ int16x8_t v1764 = vsubq_s16(v1564, v1569);
+ int16x8_t v1765 = vsubq_s16(v1554, v1559);
+ int16x8_t v1766 = vsubq_s16(v1544, v1549);
+ int16x8_t v1767 = vsubq_s16(v1534, v1539);
+ int16x8_t v1768 = vsubq_s16(v1524, v1529);
+ int16x8_t v1769 = vsubq_s16(v1514, v1519);
+ int16x8_t v1770 = vsubq_s16(v1504, v1509);
+ int16x8_t v1771 = vsubq_s16(v1494, v1499);
+ int16x8_t v1772 = vsubq_s16(v1484, v1489);
+ int16x8_t v1773 = vsubq_s16(v1474, v1479);
+ int16x8_t v1774 = vsubq_s16(v1464, v1469);
+ int16x8_t v1775 = vsubq_s16(v1454, v1459);
+ int16x8_t v1776 = vsubq_s16(v1444, v1449);
+ int16x8_t v1777 = vsubq_s16(v1428, v1439);
+ int16x8_t v1778 = vsubq_s16(v1406, v1417);
+ int16x8_t v1779 = vsubq_s16(v1384, v1395);
+ int16x8_t v1780 = vsubq_s16(v1362, v1373);
+ int16x8_t v1781 = vsubq_s16(v1340, v1351);
+ int16x8_t v1782 = vsubq_s16(v1318, v1329);
+ int16x8_t v1783 = vsubq_s16(v1296, v1307);
+ int16x8_t v1784 = vsubq_s16(v1274, v1285);
+ int16x8_t v1785 = vsubq_s16(v1240, v1263);
+ int16x8_t v1786 = vsubq_s16(v1194, v1217);
+ int16x8_t v1787 = vsubq_s16(v1148, v1171);
+ int16x8_t v1788 = vsubq_s16(v1102, v1125);
+ int16x8_t v1789 = vsubq_s16(v1032, v1079);
+ int16x8_t v1790 = vsubq_s16(v938, v985);
+ int16x8_t v1791 = vsubq_s16(v795, v890);
+ int16x8_t v1792 = vsubq_s16(v317, v700);
+ vst1q_s16(out + out_stride * 0 + i, v701);
+ vst1q_s16(out + out_stride * 1 + i, v891);
+ vst1q_s16(out + out_stride * 2 + i, v986);
+ vst1q_s16(out + out_stride * 3 + i, v1080);
+ vst1q_s16(out + out_stride * 4 + i, v1126);
+ vst1q_s16(out + out_stride * 5 + i, v1172);
+ vst1q_s16(out + out_stride * 6 + i, v1218);
+ vst1q_s16(out + out_stride * 7 + i, v1264);
+ vst1q_s16(out + out_stride * 8 + i, v1286);
+ vst1q_s16(out + out_stride * 9 + i, v1308);
+ vst1q_s16(out + out_stride * 10 + i, v1330);
+ vst1q_s16(out + out_stride * 11 + i, v1352);
+ vst1q_s16(out + out_stride * 12 + i, v1374);
+ vst1q_s16(out + out_stride * 13 + i, v1396);
+ vst1q_s16(out + out_stride * 14 + i, v1418);
+ vst1q_s16(out + out_stride * 15 + i, v1440);
+ vst1q_s16(out + out_stride * 16 + i, v1450);
+ vst1q_s16(out + out_stride * 17 + i, v1460);
+ vst1q_s16(out + out_stride * 18 + i, v1470);
+ vst1q_s16(out + out_stride * 19 + i, v1480);
+ vst1q_s16(out + out_stride * 20 + i, v1490);
+ vst1q_s16(out + out_stride * 21 + i, v1500);
+ vst1q_s16(out + out_stride * 22 + i, v1510);
+ vst1q_s16(out + out_stride * 23 + i, v1520);
+ vst1q_s16(out + out_stride * 24 + i, v1530);
+ vst1q_s16(out + out_stride * 25 + i, v1540);
+ vst1q_s16(out + out_stride * 26 + i, v1550);
+ vst1q_s16(out + out_stride * 27 + i, v1560);
+ vst1q_s16(out + out_stride * 28 + i, v1570);
+ vst1q_s16(out + out_stride * 29 + i, v1580);
+ vst1q_s16(out + out_stride * 30 + i, v1590);
+ vst1q_s16(out + out_stride * 31 + i, v1600);
+ vst1q_s16(out + out_stride * 32 + i, v1604);
+ vst1q_s16(out + out_stride * 33 + i, v1608);
+ vst1q_s16(out + out_stride * 34 + i, v1612);
+ vst1q_s16(out + out_stride * 35 + i, v1616);
+ vst1q_s16(out + out_stride * 36 + i, v1620);
+ vst1q_s16(out + out_stride * 37 + i, v1624);
+ vst1q_s16(out + out_stride * 38 + i, v1628);
+ vst1q_s16(out + out_stride * 39 + i, v1632);
+ vst1q_s16(out + out_stride * 40 + i, v1636);
+ vst1q_s16(out + out_stride * 41 + i, v1640);
+ vst1q_s16(out + out_stride * 42 + i, v1644);
+ vst1q_s16(out + out_stride * 43 + i, v1648);
+ vst1q_s16(out + out_stride * 44 + i, v1652);
+ vst1q_s16(out + out_stride * 45 + i, v1656);
+ vst1q_s16(out + out_stride * 46 + i, v1660);
+ vst1q_s16(out + out_stride * 47 + i, v1664);
+ vst1q_s16(out + out_stride * 48 + i, v1668);
+ vst1q_s16(out + out_stride * 49 + i, v1672);
+ vst1q_s16(out + out_stride * 50 + i, v1676);
+ vst1q_s16(out + out_stride * 51 + i, v1680);
+ vst1q_s16(out + out_stride * 52 + i, v1684);
+ vst1q_s16(out + out_stride * 53 + i, v1688);
+ vst1q_s16(out + out_stride * 54 + i, v1692);
+ vst1q_s16(out + out_stride * 55 + i, v1696);
+ vst1q_s16(out + out_stride * 56 + i, v1700);
+ vst1q_s16(out + out_stride * 57 + i, v1704);
+ vst1q_s16(out + out_stride * 58 + i, v1708);
+ vst1q_s16(out + out_stride * 59 + i, v1712);
+ vst1q_s16(out + out_stride * 60 + i, v1716);
+ vst1q_s16(out + out_stride * 61 + i, v1720);
+ vst1q_s16(out + out_stride * 62 + i, v1724);
+ vst1q_s16(out + out_stride * 63 + i, v1728);
+ vst1q_s16(out + out_stride * 64 + i, v1729);
+ vst1q_s16(out + out_stride * 65 + i, v1730);
+ vst1q_s16(out + out_stride * 66 + i, v1731);
+ vst1q_s16(out + out_stride * 67 + i, v1732);
+ vst1q_s16(out + out_stride * 68 + i, v1733);
+ vst1q_s16(out + out_stride * 69 + i, v1734);
+ vst1q_s16(out + out_stride * 70 + i, v1735);
+ vst1q_s16(out + out_stride * 71 + i, v1736);
+ vst1q_s16(out + out_stride * 72 + i, v1737);
+ vst1q_s16(out + out_stride * 73 + i, v1738);
+ vst1q_s16(out + out_stride * 74 + i, v1739);
+ vst1q_s16(out + out_stride * 75 + i, v1740);
+ vst1q_s16(out + out_stride * 76 + i, v1741);
+ vst1q_s16(out + out_stride * 77 + i, v1742);
+ vst1q_s16(out + out_stride * 78 + i, v1743);
+ vst1q_s16(out + out_stride * 79 + i, v1744);
+ vst1q_s16(out + out_stride * 80 + i, v1745);
+ vst1q_s16(out + out_stride * 81 + i, v1746);
+ vst1q_s16(out + out_stride * 82 + i, v1747);
+ vst1q_s16(out + out_stride * 83 + i, v1748);
+ vst1q_s16(out + out_stride * 84 + i, v1749);
+ vst1q_s16(out + out_stride * 85 + i, v1750);
+ vst1q_s16(out + out_stride * 86 + i, v1751);
+ vst1q_s16(out + out_stride * 87 + i, v1752);
+ vst1q_s16(out + out_stride * 88 + i, v1753);
+ vst1q_s16(out + out_stride * 89 + i, v1754);
+ vst1q_s16(out + out_stride * 90 + i, v1755);
+ vst1q_s16(out + out_stride * 91 + i, v1756);
+ vst1q_s16(out + out_stride * 92 + i, v1757);
+ vst1q_s16(out + out_stride * 93 + i, v1758);
+ vst1q_s16(out + out_stride * 94 + i, v1759);
+ vst1q_s16(out + out_stride * 95 + i, v1760);
+ vst1q_s16(out + out_stride * 96 + i, v1761);
+ vst1q_s16(out + out_stride * 97 + i, v1762);
+ vst1q_s16(out + out_stride * 98 + i, v1763);
+ vst1q_s16(out + out_stride * 99 + i, v1764);
+ vst1q_s16(out + out_stride * 100 + i, v1765);
+ vst1q_s16(out + out_stride * 101 + i, v1766);
+ vst1q_s16(out + out_stride * 102 + i, v1767);
+ vst1q_s16(out + out_stride * 103 + i, v1768);
+ vst1q_s16(out + out_stride * 104 + i, v1769);
+ vst1q_s16(out + out_stride * 105 + i, v1770);
+ vst1q_s16(out + out_stride * 106 + i, v1771);
+ vst1q_s16(out + out_stride * 107 + i, v1772);
+ vst1q_s16(out + out_stride * 108 + i, v1773);
+ vst1q_s16(out + out_stride * 109 + i, v1774);
+ vst1q_s16(out + out_stride * 110 + i, v1775);
+ vst1q_s16(out + out_stride * 111 + i, v1776);
+ vst1q_s16(out + out_stride * 112 + i, v1777);
+ vst1q_s16(out + out_stride * 113 + i, v1778);
+ vst1q_s16(out + out_stride * 114 + i, v1779);
+ vst1q_s16(out + out_stride * 115 + i, v1780);
+ vst1q_s16(out + out_stride * 116 + i, v1781);
+ vst1q_s16(out + out_stride * 117 + i, v1782);
+ vst1q_s16(out + out_stride * 118 + i, v1783);
+ vst1q_s16(out + out_stride * 119 + i, v1784);
+ vst1q_s16(out + out_stride * 120 + i, v1785);
+ vst1q_s16(out + out_stride * 121 + i, v1786);
+ vst1q_s16(out + out_stride * 122 + i, v1787);
+ vst1q_s16(out + out_stride * 123 + i, v1788);
+ vst1q_s16(out + out_stride * 124 + i, v1789);
+ vst1q_s16(out + out_stride * 125 + i, v1790);
+ vst1q_s16(out + out_stride * 126 + i, v1791);
+ vst1q_s16(out + out_stride * 127 + i, v1792);
+ }
+}
diff --git a/third_party/jpeg-xl/lib/jxl/fast_dct16-inl.h b/third_party/jpeg-xl/lib/jxl/fast_dct16-inl.h
new file mode 100644
index 0000000000..472ec20d42
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/fast_dct16-inl.h
@@ -0,0 +1,180 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/* This file is automatically generated. Do not modify it directly. */
+#if HWY_TARGET != HWY_NEON
+#error "only include this file from fast_dct-inl.h"
+#endif
+
+constexpr size_t FastIDCTIntegerBits(FastDCTTag<16>) { return 1; }
+
+void FastIDCT(FastDCTTag<16>, const int16_t* in, size_t in_stride, int16_t* out,
+ size_t out_stride, size_t count) {
+ JXL_ASSERT(count % 8 == 0);
+ for (size_t i = 0; i < count; i += 8) {
+ int16x8_t v0 = vld1q_s16(in + in_stride * 0 + i);
+ int16x8_t v1 = vld1q_s16(in + in_stride * 8 + i);
+ int16x8_t v2 = vaddq_s16(v0, v1);
+ int16x8_t v3 = vld1q_s16(in + in_stride * 4 + i);
+ int16x8_t v4_tmp = vqrdmulhq_n_s16(v3, 13573);
+ int16x8_t v4 = vaddq_s16(v4_tmp, v3);
+ int16x8_t v5 = vld1q_s16(in + in_stride * 12 + i);
+ int16x8_t v6 = vaddq_s16(v5, v3);
+ int16x8_t v7 = vaddq_s16(v4, v6);
+ int16x8_t v8 = vqrdmulhq_n_s16(v7, 17734);
+ int16x8_t v9 = vaddq_s16(v2, v8);
+ int16x8_t v10 = vld1q_s16(in + in_stride * 2 + i);
+ int16x8_t v11_tmp = vqrdmulhq_n_s16(v10, 13573);
+ int16x8_t v11 = vaddq_s16(v11_tmp, v10);
+ int16x8_t v12 = vld1q_s16(in + in_stride * 10 + i);
+ int16x8_t v13 = vld1q_s16(in + in_stride * 6 + i);
+ int16x8_t v14 = vaddq_s16(v12, v13);
+ int16x8_t v15 = vaddq_s16(v11, v14);
+ int16x8_t v16 = vaddq_s16(v13, v10);
+ int16x8_t v17 = vqrdmulhq_n_s16(v16, 25080);
+ int16x8_t v18 = vld1q_s16(in + in_stride * 14 + i);
+ int16x8_t v19 = vaddq_s16(v18, v12);
+ int16x8_t v20 = vaddq_s16(v16, v19);
+ int16x8_t v21 = vqrdmulhq_n_s16(v20, 17734);
+ int16x8_t v22 = vaddq_s16(v17, v21);
+ int16x8_t v23 = vaddq_s16(v15, v22);
+ int16x8_t v24 = vqrdmulhq_n_s16(v23, 16705);
+ int16x8_t v25 = vaddq_s16(v9, v24);
+ int16x8_t v26 = vld1q_s16(in + in_stride * 15 + i);
+ int16x8_t v27 = vld1q_s16(in + in_stride * 13 + i);
+ int16x8_t v28 = vaddq_s16(v26, v27);
+ int16x8_t v29 = vld1q_s16(in + in_stride * 11 + i);
+ int16x8_t v30 = vld1q_s16(in + in_stride * 9 + i);
+ int16x8_t v31 = vaddq_s16(v29, v30);
+ int16x8_t v32 = vaddq_s16(v28, v31);
+ int16x8_t v33 = vqrdmulhq_n_s16(v32, 17734);
+ int16x8_t v34 = vld1q_s16(in + in_stride * 3 + i);
+ int16x8_t v35 = vld1q_s16(in + in_stride * 1 + i);
+ int16x8_t v36 = vaddq_s16(v34, v35);
+ int16x8_t v37 = vld1q_s16(in + in_stride * 7 + i);
+ int16x8_t v38 = vld1q_s16(in + in_stride * 5 + i);
+ int16x8_t v39 = vaddq_s16(v37, v38);
+ int16x8_t v40 = vaddq_s16(v36, v39);
+ int16x8_t v41_tmp = vqrdmulhq_n_s16(v40, 10045);
+ int16x8_t v41 = vaddq_s16(v41_tmp, v40);
+ int16x8_t v42 = vaddq_s16(v33, v41);
+ int16x8_t v43 = vqrdmulhq_n_s16(v42, 16705);
+ int16x8_t v44_tmp = vqrdmulhq_n_s16(v36, 13573);
+ int16x8_t v44 = vaddq_s16(v44_tmp, v36);
+ int16x8_t v45 = vaddq_s16(v39, v31);
+ int16x8_t v46 = vaddq_s16(v44, v45);
+ int16x8_t v47 = vqrdmulhq_n_s16(v46, 16705);
+ int16x8_t v48 = vaddq_s16(v43, v47);
+ int16x8_t v49_tmp = vqrdmulhq_n_s16(v35, 13573);
+ int16x8_t v49 = vaddq_s16(v49_tmp, v35);
+ int16x8_t v50 = vaddq_s16(v30, v37);
+ int16x8_t v51 = vaddq_s16(v49, v50);
+ int16x8_t v52 = vaddq_s16(v38, v34);
+ int16x8_t v53 = vaddq_s16(v27, v29);
+ int16x8_t v54 = vaddq_s16(v52, v53);
+ int16x8_t v55 = vqrdmulhq_n_s16(v54, 17734);
+ int16x8_t v56 = vqrdmulhq_n_s16(v52, 25080);
+ int16x8_t v57 = vaddq_s16(v55, v56);
+ int16x8_t v58 = vaddq_s16(v51, v57);
+ int16x8_t v59 = vaddq_s16(v48, v58);
+ int16x8_t v60 = vqrdmulhq_n_s16(v59, 16463);
+ int16x8_t v61 = vaddq_s16(v25, v60);
+ int16x8_t v62 = vsubq_s16(v0, v1);
+ int16x8_t v63 = vsubq_s16(v4, v6);
+ int16x8_t v64_tmp = vqrdmulhq_n_s16(v63, 10045);
+ int16x8_t v64 = vaddq_s16(v64_tmp, v63);
+ int16x8_t v65 = vaddq_s16(v62, v64);
+ int16x8_t v66 = vsubq_s16(v11, v14);
+ int16x8_t v67 = vqrdmulhq_n_s16(v16, 17734);
+ int16x8_t v68_tmp = vqrdmulhq_n_s16(v19, 10045);
+ int16x8_t v68 = vaddq_s16(v68_tmp, v19);
+ int16x8_t v69 = vsubq_s16(v67, v68);
+ int16x8_t v70 = vaddq_s16(v66, v69);
+ int16x8_t v71 = vqrdmulhq_n_s16(v70, 19705);
+ int16x8_t v72 = vaddq_s16(v65, v71);
+ int16x8_t v73 = vsubq_s16(v49, v50);
+ int16x8_t v74 = vqrdmulhq_n_s16(v52, 17734);
+ int16x8_t v75_tmp = vqrdmulhq_n_s16(v53, 10045);
+ int16x8_t v75 = vaddq_s16(v75_tmp, v53);
+ int16x8_t v76 = vsubq_s16(v74, v75);
+ int16x8_t v77 = vaddq_s16(v73, v76);
+ int16x8_t v78 = vsubq_s16(v44, v45);
+ int16x8_t v79 = vqrdmulhq_n_s16(v78, 19705);
+ int16x8_t v80 = vqrdmulhq_n_s16(v40, 13573);
+ int16x8_t v81 = vsubq_s16(v80, v32);
+ int16x8_t v82 = vqrdmulhq_n_s16(v81, 25746);
+ int16x8_t v83 = vaddq_s16(v79, v82);
+ int16x8_t v84 = vaddq_s16(v77, v83);
+ int16x8_t v85 = vqrdmulhq_n_s16(v84, 17121);
+ int16x8_t v86 = vaddq_s16(v72, v85);
+ int16x8_t v87 = vsubq_s16(v62, v64);
+ int16x8_t v88 = vsubq_s16(v66, v69);
+ int16x8_t v89 = vqrdmulhq_n_s16(v88, 29490);
+ int16x8_t v90 = vaddq_s16(v87, v89);
+ int16x8_t v91 = vsubq_s16(v73, v76);
+ int16x8_t v92 = vqrdmulhq_n_s16(v78, 29490);
+ int16x8_t v93_tmp = vqrdmulhq_n_s16(v81, 5763);
+ int16x8_t v93 = vaddq_s16(v93_tmp, v81);
+ int16x8_t v94 = vsubq_s16(v92, v93);
+ int16x8_t v95 = vaddq_s16(v91, v94);
+ int16x8_t v96 = vqrdmulhq_n_s16(v95, 18578);
+ int16x8_t v97 = vaddq_s16(v90, v96);
+ int16x8_t v98 = vsubq_s16(v46, v42);
+ int16x8_t v99_tmp = vqrdmulhq_n_s16(v98, 18446);
+ int16x8_t v99 = vmlaq_n_s16(v99_tmp, v98, 2);
+ int16x8_t v100 = vsubq_s16(v51, v57);
+ int16x8_t v101 = vaddq_s16(v99, v100);
+ int16x8_t v102 = vqrdmulhq_n_s16(v101, 21195);
+ int16x8_t v103 = vsubq_s16(v2, v8);
+ int16x8_t v104 = vsubq_s16(v15, v22);
+ int16x8_t v105_tmp = vqrdmulhq_n_s16(v104, 18446);
+ int16x8_t v105 = vmlaq_n_s16(v105_tmp, v104, 2);
+ int16x8_t v106 = vaddq_s16(v103, v105);
+ int16x8_t v107 = vaddq_s16(v102, v106);
+ int16x8_t v108 = vsubq_s16(v103, v105);
+ int16x8_t v109 = vsubq_s16(v100, v99);
+ int16x8_t v110 = vqrdmulhq_n_s16(v109, 25826);
+ int16x8_t v111 = vaddq_s16(v108, v110);
+ int16x8_t v112 = vsubq_s16(v87, v89);
+ int16x8_t v113 = vsubq_s16(v91, v94);
+ int16x8_t v114_tmp = vqrdmulhq_n_s16(v113, 1988);
+ int16x8_t v114 = vaddq_s16(v114_tmp, v113);
+ int16x8_t v115 = vaddq_s16(v112, v114);
+ int16x8_t v116 = vsubq_s16(v65, v71);
+ int16x8_t v117 = vsubq_s16(v77, v83);
+ int16x8_t v118_tmp = vqrdmulhq_n_s16(v117, 23673);
+ int16x8_t v118 = vaddq_s16(v118_tmp, v117);
+ int16x8_t v119 = vaddq_s16(v116, v118);
+ int16x8_t v120 = vsubq_s16(v58, v48);
+ int16x8_t v121_tmp = vqrdmulhq_n_s16(v120, 3314);
+ int16x8_t v121 = vmlaq_n_s16(v121_tmp, v120, 5);
+ int16x8_t v122 = vsubq_s16(v9, v24);
+ int16x8_t v123 = vaddq_s16(v121, v122);
+ int16x8_t v124 = vsubq_s16(v122, v121);
+ int16x8_t v125 = vsubq_s16(v116, v118);
+ int16x8_t v126 = vsubq_s16(v112, v114);
+ int16x8_t v127 = vsubq_s16(v108, v110);
+ int16x8_t v128 = vsubq_s16(v106, v102);
+ int16x8_t v129 = vsubq_s16(v90, v96);
+ int16x8_t v130 = vsubq_s16(v72, v85);
+ int16x8_t v131 = vsubq_s16(v25, v60);
+ vst1q_s16(out + out_stride * 0 + i, v61);
+ vst1q_s16(out + out_stride * 1 + i, v86);
+ vst1q_s16(out + out_stride * 2 + i, v97);
+ vst1q_s16(out + out_stride * 3 + i, v107);
+ vst1q_s16(out + out_stride * 4 + i, v111);
+ vst1q_s16(out + out_stride * 5 + i, v115);
+ vst1q_s16(out + out_stride * 6 + i, v119);
+ vst1q_s16(out + out_stride * 7 + i, v123);
+ vst1q_s16(out + out_stride * 8 + i, v124);
+ vst1q_s16(out + out_stride * 9 + i, v125);
+ vst1q_s16(out + out_stride * 10 + i, v126);
+ vst1q_s16(out + out_stride * 11 + i, v127);
+ vst1q_s16(out + out_stride * 12 + i, v128);
+ vst1q_s16(out + out_stride * 13 + i, v129);
+ vst1q_s16(out + out_stride * 14 + i, v130);
+ vst1q_s16(out + out_stride * 15 + i, v131);
+ }
+}
diff --git a/third_party/jpeg-xl/lib/jxl/fast_dct256-inl.h b/third_party/jpeg-xl/lib/jxl/fast_dct256-inl.h
new file mode 100644
index 0000000000..a823440af2
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/fast_dct256-inl.h
@@ -0,0 +1,4811 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/* This file is automatically generated. Do not modify it directly. */
+#if HWY_TARGET != HWY_NEON
+#error "only include this file from fast_dct-inl.h"
+#endif
+
+constexpr size_t FastIDCTIntegerBits(FastDCTTag<256>) { return 3; }
+
+void FastIDCT(FastDCTTag<256>, const int16_t* in, size_t in_stride,
+ int16_t* out, size_t out_stride, size_t count) {
+ JXL_ASSERT(count % 8 == 0);
+ for (size_t i = 0; i < count; i += 8) {
+ int16x8_t v0 = vld1q_s16(in + in_stride * 0 + i);
+ int16x8_t v1 = vld1q_s16(in + in_stride * 128 + i);
+ int16x8_t v2 = vaddq_s16(v0, v1);
+ int16x8_t v3 = vld1q_s16(in + in_stride * 64 + i);
+ int16x8_t v4_tmp = vqrdmulhq_n_s16(v3, 13573);
+ int16x8_t v4 = vaddq_s16(v4_tmp, v3);
+ int16x8_t v5 = vld1q_s16(in + in_stride * 192 + i);
+ int16x8_t v6 = vaddq_s16(v5, v3);
+ int16x8_t v7 = vaddq_s16(v4, v6);
+ int16x8_t v8 = vqrdmulhq_n_s16(v7, 17734);
+ int16x8_t v9 = vaddq_s16(v2, v8);
+ int16x8_t v10 = vld1q_s16(in + in_stride * 32 + i);
+ int16x8_t v11_tmp = vqrdmulhq_n_s16(v10, 13573);
+ int16x8_t v11 = vaddq_s16(v11_tmp, v10);
+ int16x8_t v12 = vld1q_s16(in + in_stride * 160 + i);
+ int16x8_t v13 = vld1q_s16(in + in_stride * 96 + i);
+ int16x8_t v14 = vaddq_s16(v12, v13);
+ int16x8_t v15 = vaddq_s16(v11, v14);
+ int16x8_t v16 = vaddq_s16(v13, v10);
+ int16x8_t v17_tmp = vqrdmulhq_n_s16(v16, 13573);
+ int16x8_t v17 = vaddq_s16(v17_tmp, v16);
+ int16x8_t v18 = vld1q_s16(in + in_stride * 224 + i);
+ int16x8_t v19 = vaddq_s16(v18, v12);
+ int16x8_t v20 = vaddq_s16(v19, v16);
+ int16x8_t v21 = vaddq_s16(v17, v20);
+ int16x8_t v22 = vqrdmulhq_n_s16(v21, 17734);
+ int16x8_t v23 = vaddq_s16(v15, v22);
+ int16x8_t v24 = vqrdmulhq_n_s16(v23, 16705);
+ int16x8_t v25 = vaddq_s16(v9, v24);
+ int16x8_t v26 = vld1q_s16(in + in_stride * 16 + i);
+ int16x8_t v27_tmp = vqrdmulhq_n_s16(v26, 13573);
+ int16x8_t v27 = vaddq_s16(v27_tmp, v26);
+ int16x8_t v28 = vld1q_s16(in + in_stride * 144 + i);
+ int16x8_t v29 = vld1q_s16(in + in_stride * 112 + i);
+ int16x8_t v30 = vaddq_s16(v28, v29);
+ int16x8_t v31 = vaddq_s16(v27, v30);
+ int16x8_t v32 = vld1q_s16(in + in_stride * 80 + i);
+ int16x8_t v33 = vld1q_s16(in + in_stride * 48 + i);
+ int16x8_t v34 = vaddq_s16(v32, v33);
+ int16x8_t v35_tmp = vqrdmulhq_n_s16(v34, 13573);
+ int16x8_t v35 = vaddq_s16(v35_tmp, v34);
+ int16x8_t v36 = vld1q_s16(in + in_stride * 208 + i);
+ int16x8_t v37 = vld1q_s16(in + in_stride * 176 + i);
+ int16x8_t v38 = vaddq_s16(v36, v37);
+ int16x8_t v39 = vaddq_s16(v38, v34);
+ int16x8_t v40 = vaddq_s16(v35, v39);
+ int16x8_t v41 = vqrdmulhq_n_s16(v40, 17734);
+ int16x8_t v42 = vaddq_s16(v31, v41);
+ int16x8_t v43 = vaddq_s16(v33, v26);
+ int16x8_t v44_tmp = vqrdmulhq_n_s16(v43, 13573);
+ int16x8_t v44 = vaddq_s16(v44_tmp, v43);
+ int16x8_t v45 = vaddq_s16(v37, v28);
+ int16x8_t v46 = vaddq_s16(v29, v32);
+ int16x8_t v47 = vaddq_s16(v45, v46);
+ int16x8_t v48 = vaddq_s16(v44, v47);
+ int16x8_t v49 = vaddq_s16(v46, v43);
+ int16x8_t v50_tmp = vqrdmulhq_n_s16(v49, 13573);
+ int16x8_t v50 = vaddq_s16(v50_tmp, v49);
+ int16x8_t v51 = vld1q_s16(in + in_stride * 240 + i);
+ int16x8_t v52 = vaddq_s16(v51, v36);
+ int16x8_t v53 = vaddq_s16(v52, v45);
+ int16x8_t v54 = vaddq_s16(v53, v49);
+ int16x8_t v55 = vaddq_s16(v50, v54);
+ int16x8_t v56 = vqrdmulhq_n_s16(v55, 17734);
+ int16x8_t v57 = vaddq_s16(v48, v56);
+ int16x8_t v58 = vqrdmulhq_n_s16(v57, 16705);
+ int16x8_t v59 = vaddq_s16(v42, v58);
+ int16x8_t v60 = vqrdmulhq_n_s16(v59, 16463);
+ int16x8_t v61 = vaddq_s16(v25, v60);
+ int16x8_t v62 = vld1q_s16(in + in_stride * 8 + i);
+ int16x8_t v63_tmp = vqrdmulhq_n_s16(v62, 13573);
+ int16x8_t v63 = vaddq_s16(v63_tmp, v62);
+ int16x8_t v64 = vld1q_s16(in + in_stride * 136 + i);
+ int16x8_t v65 = vld1q_s16(in + in_stride * 120 + i);
+ int16x8_t v66 = vaddq_s16(v64, v65);
+ int16x8_t v67 = vaddq_s16(v63, v66);
+ int16x8_t v68 = vld1q_s16(in + in_stride * 72 + i);
+ int16x8_t v69 = vld1q_s16(in + in_stride * 56 + i);
+ int16x8_t v70 = vaddq_s16(v68, v69);
+ int16x8_t v71_tmp = vqrdmulhq_n_s16(v70, 13573);
+ int16x8_t v71 = vaddq_s16(v71_tmp, v70);
+ int16x8_t v72 = vld1q_s16(in + in_stride * 200 + i);
+ int16x8_t v73 = vld1q_s16(in + in_stride * 184 + i);
+ int16x8_t v74 = vaddq_s16(v72, v73);
+ int16x8_t v75 = vaddq_s16(v74, v70);
+ int16x8_t v76 = vaddq_s16(v71, v75);
+ int16x8_t v77 = vqrdmulhq_n_s16(v76, 17734);
+ int16x8_t v78 = vaddq_s16(v67, v77);
+ int16x8_t v79 = vld1q_s16(in + in_stride * 40 + i);
+ int16x8_t v80 = vld1q_s16(in + in_stride * 24 + i);
+ int16x8_t v81 = vaddq_s16(v79, v80);
+ int16x8_t v82_tmp = vqrdmulhq_n_s16(v81, 13573);
+ int16x8_t v82 = vaddq_s16(v82_tmp, v81);
+ int16x8_t v83 = vld1q_s16(in + in_stride * 168 + i);
+ int16x8_t v84 = vld1q_s16(in + in_stride * 152 + i);
+ int16x8_t v85 = vaddq_s16(v83, v84);
+ int16x8_t v86 = vld1q_s16(in + in_stride * 104 + i);
+ int16x8_t v87 = vld1q_s16(in + in_stride * 88 + i);
+ int16x8_t v88 = vaddq_s16(v86, v87);
+ int16x8_t v89 = vaddq_s16(v85, v88);
+ int16x8_t v90 = vaddq_s16(v82, v89);
+ int16x8_t v91 = vaddq_s16(v88, v81);
+ int16x8_t v92_tmp = vqrdmulhq_n_s16(v91, 13573);
+ int16x8_t v92 = vaddq_s16(v92_tmp, v91);
+ int16x8_t v93 = vld1q_s16(in + in_stride * 232 + i);
+ int16x8_t v94 = vld1q_s16(in + in_stride * 216 + i);
+ int16x8_t v95 = vaddq_s16(v93, v94);
+ int16x8_t v96 = vaddq_s16(v95, v85);
+ int16x8_t v97 = vaddq_s16(v96, v91);
+ int16x8_t v98 = vaddq_s16(v92, v97);
+ int16x8_t v99 = vqrdmulhq_n_s16(v98, 17734);
+ int16x8_t v100 = vaddq_s16(v90, v99);
+ int16x8_t v101 = vqrdmulhq_n_s16(v100, 16705);
+ int16x8_t v102 = vaddq_s16(v78, v101);
+ int16x8_t v103 = vaddq_s16(v80, v62);
+ int16x8_t v104_tmp = vqrdmulhq_n_s16(v103, 13573);
+ int16x8_t v104 = vaddq_s16(v104_tmp, v103);
+ int16x8_t v105 = vaddq_s16(v84, v64);
+ int16x8_t v106 = vaddq_s16(v65, v86);
+ int16x8_t v107 = vaddq_s16(v105, v106);
+ int16x8_t v108 = vaddq_s16(v104, v107);
+ int16x8_t v109 = vaddq_s16(v87, v68);
+ int16x8_t v110 = vaddq_s16(v69, v79);
+ int16x8_t v111 = vaddq_s16(v109, v110);
+ int16x8_t v112_tmp = vqrdmulhq_n_s16(v111, 13573);
+ int16x8_t v112 = vaddq_s16(v112_tmp, v111);
+ int16x8_t v113 = vaddq_s16(v94, v72);
+ int16x8_t v114 = vaddq_s16(v73, v83);
+ int16x8_t v115 = vaddq_s16(v113, v114);
+ int16x8_t v116 = vaddq_s16(v115, v111);
+ int16x8_t v117 = vaddq_s16(v112, v116);
+ int16x8_t v118 = vqrdmulhq_n_s16(v117, 17734);
+ int16x8_t v119 = vaddq_s16(v108, v118);
+ int16x8_t v120 = vaddq_s16(v110, v103);
+ int16x8_t v121_tmp = vqrdmulhq_n_s16(v120, 13573);
+ int16x8_t v121 = vaddq_s16(v121_tmp, v120);
+ int16x8_t v122 = vaddq_s16(v114, v105);
+ int16x8_t v123 = vaddq_s16(v106, v109);
+ int16x8_t v124 = vaddq_s16(v122, v123);
+ int16x8_t v125 = vaddq_s16(v121, v124);
+ int16x8_t v126 = vaddq_s16(v123, v120);
+ int16x8_t v127_tmp = vqrdmulhq_n_s16(v126, 13573);
+ int16x8_t v127 = vaddq_s16(v127_tmp, v126);
+ int16x8_t v128 = vld1q_s16(in + in_stride * 248 + i);
+ int16x8_t v129 = vaddq_s16(v128, v93);
+ int16x8_t v130 = vaddq_s16(v129, v113);
+ int16x8_t v131 = vaddq_s16(v130, v122);
+ int16x8_t v132 = vaddq_s16(v131, v126);
+ int16x8_t v133 = vaddq_s16(v127, v132);
+ int16x8_t v134 = vqrdmulhq_n_s16(v133, 17734);
+ int16x8_t v135 = vaddq_s16(v125, v134);
+ int16x8_t v136 = vqrdmulhq_n_s16(v135, 16705);
+ int16x8_t v137 = vaddq_s16(v119, v136);
+ int16x8_t v138 = vqrdmulhq_n_s16(v137, 16463);
+ int16x8_t v139 = vaddq_s16(v102, v138);
+ int16x8_t v140 = vqrdmulhq_n_s16(v139, 16404);
+ int16x8_t v141 = vaddq_s16(v61, v140);
+ int16x8_t v142 = vld1q_s16(in + in_stride * 4 + i);
+ int16x8_t v143_tmp = vqrdmulhq_n_s16(v142, 13573);
+ int16x8_t v143 = vaddq_s16(v143_tmp, v142);
+ int16x8_t v144 = vld1q_s16(in + in_stride * 132 + i);
+ int16x8_t v145 = vld1q_s16(in + in_stride * 124 + i);
+ int16x8_t v146 = vaddq_s16(v144, v145);
+ int16x8_t v147 = vaddq_s16(v143, v146);
+ int16x8_t v148 = vld1q_s16(in + in_stride * 68 + i);
+ int16x8_t v149 = vld1q_s16(in + in_stride * 60 + i);
+ int16x8_t v150 = vaddq_s16(v148, v149);
+ int16x8_t v151_tmp = vqrdmulhq_n_s16(v150, 13573);
+ int16x8_t v151 = vaddq_s16(v151_tmp, v150);
+ int16x8_t v152 = vld1q_s16(in + in_stride * 196 + i);
+ int16x8_t v153 = vld1q_s16(in + in_stride * 188 + i);
+ int16x8_t v154 = vaddq_s16(v152, v153);
+ int16x8_t v155 = vaddq_s16(v154, v150);
+ int16x8_t v156 = vaddq_s16(v151, v155);
+ int16x8_t v157 = vqrdmulhq_n_s16(v156, 17734);
+ int16x8_t v158 = vaddq_s16(v147, v157);
+ int16x8_t v159 = vld1q_s16(in + in_stride * 36 + i);
+ int16x8_t v160 = vld1q_s16(in + in_stride * 28 + i);
+ int16x8_t v161 = vaddq_s16(v159, v160);
+ int16x8_t v162_tmp = vqrdmulhq_n_s16(v161, 13573);
+ int16x8_t v162 = vaddq_s16(v162_tmp, v161);
+ int16x8_t v163 = vld1q_s16(in + in_stride * 164 + i);
+ int16x8_t v164 = vld1q_s16(in + in_stride * 156 + i);
+ int16x8_t v165 = vaddq_s16(v163, v164);
+ int16x8_t v166 = vld1q_s16(in + in_stride * 100 + i);
+ int16x8_t v167 = vld1q_s16(in + in_stride * 92 + i);
+ int16x8_t v168 = vaddq_s16(v166, v167);
+ int16x8_t v169 = vaddq_s16(v165, v168);
+ int16x8_t v170 = vaddq_s16(v162, v169);
+ int16x8_t v171 = vaddq_s16(v168, v161);
+ int16x8_t v172_tmp = vqrdmulhq_n_s16(v171, 13573);
+ int16x8_t v172 = vaddq_s16(v172_tmp, v171);
+ int16x8_t v173 = vld1q_s16(in + in_stride * 228 + i);
+ int16x8_t v174 = vld1q_s16(in + in_stride * 220 + i);
+ int16x8_t v175 = vaddq_s16(v173, v174);
+ int16x8_t v176 = vaddq_s16(v175, v165);
+ int16x8_t v177 = vaddq_s16(v176, v171);
+ int16x8_t v178 = vaddq_s16(v172, v177);
+ int16x8_t v179 = vqrdmulhq_n_s16(v178, 17734);
+ int16x8_t v180 = vaddq_s16(v170, v179);
+ int16x8_t v181 = vqrdmulhq_n_s16(v180, 16705);
+ int16x8_t v182 = vaddq_s16(v158, v181);
+ int16x8_t v183 = vld1q_s16(in + in_stride * 20 + i);
+ int16x8_t v184 = vld1q_s16(in + in_stride * 12 + i);
+ int16x8_t v185 = vaddq_s16(v183, v184);
+ int16x8_t v186_tmp = vqrdmulhq_n_s16(v185, 13573);
+ int16x8_t v186 = vaddq_s16(v186_tmp, v185);
+ int16x8_t v187 = vld1q_s16(in + in_stride * 148 + i);
+ int16x8_t v188 = vld1q_s16(in + in_stride * 140 + i);
+ int16x8_t v189 = vaddq_s16(v187, v188);
+ int16x8_t v190 = vld1q_s16(in + in_stride * 116 + i);
+ int16x8_t v191 = vld1q_s16(in + in_stride * 108 + i);
+ int16x8_t v192 = vaddq_s16(v190, v191);
+ int16x8_t v193 = vaddq_s16(v189, v192);
+ int16x8_t v194 = vaddq_s16(v186, v193);
+ int16x8_t v195 = vld1q_s16(in + in_stride * 84 + i);
+ int16x8_t v196 = vld1q_s16(in + in_stride * 76 + i);
+ int16x8_t v197 = vaddq_s16(v195, v196);
+ int16x8_t v198 = vld1q_s16(in + in_stride * 52 + i);
+ int16x8_t v199 = vld1q_s16(in + in_stride * 44 + i);
+ int16x8_t v200 = vaddq_s16(v198, v199);
+ int16x8_t v201 = vaddq_s16(v197, v200);
+ int16x8_t v202_tmp = vqrdmulhq_n_s16(v201, 13573);
+ int16x8_t v202 = vaddq_s16(v202_tmp, v201);
+ int16x8_t v203 = vld1q_s16(in + in_stride * 212 + i);
+ int16x8_t v204 = vld1q_s16(in + in_stride * 204 + i);
+ int16x8_t v205 = vaddq_s16(v203, v204);
+ int16x8_t v206 = vld1q_s16(in + in_stride * 180 + i);
+ int16x8_t v207 = vld1q_s16(in + in_stride * 172 + i);
+ int16x8_t v208 = vaddq_s16(v206, v207);
+ int16x8_t v209 = vaddq_s16(v205, v208);
+ int16x8_t v210 = vaddq_s16(v209, v201);
+ int16x8_t v211 = vaddq_s16(v202, v210);
+ int16x8_t v212 = vqrdmulhq_n_s16(v211, 17734);
+ int16x8_t v213 = vaddq_s16(v194, v212);
+ int16x8_t v214 = vaddq_s16(v200, v185);
+ int16x8_t v215_tmp = vqrdmulhq_n_s16(v214, 13573);
+ int16x8_t v215 = vaddq_s16(v215_tmp, v214);
+ int16x8_t v216 = vaddq_s16(v208, v189);
+ int16x8_t v217 = vaddq_s16(v192, v197);
+ int16x8_t v218 = vaddq_s16(v216, v217);
+ int16x8_t v219 = vaddq_s16(v215, v218);
+ int16x8_t v220 = vaddq_s16(v217, v214);
+ int16x8_t v221_tmp = vqrdmulhq_n_s16(v220, 13573);
+ int16x8_t v221 = vaddq_s16(v221_tmp, v220);
+ int16x8_t v222 = vld1q_s16(in + in_stride * 244 + i);
+ int16x8_t v223 = vld1q_s16(in + in_stride * 236 + i);
+ int16x8_t v224 = vaddq_s16(v222, v223);
+ int16x8_t v225 = vaddq_s16(v224, v205);
+ int16x8_t v226 = vaddq_s16(v225, v216);
+ int16x8_t v227 = vaddq_s16(v226, v220);
+ int16x8_t v228 = vaddq_s16(v221, v227);
+ int16x8_t v229 = vqrdmulhq_n_s16(v228, 17734);
+ int16x8_t v230 = vaddq_s16(v219, v229);
+ int16x8_t v231 = vqrdmulhq_n_s16(v230, 16705);
+ int16x8_t v232 = vaddq_s16(v213, v231);
+ int16x8_t v233 = vqrdmulhq_n_s16(v232, 16463);
+ int16x8_t v234 = vaddq_s16(v182, v233);
+ int16x8_t v235 = vaddq_s16(v184, v142);
+ int16x8_t v236_tmp = vqrdmulhq_n_s16(v235, 13573);
+ int16x8_t v236 = vaddq_s16(v236_tmp, v235);
+ int16x8_t v237 = vaddq_s16(v188, v144);
+ int16x8_t v238 = vaddq_s16(v145, v190);
+ int16x8_t v239 = vaddq_s16(v237, v238);
+ int16x8_t v240 = vaddq_s16(v236, v239);
+ int16x8_t v241 = vaddq_s16(v196, v148);
+ int16x8_t v242 = vaddq_s16(v149, v198);
+ int16x8_t v243 = vaddq_s16(v241, v242);
+ int16x8_t v244_tmp = vqrdmulhq_n_s16(v243, 13573);
+ int16x8_t v244 = vaddq_s16(v244_tmp, v243);
+ int16x8_t v245 = vaddq_s16(v204, v152);
+ int16x8_t v246 = vaddq_s16(v153, v206);
+ int16x8_t v247 = vaddq_s16(v245, v246);
+ int16x8_t v248 = vaddq_s16(v247, v243);
+ int16x8_t v249 = vaddq_s16(v244, v248);
+ int16x8_t v250 = vqrdmulhq_n_s16(v249, 17734);
+ int16x8_t v251 = vaddq_s16(v240, v250);
+ int16x8_t v252 = vaddq_s16(v199, v159);
+ int16x8_t v253 = vaddq_s16(v160, v183);
+ int16x8_t v254 = vaddq_s16(v252, v253);
+ int16x8_t v255_tmp = vqrdmulhq_n_s16(v254, 13573);
+ int16x8_t v255 = vaddq_s16(v255_tmp, v254);
+ int16x8_t v256 = vaddq_s16(v207, v163);
+ int16x8_t v257 = vaddq_s16(v164, v187);
+ int16x8_t v258 = vaddq_s16(v256, v257);
+ int16x8_t v259 = vaddq_s16(v191, v166);
+ int16x8_t v260 = vaddq_s16(v167, v195);
+ int16x8_t v261 = vaddq_s16(v259, v260);
+ int16x8_t v262 = vaddq_s16(v258, v261);
+ int16x8_t v263 = vaddq_s16(v255, v262);
+ int16x8_t v264 = vaddq_s16(v261, v254);
+ int16x8_t v265_tmp = vqrdmulhq_n_s16(v264, 13573);
+ int16x8_t v265 = vaddq_s16(v265_tmp, v264);
+ int16x8_t v266 = vaddq_s16(v223, v173);
+ int16x8_t v267 = vaddq_s16(v174, v203);
+ int16x8_t v268 = vaddq_s16(v266, v267);
+ int16x8_t v269 = vaddq_s16(v268, v258);
+ int16x8_t v270 = vaddq_s16(v269, v264);
+ int16x8_t v271 = vaddq_s16(v265, v270);
+ int16x8_t v272 = vqrdmulhq_n_s16(v271, 17734);
+ int16x8_t v273 = vaddq_s16(v263, v272);
+ int16x8_t v274 = vqrdmulhq_n_s16(v273, 16705);
+ int16x8_t v275 = vaddq_s16(v251, v274);
+ int16x8_t v276 = vaddq_s16(v253, v235);
+ int16x8_t v277_tmp = vqrdmulhq_n_s16(v276, 13573);
+ int16x8_t v277 = vaddq_s16(v277_tmp, v276);
+ int16x8_t v278 = vaddq_s16(v257, v237);
+ int16x8_t v279 = vaddq_s16(v238, v259);
+ int16x8_t v280 = vaddq_s16(v278, v279);
+ int16x8_t v281 = vaddq_s16(v277, v280);
+ int16x8_t v282 = vaddq_s16(v260, v241);
+ int16x8_t v283 = vaddq_s16(v242, v252);
+ int16x8_t v284 = vaddq_s16(v282, v283);
+ int16x8_t v285_tmp = vqrdmulhq_n_s16(v284, 13573);
+ int16x8_t v285 = vaddq_s16(v285_tmp, v284);
+ int16x8_t v286 = vaddq_s16(v267, v245);
+ int16x8_t v287 = vaddq_s16(v246, v256);
+ int16x8_t v288 = vaddq_s16(v286, v287);
+ int16x8_t v289 = vaddq_s16(v288, v284);
+ int16x8_t v290 = vaddq_s16(v285, v289);
+ int16x8_t v291 = vqrdmulhq_n_s16(v290, 17734);
+ int16x8_t v292 = vaddq_s16(v281, v291);
+ int16x8_t v293 = vaddq_s16(v283, v276);
+ int16x8_t v294_tmp = vqrdmulhq_n_s16(v293, 13573);
+ int16x8_t v294 = vaddq_s16(v294_tmp, v293);
+ int16x8_t v295 = vaddq_s16(v287, v278);
+ int16x8_t v296 = vaddq_s16(v279, v282);
+ int16x8_t v297 = vaddq_s16(v295, v296);
+ int16x8_t v298 = vaddq_s16(v294, v297);
+ int16x8_t v299 = vaddq_s16(v296, v293);
+ int16x8_t v300_tmp = vqrdmulhq_n_s16(v299, 13573);
+ int16x8_t v300 = vaddq_s16(v300_tmp, v299);
+ int16x8_t v301 = vld1q_s16(in + in_stride * 252 + i);
+ int16x8_t v302 = vaddq_s16(v301, v222);
+ int16x8_t v303 = vaddq_s16(v302, v266);
+ int16x8_t v304 = vaddq_s16(v303, v286);
+ int16x8_t v305 = vaddq_s16(v304, v295);
+ int16x8_t v306 = vaddq_s16(v305, v299);
+ int16x8_t v307 = vaddq_s16(v300, v306);
+ int16x8_t v308 = vqrdmulhq_n_s16(v307, 17734);
+ int16x8_t v309 = vaddq_s16(v298, v308);
+ int16x8_t v310 = vqrdmulhq_n_s16(v309, 16705);
+ int16x8_t v311 = vaddq_s16(v292, v310);
+ int16x8_t v312 = vqrdmulhq_n_s16(v311, 16463);
+ int16x8_t v313 = vaddq_s16(v275, v312);
+ int16x8_t v314 = vqrdmulhq_n_s16(v313, 16404);
+ int16x8_t v315 = vaddq_s16(v234, v314);
+ int16x8_t v316 = vqrdmulhq_n_s16(v315, 16389);
+ int16x8_t v317 = vaddq_s16(v141, v316);
+ int16x8_t v318 = vld1q_s16(in + in_stride * 2 + i);
+ int16x8_t v319_tmp = vqrdmulhq_n_s16(v318, 13573);
+ int16x8_t v319 = vaddq_s16(v319_tmp, v318);
+ int16x8_t v320 = vld1q_s16(in + in_stride * 130 + i);
+ int16x8_t v321 = vld1q_s16(in + in_stride * 126 + i);
+ int16x8_t v322 = vaddq_s16(v320, v321);
+ int16x8_t v323 = vaddq_s16(v319, v322);
+ int16x8_t v324 = vld1q_s16(in + in_stride * 66 + i);
+ int16x8_t v325 = vld1q_s16(in + in_stride * 62 + i);
+ int16x8_t v326 = vaddq_s16(v324, v325);
+ int16x8_t v327_tmp = vqrdmulhq_n_s16(v326, 13573);
+ int16x8_t v327 = vaddq_s16(v327_tmp, v326);
+ int16x8_t v328 = vld1q_s16(in + in_stride * 194 + i);
+ int16x8_t v329 = vld1q_s16(in + in_stride * 190 + i);
+ int16x8_t v330 = vaddq_s16(v328, v329);
+ int16x8_t v331 = vaddq_s16(v330, v326);
+ int16x8_t v332 = vaddq_s16(v327, v331);
+ int16x8_t v333 = vqrdmulhq_n_s16(v332, 17734);
+ int16x8_t v334 = vaddq_s16(v323, v333);
+ int16x8_t v335 = vld1q_s16(in + in_stride * 34 + i);
+ int16x8_t v336 = vld1q_s16(in + in_stride * 30 + i);
+ int16x8_t v337 = vaddq_s16(v335, v336);
+ int16x8_t v338_tmp = vqrdmulhq_n_s16(v337, 13573);
+ int16x8_t v338 = vaddq_s16(v338_tmp, v337);
+ int16x8_t v339 = vld1q_s16(in + in_stride * 162 + i);
+ int16x8_t v340 = vld1q_s16(in + in_stride * 158 + i);
+ int16x8_t v341 = vaddq_s16(v339, v340);
+ int16x8_t v342 = vld1q_s16(in + in_stride * 98 + i);
+ int16x8_t v343 = vld1q_s16(in + in_stride * 94 + i);
+ int16x8_t v344 = vaddq_s16(v342, v343);
+ int16x8_t v345 = vaddq_s16(v341, v344);
+ int16x8_t v346 = vaddq_s16(v338, v345);
+ int16x8_t v347 = vaddq_s16(v344, v337);
+ int16x8_t v348_tmp = vqrdmulhq_n_s16(v347, 13573);
+ int16x8_t v348 = vaddq_s16(v348_tmp, v347);
+ int16x8_t v349 = vld1q_s16(in + in_stride * 226 + i);
+ int16x8_t v350 = vld1q_s16(in + in_stride * 222 + i);
+ int16x8_t v351 = vaddq_s16(v349, v350);
+ int16x8_t v352 = vaddq_s16(v351, v341);
+ int16x8_t v353 = vaddq_s16(v352, v347);
+ int16x8_t v354 = vaddq_s16(v348, v353);
+ int16x8_t v355 = vqrdmulhq_n_s16(v354, 17734);
+ int16x8_t v356 = vaddq_s16(v346, v355);
+ int16x8_t v357 = vqrdmulhq_n_s16(v356, 16705);
+ int16x8_t v358 = vaddq_s16(v334, v357);
+ int16x8_t v359 = vld1q_s16(in + in_stride * 18 + i);
+ int16x8_t v360 = vld1q_s16(in + in_stride * 14 + i);
+ int16x8_t v361 = vaddq_s16(v359, v360);
+ int16x8_t v362_tmp = vqrdmulhq_n_s16(v361, 13573);
+ int16x8_t v362 = vaddq_s16(v362_tmp, v361);
+ int16x8_t v363 = vld1q_s16(in + in_stride * 146 + i);
+ int16x8_t v364 = vld1q_s16(in + in_stride * 142 + i);
+ int16x8_t v365 = vaddq_s16(v363, v364);
+ int16x8_t v366 = vld1q_s16(in + in_stride * 114 + i);
+ int16x8_t v367 = vld1q_s16(in + in_stride * 110 + i);
+ int16x8_t v368 = vaddq_s16(v366, v367);
+ int16x8_t v369 = vaddq_s16(v365, v368);
+ int16x8_t v370 = vaddq_s16(v362, v369);
+ int16x8_t v371 = vld1q_s16(in + in_stride * 82 + i);
+ int16x8_t v372 = vld1q_s16(in + in_stride * 78 + i);
+ int16x8_t v373 = vaddq_s16(v371, v372);
+ int16x8_t v374 = vld1q_s16(in + in_stride * 50 + i);
+ int16x8_t v375 = vld1q_s16(in + in_stride * 46 + i);
+ int16x8_t v376 = vaddq_s16(v374, v375);
+ int16x8_t v377 = vaddq_s16(v373, v376);
+ int16x8_t v378_tmp = vqrdmulhq_n_s16(v377, 13573);
+ int16x8_t v378 = vaddq_s16(v378_tmp, v377);
+ int16x8_t v379 = vld1q_s16(in + in_stride * 210 + i);
+ int16x8_t v380 = vld1q_s16(in + in_stride * 206 + i);
+ int16x8_t v381 = vaddq_s16(v379, v380);
+ int16x8_t v382 = vld1q_s16(in + in_stride * 178 + i);
+ int16x8_t v383 = vld1q_s16(in + in_stride * 174 + i);
+ int16x8_t v384 = vaddq_s16(v382, v383);
+ int16x8_t v385 = vaddq_s16(v381, v384);
+ int16x8_t v386 = vaddq_s16(v385, v377);
+ int16x8_t v387 = vaddq_s16(v378, v386);
+ int16x8_t v388 = vqrdmulhq_n_s16(v387, 17734);
+ int16x8_t v389 = vaddq_s16(v370, v388);
+ int16x8_t v390 = vaddq_s16(v376, v361);
+ int16x8_t v391_tmp = vqrdmulhq_n_s16(v390, 13573);
+ int16x8_t v391 = vaddq_s16(v391_tmp, v390);
+ int16x8_t v392 = vaddq_s16(v384, v365);
+ int16x8_t v393 = vaddq_s16(v368, v373);
+ int16x8_t v394 = vaddq_s16(v392, v393);
+ int16x8_t v395 = vaddq_s16(v391, v394);
+ int16x8_t v396 = vaddq_s16(v393, v390);
+ int16x8_t v397_tmp = vqrdmulhq_n_s16(v396, 13573);
+ int16x8_t v397 = vaddq_s16(v397_tmp, v396);
+ int16x8_t v398 = vld1q_s16(in + in_stride * 242 + i);
+ int16x8_t v399 = vld1q_s16(in + in_stride * 238 + i);
+ int16x8_t v400 = vaddq_s16(v398, v399);
+ int16x8_t v401 = vaddq_s16(v400, v381);
+ int16x8_t v402 = vaddq_s16(v401, v392);
+ int16x8_t v403 = vaddq_s16(v402, v396);
+ int16x8_t v404 = vaddq_s16(v397, v403);
+ int16x8_t v405 = vqrdmulhq_n_s16(v404, 17734);
+ int16x8_t v406 = vaddq_s16(v395, v405);
+ int16x8_t v407 = vqrdmulhq_n_s16(v406, 16705);
+ int16x8_t v408 = vaddq_s16(v389, v407);
+ int16x8_t v409 = vqrdmulhq_n_s16(v408, 16463);
+ int16x8_t v410 = vaddq_s16(v358, v409);
+ int16x8_t v411 = vld1q_s16(in + in_stride * 10 + i);
+ int16x8_t v412 = vld1q_s16(in + in_stride * 6 + i);
+ int16x8_t v413 = vaddq_s16(v411, v412);
+ int16x8_t v414_tmp = vqrdmulhq_n_s16(v413, 13573);
+ int16x8_t v414 = vaddq_s16(v414_tmp, v413);
+ int16x8_t v415 = vld1q_s16(in + in_stride * 138 + i);
+ int16x8_t v416 = vld1q_s16(in + in_stride * 134 + i);
+ int16x8_t v417 = vaddq_s16(v415, v416);
+ int16x8_t v418 = vld1q_s16(in + in_stride * 122 + i);
+ int16x8_t v419 = vld1q_s16(in + in_stride * 118 + i);
+ int16x8_t v420 = vaddq_s16(v418, v419);
+ int16x8_t v421 = vaddq_s16(v417, v420);
+ int16x8_t v422 = vaddq_s16(v414, v421);
+ int16x8_t v423 = vld1q_s16(in + in_stride * 74 + i);
+ int16x8_t v424 = vld1q_s16(in + in_stride * 70 + i);
+ int16x8_t v425 = vaddq_s16(v423, v424);
+ int16x8_t v426 = vld1q_s16(in + in_stride * 58 + i);
+ int16x8_t v427 = vld1q_s16(in + in_stride * 54 + i);
+ int16x8_t v428 = vaddq_s16(v426, v427);
+ int16x8_t v429 = vaddq_s16(v425, v428);
+ int16x8_t v430_tmp = vqrdmulhq_n_s16(v429, 13573);
+ int16x8_t v430 = vaddq_s16(v430_tmp, v429);
+ int16x8_t v431 = vld1q_s16(in + in_stride * 202 + i);
+ int16x8_t v432 = vld1q_s16(in + in_stride * 198 + i);
+ int16x8_t v433 = vaddq_s16(v431, v432);
+ int16x8_t v434 = vld1q_s16(in + in_stride * 186 + i);
+ int16x8_t v435 = vld1q_s16(in + in_stride * 182 + i);
+ int16x8_t v436 = vaddq_s16(v434, v435);
+ int16x8_t v437 = vaddq_s16(v433, v436);
+ int16x8_t v438 = vaddq_s16(v437, v429);
+ int16x8_t v439 = vaddq_s16(v430, v438);
+ int16x8_t v440 = vqrdmulhq_n_s16(v439, 17734);
+ int16x8_t v441 = vaddq_s16(v422, v440);
+ int16x8_t v442 = vld1q_s16(in + in_stride * 42 + i);
+ int16x8_t v443 = vld1q_s16(in + in_stride * 38 + i);
+ int16x8_t v444 = vaddq_s16(v442, v443);
+ int16x8_t v445 = vld1q_s16(in + in_stride * 26 + i);
+ int16x8_t v446 = vld1q_s16(in + in_stride * 22 + i);
+ int16x8_t v447 = vaddq_s16(v445, v446);
+ int16x8_t v448 = vaddq_s16(v444, v447);
+ int16x8_t v449_tmp = vqrdmulhq_n_s16(v448, 13573);
+ int16x8_t v449 = vaddq_s16(v449_tmp, v448);
+ int16x8_t v450 = vld1q_s16(in + in_stride * 170 + i);
+ int16x8_t v451 = vld1q_s16(in + in_stride * 166 + i);
+ int16x8_t v452 = vaddq_s16(v450, v451);
+ int16x8_t v453 = vld1q_s16(in + in_stride * 154 + i);
+ int16x8_t v454 = vld1q_s16(in + in_stride * 150 + i);
+ int16x8_t v455 = vaddq_s16(v453, v454);
+ int16x8_t v456 = vaddq_s16(v452, v455);
+ int16x8_t v457 = vld1q_s16(in + in_stride * 106 + i);
+ int16x8_t v458 = vld1q_s16(in + in_stride * 102 + i);
+ int16x8_t v459 = vaddq_s16(v457, v458);
+ int16x8_t v460 = vld1q_s16(in + in_stride * 90 + i);
+ int16x8_t v461 = vld1q_s16(in + in_stride * 86 + i);
+ int16x8_t v462 = vaddq_s16(v460, v461);
+ int16x8_t v463 = vaddq_s16(v459, v462);
+ int16x8_t v464 = vaddq_s16(v456, v463);
+ int16x8_t v465 = vaddq_s16(v449, v464);
+ int16x8_t v466 = vaddq_s16(v463, v448);
+ int16x8_t v467_tmp = vqrdmulhq_n_s16(v466, 13573);
+ int16x8_t v467 = vaddq_s16(v467_tmp, v466);
+ int16x8_t v468 = vld1q_s16(in + in_stride * 234 + i);
+ int16x8_t v469 = vld1q_s16(in + in_stride * 230 + i);
+ int16x8_t v470 = vaddq_s16(v468, v469);
+ int16x8_t v471 = vld1q_s16(in + in_stride * 218 + i);
+ int16x8_t v472 = vld1q_s16(in + in_stride * 214 + i);
+ int16x8_t v473 = vaddq_s16(v471, v472);
+ int16x8_t v474 = vaddq_s16(v470, v473);
+ int16x8_t v475 = vaddq_s16(v474, v456);
+ int16x8_t v476 = vaddq_s16(v475, v466);
+ int16x8_t v477 = vaddq_s16(v467, v476);
+ int16x8_t v478 = vqrdmulhq_n_s16(v477, 17734);
+ int16x8_t v479 = vaddq_s16(v465, v478);
+ int16x8_t v480 = vqrdmulhq_n_s16(v479, 16705);
+ int16x8_t v481 = vaddq_s16(v441, v480);
+ int16x8_t v482 = vaddq_s16(v447, v413);
+ int16x8_t v483_tmp = vqrdmulhq_n_s16(v482, 13573);
+ int16x8_t v483 = vaddq_s16(v483_tmp, v482);
+ int16x8_t v484 = vaddq_s16(v455, v417);
+ int16x8_t v485 = vaddq_s16(v420, v459);
+ int16x8_t v486 = vaddq_s16(v484, v485);
+ int16x8_t v487 = vaddq_s16(v483, v486);
+ int16x8_t v488 = vaddq_s16(v462, v425);
+ int16x8_t v489 = vaddq_s16(v428, v444);
+ int16x8_t v490 = vaddq_s16(v488, v489);
+ int16x8_t v491_tmp = vqrdmulhq_n_s16(v490, 13573);
+ int16x8_t v491 = vaddq_s16(v491_tmp, v490);
+ int16x8_t v492 = vaddq_s16(v473, v433);
+ int16x8_t v493 = vaddq_s16(v436, v452);
+ int16x8_t v494 = vaddq_s16(v492, v493);
+ int16x8_t v495 = vaddq_s16(v494, v490);
+ int16x8_t v496 = vaddq_s16(v491, v495);
+ int16x8_t v497 = vqrdmulhq_n_s16(v496, 17734);
+ int16x8_t v498 = vaddq_s16(v487, v497);
+ int16x8_t v499 = vaddq_s16(v489, v482);
+ int16x8_t v500_tmp = vqrdmulhq_n_s16(v499, 13573);
+ int16x8_t v500 = vaddq_s16(v500_tmp, v499);
+ int16x8_t v501 = vaddq_s16(v493, v484);
+ int16x8_t v502 = vaddq_s16(v485, v488);
+ int16x8_t v503 = vaddq_s16(v501, v502);
+ int16x8_t v504 = vaddq_s16(v500, v503);
+ int16x8_t v505 = vaddq_s16(v502, v499);
+ int16x8_t v506_tmp = vqrdmulhq_n_s16(v505, 13573);
+ int16x8_t v506 = vaddq_s16(v506_tmp, v505);
+ int16x8_t v507 = vld1q_s16(in + in_stride * 250 + i);
+ int16x8_t v508 = vld1q_s16(in + in_stride * 246 + i);
+ int16x8_t v509 = vaddq_s16(v507, v508);
+ int16x8_t v510 = vaddq_s16(v509, v470);
+ int16x8_t v511 = vaddq_s16(v510, v492);
+ int16x8_t v512 = vaddq_s16(v511, v501);
+ int16x8_t v513 = vaddq_s16(v512, v505);
+ int16x8_t v514 = vaddq_s16(v506, v513);
+ int16x8_t v515 = vqrdmulhq_n_s16(v514, 17734);
+ int16x8_t v516 = vaddq_s16(v504, v515);
+ int16x8_t v517 = vqrdmulhq_n_s16(v516, 16705);
+ int16x8_t v518 = vaddq_s16(v498, v517);
+ int16x8_t v519 = vqrdmulhq_n_s16(v518, 16463);
+ int16x8_t v520 = vaddq_s16(v481, v519);
+ int16x8_t v521 = vqrdmulhq_n_s16(v520, 16404);
+ int16x8_t v522 = vaddq_s16(v410, v521);
+ int16x8_t v523 = vaddq_s16(v412, v318);
+ int16x8_t v524_tmp = vqrdmulhq_n_s16(v523, 13573);
+ int16x8_t v524 = vaddq_s16(v524_tmp, v523);
+ int16x8_t v525 = vaddq_s16(v416, v320);
+ int16x8_t v526 = vaddq_s16(v321, v418);
+ int16x8_t v527 = vaddq_s16(v525, v526);
+ int16x8_t v528 = vaddq_s16(v524, v527);
+ int16x8_t v529 = vaddq_s16(v424, v324);
+ int16x8_t v530 = vaddq_s16(v325, v426);
+ int16x8_t v531 = vaddq_s16(v529, v530);
+ int16x8_t v532_tmp = vqrdmulhq_n_s16(v531, 13573);
+ int16x8_t v532 = vaddq_s16(v532_tmp, v531);
+ int16x8_t v533 = vaddq_s16(v432, v328);
+ int16x8_t v534 = vaddq_s16(v329, v434);
+ int16x8_t v535 = vaddq_s16(v533, v534);
+ int16x8_t v536 = vaddq_s16(v535, v531);
+ int16x8_t v537 = vaddq_s16(v532, v536);
+ int16x8_t v538 = vqrdmulhq_n_s16(v537, 17734);
+ int16x8_t v539 = vaddq_s16(v528, v538);
+ int16x8_t v540 = vaddq_s16(v443, v335);
+ int16x8_t v541 = vaddq_s16(v336, v445);
+ int16x8_t v542 = vaddq_s16(v540, v541);
+ int16x8_t v543_tmp = vqrdmulhq_n_s16(v542, 13573);
+ int16x8_t v543 = vaddq_s16(v543_tmp, v542);
+ int16x8_t v544 = vaddq_s16(v451, v339);
+ int16x8_t v545 = vaddq_s16(v340, v453);
+ int16x8_t v546 = vaddq_s16(v544, v545);
+ int16x8_t v547 = vaddq_s16(v458, v342);
+ int16x8_t v548 = vaddq_s16(v343, v460);
+ int16x8_t v549 = vaddq_s16(v547, v548);
+ int16x8_t v550 = vaddq_s16(v546, v549);
+ int16x8_t v551 = vaddq_s16(v543, v550);
+ int16x8_t v552 = vaddq_s16(v549, v542);
+ int16x8_t v553_tmp = vqrdmulhq_n_s16(v552, 13573);
+ int16x8_t v553 = vaddq_s16(v553_tmp, v552);
+ int16x8_t v554 = vaddq_s16(v469, v349);
+ int16x8_t v555 = vaddq_s16(v350, v471);
+ int16x8_t v556 = vaddq_s16(v554, v555);
+ int16x8_t v557 = vaddq_s16(v556, v546);
+ int16x8_t v558 = vaddq_s16(v557, v552);
+ int16x8_t v559 = vaddq_s16(v553, v558);
+ int16x8_t v560 = vqrdmulhq_n_s16(v559, 17734);
+ int16x8_t v561 = vaddq_s16(v551, v560);
+ int16x8_t v562 = vqrdmulhq_n_s16(v561, 16705);
+ int16x8_t v563 = vaddq_s16(v539, v562);
+ int16x8_t v564 = vaddq_s16(v446, v359);
+ int16x8_t v565 = vaddq_s16(v360, v411);
+ int16x8_t v566 = vaddq_s16(v564, v565);
+ int16x8_t v567_tmp = vqrdmulhq_n_s16(v566, 13573);
+ int16x8_t v567 = vaddq_s16(v567_tmp, v566);
+ int16x8_t v568 = vaddq_s16(v454, v363);
+ int16x8_t v569 = vaddq_s16(v364, v415);
+ int16x8_t v570 = vaddq_s16(v568, v569);
+ int16x8_t v571 = vaddq_s16(v419, v366);
+ int16x8_t v572 = vaddq_s16(v367, v457);
+ int16x8_t v573 = vaddq_s16(v571, v572);
+ int16x8_t v574 = vaddq_s16(v570, v573);
+ int16x8_t v575 = vaddq_s16(v567, v574);
+ int16x8_t v576 = vaddq_s16(v461, v371);
+ int16x8_t v577 = vaddq_s16(v372, v423);
+ int16x8_t v578 = vaddq_s16(v576, v577);
+ int16x8_t v579 = vaddq_s16(v427, v374);
+ int16x8_t v580 = vaddq_s16(v375, v442);
+ int16x8_t v581 = vaddq_s16(v579, v580);
+ int16x8_t v582 = vaddq_s16(v578, v581);
+ int16x8_t v583_tmp = vqrdmulhq_n_s16(v582, 13573);
+ int16x8_t v583 = vaddq_s16(v583_tmp, v582);
+ int16x8_t v584 = vaddq_s16(v472, v379);
+ int16x8_t v585 = vaddq_s16(v380, v431);
+ int16x8_t v586 = vaddq_s16(v584, v585);
+ int16x8_t v587 = vaddq_s16(v435, v382);
+ int16x8_t v588 = vaddq_s16(v383, v450);
+ int16x8_t v589 = vaddq_s16(v587, v588);
+ int16x8_t v590 = vaddq_s16(v586, v589);
+ int16x8_t v591 = vaddq_s16(v590, v582);
+ int16x8_t v592 = vaddq_s16(v583, v591);
+ int16x8_t v593 = vqrdmulhq_n_s16(v592, 17734);
+ int16x8_t v594 = vaddq_s16(v575, v593);
+ int16x8_t v595 = vaddq_s16(v581, v566);
+ int16x8_t v596_tmp = vqrdmulhq_n_s16(v595, 13573);
+ int16x8_t v596 = vaddq_s16(v596_tmp, v595);
+ int16x8_t v597 = vaddq_s16(v589, v570);
+ int16x8_t v598 = vaddq_s16(v573, v578);
+ int16x8_t v599 = vaddq_s16(v597, v598);
+ int16x8_t v600 = vaddq_s16(v596, v599);
+ int16x8_t v601 = vaddq_s16(v598, v595);
+ int16x8_t v602_tmp = vqrdmulhq_n_s16(v601, 13573);
+ int16x8_t v602 = vaddq_s16(v602_tmp, v601);
+ int16x8_t v603 = vaddq_s16(v508, v398);
+ int16x8_t v604 = vaddq_s16(v399, v468);
+ int16x8_t v605 = vaddq_s16(v603, v604);
+ int16x8_t v606 = vaddq_s16(v605, v586);
+ int16x8_t v607 = vaddq_s16(v606, v597);
+ int16x8_t v608 = vaddq_s16(v607, v601);
+ int16x8_t v609 = vaddq_s16(v602, v608);
+ int16x8_t v610 = vqrdmulhq_n_s16(v609, 17734);
+ int16x8_t v611 = vaddq_s16(v600, v610);
+ int16x8_t v612 = vqrdmulhq_n_s16(v611, 16705);
+ int16x8_t v613 = vaddq_s16(v594, v612);
+ int16x8_t v614 = vqrdmulhq_n_s16(v613, 16463);
+ int16x8_t v615 = vaddq_s16(v563, v614);
+ int16x8_t v616 = vaddq_s16(v565, v523);
+ int16x8_t v617_tmp = vqrdmulhq_n_s16(v616, 13573);
+ int16x8_t v617 = vaddq_s16(v617_tmp, v616);
+ int16x8_t v618 = vaddq_s16(v569, v525);
+ int16x8_t v619 = vaddq_s16(v526, v571);
+ int16x8_t v620 = vaddq_s16(v618, v619);
+ int16x8_t v621 = vaddq_s16(v617, v620);
+ int16x8_t v622 = vaddq_s16(v577, v529);
+ int16x8_t v623 = vaddq_s16(v530, v579);
+ int16x8_t v624 = vaddq_s16(v622, v623);
+ int16x8_t v625_tmp = vqrdmulhq_n_s16(v624, 13573);
+ int16x8_t v625 = vaddq_s16(v625_tmp, v624);
+ int16x8_t v626 = vaddq_s16(v585, v533);
+ int16x8_t v627 = vaddq_s16(v534, v587);
+ int16x8_t v628 = vaddq_s16(v626, v627);
+ int16x8_t v629 = vaddq_s16(v628, v624);
+ int16x8_t v630 = vaddq_s16(v625, v629);
+ int16x8_t v631 = vqrdmulhq_n_s16(v630, 17734);
+ int16x8_t v632 = vaddq_s16(v621, v631);
+ int16x8_t v633 = vaddq_s16(v580, v540);
+ int16x8_t v634 = vaddq_s16(v541, v564);
+ int16x8_t v635 = vaddq_s16(v633, v634);
+ int16x8_t v636_tmp = vqrdmulhq_n_s16(v635, 13573);
+ int16x8_t v636 = vaddq_s16(v636_tmp, v635);
+ int16x8_t v637 = vaddq_s16(v588, v544);
+ int16x8_t v638 = vaddq_s16(v545, v568);
+ int16x8_t v639 = vaddq_s16(v637, v638);
+ int16x8_t v640 = vaddq_s16(v572, v547);
+ int16x8_t v641 = vaddq_s16(v548, v576);
+ int16x8_t v642 = vaddq_s16(v640, v641);
+ int16x8_t v643 = vaddq_s16(v639, v642);
+ int16x8_t v644 = vaddq_s16(v636, v643);
+ int16x8_t v645 = vaddq_s16(v642, v635);
+ int16x8_t v646_tmp = vqrdmulhq_n_s16(v645, 13573);
+ int16x8_t v646 = vaddq_s16(v646_tmp, v645);
+ int16x8_t v647 = vaddq_s16(v604, v554);
+ int16x8_t v648 = vaddq_s16(v555, v584);
+ int16x8_t v649 = vaddq_s16(v647, v648);
+ int16x8_t v650 = vaddq_s16(v649, v639);
+ int16x8_t v651 = vaddq_s16(v650, v645);
+ int16x8_t v652 = vaddq_s16(v646, v651);
+ int16x8_t v653 = vqrdmulhq_n_s16(v652, 17734);
+ int16x8_t v654 = vaddq_s16(v644, v653);
+ int16x8_t v655 = vqrdmulhq_n_s16(v654, 16705);
+ int16x8_t v656 = vaddq_s16(v632, v655);
+ int16x8_t v657 = vaddq_s16(v634, v616);
+ int16x8_t v658_tmp = vqrdmulhq_n_s16(v657, 13573);
+ int16x8_t v658 = vaddq_s16(v658_tmp, v657);
+ int16x8_t v659 = vaddq_s16(v638, v618);
+ int16x8_t v660 = vaddq_s16(v619, v640);
+ int16x8_t v661 = vaddq_s16(v659, v660);
+ int16x8_t v662 = vaddq_s16(v658, v661);
+ int16x8_t v663 = vaddq_s16(v641, v622);
+ int16x8_t v664 = vaddq_s16(v623, v633);
+ int16x8_t v665 = vaddq_s16(v663, v664);
+ int16x8_t v666_tmp = vqrdmulhq_n_s16(v665, 13573);
+ int16x8_t v666 = vaddq_s16(v666_tmp, v665);
+ int16x8_t v667 = vaddq_s16(v648, v626);
+ int16x8_t v668 = vaddq_s16(v627, v637);
+ int16x8_t v669 = vaddq_s16(v667, v668);
+ int16x8_t v670 = vaddq_s16(v669, v665);
+ int16x8_t v671 = vaddq_s16(v666, v670);
+ int16x8_t v672 = vqrdmulhq_n_s16(v671, 17734);
+ int16x8_t v673 = vaddq_s16(v662, v672);
+ int16x8_t v674 = vaddq_s16(v664, v657);
+ int16x8_t v675_tmp = vqrdmulhq_n_s16(v674, 13573);
+ int16x8_t v675 = vaddq_s16(v675_tmp, v674);
+ int16x8_t v676 = vaddq_s16(v668, v659);
+ int16x8_t v677 = vaddq_s16(v660, v663);
+ int16x8_t v678 = vaddq_s16(v676, v677);
+ int16x8_t v679 = vaddq_s16(v675, v678);
+ int16x8_t v680 = vaddq_s16(v677, v674);
+ int16x8_t v681_tmp = vqrdmulhq_n_s16(v680, 13573);
+ int16x8_t v681 = vaddq_s16(v681_tmp, v680);
+ int16x8_t v682 = vld1q_s16(in + in_stride * 254 + i);
+ int16x8_t v683 = vaddq_s16(v682, v507);
+ int16x8_t v684 = vaddq_s16(v683, v603);
+ int16x8_t v685 = vaddq_s16(v684, v647);
+ int16x8_t v686 = vaddq_s16(v685, v667);
+ int16x8_t v687 = vaddq_s16(v686, v676);
+ int16x8_t v688 = vaddq_s16(v687, v680);
+ int16x8_t v689 = vaddq_s16(v681, v688);
+ int16x8_t v690 = vqrdmulhq_n_s16(v689, 17734);
+ int16x8_t v691 = vaddq_s16(v679, v690);
+ int16x8_t v692 = vqrdmulhq_n_s16(v691, 16705);
+ int16x8_t v693 = vaddq_s16(v673, v692);
+ int16x8_t v694 = vqrdmulhq_n_s16(v693, 16463);
+ int16x8_t v695 = vaddq_s16(v656, v694);
+ int16x8_t v696 = vqrdmulhq_n_s16(v695, 16404);
+ int16x8_t v697 = vaddq_s16(v615, v696);
+ int16x8_t v698 = vqrdmulhq_n_s16(v697, 16389);
+ int16x8_t v699 = vaddq_s16(v522, v698);
+ int16x8_t v700 = vqrdmulhq_n_s16(v699, 16385);
+ int16x8_t v701 = vaddq_s16(v317, v700);
+ int16x8_t v702 = vld1q_s16(in + in_stride * 1 + i);
+ int16x8_t v703_tmp = vqrdmulhq_n_s16(v702, 13573);
+ int16x8_t v703 = vaddq_s16(v703_tmp, v702);
+ int16x8_t v704 = vld1q_s16(in + in_stride * 129 + i);
+ int16x8_t v705 = vld1q_s16(in + in_stride * 127 + i);
+ int16x8_t v706 = vaddq_s16(v704, v705);
+ int16x8_t v707 = vaddq_s16(v703, v706);
+ int16x8_t v708 = vld1q_s16(in + in_stride * 65 + i);
+ int16x8_t v709 = vld1q_s16(in + in_stride * 63 + i);
+ int16x8_t v710 = vaddq_s16(v708, v709);
+ int16x8_t v711_tmp = vqrdmulhq_n_s16(v710, 13573);
+ int16x8_t v711 = vaddq_s16(v711_tmp, v710);
+ int16x8_t v712 = vld1q_s16(in + in_stride * 193 + i);
+ int16x8_t v713 = vld1q_s16(in + in_stride * 191 + i);
+ int16x8_t v714 = vaddq_s16(v712, v713);
+ int16x8_t v715 = vaddq_s16(v714, v710);
+ int16x8_t v716 = vaddq_s16(v711, v715);
+ int16x8_t v717 = vqrdmulhq_n_s16(v716, 17734);
+ int16x8_t v718 = vaddq_s16(v707, v717);
+ int16x8_t v719 = vld1q_s16(in + in_stride * 33 + i);
+ int16x8_t v720 = vld1q_s16(in + in_stride * 31 + i);
+ int16x8_t v721 = vaddq_s16(v719, v720);
+ int16x8_t v722_tmp = vqrdmulhq_n_s16(v721, 13573);
+ int16x8_t v722 = vaddq_s16(v722_tmp, v721);
+ int16x8_t v723 = vld1q_s16(in + in_stride * 161 + i);
+ int16x8_t v724 = vld1q_s16(in + in_stride * 159 + i);
+ int16x8_t v725 = vaddq_s16(v723, v724);
+ int16x8_t v726 = vld1q_s16(in + in_stride * 97 + i);
+ int16x8_t v727 = vld1q_s16(in + in_stride * 95 + i);
+ int16x8_t v728 = vaddq_s16(v726, v727);
+ int16x8_t v729 = vaddq_s16(v725, v728);
+ int16x8_t v730 = vaddq_s16(v722, v729);
+ int16x8_t v731 = vaddq_s16(v728, v721);
+ int16x8_t v732_tmp = vqrdmulhq_n_s16(v731, 13573);
+ int16x8_t v732 = vaddq_s16(v732_tmp, v731);
+ int16x8_t v733 = vld1q_s16(in + in_stride * 225 + i);
+ int16x8_t v734 = vld1q_s16(in + in_stride * 223 + i);
+ int16x8_t v735 = vaddq_s16(v733, v734);
+ int16x8_t v736 = vaddq_s16(v735, v725);
+ int16x8_t v737 = vaddq_s16(v736, v731);
+ int16x8_t v738 = vaddq_s16(v732, v737);
+ int16x8_t v739 = vqrdmulhq_n_s16(v738, 17734);
+ int16x8_t v740 = vaddq_s16(v730, v739);
+ int16x8_t v741 = vqrdmulhq_n_s16(v740, 16705);
+ int16x8_t v742 = vaddq_s16(v718, v741);
+ int16x8_t v743 = vld1q_s16(in + in_stride * 17 + i);
+ int16x8_t v744 = vld1q_s16(in + in_stride * 15 + i);
+ int16x8_t v745 = vaddq_s16(v743, v744);
+ int16x8_t v746_tmp = vqrdmulhq_n_s16(v745, 13573);
+ int16x8_t v746 = vaddq_s16(v746_tmp, v745);
+ int16x8_t v747 = vld1q_s16(in + in_stride * 145 + i);
+ int16x8_t v748 = vld1q_s16(in + in_stride * 143 + i);
+ int16x8_t v749 = vaddq_s16(v747, v748);
+ int16x8_t v750 = vld1q_s16(in + in_stride * 113 + i);
+ int16x8_t v751 = vld1q_s16(in + in_stride * 111 + i);
+ int16x8_t v752 = vaddq_s16(v750, v751);
+ int16x8_t v753 = vaddq_s16(v749, v752);
+ int16x8_t v754 = vaddq_s16(v746, v753);
+ int16x8_t v755 = vld1q_s16(in + in_stride * 81 + i);
+ int16x8_t v756 = vld1q_s16(in + in_stride * 79 + i);
+ int16x8_t v757 = vaddq_s16(v755, v756);
+ int16x8_t v758 = vld1q_s16(in + in_stride * 49 + i);
+ int16x8_t v759 = vld1q_s16(in + in_stride * 47 + i);
+ int16x8_t v760 = vaddq_s16(v758, v759);
+ int16x8_t v761 = vaddq_s16(v757, v760);
+ int16x8_t v762_tmp = vqrdmulhq_n_s16(v761, 13573);
+ int16x8_t v762 = vaddq_s16(v762_tmp, v761);
+ int16x8_t v763 = vld1q_s16(in + in_stride * 209 + i);
+ int16x8_t v764 = vld1q_s16(in + in_stride * 207 + i);
+ int16x8_t v765 = vaddq_s16(v763, v764);
+ int16x8_t v766 = vld1q_s16(in + in_stride * 177 + i);
+ int16x8_t v767 = vld1q_s16(in + in_stride * 175 + i);
+ int16x8_t v768 = vaddq_s16(v766, v767);
+ int16x8_t v769 = vaddq_s16(v765, v768);
+ int16x8_t v770 = vaddq_s16(v769, v761);
+ int16x8_t v771 = vaddq_s16(v762, v770);
+ int16x8_t v772 = vqrdmulhq_n_s16(v771, 17734);
+ int16x8_t v773 = vaddq_s16(v754, v772);
+ int16x8_t v774 = vaddq_s16(v760, v745);
+ int16x8_t v775_tmp = vqrdmulhq_n_s16(v774, 13573);
+ int16x8_t v775 = vaddq_s16(v775_tmp, v774);
+ int16x8_t v776 = vaddq_s16(v768, v749);
+ int16x8_t v777 = vaddq_s16(v752, v757);
+ int16x8_t v778 = vaddq_s16(v776, v777);
+ int16x8_t v779 = vaddq_s16(v775, v778);
+ int16x8_t v780 = vaddq_s16(v777, v774);
+ int16x8_t v781_tmp = vqrdmulhq_n_s16(v780, 13573);
+ int16x8_t v781 = vaddq_s16(v781_tmp, v780);
+ int16x8_t v782 = vld1q_s16(in + in_stride * 241 + i);
+ int16x8_t v783 = vld1q_s16(in + in_stride * 239 + i);
+ int16x8_t v784 = vaddq_s16(v782, v783);
+ int16x8_t v785 = vaddq_s16(v784, v765);
+ int16x8_t v786 = vaddq_s16(v785, v776);
+ int16x8_t v787 = vaddq_s16(v786, v780);
+ int16x8_t v788 = vaddq_s16(v781, v787);
+ int16x8_t v789 = vqrdmulhq_n_s16(v788, 17734);
+ int16x8_t v790 = vaddq_s16(v779, v789);
+ int16x8_t v791 = vqrdmulhq_n_s16(v790, 16705);
+ int16x8_t v792 = vaddq_s16(v773, v791);
+ int16x8_t v793 = vqrdmulhq_n_s16(v792, 16463);
+ int16x8_t v794 = vaddq_s16(v742, v793);
+ int16x8_t v795 = vld1q_s16(in + in_stride * 9 + i);
+ int16x8_t v796 = vld1q_s16(in + in_stride * 7 + i);
+ int16x8_t v797 = vaddq_s16(v795, v796);
+ int16x8_t v798_tmp = vqrdmulhq_n_s16(v797, 13573);
+ int16x8_t v798 = vaddq_s16(v798_tmp, v797);
+ int16x8_t v799 = vld1q_s16(in + in_stride * 137 + i);
+ int16x8_t v800 = vld1q_s16(in + in_stride * 135 + i);
+ int16x8_t v801 = vaddq_s16(v799, v800);
+ int16x8_t v802 = vld1q_s16(in + in_stride * 121 + i);
+ int16x8_t v803 = vld1q_s16(in + in_stride * 119 + i);
+ int16x8_t v804 = vaddq_s16(v802, v803);
+ int16x8_t v805 = vaddq_s16(v801, v804);
+ int16x8_t v806 = vaddq_s16(v798, v805);
+ int16x8_t v807 = vld1q_s16(in + in_stride * 73 + i);
+ int16x8_t v808 = vld1q_s16(in + in_stride * 71 + i);
+ int16x8_t v809 = vaddq_s16(v807, v808);
+ int16x8_t v810 = vld1q_s16(in + in_stride * 57 + i);
+ int16x8_t v811 = vld1q_s16(in + in_stride * 55 + i);
+ int16x8_t v812 = vaddq_s16(v810, v811);
+ int16x8_t v813 = vaddq_s16(v809, v812);
+ int16x8_t v814_tmp = vqrdmulhq_n_s16(v813, 13573);
+ int16x8_t v814 = vaddq_s16(v814_tmp, v813);
+ int16x8_t v815 = vld1q_s16(in + in_stride * 201 + i);
+ int16x8_t v816 = vld1q_s16(in + in_stride * 199 + i);
+ int16x8_t v817 = vaddq_s16(v815, v816);
+ int16x8_t v818 = vld1q_s16(in + in_stride * 185 + i);
+ int16x8_t v819 = vld1q_s16(in + in_stride * 183 + i);
+ int16x8_t v820 = vaddq_s16(v818, v819);
+ int16x8_t v821 = vaddq_s16(v817, v820);
+ int16x8_t v822 = vaddq_s16(v821, v813);
+ int16x8_t v823 = vaddq_s16(v814, v822);
+ int16x8_t v824 = vqrdmulhq_n_s16(v823, 17734);
+ int16x8_t v825 = vaddq_s16(v806, v824);
+ int16x8_t v826 = vld1q_s16(in + in_stride * 41 + i);
+ int16x8_t v827 = vld1q_s16(in + in_stride * 39 + i);
+ int16x8_t v828 = vaddq_s16(v826, v827);
+ int16x8_t v829 = vld1q_s16(in + in_stride * 25 + i);
+ int16x8_t v830 = vld1q_s16(in + in_stride * 23 + i);
+ int16x8_t v831 = vaddq_s16(v829, v830);
+ int16x8_t v832 = vaddq_s16(v828, v831);
+ int16x8_t v833_tmp = vqrdmulhq_n_s16(v832, 13573);
+ int16x8_t v833 = vaddq_s16(v833_tmp, v832);
+ int16x8_t v834 = vld1q_s16(in + in_stride * 169 + i);
+ int16x8_t v835 = vld1q_s16(in + in_stride * 167 + i);
+ int16x8_t v836 = vaddq_s16(v834, v835);
+ int16x8_t v837 = vld1q_s16(in + in_stride * 153 + i);
+ int16x8_t v838 = vld1q_s16(in + in_stride * 151 + i);
+ int16x8_t v839 = vaddq_s16(v837, v838);
+ int16x8_t v840 = vaddq_s16(v836, v839);
+ int16x8_t v841 = vld1q_s16(in + in_stride * 105 + i);
+ int16x8_t v842 = vld1q_s16(in + in_stride * 103 + i);
+ int16x8_t v843 = vaddq_s16(v841, v842);
+ int16x8_t v844 = vld1q_s16(in + in_stride * 89 + i);
+ int16x8_t v845 = vld1q_s16(in + in_stride * 87 + i);
+ int16x8_t v846 = vaddq_s16(v844, v845);
+ int16x8_t v847 = vaddq_s16(v843, v846);
+ int16x8_t v848 = vaddq_s16(v840, v847);
+ int16x8_t v849 = vaddq_s16(v833, v848);
+ int16x8_t v850 = vaddq_s16(v847, v832);
+ int16x8_t v851_tmp = vqrdmulhq_n_s16(v850, 13573);
+ int16x8_t v851 = vaddq_s16(v851_tmp, v850);
+ int16x8_t v852 = vld1q_s16(in + in_stride * 233 + i);
+ int16x8_t v853 = vld1q_s16(in + in_stride * 231 + i);
+ int16x8_t v854 = vaddq_s16(v852, v853);
+ int16x8_t v855 = vld1q_s16(in + in_stride * 217 + i);
+ int16x8_t v856 = vld1q_s16(in + in_stride * 215 + i);
+ int16x8_t v857 = vaddq_s16(v855, v856);
+ int16x8_t v858 = vaddq_s16(v854, v857);
+ int16x8_t v859 = vaddq_s16(v858, v840);
+ int16x8_t v860 = vaddq_s16(v859, v850);
+ int16x8_t v861 = vaddq_s16(v851, v860);
+ int16x8_t v862 = vqrdmulhq_n_s16(v861, 17734);
+ int16x8_t v863 = vaddq_s16(v849, v862);
+ int16x8_t v864 = vqrdmulhq_n_s16(v863, 16705);
+ int16x8_t v865 = vaddq_s16(v825, v864);
+ int16x8_t v866 = vaddq_s16(v831, v797);
+ int16x8_t v867_tmp = vqrdmulhq_n_s16(v866, 13573);
+ int16x8_t v867 = vaddq_s16(v867_tmp, v866);
+ int16x8_t v868 = vaddq_s16(v839, v801);
+ int16x8_t v869 = vaddq_s16(v804, v843);
+ int16x8_t v870 = vaddq_s16(v868, v869);
+ int16x8_t v871 = vaddq_s16(v867, v870);
+ int16x8_t v872 = vaddq_s16(v846, v809);
+ int16x8_t v873 = vaddq_s16(v812, v828);
+ int16x8_t v874 = vaddq_s16(v872, v873);
+ int16x8_t v875_tmp = vqrdmulhq_n_s16(v874, 13573);
+ int16x8_t v875 = vaddq_s16(v875_tmp, v874);
+ int16x8_t v876 = vaddq_s16(v857, v817);
+ int16x8_t v877 = vaddq_s16(v820, v836);
+ int16x8_t v878 = vaddq_s16(v876, v877);
+ int16x8_t v879 = vaddq_s16(v878, v874);
+ int16x8_t v880 = vaddq_s16(v875, v879);
+ int16x8_t v881 = vqrdmulhq_n_s16(v880, 17734);
+ int16x8_t v882 = vaddq_s16(v871, v881);
+ int16x8_t v883 = vaddq_s16(v873, v866);
+ int16x8_t v884_tmp = vqrdmulhq_n_s16(v883, 13573);
+ int16x8_t v884 = vaddq_s16(v884_tmp, v883);
+ int16x8_t v885 = vaddq_s16(v877, v868);
+ int16x8_t v886 = vaddq_s16(v869, v872);
+ int16x8_t v887 = vaddq_s16(v885, v886);
+ int16x8_t v888 = vaddq_s16(v884, v887);
+ int16x8_t v889 = vaddq_s16(v886, v883);
+ int16x8_t v890_tmp = vqrdmulhq_n_s16(v889, 13573);
+ int16x8_t v890 = vaddq_s16(v890_tmp, v889);
+ int16x8_t v891 = vld1q_s16(in + in_stride * 249 + i);
+ int16x8_t v892 = vld1q_s16(in + in_stride * 247 + i);
+ int16x8_t v893 = vaddq_s16(v891, v892);
+ int16x8_t v894 = vaddq_s16(v893, v854);
+ int16x8_t v895 = vaddq_s16(v894, v876);
+ int16x8_t v896 = vaddq_s16(v895, v885);
+ int16x8_t v897 = vaddq_s16(v896, v889);
+ int16x8_t v898 = vaddq_s16(v890, v897);
+ int16x8_t v899 = vqrdmulhq_n_s16(v898, 17734);
+ int16x8_t v900 = vaddq_s16(v888, v899);
+ int16x8_t v901 = vqrdmulhq_n_s16(v900, 16705);
+ int16x8_t v902 = vaddq_s16(v882, v901);
+ int16x8_t v903 = vqrdmulhq_n_s16(v902, 16463);
+ int16x8_t v904 = vaddq_s16(v865, v903);
+ int16x8_t v905 = vqrdmulhq_n_s16(v904, 16404);
+ int16x8_t v906 = vaddq_s16(v794, v905);
+ int16x8_t v907 = vld1q_s16(in + in_stride * 5 + i);
+ int16x8_t v908 = vld1q_s16(in + in_stride * 3 + i);
+ int16x8_t v909 = vaddq_s16(v907, v908);
+ int16x8_t v910_tmp = vqrdmulhq_n_s16(v909, 13573);
+ int16x8_t v910 = vaddq_s16(v910_tmp, v909);
+ int16x8_t v911 = vld1q_s16(in + in_stride * 133 + i);
+ int16x8_t v912 = vld1q_s16(in + in_stride * 131 + i);
+ int16x8_t v913 = vaddq_s16(v911, v912);
+ int16x8_t v914 = vld1q_s16(in + in_stride * 125 + i);
+ int16x8_t v915 = vld1q_s16(in + in_stride * 123 + i);
+ int16x8_t v916 = vaddq_s16(v914, v915);
+ int16x8_t v917 = vaddq_s16(v913, v916);
+ int16x8_t v918 = vaddq_s16(v910, v917);
+ int16x8_t v919 = vld1q_s16(in + in_stride * 69 + i);
+ int16x8_t v920 = vld1q_s16(in + in_stride * 67 + i);
+ int16x8_t v921 = vaddq_s16(v919, v920);
+ int16x8_t v922 = vld1q_s16(in + in_stride * 61 + i);
+ int16x8_t v923 = vld1q_s16(in + in_stride * 59 + i);
+ int16x8_t v924 = vaddq_s16(v922, v923);
+ int16x8_t v925 = vaddq_s16(v921, v924);
+ int16x8_t v926_tmp = vqrdmulhq_n_s16(v925, 13573);
+ int16x8_t v926 = vaddq_s16(v926_tmp, v925);
+ int16x8_t v927 = vld1q_s16(in + in_stride * 197 + i);
+ int16x8_t v928 = vld1q_s16(in + in_stride * 195 + i);
+ int16x8_t v929 = vaddq_s16(v927, v928);
+ int16x8_t v930 = vld1q_s16(in + in_stride * 189 + i);
+ int16x8_t v931 = vld1q_s16(in + in_stride * 187 + i);
+ int16x8_t v932 = vaddq_s16(v930, v931);
+ int16x8_t v933 = vaddq_s16(v929, v932);
+ int16x8_t v934 = vaddq_s16(v933, v925);
+ int16x8_t v935 = vaddq_s16(v926, v934);
+ int16x8_t v936 = vqrdmulhq_n_s16(v935, 17734);
+ int16x8_t v937 = vaddq_s16(v918, v936);
+ int16x8_t v938 = vld1q_s16(in + in_stride * 37 + i);
+ int16x8_t v939 = vld1q_s16(in + in_stride * 35 + i);
+ int16x8_t v940 = vaddq_s16(v938, v939);
+ int16x8_t v941 = vld1q_s16(in + in_stride * 29 + i);
+ int16x8_t v942 = vld1q_s16(in + in_stride * 27 + i);
+ int16x8_t v943 = vaddq_s16(v941, v942);
+ int16x8_t v944 = vaddq_s16(v940, v943);
+ int16x8_t v945_tmp = vqrdmulhq_n_s16(v944, 13573);
+ int16x8_t v945 = vaddq_s16(v945_tmp, v944);
+ int16x8_t v946 = vld1q_s16(in + in_stride * 165 + i);
+ int16x8_t v947 = vld1q_s16(in + in_stride * 163 + i);
+ int16x8_t v948 = vaddq_s16(v946, v947);
+ int16x8_t v949 = vld1q_s16(in + in_stride * 157 + i);
+ int16x8_t v950 = vld1q_s16(in + in_stride * 155 + i);
+ int16x8_t v951 = vaddq_s16(v949, v950);
+ int16x8_t v952 = vaddq_s16(v948, v951);
+ int16x8_t v953 = vld1q_s16(in + in_stride * 101 + i);
+ int16x8_t v954 = vld1q_s16(in + in_stride * 99 + i);
+ int16x8_t v955 = vaddq_s16(v953, v954);
+ int16x8_t v956 = vld1q_s16(in + in_stride * 93 + i);
+ int16x8_t v957 = vld1q_s16(in + in_stride * 91 + i);
+ int16x8_t v958 = vaddq_s16(v956, v957);
+ int16x8_t v959 = vaddq_s16(v955, v958);
+ int16x8_t v960 = vaddq_s16(v952, v959);
+ int16x8_t v961 = vaddq_s16(v945, v960);
+ int16x8_t v962 = vaddq_s16(v959, v944);
+ int16x8_t v963_tmp = vqrdmulhq_n_s16(v962, 13573);
+ int16x8_t v963 = vaddq_s16(v963_tmp, v962);
+ int16x8_t v964 = vld1q_s16(in + in_stride * 229 + i);
+ int16x8_t v965 = vld1q_s16(in + in_stride * 227 + i);
+ int16x8_t v966 = vaddq_s16(v964, v965);
+ int16x8_t v967 = vld1q_s16(in + in_stride * 221 + i);
+ int16x8_t v968 = vld1q_s16(in + in_stride * 219 + i);
+ int16x8_t v969 = vaddq_s16(v967, v968);
+ int16x8_t v970 = vaddq_s16(v966, v969);
+ int16x8_t v971 = vaddq_s16(v970, v952);
+ int16x8_t v972 = vaddq_s16(v971, v962);
+ int16x8_t v973 = vaddq_s16(v963, v972);
+ int16x8_t v974 = vqrdmulhq_n_s16(v973, 17734);
+ int16x8_t v975 = vaddq_s16(v961, v974);
+ int16x8_t v976 = vqrdmulhq_n_s16(v975, 16705);
+ int16x8_t v977 = vaddq_s16(v937, v976);
+ int16x8_t v978 = vld1q_s16(in + in_stride * 21 + i);
+ int16x8_t v979 = vld1q_s16(in + in_stride * 19 + i);
+ int16x8_t v980 = vaddq_s16(v978, v979);
+ int16x8_t v981 = vld1q_s16(in + in_stride * 13 + i);
+ int16x8_t v982 = vld1q_s16(in + in_stride * 11 + i);
+ int16x8_t v983 = vaddq_s16(v981, v982);
+ int16x8_t v984 = vaddq_s16(v980, v983);
+ int16x8_t v985_tmp = vqrdmulhq_n_s16(v984, 13573);
+ int16x8_t v985 = vaddq_s16(v985_tmp, v984);
+ int16x8_t v986 = vld1q_s16(in + in_stride * 149 + i);
+ int16x8_t v987 = vld1q_s16(in + in_stride * 147 + i);
+ int16x8_t v988 = vaddq_s16(v986, v987);
+ int16x8_t v989 = vld1q_s16(in + in_stride * 141 + i);
+ int16x8_t v990 = vld1q_s16(in + in_stride * 139 + i);
+ int16x8_t v991 = vaddq_s16(v989, v990);
+ int16x8_t v992 = vaddq_s16(v988, v991);
+ int16x8_t v993 = vld1q_s16(in + in_stride * 117 + i);
+ int16x8_t v994 = vld1q_s16(in + in_stride * 115 + i);
+ int16x8_t v995 = vaddq_s16(v993, v994);
+ int16x8_t v996 = vld1q_s16(in + in_stride * 109 + i);
+ int16x8_t v997 = vld1q_s16(in + in_stride * 107 + i);
+ int16x8_t v998 = vaddq_s16(v996, v997);
+ int16x8_t v999 = vaddq_s16(v995, v998);
+ int16x8_t v1000 = vaddq_s16(v992, v999);
+ int16x8_t v1001 = vaddq_s16(v985, v1000);
+ int16x8_t v1002 = vld1q_s16(in + in_stride * 85 + i);
+ int16x8_t v1003 = vld1q_s16(in + in_stride * 83 + i);
+ int16x8_t v1004 = vaddq_s16(v1002, v1003);
+ int16x8_t v1005 = vld1q_s16(in + in_stride * 77 + i);
+ int16x8_t v1006 = vld1q_s16(in + in_stride * 75 + i);
+ int16x8_t v1007 = vaddq_s16(v1005, v1006);
+ int16x8_t v1008 = vaddq_s16(v1004, v1007);
+ int16x8_t v1009 = vld1q_s16(in + in_stride * 53 + i);
+ int16x8_t v1010 = vld1q_s16(in + in_stride * 51 + i);
+ int16x8_t v1011 = vaddq_s16(v1009, v1010);
+ int16x8_t v1012 = vld1q_s16(in + in_stride * 45 + i);
+ int16x8_t v1013 = vld1q_s16(in + in_stride * 43 + i);
+ int16x8_t v1014 = vaddq_s16(v1012, v1013);
+ int16x8_t v1015 = vaddq_s16(v1011, v1014);
+ int16x8_t v1016 = vaddq_s16(v1008, v1015);
+ int16x8_t v1017_tmp = vqrdmulhq_n_s16(v1016, 13573);
+ int16x8_t v1017 = vaddq_s16(v1017_tmp, v1016);
+ int16x8_t v1018 = vld1q_s16(in + in_stride * 213 + i);
+ int16x8_t v1019 = vld1q_s16(in + in_stride * 211 + i);
+ int16x8_t v1020 = vaddq_s16(v1018, v1019);
+ int16x8_t v1021 = vld1q_s16(in + in_stride * 205 + i);
+ int16x8_t v1022 = vld1q_s16(in + in_stride * 203 + i);
+ int16x8_t v1023 = vaddq_s16(v1021, v1022);
+ int16x8_t v1024 = vaddq_s16(v1020, v1023);
+ int16x8_t v1025 = vld1q_s16(in + in_stride * 181 + i);
+ int16x8_t v1026 = vld1q_s16(in + in_stride * 179 + i);
+ int16x8_t v1027 = vaddq_s16(v1025, v1026);
+ int16x8_t v1028 = vld1q_s16(in + in_stride * 173 + i);
+ int16x8_t v1029 = vld1q_s16(in + in_stride * 171 + i);
+ int16x8_t v1030 = vaddq_s16(v1028, v1029);
+ int16x8_t v1031 = vaddq_s16(v1027, v1030);
+ int16x8_t v1032 = vaddq_s16(v1024, v1031);
+ int16x8_t v1033 = vaddq_s16(v1032, v1016);
+ int16x8_t v1034 = vaddq_s16(v1017, v1033);
+ int16x8_t v1035 = vqrdmulhq_n_s16(v1034, 17734);
+ int16x8_t v1036 = vaddq_s16(v1001, v1035);
+ int16x8_t v1037 = vaddq_s16(v1015, v984);
+ int16x8_t v1038_tmp = vqrdmulhq_n_s16(v1037, 13573);
+ int16x8_t v1038 = vaddq_s16(v1038_tmp, v1037);
+ int16x8_t v1039 = vaddq_s16(v1031, v992);
+ int16x8_t v1040 = vaddq_s16(v999, v1008);
+ int16x8_t v1041 = vaddq_s16(v1039, v1040);
+ int16x8_t v1042 = vaddq_s16(v1038, v1041);
+ int16x8_t v1043 = vaddq_s16(v1040, v1037);
+ int16x8_t v1044_tmp = vqrdmulhq_n_s16(v1043, 13573);
+ int16x8_t v1044 = vaddq_s16(v1044_tmp, v1043);
+ int16x8_t v1045 = vld1q_s16(in + in_stride * 245 + i);
+ int16x8_t v1046 = vld1q_s16(in + in_stride * 243 + i);
+ int16x8_t v1047 = vaddq_s16(v1045, v1046);
+ int16x8_t v1048 = vld1q_s16(in + in_stride * 237 + i);
+ int16x8_t v1049 = vld1q_s16(in + in_stride * 235 + i);
+ int16x8_t v1050 = vaddq_s16(v1048, v1049);
+ int16x8_t v1051 = vaddq_s16(v1047, v1050);
+ int16x8_t v1052 = vaddq_s16(v1051, v1024);
+ int16x8_t v1053 = vaddq_s16(v1052, v1039);
+ int16x8_t v1054 = vaddq_s16(v1053, v1043);
+ int16x8_t v1055 = vaddq_s16(v1044, v1054);
+ int16x8_t v1056 = vqrdmulhq_n_s16(v1055, 17734);
+ int16x8_t v1057 = vaddq_s16(v1042, v1056);
+ int16x8_t v1058 = vqrdmulhq_n_s16(v1057, 16705);
+ int16x8_t v1059 = vaddq_s16(v1036, v1058);
+ int16x8_t v1060 = vqrdmulhq_n_s16(v1059, 16463);
+ int16x8_t v1061 = vaddq_s16(v977, v1060);
+ int16x8_t v1062 = vaddq_s16(v983, v909);
+ int16x8_t v1063_tmp = vqrdmulhq_n_s16(v1062, 13573);
+ int16x8_t v1063 = vaddq_s16(v1063_tmp, v1062);
+ int16x8_t v1064 = vaddq_s16(v991, v913);
+ int16x8_t v1065 = vaddq_s16(v916, v995);
+ int16x8_t v1066 = vaddq_s16(v1064, v1065);
+ int16x8_t v1067 = vaddq_s16(v1063, v1066);
+ int16x8_t v1068 = vaddq_s16(v1007, v921);
+ int16x8_t v1069 = vaddq_s16(v924, v1011);
+ int16x8_t v1070 = vaddq_s16(v1068, v1069);
+ int16x8_t v1071_tmp = vqrdmulhq_n_s16(v1070, 13573);
+ int16x8_t v1071 = vaddq_s16(v1071_tmp, v1070);
+ int16x8_t v1072 = vaddq_s16(v1023, v929);
+ int16x8_t v1073 = vaddq_s16(v932, v1027);
+ int16x8_t v1074 = vaddq_s16(v1072, v1073);
+ int16x8_t v1075 = vaddq_s16(v1074, v1070);
+ int16x8_t v1076 = vaddq_s16(v1071, v1075);
+ int16x8_t v1077 = vqrdmulhq_n_s16(v1076, 17734);
+ int16x8_t v1078 = vaddq_s16(v1067, v1077);
+ int16x8_t v1079 = vaddq_s16(v1014, v940);
+ int16x8_t v1080 = vaddq_s16(v943, v980);
+ int16x8_t v1081 = vaddq_s16(v1079, v1080);
+ int16x8_t v1082_tmp = vqrdmulhq_n_s16(v1081, 13573);
+ int16x8_t v1082 = vaddq_s16(v1082_tmp, v1081);
+ int16x8_t v1083 = vaddq_s16(v1030, v948);
+ int16x8_t v1084 = vaddq_s16(v951, v988);
+ int16x8_t v1085 = vaddq_s16(v1083, v1084);
+ int16x8_t v1086 = vaddq_s16(v998, v955);
+ int16x8_t v1087 = vaddq_s16(v958, v1004);
+ int16x8_t v1088 = vaddq_s16(v1086, v1087);
+ int16x8_t v1089 = vaddq_s16(v1085, v1088);
+ int16x8_t v1090 = vaddq_s16(v1082, v1089);
+ int16x8_t v1091 = vaddq_s16(v1088, v1081);
+ int16x8_t v1092_tmp = vqrdmulhq_n_s16(v1091, 13573);
+ int16x8_t v1092 = vaddq_s16(v1092_tmp, v1091);
+ int16x8_t v1093 = vaddq_s16(v1050, v966);
+ int16x8_t v1094 = vaddq_s16(v969, v1020);
+ int16x8_t v1095 = vaddq_s16(v1093, v1094);
+ int16x8_t v1096 = vaddq_s16(v1095, v1085);
+ int16x8_t v1097 = vaddq_s16(v1096, v1091);
+ int16x8_t v1098 = vaddq_s16(v1092, v1097);
+ int16x8_t v1099 = vqrdmulhq_n_s16(v1098, 17734);
+ int16x8_t v1100 = vaddq_s16(v1090, v1099);
+ int16x8_t v1101 = vqrdmulhq_n_s16(v1100, 16705);
+ int16x8_t v1102 = vaddq_s16(v1078, v1101);
+ int16x8_t v1103 = vaddq_s16(v1080, v1062);
+ int16x8_t v1104_tmp = vqrdmulhq_n_s16(v1103, 13573);
+ int16x8_t v1104 = vaddq_s16(v1104_tmp, v1103);
+ int16x8_t v1105 = vaddq_s16(v1084, v1064);
+ int16x8_t v1106 = vaddq_s16(v1065, v1086);
+ int16x8_t v1107 = vaddq_s16(v1105, v1106);
+ int16x8_t v1108 = vaddq_s16(v1104, v1107);
+ int16x8_t v1109 = vaddq_s16(v1087, v1068);
+ int16x8_t v1110 = vaddq_s16(v1069, v1079);
+ int16x8_t v1111 = vaddq_s16(v1109, v1110);
+ int16x8_t v1112_tmp = vqrdmulhq_n_s16(v1111, 13573);
+ int16x8_t v1112 = vaddq_s16(v1112_tmp, v1111);
+ int16x8_t v1113 = vaddq_s16(v1094, v1072);
+ int16x8_t v1114 = vaddq_s16(v1073, v1083);
+ int16x8_t v1115 = vaddq_s16(v1113, v1114);
+ int16x8_t v1116 = vaddq_s16(v1115, v1111);
+ int16x8_t v1117 = vaddq_s16(v1112, v1116);
+ int16x8_t v1118 = vqrdmulhq_n_s16(v1117, 17734);
+ int16x8_t v1119 = vaddq_s16(v1108, v1118);
+ int16x8_t v1120 = vaddq_s16(v1110, v1103);
+ int16x8_t v1121_tmp = vqrdmulhq_n_s16(v1120, 13573);
+ int16x8_t v1121 = vaddq_s16(v1121_tmp, v1120);
+ int16x8_t v1122 = vaddq_s16(v1114, v1105);
+ int16x8_t v1123 = vaddq_s16(v1106, v1109);
+ int16x8_t v1124 = vaddq_s16(v1122, v1123);
+ int16x8_t v1125 = vaddq_s16(v1121, v1124);
+ int16x8_t v1126 = vaddq_s16(v1123, v1120);
+ int16x8_t v1127_tmp = vqrdmulhq_n_s16(v1126, 13573);
+ int16x8_t v1127 = vaddq_s16(v1127_tmp, v1126);
+ int16x8_t v1128 = vld1q_s16(in + in_stride * 253 + i);
+ int16x8_t v1129 = vld1q_s16(in + in_stride * 251 + i);
+ int16x8_t v1130 = vaddq_s16(v1128, v1129);
+ int16x8_t v1131 = vaddq_s16(v1130, v1047);
+ int16x8_t v1132 = vaddq_s16(v1131, v1093);
+ int16x8_t v1133 = vaddq_s16(v1132, v1113);
+ int16x8_t v1134 = vaddq_s16(v1133, v1122);
+ int16x8_t v1135 = vaddq_s16(v1134, v1126);
+ int16x8_t v1136 = vaddq_s16(v1127, v1135);
+ int16x8_t v1137 = vqrdmulhq_n_s16(v1136, 17734);
+ int16x8_t v1138 = vaddq_s16(v1125, v1137);
+ int16x8_t v1139 = vqrdmulhq_n_s16(v1138, 16705);
+ int16x8_t v1140 = vaddq_s16(v1119, v1139);
+ int16x8_t v1141 = vqrdmulhq_n_s16(v1140, 16463);
+ int16x8_t v1142 = vaddq_s16(v1102, v1141);
+ int16x8_t v1143 = vqrdmulhq_n_s16(v1142, 16404);
+ int16x8_t v1144 = vaddq_s16(v1061, v1143);
+ int16x8_t v1145 = vqrdmulhq_n_s16(v1144, 16389);
+ int16x8_t v1146 = vaddq_s16(v906, v1145);
+ int16x8_t v1147 = vaddq_s16(v908, v702);
+ int16x8_t v1148_tmp = vqrdmulhq_n_s16(v1147, 13573);
+ int16x8_t v1148 = vaddq_s16(v1148_tmp, v1147);
+ int16x8_t v1149 = vaddq_s16(v912, v704);
+ int16x8_t v1150 = vaddq_s16(v705, v914);
+ int16x8_t v1151 = vaddq_s16(v1149, v1150);
+ int16x8_t v1152 = vaddq_s16(v1148, v1151);
+ int16x8_t v1153 = vaddq_s16(v920, v708);
+ int16x8_t v1154 = vaddq_s16(v709, v922);
+ int16x8_t v1155 = vaddq_s16(v1153, v1154);
+ int16x8_t v1156_tmp = vqrdmulhq_n_s16(v1155, 13573);
+ int16x8_t v1156 = vaddq_s16(v1156_tmp, v1155);
+ int16x8_t v1157 = vaddq_s16(v928, v712);
+ int16x8_t v1158 = vaddq_s16(v713, v930);
+ int16x8_t v1159 = vaddq_s16(v1157, v1158);
+ int16x8_t v1160 = vaddq_s16(v1159, v1155);
+ int16x8_t v1161 = vaddq_s16(v1156, v1160);
+ int16x8_t v1162 = vqrdmulhq_n_s16(v1161, 17734);
+ int16x8_t v1163 = vaddq_s16(v1152, v1162);
+ int16x8_t v1164 = vaddq_s16(v939, v719);
+ int16x8_t v1165 = vaddq_s16(v720, v941);
+ int16x8_t v1166 = vaddq_s16(v1164, v1165);
+ int16x8_t v1167_tmp = vqrdmulhq_n_s16(v1166, 13573);
+ int16x8_t v1167 = vaddq_s16(v1167_tmp, v1166);
+ int16x8_t v1168 = vaddq_s16(v947, v723);
+ int16x8_t v1169 = vaddq_s16(v724, v949);
+ int16x8_t v1170 = vaddq_s16(v1168, v1169);
+ int16x8_t v1171 = vaddq_s16(v954, v726);
+ int16x8_t v1172 = vaddq_s16(v727, v956);
+ int16x8_t v1173 = vaddq_s16(v1171, v1172);
+ int16x8_t v1174 = vaddq_s16(v1170, v1173);
+ int16x8_t v1175 = vaddq_s16(v1167, v1174);
+ int16x8_t v1176 = vaddq_s16(v1173, v1166);
+ int16x8_t v1177_tmp = vqrdmulhq_n_s16(v1176, 13573);
+ int16x8_t v1177 = vaddq_s16(v1177_tmp, v1176);
+ int16x8_t v1178 = vaddq_s16(v965, v733);
+ int16x8_t v1179 = vaddq_s16(v734, v967);
+ int16x8_t v1180 = vaddq_s16(v1178, v1179);
+ int16x8_t v1181 = vaddq_s16(v1180, v1170);
+ int16x8_t v1182 = vaddq_s16(v1181, v1176);
+ int16x8_t v1183 = vaddq_s16(v1177, v1182);
+ int16x8_t v1184 = vqrdmulhq_n_s16(v1183, 17734);
+ int16x8_t v1185 = vaddq_s16(v1175, v1184);
+ int16x8_t v1186 = vqrdmulhq_n_s16(v1185, 16705);
+ int16x8_t v1187 = vaddq_s16(v1163, v1186);
+ int16x8_t v1188 = vaddq_s16(v979, v743);
+ int16x8_t v1189 = vaddq_s16(v744, v981);
+ int16x8_t v1190 = vaddq_s16(v1188, v1189);
+ int16x8_t v1191_tmp = vqrdmulhq_n_s16(v1190, 13573);
+ int16x8_t v1191 = vaddq_s16(v1191_tmp, v1190);
+ int16x8_t v1192 = vaddq_s16(v987, v747);
+ int16x8_t v1193 = vaddq_s16(v748, v989);
+ int16x8_t v1194 = vaddq_s16(v1192, v1193);
+ int16x8_t v1195 = vaddq_s16(v994, v750);
+ int16x8_t v1196 = vaddq_s16(v751, v996);
+ int16x8_t v1197 = vaddq_s16(v1195, v1196);
+ int16x8_t v1198 = vaddq_s16(v1194, v1197);
+ int16x8_t v1199 = vaddq_s16(v1191, v1198);
+ int16x8_t v1200 = vaddq_s16(v1003, v755);
+ int16x8_t v1201 = vaddq_s16(v756, v1005);
+ int16x8_t v1202 = vaddq_s16(v1200, v1201);
+ int16x8_t v1203 = vaddq_s16(v1010, v758);
+ int16x8_t v1204 = vaddq_s16(v759, v1012);
+ int16x8_t v1205 = vaddq_s16(v1203, v1204);
+ int16x8_t v1206 = vaddq_s16(v1202, v1205);
+ int16x8_t v1207_tmp = vqrdmulhq_n_s16(v1206, 13573);
+ int16x8_t v1207 = vaddq_s16(v1207_tmp, v1206);
+ int16x8_t v1208 = vaddq_s16(v1019, v763);
+ int16x8_t v1209 = vaddq_s16(v764, v1021);
+ int16x8_t v1210 = vaddq_s16(v1208, v1209);
+ int16x8_t v1211 = vaddq_s16(v1026, v766);
+ int16x8_t v1212 = vaddq_s16(v767, v1028);
+ int16x8_t v1213 = vaddq_s16(v1211, v1212);
+ int16x8_t v1214 = vaddq_s16(v1210, v1213);
+ int16x8_t v1215 = vaddq_s16(v1214, v1206);
+ int16x8_t v1216 = vaddq_s16(v1207, v1215);
+ int16x8_t v1217 = vqrdmulhq_n_s16(v1216, 17734);
+ int16x8_t v1218 = vaddq_s16(v1199, v1217);
+ int16x8_t v1219 = vaddq_s16(v1205, v1190);
+ int16x8_t v1220_tmp = vqrdmulhq_n_s16(v1219, 13573);
+ int16x8_t v1220 = vaddq_s16(v1220_tmp, v1219);
+ int16x8_t v1221 = vaddq_s16(v1213, v1194);
+ int16x8_t v1222 = vaddq_s16(v1197, v1202);
+ int16x8_t v1223 = vaddq_s16(v1221, v1222);
+ int16x8_t v1224 = vaddq_s16(v1220, v1223);
+ int16x8_t v1225 = vaddq_s16(v1222, v1219);
+ int16x8_t v1226_tmp = vqrdmulhq_n_s16(v1225, 13573);
+ int16x8_t v1226 = vaddq_s16(v1226_tmp, v1225);
+ int16x8_t v1227 = vaddq_s16(v1046, v782);
+ int16x8_t v1228 = vaddq_s16(v783, v1048);
+ int16x8_t v1229 = vaddq_s16(v1227, v1228);
+ int16x8_t v1230 = vaddq_s16(v1229, v1210);
+ int16x8_t v1231 = vaddq_s16(v1230, v1221);
+ int16x8_t v1232 = vaddq_s16(v1231, v1225);
+ int16x8_t v1233 = vaddq_s16(v1226, v1232);
+ int16x8_t v1234 = vqrdmulhq_n_s16(v1233, 17734);
+ int16x8_t v1235 = vaddq_s16(v1224, v1234);
+ int16x8_t v1236 = vqrdmulhq_n_s16(v1235, 16705);
+ int16x8_t v1237 = vaddq_s16(v1218, v1236);
+ int16x8_t v1238 = vqrdmulhq_n_s16(v1237, 16463);
+ int16x8_t v1239 = vaddq_s16(v1187, v1238);
+ int16x8_t v1240 = vaddq_s16(v982, v795);
+ int16x8_t v1241 = vaddq_s16(v796, v907);
+ int16x8_t v1242 = vaddq_s16(v1240, v1241);
+ int16x8_t v1243_tmp = vqrdmulhq_n_s16(v1242, 13573);
+ int16x8_t v1243 = vaddq_s16(v1243_tmp, v1242);
+ int16x8_t v1244 = vaddq_s16(v990, v799);
+ int16x8_t v1245 = vaddq_s16(v800, v911);
+ int16x8_t v1246 = vaddq_s16(v1244, v1245);
+ int16x8_t v1247 = vaddq_s16(v915, v802);
+ int16x8_t v1248 = vaddq_s16(v803, v993);
+ int16x8_t v1249 = vaddq_s16(v1247, v1248);
+ int16x8_t v1250 = vaddq_s16(v1246, v1249);
+ int16x8_t v1251 = vaddq_s16(v1243, v1250);
+ int16x8_t v1252 = vaddq_s16(v1006, v807);
+ int16x8_t v1253 = vaddq_s16(v808, v919);
+ int16x8_t v1254 = vaddq_s16(v1252, v1253);
+ int16x8_t v1255 = vaddq_s16(v923, v810);
+ int16x8_t v1256 = vaddq_s16(v811, v1009);
+ int16x8_t v1257 = vaddq_s16(v1255, v1256);
+ int16x8_t v1258 = vaddq_s16(v1254, v1257);
+ int16x8_t v1259_tmp = vqrdmulhq_n_s16(v1258, 13573);
+ int16x8_t v1259 = vaddq_s16(v1259_tmp, v1258);
+ int16x8_t v1260 = vaddq_s16(v1022, v815);
+ int16x8_t v1261 = vaddq_s16(v816, v927);
+ int16x8_t v1262 = vaddq_s16(v1260, v1261);
+ int16x8_t v1263 = vaddq_s16(v931, v818);
+ int16x8_t v1264 = vaddq_s16(v819, v1025);
+ int16x8_t v1265 = vaddq_s16(v1263, v1264);
+ int16x8_t v1266 = vaddq_s16(v1262, v1265);
+ int16x8_t v1267 = vaddq_s16(v1266, v1258);
+ int16x8_t v1268 = vaddq_s16(v1259, v1267);
+ int16x8_t v1269 = vqrdmulhq_n_s16(v1268, 17734);
+ int16x8_t v1270 = vaddq_s16(v1251, v1269);
+ int16x8_t v1271 = vaddq_s16(v1013, v826);
+ int16x8_t v1272 = vaddq_s16(v827, v938);
+ int16x8_t v1273 = vaddq_s16(v1271, v1272);
+ int16x8_t v1274 = vaddq_s16(v942, v829);
+ int16x8_t v1275 = vaddq_s16(v830, v978);
+ int16x8_t v1276 = vaddq_s16(v1274, v1275);
+ int16x8_t v1277 = vaddq_s16(v1273, v1276);
+ int16x8_t v1278_tmp = vqrdmulhq_n_s16(v1277, 13573);
+ int16x8_t v1278 = vaddq_s16(v1278_tmp, v1277);
+ int16x8_t v1279 = vaddq_s16(v1029, v834);
+ int16x8_t v1280 = vaddq_s16(v835, v946);
+ int16x8_t v1281 = vaddq_s16(v1279, v1280);
+ int16x8_t v1282 = vaddq_s16(v950, v837);
+ int16x8_t v1283 = vaddq_s16(v838, v986);
+ int16x8_t v1284 = vaddq_s16(v1282, v1283);
+ int16x8_t v1285 = vaddq_s16(v1281, v1284);
+ int16x8_t v1286 = vaddq_s16(v997, v841);
+ int16x8_t v1287 = vaddq_s16(v842, v953);
+ int16x8_t v1288 = vaddq_s16(v1286, v1287);
+ int16x8_t v1289 = vaddq_s16(v957, v844);
+ int16x8_t v1290 = vaddq_s16(v845, v1002);
+ int16x8_t v1291 = vaddq_s16(v1289, v1290);
+ int16x8_t v1292 = vaddq_s16(v1288, v1291);
+ int16x8_t v1293 = vaddq_s16(v1285, v1292);
+ int16x8_t v1294 = vaddq_s16(v1278, v1293);
+ int16x8_t v1295 = vaddq_s16(v1292, v1277);
+ int16x8_t v1296_tmp = vqrdmulhq_n_s16(v1295, 13573);
+ int16x8_t v1296 = vaddq_s16(v1296_tmp, v1295);
+ int16x8_t v1297 = vaddq_s16(v1049, v852);
+ int16x8_t v1298 = vaddq_s16(v853, v964);
+ int16x8_t v1299 = vaddq_s16(v1297, v1298);
+ int16x8_t v1300 = vaddq_s16(v968, v855);
+ int16x8_t v1301 = vaddq_s16(v856, v1018);
+ int16x8_t v1302 = vaddq_s16(v1300, v1301);
+ int16x8_t v1303 = vaddq_s16(v1299, v1302);
+ int16x8_t v1304 = vaddq_s16(v1303, v1285);
+ int16x8_t v1305 = vaddq_s16(v1304, v1295);
+ int16x8_t v1306 = vaddq_s16(v1296, v1305);
+ int16x8_t v1307 = vqrdmulhq_n_s16(v1306, 17734);
+ int16x8_t v1308 = vaddq_s16(v1294, v1307);
+ int16x8_t v1309 = vqrdmulhq_n_s16(v1308, 16705);
+ int16x8_t v1310 = vaddq_s16(v1270, v1309);
+ int16x8_t v1311 = vaddq_s16(v1276, v1242);
+ int16x8_t v1312_tmp = vqrdmulhq_n_s16(v1311, 13573);
+ int16x8_t v1312 = vaddq_s16(v1312_tmp, v1311);
+ int16x8_t v1313 = vaddq_s16(v1284, v1246);
+ int16x8_t v1314 = vaddq_s16(v1249, v1288);
+ int16x8_t v1315 = vaddq_s16(v1313, v1314);
+ int16x8_t v1316 = vaddq_s16(v1312, v1315);
+ int16x8_t v1317 = vaddq_s16(v1291, v1254);
+ int16x8_t v1318 = vaddq_s16(v1257, v1273);
+ int16x8_t v1319 = vaddq_s16(v1317, v1318);
+ int16x8_t v1320_tmp = vqrdmulhq_n_s16(v1319, 13573);
+ int16x8_t v1320 = vaddq_s16(v1320_tmp, v1319);
+ int16x8_t v1321 = vaddq_s16(v1302, v1262);
+ int16x8_t v1322 = vaddq_s16(v1265, v1281);
+ int16x8_t v1323 = vaddq_s16(v1321, v1322);
+ int16x8_t v1324 = vaddq_s16(v1323, v1319);
+ int16x8_t v1325 = vaddq_s16(v1320, v1324);
+ int16x8_t v1326 = vqrdmulhq_n_s16(v1325, 17734);
+ int16x8_t v1327 = vaddq_s16(v1316, v1326);
+ int16x8_t v1328 = vaddq_s16(v1318, v1311);
+ int16x8_t v1329_tmp = vqrdmulhq_n_s16(v1328, 13573);
+ int16x8_t v1329 = vaddq_s16(v1329_tmp, v1328);
+ int16x8_t v1330 = vaddq_s16(v1322, v1313);
+ int16x8_t v1331 = vaddq_s16(v1314, v1317);
+ int16x8_t v1332 = vaddq_s16(v1330, v1331);
+ int16x8_t v1333 = vaddq_s16(v1329, v1332);
+ int16x8_t v1334 = vaddq_s16(v1331, v1328);
+ int16x8_t v1335_tmp = vqrdmulhq_n_s16(v1334, 13573);
+ int16x8_t v1335 = vaddq_s16(v1335_tmp, v1334);
+ int16x8_t v1336 = vaddq_s16(v1129, v891);
+ int16x8_t v1337 = vaddq_s16(v892, v1045);
+ int16x8_t v1338 = vaddq_s16(v1336, v1337);
+ int16x8_t v1339 = vaddq_s16(v1338, v1299);
+ int16x8_t v1340 = vaddq_s16(v1339, v1321);
+ int16x8_t v1341 = vaddq_s16(v1340, v1330);
+ int16x8_t v1342 = vaddq_s16(v1341, v1334);
+ int16x8_t v1343 = vaddq_s16(v1335, v1342);
+ int16x8_t v1344 = vqrdmulhq_n_s16(v1343, 17734);
+ int16x8_t v1345 = vaddq_s16(v1333, v1344);
+ int16x8_t v1346 = vqrdmulhq_n_s16(v1345, 16705);
+ int16x8_t v1347 = vaddq_s16(v1327, v1346);
+ int16x8_t v1348 = vqrdmulhq_n_s16(v1347, 16463);
+ int16x8_t v1349 = vaddq_s16(v1310, v1348);
+ int16x8_t v1350 = vqrdmulhq_n_s16(v1349, 16404);
+ int16x8_t v1351 = vaddq_s16(v1239, v1350);
+ int16x8_t v1352 = vaddq_s16(v1241, v1147);
+ int16x8_t v1353_tmp = vqrdmulhq_n_s16(v1352, 13573);
+ int16x8_t v1353 = vaddq_s16(v1353_tmp, v1352);
+ int16x8_t v1354 = vaddq_s16(v1245, v1149);
+ int16x8_t v1355 = vaddq_s16(v1150, v1247);
+ int16x8_t v1356 = vaddq_s16(v1354, v1355);
+ int16x8_t v1357 = vaddq_s16(v1353, v1356);
+ int16x8_t v1358 = vaddq_s16(v1253, v1153);
+ int16x8_t v1359 = vaddq_s16(v1154, v1255);
+ int16x8_t v1360 = vaddq_s16(v1358, v1359);
+ int16x8_t v1361_tmp = vqrdmulhq_n_s16(v1360, 13573);
+ int16x8_t v1361 = vaddq_s16(v1361_tmp, v1360);
+ int16x8_t v1362 = vaddq_s16(v1261, v1157);
+ int16x8_t v1363 = vaddq_s16(v1158, v1263);
+ int16x8_t v1364 = vaddq_s16(v1362, v1363);
+ int16x8_t v1365 = vaddq_s16(v1364, v1360);
+ int16x8_t v1366 = vaddq_s16(v1361, v1365);
+ int16x8_t v1367 = vqrdmulhq_n_s16(v1366, 17734);
+ int16x8_t v1368 = vaddq_s16(v1357, v1367);
+ int16x8_t v1369 = vaddq_s16(v1272, v1164);
+ int16x8_t v1370 = vaddq_s16(v1165, v1274);
+ int16x8_t v1371 = vaddq_s16(v1369, v1370);
+ int16x8_t v1372_tmp = vqrdmulhq_n_s16(v1371, 13573);
+ int16x8_t v1372 = vaddq_s16(v1372_tmp, v1371);
+ int16x8_t v1373 = vaddq_s16(v1280, v1168);
+ int16x8_t v1374 = vaddq_s16(v1169, v1282);
+ int16x8_t v1375 = vaddq_s16(v1373, v1374);
+ int16x8_t v1376 = vaddq_s16(v1287, v1171);
+ int16x8_t v1377 = vaddq_s16(v1172, v1289);
+ int16x8_t v1378 = vaddq_s16(v1376, v1377);
+ int16x8_t v1379 = vaddq_s16(v1375, v1378);
+ int16x8_t v1380 = vaddq_s16(v1372, v1379);
+ int16x8_t v1381 = vaddq_s16(v1378, v1371);
+ int16x8_t v1382_tmp = vqrdmulhq_n_s16(v1381, 13573);
+ int16x8_t v1382 = vaddq_s16(v1382_tmp, v1381);
+ int16x8_t v1383 = vaddq_s16(v1298, v1178);
+ int16x8_t v1384 = vaddq_s16(v1179, v1300);
+ int16x8_t v1385 = vaddq_s16(v1383, v1384);
+ int16x8_t v1386 = vaddq_s16(v1385, v1375);
+ int16x8_t v1387 = vaddq_s16(v1386, v1381);
+ int16x8_t v1388 = vaddq_s16(v1382, v1387);
+ int16x8_t v1389 = vqrdmulhq_n_s16(v1388, 17734);
+ int16x8_t v1390 = vaddq_s16(v1380, v1389);
+ int16x8_t v1391 = vqrdmulhq_n_s16(v1390, 16705);
+ int16x8_t v1392 = vaddq_s16(v1368, v1391);
+ int16x8_t v1393 = vaddq_s16(v1275, v1188);
+ int16x8_t v1394 = vaddq_s16(v1189, v1240);
+ int16x8_t v1395 = vaddq_s16(v1393, v1394);
+ int16x8_t v1396_tmp = vqrdmulhq_n_s16(v1395, 13573);
+ int16x8_t v1396 = vaddq_s16(v1396_tmp, v1395);
+ int16x8_t v1397 = vaddq_s16(v1283, v1192);
+ int16x8_t v1398 = vaddq_s16(v1193, v1244);
+ int16x8_t v1399 = vaddq_s16(v1397, v1398);
+ int16x8_t v1400 = vaddq_s16(v1248, v1195);
+ int16x8_t v1401 = vaddq_s16(v1196, v1286);
+ int16x8_t v1402 = vaddq_s16(v1400, v1401);
+ int16x8_t v1403 = vaddq_s16(v1399, v1402);
+ int16x8_t v1404 = vaddq_s16(v1396, v1403);
+ int16x8_t v1405 = vaddq_s16(v1290, v1200);
+ int16x8_t v1406 = vaddq_s16(v1201, v1252);
+ int16x8_t v1407 = vaddq_s16(v1405, v1406);
+ int16x8_t v1408 = vaddq_s16(v1256, v1203);
+ int16x8_t v1409 = vaddq_s16(v1204, v1271);
+ int16x8_t v1410 = vaddq_s16(v1408, v1409);
+ int16x8_t v1411 = vaddq_s16(v1407, v1410);
+ int16x8_t v1412_tmp = vqrdmulhq_n_s16(v1411, 13573);
+ int16x8_t v1412 = vaddq_s16(v1412_tmp, v1411);
+ int16x8_t v1413 = vaddq_s16(v1301, v1208);
+ int16x8_t v1414 = vaddq_s16(v1209, v1260);
+ int16x8_t v1415 = vaddq_s16(v1413, v1414);
+ int16x8_t v1416 = vaddq_s16(v1264, v1211);
+ int16x8_t v1417 = vaddq_s16(v1212, v1279);
+ int16x8_t v1418 = vaddq_s16(v1416, v1417);
+ int16x8_t v1419 = vaddq_s16(v1415, v1418);
+ int16x8_t v1420 = vaddq_s16(v1419, v1411);
+ int16x8_t v1421 = vaddq_s16(v1412, v1420);
+ int16x8_t v1422 = vqrdmulhq_n_s16(v1421, 17734);
+ int16x8_t v1423 = vaddq_s16(v1404, v1422);
+ int16x8_t v1424 = vaddq_s16(v1410, v1395);
+ int16x8_t v1425_tmp = vqrdmulhq_n_s16(v1424, 13573);
+ int16x8_t v1425 = vaddq_s16(v1425_tmp, v1424);
+ int16x8_t v1426 = vaddq_s16(v1418, v1399);
+ int16x8_t v1427 = vaddq_s16(v1402, v1407);
+ int16x8_t v1428 = vaddq_s16(v1426, v1427);
+ int16x8_t v1429 = vaddq_s16(v1425, v1428);
+ int16x8_t v1430 = vaddq_s16(v1427, v1424);
+ int16x8_t v1431_tmp = vqrdmulhq_n_s16(v1430, 13573);
+ int16x8_t v1431 = vaddq_s16(v1431_tmp, v1430);
+ int16x8_t v1432 = vaddq_s16(v1337, v1227);
+ int16x8_t v1433 = vaddq_s16(v1228, v1297);
+ int16x8_t v1434 = vaddq_s16(v1432, v1433);
+ int16x8_t v1435 = vaddq_s16(v1434, v1415);
+ int16x8_t v1436 = vaddq_s16(v1435, v1426);
+ int16x8_t v1437 = vaddq_s16(v1436, v1430);
+ int16x8_t v1438 = vaddq_s16(v1431, v1437);
+ int16x8_t v1439 = vqrdmulhq_n_s16(v1438, 17734);
+ int16x8_t v1440 = vaddq_s16(v1429, v1439);
+ int16x8_t v1441 = vqrdmulhq_n_s16(v1440, 16705);
+ int16x8_t v1442 = vaddq_s16(v1423, v1441);
+ int16x8_t v1443 = vqrdmulhq_n_s16(v1442, 16463);
+ int16x8_t v1444 = vaddq_s16(v1392, v1443);
+ int16x8_t v1445 = vaddq_s16(v1394, v1352);
+ int16x8_t v1446_tmp = vqrdmulhq_n_s16(v1445, 13573);
+ int16x8_t v1446 = vaddq_s16(v1446_tmp, v1445);
+ int16x8_t v1447 = vaddq_s16(v1398, v1354);
+ int16x8_t v1448 = vaddq_s16(v1355, v1400);
+ int16x8_t v1449 = vaddq_s16(v1447, v1448);
+ int16x8_t v1450 = vaddq_s16(v1446, v1449);
+ int16x8_t v1451 = vaddq_s16(v1406, v1358);
+ int16x8_t v1452 = vaddq_s16(v1359, v1408);
+ int16x8_t v1453 = vaddq_s16(v1451, v1452);
+ int16x8_t v1454_tmp = vqrdmulhq_n_s16(v1453, 13573);
+ int16x8_t v1454 = vaddq_s16(v1454_tmp, v1453);
+ int16x8_t v1455 = vaddq_s16(v1414, v1362);
+ int16x8_t v1456 = vaddq_s16(v1363, v1416);
+ int16x8_t v1457 = vaddq_s16(v1455, v1456);
+ int16x8_t v1458 = vaddq_s16(v1457, v1453);
+ int16x8_t v1459 = vaddq_s16(v1454, v1458);
+ int16x8_t v1460 = vqrdmulhq_n_s16(v1459, 17734);
+ int16x8_t v1461 = vaddq_s16(v1450, v1460);
+ int16x8_t v1462 = vaddq_s16(v1409, v1369);
+ int16x8_t v1463 = vaddq_s16(v1370, v1393);
+ int16x8_t v1464 = vaddq_s16(v1462, v1463);
+ int16x8_t v1465_tmp = vqrdmulhq_n_s16(v1464, 13573);
+ int16x8_t v1465 = vaddq_s16(v1465_tmp, v1464);
+ int16x8_t v1466 = vaddq_s16(v1417, v1373);
+ int16x8_t v1467 = vaddq_s16(v1374, v1397);
+ int16x8_t v1468 = vaddq_s16(v1466, v1467);
+ int16x8_t v1469 = vaddq_s16(v1401, v1376);
+ int16x8_t v1470 = vaddq_s16(v1377, v1405);
+ int16x8_t v1471 = vaddq_s16(v1469, v1470);
+ int16x8_t v1472 = vaddq_s16(v1468, v1471);
+ int16x8_t v1473 = vaddq_s16(v1465, v1472);
+ int16x8_t v1474 = vaddq_s16(v1471, v1464);
+ int16x8_t v1475_tmp = vqrdmulhq_n_s16(v1474, 13573);
+ int16x8_t v1475 = vaddq_s16(v1475_tmp, v1474);
+ int16x8_t v1476 = vaddq_s16(v1433, v1383);
+ int16x8_t v1477 = vaddq_s16(v1384, v1413);
+ int16x8_t v1478 = vaddq_s16(v1476, v1477);
+ int16x8_t v1479 = vaddq_s16(v1478, v1468);
+ int16x8_t v1480 = vaddq_s16(v1479, v1474);
+ int16x8_t v1481 = vaddq_s16(v1475, v1480);
+ int16x8_t v1482 = vqrdmulhq_n_s16(v1481, 17734);
+ int16x8_t v1483 = vaddq_s16(v1473, v1482);
+ int16x8_t v1484 = vqrdmulhq_n_s16(v1483, 16705);
+ int16x8_t v1485 = vaddq_s16(v1461, v1484);
+ int16x8_t v1486 = vaddq_s16(v1463, v1445);
+ int16x8_t v1487_tmp = vqrdmulhq_n_s16(v1486, 13573);
+ int16x8_t v1487 = vaddq_s16(v1487_tmp, v1486);
+ int16x8_t v1488 = vaddq_s16(v1467, v1447);
+ int16x8_t v1489 = vaddq_s16(v1448, v1469);
+ int16x8_t v1490 = vaddq_s16(v1488, v1489);
+ int16x8_t v1491 = vaddq_s16(v1487, v1490);
+ int16x8_t v1492 = vaddq_s16(v1470, v1451);
+ int16x8_t v1493 = vaddq_s16(v1452, v1462);
+ int16x8_t v1494 = vaddq_s16(v1492, v1493);
+ int16x8_t v1495_tmp = vqrdmulhq_n_s16(v1494, 13573);
+ int16x8_t v1495 = vaddq_s16(v1495_tmp, v1494);
+ int16x8_t v1496 = vaddq_s16(v1477, v1455);
+ int16x8_t v1497 = vaddq_s16(v1456, v1466);
+ int16x8_t v1498 = vaddq_s16(v1496, v1497);
+ int16x8_t v1499 = vaddq_s16(v1498, v1494);
+ int16x8_t v1500 = vaddq_s16(v1495, v1499);
+ int16x8_t v1501 = vqrdmulhq_n_s16(v1500, 17734);
+ int16x8_t v1502 = vaddq_s16(v1491, v1501);
+ int16x8_t v1503 = vaddq_s16(v1493, v1486);
+ int16x8_t v1504_tmp = vqrdmulhq_n_s16(v1503, 13573);
+ int16x8_t v1504 = vaddq_s16(v1504_tmp, v1503);
+ int16x8_t v1505 = vaddq_s16(v1497, v1488);
+ int16x8_t v1506 = vaddq_s16(v1489, v1492);
+ int16x8_t v1507 = vaddq_s16(v1505, v1506);
+ int16x8_t v1508 = vaddq_s16(v1504, v1507);
+ int16x8_t v1509 = vaddq_s16(v1506, v1503);
+ int16x8_t v1510_tmp = vqrdmulhq_n_s16(v1509, 13573);
+ int16x8_t v1510 = vaddq_s16(v1510_tmp, v1509);
+ int16x8_t v1511 = vld1q_s16(in + in_stride * 255 + i);
+ int16x8_t v1512 = vaddq_s16(v1511, v1128);
+ int16x8_t v1513 = vaddq_s16(v1512, v1336);
+ int16x8_t v1514 = vaddq_s16(v1513, v1432);
+ int16x8_t v1515 = vaddq_s16(v1514, v1476);
+ int16x8_t v1516 = vaddq_s16(v1515, v1496);
+ int16x8_t v1517 = vaddq_s16(v1516, v1505);
+ int16x8_t v1518 = vaddq_s16(v1517, v1509);
+ int16x8_t v1519 = vaddq_s16(v1510, v1518);
+ int16x8_t v1520 = vqrdmulhq_n_s16(v1519, 17734);
+ int16x8_t v1521 = vaddq_s16(v1508, v1520);
+ int16x8_t v1522 = vqrdmulhq_n_s16(v1521, 16705);
+ int16x8_t v1523 = vaddq_s16(v1502, v1522);
+ int16x8_t v1524 = vqrdmulhq_n_s16(v1523, 16463);
+ int16x8_t v1525 = vaddq_s16(v1485, v1524);
+ int16x8_t v1526 = vqrdmulhq_n_s16(v1525, 16404);
+ int16x8_t v1527 = vaddq_s16(v1444, v1526);
+ int16x8_t v1528 = vqrdmulhq_n_s16(v1527, 16389);
+ int16x8_t v1529 = vaddq_s16(v1351, v1528);
+ int16x8_t v1530 = vqrdmulhq_n_s16(v1529, 16385);
+ int16x8_t v1531 = vaddq_s16(v1146, v1530);
+ int16x8_t v1532 = vqrdmulhq_n_s16(v1531, 16384);
+ int16x8_t v1533 = vaddq_s16(v701, v1532);
+ int16x8_t v1534 = vsubq_s16(v0, v1);
+ int16x8_t v1535 = vsubq_s16(v4, v6);
+ int16x8_t v1536_tmp = vqrdmulhq_n_s16(v1535, 10045);
+ int16x8_t v1536 = vaddq_s16(v1536_tmp, v1535);
+ int16x8_t v1537 = vaddq_s16(v1534, v1536);
+ int16x8_t v1538 = vsubq_s16(v11, v14);
+ int16x8_t v1539 = vsubq_s16(v17, v20);
+ int16x8_t v1540_tmp = vqrdmulhq_n_s16(v1539, 10045);
+ int16x8_t v1540 = vaddq_s16(v1540_tmp, v1539);
+ int16x8_t v1541 = vaddq_s16(v1538, v1540);
+ int16x8_t v1542 = vqrdmulhq_n_s16(v1541, 19705);
+ int16x8_t v1543 = vaddq_s16(v1537, v1542);
+ int16x8_t v1544 = vsubq_s16(v27, v30);
+ int16x8_t v1545 = vsubq_s16(v35, v39);
+ int16x8_t v1546_tmp = vqrdmulhq_n_s16(v1545, 10045);
+ int16x8_t v1546 = vaddq_s16(v1546_tmp, v1545);
+ int16x8_t v1547 = vaddq_s16(v1544, v1546);
+ int16x8_t v1548 = vsubq_s16(v44, v47);
+ int16x8_t v1549 = vsubq_s16(v50, v54);
+ int16x8_t v1550_tmp = vqrdmulhq_n_s16(v1549, 10045);
+ int16x8_t v1550 = vaddq_s16(v1550_tmp, v1549);
+ int16x8_t v1551 = vaddq_s16(v1548, v1550);
+ int16x8_t v1552 = vqrdmulhq_n_s16(v1551, 19705);
+ int16x8_t v1553 = vaddq_s16(v1547, v1552);
+ int16x8_t v1554 = vqrdmulhq_n_s16(v1553, 17121);
+ int16x8_t v1555 = vaddq_s16(v1543, v1554);
+ int16x8_t v1556 = vsubq_s16(v63, v66);
+ int16x8_t v1557 = vsubq_s16(v71, v75);
+ int16x8_t v1558_tmp = vqrdmulhq_n_s16(v1557, 10045);
+ int16x8_t v1558 = vaddq_s16(v1558_tmp, v1557);
+ int16x8_t v1559 = vaddq_s16(v1556, v1558);
+ int16x8_t v1560 = vsubq_s16(v82, v89);
+ int16x8_t v1561 = vsubq_s16(v92, v97);
+ int16x8_t v1562_tmp = vqrdmulhq_n_s16(v1561, 10045);
+ int16x8_t v1562 = vaddq_s16(v1562_tmp, v1561);
+ int16x8_t v1563 = vaddq_s16(v1560, v1562);
+ int16x8_t v1564 = vqrdmulhq_n_s16(v1563, 19705);
+ int16x8_t v1565 = vaddq_s16(v1559, v1564);
+ int16x8_t v1566 = vsubq_s16(v104, v107);
+ int16x8_t v1567 = vsubq_s16(v112, v116);
+ int16x8_t v1568_tmp = vqrdmulhq_n_s16(v1567, 10045);
+ int16x8_t v1568 = vaddq_s16(v1568_tmp, v1567);
+ int16x8_t v1569 = vaddq_s16(v1566, v1568);
+ int16x8_t v1570 = vsubq_s16(v121, v124);
+ int16x8_t v1571 = vsubq_s16(v127, v132);
+ int16x8_t v1572_tmp = vqrdmulhq_n_s16(v1571, 10045);
+ int16x8_t v1572 = vaddq_s16(v1572_tmp, v1571);
+ int16x8_t v1573 = vaddq_s16(v1570, v1572);
+ int16x8_t v1574 = vqrdmulhq_n_s16(v1573, 19705);
+ int16x8_t v1575 = vaddq_s16(v1569, v1574);
+ int16x8_t v1576 = vqrdmulhq_n_s16(v1575, 17121);
+ int16x8_t v1577 = vaddq_s16(v1565, v1576);
+ int16x8_t v1578 = vqrdmulhq_n_s16(v1577, 16563);
+ int16x8_t v1579 = vaddq_s16(v1555, v1578);
+ int16x8_t v1580 = vsubq_s16(v143, v146);
+ int16x8_t v1581 = vsubq_s16(v151, v155);
+ int16x8_t v1582_tmp = vqrdmulhq_n_s16(v1581, 10045);
+ int16x8_t v1582 = vaddq_s16(v1582_tmp, v1581);
+ int16x8_t v1583 = vaddq_s16(v1580, v1582);
+ int16x8_t v1584 = vsubq_s16(v162, v169);
+ int16x8_t v1585 = vsubq_s16(v172, v177);
+ int16x8_t v1586_tmp = vqrdmulhq_n_s16(v1585, 10045);
+ int16x8_t v1586 = vaddq_s16(v1586_tmp, v1585);
+ int16x8_t v1587 = vaddq_s16(v1584, v1586);
+ int16x8_t v1588 = vqrdmulhq_n_s16(v1587, 19705);
+ int16x8_t v1589 = vaddq_s16(v1583, v1588);
+ int16x8_t v1590 = vsubq_s16(v186, v193);
+ int16x8_t v1591 = vsubq_s16(v202, v210);
+ int16x8_t v1592_tmp = vqrdmulhq_n_s16(v1591, 10045);
+ int16x8_t v1592 = vaddq_s16(v1592_tmp, v1591);
+ int16x8_t v1593 = vaddq_s16(v1590, v1592);
+ int16x8_t v1594 = vsubq_s16(v215, v218);
+ int16x8_t v1595 = vsubq_s16(v221, v227);
+ int16x8_t v1596_tmp = vqrdmulhq_n_s16(v1595, 10045);
+ int16x8_t v1596 = vaddq_s16(v1596_tmp, v1595);
+ int16x8_t v1597 = vaddq_s16(v1594, v1596);
+ int16x8_t v1598 = vqrdmulhq_n_s16(v1597, 19705);
+ int16x8_t v1599 = vaddq_s16(v1593, v1598);
+ int16x8_t v1600 = vqrdmulhq_n_s16(v1599, 17121);
+ int16x8_t v1601 = vaddq_s16(v1589, v1600);
+ int16x8_t v1602 = vsubq_s16(v236, v239);
+ int16x8_t v1603 = vsubq_s16(v244, v248);
+ int16x8_t v1604_tmp = vqrdmulhq_n_s16(v1603, 10045);
+ int16x8_t v1604 = vaddq_s16(v1604_tmp, v1603);
+ int16x8_t v1605 = vaddq_s16(v1602, v1604);
+ int16x8_t v1606 = vsubq_s16(v255, v262);
+ int16x8_t v1607 = vsubq_s16(v265, v270);
+ int16x8_t v1608_tmp = vqrdmulhq_n_s16(v1607, 10045);
+ int16x8_t v1608 = vaddq_s16(v1608_tmp, v1607);
+ int16x8_t v1609 = vaddq_s16(v1606, v1608);
+ int16x8_t v1610 = vqrdmulhq_n_s16(v1609, 19705);
+ int16x8_t v1611 = vaddq_s16(v1605, v1610);
+ int16x8_t v1612 = vsubq_s16(v277, v280);
+ int16x8_t v1613 = vsubq_s16(v285, v289);
+ int16x8_t v1614_tmp = vqrdmulhq_n_s16(v1613, 10045);
+ int16x8_t v1614 = vaddq_s16(v1614_tmp, v1613);
+ int16x8_t v1615 = vaddq_s16(v1612, v1614);
+ int16x8_t v1616 = vsubq_s16(v294, v297);
+ int16x8_t v1617 = vsubq_s16(v300, v306);
+ int16x8_t v1618_tmp = vqrdmulhq_n_s16(v1617, 10045);
+ int16x8_t v1618 = vaddq_s16(v1618_tmp, v1617);
+ int16x8_t v1619 = vaddq_s16(v1616, v1618);
+ int16x8_t v1620 = vqrdmulhq_n_s16(v1619, 19705);
+ int16x8_t v1621 = vaddq_s16(v1615, v1620);
+ int16x8_t v1622 = vqrdmulhq_n_s16(v1621, 17121);
+ int16x8_t v1623 = vaddq_s16(v1611, v1622);
+ int16x8_t v1624 = vqrdmulhq_n_s16(v1623, 16563);
+ int16x8_t v1625 = vaddq_s16(v1601, v1624);
+ int16x8_t v1626 = vqrdmulhq_n_s16(v1625, 16429);
+ int16x8_t v1627 = vaddq_s16(v1579, v1626);
+ int16x8_t v1628 = vsubq_s16(v319, v322);
+ int16x8_t v1629 = vsubq_s16(v327, v331);
+ int16x8_t v1630_tmp = vqrdmulhq_n_s16(v1629, 10045);
+ int16x8_t v1630 = vaddq_s16(v1630_tmp, v1629);
+ int16x8_t v1631 = vaddq_s16(v1628, v1630);
+ int16x8_t v1632 = vsubq_s16(v338, v345);
+ int16x8_t v1633 = vsubq_s16(v348, v353);
+ int16x8_t v1634_tmp = vqrdmulhq_n_s16(v1633, 10045);
+ int16x8_t v1634 = vaddq_s16(v1634_tmp, v1633);
+ int16x8_t v1635 = vaddq_s16(v1632, v1634);
+ int16x8_t v1636 = vqrdmulhq_n_s16(v1635, 19705);
+ int16x8_t v1637 = vaddq_s16(v1631, v1636);
+ int16x8_t v1638 = vsubq_s16(v362, v369);
+ int16x8_t v1639 = vsubq_s16(v378, v386);
+ int16x8_t v1640_tmp = vqrdmulhq_n_s16(v1639, 10045);
+ int16x8_t v1640 = vaddq_s16(v1640_tmp, v1639);
+ int16x8_t v1641 = vaddq_s16(v1638, v1640);
+ int16x8_t v1642 = vsubq_s16(v391, v394);
+ int16x8_t v1643 = vsubq_s16(v397, v403);
+ int16x8_t v1644_tmp = vqrdmulhq_n_s16(v1643, 10045);
+ int16x8_t v1644 = vaddq_s16(v1644_tmp, v1643);
+ int16x8_t v1645 = vaddq_s16(v1642, v1644);
+ int16x8_t v1646 = vqrdmulhq_n_s16(v1645, 19705);
+ int16x8_t v1647 = vaddq_s16(v1641, v1646);
+ int16x8_t v1648 = vqrdmulhq_n_s16(v1647, 17121);
+ int16x8_t v1649 = vaddq_s16(v1637, v1648);
+ int16x8_t v1650 = vsubq_s16(v414, v421);
+ int16x8_t v1651 = vsubq_s16(v430, v438);
+ int16x8_t v1652_tmp = vqrdmulhq_n_s16(v1651, 10045);
+ int16x8_t v1652 = vaddq_s16(v1652_tmp, v1651);
+ int16x8_t v1653 = vaddq_s16(v1650, v1652);
+ int16x8_t v1654 = vsubq_s16(v449, v464);
+ int16x8_t v1655 = vsubq_s16(v467, v476);
+ int16x8_t v1656_tmp = vqrdmulhq_n_s16(v1655, 10045);
+ int16x8_t v1656 = vaddq_s16(v1656_tmp, v1655);
+ int16x8_t v1657 = vaddq_s16(v1654, v1656);
+ int16x8_t v1658 = vqrdmulhq_n_s16(v1657, 19705);
+ int16x8_t v1659 = vaddq_s16(v1653, v1658);
+ int16x8_t v1660 = vsubq_s16(v483, v486);
+ int16x8_t v1661 = vsubq_s16(v491, v495);
+ int16x8_t v1662_tmp = vqrdmulhq_n_s16(v1661, 10045);
+ int16x8_t v1662 = vaddq_s16(v1662_tmp, v1661);
+ int16x8_t v1663 = vaddq_s16(v1660, v1662);
+ int16x8_t v1664 = vsubq_s16(v500, v503);
+ int16x8_t v1665 = vsubq_s16(v506, v513);
+ int16x8_t v1666_tmp = vqrdmulhq_n_s16(v1665, 10045);
+ int16x8_t v1666 = vaddq_s16(v1666_tmp, v1665);
+ int16x8_t v1667 = vaddq_s16(v1664, v1666);
+ int16x8_t v1668 = vqrdmulhq_n_s16(v1667, 19705);
+ int16x8_t v1669 = vaddq_s16(v1663, v1668);
+ int16x8_t v1670 = vqrdmulhq_n_s16(v1669, 17121);
+ int16x8_t v1671 = vaddq_s16(v1659, v1670);
+ int16x8_t v1672 = vqrdmulhq_n_s16(v1671, 16563);
+ int16x8_t v1673 = vaddq_s16(v1649, v1672);
+ int16x8_t v1674 = vsubq_s16(v524, v527);
+ int16x8_t v1675 = vsubq_s16(v532, v536);
+ int16x8_t v1676_tmp = vqrdmulhq_n_s16(v1675, 10045);
+ int16x8_t v1676 = vaddq_s16(v1676_tmp, v1675);
+ int16x8_t v1677 = vaddq_s16(v1674, v1676);
+ int16x8_t v1678 = vsubq_s16(v543, v550);
+ int16x8_t v1679 = vsubq_s16(v553, v558);
+ int16x8_t v1680_tmp = vqrdmulhq_n_s16(v1679, 10045);
+ int16x8_t v1680 = vaddq_s16(v1680_tmp, v1679);
+ int16x8_t v1681 = vaddq_s16(v1678, v1680);
+ int16x8_t v1682 = vqrdmulhq_n_s16(v1681, 19705);
+ int16x8_t v1683 = vaddq_s16(v1677, v1682);
+ int16x8_t v1684 = vsubq_s16(v567, v574);
+ int16x8_t v1685 = vsubq_s16(v583, v591);
+ int16x8_t v1686_tmp = vqrdmulhq_n_s16(v1685, 10045);
+ int16x8_t v1686 = vaddq_s16(v1686_tmp, v1685);
+ int16x8_t v1687 = vaddq_s16(v1684, v1686);
+ int16x8_t v1688 = vsubq_s16(v596, v599);
+ int16x8_t v1689 = vsubq_s16(v602, v608);
+ int16x8_t v1690_tmp = vqrdmulhq_n_s16(v1689, 10045);
+ int16x8_t v1690 = vaddq_s16(v1690_tmp, v1689);
+ int16x8_t v1691 = vaddq_s16(v1688, v1690);
+ int16x8_t v1692 = vqrdmulhq_n_s16(v1691, 19705);
+ int16x8_t v1693 = vaddq_s16(v1687, v1692);
+ int16x8_t v1694 = vqrdmulhq_n_s16(v1693, 17121);
+ int16x8_t v1695 = vaddq_s16(v1683, v1694);
+ int16x8_t v1696 = vsubq_s16(v617, v620);
+ int16x8_t v1697 = vsubq_s16(v625, v629);
+ int16x8_t v1698_tmp = vqrdmulhq_n_s16(v1697, 10045);
+ int16x8_t v1698 = vaddq_s16(v1698_tmp, v1697);
+ int16x8_t v1699 = vaddq_s16(v1696, v1698);
+ int16x8_t v1700 = vsubq_s16(v636, v643);
+ int16x8_t v1701 = vsubq_s16(v646, v651);
+ int16x8_t v1702_tmp = vqrdmulhq_n_s16(v1701, 10045);
+ int16x8_t v1702 = vaddq_s16(v1702_tmp, v1701);
+ int16x8_t v1703 = vaddq_s16(v1700, v1702);
+ int16x8_t v1704 = vqrdmulhq_n_s16(v1703, 19705);
+ int16x8_t v1705 = vaddq_s16(v1699, v1704);
+ int16x8_t v1706 = vsubq_s16(v658, v661);
+ int16x8_t v1707 = vsubq_s16(v666, v670);
+ int16x8_t v1708_tmp = vqrdmulhq_n_s16(v1707, 10045);
+ int16x8_t v1708 = vaddq_s16(v1708_tmp, v1707);
+ int16x8_t v1709 = vaddq_s16(v1706, v1708);
+ int16x8_t v1710 = vsubq_s16(v675, v678);
+ int16x8_t v1711 = vsubq_s16(v681, v688);
+ int16x8_t v1712_tmp = vqrdmulhq_n_s16(v1711, 10045);
+ int16x8_t v1712 = vaddq_s16(v1712_tmp, v1711);
+ int16x8_t v1713 = vaddq_s16(v1710, v1712);
+ int16x8_t v1714 = vqrdmulhq_n_s16(v1713, 19705);
+ int16x8_t v1715 = vaddq_s16(v1709, v1714);
+ int16x8_t v1716 = vqrdmulhq_n_s16(v1715, 17121);
+ int16x8_t v1717 = vaddq_s16(v1705, v1716);
+ int16x8_t v1718 = vqrdmulhq_n_s16(v1717, 16563);
+ int16x8_t v1719 = vaddq_s16(v1695, v1718);
+ int16x8_t v1720 = vqrdmulhq_n_s16(v1719, 16429);
+ int16x8_t v1721 = vaddq_s16(v1673, v1720);
+ int16x8_t v1722 = vqrdmulhq_n_s16(v1721, 16395);
+ int16x8_t v1723 = vaddq_s16(v1627, v1722);
+ int16x8_t v1724 = vsubq_s16(v703, v706);
+ int16x8_t v1725 = vsubq_s16(v711, v715);
+ int16x8_t v1726_tmp = vqrdmulhq_n_s16(v1725, 10045);
+ int16x8_t v1726 = vaddq_s16(v1726_tmp, v1725);
+ int16x8_t v1727 = vaddq_s16(v1724, v1726);
+ int16x8_t v1728 = vsubq_s16(v722, v729);
+ int16x8_t v1729 = vsubq_s16(v732, v737);
+ int16x8_t v1730_tmp = vqrdmulhq_n_s16(v1729, 10045);
+ int16x8_t v1730 = vaddq_s16(v1730_tmp, v1729);
+ int16x8_t v1731 = vaddq_s16(v1728, v1730);
+ int16x8_t v1732 = vqrdmulhq_n_s16(v1731, 19705);
+ int16x8_t v1733 = vaddq_s16(v1727, v1732);
+ int16x8_t v1734 = vsubq_s16(v746, v753);
+ int16x8_t v1735 = vsubq_s16(v762, v770);
+ int16x8_t v1736_tmp = vqrdmulhq_n_s16(v1735, 10045);
+ int16x8_t v1736 = vaddq_s16(v1736_tmp, v1735);
+ int16x8_t v1737 = vaddq_s16(v1734, v1736);
+ int16x8_t v1738 = vsubq_s16(v775, v778);
+ int16x8_t v1739 = vsubq_s16(v781, v787);
+ int16x8_t v1740_tmp = vqrdmulhq_n_s16(v1739, 10045);
+ int16x8_t v1740 = vaddq_s16(v1740_tmp, v1739);
+ int16x8_t v1741 = vaddq_s16(v1738, v1740);
+ int16x8_t v1742 = vqrdmulhq_n_s16(v1741, 19705);
+ int16x8_t v1743 = vaddq_s16(v1737, v1742);
+ int16x8_t v1744 = vqrdmulhq_n_s16(v1743, 17121);
+ int16x8_t v1745 = vaddq_s16(v1733, v1744);
+ int16x8_t v1746 = vsubq_s16(v798, v805);
+ int16x8_t v1747 = vsubq_s16(v814, v822);
+ int16x8_t v1748_tmp = vqrdmulhq_n_s16(v1747, 10045);
+ int16x8_t v1748 = vaddq_s16(v1748_tmp, v1747);
+ int16x8_t v1749 = vaddq_s16(v1746, v1748);
+ int16x8_t v1750 = vsubq_s16(v833, v848);
+ int16x8_t v1751 = vsubq_s16(v851, v860);
+ int16x8_t v1752_tmp = vqrdmulhq_n_s16(v1751, 10045);
+ int16x8_t v1752 = vaddq_s16(v1752_tmp, v1751);
+ int16x8_t v1753 = vaddq_s16(v1750, v1752);
+ int16x8_t v1754 = vqrdmulhq_n_s16(v1753, 19705);
+ int16x8_t v1755 = vaddq_s16(v1749, v1754);
+ int16x8_t v1756 = vsubq_s16(v867, v870);
+ int16x8_t v1757 = vsubq_s16(v875, v879);
+ int16x8_t v1758_tmp = vqrdmulhq_n_s16(v1757, 10045);
+ int16x8_t v1758 = vaddq_s16(v1758_tmp, v1757);
+ int16x8_t v1759 = vaddq_s16(v1756, v1758);
+ int16x8_t v1760 = vsubq_s16(v884, v887);
+ int16x8_t v1761 = vsubq_s16(v890, v897);
+ int16x8_t v1762_tmp = vqrdmulhq_n_s16(v1761, 10045);
+ int16x8_t v1762 = vaddq_s16(v1762_tmp, v1761);
+ int16x8_t v1763 = vaddq_s16(v1760, v1762);
+ int16x8_t v1764 = vqrdmulhq_n_s16(v1763, 19705);
+ int16x8_t v1765 = vaddq_s16(v1759, v1764);
+ int16x8_t v1766 = vqrdmulhq_n_s16(v1765, 17121);
+ int16x8_t v1767 = vaddq_s16(v1755, v1766);
+ int16x8_t v1768 = vqrdmulhq_n_s16(v1767, 16563);
+ int16x8_t v1769 = vaddq_s16(v1745, v1768);
+ int16x8_t v1770 = vsubq_s16(v910, v917);
+ int16x8_t v1771 = vsubq_s16(v926, v934);
+ int16x8_t v1772_tmp = vqrdmulhq_n_s16(v1771, 10045);
+ int16x8_t v1772 = vaddq_s16(v1772_tmp, v1771);
+ int16x8_t v1773 = vaddq_s16(v1770, v1772);
+ int16x8_t v1774 = vsubq_s16(v945, v960);
+ int16x8_t v1775 = vsubq_s16(v963, v972);
+ int16x8_t v1776_tmp = vqrdmulhq_n_s16(v1775, 10045);
+ int16x8_t v1776 = vaddq_s16(v1776_tmp, v1775);
+ int16x8_t v1777 = vaddq_s16(v1774, v1776);
+ int16x8_t v1778 = vqrdmulhq_n_s16(v1777, 19705);
+ int16x8_t v1779 = vaddq_s16(v1773, v1778);
+ int16x8_t v1780 = vsubq_s16(v985, v1000);
+ int16x8_t v1781 = vsubq_s16(v1017, v1033);
+ int16x8_t v1782_tmp = vqrdmulhq_n_s16(v1781, 10045);
+ int16x8_t v1782 = vaddq_s16(v1782_tmp, v1781);
+ int16x8_t v1783 = vaddq_s16(v1780, v1782);
+ int16x8_t v1784 = vsubq_s16(v1038, v1041);
+ int16x8_t v1785 = vsubq_s16(v1044, v1054);
+ int16x8_t v1786_tmp = vqrdmulhq_n_s16(v1785, 10045);
+ int16x8_t v1786 = vaddq_s16(v1786_tmp, v1785);
+ int16x8_t v1787 = vaddq_s16(v1784, v1786);
+ int16x8_t v1788 = vqrdmulhq_n_s16(v1787, 19705);
+ int16x8_t v1789 = vaddq_s16(v1783, v1788);
+ int16x8_t v1790 = vqrdmulhq_n_s16(v1789, 17121);
+ int16x8_t v1791 = vaddq_s16(v1779, v1790);
+ int16x8_t v1792 = vsubq_s16(v1063, v1066);
+ int16x8_t v1793 = vsubq_s16(v1071, v1075);
+ int16x8_t v1794_tmp = vqrdmulhq_n_s16(v1793, 10045);
+ int16x8_t v1794 = vaddq_s16(v1794_tmp, v1793);
+ int16x8_t v1795 = vaddq_s16(v1792, v1794);
+ int16x8_t v1796 = vsubq_s16(v1082, v1089);
+ int16x8_t v1797 = vsubq_s16(v1092, v1097);
+ int16x8_t v1798_tmp = vqrdmulhq_n_s16(v1797, 10045);
+ int16x8_t v1798 = vaddq_s16(v1798_tmp, v1797);
+ int16x8_t v1799 = vaddq_s16(v1796, v1798);
+ int16x8_t v1800 = vqrdmulhq_n_s16(v1799, 19705);
+ int16x8_t v1801 = vaddq_s16(v1795, v1800);
+ int16x8_t v1802 = vsubq_s16(v1104, v1107);
+ int16x8_t v1803 = vsubq_s16(v1112, v1116);
+ int16x8_t v1804_tmp = vqrdmulhq_n_s16(v1803, 10045);
+ int16x8_t v1804 = vaddq_s16(v1804_tmp, v1803);
+ int16x8_t v1805 = vaddq_s16(v1802, v1804);
+ int16x8_t v1806 = vsubq_s16(v1121, v1124);
+ int16x8_t v1807 = vsubq_s16(v1127, v1135);
+ int16x8_t v1808_tmp = vqrdmulhq_n_s16(v1807, 10045);
+ int16x8_t v1808 = vaddq_s16(v1808_tmp, v1807);
+ int16x8_t v1809 = vaddq_s16(v1806, v1808);
+ int16x8_t v1810 = vqrdmulhq_n_s16(v1809, 19705);
+ int16x8_t v1811 = vaddq_s16(v1805, v1810);
+ int16x8_t v1812 = vqrdmulhq_n_s16(v1811, 17121);
+ int16x8_t v1813 = vaddq_s16(v1801, v1812);
+ int16x8_t v1814 = vqrdmulhq_n_s16(v1813, 16563);
+ int16x8_t v1815 = vaddq_s16(v1791, v1814);
+ int16x8_t v1816 = vqrdmulhq_n_s16(v1815, 16429);
+ int16x8_t v1817 = vaddq_s16(v1769, v1816);
+ int16x8_t v1818 = vsubq_s16(v1148, v1151);
+ int16x8_t v1819 = vsubq_s16(v1156, v1160);
+ int16x8_t v1820_tmp = vqrdmulhq_n_s16(v1819, 10045);
+ int16x8_t v1820 = vaddq_s16(v1820_tmp, v1819);
+ int16x8_t v1821 = vaddq_s16(v1818, v1820);
+ int16x8_t v1822 = vsubq_s16(v1167, v1174);
+ int16x8_t v1823 = vsubq_s16(v1177, v1182);
+ int16x8_t v1824_tmp = vqrdmulhq_n_s16(v1823, 10045);
+ int16x8_t v1824 = vaddq_s16(v1824_tmp, v1823);
+ int16x8_t v1825 = vaddq_s16(v1822, v1824);
+ int16x8_t v1826 = vqrdmulhq_n_s16(v1825, 19705);
+ int16x8_t v1827 = vaddq_s16(v1821, v1826);
+ int16x8_t v1828 = vsubq_s16(v1191, v1198);
+ int16x8_t v1829 = vsubq_s16(v1207, v1215);
+ int16x8_t v1830_tmp = vqrdmulhq_n_s16(v1829, 10045);
+ int16x8_t v1830 = vaddq_s16(v1830_tmp, v1829);
+ int16x8_t v1831 = vaddq_s16(v1828, v1830);
+ int16x8_t v1832 = vsubq_s16(v1220, v1223);
+ int16x8_t v1833 = vsubq_s16(v1226, v1232);
+ int16x8_t v1834_tmp = vqrdmulhq_n_s16(v1833, 10045);
+ int16x8_t v1834 = vaddq_s16(v1834_tmp, v1833);
+ int16x8_t v1835 = vaddq_s16(v1832, v1834);
+ int16x8_t v1836 = vqrdmulhq_n_s16(v1835, 19705);
+ int16x8_t v1837 = vaddq_s16(v1831, v1836);
+ int16x8_t v1838 = vqrdmulhq_n_s16(v1837, 17121);
+ int16x8_t v1839 = vaddq_s16(v1827, v1838);
+ int16x8_t v1840 = vsubq_s16(v1243, v1250);
+ int16x8_t v1841 = vsubq_s16(v1259, v1267);
+ int16x8_t v1842_tmp = vqrdmulhq_n_s16(v1841, 10045);
+ int16x8_t v1842 = vaddq_s16(v1842_tmp, v1841);
+ int16x8_t v1843 = vaddq_s16(v1840, v1842);
+ int16x8_t v1844 = vsubq_s16(v1278, v1293);
+ int16x8_t v1845 = vsubq_s16(v1296, v1305);
+ int16x8_t v1846_tmp = vqrdmulhq_n_s16(v1845, 10045);
+ int16x8_t v1846 = vaddq_s16(v1846_tmp, v1845);
+ int16x8_t v1847 = vaddq_s16(v1844, v1846);
+ int16x8_t v1848 = vqrdmulhq_n_s16(v1847, 19705);
+ int16x8_t v1849 = vaddq_s16(v1843, v1848);
+ int16x8_t v1850 = vsubq_s16(v1312, v1315);
+ int16x8_t v1851 = vsubq_s16(v1320, v1324);
+ int16x8_t v1852_tmp = vqrdmulhq_n_s16(v1851, 10045);
+ int16x8_t v1852 = vaddq_s16(v1852_tmp, v1851);
+ int16x8_t v1853 = vaddq_s16(v1850, v1852);
+ int16x8_t v1854 = vsubq_s16(v1329, v1332);
+ int16x8_t v1855 = vsubq_s16(v1335, v1342);
+ int16x8_t v1856_tmp = vqrdmulhq_n_s16(v1855, 10045);
+ int16x8_t v1856 = vaddq_s16(v1856_tmp, v1855);
+ int16x8_t v1857 = vaddq_s16(v1854, v1856);
+ int16x8_t v1858 = vqrdmulhq_n_s16(v1857, 19705);
+ int16x8_t v1859 = vaddq_s16(v1853, v1858);
+ int16x8_t v1860 = vqrdmulhq_n_s16(v1859, 17121);
+ int16x8_t v1861 = vaddq_s16(v1849, v1860);
+ int16x8_t v1862 = vqrdmulhq_n_s16(v1861, 16563);
+ int16x8_t v1863 = vaddq_s16(v1839, v1862);
+ int16x8_t v1864 = vsubq_s16(v1353, v1356);
+ int16x8_t v1865 = vsubq_s16(v1361, v1365);
+ int16x8_t v1866_tmp = vqrdmulhq_n_s16(v1865, 10045);
+ int16x8_t v1866 = vaddq_s16(v1866_tmp, v1865);
+ int16x8_t v1867 = vaddq_s16(v1864, v1866);
+ int16x8_t v1868 = vsubq_s16(v1372, v1379);
+ int16x8_t v1869 = vsubq_s16(v1382, v1387);
+ int16x8_t v1870_tmp = vqrdmulhq_n_s16(v1869, 10045);
+ int16x8_t v1870 = vaddq_s16(v1870_tmp, v1869);
+ int16x8_t v1871 = vaddq_s16(v1868, v1870);
+ int16x8_t v1872 = vqrdmulhq_n_s16(v1871, 19705);
+ int16x8_t v1873 = vaddq_s16(v1867, v1872);
+ int16x8_t v1874 = vsubq_s16(v1396, v1403);
+ int16x8_t v1875 = vsubq_s16(v1412, v1420);
+ int16x8_t v1876_tmp = vqrdmulhq_n_s16(v1875, 10045);
+ int16x8_t v1876 = vaddq_s16(v1876_tmp, v1875);
+ int16x8_t v1877 = vaddq_s16(v1874, v1876);
+ int16x8_t v1878 = vsubq_s16(v1425, v1428);
+ int16x8_t v1879 = vsubq_s16(v1431, v1437);
+ int16x8_t v1880_tmp = vqrdmulhq_n_s16(v1879, 10045);
+ int16x8_t v1880 = vaddq_s16(v1880_tmp, v1879);
+ int16x8_t v1881 = vaddq_s16(v1878, v1880);
+ int16x8_t v1882 = vqrdmulhq_n_s16(v1881, 19705);
+ int16x8_t v1883 = vaddq_s16(v1877, v1882);
+ int16x8_t v1884 = vqrdmulhq_n_s16(v1883, 17121);
+ int16x8_t v1885 = vaddq_s16(v1873, v1884);
+ int16x8_t v1886 = vsubq_s16(v1446, v1449);
+ int16x8_t v1887 = vsubq_s16(v1454, v1458);
+ int16x8_t v1888_tmp = vqrdmulhq_n_s16(v1887, 10045);
+ int16x8_t v1888 = vaddq_s16(v1888_tmp, v1887);
+ int16x8_t v1889 = vaddq_s16(v1886, v1888);
+ int16x8_t v1890 = vsubq_s16(v1465, v1472);
+ int16x8_t v1891 = vsubq_s16(v1475, v1480);
+ int16x8_t v1892_tmp = vqrdmulhq_n_s16(v1891, 10045);
+ int16x8_t v1892 = vaddq_s16(v1892_tmp, v1891);
+ int16x8_t v1893 = vaddq_s16(v1890, v1892);
+ int16x8_t v1894 = vqrdmulhq_n_s16(v1893, 19705);
+ int16x8_t v1895 = vaddq_s16(v1889, v1894);
+ int16x8_t v1896 = vsubq_s16(v1487, v1490);
+ int16x8_t v1897 = vsubq_s16(v1495, v1499);
+ int16x8_t v1898_tmp = vqrdmulhq_n_s16(v1897, 10045);
+ int16x8_t v1898 = vaddq_s16(v1898_tmp, v1897);
+ int16x8_t v1899 = vaddq_s16(v1896, v1898);
+ int16x8_t v1900 = vsubq_s16(v1504, v1507);
+ int16x8_t v1901 = vsubq_s16(v1510, v1518);
+ int16x8_t v1902_tmp = vqrdmulhq_n_s16(v1901, 10045);
+ int16x8_t v1902 = vaddq_s16(v1902_tmp, v1901);
+ int16x8_t v1903 = vaddq_s16(v1900, v1902);
+ int16x8_t v1904 = vqrdmulhq_n_s16(v1903, 19705);
+ int16x8_t v1905 = vaddq_s16(v1899, v1904);
+ int16x8_t v1906 = vqrdmulhq_n_s16(v1905, 17121);
+ int16x8_t v1907 = vaddq_s16(v1895, v1906);
+ int16x8_t v1908 = vqrdmulhq_n_s16(v1907, 16563);
+ int16x8_t v1909 = vaddq_s16(v1885, v1908);
+ int16x8_t v1910 = vqrdmulhq_n_s16(v1909, 16429);
+ int16x8_t v1911 = vaddq_s16(v1863, v1910);
+ int16x8_t v1912 = vqrdmulhq_n_s16(v1911, 16395);
+ int16x8_t v1913 = vaddq_s16(v1817, v1912);
+ int16x8_t v1914 = vqrdmulhq_n_s16(v1913, 16387);
+ int16x8_t v1915 = vaddq_s16(v1723, v1914);
+ int16x8_t v1916 = vsubq_s16(v1534, v1536);
+ int16x8_t v1917 = vsubq_s16(v1538, v1540);
+ int16x8_t v1918 = vqrdmulhq_n_s16(v1917, 29490);
+ int16x8_t v1919 = vaddq_s16(v1916, v1918);
+ int16x8_t v1920 = vsubq_s16(v1544, v1546);
+ int16x8_t v1921 = vsubq_s16(v1548, v1550);
+ int16x8_t v1922 = vqrdmulhq_n_s16(v1921, 29490);
+ int16x8_t v1923 = vaddq_s16(v1920, v1922);
+ int16x8_t v1924 = vqrdmulhq_n_s16(v1923, 18578);
+ int16x8_t v1925 = vaddq_s16(v1919, v1924);
+ int16x8_t v1926 = vsubq_s16(v1556, v1558);
+ int16x8_t v1927 = vsubq_s16(v1560, v1562);
+ int16x8_t v1928 = vqrdmulhq_n_s16(v1927, 29490);
+ int16x8_t v1929 = vaddq_s16(v1926, v1928);
+ int16x8_t v1930 = vsubq_s16(v1566, v1568);
+ int16x8_t v1931 = vsubq_s16(v1570, v1572);
+ int16x8_t v1932 = vqrdmulhq_n_s16(v1931, 29490);
+ int16x8_t v1933 = vaddq_s16(v1930, v1932);
+ int16x8_t v1934 = vqrdmulhq_n_s16(v1933, 18578);
+ int16x8_t v1935 = vaddq_s16(v1929, v1934);
+ int16x8_t v1936 = vqrdmulhq_n_s16(v1935, 16890);
+ int16x8_t v1937 = vaddq_s16(v1925, v1936);
+ int16x8_t v1938 = vsubq_s16(v1580, v1582);
+ int16x8_t v1939 = vsubq_s16(v1584, v1586);
+ int16x8_t v1940 = vqrdmulhq_n_s16(v1939, 29490);
+ int16x8_t v1941 = vaddq_s16(v1938, v1940);
+ int16x8_t v1942 = vsubq_s16(v1590, v1592);
+ int16x8_t v1943 = vsubq_s16(v1594, v1596);
+ int16x8_t v1944 = vqrdmulhq_n_s16(v1943, 29490);
+ int16x8_t v1945 = vaddq_s16(v1942, v1944);
+ int16x8_t v1946 = vqrdmulhq_n_s16(v1945, 18578);
+ int16x8_t v1947 = vaddq_s16(v1941, v1946);
+ int16x8_t v1948 = vsubq_s16(v1602, v1604);
+ int16x8_t v1949 = vsubq_s16(v1606, v1608);
+ int16x8_t v1950 = vqrdmulhq_n_s16(v1949, 29490);
+ int16x8_t v1951 = vaddq_s16(v1948, v1950);
+ int16x8_t v1952 = vsubq_s16(v1612, v1614);
+ int16x8_t v1953 = vsubq_s16(v1616, v1618);
+ int16x8_t v1954 = vqrdmulhq_n_s16(v1953, 29490);
+ int16x8_t v1955 = vaddq_s16(v1952, v1954);
+ int16x8_t v1956 = vqrdmulhq_n_s16(v1955, 18578);
+ int16x8_t v1957 = vaddq_s16(v1951, v1956);
+ int16x8_t v1958 = vqrdmulhq_n_s16(v1957, 16890);
+ int16x8_t v1959 = vaddq_s16(v1947, v1958);
+ int16x8_t v1960 = vqrdmulhq_n_s16(v1959, 16508);
+ int16x8_t v1961 = vaddq_s16(v1937, v1960);
+ int16x8_t v1962 = vsubq_s16(v1628, v1630);
+ int16x8_t v1963 = vsubq_s16(v1632, v1634);
+ int16x8_t v1964 = vqrdmulhq_n_s16(v1963, 29490);
+ int16x8_t v1965 = vaddq_s16(v1962, v1964);
+ int16x8_t v1966 = vsubq_s16(v1638, v1640);
+ int16x8_t v1967 = vsubq_s16(v1642, v1644);
+ int16x8_t v1968 = vqrdmulhq_n_s16(v1967, 29490);
+ int16x8_t v1969 = vaddq_s16(v1966, v1968);
+ int16x8_t v1970 = vqrdmulhq_n_s16(v1969, 18578);
+ int16x8_t v1971 = vaddq_s16(v1965, v1970);
+ int16x8_t v1972 = vsubq_s16(v1650, v1652);
+ int16x8_t v1973 = vsubq_s16(v1654, v1656);
+ int16x8_t v1974 = vqrdmulhq_n_s16(v1973, 29490);
+ int16x8_t v1975 = vaddq_s16(v1972, v1974);
+ int16x8_t v1976 = vsubq_s16(v1660, v1662);
+ int16x8_t v1977 = vsubq_s16(v1664, v1666);
+ int16x8_t v1978 = vqrdmulhq_n_s16(v1977, 29490);
+ int16x8_t v1979 = vaddq_s16(v1976, v1978);
+ int16x8_t v1980 = vqrdmulhq_n_s16(v1979, 18578);
+ int16x8_t v1981 = vaddq_s16(v1975, v1980);
+ int16x8_t v1982 = vqrdmulhq_n_s16(v1981, 16890);
+ int16x8_t v1983 = vaddq_s16(v1971, v1982);
+ int16x8_t v1984 = vsubq_s16(v1674, v1676);
+ int16x8_t v1985 = vsubq_s16(v1678, v1680);
+ int16x8_t v1986 = vqrdmulhq_n_s16(v1985, 29490);
+ int16x8_t v1987 = vaddq_s16(v1984, v1986);
+ int16x8_t v1988 = vsubq_s16(v1684, v1686);
+ int16x8_t v1989 = vsubq_s16(v1688, v1690);
+ int16x8_t v1990 = vqrdmulhq_n_s16(v1989, 29490);
+ int16x8_t v1991 = vaddq_s16(v1988, v1990);
+ int16x8_t v1992 = vqrdmulhq_n_s16(v1991, 18578);
+ int16x8_t v1993 = vaddq_s16(v1987, v1992);
+ int16x8_t v1994 = vsubq_s16(v1696, v1698);
+ int16x8_t v1995 = vsubq_s16(v1700, v1702);
+ int16x8_t v1996 = vqrdmulhq_n_s16(v1995, 29490);
+ int16x8_t v1997 = vaddq_s16(v1994, v1996);
+ int16x8_t v1998 = vsubq_s16(v1706, v1708);
+ int16x8_t v1999 = vsubq_s16(v1710, v1712);
+ int16x8_t v2000 = vqrdmulhq_n_s16(v1999, 29490);
+ int16x8_t v2001 = vaddq_s16(v1998, v2000);
+ int16x8_t v2002 = vqrdmulhq_n_s16(v2001, 18578);
+ int16x8_t v2003 = vaddq_s16(v1997, v2002);
+ int16x8_t v2004 = vqrdmulhq_n_s16(v2003, 16890);
+ int16x8_t v2005 = vaddq_s16(v1993, v2004);
+ int16x8_t v2006 = vqrdmulhq_n_s16(v2005, 16508);
+ int16x8_t v2007 = vaddq_s16(v1983, v2006);
+ int16x8_t v2008 = vqrdmulhq_n_s16(v2007, 16415);
+ int16x8_t v2009 = vaddq_s16(v1961, v2008);
+ int16x8_t v2010 = vsubq_s16(v1724, v1726);
+ int16x8_t v2011 = vsubq_s16(v1728, v1730);
+ int16x8_t v2012 = vqrdmulhq_n_s16(v2011, 29490);
+ int16x8_t v2013 = vaddq_s16(v2010, v2012);
+ int16x8_t v2014 = vsubq_s16(v1734, v1736);
+ int16x8_t v2015 = vsubq_s16(v1738, v1740);
+ int16x8_t v2016 = vqrdmulhq_n_s16(v2015, 29490);
+ int16x8_t v2017 = vaddq_s16(v2014, v2016);
+ int16x8_t v2018 = vqrdmulhq_n_s16(v2017, 18578);
+ int16x8_t v2019 = vaddq_s16(v2013, v2018);
+ int16x8_t v2020 = vsubq_s16(v1746, v1748);
+ int16x8_t v2021 = vsubq_s16(v1750, v1752);
+ int16x8_t v2022 = vqrdmulhq_n_s16(v2021, 29490);
+ int16x8_t v2023 = vaddq_s16(v2020, v2022);
+ int16x8_t v2024 = vsubq_s16(v1756, v1758);
+ int16x8_t v2025 = vsubq_s16(v1760, v1762);
+ int16x8_t v2026 = vqrdmulhq_n_s16(v2025, 29490);
+ int16x8_t v2027 = vaddq_s16(v2024, v2026);
+ int16x8_t v2028 = vqrdmulhq_n_s16(v2027, 18578);
+ int16x8_t v2029 = vaddq_s16(v2023, v2028);
+ int16x8_t v2030 = vqrdmulhq_n_s16(v2029, 16890);
+ int16x8_t v2031 = vaddq_s16(v2019, v2030);
+ int16x8_t v2032 = vsubq_s16(v1770, v1772);
+ int16x8_t v2033 = vsubq_s16(v1774, v1776);
+ int16x8_t v2034 = vqrdmulhq_n_s16(v2033, 29490);
+ int16x8_t v2035 = vaddq_s16(v2032, v2034);
+ int16x8_t v2036 = vsubq_s16(v1780, v1782);
+ int16x8_t v2037 = vsubq_s16(v1784, v1786);
+ int16x8_t v2038 = vqrdmulhq_n_s16(v2037, 29490);
+ int16x8_t v2039 = vaddq_s16(v2036, v2038);
+ int16x8_t v2040 = vqrdmulhq_n_s16(v2039, 18578);
+ int16x8_t v2041 = vaddq_s16(v2035, v2040);
+ int16x8_t v2042 = vsubq_s16(v1792, v1794);
+ int16x8_t v2043 = vsubq_s16(v1796, v1798);
+ int16x8_t v2044 = vqrdmulhq_n_s16(v2043, 29490);
+ int16x8_t v2045 = vaddq_s16(v2042, v2044);
+ int16x8_t v2046 = vsubq_s16(v1802, v1804);
+ int16x8_t v2047 = vsubq_s16(v1806, v1808);
+ int16x8_t v2048 = vqrdmulhq_n_s16(v2047, 29490);
+ int16x8_t v2049 = vaddq_s16(v2046, v2048);
+ int16x8_t v2050 = vqrdmulhq_n_s16(v2049, 18578);
+ int16x8_t v2051 = vaddq_s16(v2045, v2050);
+ int16x8_t v2052 = vqrdmulhq_n_s16(v2051, 16890);
+ int16x8_t v2053 = vaddq_s16(v2041, v2052);
+ int16x8_t v2054 = vqrdmulhq_n_s16(v2053, 16508);
+ int16x8_t v2055 = vaddq_s16(v2031, v2054);
+ int16x8_t v2056 = vsubq_s16(v1818, v1820);
+ int16x8_t v2057 = vsubq_s16(v1822, v1824);
+ int16x8_t v2058 = vqrdmulhq_n_s16(v2057, 29490);
+ int16x8_t v2059 = vaddq_s16(v2056, v2058);
+ int16x8_t v2060 = vsubq_s16(v1828, v1830);
+ int16x8_t v2061 = vsubq_s16(v1832, v1834);
+ int16x8_t v2062 = vqrdmulhq_n_s16(v2061, 29490);
+ int16x8_t v2063 = vaddq_s16(v2060, v2062);
+ int16x8_t v2064 = vqrdmulhq_n_s16(v2063, 18578);
+ int16x8_t v2065 = vaddq_s16(v2059, v2064);
+ int16x8_t v2066 = vsubq_s16(v1840, v1842);
+ int16x8_t v2067 = vsubq_s16(v1844, v1846);
+ int16x8_t v2068 = vqrdmulhq_n_s16(v2067, 29490);
+ int16x8_t v2069 = vaddq_s16(v2066, v2068);
+ int16x8_t v2070 = vsubq_s16(v1850, v1852);
+ int16x8_t v2071 = vqrdmulhq_n_s16(v2070, 18578);
+ int16x8_t v2072 = vsubq_s16(v1854, v1856);
+ int16x8_t v2073 = vqrdmulhq_n_s16(v2072, 16719);
+ int16x8_t v2074 = vaddq_s16(v2071, v2073);
+ int16x8_t v2075 = vaddq_s16(v2069, v2074);
+ int16x8_t v2076 = vqrdmulhq_n_s16(v2075, 16890);
+ int16x8_t v2077 = vaddq_s16(v2065, v2076);
+ int16x8_t v2078 = vsubq_s16(v1864, v1866);
+ int16x8_t v2079 = vsubq_s16(v1868, v1870);
+ int16x8_t v2080 = vqrdmulhq_n_s16(v2079, 29490);
+ int16x8_t v2081 = vaddq_s16(v2078, v2080);
+ int16x8_t v2082 = vsubq_s16(v1874, v1876);
+ int16x8_t v2083 = vsubq_s16(v1878, v1880);
+ int16x8_t v2084 = vqrdmulhq_n_s16(v2083, 29490);
+ int16x8_t v2085 = vaddq_s16(v2082, v2084);
+ int16x8_t v2086 = vqrdmulhq_n_s16(v2085, 18578);
+ int16x8_t v2087 = vaddq_s16(v2081, v2086);
+ int16x8_t v2088 = vsubq_s16(v1886, v1888);
+ int16x8_t v2089 = vsubq_s16(v1890, v1892);
+ int16x8_t v2090 = vqrdmulhq_n_s16(v2089, 29490);
+ int16x8_t v2091 = vaddq_s16(v2088, v2090);
+ int16x8_t v2092 = vsubq_s16(v1896, v1898);
+ int16x8_t v2093 = vsubq_s16(v1900, v1902);
+ int16x8_t v2094 = vqrdmulhq_n_s16(v2093, 29490);
+ int16x8_t v2095 = vaddq_s16(v2092, v2094);
+ int16x8_t v2096 = vqrdmulhq_n_s16(v2095, 18578);
+ int16x8_t v2097 = vaddq_s16(v2091, v2096);
+ int16x8_t v2098 = vqrdmulhq_n_s16(v2097, 16890);
+ int16x8_t v2099 = vaddq_s16(v2087, v2098);
+ int16x8_t v2100 = vqrdmulhq_n_s16(v2099, 16508);
+ int16x8_t v2101 = vaddq_s16(v2077, v2100);
+ int16x8_t v2102 = vqrdmulhq_n_s16(v2101, 16415);
+ int16x8_t v2103 = vaddq_s16(v2055, v2102);
+ int16x8_t v2104 = vqrdmulhq_n_s16(v2103, 16392);
+ int16x8_t v2105 = vaddq_s16(v2009, v2104);
+ int16x8_t v2106 = vsubq_s16(v2, v8);
+ int16x8_t v2107 = vsubq_s16(v15, v22);
+ int16x8_t v2108_tmp = vqrdmulhq_n_s16(v2107, 18446);
+ int16x8_t v2108 = vmlaq_n_s16(v2108_tmp, v2107, 2);
+ int16x8_t v2109 = vaddq_s16(v2106, v2108);
+ int16x8_t v2110 = vsubq_s16(v31, v41);
+ int16x8_t v2111 = vsubq_s16(v48, v56);
+ int16x8_t v2112_tmp = vqrdmulhq_n_s16(v2111, 18446);
+ int16x8_t v2112 = vmlaq_n_s16(v2112_tmp, v2111, 2);
+ int16x8_t v2113 = vaddq_s16(v2110, v2112);
+ int16x8_t v2114 = vqrdmulhq_n_s16(v2113, 21195);
+ int16x8_t v2115 = vaddq_s16(v2109, v2114);
+ int16x8_t v2116 = vsubq_s16(v67, v77);
+ int16x8_t v2117 = vsubq_s16(v90, v99);
+ int16x8_t v2118_tmp = vqrdmulhq_n_s16(v2117, 18446);
+ int16x8_t v2118 = vmlaq_n_s16(v2118_tmp, v2117, 2);
+ int16x8_t v2119 = vaddq_s16(v2116, v2118);
+ int16x8_t v2120 = vsubq_s16(v108, v118);
+ int16x8_t v2121 = vsubq_s16(v125, v134);
+ int16x8_t v2122_tmp = vqrdmulhq_n_s16(v2121, 18446);
+ int16x8_t v2122 = vmlaq_n_s16(v2122_tmp, v2121, 2);
+ int16x8_t v2123 = vaddq_s16(v2120, v2122);
+ int16x8_t v2124 = vqrdmulhq_n_s16(v2123, 21195);
+ int16x8_t v2125 = vaddq_s16(v2119, v2124);
+ int16x8_t v2126 = vqrdmulhq_n_s16(v2125, 17401);
+ int16x8_t v2127 = vaddq_s16(v2115, v2126);
+ int16x8_t v2128 = vsubq_s16(v147, v157);
+ int16x8_t v2129 = vsubq_s16(v170, v179);
+ int16x8_t v2130_tmp = vqrdmulhq_n_s16(v2129, 18446);
+ int16x8_t v2130 = vmlaq_n_s16(v2130_tmp, v2129, 2);
+ int16x8_t v2131 = vaddq_s16(v2128, v2130);
+ int16x8_t v2132 = vsubq_s16(v194, v212);
+ int16x8_t v2133 = vsubq_s16(v219, v229);
+ int16x8_t v2134_tmp = vqrdmulhq_n_s16(v2133, 18446);
+ int16x8_t v2134 = vmlaq_n_s16(v2134_tmp, v2133, 2);
+ int16x8_t v2135 = vaddq_s16(v2132, v2134);
+ int16x8_t v2136 = vqrdmulhq_n_s16(v2135, 21195);
+ int16x8_t v2137 = vaddq_s16(v2131, v2136);
+ int16x8_t v2138 = vsubq_s16(v240, v250);
+ int16x8_t v2139 = vsubq_s16(v263, v272);
+ int16x8_t v2140_tmp = vqrdmulhq_n_s16(v2139, 18446);
+ int16x8_t v2140 = vmlaq_n_s16(v2140_tmp, v2139, 2);
+ int16x8_t v2141 = vaddq_s16(v2138, v2140);
+ int16x8_t v2142 = vsubq_s16(v281, v291);
+ int16x8_t v2143 = vsubq_s16(v298, v308);
+ int16x8_t v2144_tmp = vqrdmulhq_n_s16(v2143, 18446);
+ int16x8_t v2144 = vmlaq_n_s16(v2144_tmp, v2143, 2);
+ int16x8_t v2145 = vaddq_s16(v2142, v2144);
+ int16x8_t v2146 = vqrdmulhq_n_s16(v2145, 21195);
+ int16x8_t v2147 = vaddq_s16(v2141, v2146);
+ int16x8_t v2148 = vqrdmulhq_n_s16(v2147, 17401);
+ int16x8_t v2149 = vaddq_s16(v2137, v2148);
+ int16x8_t v2150 = vqrdmulhq_n_s16(v2149, 16629);
+ int16x8_t v2151 = vaddq_s16(v2127, v2150);
+ int16x8_t v2152 = vsubq_s16(v323, v333);
+ int16x8_t v2153 = vsubq_s16(v346, v355);
+ int16x8_t v2154_tmp = vqrdmulhq_n_s16(v2153, 18446);
+ int16x8_t v2154 = vmlaq_n_s16(v2154_tmp, v2153, 2);
+ int16x8_t v2155 = vaddq_s16(v2152, v2154);
+ int16x8_t v2156 = vsubq_s16(v370, v388);
+ int16x8_t v2157 = vsubq_s16(v395, v405);
+ int16x8_t v2158_tmp = vqrdmulhq_n_s16(v2157, 18446);
+ int16x8_t v2158 = vmlaq_n_s16(v2158_tmp, v2157, 2);
+ int16x8_t v2159 = vaddq_s16(v2156, v2158);
+ int16x8_t v2160 = vqrdmulhq_n_s16(v2159, 21195);
+ int16x8_t v2161 = vaddq_s16(v2155, v2160);
+ int16x8_t v2162 = vsubq_s16(v422, v440);
+ int16x8_t v2163 = vsubq_s16(v465, v478);
+ int16x8_t v2164_tmp = vqrdmulhq_n_s16(v2163, 18446);
+ int16x8_t v2164 = vmlaq_n_s16(v2164_tmp, v2163, 2);
+ int16x8_t v2165 = vaddq_s16(v2162, v2164);
+ int16x8_t v2166 = vsubq_s16(v487, v497);
+ int16x8_t v2167 = vsubq_s16(v504, v515);
+ int16x8_t v2168_tmp = vqrdmulhq_n_s16(v2167, 18446);
+ int16x8_t v2168 = vmlaq_n_s16(v2168_tmp, v2167, 2);
+ int16x8_t v2169 = vaddq_s16(v2166, v2168);
+ int16x8_t v2170 = vqrdmulhq_n_s16(v2169, 21195);
+ int16x8_t v2171 = vaddq_s16(v2165, v2170);
+ int16x8_t v2172 = vqrdmulhq_n_s16(v2171, 17401);
+ int16x8_t v2173 = vaddq_s16(v2161, v2172);
+ int16x8_t v2174 = vsubq_s16(v528, v538);
+ int16x8_t v2175 = vsubq_s16(v551, v560);
+ int16x8_t v2176_tmp = vqrdmulhq_n_s16(v2175, 18446);
+ int16x8_t v2176 = vmlaq_n_s16(v2176_tmp, v2175, 2);
+ int16x8_t v2177 = vaddq_s16(v2174, v2176);
+ int16x8_t v2178 = vsubq_s16(v575, v593);
+ int16x8_t v2179 = vsubq_s16(v600, v610);
+ int16x8_t v2180_tmp = vqrdmulhq_n_s16(v2179, 18446);
+ int16x8_t v2180 = vmlaq_n_s16(v2180_tmp, v2179, 2);
+ int16x8_t v2181 = vaddq_s16(v2178, v2180);
+ int16x8_t v2182 = vqrdmulhq_n_s16(v2181, 21195);
+ int16x8_t v2183 = vaddq_s16(v2177, v2182);
+ int16x8_t v2184 = vsubq_s16(v621, v631);
+ int16x8_t v2185 = vsubq_s16(v644, v653);
+ int16x8_t v2186_tmp = vqrdmulhq_n_s16(v2185, 18446);
+ int16x8_t v2186 = vmlaq_n_s16(v2186_tmp, v2185, 2);
+ int16x8_t v2187 = vaddq_s16(v2184, v2186);
+ int16x8_t v2188 = vsubq_s16(v662, v672);
+ int16x8_t v2189 = vsubq_s16(v679, v690);
+ int16x8_t v2190_tmp = vqrdmulhq_n_s16(v2189, 18446);
+ int16x8_t v2190 = vmlaq_n_s16(v2190_tmp, v2189, 2);
+ int16x8_t v2191 = vaddq_s16(v2188, v2190);
+ int16x8_t v2192 = vqrdmulhq_n_s16(v2191, 21195);
+ int16x8_t v2193 = vaddq_s16(v2187, v2192);
+ int16x8_t v2194 = vqrdmulhq_n_s16(v2193, 17401);
+ int16x8_t v2195 = vaddq_s16(v2183, v2194);
+ int16x8_t v2196 = vqrdmulhq_n_s16(v2195, 16629);
+ int16x8_t v2197 = vaddq_s16(v2173, v2196);
+ int16x8_t v2198 = vqrdmulhq_n_s16(v2197, 16445);
+ int16x8_t v2199 = vaddq_s16(v2151, v2198);
+ int16x8_t v2200 = vsubq_s16(v707, v717);
+ int16x8_t v2201 = vsubq_s16(v730, v739);
+ int16x8_t v2202_tmp = vqrdmulhq_n_s16(v2201, 18446);
+ int16x8_t v2202 = vmlaq_n_s16(v2202_tmp, v2201, 2);
+ int16x8_t v2203 = vaddq_s16(v2200, v2202);
+ int16x8_t v2204 = vsubq_s16(v754, v772);
+ int16x8_t v2205 = vsubq_s16(v779, v789);
+ int16x8_t v2206_tmp = vqrdmulhq_n_s16(v2205, 18446);
+ int16x8_t v2206 = vmlaq_n_s16(v2206_tmp, v2205, 2);
+ int16x8_t v2207 = vaddq_s16(v2204, v2206);
+ int16x8_t v2208 = vqrdmulhq_n_s16(v2207, 21195);
+ int16x8_t v2209 = vaddq_s16(v2203, v2208);
+ int16x8_t v2210 = vsubq_s16(v806, v824);
+ int16x8_t v2211 = vsubq_s16(v849, v862);
+ int16x8_t v2212_tmp = vqrdmulhq_n_s16(v2211, 18446);
+ int16x8_t v2212 = vmlaq_n_s16(v2212_tmp, v2211, 2);
+ int16x8_t v2213 = vaddq_s16(v2210, v2212);
+ int16x8_t v2214 = vsubq_s16(v871, v881);
+ int16x8_t v2215 = vsubq_s16(v888, v899);
+ int16x8_t v2216_tmp = vqrdmulhq_n_s16(v2215, 18446);
+ int16x8_t v2216 = vmlaq_n_s16(v2216_tmp, v2215, 2);
+ int16x8_t v2217 = vaddq_s16(v2214, v2216);
+ int16x8_t v2218 = vqrdmulhq_n_s16(v2217, 21195);
+ int16x8_t v2219 = vaddq_s16(v2213, v2218);
+ int16x8_t v2220 = vqrdmulhq_n_s16(v2219, 17401);
+ int16x8_t v2221 = vaddq_s16(v2209, v2220);
+ int16x8_t v2222 = vsubq_s16(v918, v936);
+ int16x8_t v2223 = vsubq_s16(v961, v974);
+ int16x8_t v2224_tmp = vqrdmulhq_n_s16(v2223, 18446);
+ int16x8_t v2224 = vmlaq_n_s16(v2224_tmp, v2223, 2);
+ int16x8_t v2225 = vaddq_s16(v2222, v2224);
+ int16x8_t v2226 = vsubq_s16(v1001, v1035);
+ int16x8_t v2227 = vsubq_s16(v1042, v1056);
+ int16x8_t v2228_tmp = vqrdmulhq_n_s16(v2227, 18446);
+ int16x8_t v2228 = vmlaq_n_s16(v2228_tmp, v2227, 2);
+ int16x8_t v2229 = vaddq_s16(v2226, v2228);
+ int16x8_t v2230 = vqrdmulhq_n_s16(v2229, 21195);
+ int16x8_t v2231 = vaddq_s16(v2225, v2230);
+ int16x8_t v2232 = vsubq_s16(v1067, v1077);
+ int16x8_t v2233 = vsubq_s16(v1090, v1099);
+ int16x8_t v2234_tmp = vqrdmulhq_n_s16(v2233, 18446);
+ int16x8_t v2234 = vmlaq_n_s16(v2234_tmp, v2233, 2);
+ int16x8_t v2235 = vaddq_s16(v2232, v2234);
+ int16x8_t v2236 = vsubq_s16(v1108, v1118);
+ int16x8_t v2237 = vsubq_s16(v1125, v1137);
+ int16x8_t v2238_tmp = vqrdmulhq_n_s16(v2237, 18446);
+ int16x8_t v2238 = vmlaq_n_s16(v2238_tmp, v2237, 2);
+ int16x8_t v2239 = vaddq_s16(v2236, v2238);
+ int16x8_t v2240 = vqrdmulhq_n_s16(v2239, 21195);
+ int16x8_t v2241 = vaddq_s16(v2235, v2240);
+ int16x8_t v2242 = vqrdmulhq_n_s16(v2241, 17401);
+ int16x8_t v2243 = vaddq_s16(v2231, v2242);
+ int16x8_t v2244 = vqrdmulhq_n_s16(v2243, 16629);
+ int16x8_t v2245 = vaddq_s16(v2221, v2244);
+ int16x8_t v2246 = vsubq_s16(v1152, v1162);
+ int16x8_t v2247 = vsubq_s16(v1175, v1184);
+ int16x8_t v2248_tmp = vqrdmulhq_n_s16(v2247, 18446);
+ int16x8_t v2248 = vmlaq_n_s16(v2248_tmp, v2247, 2);
+ int16x8_t v2249 = vaddq_s16(v2246, v2248);
+ int16x8_t v2250 = vsubq_s16(v1199, v1217);
+ int16x8_t v2251 = vsubq_s16(v1224, v1234);
+ int16x8_t v2252_tmp = vqrdmulhq_n_s16(v2251, 18446);
+ int16x8_t v2252 = vmlaq_n_s16(v2252_tmp, v2251, 2);
+ int16x8_t v2253 = vaddq_s16(v2250, v2252);
+ int16x8_t v2254 = vqrdmulhq_n_s16(v2253, 21195);
+ int16x8_t v2255 = vaddq_s16(v2249, v2254);
+ int16x8_t v2256 = vsubq_s16(v1251, v1269);
+ int16x8_t v2257 = vsubq_s16(v1294, v1307);
+ int16x8_t v2258_tmp = vqrdmulhq_n_s16(v2257, 18446);
+ int16x8_t v2258 = vmlaq_n_s16(v2258_tmp, v2257, 2);
+ int16x8_t v2259 = vaddq_s16(v2256, v2258);
+ int16x8_t v2260 = vsubq_s16(v1316, v1326);
+ int16x8_t v2261 = vsubq_s16(v1333, v1344);
+ int16x8_t v2262_tmp = vqrdmulhq_n_s16(v2261, 18446);
+ int16x8_t v2262 = vmlaq_n_s16(v2262_tmp, v2261, 2);
+ int16x8_t v2263 = vaddq_s16(v2260, v2262);
+ int16x8_t v2264 = vqrdmulhq_n_s16(v2263, 21195);
+ int16x8_t v2265 = vaddq_s16(v2259, v2264);
+ int16x8_t v2266 = vqrdmulhq_n_s16(v2265, 17401);
+ int16x8_t v2267 = vaddq_s16(v2255, v2266);
+ int16x8_t v2268 = vsubq_s16(v1357, v1367);
+ int16x8_t v2269 = vsubq_s16(v1380, v1389);
+ int16x8_t v2270_tmp = vqrdmulhq_n_s16(v2269, 18446);
+ int16x8_t v2270 = vmlaq_n_s16(v2270_tmp, v2269, 2);
+ int16x8_t v2271 = vaddq_s16(v2268, v2270);
+ int16x8_t v2272 = vsubq_s16(v1404, v1422);
+ int16x8_t v2273 = vsubq_s16(v1429, v1439);
+ int16x8_t v2274_tmp = vqrdmulhq_n_s16(v2273, 18446);
+ int16x8_t v2274 = vmlaq_n_s16(v2274_tmp, v2273, 2);
+ int16x8_t v2275 = vaddq_s16(v2272, v2274);
+ int16x8_t v2276 = vqrdmulhq_n_s16(v2275, 21195);
+ int16x8_t v2277 = vaddq_s16(v2271, v2276);
+ int16x8_t v2278 = vsubq_s16(v1450, v1460);
+ int16x8_t v2279 = vsubq_s16(v1473, v1482);
+ int16x8_t v2280_tmp = vqrdmulhq_n_s16(v2279, 18446);
+ int16x8_t v2280 = vmlaq_n_s16(v2280_tmp, v2279, 2);
+ int16x8_t v2281 = vaddq_s16(v2278, v2280);
+ int16x8_t v2282 = vsubq_s16(v1491, v1501);
+ int16x8_t v2283 = vsubq_s16(v1508, v1520);
+ int16x8_t v2284_tmp = vqrdmulhq_n_s16(v2283, 18446);
+ int16x8_t v2284 = vmlaq_n_s16(v2284_tmp, v2283, 2);
+ int16x8_t v2285 = vaddq_s16(v2282, v2284);
+ int16x8_t v2286 = vqrdmulhq_n_s16(v2285, 21195);
+ int16x8_t v2287 = vaddq_s16(v2281, v2286);
+ int16x8_t v2288 = vqrdmulhq_n_s16(v2287, 17401);
+ int16x8_t v2289 = vaddq_s16(v2277, v2288);
+ int16x8_t v2290 = vqrdmulhq_n_s16(v2289, 16629);
+ int16x8_t v2291 = vaddq_s16(v2267, v2290);
+ int16x8_t v2292 = vqrdmulhq_n_s16(v2291, 16445);
+ int16x8_t v2293 = vaddq_s16(v2245, v2292);
+ int16x8_t v2294 = vqrdmulhq_n_s16(v2293, 16399);
+ int16x8_t v2295 = vaddq_s16(v2199, v2294);
+ int16x8_t v2296 = vsubq_s16(v2106, v2108);
+ int16x8_t v2297 = vsubq_s16(v2110, v2112);
+ int16x8_t v2298 = vqrdmulhq_n_s16(v2297, 25826);
+ int16x8_t v2299 = vaddq_s16(v2296, v2298);
+ int16x8_t v2300 = vsubq_s16(v2116, v2118);
+ int16x8_t v2301 = vsubq_s16(v2120, v2122);
+ int16x8_t v2302 = vqrdmulhq_n_s16(v2301, 25826);
+ int16x8_t v2303 = vaddq_s16(v2300, v2302);
+ int16x8_t v2304 = vqrdmulhq_n_s16(v2303, 18124);
+ int16x8_t v2305 = vaddq_s16(v2299, v2304);
+ int16x8_t v2306 = vsubq_s16(v2128, v2130);
+ int16x8_t v2307 = vsubq_s16(v2132, v2134);
+ int16x8_t v2308 = vqrdmulhq_n_s16(v2307, 25826);
+ int16x8_t v2309 = vaddq_s16(v2306, v2308);
+ int16x8_t v2310 = vsubq_s16(v2138, v2140);
+ int16x8_t v2311 = vsubq_s16(v2142, v2144);
+ int16x8_t v2312 = vqrdmulhq_n_s16(v2311, 25826);
+ int16x8_t v2313 = vaddq_s16(v2310, v2312);
+ int16x8_t v2314 = vqrdmulhq_n_s16(v2313, 18124);
+ int16x8_t v2315 = vaddq_s16(v2309, v2314);
+ int16x8_t v2316 = vqrdmulhq_n_s16(v2315, 16792);
+ int16x8_t v2317 = vaddq_s16(v2305, v2316);
+ int16x8_t v2318 = vsubq_s16(v2152, v2154);
+ int16x8_t v2319 = vsubq_s16(v2156, v2158);
+ int16x8_t v2320 = vqrdmulhq_n_s16(v2319, 25826);
+ int16x8_t v2321 = vaddq_s16(v2318, v2320);
+ int16x8_t v2322 = vsubq_s16(v2162, v2164);
+ int16x8_t v2323 = vsubq_s16(v2166, v2168);
+ int16x8_t v2324 = vqrdmulhq_n_s16(v2323, 25826);
+ int16x8_t v2325 = vaddq_s16(v2322, v2324);
+ int16x8_t v2326 = vqrdmulhq_n_s16(v2325, 18124);
+ int16x8_t v2327 = vaddq_s16(v2321, v2326);
+ int16x8_t v2328 = vsubq_s16(v2174, v2176);
+ int16x8_t v2329 = vsubq_s16(v2178, v2180);
+ int16x8_t v2330 = vqrdmulhq_n_s16(v2329, 25826);
+ int16x8_t v2331 = vaddq_s16(v2328, v2330);
+ int16x8_t v2332 = vsubq_s16(v2184, v2186);
+ int16x8_t v2333 = vsubq_s16(v2188, v2190);
+ int16x8_t v2334 = vqrdmulhq_n_s16(v2333, 25826);
+ int16x8_t v2335 = vaddq_s16(v2332, v2334);
+ int16x8_t v2336 = vqrdmulhq_n_s16(v2335, 18124);
+ int16x8_t v2337 = vaddq_s16(v2331, v2336);
+ int16x8_t v2338 = vqrdmulhq_n_s16(v2337, 16792);
+ int16x8_t v2339 = vaddq_s16(v2327, v2338);
+ int16x8_t v2340 = vqrdmulhq_n_s16(v2339, 16484);
+ int16x8_t v2341 = vaddq_s16(v2317, v2340);
+ int16x8_t v2342 = vsubq_s16(v2200, v2202);
+ int16x8_t v2343 = vsubq_s16(v2204, v2206);
+ int16x8_t v2344 = vqrdmulhq_n_s16(v2343, 25826);
+ int16x8_t v2345 = vaddq_s16(v2342, v2344);
+ int16x8_t v2346 = vsubq_s16(v2210, v2212);
+ int16x8_t v2347 = vsubq_s16(v2214, v2216);
+ int16x8_t v2348 = vqrdmulhq_n_s16(v2347, 25826);
+ int16x8_t v2349 = vaddq_s16(v2346, v2348);
+ int16x8_t v2350 = vqrdmulhq_n_s16(v2349, 18124);
+ int16x8_t v2351 = vaddq_s16(v2345, v2350);
+ int16x8_t v2352 = vsubq_s16(v2222, v2224);
+ int16x8_t v2353 = vsubq_s16(v2226, v2228);
+ int16x8_t v2354 = vqrdmulhq_n_s16(v2353, 25826);
+ int16x8_t v2355 = vaddq_s16(v2352, v2354);
+ int16x8_t v2356 = vsubq_s16(v2232, v2234);
+ int16x8_t v2357 = vsubq_s16(v2236, v2238);
+ int16x8_t v2358 = vqrdmulhq_n_s16(v2357, 25826);
+ int16x8_t v2359 = vaddq_s16(v2356, v2358);
+ int16x8_t v2360 = vqrdmulhq_n_s16(v2359, 18124);
+ int16x8_t v2361 = vaddq_s16(v2355, v2360);
+ int16x8_t v2362 = vqrdmulhq_n_s16(v2361, 16792);
+ int16x8_t v2363 = vaddq_s16(v2351, v2362);
+ int16x8_t v2364 = vsubq_s16(v2246, v2248);
+ int16x8_t v2365 = vsubq_s16(v2250, v2252);
+ int16x8_t v2366 = vqrdmulhq_n_s16(v2365, 25826);
+ int16x8_t v2367 = vaddq_s16(v2364, v2366);
+ int16x8_t v2368 = vsubq_s16(v2256, v2258);
+ int16x8_t v2369 = vsubq_s16(v2260, v2262);
+ int16x8_t v2370 = vqrdmulhq_n_s16(v2369, 25826);
+ int16x8_t v2371 = vaddq_s16(v2368, v2370);
+ int16x8_t v2372 = vqrdmulhq_n_s16(v2371, 18124);
+ int16x8_t v2373 = vaddq_s16(v2367, v2372);
+ int16x8_t v2374 = vsubq_s16(v2268, v2270);
+ int16x8_t v2375 = vsubq_s16(v2272, v2274);
+ int16x8_t v2376 = vqrdmulhq_n_s16(v2375, 25826);
+ int16x8_t v2377 = vaddq_s16(v2374, v2376);
+ int16x8_t v2378 = vsubq_s16(v2278, v2280);
+ int16x8_t v2379 = vsubq_s16(v2282, v2284);
+ int16x8_t v2380 = vqrdmulhq_n_s16(v2379, 25826);
+ int16x8_t v2381 = vaddq_s16(v2378, v2380);
+ int16x8_t v2382 = vqrdmulhq_n_s16(v2381, 18124);
+ int16x8_t v2383 = vaddq_s16(v2377, v2382);
+ int16x8_t v2384 = vqrdmulhq_n_s16(v2383, 16792);
+ int16x8_t v2385 = vaddq_s16(v2373, v2384);
+ int16x8_t v2386 = vqrdmulhq_n_s16(v2385, 16484);
+ int16x8_t v2387 = vaddq_s16(v2363, v2386);
+ int16x8_t v2388 = vqrdmulhq_n_s16(v2387, 16409);
+ int16x8_t v2389 = vaddq_s16(v2341, v2388);
+ int16x8_t v2390 = vsubq_s16(v1916, v1918);
+ int16x8_t v2391 = vsubq_s16(v1920, v1922);
+ int16x8_t v2392_tmp = vqrdmulhq_n_s16(v2391, 1988);
+ int16x8_t v2392 = vaddq_s16(v2392_tmp, v2391);
+ int16x8_t v2393 = vaddq_s16(v2390, v2392);
+ int16x8_t v2394 = vsubq_s16(v1926, v1928);
+ int16x8_t v2395 = vsubq_s16(v1930, v1932);
+ int16x8_t v2396_tmp = vqrdmulhq_n_s16(v2395, 1988);
+ int16x8_t v2396 = vaddq_s16(v2396_tmp, v2395);
+ int16x8_t v2397 = vaddq_s16(v2394, v2396);
+ int16x8_t v2398 = vqrdmulhq_n_s16(v2397, 19102);
+ int16x8_t v2399 = vaddq_s16(v2393, v2398);
+ int16x8_t v2400 = vsubq_s16(v1938, v1940);
+ int16x8_t v2401 = vsubq_s16(v1942, v1944);
+ int16x8_t v2402_tmp = vqrdmulhq_n_s16(v2401, 1988);
+ int16x8_t v2402 = vaddq_s16(v2402_tmp, v2401);
+ int16x8_t v2403 = vaddq_s16(v2400, v2402);
+ int16x8_t v2404 = vsubq_s16(v1948, v1950);
+ int16x8_t v2405 = vsubq_s16(v1952, v1954);
+ int16x8_t v2406_tmp = vqrdmulhq_n_s16(v2405, 1988);
+ int16x8_t v2406 = vaddq_s16(v2406_tmp, v2405);
+ int16x8_t v2407 = vaddq_s16(v2404, v2406);
+ int16x8_t v2408 = vqrdmulhq_n_s16(v2407, 19102);
+ int16x8_t v2409 = vaddq_s16(v2403, v2408);
+ int16x8_t v2410 = vqrdmulhq_n_s16(v2409, 17000);
+ int16x8_t v2411 = vaddq_s16(v2399, v2410);
+ int16x8_t v2412 = vsubq_s16(v1962, v1964);
+ int16x8_t v2413 = vsubq_s16(v1966, v1968);
+ int16x8_t v2414_tmp = vqrdmulhq_n_s16(v2413, 1988);
+ int16x8_t v2414 = vaddq_s16(v2414_tmp, v2413);
+ int16x8_t v2415 = vaddq_s16(v2412, v2414);
+ int16x8_t v2416 = vsubq_s16(v1972, v1974);
+ int16x8_t v2417 = vsubq_s16(v1976, v1978);
+ int16x8_t v2418_tmp = vqrdmulhq_n_s16(v2417, 1988);
+ int16x8_t v2418 = vaddq_s16(v2418_tmp, v2417);
+ int16x8_t v2419 = vaddq_s16(v2416, v2418);
+ int16x8_t v2420 = vqrdmulhq_n_s16(v2419, 19102);
+ int16x8_t v2421 = vaddq_s16(v2415, v2420);
+ int16x8_t v2422 = vsubq_s16(v1984, v1986);
+ int16x8_t v2423 = vsubq_s16(v1988, v1990);
+ int16x8_t v2424_tmp = vqrdmulhq_n_s16(v2423, 1988);
+ int16x8_t v2424 = vaddq_s16(v2424_tmp, v2423);
+ int16x8_t v2425 = vaddq_s16(v2422, v2424);
+ int16x8_t v2426 = vsubq_s16(v1994, v1996);
+ int16x8_t v2427 = vsubq_s16(v1998, v2000);
+ int16x8_t v2428_tmp = vqrdmulhq_n_s16(v2427, 1988);
+ int16x8_t v2428 = vaddq_s16(v2428_tmp, v2427);
+ int16x8_t v2429 = vaddq_s16(v2426, v2428);
+ int16x8_t v2430 = vqrdmulhq_n_s16(v2429, 19102);
+ int16x8_t v2431 = vaddq_s16(v2425, v2430);
+ int16x8_t v2432 = vqrdmulhq_n_s16(v2431, 17000);
+ int16x8_t v2433 = vaddq_s16(v2421, v2432);
+ int16x8_t v2434 = vqrdmulhq_n_s16(v2433, 16534);
+ int16x8_t v2435 = vaddq_s16(v2411, v2434);
+ int16x8_t v2436 = vsubq_s16(v2010, v2012);
+ int16x8_t v2437 = vsubq_s16(v2014, v2016);
+ int16x8_t v2438_tmp = vqrdmulhq_n_s16(v2437, 1988);
+ int16x8_t v2438 = vaddq_s16(v2438_tmp, v2437);
+ int16x8_t v2439 = vaddq_s16(v2436, v2438);
+ int16x8_t v2440 = vsubq_s16(v2020, v2022);
+ int16x8_t v2441 = vsubq_s16(v2024, v2026);
+ int16x8_t v2442_tmp = vqrdmulhq_n_s16(v2441, 1988);
+ int16x8_t v2442 = vaddq_s16(v2442_tmp, v2441);
+ int16x8_t v2443 = vaddq_s16(v2440, v2442);
+ int16x8_t v2444 = vqrdmulhq_n_s16(v2443, 19102);
+ int16x8_t v2445 = vaddq_s16(v2439, v2444);
+ int16x8_t v2446 = vsubq_s16(v2032, v2034);
+ int16x8_t v2447 = vsubq_s16(v2036, v2038);
+ int16x8_t v2448_tmp = vqrdmulhq_n_s16(v2447, 1988);
+ int16x8_t v2448 = vaddq_s16(v2448_tmp, v2447);
+ int16x8_t v2449 = vaddq_s16(v2446, v2448);
+ int16x8_t v2450 = vsubq_s16(v2042, v2044);
+ int16x8_t v2451 = vsubq_s16(v2046, v2048);
+ int16x8_t v2452_tmp = vqrdmulhq_n_s16(v2451, 1988);
+ int16x8_t v2452 = vaddq_s16(v2452_tmp, v2451);
+ int16x8_t v2453 = vaddq_s16(v2450, v2452);
+ int16x8_t v2454 = vqrdmulhq_n_s16(v2453, 19102);
+ int16x8_t v2455 = vaddq_s16(v2449, v2454);
+ int16x8_t v2456 = vqrdmulhq_n_s16(v2455, 17000);
+ int16x8_t v2457 = vaddq_s16(v2445, v2456);
+ int16x8_t v2458 = vsubq_s16(v2056, v2058);
+ int16x8_t v2459 = vsubq_s16(v2060, v2062);
+ int16x8_t v2460_tmp = vqrdmulhq_n_s16(v2459, 1988);
+ int16x8_t v2460 = vaddq_s16(v2460_tmp, v2459);
+ int16x8_t v2461 = vaddq_s16(v2458, v2460);
+ int16x8_t v2462 = vsubq_s16(v2066, v2068);
+ int16x8_t v2463 = vqrdmulhq_n_s16(v2072, 29490);
+ int16x8_t v2464 = vsubq_s16(v2070, v2463);
+ int16x8_t v2465_tmp = vqrdmulhq_n_s16(v2464, 1988);
+ int16x8_t v2465 = vaddq_s16(v2465_tmp, v2464);
+ int16x8_t v2466 = vaddq_s16(v2462, v2465);
+ int16x8_t v2467 = vqrdmulhq_n_s16(v2466, 19102);
+ int16x8_t v2468 = vaddq_s16(v2461, v2467);
+ int16x8_t v2469 = vsubq_s16(v2078, v2080);
+ int16x8_t v2470 = vsubq_s16(v2082, v2084);
+ int16x8_t v2471_tmp = vqrdmulhq_n_s16(v2470, 1988);
+ int16x8_t v2471 = vaddq_s16(v2471_tmp, v2470);
+ int16x8_t v2472 = vaddq_s16(v2469, v2471);
+ int16x8_t v2473 = vsubq_s16(v2088, v2090);
+ int16x8_t v2474 = vsubq_s16(v2092, v2094);
+ int16x8_t v2475_tmp = vqrdmulhq_n_s16(v2474, 1988);
+ int16x8_t v2475 = vaddq_s16(v2475_tmp, v2474);
+ int16x8_t v2476 = vaddq_s16(v2473, v2475);
+ int16x8_t v2477 = vqrdmulhq_n_s16(v2476, 19102);
+ int16x8_t v2478 = vaddq_s16(v2472, v2477);
+ int16x8_t v2479 = vqrdmulhq_n_s16(v2478, 17000);
+ int16x8_t v2480 = vaddq_s16(v2468, v2479);
+ int16x8_t v2481 = vqrdmulhq_n_s16(v2480, 16534);
+ int16x8_t v2482 = vaddq_s16(v2457, v2481);
+ int16x8_t v2483 = vqrdmulhq_n_s16(v2482, 16421);
+ int16x8_t v2484 = vaddq_s16(v2435, v2483);
+ int16x8_t v2485 = vsubq_s16(v1537, v1542);
+ int16x8_t v2486 = vsubq_s16(v1547, v1552);
+ int16x8_t v2487_tmp = vqrdmulhq_n_s16(v2486, 23673);
+ int16x8_t v2487 = vaddq_s16(v2487_tmp, v2486);
+ int16x8_t v2488 = vaddq_s16(v2485, v2487);
+ int16x8_t v2489 = vsubq_s16(v1559, v1564);
+ int16x8_t v2490 = vsubq_s16(v1569, v1574);
+ int16x8_t v2491_tmp = vqrdmulhq_n_s16(v2490, 23673);
+ int16x8_t v2491 = vaddq_s16(v2491_tmp, v2490);
+ int16x8_t v2492 = vaddq_s16(v2489, v2491);
+ int16x8_t v2493 = vqrdmulhq_n_s16(v2492, 20398);
+ int16x8_t v2494 = vaddq_s16(v2488, v2493);
+ int16x8_t v2495 = vsubq_s16(v1583, v1588);
+ int16x8_t v2496 = vsubq_s16(v1593, v1598);
+ int16x8_t v2497_tmp = vqrdmulhq_n_s16(v2496, 23673);
+ int16x8_t v2497 = vaddq_s16(v2497_tmp, v2496);
+ int16x8_t v2498 = vaddq_s16(v2495, v2497);
+ int16x8_t v2499 = vsubq_s16(v1605, v1610);
+ int16x8_t v2500 = vsubq_s16(v1615, v1620);
+ int16x8_t v2501_tmp = vqrdmulhq_n_s16(v2500, 23673);
+ int16x8_t v2501 = vaddq_s16(v2501_tmp, v2500);
+ int16x8_t v2502 = vaddq_s16(v2499, v2501);
+ int16x8_t v2503 = vqrdmulhq_n_s16(v2502, 20398);
+ int16x8_t v2504 = vaddq_s16(v2498, v2503);
+ int16x8_t v2505 = vqrdmulhq_n_s16(v2504, 17255);
+ int16x8_t v2506 = vaddq_s16(v2494, v2505);
+ int16x8_t v2507 = vsubq_s16(v1631, v1636);
+ int16x8_t v2508 = vsubq_s16(v1641, v1646);
+ int16x8_t v2509_tmp = vqrdmulhq_n_s16(v2508, 23673);
+ int16x8_t v2509 = vaddq_s16(v2509_tmp, v2508);
+ int16x8_t v2510 = vaddq_s16(v2507, v2509);
+ int16x8_t v2511 = vsubq_s16(v1653, v1658);
+ int16x8_t v2512 = vsubq_s16(v1663, v1668);
+ int16x8_t v2513_tmp = vqrdmulhq_n_s16(v2512, 23673);
+ int16x8_t v2513 = vaddq_s16(v2513_tmp, v2512);
+ int16x8_t v2514 = vaddq_s16(v2511, v2513);
+ int16x8_t v2515 = vqrdmulhq_n_s16(v2514, 20398);
+ int16x8_t v2516 = vaddq_s16(v2510, v2515);
+ int16x8_t v2517 = vsubq_s16(v1677, v1682);
+ int16x8_t v2518 = vsubq_s16(v1687, v1692);
+ int16x8_t v2519_tmp = vqrdmulhq_n_s16(v2518, 23673);
+ int16x8_t v2519 = vaddq_s16(v2519_tmp, v2518);
+ int16x8_t v2520 = vaddq_s16(v2517, v2519);
+ int16x8_t v2521 = vsubq_s16(v1699, v1704);
+ int16x8_t v2522 = vsubq_s16(v1709, v1714);
+ int16x8_t v2523_tmp = vqrdmulhq_n_s16(v2522, 23673);
+ int16x8_t v2523 = vaddq_s16(v2523_tmp, v2522);
+ int16x8_t v2524 = vaddq_s16(v2521, v2523);
+ int16x8_t v2525 = vqrdmulhq_n_s16(v2524, 20398);
+ int16x8_t v2526 = vaddq_s16(v2520, v2525);
+ int16x8_t v2527 = vqrdmulhq_n_s16(v2526, 17255);
+ int16x8_t v2528 = vaddq_s16(v2516, v2527);
+ int16x8_t v2529 = vqrdmulhq_n_s16(v2528, 16595);
+ int16x8_t v2530 = vaddq_s16(v2506, v2529);
+ int16x8_t v2531 = vsubq_s16(v1727, v1732);
+ int16x8_t v2532 = vsubq_s16(v1737, v1742);
+ int16x8_t v2533_tmp = vqrdmulhq_n_s16(v2532, 23673);
+ int16x8_t v2533 = vaddq_s16(v2533_tmp, v2532);
+ int16x8_t v2534 = vaddq_s16(v2531, v2533);
+ int16x8_t v2535 = vsubq_s16(v1749, v1754);
+ int16x8_t v2536 = vsubq_s16(v1759, v1764);
+ int16x8_t v2537_tmp = vqrdmulhq_n_s16(v2536, 23673);
+ int16x8_t v2537 = vaddq_s16(v2537_tmp, v2536);
+ int16x8_t v2538 = vaddq_s16(v2535, v2537);
+ int16x8_t v2539 = vqrdmulhq_n_s16(v2538, 20398);
+ int16x8_t v2540 = vaddq_s16(v2534, v2539);
+ int16x8_t v2541 = vsubq_s16(v1773, v1778);
+ int16x8_t v2542 = vsubq_s16(v1783, v1788);
+ int16x8_t v2543_tmp = vqrdmulhq_n_s16(v2542, 23673);
+ int16x8_t v2543 = vaddq_s16(v2543_tmp, v2542);
+ int16x8_t v2544 = vaddq_s16(v2541, v2543);
+ int16x8_t v2545 = vsubq_s16(v1795, v1800);
+ int16x8_t v2546 = vsubq_s16(v1805, v1810);
+ int16x8_t v2547_tmp = vqrdmulhq_n_s16(v2546, 23673);
+ int16x8_t v2547 = vaddq_s16(v2547_tmp, v2546);
+ int16x8_t v2548 = vaddq_s16(v2545, v2547);
+ int16x8_t v2549 = vqrdmulhq_n_s16(v2548, 20398);
+ int16x8_t v2550 = vaddq_s16(v2544, v2549);
+ int16x8_t v2551 = vqrdmulhq_n_s16(v2550, 17255);
+ int16x8_t v2552 = vaddq_s16(v2540, v2551);
+ int16x8_t v2553 = vsubq_s16(v1821, v1826);
+ int16x8_t v2554 = vsubq_s16(v1831, v1836);
+ int16x8_t v2555_tmp = vqrdmulhq_n_s16(v2554, 23673);
+ int16x8_t v2555 = vaddq_s16(v2555_tmp, v2554);
+ int16x8_t v2556 = vaddq_s16(v2553, v2555);
+ int16x8_t v2557 = vsubq_s16(v1843, v1848);
+ int16x8_t v2558 = vsubq_s16(v1853, v1858);
+ int16x8_t v2559_tmp = vqrdmulhq_n_s16(v2558, 23673);
+ int16x8_t v2559 = vaddq_s16(v2559_tmp, v2558);
+ int16x8_t v2560 = vaddq_s16(v2557, v2559);
+ int16x8_t v2561 = vqrdmulhq_n_s16(v2560, 20398);
+ int16x8_t v2562 = vaddq_s16(v2556, v2561);
+ int16x8_t v2563 = vsubq_s16(v1867, v1872);
+ int16x8_t v2564 = vsubq_s16(v1877, v1882);
+ int16x8_t v2565_tmp = vqrdmulhq_n_s16(v2564, 23673);
+ int16x8_t v2565 = vaddq_s16(v2565_tmp, v2564);
+ int16x8_t v2566 = vaddq_s16(v2563, v2565);
+ int16x8_t v2567 = vsubq_s16(v1889, v1894);
+ int16x8_t v2568 = vsubq_s16(v1899, v1904);
+ int16x8_t v2569_tmp = vqrdmulhq_n_s16(v2568, 23673);
+ int16x8_t v2569 = vaddq_s16(v2569_tmp, v2568);
+ int16x8_t v2570 = vaddq_s16(v2567, v2569);
+ int16x8_t v2571 = vqrdmulhq_n_s16(v2570, 20398);
+ int16x8_t v2572 = vaddq_s16(v2566, v2571);
+ int16x8_t v2573 = vqrdmulhq_n_s16(v2572, 17255);
+ int16x8_t v2574 = vaddq_s16(v2562, v2573);
+ int16x8_t v2575 = vqrdmulhq_n_s16(v2574, 16595);
+ int16x8_t v2576 = vaddq_s16(v2552, v2575);
+ int16x8_t v2577 = vqrdmulhq_n_s16(v2576, 16436);
+ int16x8_t v2578 = vaddq_s16(v2530, v2577);
+ int16x8_t v2579 = vsubq_s16(v9, v24);
+ int16x8_t v2580 = vsubq_s16(v42, v58);
+ int16x8_t v2581_tmp = vqrdmulhq_n_s16(v2580, 3314);
+ int16x8_t v2581 = vmlaq_n_s16(v2581_tmp, v2580, 5);
+ int16x8_t v2582 = vaddq_s16(v2579, v2581);
+ int16x8_t v2583 = vsubq_s16(v78, v101);
+ int16x8_t v2584 = vsubq_s16(v119, v136);
+ int16x8_t v2585_tmp = vqrdmulhq_n_s16(v2584, 3314);
+ int16x8_t v2585 = vmlaq_n_s16(v2585_tmp, v2584, 5);
+ int16x8_t v2586 = vaddq_s16(v2583, v2585);
+ int16x8_t v2587 = vqrdmulhq_n_s16(v2586, 22112);
+ int16x8_t v2588 = vaddq_s16(v2582, v2587);
+ int16x8_t v2589 = vsubq_s16(v158, v181);
+ int16x8_t v2590 = vsubq_s16(v213, v231);
+ int16x8_t v2591_tmp = vqrdmulhq_n_s16(v2590, 3314);
+ int16x8_t v2591 = vmlaq_n_s16(v2591_tmp, v2590, 5);
+ int16x8_t v2592 = vaddq_s16(v2589, v2591);
+ int16x8_t v2593 = vsubq_s16(v251, v274);
+ int16x8_t v2594 = vsubq_s16(v292, v310);
+ int16x8_t v2595_tmp = vqrdmulhq_n_s16(v2594, 3314);
+ int16x8_t v2595 = vmlaq_n_s16(v2595_tmp, v2594, 5);
+ int16x8_t v2596 = vaddq_s16(v2593, v2595);
+ int16x8_t v2597 = vqrdmulhq_n_s16(v2596, 22112);
+ int16x8_t v2598 = vaddq_s16(v2592, v2597);
+ int16x8_t v2599 = vqrdmulhq_n_s16(v2598, 17561);
+ int16x8_t v2600 = vaddq_s16(v2588, v2599);
+ int16x8_t v2601 = vsubq_s16(v334, v357);
+ int16x8_t v2602 = vsubq_s16(v389, v407);
+ int16x8_t v2603_tmp = vqrdmulhq_n_s16(v2602, 3314);
+ int16x8_t v2603 = vmlaq_n_s16(v2603_tmp, v2602, 5);
+ int16x8_t v2604 = vaddq_s16(v2601, v2603);
+ int16x8_t v2605 = vsubq_s16(v441, v480);
+ int16x8_t v2606 = vsubq_s16(v498, v517);
+ int16x8_t v2607_tmp = vqrdmulhq_n_s16(v2606, 3314);
+ int16x8_t v2607 = vmlaq_n_s16(v2607_tmp, v2606, 5);
+ int16x8_t v2608 = vaddq_s16(v2605, v2607);
+ int16x8_t v2609 = vqrdmulhq_n_s16(v2608, 22112);
+ int16x8_t v2610 = vaddq_s16(v2604, v2609);
+ int16x8_t v2611 = vsubq_s16(v539, v562);
+ int16x8_t v2612 = vsubq_s16(v594, v612);
+ int16x8_t v2613_tmp = vqrdmulhq_n_s16(v2612, 3314);
+ int16x8_t v2613 = vmlaq_n_s16(v2613_tmp, v2612, 5);
+ int16x8_t v2614 = vaddq_s16(v2611, v2613);
+ int16x8_t v2615 = vsubq_s16(v632, v655);
+ int16x8_t v2616 = vsubq_s16(v673, v692);
+ int16x8_t v2617_tmp = vqrdmulhq_n_s16(v2616, 3314);
+ int16x8_t v2617 = vmlaq_n_s16(v2617_tmp, v2616, 5);
+ int16x8_t v2618 = vaddq_s16(v2615, v2617);
+ int16x8_t v2619 = vqrdmulhq_n_s16(v2618, 22112);
+ int16x8_t v2620 = vaddq_s16(v2614, v2619);
+ int16x8_t v2621 = vqrdmulhq_n_s16(v2620, 17561);
+ int16x8_t v2622 = vaddq_s16(v2610, v2621);
+ int16x8_t v2623 = vqrdmulhq_n_s16(v2622, 16666);
+ int16x8_t v2624 = vaddq_s16(v2600, v2623);
+ int16x8_t v2625 = vsubq_s16(v718, v741);
+ int16x8_t v2626 = vsubq_s16(v773, v791);
+ int16x8_t v2627_tmp = vqrdmulhq_n_s16(v2626, 3314);
+ int16x8_t v2627 = vmlaq_n_s16(v2627_tmp, v2626, 5);
+ int16x8_t v2628 = vaddq_s16(v2625, v2627);
+ int16x8_t v2629 = vsubq_s16(v825, v864);
+ int16x8_t v2630 = vsubq_s16(v882, v901);
+ int16x8_t v2631_tmp = vqrdmulhq_n_s16(v2630, 3314);
+ int16x8_t v2631 = vmlaq_n_s16(v2631_tmp, v2630, 5);
+ int16x8_t v2632 = vaddq_s16(v2629, v2631);
+ int16x8_t v2633 = vqrdmulhq_n_s16(v2632, 22112);
+ int16x8_t v2634 = vaddq_s16(v2628, v2633);
+ int16x8_t v2635 = vsubq_s16(v937, v976);
+ int16x8_t v2636 = vsubq_s16(v1036, v1058);
+ int16x8_t v2637_tmp = vqrdmulhq_n_s16(v2636, 3314);
+ int16x8_t v2637 = vmlaq_n_s16(v2637_tmp, v2636, 5);
+ int16x8_t v2638 = vaddq_s16(v2635, v2637);
+ int16x8_t v2639 = vsubq_s16(v1078, v1101);
+ int16x8_t v2640 = vsubq_s16(v1119, v1139);
+ int16x8_t v2641_tmp = vqrdmulhq_n_s16(v2640, 3314);
+ int16x8_t v2641 = vmlaq_n_s16(v2641_tmp, v2640, 5);
+ int16x8_t v2642 = vaddq_s16(v2639, v2641);
+ int16x8_t v2643 = vqrdmulhq_n_s16(v2642, 22112);
+ int16x8_t v2644 = vaddq_s16(v2638, v2643);
+ int16x8_t v2645 = vqrdmulhq_n_s16(v2644, 17561);
+ int16x8_t v2646 = vaddq_s16(v2634, v2645);
+ int16x8_t v2647 = vsubq_s16(v1163, v1186);
+ int16x8_t v2648 = vsubq_s16(v1218, v1236);
+ int16x8_t v2649_tmp = vqrdmulhq_n_s16(v2648, 3314);
+ int16x8_t v2649 = vmlaq_n_s16(v2649_tmp, v2648, 5);
+ int16x8_t v2650 = vaddq_s16(v2647, v2649);
+ int16x8_t v2651 = vsubq_s16(v1270, v1309);
+ int16x8_t v2652 = vsubq_s16(v1327, v1346);
+ int16x8_t v2653_tmp = vqrdmulhq_n_s16(v2652, 3314);
+ int16x8_t v2653 = vmlaq_n_s16(v2653_tmp, v2652, 5);
+ int16x8_t v2654 = vaddq_s16(v2651, v2653);
+ int16x8_t v2655 = vqrdmulhq_n_s16(v2654, 22112);
+ int16x8_t v2656 = vaddq_s16(v2650, v2655);
+ int16x8_t v2657 = vsubq_s16(v1368, v1391);
+ int16x8_t v2658 = vsubq_s16(v1423, v1441);
+ int16x8_t v2659_tmp = vqrdmulhq_n_s16(v2658, 3314);
+ int16x8_t v2659 = vmlaq_n_s16(v2659_tmp, v2658, 5);
+ int16x8_t v2660 = vaddq_s16(v2657, v2659);
+ int16x8_t v2661 = vsubq_s16(v1461, v1484);
+ int16x8_t v2662 = vsubq_s16(v1502, v1522);
+ int16x8_t v2663_tmp = vqrdmulhq_n_s16(v2662, 3314);
+ int16x8_t v2663 = vmlaq_n_s16(v2663_tmp, v2662, 5);
+ int16x8_t v2664 = vaddq_s16(v2661, v2663);
+ int16x8_t v2665 = vqrdmulhq_n_s16(v2664, 22112);
+ int16x8_t v2666 = vaddq_s16(v2660, v2665);
+ int16x8_t v2667 = vqrdmulhq_n_s16(v2666, 17561);
+ int16x8_t v2668 = vaddq_s16(v2656, v2667);
+ int16x8_t v2669 = vqrdmulhq_n_s16(v2668, 16666);
+ int16x8_t v2670 = vaddq_s16(v2646, v2669);
+ int16x8_t v2671 = vqrdmulhq_n_s16(v2670, 16454);
+ int16x8_t v2672 = vaddq_s16(v2624, v2671);
+ int16x8_t v2673 = vsubq_s16(v2579, v2581);
+ int16x8_t v2674 = vsubq_s16(v2583, v2585);
+ int16x8_t v2675 = vqrdmulhq_n_s16(v2674, 24397);
+ int16x8_t v2676 = vaddq_s16(v2673, v2675);
+ int16x8_t v2677 = vsubq_s16(v2589, v2591);
+ int16x8_t v2678 = vsubq_s16(v2593, v2595);
+ int16x8_t v2679 = vqrdmulhq_n_s16(v2678, 24397);
+ int16x8_t v2680 = vaddq_s16(v2677, v2679);
+ int16x8_t v2681 = vqrdmulhq_n_s16(v2680, 17921);
+ int16x8_t v2682 = vaddq_s16(v2676, v2681);
+ int16x8_t v2683 = vsubq_s16(v2601, v2603);
+ int16x8_t v2684 = vsubq_s16(v2605, v2607);
+ int16x8_t v2685 = vqrdmulhq_n_s16(v2684, 24397);
+ int16x8_t v2686 = vaddq_s16(v2683, v2685);
+ int16x8_t v2687 = vsubq_s16(v2611, v2613);
+ int16x8_t v2688 = vsubq_s16(v2615, v2617);
+ int16x8_t v2689 = vqrdmulhq_n_s16(v2688, 24397);
+ int16x8_t v2690 = vaddq_s16(v2687, v2689);
+ int16x8_t v2691 = vqrdmulhq_n_s16(v2690, 17921);
+ int16x8_t v2692 = vaddq_s16(v2686, v2691);
+ int16x8_t v2693 = vqrdmulhq_n_s16(v2692, 16747);
+ int16x8_t v2694 = vaddq_s16(v2682, v2693);
+ int16x8_t v2695 = vsubq_s16(v2625, v2627);
+ int16x8_t v2696 = vsubq_s16(v2629, v2631);
+ int16x8_t v2697 = vqrdmulhq_n_s16(v2696, 24397);
+ int16x8_t v2698 = vaddq_s16(v2695, v2697);
+ int16x8_t v2699 = vsubq_s16(v2635, v2637);
+ int16x8_t v2700 = vsubq_s16(v2639, v2641);
+ int16x8_t v2701 = vqrdmulhq_n_s16(v2700, 24397);
+ int16x8_t v2702 = vaddq_s16(v2699, v2701);
+ int16x8_t v2703 = vqrdmulhq_n_s16(v2702, 17921);
+ int16x8_t v2704 = vaddq_s16(v2698, v2703);
+ int16x8_t v2705 = vsubq_s16(v2647, v2649);
+ int16x8_t v2706 = vsubq_s16(v2651, v2653);
+ int16x8_t v2707 = vqrdmulhq_n_s16(v2706, 24397);
+ int16x8_t v2708 = vaddq_s16(v2705, v2707);
+ int16x8_t v2709 = vsubq_s16(v2657, v2659);
+ int16x8_t v2710 = vsubq_s16(v2661, v2663);
+ int16x8_t v2711 = vqrdmulhq_n_s16(v2710, 24397);
+ int16x8_t v2712 = vaddq_s16(v2709, v2711);
+ int16x8_t v2713 = vqrdmulhq_n_s16(v2712, 17921);
+ int16x8_t v2714 = vaddq_s16(v2708, v2713);
+ int16x8_t v2715 = vqrdmulhq_n_s16(v2714, 16747);
+ int16x8_t v2716 = vaddq_s16(v2704, v2715);
+ int16x8_t v2717 = vqrdmulhq_n_s16(v2716, 16474);
+ int16x8_t v2718 = vaddq_s16(v2694, v2717);
+ int16x8_t v2719 = vsubq_s16(v2485, v2487);
+ int16x8_t v2720 = vsubq_s16(v2489, v2491);
+ int16x8_t v2721 = vqrdmulhq_n_s16(v2720, 27504);
+ int16x8_t v2722 = vaddq_s16(v2719, v2721);
+ int16x8_t v2723 = vsubq_s16(v2495, v2497);
+ int16x8_t v2724 = vsubq_s16(v2499, v2501);
+ int16x8_t v2725 = vqrdmulhq_n_s16(v2724, 27504);
+ int16x8_t v2726 = vaddq_s16(v2723, v2725);
+ int16x8_t v2727 = vqrdmulhq_n_s16(v2726, 18343);
+ int16x8_t v2728 = vaddq_s16(v2722, v2727);
+ int16x8_t v2729 = vsubq_s16(v2507, v2509);
+ int16x8_t v2730 = vsubq_s16(v2511, v2513);
+ int16x8_t v2731 = vqrdmulhq_n_s16(v2730, 27504);
+ int16x8_t v2732 = vaddq_s16(v2729, v2731);
+ int16x8_t v2733 = vsubq_s16(v2517, v2519);
+ int16x8_t v2734 = vsubq_s16(v2521, v2523);
+ int16x8_t v2735 = vqrdmulhq_n_s16(v2734, 27504);
+ int16x8_t v2736 = vaddq_s16(v2733, v2735);
+ int16x8_t v2737 = vqrdmulhq_n_s16(v2736, 18343);
+ int16x8_t v2738 = vaddq_s16(v2732, v2737);
+ int16x8_t v2739 = vqrdmulhq_n_s16(v2738, 16840);
+ int16x8_t v2740 = vaddq_s16(v2728, v2739);
+ int16x8_t v2741 = vsubq_s16(v2531, v2533);
+ int16x8_t v2742 = vsubq_s16(v2535, v2537);
+ int16x8_t v2743 = vqrdmulhq_n_s16(v2742, 27504);
+ int16x8_t v2744 = vaddq_s16(v2741, v2743);
+ int16x8_t v2745 = vsubq_s16(v2541, v2543);
+ int16x8_t v2746 = vsubq_s16(v2545, v2547);
+ int16x8_t v2747 = vqrdmulhq_n_s16(v2746, 27504);
+ int16x8_t v2748 = vaddq_s16(v2745, v2747);
+ int16x8_t v2749 = vqrdmulhq_n_s16(v2748, 18343);
+ int16x8_t v2750 = vaddq_s16(v2744, v2749);
+ int16x8_t v2751 = vsubq_s16(v2553, v2555);
+ int16x8_t v2752 = vsubq_s16(v2557, v2559);
+ int16x8_t v2753 = vqrdmulhq_n_s16(v2752, 27504);
+ int16x8_t v2754 = vaddq_s16(v2751, v2753);
+ int16x8_t v2755 = vsubq_s16(v2563, v2565);
+ int16x8_t v2756 = vsubq_s16(v2567, v2569);
+ int16x8_t v2757 = vqrdmulhq_n_s16(v2756, 27504);
+ int16x8_t v2758 = vaddq_s16(v2755, v2757);
+ int16x8_t v2759 = vqrdmulhq_n_s16(v2758, 18343);
+ int16x8_t v2760 = vaddq_s16(v2754, v2759);
+ int16x8_t v2761 = vqrdmulhq_n_s16(v2760, 16840);
+ int16x8_t v2762 = vaddq_s16(v2750, v2761);
+ int16x8_t v2763 = vqrdmulhq_n_s16(v2762, 16496);
+ int16x8_t v2764 = vaddq_s16(v2740, v2763);
+ int16x8_t v2765 = vsubq_s16(v2390, v2392);
+ int16x8_t v2766 = vsubq_s16(v2394, v2396);
+ int16x8_t v2767 = vqrdmulhq_n_s16(v2766, 31869);
+ int16x8_t v2768 = vaddq_s16(v2765, v2767);
+ int16x8_t v2769 = vsubq_s16(v2400, v2402);
+ int16x8_t v2770 = vsubq_s16(v2404, v2406);
+ int16x8_t v2771 = vqrdmulhq_n_s16(v2770, 31869);
+ int16x8_t v2772 = vaddq_s16(v2769, v2771);
+ int16x8_t v2773 = vqrdmulhq_n_s16(v2772, 18830);
+ int16x8_t v2774 = vaddq_s16(v2768, v2773);
+ int16x8_t v2775 = vsubq_s16(v2412, v2414);
+ int16x8_t v2776 = vsubq_s16(v2416, v2418);
+ int16x8_t v2777 = vqrdmulhq_n_s16(v2776, 31869);
+ int16x8_t v2778 = vaddq_s16(v2775, v2777);
+ int16x8_t v2779 = vsubq_s16(v2422, v2424);
+ int16x8_t v2780 = vsubq_s16(v2426, v2428);
+ int16x8_t v2781 = vqrdmulhq_n_s16(v2780, 31869);
+ int16x8_t v2782 = vaddq_s16(v2779, v2781);
+ int16x8_t v2783 = vqrdmulhq_n_s16(v2782, 18830);
+ int16x8_t v2784 = vaddq_s16(v2778, v2783);
+ int16x8_t v2785 = vqrdmulhq_n_s16(v2784, 16944);
+ int16x8_t v2786 = vaddq_s16(v2774, v2785);
+ int16x8_t v2787 = vsubq_s16(v2436, v2438);
+ int16x8_t v2788 = vsubq_s16(v2440, v2442);
+ int16x8_t v2789 = vqrdmulhq_n_s16(v2788, 31869);
+ int16x8_t v2790 = vaddq_s16(v2787, v2789);
+ int16x8_t v2791 = vsubq_s16(v2446, v2448);
+ int16x8_t v2792 = vsubq_s16(v2450, v2452);
+ int16x8_t v2793 = vqrdmulhq_n_s16(v2792, 31869);
+ int16x8_t v2794 = vaddq_s16(v2791, v2793);
+ int16x8_t v2795 = vqrdmulhq_n_s16(v2794, 18830);
+ int16x8_t v2796 = vaddq_s16(v2790, v2795);
+ int16x8_t v2797 = vsubq_s16(v2458, v2460);
+ int16x8_t v2798 = vsubq_s16(v2462, v2465);
+ int16x8_t v2799 = vqrdmulhq_n_s16(v2798, 31869);
+ int16x8_t v2800 = vaddq_s16(v2797, v2799);
+ int16x8_t v2801 = vsubq_s16(v2469, v2471);
+ int16x8_t v2802 = vsubq_s16(v2473, v2475);
+ int16x8_t v2803 = vqrdmulhq_n_s16(v2802, 31869);
+ int16x8_t v2804 = vaddq_s16(v2801, v2803);
+ int16x8_t v2805 = vqrdmulhq_n_s16(v2804, 18830);
+ int16x8_t v2806 = vaddq_s16(v2800, v2805);
+ int16x8_t v2807 = vqrdmulhq_n_s16(v2806, 16944);
+ int16x8_t v2808 = vaddq_s16(v2796, v2807);
+ int16x8_t v2809 = vqrdmulhq_n_s16(v2808, 16521);
+ int16x8_t v2810 = vaddq_s16(v2786, v2809);
+ int16x8_t v2811 = vsubq_s16(v2296, v2298);
+ int16x8_t v2812 = vsubq_s16(v2300, v2302);
+ int16x8_t v2813_tmp = vqrdmulhq_n_s16(v2812, 5552);
+ int16x8_t v2813 = vaddq_s16(v2813_tmp, v2812);
+ int16x8_t v2814 = vaddq_s16(v2811, v2813);
+ int16x8_t v2815 = vsubq_s16(v2306, v2308);
+ int16x8_t v2816 = vsubq_s16(v2310, v2312);
+ int16x8_t v2817_tmp = vqrdmulhq_n_s16(v2816, 5552);
+ int16x8_t v2817 = vaddq_s16(v2817_tmp, v2816);
+ int16x8_t v2818 = vaddq_s16(v2815, v2817);
+ int16x8_t v2819 = vqrdmulhq_n_s16(v2818, 19393);
+ int16x8_t v2820 = vaddq_s16(v2814, v2819);
+ int16x8_t v2821 = vsubq_s16(v2318, v2320);
+ int16x8_t v2822 = vsubq_s16(v2322, v2324);
+ int16x8_t v2823_tmp = vqrdmulhq_n_s16(v2822, 5552);
+ int16x8_t v2823 = vaddq_s16(v2823_tmp, v2822);
+ int16x8_t v2824 = vaddq_s16(v2821, v2823);
+ int16x8_t v2825 = vsubq_s16(v2328, v2330);
+ int16x8_t v2826 = vsubq_s16(v2332, v2334);
+ int16x8_t v2827_tmp = vqrdmulhq_n_s16(v2826, 5552);
+ int16x8_t v2827 = vaddq_s16(v2827_tmp, v2826);
+ int16x8_t v2828 = vaddq_s16(v2825, v2827);
+ int16x8_t v2829 = vqrdmulhq_n_s16(v2828, 19393);
+ int16x8_t v2830 = vaddq_s16(v2824, v2829);
+ int16x8_t v2831 = vqrdmulhq_n_s16(v2830, 17059);
+ int16x8_t v2832 = vaddq_s16(v2820, v2831);
+ int16x8_t v2833 = vsubq_s16(v2342, v2344);
+ int16x8_t v2834 = vsubq_s16(v2346, v2348);
+ int16x8_t v2835_tmp = vqrdmulhq_n_s16(v2834, 5552);
+ int16x8_t v2835 = vaddq_s16(v2835_tmp, v2834);
+ int16x8_t v2836 = vaddq_s16(v2833, v2835);
+ int16x8_t v2837 = vsubq_s16(v2352, v2354);
+ int16x8_t v2838 = vsubq_s16(v2356, v2358);
+ int16x8_t v2839_tmp = vqrdmulhq_n_s16(v2838, 5552);
+ int16x8_t v2839 = vaddq_s16(v2839_tmp, v2838);
+ int16x8_t v2840 = vaddq_s16(v2837, v2839);
+ int16x8_t v2841 = vqrdmulhq_n_s16(v2840, 19393);
+ int16x8_t v2842 = vaddq_s16(v2836, v2841);
+ int16x8_t v2843 = vsubq_s16(v2364, v2366);
+ int16x8_t v2844 = vsubq_s16(v2368, v2370);
+ int16x8_t v2845_tmp = vqrdmulhq_n_s16(v2844, 5552);
+ int16x8_t v2845 = vaddq_s16(v2845_tmp, v2844);
+ int16x8_t v2846 = vaddq_s16(v2843, v2845);
+ int16x8_t v2847 = vsubq_s16(v2374, v2376);
+ int16x8_t v2848 = vsubq_s16(v2378, v2380);
+ int16x8_t v2849_tmp = vqrdmulhq_n_s16(v2848, 5552);
+ int16x8_t v2849 = vaddq_s16(v2849_tmp, v2848);
+ int16x8_t v2850 = vaddq_s16(v2847, v2849);
+ int16x8_t v2851 = vqrdmulhq_n_s16(v2850, 19393);
+ int16x8_t v2852 = vaddq_s16(v2846, v2851);
+ int16x8_t v2853 = vqrdmulhq_n_s16(v2852, 17059);
+ int16x8_t v2854 = vaddq_s16(v2842, v2853);
+ int16x8_t v2855 = vqrdmulhq_n_s16(v2854, 16549);
+ int16x8_t v2856 = vaddq_s16(v2832, v2855);
+ int16x8_t v2857 = vsubq_s16(v2109, v2114);
+ int16x8_t v2858 = vsubq_s16(v2119, v2124);
+ int16x8_t v2859_tmp = vqrdmulhq_n_s16(v2858, 15865);
+ int16x8_t v2859 = vaddq_s16(v2859_tmp, v2858);
+ int16x8_t v2860 = vaddq_s16(v2857, v2859);
+ int16x8_t v2861 = vsubq_s16(v2131, v2136);
+ int16x8_t v2862 = vsubq_s16(v2141, v2146);
+ int16x8_t v2863_tmp = vqrdmulhq_n_s16(v2862, 15865);
+ int16x8_t v2863 = vaddq_s16(v2863_tmp, v2862);
+ int16x8_t v2864 = vaddq_s16(v2861, v2863);
+ int16x8_t v2865 = vqrdmulhq_n_s16(v2864, 20040);
+ int16x8_t v2866 = vaddq_s16(v2860, v2865);
+ int16x8_t v2867 = vsubq_s16(v2155, v2160);
+ int16x8_t v2868 = vsubq_s16(v2165, v2170);
+ int16x8_t v2869_tmp = vqrdmulhq_n_s16(v2868, 15865);
+ int16x8_t v2869 = vaddq_s16(v2869_tmp, v2868);
+ int16x8_t v2870 = vaddq_s16(v2867, v2869);
+ int16x8_t v2871 = vsubq_s16(v2177, v2182);
+ int16x8_t v2872 = vsubq_s16(v2187, v2192);
+ int16x8_t v2873_tmp = vqrdmulhq_n_s16(v2872, 15865);
+ int16x8_t v2873 = vaddq_s16(v2873_tmp, v2872);
+ int16x8_t v2874 = vaddq_s16(v2871, v2873);
+ int16x8_t v2875 = vqrdmulhq_n_s16(v2874, 20040);
+ int16x8_t v2876 = vaddq_s16(v2870, v2875);
+ int16x8_t v2877 = vqrdmulhq_n_s16(v2876, 17187);
+ int16x8_t v2878 = vaddq_s16(v2866, v2877);
+ int16x8_t v2879 = vsubq_s16(v2203, v2208);
+ int16x8_t v2880 = vsubq_s16(v2213, v2218);
+ int16x8_t v2881_tmp = vqrdmulhq_n_s16(v2880, 15865);
+ int16x8_t v2881 = vaddq_s16(v2881_tmp, v2880);
+ int16x8_t v2882 = vaddq_s16(v2879, v2881);
+ int16x8_t v2883 = vsubq_s16(v2225, v2230);
+ int16x8_t v2884 = vsubq_s16(v2235, v2240);
+ int16x8_t v2885_tmp = vqrdmulhq_n_s16(v2884, 15865);
+ int16x8_t v2885 = vaddq_s16(v2885_tmp, v2884);
+ int16x8_t v2886 = vaddq_s16(v2883, v2885);
+ int16x8_t v2887 = vqrdmulhq_n_s16(v2886, 20040);
+ int16x8_t v2888 = vaddq_s16(v2882, v2887);
+ int16x8_t v2889 = vsubq_s16(v2249, v2254);
+ int16x8_t v2890 = vsubq_s16(v2259, v2264);
+ int16x8_t v2891_tmp = vqrdmulhq_n_s16(v2890, 15865);
+ int16x8_t v2891 = vaddq_s16(v2891_tmp, v2890);
+ int16x8_t v2892 = vaddq_s16(v2889, v2891);
+ int16x8_t v2893 = vsubq_s16(v2271, v2276);
+ int16x8_t v2894 = vsubq_s16(v2281, v2286);
+ int16x8_t v2895_tmp = vqrdmulhq_n_s16(v2894, 15865);
+ int16x8_t v2895 = vaddq_s16(v2895_tmp, v2894);
+ int16x8_t v2896 = vaddq_s16(v2893, v2895);
+ int16x8_t v2897 = vqrdmulhq_n_s16(v2896, 20040);
+ int16x8_t v2898 = vaddq_s16(v2892, v2897);
+ int16x8_t v2899 = vqrdmulhq_n_s16(v2898, 17187);
+ int16x8_t v2900 = vaddq_s16(v2888, v2899);
+ int16x8_t v2901 = vqrdmulhq_n_s16(v2900, 16579);
+ int16x8_t v2902 = vaddq_s16(v2878, v2901);
+ int16x8_t v2903 = vsubq_s16(v1919, v1924);
+ int16x8_t v2904 = vsubq_s16(v1929, v1934);
+ int16x8_t v2905_tmp = vqrdmulhq_n_s16(v2904, 1893);
+ int16x8_t v2905 = vmlaq_n_s16(v2905_tmp, v2904, 2);
+ int16x8_t v2906 = vaddq_s16(v2903, v2905);
+ int16x8_t v2907 = vsubq_s16(v1941, v1946);
+ int16x8_t v2908 = vsubq_s16(v1951, v1956);
+ int16x8_t v2909_tmp = vqrdmulhq_n_s16(v2908, 1893);
+ int16x8_t v2909 = vmlaq_n_s16(v2909_tmp, v2908, 2);
+ int16x8_t v2910 = vaddq_s16(v2907, v2909);
+ int16x8_t v2911 = vqrdmulhq_n_s16(v2910, 20783);
+ int16x8_t v2912 = vaddq_s16(v2906, v2911);
+ int16x8_t v2913 = vsubq_s16(v1965, v1970);
+ int16x8_t v2914 = vsubq_s16(v1975, v1980);
+ int16x8_t v2915_tmp = vqrdmulhq_n_s16(v2914, 1893);
+ int16x8_t v2915 = vmlaq_n_s16(v2915_tmp, v2914, 2);
+ int16x8_t v2916 = vaddq_s16(v2913, v2915);
+ int16x8_t v2917 = vsubq_s16(v1987, v1992);
+ int16x8_t v2918 = vsubq_s16(v1997, v2002);
+ int16x8_t v2919_tmp = vqrdmulhq_n_s16(v2918, 1893);
+ int16x8_t v2919 = vmlaq_n_s16(v2919_tmp, v2918, 2);
+ int16x8_t v2920 = vaddq_s16(v2917, v2919);
+ int16x8_t v2921 = vqrdmulhq_n_s16(v2920, 20783);
+ int16x8_t v2922 = vaddq_s16(v2916, v2921);
+ int16x8_t v2923 = vqrdmulhq_n_s16(v2922, 17326);
+ int16x8_t v2924 = vaddq_s16(v2912, v2923);
+ int16x8_t v2925 = vsubq_s16(v2013, v2018);
+ int16x8_t v2926 = vsubq_s16(v2023, v2028);
+ int16x8_t v2927_tmp = vqrdmulhq_n_s16(v2926, 1893);
+ int16x8_t v2927 = vmlaq_n_s16(v2927_tmp, v2926, 2);
+ int16x8_t v2928 = vaddq_s16(v2925, v2927);
+ int16x8_t v2929 = vsubq_s16(v2035, v2040);
+ int16x8_t v2930 = vsubq_s16(v2045, v2050);
+ int16x8_t v2931_tmp = vqrdmulhq_n_s16(v2930, 1893);
+ int16x8_t v2931 = vmlaq_n_s16(v2931_tmp, v2930, 2);
+ int16x8_t v2932 = vaddq_s16(v2929, v2931);
+ int16x8_t v2933 = vqrdmulhq_n_s16(v2932, 20783);
+ int16x8_t v2934 = vaddq_s16(v2928, v2933);
+ int16x8_t v2935 = vsubq_s16(v2059, v2064);
+ int16x8_t v2936 = vsubq_s16(v2069, v2074);
+ int16x8_t v2937_tmp = vqrdmulhq_n_s16(v2936, 1893);
+ int16x8_t v2937 = vmlaq_n_s16(v2937_tmp, v2936, 2);
+ int16x8_t v2938 = vaddq_s16(v2935, v2937);
+ int16x8_t v2939 = vsubq_s16(v2081, v2086);
+ int16x8_t v2940 = vsubq_s16(v2091, v2096);
+ int16x8_t v2941_tmp = vqrdmulhq_n_s16(v2940, 1893);
+ int16x8_t v2941 = vmlaq_n_s16(v2941_tmp, v2940, 2);
+ int16x8_t v2942 = vaddq_s16(v2939, v2941);
+ int16x8_t v2943 = vqrdmulhq_n_s16(v2942, 20783);
+ int16x8_t v2944 = vaddq_s16(v2938, v2943);
+ int16x8_t v2945 = vqrdmulhq_n_s16(v2944, 17326);
+ int16x8_t v2946 = vaddq_s16(v2934, v2945);
+ int16x8_t v2947 = vqrdmulhq_n_s16(v2946, 16611);
+ int16x8_t v2948 = vaddq_s16(v2924, v2947);
+ int16x8_t v2949 = vsubq_s16(v1543, v1554);
+ int16x8_t v2950 = vsubq_s16(v1565, v1576);
+ int16x8_t v2951_tmp = vqrdmulhq_n_s16(v2950, 13357);
+ int16x8_t v2951 = vmlaq_n_s16(v2951_tmp, v2950, 3);
+ int16x8_t v2952 = vaddq_s16(v2949, v2951);
+ int16x8_t v2953 = vsubq_s16(v1589, v1600);
+ int16x8_t v2954 = vsubq_s16(v1611, v1622);
+ int16x8_t v2955_tmp = vqrdmulhq_n_s16(v2954, 13357);
+ int16x8_t v2955 = vmlaq_n_s16(v2955_tmp, v2954, 3);
+ int16x8_t v2956 = vaddq_s16(v2953, v2955);
+ int16x8_t v2957 = vqrdmulhq_n_s16(v2956, 21637);
+ int16x8_t v2958 = vaddq_s16(v2952, v2957);
+ int16x8_t v2959 = vsubq_s16(v1637, v1648);
+ int16x8_t v2960 = vsubq_s16(v1659, v1670);
+ int16x8_t v2961_tmp = vqrdmulhq_n_s16(v2960, 13357);
+ int16x8_t v2961 = vmlaq_n_s16(v2961_tmp, v2960, 3);
+ int16x8_t v2962 = vaddq_s16(v2959, v2961);
+ int16x8_t v2963 = vsubq_s16(v1683, v1694);
+ int16x8_t v2964 = vsubq_s16(v1705, v1716);
+ int16x8_t v2965_tmp = vqrdmulhq_n_s16(v2964, 13357);
+ int16x8_t v2965 = vmlaq_n_s16(v2965_tmp, v2964, 3);
+ int16x8_t v2966 = vaddq_s16(v2963, v2965);
+ int16x8_t v2967 = vqrdmulhq_n_s16(v2966, 21637);
+ int16x8_t v2968 = vaddq_s16(v2962, v2967);
+ int16x8_t v2969 = vqrdmulhq_n_s16(v2968, 17479);
+ int16x8_t v2970 = vaddq_s16(v2958, v2969);
+ int16x8_t v2971 = vsubq_s16(v1733, v1744);
+ int16x8_t v2972 = vsubq_s16(v1755, v1766);
+ int16x8_t v2973_tmp = vqrdmulhq_n_s16(v2972, 13357);
+ int16x8_t v2973 = vmlaq_n_s16(v2973_tmp, v2972, 3);
+ int16x8_t v2974 = vaddq_s16(v2971, v2973);
+ int16x8_t v2975 = vsubq_s16(v1779, v1790);
+ int16x8_t v2976 = vsubq_s16(v1801, v1812);
+ int16x8_t v2977_tmp = vqrdmulhq_n_s16(v2976, 13357);
+ int16x8_t v2977 = vmlaq_n_s16(v2977_tmp, v2976, 3);
+ int16x8_t v2978 = vaddq_s16(v2975, v2977);
+ int16x8_t v2979 = vqrdmulhq_n_s16(v2978, 21637);
+ int16x8_t v2980 = vaddq_s16(v2974, v2979);
+ int16x8_t v2981 = vsubq_s16(v1827, v1838);
+ int16x8_t v2982 = vsubq_s16(v1849, v1860);
+ int16x8_t v2983_tmp = vqrdmulhq_n_s16(v2982, 13357);
+ int16x8_t v2983 = vmlaq_n_s16(v2983_tmp, v2982, 3);
+ int16x8_t v2984 = vaddq_s16(v2981, v2983);
+ int16x8_t v2985 = vsubq_s16(v1873, v1884);
+ int16x8_t v2986 = vsubq_s16(v1895, v1906);
+ int16x8_t v2987_tmp = vqrdmulhq_n_s16(v2986, 13357);
+ int16x8_t v2987 = vmlaq_n_s16(v2987_tmp, v2986, 3);
+ int16x8_t v2988 = vaddq_s16(v2985, v2987);
+ int16x8_t v2989 = vqrdmulhq_n_s16(v2988, 21637);
+ int16x8_t v2990 = vaddq_s16(v2984, v2989);
+ int16x8_t v2991 = vqrdmulhq_n_s16(v2990, 17479);
+ int16x8_t v2992 = vaddq_s16(v2980, v2991);
+ int16x8_t v2993 = vqrdmulhq_n_s16(v2992, 16647);
+ int16x8_t v2994 = vaddq_s16(v2970, v2993);
+ int16x8_t v2995 = vsubq_s16(v25, v60);
+ int16x8_t v2996 = vsubq_s16(v102, v138);
+ int16x8_t v2997_tmp = vqrdmulhq_n_s16(v2996, 6226);
+ int16x8_t v2997 = vmlaq_n_s16(v2997_tmp, v2996, 10);
+ int16x8_t v2998 = vaddq_s16(v2995, v2997);
+ int16x8_t v2999 = vsubq_s16(v182, v233);
+ int16x8_t v3000 = vsubq_s16(v275, v312);
+ int16x8_t v3001_tmp = vqrdmulhq_n_s16(v3000, 6226);
+ int16x8_t v3001 = vmlaq_n_s16(v3001_tmp, v3000, 10);
+ int16x8_t v3002 = vaddq_s16(v2999, v3001);
+ int16x8_t v3003 = vqrdmulhq_n_s16(v3002, 22622);
+ int16x8_t v3004 = vaddq_s16(v2998, v3003);
+ int16x8_t v3005 = vsubq_s16(v358, v409);
+ int16x8_t v3006 = vsubq_s16(v481, v519);
+ int16x8_t v3007_tmp = vqrdmulhq_n_s16(v3006, 6226);
+ int16x8_t v3007 = vmlaq_n_s16(v3007_tmp, v3006, 10);
+ int16x8_t v3008 = vaddq_s16(v3005, v3007);
+ int16x8_t v3009 = vsubq_s16(v563, v614);
+ int16x8_t v3010 = vsubq_s16(v656, v694);
+ int16x8_t v3011_tmp = vqrdmulhq_n_s16(v3010, 6226);
+ int16x8_t v3011 = vmlaq_n_s16(v3011_tmp, v3010, 10);
+ int16x8_t v3012 = vaddq_s16(v3009, v3011);
+ int16x8_t v3013 = vqrdmulhq_n_s16(v3012, 22622);
+ int16x8_t v3014 = vaddq_s16(v3008, v3013);
+ int16x8_t v3015 = vqrdmulhq_n_s16(v3014, 17646);
+ int16x8_t v3016 = vaddq_s16(v3004, v3015);
+ int16x8_t v3017 = vsubq_s16(v742, v793);
+ int16x8_t v3018 = vsubq_s16(v865, v903);
+ int16x8_t v3019_tmp = vqrdmulhq_n_s16(v3018, 6226);
+ int16x8_t v3019 = vmlaq_n_s16(v3019_tmp, v3018, 10);
+ int16x8_t v3020 = vaddq_s16(v3017, v3019);
+ int16x8_t v3021 = vsubq_s16(v977, v1060);
+ int16x8_t v3022 = vsubq_s16(v1102, v1141);
+ int16x8_t v3023_tmp = vqrdmulhq_n_s16(v3022, 6226);
+ int16x8_t v3023 = vmlaq_n_s16(v3023_tmp, v3022, 10);
+ int16x8_t v3024 = vaddq_s16(v3021, v3023);
+ int16x8_t v3025 = vqrdmulhq_n_s16(v3024, 22622);
+ int16x8_t v3026 = vaddq_s16(v3020, v3025);
+ int16x8_t v3027 = vsubq_s16(v1187, v1238);
+ int16x8_t v3028 = vsubq_s16(v1310, v1348);
+ int16x8_t v3029_tmp = vqrdmulhq_n_s16(v3028, 6226);
+ int16x8_t v3029 = vmlaq_n_s16(v3029_tmp, v3028, 10);
+ int16x8_t v3030 = vaddq_s16(v3027, v3029);
+ int16x8_t v3031 = vsubq_s16(v1392, v1443);
+ int16x8_t v3032 = vsubq_s16(v1485, v1524);
+ int16x8_t v3033_tmp = vqrdmulhq_n_s16(v3032, 6226);
+ int16x8_t v3033 = vmlaq_n_s16(v3033_tmp, v3032, 10);
+ int16x8_t v3034 = vaddq_s16(v3031, v3033);
+ int16x8_t v3035 = vqrdmulhq_n_s16(v3034, 22622);
+ int16x8_t v3036 = vaddq_s16(v3030, v3035);
+ int16x8_t v3037 = vqrdmulhq_n_s16(v3036, 17646);
+ int16x8_t v3038 = vaddq_s16(v3026, v3037);
+ int16x8_t v3039 = vqrdmulhq_n_s16(v3038, 16685);
+ int16x8_t v3040 = vaddq_s16(v3016, v3039);
+ int16x8_t v3041 = vsubq_s16(v2995, v2997);
+ int16x8_t v3042 = vsubq_s16(v2999, v3001);
+ int16x8_t v3043 = vqrdmulhq_n_s16(v3042, 23761);
+ int16x8_t v3044 = vaddq_s16(v3041, v3043);
+ int16x8_t v3045 = vsubq_s16(v3005, v3007);
+ int16x8_t v3046 = vsubq_s16(v3009, v3011);
+ int16x8_t v3047 = vqrdmulhq_n_s16(v3046, 23761);
+ int16x8_t v3048 = vaddq_s16(v3045, v3047);
+ int16x8_t v3049 = vqrdmulhq_n_s16(v3048, 17826);
+ int16x8_t v3050 = vaddq_s16(v3044, v3049);
+ int16x8_t v3051 = vsubq_s16(v3017, v3019);
+ int16x8_t v3052 = vsubq_s16(v3021, v3023);
+ int16x8_t v3053 = vqrdmulhq_n_s16(v3052, 23761);
+ int16x8_t v3054 = vaddq_s16(v3051, v3053);
+ int16x8_t v3055 = vsubq_s16(v3027, v3029);
+ int16x8_t v3056 = vsubq_s16(v3031, v3033);
+ int16x8_t v3057 = vqrdmulhq_n_s16(v3056, 23761);
+ int16x8_t v3058 = vaddq_s16(v3055, v3057);
+ int16x8_t v3059 = vqrdmulhq_n_s16(v3058, 17826);
+ int16x8_t v3060 = vaddq_s16(v3054, v3059);
+ int16x8_t v3061 = vqrdmulhq_n_s16(v3060, 16726);
+ int16x8_t v3062 = vaddq_s16(v3050, v3061);
+ int16x8_t v3063 = vsubq_s16(v2949, v2951);
+ int16x8_t v3064 = vsubq_s16(v2953, v2955);
+ int16x8_t v3065 = vqrdmulhq_n_s16(v3064, 25084);
+ int16x8_t v3066 = vaddq_s16(v3063, v3065);
+ int16x8_t v3067 = vsubq_s16(v2959, v2961);
+ int16x8_t v3068 = vsubq_s16(v2963, v2965);
+ int16x8_t v3069 = vqrdmulhq_n_s16(v3068, 25084);
+ int16x8_t v3070 = vaddq_s16(v3067, v3069);
+ int16x8_t v3071 = vqrdmulhq_n_s16(v3070, 18021);
+ int16x8_t v3072 = vaddq_s16(v3066, v3071);
+ int16x8_t v3073 = vsubq_s16(v2971, v2973);
+ int16x8_t v3074 = vsubq_s16(v2975, v2977);
+ int16x8_t v3075 = vqrdmulhq_n_s16(v3074, 25084);
+ int16x8_t v3076 = vaddq_s16(v3073, v3075);
+ int16x8_t v3077 = vsubq_s16(v2981, v2983);
+ int16x8_t v3078 = vsubq_s16(v2985, v2987);
+ int16x8_t v3079 = vqrdmulhq_n_s16(v3078, 25084);
+ int16x8_t v3080 = vaddq_s16(v3077, v3079);
+ int16x8_t v3081 = vqrdmulhq_n_s16(v3080, 18021);
+ int16x8_t v3082 = vaddq_s16(v3076, v3081);
+ int16x8_t v3083 = vqrdmulhq_n_s16(v3082, 16769);
+ int16x8_t v3084 = vaddq_s16(v3072, v3083);
+ int16x8_t v3085 = vsubq_s16(v2903, v2905);
+ int16x8_t v3086 = vsubq_s16(v2907, v2909);
+ int16x8_t v3087 = vqrdmulhq_n_s16(v3086, 26631);
+ int16x8_t v3088 = vaddq_s16(v3085, v3087);
+ int16x8_t v3089 = vsubq_s16(v2913, v2915);
+ int16x8_t v3090 = vsubq_s16(v2917, v2919);
+ int16x8_t v3091 = vqrdmulhq_n_s16(v3090, 26631);
+ int16x8_t v3092 = vaddq_s16(v3089, v3091);
+ int16x8_t v3093 = vqrdmulhq_n_s16(v3092, 18231);
+ int16x8_t v3094 = vaddq_s16(v3088, v3093);
+ int16x8_t v3095 = vsubq_s16(v2925, v2927);
+ int16x8_t v3096 = vsubq_s16(v2929, v2931);
+ int16x8_t v3097 = vqrdmulhq_n_s16(v3096, 26631);
+ int16x8_t v3098 = vaddq_s16(v3095, v3097);
+ int16x8_t v3099 = vsubq_s16(v2935, v2937);
+ int16x8_t v3100 = vsubq_s16(v2939, v2941);
+ int16x8_t v3101 = vqrdmulhq_n_s16(v3100, 26631);
+ int16x8_t v3102 = vaddq_s16(v3099, v3101);
+ int16x8_t v3103 = vqrdmulhq_n_s16(v3102, 18231);
+ int16x8_t v3104 = vaddq_s16(v3098, v3103);
+ int16x8_t v3105 = vqrdmulhq_n_s16(v3104, 16815);
+ int16x8_t v3106 = vaddq_s16(v3094, v3105);
+ int16x8_t v3107 = vsubq_s16(v2857, v2859);
+ int16x8_t v3108 = vsubq_s16(v2861, v2863);
+ int16x8_t v3109 = vqrdmulhq_n_s16(v3108, 28454);
+ int16x8_t v3110 = vaddq_s16(v3107, v3109);
+ int16x8_t v3111 = vsubq_s16(v2867, v2869);
+ int16x8_t v3112 = vsubq_s16(v2871, v2873);
+ int16x8_t v3113 = vqrdmulhq_n_s16(v3112, 28454);
+ int16x8_t v3114 = vaddq_s16(v3111, v3113);
+ int16x8_t v3115 = vqrdmulhq_n_s16(v3114, 18458);
+ int16x8_t v3116 = vaddq_s16(v3110, v3115);
+ int16x8_t v3117 = vsubq_s16(v2879, v2881);
+ int16x8_t v3118 = vsubq_s16(v2883, v2885);
+ int16x8_t v3119 = vqrdmulhq_n_s16(v3118, 28454);
+ int16x8_t v3120 = vaddq_s16(v3117, v3119);
+ int16x8_t v3121 = vsubq_s16(v2889, v2891);
+ int16x8_t v3122 = vsubq_s16(v2893, v2895);
+ int16x8_t v3123 = vqrdmulhq_n_s16(v3122, 28454);
+ int16x8_t v3124 = vaddq_s16(v3121, v3123);
+ int16x8_t v3125 = vqrdmulhq_n_s16(v3124, 18458);
+ int16x8_t v3126 = vaddq_s16(v3120, v3125);
+ int16x8_t v3127 = vqrdmulhq_n_s16(v3126, 16865);
+ int16x8_t v3128 = vaddq_s16(v3116, v3127);
+ int16x8_t v3129 = vsubq_s16(v2811, v2813);
+ int16x8_t v3130 = vsubq_s16(v2815, v2817);
+ int16x8_t v3131 = vqrdmulhq_n_s16(v3130, 30624);
+ int16x8_t v3132 = vaddq_s16(v3129, v3131);
+ int16x8_t v3133 = vsubq_s16(v2821, v2823);
+ int16x8_t v3134 = vsubq_s16(v2825, v2827);
+ int16x8_t v3135 = vqrdmulhq_n_s16(v3134, 30624);
+ int16x8_t v3136 = vaddq_s16(v3133, v3135);
+ int16x8_t v3137 = vqrdmulhq_n_s16(v3136, 18702);
+ int16x8_t v3138 = vaddq_s16(v3132, v3137);
+ int16x8_t v3139 = vsubq_s16(v2833, v2835);
+ int16x8_t v3140 = vsubq_s16(v2837, v2839);
+ int16x8_t v3141 = vqrdmulhq_n_s16(v3140, 30624);
+ int16x8_t v3142 = vaddq_s16(v3139, v3141);
+ int16x8_t v3143 = vsubq_s16(v2843, v2845);
+ int16x8_t v3144 = vsubq_s16(v2847, v2849);
+ int16x8_t v3145 = vqrdmulhq_n_s16(v3144, 30624);
+ int16x8_t v3146 = vaddq_s16(v3143, v3145);
+ int16x8_t v3147 = vqrdmulhq_n_s16(v3146, 18702);
+ int16x8_t v3148 = vaddq_s16(v3142, v3147);
+ int16x8_t v3149 = vqrdmulhq_n_s16(v3148, 16916);
+ int16x8_t v3150 = vaddq_s16(v3138, v3149);
+ int16x8_t v3151 = vsubq_s16(v2765, v2767);
+ int16x8_t v3152 = vsubq_s16(v2769, v2771);
+ int16x8_t v3153_tmp = vqrdmulhq_n_s16(v3152, 472);
+ int16x8_t v3153 = vaddq_s16(v3153_tmp, v3152);
+ int16x8_t v3154 = vaddq_s16(v3151, v3153);
+ int16x8_t v3155 = vsubq_s16(v2775, v2777);
+ int16x8_t v3156 = vsubq_s16(v2779, v2781);
+ int16x8_t v3157_tmp = vqrdmulhq_n_s16(v3156, 472);
+ int16x8_t v3157 = vaddq_s16(v3157_tmp, v3156);
+ int16x8_t v3158 = vaddq_s16(v3155, v3157);
+ int16x8_t v3159 = vqrdmulhq_n_s16(v3158, 18964);
+ int16x8_t v3160 = vaddq_s16(v3154, v3159);
+ int16x8_t v3161 = vsubq_s16(v2787, v2789);
+ int16x8_t v3162 = vsubq_s16(v2791, v2793);
+ int16x8_t v3163_tmp = vqrdmulhq_n_s16(v3162, 472);
+ int16x8_t v3163 = vaddq_s16(v3163_tmp, v3162);
+ int16x8_t v3164 = vaddq_s16(v3161, v3163);
+ int16x8_t v3165 = vsubq_s16(v2797, v2799);
+ int16x8_t v3166 = vsubq_s16(v2801, v2803);
+ int16x8_t v3167_tmp = vqrdmulhq_n_s16(v3166, 472);
+ int16x8_t v3167 = vaddq_s16(v3167_tmp, v3166);
+ int16x8_t v3168 = vaddq_s16(v3165, v3167);
+ int16x8_t v3169 = vqrdmulhq_n_s16(v3168, 18964);
+ int16x8_t v3170 = vaddq_s16(v3164, v3169);
+ int16x8_t v3171 = vqrdmulhq_n_s16(v3170, 16971);
+ int16x8_t v3172 = vaddq_s16(v3160, v3171);
+ int16x8_t v3173 = vsubq_s16(v2719, v2721);
+ int16x8_t v3174 = vsubq_s16(v2723, v2725);
+ int16x8_t v3175_tmp = vqrdmulhq_n_s16(v3174, 3672);
+ int16x8_t v3175 = vaddq_s16(v3175_tmp, v3174);
+ int16x8_t v3176 = vaddq_s16(v3173, v3175);
+ int16x8_t v3177 = vsubq_s16(v2729, v2731);
+ int16x8_t v3178 = vsubq_s16(v2733, v2735);
+ int16x8_t v3179_tmp = vqrdmulhq_n_s16(v3178, 3672);
+ int16x8_t v3179 = vaddq_s16(v3179_tmp, v3178);
+ int16x8_t v3180 = vaddq_s16(v3177, v3179);
+ int16x8_t v3181 = vqrdmulhq_n_s16(v3180, 19245);
+ int16x8_t v3182 = vaddq_s16(v3176, v3181);
+ int16x8_t v3183 = vsubq_s16(v2741, v2743);
+ int16x8_t v3184 = vsubq_s16(v2745, v2747);
+ int16x8_t v3185_tmp = vqrdmulhq_n_s16(v3184, 3672);
+ int16x8_t v3185 = vaddq_s16(v3185_tmp, v3184);
+ int16x8_t v3186 = vaddq_s16(v3183, v3185);
+ int16x8_t v3187 = vsubq_s16(v2751, v2753);
+ int16x8_t v3188 = vsubq_s16(v2755, v2757);
+ int16x8_t v3189_tmp = vqrdmulhq_n_s16(v3188, 3672);
+ int16x8_t v3189 = vaddq_s16(v3189_tmp, v3188);
+ int16x8_t v3190 = vaddq_s16(v3187, v3189);
+ int16x8_t v3191 = vqrdmulhq_n_s16(v3190, 19245);
+ int16x8_t v3192 = vaddq_s16(v3186, v3191);
+ int16x8_t v3193 = vqrdmulhq_n_s16(v3192, 17029);
+ int16x8_t v3194 = vaddq_s16(v3182, v3193);
+ int16x8_t v3195 = vsubq_s16(v2673, v2675);
+ int16x8_t v3196 = vsubq_s16(v2677, v2679);
+ int16x8_t v3197_tmp = vqrdmulhq_n_s16(v3196, 7662);
+ int16x8_t v3197 = vaddq_s16(v3197_tmp, v3196);
+ int16x8_t v3198 = vaddq_s16(v3195, v3197);
+ int16x8_t v3199 = vsubq_s16(v2683, v2685);
+ int16x8_t v3200 = vsubq_s16(v2687, v2689);
+ int16x8_t v3201_tmp = vqrdmulhq_n_s16(v3200, 7662);
+ int16x8_t v3201 = vaddq_s16(v3201_tmp, v3200);
+ int16x8_t v3202 = vaddq_s16(v3199, v3201);
+ int16x8_t v3203 = vqrdmulhq_n_s16(v3202, 19546);
+ int16x8_t v3204 = vaddq_s16(v3198, v3203);
+ int16x8_t v3205 = vsubq_s16(v2695, v2697);
+ int16x8_t v3206 = vsubq_s16(v2699, v2701);
+ int16x8_t v3207_tmp = vqrdmulhq_n_s16(v3206, 7662);
+ int16x8_t v3207 = vaddq_s16(v3207_tmp, v3206);
+ int16x8_t v3208 = vaddq_s16(v3205, v3207);
+ int16x8_t v3209 = vsubq_s16(v2705, v2707);
+ int16x8_t v3210 = vsubq_s16(v2709, v2711);
+ int16x8_t v3211_tmp = vqrdmulhq_n_s16(v3210, 7662);
+ int16x8_t v3211 = vaddq_s16(v3211_tmp, v3210);
+ int16x8_t v3212 = vaddq_s16(v3209, v3211);
+ int16x8_t v3213 = vqrdmulhq_n_s16(v3212, 19546);
+ int16x8_t v3214 = vaddq_s16(v3208, v3213);
+ int16x8_t v3215 = vqrdmulhq_n_s16(v3214, 17090);
+ int16x8_t v3216 = vaddq_s16(v3204, v3215);
+ int16x8_t v3217 = vsubq_s16(v2582, v2587);
+ int16x8_t v3218 = vsubq_s16(v2592, v2597);
+ int16x8_t v3219_tmp = vqrdmulhq_n_s16(v3218, 12756);
+ int16x8_t v3219 = vaddq_s16(v3219_tmp, v3218);
+ int16x8_t v3220 = vaddq_s16(v3217, v3219);
+ int16x8_t v3221 = vsubq_s16(v2604, v2609);
+ int16x8_t v3222 = vsubq_s16(v2614, v2619);
+ int16x8_t v3223_tmp = vqrdmulhq_n_s16(v3222, 12756);
+ int16x8_t v3223 = vaddq_s16(v3223_tmp, v3222);
+ int16x8_t v3224 = vaddq_s16(v3221, v3223);
+ int16x8_t v3225 = vqrdmulhq_n_s16(v3224, 19869);
+ int16x8_t v3226 = vaddq_s16(v3220, v3225);
+ int16x8_t v3227 = vsubq_s16(v2628, v2633);
+ int16x8_t v3228 = vsubq_s16(v2638, v2643);
+ int16x8_t v3229_tmp = vqrdmulhq_n_s16(v3228, 12756);
+ int16x8_t v3229 = vaddq_s16(v3229_tmp, v3228);
+ int16x8_t v3230 = vaddq_s16(v3227, v3229);
+ int16x8_t v3231 = vsubq_s16(v2650, v2655);
+ int16x8_t v3232 = vsubq_s16(v2660, v2665);
+ int16x8_t v3233_tmp = vqrdmulhq_n_s16(v3232, 12756);
+ int16x8_t v3233 = vaddq_s16(v3233_tmp, v3232);
+ int16x8_t v3234 = vaddq_s16(v3231, v3233);
+ int16x8_t v3235 = vqrdmulhq_n_s16(v3234, 19869);
+ int16x8_t v3236 = vaddq_s16(v3230, v3235);
+ int16x8_t v3237 = vqrdmulhq_n_s16(v3236, 17153);
+ int16x8_t v3238 = vaddq_s16(v3226, v3237);
+ int16x8_t v3239 = vsubq_s16(v2488, v2493);
+ int16x8_t v3240 = vsubq_s16(v2498, v2503);
+ int16x8_t v3241_tmp = vqrdmulhq_n_s16(v3240, 19463);
+ int16x8_t v3241 = vaddq_s16(v3241_tmp, v3240);
+ int16x8_t v3242 = vaddq_s16(v3239, v3241);
+ int16x8_t v3243 = vsubq_s16(v2510, v2515);
+ int16x8_t v3244 = vsubq_s16(v2520, v2525);
+ int16x8_t v3245_tmp = vqrdmulhq_n_s16(v3244, 19463);
+ int16x8_t v3245 = vaddq_s16(v3245_tmp, v3244);
+ int16x8_t v3246 = vaddq_s16(v3243, v3245);
+ int16x8_t v3247 = vqrdmulhq_n_s16(v3246, 20216);
+ int16x8_t v3248 = vaddq_s16(v3242, v3247);
+ int16x8_t v3249 = vsubq_s16(v2534, v2539);
+ int16x8_t v3250 = vsubq_s16(v2544, v2549);
+ int16x8_t v3251_tmp = vqrdmulhq_n_s16(v3250, 19463);
+ int16x8_t v3251 = vaddq_s16(v3251_tmp, v3250);
+ int16x8_t v3252 = vaddq_s16(v3249, v3251);
+ int16x8_t v3253 = vsubq_s16(v2556, v2561);
+ int16x8_t v3254 = vsubq_s16(v2566, v2571);
+ int16x8_t v3255_tmp = vqrdmulhq_n_s16(v3254, 19463);
+ int16x8_t v3255 = vaddq_s16(v3255_tmp, v3254);
+ int16x8_t v3256 = vaddq_s16(v3253, v3255);
+ int16x8_t v3257 = vqrdmulhq_n_s16(v3256, 20216);
+ int16x8_t v3258 = vaddq_s16(v3252, v3257);
+ int16x8_t v3259 = vqrdmulhq_n_s16(v3258, 17220);
+ int16x8_t v3260 = vaddq_s16(v3248, v3259);
+ int16x8_t v3261 = vsubq_s16(v2393, v2398);
+ int16x8_t v3262 = vsubq_s16(v2403, v2408);
+ int16x8_t v3263_tmp = vqrdmulhq_n_s16(v3262, 28661);
+ int16x8_t v3263 = vaddq_s16(v3263_tmp, v3262);
+ int16x8_t v3264 = vaddq_s16(v3261, v3263);
+ int16x8_t v3265 = vsubq_s16(v2415, v2420);
+ int16x8_t v3266 = vsubq_s16(v2425, v2430);
+ int16x8_t v3267_tmp = vqrdmulhq_n_s16(v3266, 28661);
+ int16x8_t v3267 = vaddq_s16(v3267_tmp, v3266);
+ int16x8_t v3268 = vaddq_s16(v3265, v3267);
+ int16x8_t v3269 = vqrdmulhq_n_s16(v3268, 20587);
+ int16x8_t v3270 = vaddq_s16(v3264, v3269);
+ int16x8_t v3271 = vsubq_s16(v2439, v2444);
+ int16x8_t v3272 = vsubq_s16(v2449, v2454);
+ int16x8_t v3273_tmp = vqrdmulhq_n_s16(v3272, 28661);
+ int16x8_t v3273 = vaddq_s16(v3273_tmp, v3272);
+ int16x8_t v3274 = vaddq_s16(v3271, v3273);
+ int16x8_t v3275 = vsubq_s16(v2461, v2467);
+ int16x8_t v3276 = vsubq_s16(v2472, v2477);
+ int16x8_t v3277_tmp = vqrdmulhq_n_s16(v3276, 28661);
+ int16x8_t v3277 = vaddq_s16(v3277_tmp, v3276);
+ int16x8_t v3278 = vaddq_s16(v3275, v3277);
+ int16x8_t v3279 = vqrdmulhq_n_s16(v3278, 20587);
+ int16x8_t v3280 = vaddq_s16(v3274, v3279);
+ int16x8_t v3281 = vqrdmulhq_n_s16(v3280, 17290);
+ int16x8_t v3282 = vaddq_s16(v3270, v3281);
+ int16x8_t v3283 = vsubq_s16(v2299, v2304);
+ int16x8_t v3284 = vsubq_s16(v2309, v2314);
+ int16x8_t v3285_tmp = vqrdmulhq_n_s16(v3284, 9242);
+ int16x8_t v3285 = vmlaq_n_s16(v3285_tmp, v3284, 2);
+ int16x8_t v3286 = vaddq_s16(v3283, v3285);
+ int16x8_t v3287 = vsubq_s16(v2321, v2326);
+ int16x8_t v3288 = vsubq_s16(v2331, v2336);
+ int16x8_t v3289_tmp = vqrdmulhq_n_s16(v3288, 9242);
+ int16x8_t v3289 = vmlaq_n_s16(v3289_tmp, v3288, 2);
+ int16x8_t v3290 = vaddq_s16(v3287, v3289);
+ int16x8_t v3291 = vqrdmulhq_n_s16(v3290, 20985);
+ int16x8_t v3292 = vaddq_s16(v3286, v3291);
+ int16x8_t v3293 = vsubq_s16(v2345, v2350);
+ int16x8_t v3294 = vsubq_s16(v2355, v2360);
+ int16x8_t v3295_tmp = vqrdmulhq_n_s16(v3294, 9242);
+ int16x8_t v3295 = vmlaq_n_s16(v3295_tmp, v3294, 2);
+ int16x8_t v3296 = vaddq_s16(v3293, v3295);
+ int16x8_t v3297 = vsubq_s16(v2367, v2372);
+ int16x8_t v3298 = vsubq_s16(v2377, v2382);
+ int16x8_t v3299_tmp = vqrdmulhq_n_s16(v3298, 9242);
+ int16x8_t v3299 = vmlaq_n_s16(v3299_tmp, v3298, 2);
+ int16x8_t v3300 = vaddq_s16(v3297, v3299);
+ int16x8_t v3301 = vqrdmulhq_n_s16(v3300, 20985);
+ int16x8_t v3302 = vaddq_s16(v3296, v3301);
+ int16x8_t v3303 = vqrdmulhq_n_s16(v3302, 17363);
+ int16x8_t v3304 = vaddq_s16(v3292, v3303);
+ int16x8_t v3305 = vsubq_s16(v2115, v2126);
+ int16x8_t v3306 = vsubq_s16(v2137, v2148);
+ int16x8_t v3307_tmp = vqrdmulhq_n_s16(v3306, 30298);
+ int16x8_t v3307 = vmlaq_n_s16(v3307_tmp, v3306, 2);
+ int16x8_t v3308 = vaddq_s16(v3305, v3307);
+ int16x8_t v3309 = vsubq_s16(v2161, v2172);
+ int16x8_t v3310 = vsubq_s16(v2183, v2194);
+ int16x8_t v3311_tmp = vqrdmulhq_n_s16(v3310, 30298);
+ int16x8_t v3311 = vmlaq_n_s16(v3311_tmp, v3310, 2);
+ int16x8_t v3312 = vaddq_s16(v3309, v3311);
+ int16x8_t v3313 = vqrdmulhq_n_s16(v3312, 21412);
+ int16x8_t v3314 = vaddq_s16(v3308, v3313);
+ int16x8_t v3315 = vsubq_s16(v2209, v2220);
+ int16x8_t v3316 = vsubq_s16(v2231, v2242);
+ int16x8_t v3317_tmp = vqrdmulhq_n_s16(v3316, 30298);
+ int16x8_t v3317 = vmlaq_n_s16(v3317_tmp, v3316, 2);
+ int16x8_t v3318 = vaddq_s16(v3315, v3317);
+ int16x8_t v3319 = vsubq_s16(v2255, v2266);
+ int16x8_t v3320 = vsubq_s16(v2277, v2288);
+ int16x8_t v3321_tmp = vqrdmulhq_n_s16(v3320, 30298);
+ int16x8_t v3321 = vmlaq_n_s16(v3321_tmp, v3320, 2);
+ int16x8_t v3322 = vaddq_s16(v3319, v3321);
+ int16x8_t v3323 = vqrdmulhq_n_s16(v3322, 21412);
+ int16x8_t v3324 = vaddq_s16(v3318, v3323);
+ int16x8_t v3325 = vqrdmulhq_n_s16(v3324, 17440);
+ int16x8_t v3326 = vaddq_s16(v3314, v3325);
+ int16x8_t v3327 = vsubq_s16(v1925, v1936);
+ int16x8_t v3328 = vsubq_s16(v1947, v1958);
+ int16x8_t v3329_tmp = vqrdmulhq_n_s16(v3328, 2773);
+ int16x8_t v3329 = vmlaq_n_s16(v3329_tmp, v3328, 4);
+ int16x8_t v3330 = vaddq_s16(v3327, v3329);
+ int16x8_t v3331 = vsubq_s16(v1971, v1982);
+ int16x8_t v3332 = vsubq_s16(v1993, v2004);
+ int16x8_t v3333_tmp = vqrdmulhq_n_s16(v3332, 2773);
+ int16x8_t v3333 = vmlaq_n_s16(v3333_tmp, v3332, 4);
+ int16x8_t v3334 = vaddq_s16(v3331, v3333);
+ int16x8_t v3335 = vqrdmulhq_n_s16(v3334, 21871);
+ int16x8_t v3336 = vaddq_s16(v3330, v3335);
+ int16x8_t v3337 = vsubq_s16(v2019, v2030);
+ int16x8_t v3338 = vsubq_s16(v2041, v2052);
+ int16x8_t v3339_tmp = vqrdmulhq_n_s16(v3338, 2773);
+ int16x8_t v3339 = vmlaq_n_s16(v3339_tmp, v3338, 4);
+ int16x8_t v3340 = vaddq_s16(v3337, v3339);
+ int16x8_t v3341 = vsubq_s16(v2065, v2076);
+ int16x8_t v3342 = vsubq_s16(v2087, v2098);
+ int16x8_t v3343_tmp = vqrdmulhq_n_s16(v3342, 2773);
+ int16x8_t v3343 = vmlaq_n_s16(v3343_tmp, v3342, 4);
+ int16x8_t v3344 = vaddq_s16(v3341, v3343);
+ int16x8_t v3345 = vqrdmulhq_n_s16(v3344, 21871);
+ int16x8_t v3346 = vaddq_s16(v3340, v3345);
+ int16x8_t v3347 = vqrdmulhq_n_s16(v3346, 17520);
+ int16x8_t v3348 = vaddq_s16(v3336, v3347);
+ int16x8_t v3349 = vsubq_s16(v1555, v1578);
+ int16x8_t v3350 = vsubq_s16(v1601, v1624);
+ int16x8_t v3351_tmp = vqrdmulhq_n_s16(v3350, 26108);
+ int16x8_t v3351 = vmlaq_n_s16(v3351_tmp, v3350, 6);
+ int16x8_t v3352 = vaddq_s16(v3349, v3351);
+ int16x8_t v3353 = vsubq_s16(v1649, v1672);
+ int16x8_t v3354 = vsubq_s16(v1695, v1718);
+ int16x8_t v3355_tmp = vqrdmulhq_n_s16(v3354, 26108);
+ int16x8_t v3355 = vmlaq_n_s16(v3355_tmp, v3354, 6);
+ int16x8_t v3356 = vaddq_s16(v3353, v3355);
+ int16x8_t v3357 = vqrdmulhq_n_s16(v3356, 22363);
+ int16x8_t v3358 = vaddq_s16(v3352, v3357);
+ int16x8_t v3359 = vsubq_s16(v1745, v1768);
+ int16x8_t v3360 = vsubq_s16(v1791, v1814);
+ int16x8_t v3361_tmp = vqrdmulhq_n_s16(v3360, 26108);
+ int16x8_t v3361 = vmlaq_n_s16(v3361_tmp, v3360, 6);
+ int16x8_t v3362 = vaddq_s16(v3359, v3361);
+ int16x8_t v3363 = vsubq_s16(v1839, v1862);
+ int16x8_t v3364 = vsubq_s16(v1885, v1908);
+ int16x8_t v3365_tmp = vqrdmulhq_n_s16(v3364, 26108);
+ int16x8_t v3365 = vmlaq_n_s16(v3365_tmp, v3364, 6);
+ int16x8_t v3366 = vaddq_s16(v3363, v3365);
+ int16x8_t v3367 = vqrdmulhq_n_s16(v3366, 22363);
+ int16x8_t v3368 = vaddq_s16(v3362, v3367);
+ int16x8_t v3369 = vqrdmulhq_n_s16(v3368, 17603);
+ int16x8_t v3370 = vaddq_s16(v3358, v3369);
+ int16x8_t v3371 = vsubq_s16(v61, v140);
+ int16x8_t v3372 = vsubq_s16(v234, v314);
+ int16x8_t v3373_tmp = vqrdmulhq_n_s16(v3372, 12251);
+ int16x8_t v3373 = vmlaq_n_s16(v3373_tmp, v3372, 20);
+ int16x8_t v3374 = vaddq_s16(v3371, v3373);
+ int16x8_t v3375 = vsubq_s16(v410, v521);
+ int16x8_t v3376 = vsubq_s16(v615, v696);
+ int16x8_t v3377_tmp = vqrdmulhq_n_s16(v3376, 12251);
+ int16x8_t v3377 = vmlaq_n_s16(v3377_tmp, v3376, 20);
+ int16x8_t v3378 = vaddq_s16(v3375, v3377);
+ int16x8_t v3379 = vqrdmulhq_n_s16(v3378, 22891);
+ int16x8_t v3380 = vaddq_s16(v3374, v3379);
+ int16x8_t v3381 = vsubq_s16(v794, v905);
+ int16x8_t v3382 = vsubq_s16(v1061, v1143);
+ int16x8_t v3383_tmp = vqrdmulhq_n_s16(v3382, 12251);
+ int16x8_t v3383 = vmlaq_n_s16(v3383_tmp, v3382, 20);
+ int16x8_t v3384 = vaddq_s16(v3381, v3383);
+ int16x8_t v3385 = vsubq_s16(v1239, v1350);
+ int16x8_t v3386 = vsubq_s16(v1444, v1526);
+ int16x8_t v3387_tmp = vqrdmulhq_n_s16(v3386, 12251);
+ int16x8_t v3387 = vmlaq_n_s16(v3387_tmp, v3386, 20);
+ int16x8_t v3388 = vaddq_s16(v3385, v3387);
+ int16x8_t v3389 = vqrdmulhq_n_s16(v3388, 22891);
+ int16x8_t v3390 = vaddq_s16(v3384, v3389);
+ int16x8_t v3391 = vqrdmulhq_n_s16(v3390, 17689);
+ int16x8_t v3392 = vaddq_s16(v3380, v3391);
+ int16x8_t v3393 = vsubq_s16(v3371, v3373);
+ int16x8_t v3394 = vsubq_s16(v3375, v3377);
+ int16x8_t v3395 = vqrdmulhq_n_s16(v3394, 23460);
+ int16x8_t v3396 = vaddq_s16(v3393, v3395);
+ int16x8_t v3397 = vsubq_s16(v3381, v3383);
+ int16x8_t v3398 = vsubq_s16(v3385, v3387);
+ int16x8_t v3399 = vqrdmulhq_n_s16(v3398, 23460);
+ int16x8_t v3400 = vaddq_s16(v3397, v3399);
+ int16x8_t v3401 = vqrdmulhq_n_s16(v3400, 17779);
+ int16x8_t v3402 = vaddq_s16(v3396, v3401);
+ int16x8_t v3403 = vsubq_s16(v3349, v3351);
+ int16x8_t v3404 = vsubq_s16(v3353, v3355);
+ int16x8_t v3405 = vqrdmulhq_n_s16(v3404, 24073);
+ int16x8_t v3406 = vaddq_s16(v3403, v3405);
+ int16x8_t v3407 = vsubq_s16(v3359, v3361);
+ int16x8_t v3408 = vsubq_s16(v3363, v3365);
+ int16x8_t v3409 = vqrdmulhq_n_s16(v3408, 24073);
+ int16x8_t v3410 = vaddq_s16(v3407, v3409);
+ int16x8_t v3411 = vqrdmulhq_n_s16(v3410, 17873);
+ int16x8_t v3412 = vaddq_s16(v3406, v3411);
+ int16x8_t v3413 = vsubq_s16(v3327, v3329);
+ int16x8_t v3414 = vsubq_s16(v3331, v3333);
+ int16x8_t v3415 = vqrdmulhq_n_s16(v3414, 24734);
+ int16x8_t v3416 = vaddq_s16(v3413, v3415);
+ int16x8_t v3417 = vsubq_s16(v3337, v3339);
+ int16x8_t v3418 = vsubq_s16(v3341, v3343);
+ int16x8_t v3419 = vqrdmulhq_n_s16(v3418, 24734);
+ int16x8_t v3420 = vaddq_s16(v3417, v3419);
+ int16x8_t v3421 = vqrdmulhq_n_s16(v3420, 17971);
+ int16x8_t v3422 = vaddq_s16(v3416, v3421);
+ int16x8_t v3423 = vsubq_s16(v3305, v3307);
+ int16x8_t v3424 = vsubq_s16(v3309, v3311);
+ int16x8_t v3425 = vqrdmulhq_n_s16(v3424, 25448);
+ int16x8_t v3426 = vaddq_s16(v3423, v3425);
+ int16x8_t v3427 = vsubq_s16(v3315, v3317);
+ int16x8_t v3428 = vsubq_s16(v3319, v3321);
+ int16x8_t v3429 = vqrdmulhq_n_s16(v3428, 25448);
+ int16x8_t v3430 = vaddq_s16(v3427, v3429);
+ int16x8_t v3431 = vqrdmulhq_n_s16(v3430, 18072);
+ int16x8_t v3432 = vaddq_s16(v3426, v3431);
+ int16x8_t v3433 = vsubq_s16(v3283, v3285);
+ int16x8_t v3434 = vsubq_s16(v3287, v3289);
+ int16x8_t v3435 = vqrdmulhq_n_s16(v3434, 26220);
+ int16x8_t v3436 = vaddq_s16(v3433, v3435);
+ int16x8_t v3437 = vsubq_s16(v3293, v3295);
+ int16x8_t v3438 = vsubq_s16(v3297, v3299);
+ int16x8_t v3439 = vqrdmulhq_n_s16(v3438, 26220);
+ int16x8_t v3440 = vaddq_s16(v3437, v3439);
+ int16x8_t v3441 = vqrdmulhq_n_s16(v3440, 18177);
+ int16x8_t v3442 = vaddq_s16(v3436, v3441);
+ int16x8_t v3443 = vsubq_s16(v3261, v3263);
+ int16x8_t v3444 = vsubq_s16(v3265, v3267);
+ int16x8_t v3445 = vqrdmulhq_n_s16(v3444, 27058);
+ int16x8_t v3446 = vaddq_s16(v3443, v3445);
+ int16x8_t v3447 = vsubq_s16(v3271, v3273);
+ int16x8_t v3448 = vsubq_s16(v3275, v3277);
+ int16x8_t v3449 = vqrdmulhq_n_s16(v3448, 27058);
+ int16x8_t v3450 = vaddq_s16(v3447, v3449);
+ int16x8_t v3451 = vqrdmulhq_n_s16(v3450, 18286);
+ int16x8_t v3452 = vaddq_s16(v3446, v3451);
+ int16x8_t v3453 = vsubq_s16(v3239, v3241);
+ int16x8_t v3454 = vsubq_s16(v3243, v3245);
+ int16x8_t v3455 = vqrdmulhq_n_s16(v3454, 27969);
+ int16x8_t v3456 = vaddq_s16(v3453, v3455);
+ int16x8_t v3457 = vsubq_s16(v3249, v3251);
+ int16x8_t v3458 = vsubq_s16(v3253, v3255);
+ int16x8_t v3459 = vqrdmulhq_n_s16(v3458, 27969);
+ int16x8_t v3460 = vaddq_s16(v3457, v3459);
+ int16x8_t v3461 = vqrdmulhq_n_s16(v3460, 18400);
+ int16x8_t v3462 = vaddq_s16(v3456, v3461);
+ int16x8_t v3463 = vsubq_s16(v3217, v3219);
+ int16x8_t v3464 = vsubq_s16(v3221, v3223);
+ int16x8_t v3465 = vqrdmulhq_n_s16(v3464, 28961);
+ int16x8_t v3466 = vaddq_s16(v3463, v3465);
+ int16x8_t v3467 = vsubq_s16(v3227, v3229);
+ int16x8_t v3468 = vsubq_s16(v3231, v3233);
+ int16x8_t v3469 = vqrdmulhq_n_s16(v3468, 28961);
+ int16x8_t v3470 = vaddq_s16(v3467, v3469);
+ int16x8_t v3471 = vqrdmulhq_n_s16(v3470, 18517);
+ int16x8_t v3472 = vaddq_s16(v3466, v3471);
+ int16x8_t v3473 = vsubq_s16(v3195, v3197);
+ int16x8_t v3474 = vsubq_s16(v3199, v3201);
+ int16x8_t v3475 = vqrdmulhq_n_s16(v3474, 30044);
+ int16x8_t v3476 = vaddq_s16(v3473, v3475);
+ int16x8_t v3477 = vsubq_s16(v3205, v3207);
+ int16x8_t v3478 = vsubq_s16(v3209, v3211);
+ int16x8_t v3479 = vqrdmulhq_n_s16(v3478, 30044);
+ int16x8_t v3480 = vaddq_s16(v3477, v3479);
+ int16x8_t v3481 = vqrdmulhq_n_s16(v3480, 18639);
+ int16x8_t v3482 = vaddq_s16(v3476, v3481);
+ int16x8_t v3483 = vsubq_s16(v3173, v3175);
+ int16x8_t v3484 = vsubq_s16(v3177, v3179);
+ int16x8_t v3485 = vqrdmulhq_n_s16(v3484, 31232);
+ int16x8_t v3486 = vaddq_s16(v3483, v3485);
+ int16x8_t v3487 = vsubq_s16(v3183, v3185);
+ int16x8_t v3488 = vsubq_s16(v3187, v3189);
+ int16x8_t v3489 = vqrdmulhq_n_s16(v3488, 31232);
+ int16x8_t v3490 = vaddq_s16(v3487, v3489);
+ int16x8_t v3491 = vqrdmulhq_n_s16(v3490, 18765);
+ int16x8_t v3492 = vaddq_s16(v3486, v3491);
+ int16x8_t v3493 = vsubq_s16(v3151, v3153);
+ int16x8_t v3494 = vsubq_s16(v3155, v3157);
+ int16x8_t v3495 = vqrdmulhq_n_s16(v3494, 32538);
+ int16x8_t v3496 = vaddq_s16(v3493, v3495);
+ int16x8_t v3497 = vsubq_s16(v3161, v3163);
+ int16x8_t v3498 = vsubq_s16(v3165, v3167);
+ int16x8_t v3499 = vqrdmulhq_n_s16(v3498, 32538);
+ int16x8_t v3500 = vaddq_s16(v3497, v3499);
+ int16x8_t v3501 = vqrdmulhq_n_s16(v3500, 18896);
+ int16x8_t v3502 = vaddq_s16(v3496, v3501);
+ int16x8_t v3503 = vsubq_s16(v3129, v3131);
+ int16x8_t v3504 = vsubq_s16(v3133, v3135);
+ int16x8_t v3505_tmp = vqrdmulhq_n_s16(v3504, 1211);
+ int16x8_t v3505 = vaddq_s16(v3505_tmp, v3504);
+ int16x8_t v3506 = vaddq_s16(v3503, v3505);
+ int16x8_t v3507 = vsubq_s16(v3139, v3141);
+ int16x8_t v3508 = vsubq_s16(v3143, v3145);
+ int16x8_t v3509_tmp = vqrdmulhq_n_s16(v3508, 1211);
+ int16x8_t v3509 = vaddq_s16(v3509_tmp, v3508);
+ int16x8_t v3510 = vaddq_s16(v3507, v3509);
+ int16x8_t v3511 = vqrdmulhq_n_s16(v3510, 19032);
+ int16x8_t v3512 = vaddq_s16(v3506, v3511);
+ int16x8_t v3513 = vsubq_s16(v3107, v3109);
+ int16x8_t v3514 = vsubq_s16(v3111, v3113);
+ int16x8_t v3515_tmp = vqrdmulhq_n_s16(v3514, 2808);
+ int16x8_t v3515 = vaddq_s16(v3515_tmp, v3514);
+ int16x8_t v3516 = vaddq_s16(v3513, v3515);
+ int16x8_t v3517 = vsubq_s16(v3117, v3119);
+ int16x8_t v3518 = vsubq_s16(v3121, v3123);
+ int16x8_t v3519_tmp = vqrdmulhq_n_s16(v3518, 2808);
+ int16x8_t v3519 = vaddq_s16(v3519_tmp, v3518);
+ int16x8_t v3520 = vaddq_s16(v3517, v3519);
+ int16x8_t v3521 = vqrdmulhq_n_s16(v3520, 19172);
+ int16x8_t v3522 = vaddq_s16(v3516, v3521);
+ int16x8_t v3523 = vsubq_s16(v3085, v3087);
+ int16x8_t v3524 = vsubq_s16(v3089, v3091);
+ int16x8_t v3525_tmp = vqrdmulhq_n_s16(v3524, 4586);
+ int16x8_t v3525 = vaddq_s16(v3525_tmp, v3524);
+ int16x8_t v3526 = vaddq_s16(v3523, v3525);
+ int16x8_t v3527 = vsubq_s16(v3095, v3097);
+ int16x8_t v3528 = vsubq_s16(v3099, v3101);
+ int16x8_t v3529_tmp = vqrdmulhq_n_s16(v3528, 4586);
+ int16x8_t v3529 = vaddq_s16(v3529_tmp, v3528);
+ int16x8_t v3530 = vaddq_s16(v3527, v3529);
+ int16x8_t v3531 = vqrdmulhq_n_s16(v3530, 19318);
+ int16x8_t v3532 = vaddq_s16(v3526, v3531);
+ int16x8_t v3533 = vsubq_s16(v3063, v3065);
+ int16x8_t v3534 = vsubq_s16(v3067, v3069);
+ int16x8_t v3535_tmp = vqrdmulhq_n_s16(v3534, 6576);
+ int16x8_t v3535 = vaddq_s16(v3535_tmp, v3534);
+ int16x8_t v3536 = vaddq_s16(v3533, v3535);
+ int16x8_t v3537 = vsubq_s16(v3073, v3075);
+ int16x8_t v3538 = vsubq_s16(v3077, v3079);
+ int16x8_t v3539_tmp = vqrdmulhq_n_s16(v3538, 6576);
+ int16x8_t v3539 = vaddq_s16(v3539_tmp, v3538);
+ int16x8_t v3540 = vaddq_s16(v3537, v3539);
+ int16x8_t v3541 = vqrdmulhq_n_s16(v3540, 19469);
+ int16x8_t v3542 = vaddq_s16(v3536, v3541);
+ int16x8_t v3543 = vsubq_s16(v3041, v3043);
+ int16x8_t v3544 = vsubq_s16(v3045, v3047);
+ int16x8_t v3545_tmp = vqrdmulhq_n_s16(v3544, 8817);
+ int16x8_t v3545 = vaddq_s16(v3545_tmp, v3544);
+ int16x8_t v3546 = vaddq_s16(v3543, v3545);
+ int16x8_t v3547 = vsubq_s16(v3051, v3053);
+ int16x8_t v3548 = vsubq_s16(v3055, v3057);
+ int16x8_t v3549_tmp = vqrdmulhq_n_s16(v3548, 8817);
+ int16x8_t v3549 = vaddq_s16(v3549_tmp, v3548);
+ int16x8_t v3550 = vaddq_s16(v3547, v3549);
+ int16x8_t v3551 = vqrdmulhq_n_s16(v3550, 19625);
+ int16x8_t v3552 = vaddq_s16(v3546, v3551);
+ int16x8_t v3553 = vsubq_s16(v2998, v3003);
+ int16x8_t v3554 = vsubq_s16(v3008, v3013);
+ int16x8_t v3555_tmp = vqrdmulhq_n_s16(v3554, 11356);
+ int16x8_t v3555 = vaddq_s16(v3555_tmp, v3554);
+ int16x8_t v3556 = vaddq_s16(v3553, v3555);
+ int16x8_t v3557 = vsubq_s16(v3020, v3025);
+ int16x8_t v3558 = vsubq_s16(v3030, v3035);
+ int16x8_t v3559_tmp = vqrdmulhq_n_s16(v3558, 11356);
+ int16x8_t v3559 = vaddq_s16(v3559_tmp, v3558);
+ int16x8_t v3560 = vaddq_s16(v3557, v3559);
+ int16x8_t v3561 = vqrdmulhq_n_s16(v3560, 19786);
+ int16x8_t v3562 = vaddq_s16(v3556, v3561);
+ int16x8_t v3563 = vsubq_s16(v2952, v2957);
+ int16x8_t v3564 = vsubq_s16(v2962, v2967);
+ int16x8_t v3565_tmp = vqrdmulhq_n_s16(v3564, 14256);
+ int16x8_t v3565 = vaddq_s16(v3565_tmp, v3564);
+ int16x8_t v3566 = vaddq_s16(v3563, v3565);
+ int16x8_t v3567 = vsubq_s16(v2974, v2979);
+ int16x8_t v3568 = vsubq_s16(v2984, v2989);
+ int16x8_t v3569_tmp = vqrdmulhq_n_s16(v3568, 14256);
+ int16x8_t v3569 = vaddq_s16(v3569_tmp, v3568);
+ int16x8_t v3570 = vaddq_s16(v3567, v3569);
+ int16x8_t v3571 = vqrdmulhq_n_s16(v3570, 19954);
+ int16x8_t v3572 = vaddq_s16(v3566, v3571);
+ int16x8_t v3573 = vsubq_s16(v2906, v2911);
+ int16x8_t v3574 = vsubq_s16(v2916, v2921);
+ int16x8_t v3575_tmp = vqrdmulhq_n_s16(v3574, 17596);
+ int16x8_t v3575 = vaddq_s16(v3575_tmp, v3574);
+ int16x8_t v3576 = vaddq_s16(v3573, v3575);
+ int16x8_t v3577 = vsubq_s16(v2928, v2933);
+ int16x8_t v3578 = vsubq_s16(v2938, v2943);
+ int16x8_t v3579_tmp = vqrdmulhq_n_s16(v3578, 17596);
+ int16x8_t v3579 = vaddq_s16(v3579_tmp, v3578);
+ int16x8_t v3580 = vaddq_s16(v3577, v3579);
+ int16x8_t v3581 = vqrdmulhq_n_s16(v3580, 20127);
+ int16x8_t v3582 = vaddq_s16(v3576, v3581);
+ int16x8_t v3583 = vsubq_s16(v2860, v2865);
+ int16x8_t v3584 = vsubq_s16(v2870, v2875);
+ int16x8_t v3585_tmp = vqrdmulhq_n_s16(v3584, 21483);
+ int16x8_t v3585 = vaddq_s16(v3585_tmp, v3584);
+ int16x8_t v3586 = vaddq_s16(v3583, v3585);
+ int16x8_t v3587 = vsubq_s16(v2882, v2887);
+ int16x8_t v3588 = vsubq_s16(v2892, v2897);
+ int16x8_t v3589_tmp = vqrdmulhq_n_s16(v3588, 21483);
+ int16x8_t v3589 = vaddq_s16(v3589_tmp, v3588);
+ int16x8_t v3590 = vaddq_s16(v3587, v3589);
+ int16x8_t v3591 = vqrdmulhq_n_s16(v3590, 20306);
+ int16x8_t v3592 = vaddq_s16(v3586, v3591);
+ int16x8_t v3593 = vsubq_s16(v2814, v2819);
+ int16x8_t v3594 = vsubq_s16(v2824, v2829);
+ int16x8_t v3595_tmp = vqrdmulhq_n_s16(v3594, 26057);
+ int16x8_t v3595 = vaddq_s16(v3595_tmp, v3594);
+ int16x8_t v3596 = vaddq_s16(v3593, v3595);
+ int16x8_t v3597 = vsubq_s16(v2836, v2841);
+ int16x8_t v3598 = vsubq_s16(v2846, v2851);
+ int16x8_t v3599_tmp = vqrdmulhq_n_s16(v3598, 26057);
+ int16x8_t v3599 = vaddq_s16(v3599_tmp, v3598);
+ int16x8_t v3600 = vaddq_s16(v3597, v3599);
+ int16x8_t v3601 = vqrdmulhq_n_s16(v3600, 20492);
+ int16x8_t v3602 = vaddq_s16(v3596, v3601);
+ int16x8_t v3603 = vsubq_s16(v2768, v2773);
+ int16x8_t v3604 = vsubq_s16(v2778, v2783);
+ int16x8_t v3605_tmp = vqrdmulhq_n_s16(v3604, 31517);
+ int16x8_t v3605 = vaddq_s16(v3605_tmp, v3604);
+ int16x8_t v3606 = vaddq_s16(v3603, v3605);
+ int16x8_t v3607 = vsubq_s16(v2790, v2795);
+ int16x8_t v3608 = vsubq_s16(v2800, v2805);
+ int16x8_t v3609_tmp = vqrdmulhq_n_s16(v3608, 31517);
+ int16x8_t v3609 = vaddq_s16(v3609_tmp, v3608);
+ int16x8_t v3610 = vaddq_s16(v3607, v3609);
+ int16x8_t v3611 = vqrdmulhq_n_s16(v3610, 20684);
+ int16x8_t v3612 = vaddq_s16(v3606, v3611);
+ int16x8_t v3613 = vsubq_s16(v2722, v2727);
+ int16x8_t v3614 = vsubq_s16(v2732, v2737);
+ int16x8_t v3615_tmp = vqrdmulhq_n_s16(v3614, 5373);
+ int16x8_t v3615 = vmlaq_n_s16(v3615_tmp, v3614, 2);
+ int16x8_t v3616 = vaddq_s16(v3613, v3615);
+ int16x8_t v3617 = vsubq_s16(v2744, v2749);
+ int16x8_t v3618 = vsubq_s16(v2754, v2759);
+ int16x8_t v3619_tmp = vqrdmulhq_n_s16(v3618, 5373);
+ int16x8_t v3619 = vmlaq_n_s16(v3619_tmp, v3618, 2);
+ int16x8_t v3620 = vaddq_s16(v3617, v3619);
+ int16x8_t v3621 = vqrdmulhq_n_s16(v3620, 20883);
+ int16x8_t v3622 = vaddq_s16(v3616, v3621);
+ int16x8_t v3623 = vsubq_s16(v2676, v2681);
+ int16x8_t v3624 = vsubq_s16(v2686, v2691);
+ int16x8_t v3625_tmp = vqrdmulhq_n_s16(v3624, 13571);
+ int16x8_t v3625 = vmlaq_n_s16(v3625_tmp, v3624, 2);
+ int16x8_t v3626 = vaddq_s16(v3623, v3625);
+ int16x8_t v3627 = vsubq_s16(v2698, v2703);
+ int16x8_t v3628 = vsubq_s16(v2708, v2713);
+ int16x8_t v3629_tmp = vqrdmulhq_n_s16(v3628, 13571);
+ int16x8_t v3629 = vmlaq_n_s16(v3629_tmp, v3628, 2);
+ int16x8_t v3630 = vaddq_s16(v3627, v3629);
+ int16x8_t v3631 = vqrdmulhq_n_s16(v3630, 21089);
+ int16x8_t v3632 = vaddq_s16(v3626, v3631);
+ int16x8_t v3633 = vsubq_s16(v2588, v2599);
+ int16x8_t v3634 = vsubq_s16(v2610, v2621);
+ int16x8_t v3635_tmp = vqrdmulhq_n_s16(v3634, 23975);
+ int16x8_t v3635 = vmlaq_n_s16(v3635_tmp, v3634, 2);
+ int16x8_t v3636 = vaddq_s16(v3633, v3635);
+ int16x8_t v3637 = vsubq_s16(v2634, v2645);
+ int16x8_t v3638 = vsubq_s16(v2656, v2667);
+ int16x8_t v3639_tmp = vqrdmulhq_n_s16(v3638, 23975);
+ int16x8_t v3639 = vmlaq_n_s16(v3639_tmp, v3638, 2);
+ int16x8_t v3640 = vaddq_s16(v3637, v3639);
+ int16x8_t v3641 = vqrdmulhq_n_s16(v3640, 21303);
+ int16x8_t v3642 = vaddq_s16(v3636, v3641);
+ int16x8_t v3643 = vsubq_s16(v2494, v2505);
+ int16x8_t v3644 = vsubq_s16(v2516, v2527);
+ int16x8_t v3645_tmp = vqrdmulhq_n_s16(v3644, 4832);
+ int16x8_t v3645 = vmlaq_n_s16(v3645_tmp, v3644, 3);
+ int16x8_t v3646 = vaddq_s16(v3643, v3645);
+ int16x8_t v3647 = vsubq_s16(v2540, v2551);
+ int16x8_t v3648 = vsubq_s16(v2562, v2573);
+ int16x8_t v3649_tmp = vqrdmulhq_n_s16(v3648, 4832);
+ int16x8_t v3649 = vmlaq_n_s16(v3649_tmp, v3648, 3);
+ int16x8_t v3650 = vaddq_s16(v3647, v3649);
+ int16x8_t v3651 = vqrdmulhq_n_s16(v3650, 21524);
+ int16x8_t v3652 = vaddq_s16(v3646, v3651);
+ int16x8_t v3653 = vsubq_s16(v2399, v2410);
+ int16x8_t v3654 = vsubq_s16(v2421, v2432);
+ int16x8_t v3655_tmp = vqrdmulhq_n_s16(v3654, 23437);
+ int16x8_t v3655 = vmlaq_n_s16(v3655_tmp, v3654, 3);
+ int16x8_t v3656 = vaddq_s16(v3653, v3655);
+ int16x8_t v3657 = vsubq_s16(v2445, v2456);
+ int16x8_t v3658 = vsubq_s16(v2468, v2479);
+ int16x8_t v3659_tmp = vqrdmulhq_n_s16(v3658, 23437);
+ int16x8_t v3659 = vmlaq_n_s16(v3659_tmp, v3658, 3);
+ int16x8_t v3660 = vaddq_s16(v3657, v3659);
+ int16x8_t v3661 = vqrdmulhq_n_s16(v3660, 21753);
+ int16x8_t v3662 = vaddq_s16(v3656, v3661);
+ int16x8_t v3663 = vsubq_s16(v2305, v2316);
+ int16x8_t v3664 = vsubq_s16(v2327, v2338);
+ int16x8_t v3665_tmp = vqrdmulhq_n_s16(v3664, 17573);
+ int16x8_t v3665 = vmlaq_n_s16(v3665_tmp, v3664, 4);
+ int16x8_t v3666 = vaddq_s16(v3663, v3665);
+ int16x8_t v3667 = vsubq_s16(v2351, v2362);
+ int16x8_t v3668 = vsubq_s16(v2373, v2384);
+ int16x8_t v3669_tmp = vqrdmulhq_n_s16(v3668, 17573);
+ int16x8_t v3669 = vmlaq_n_s16(v3669_tmp, v3668, 4);
+ int16x8_t v3670 = vaddq_s16(v3667, v3669);
+ int16x8_t v3671 = vqrdmulhq_n_s16(v3670, 21990);
+ int16x8_t v3672 = vaddq_s16(v3666, v3671);
+ int16x8_t v3673 = vsubq_s16(v2127, v2150);
+ int16x8_t v3674 = vsubq_s16(v2173, v2196);
+ int16x8_t v3675_tmp = vqrdmulhq_n_s16(v3674, 27122);
+ int16x8_t v3675 = vmlaq_n_s16(v3675_tmp, v3674, 5);
+ int16x8_t v3676 = vaddq_s16(v3673, v3675);
+ int16x8_t v3677 = vsubq_s16(v2221, v2244);
+ int16x8_t v3678 = vsubq_s16(v2267, v2290);
+ int16x8_t v3679_tmp = vqrdmulhq_n_s16(v3678, 27122);
+ int16x8_t v3679 = vmlaq_n_s16(v3679_tmp, v3678, 5);
+ int16x8_t v3680 = vaddq_s16(v3677, v3679);
+ int16x8_t v3681 = vqrdmulhq_n_s16(v3680, 22236);
+ int16x8_t v3682 = vaddq_s16(v3676, v3681);
+ int16x8_t v3683 = vsubq_s16(v1937, v1960);
+ int16x8_t v3684 = vsubq_s16(v1983, v2006);
+ int16x8_t v3685_tmp = vqrdmulhq_n_s16(v3684, 5041);
+ int16x8_t v3685 = vmlaq_n_s16(v3685_tmp, v3684, 8);
+ int16x8_t v3686 = vaddq_s16(v3683, v3685);
+ int16x8_t v3687 = vsubq_s16(v2031, v2054);
+ int16x8_t v3688 = vsubq_s16(v2077, v2100);
+ int16x8_t v3689_tmp = vqrdmulhq_n_s16(v3688, 5041);
+ int16x8_t v3689 = vmlaq_n_s16(v3689_tmp, v3688, 8);
+ int16x8_t v3690 = vaddq_s16(v3687, v3689);
+ int16x8_t v3691 = vqrdmulhq_n_s16(v3690, 22491);
+ int16x8_t v3692 = vaddq_s16(v3686, v3691);
+ int16x8_t v3693 = vsubq_s16(v1579, v1626);
+ int16x8_t v3694 = vsubq_s16(v1673, v1720);
+ int16x8_t v3695_tmp = vqrdmulhq_n_s16(v3694, 19146);
+ int16x8_t v3695 = vmlaq_n_s16(v3695_tmp, v3694, 13);
+ int16x8_t v3696 = vaddq_s16(v3693, v3695);
+ int16x8_t v3697 = vsubq_s16(v1769, v1816);
+ int16x8_t v3698 = vsubq_s16(v1863, v1910);
+ int16x8_t v3699_tmp = vqrdmulhq_n_s16(v3698, 19146);
+ int16x8_t v3699 = vmlaq_n_s16(v3699_tmp, v3698, 13);
+ int16x8_t v3700 = vaddq_s16(v3697, v3699);
+ int16x8_t v3701 = vqrdmulhq_n_s16(v3700, 22755);
+ int16x8_t v3702 = vaddq_s16(v3696, v3701);
+ int16x8_t v3703 = vsubq_s16(v141, v316);
+ int16x8_t v3704 = vsubq_s16(v522, v698);
+ int16x8_t v3705_tmp = vqrdmulhq_n_s16(v3704, 24402);
+ int16x8_t v3705 = vmlaq_n_s16(v3705_tmp, v3704, 40);
+ int16x8_t v3706 = vaddq_s16(v3703, v3705);
+ int16x8_t v3707 = vsubq_s16(v906, v1145);
+ int16x8_t v3708 = vsubq_s16(v1351, v1528);
+ int16x8_t v3709_tmp = vqrdmulhq_n_s16(v3708, 24402);
+ int16x8_t v3709 = vmlaq_n_s16(v3709_tmp, v3708, 40);
+ int16x8_t v3710 = vaddq_s16(v3707, v3709);
+ int16x8_t v3711 = vqrdmulhq_n_s16(v3710, 23030);
+ int16x8_t v3712 = vaddq_s16(v3706, v3711);
+ int16x8_t v3713 = vsubq_s16(v3703, v3705);
+ int16x8_t v3714 = vsubq_s16(v3707, v3709);
+ int16x8_t v3715 = vqrdmulhq_n_s16(v3714, 23314);
+ int16x8_t v3716 = vaddq_s16(v3713, v3715);
+ int16x8_t v3717 = vsubq_s16(v3693, v3695);
+ int16x8_t v3718 = vsubq_s16(v3697, v3699);
+ int16x8_t v3719 = vqrdmulhq_n_s16(v3718, 23609);
+ int16x8_t v3720 = vaddq_s16(v3717, v3719);
+ int16x8_t v3721 = vsubq_s16(v3683, v3685);
+ int16x8_t v3722 = vsubq_s16(v3687, v3689);
+ int16x8_t v3723 = vqrdmulhq_n_s16(v3722, 23915);
+ int16x8_t v3724 = vaddq_s16(v3721, v3723);
+ int16x8_t v3725 = vsubq_s16(v3673, v3675);
+ int16x8_t v3726 = vsubq_s16(v3677, v3679);
+ int16x8_t v3727 = vqrdmulhq_n_s16(v3726, 24233);
+ int16x8_t v3728 = vaddq_s16(v3725, v3727);
+ int16x8_t v3729 = vsubq_s16(v3663, v3665);
+ int16x8_t v3730 = vsubq_s16(v3667, v3669);
+ int16x8_t v3731 = vqrdmulhq_n_s16(v3730, 24564);
+ int16x8_t v3732 = vaddq_s16(v3729, v3731);
+ int16x8_t v3733 = vsubq_s16(v3653, v3655);
+ int16x8_t v3734 = vsubq_s16(v3657, v3659);
+ int16x8_t v3735 = vqrdmulhq_n_s16(v3734, 24907);
+ int16x8_t v3736 = vaddq_s16(v3733, v3735);
+ int16x8_t v3737 = vsubq_s16(v3643, v3645);
+ int16x8_t v3738 = vsubq_s16(v3647, v3649);
+ int16x8_t v3739 = vqrdmulhq_n_s16(v3738, 25264);
+ int16x8_t v3740 = vaddq_s16(v3737, v3739);
+ int16x8_t v3741 = vsubq_s16(v3633, v3635);
+ int16x8_t v3742 = vsubq_s16(v3637, v3639);
+ int16x8_t v3743 = vqrdmulhq_n_s16(v3742, 25635);
+ int16x8_t v3744 = vaddq_s16(v3741, v3743);
+ int16x8_t v3745 = vsubq_s16(v3623, v3625);
+ int16x8_t v3746 = vsubq_s16(v3627, v3629);
+ int16x8_t v3747 = vqrdmulhq_n_s16(v3746, 26021);
+ int16x8_t v3748 = vaddq_s16(v3745, v3747);
+ int16x8_t v3749 = vsubq_s16(v3613, v3615);
+ int16x8_t v3750 = vsubq_s16(v3617, v3619);
+ int16x8_t v3751 = vqrdmulhq_n_s16(v3750, 26423);
+ int16x8_t v3752 = vaddq_s16(v3749, v3751);
+ int16x8_t v3753 = vsubq_s16(v3603, v3605);
+ int16x8_t v3754 = vsubq_s16(v3607, v3609);
+ int16x8_t v3755 = vqrdmulhq_n_s16(v3754, 26842);
+ int16x8_t v3756 = vaddq_s16(v3753, v3755);
+ int16x8_t v3757 = vsubq_s16(v3593, v3595);
+ int16x8_t v3758 = vsubq_s16(v3597, v3599);
+ int16x8_t v3759 = vqrdmulhq_n_s16(v3758, 27279);
+ int16x8_t v3760 = vaddq_s16(v3757, v3759);
+ int16x8_t v3761 = vsubq_s16(v3583, v3585);
+ int16x8_t v3762 = vsubq_s16(v3587, v3589);
+ int16x8_t v3763 = vqrdmulhq_n_s16(v3762, 27734);
+ int16x8_t v3764 = vaddq_s16(v3761, v3763);
+ int16x8_t v3765 = vsubq_s16(v3573, v3575);
+ int16x8_t v3766 = vsubq_s16(v3577, v3579);
+ int16x8_t v3767 = vqrdmulhq_n_s16(v3766, 28209);
+ int16x8_t v3768 = vaddq_s16(v3765, v3767);
+ int16x8_t v3769 = vsubq_s16(v3563, v3565);
+ int16x8_t v3770 = vsubq_s16(v3567, v3569);
+ int16x8_t v3771 = vqrdmulhq_n_s16(v3770, 28705);
+ int16x8_t v3772 = vaddq_s16(v3769, v3771);
+ int16x8_t v3773 = vsubq_s16(v3553, v3555);
+ int16x8_t v3774 = vsubq_s16(v3557, v3559);
+ int16x8_t v3775 = vqrdmulhq_n_s16(v3774, 29223);
+ int16x8_t v3776 = vaddq_s16(v3773, v3775);
+ int16x8_t v3777 = vsubq_s16(v3543, v3545);
+ int16x8_t v3778 = vsubq_s16(v3547, v3549);
+ int16x8_t v3779 = vqrdmulhq_n_s16(v3778, 29764);
+ int16x8_t v3780 = vaddq_s16(v3777, v3779);
+ int16x8_t v3781 = vsubq_s16(v3533, v3535);
+ int16x8_t v3782 = vsubq_s16(v3537, v3539);
+ int16x8_t v3783 = vqrdmulhq_n_s16(v3782, 30331);
+ int16x8_t v3784 = vaddq_s16(v3781, v3783);
+ int16x8_t v3785 = vsubq_s16(v3523, v3525);
+ int16x8_t v3786 = vsubq_s16(v3527, v3529);
+ int16x8_t v3787 = vqrdmulhq_n_s16(v3786, 30925);
+ int16x8_t v3788 = vaddq_s16(v3785, v3787);
+ int16x8_t v3789 = vsubq_s16(v3513, v3515);
+ int16x8_t v3790 = vsubq_s16(v3517, v3519);
+ int16x8_t v3791 = vqrdmulhq_n_s16(v3790, 31547);
+ int16x8_t v3792 = vaddq_s16(v3789, v3791);
+ int16x8_t v3793 = vsubq_s16(v3503, v3505);
+ int16x8_t v3794 = vsubq_s16(v3507, v3509);
+ int16x8_t v3795 = vqrdmulhq_n_s16(v3794, 32199);
+ int16x8_t v3796 = vaddq_s16(v3793, v3795);
+ int16x8_t v3797 = vsubq_s16(v3493, v3495);
+ int16x8_t v3798 = vsubq_s16(v3497, v3499);
+ int16x8_t v3799_tmp = vqrdmulhq_n_s16(v3798, 117);
+ int16x8_t v3799 = vaddq_s16(v3799_tmp, v3798);
+ int16x8_t v3800 = vaddq_s16(v3797, v3799);
+ int16x8_t v3801 = vsubq_s16(v3483, v3485);
+ int16x8_t v3802 = vsubq_s16(v3487, v3489);
+ int16x8_t v3803_tmp = vqrdmulhq_n_s16(v3802, 837);
+ int16x8_t v3803 = vaddq_s16(v3803_tmp, v3802);
+ int16x8_t v3804 = vaddq_s16(v3801, v3803);
+ int16x8_t v3805 = vsubq_s16(v3473, v3475);
+ int16x8_t v3806 = vsubq_s16(v3477, v3479);
+ int16x8_t v3807_tmp = vqrdmulhq_n_s16(v3806, 1594);
+ int16x8_t v3807 = vaddq_s16(v3807_tmp, v3806);
+ int16x8_t v3808 = vaddq_s16(v3805, v3807);
+ int16x8_t v3809 = vsubq_s16(v3463, v3465);
+ int16x8_t v3810 = vsubq_s16(v3467, v3469);
+ int16x8_t v3811_tmp = vqrdmulhq_n_s16(v3810, 2393);
+ int16x8_t v3811 = vaddq_s16(v3811_tmp, v3810);
+ int16x8_t v3812 = vaddq_s16(v3809, v3811);
+ int16x8_t v3813 = vsubq_s16(v3453, v3455);
+ int16x8_t v3814 = vsubq_s16(v3457, v3459);
+ int16x8_t v3815_tmp = vqrdmulhq_n_s16(v3814, 3234);
+ int16x8_t v3815 = vaddq_s16(v3815_tmp, v3814);
+ int16x8_t v3816 = vaddq_s16(v3813, v3815);
+ int16x8_t v3817 = vsubq_s16(v3443, v3445);
+ int16x8_t v3818 = vsubq_s16(v3447, v3449);
+ int16x8_t v3819_tmp = vqrdmulhq_n_s16(v3818, 4123);
+ int16x8_t v3819 = vaddq_s16(v3819_tmp, v3818);
+ int16x8_t v3820 = vaddq_s16(v3817, v3819);
+ int16x8_t v3821 = vsubq_s16(v3433, v3435);
+ int16x8_t v3822 = vsubq_s16(v3437, v3439);
+ int16x8_t v3823_tmp = vqrdmulhq_n_s16(v3822, 5062);
+ int16x8_t v3823 = vaddq_s16(v3823_tmp, v3822);
+ int16x8_t v3824 = vaddq_s16(v3821, v3823);
+ int16x8_t v3825 = vsubq_s16(v3423, v3425);
+ int16x8_t v3826 = vsubq_s16(v3427, v3429);
+ int16x8_t v3827_tmp = vqrdmulhq_n_s16(v3826, 6057);
+ int16x8_t v3827 = vaddq_s16(v3827_tmp, v3826);
+ int16x8_t v3828 = vaddq_s16(v3825, v3827);
+ int16x8_t v3829 = vsubq_s16(v3413, v3415);
+ int16x8_t v3830 = vsubq_s16(v3417, v3419);
+ int16x8_t v3831_tmp = vqrdmulhq_n_s16(v3830, 7111);
+ int16x8_t v3831 = vaddq_s16(v3831_tmp, v3830);
+ int16x8_t v3832 = vaddq_s16(v3829, v3831);
+ int16x8_t v3833 = vsubq_s16(v3403, v3405);
+ int16x8_t v3834 = vsubq_s16(v3407, v3409);
+ int16x8_t v3835_tmp = vqrdmulhq_n_s16(v3834, 8231);
+ int16x8_t v3835 = vaddq_s16(v3835_tmp, v3834);
+ int16x8_t v3836 = vaddq_s16(v3833, v3835);
+ int16x8_t v3837 = vsubq_s16(v3393, v3395);
+ int16x8_t v3838 = vsubq_s16(v3397, v3399);
+ int16x8_t v3839_tmp = vqrdmulhq_n_s16(v3838, 9421);
+ int16x8_t v3839 = vaddq_s16(v3839_tmp, v3838);
+ int16x8_t v3840 = vaddq_s16(v3837, v3839);
+ int16x8_t v3841 = vsubq_s16(v3374, v3379);
+ int16x8_t v3842 = vsubq_s16(v3384, v3389);
+ int16x8_t v3843_tmp = vqrdmulhq_n_s16(v3842, 10690);
+ int16x8_t v3843 = vaddq_s16(v3843_tmp, v3842);
+ int16x8_t v3844 = vaddq_s16(v3841, v3843);
+ int16x8_t v3845 = vsubq_s16(v3352, v3357);
+ int16x8_t v3846 = vsubq_s16(v3362, v3367);
+ int16x8_t v3847_tmp = vqrdmulhq_n_s16(v3846, 12044);
+ int16x8_t v3847 = vaddq_s16(v3847_tmp, v3846);
+ int16x8_t v3848 = vaddq_s16(v3845, v3847);
+ int16x8_t v3849 = vsubq_s16(v3330, v3335);
+ int16x8_t v3850 = vsubq_s16(v3340, v3345);
+ int16x8_t v3851_tmp = vqrdmulhq_n_s16(v3850, 13493);
+ int16x8_t v3851 = vaddq_s16(v3851_tmp, v3850);
+ int16x8_t v3852 = vaddq_s16(v3849, v3851);
+ int16x8_t v3853 = vsubq_s16(v3308, v3313);
+ int16x8_t v3854 = vsubq_s16(v3318, v3323);
+ int16x8_t v3855_tmp = vqrdmulhq_n_s16(v3854, 15046);
+ int16x8_t v3855 = vaddq_s16(v3855_tmp, v3854);
+ int16x8_t v3856 = vaddq_s16(v3853, v3855);
+ int16x8_t v3857 = vsubq_s16(v3286, v3291);
+ int16x8_t v3858 = vsubq_s16(v3296, v3301);
+ int16x8_t v3859_tmp = vqrdmulhq_n_s16(v3858, 16715);
+ int16x8_t v3859 = vaddq_s16(v3859_tmp, v3858);
+ int16x8_t v3860 = vaddq_s16(v3857, v3859);
+ int16x8_t v3861 = vsubq_s16(v3264, v3269);
+ int16x8_t v3862 = vsubq_s16(v3274, v3279);
+ int16x8_t v3863_tmp = vqrdmulhq_n_s16(v3862, 18512);
+ int16x8_t v3863 = vaddq_s16(v3863_tmp, v3862);
+ int16x8_t v3864 = vaddq_s16(v3861, v3863);
+ int16x8_t v3865 = vsubq_s16(v3242, v3247);
+ int16x8_t v3866 = vsubq_s16(v3252, v3257);
+ int16x8_t v3867_tmp = vqrdmulhq_n_s16(v3866, 20453);
+ int16x8_t v3867 = vaddq_s16(v3867_tmp, v3866);
+ int16x8_t v3868 = vaddq_s16(v3865, v3867);
+ int16x8_t v3869 = vsubq_s16(v3220, v3225);
+ int16x8_t v3870 = vsubq_s16(v3230, v3235);
+ int16x8_t v3871_tmp = vqrdmulhq_n_s16(v3870, 22555);
+ int16x8_t v3871 = vaddq_s16(v3871_tmp, v3870);
+ int16x8_t v3872 = vaddq_s16(v3869, v3871);
+ int16x8_t v3873 = vsubq_s16(v3198, v3203);
+ int16x8_t v3874 = vsubq_s16(v3208, v3213);
+ int16x8_t v3875_tmp = vqrdmulhq_n_s16(v3874, 24839);
+ int16x8_t v3875 = vaddq_s16(v3875_tmp, v3874);
+ int16x8_t v3876 = vaddq_s16(v3873, v3875);
+ int16x8_t v3877 = vsubq_s16(v3176, v3181);
+ int16x8_t v3878 = vsubq_s16(v3186, v3191);
+ int16x8_t v3879_tmp = vqrdmulhq_n_s16(v3878, 27330);
+ int16x8_t v3879 = vaddq_s16(v3879_tmp, v3878);
+ int16x8_t v3880 = vaddq_s16(v3877, v3879);
+ int16x8_t v3881 = vsubq_s16(v3154, v3159);
+ int16x8_t v3882 = vsubq_s16(v3164, v3169);
+ int16x8_t v3883_tmp = vqrdmulhq_n_s16(v3882, 30056);
+ int16x8_t v3883 = vaddq_s16(v3883_tmp, v3882);
+ int16x8_t v3884 = vaddq_s16(v3881, v3883);
+ int16x8_t v3885 = vsubq_s16(v3132, v3137);
+ int16x8_t v3886 = vsubq_s16(v3142, v3147);
+ int16x8_t v3887_tmp = vqrdmulhq_n_s16(v3886, 282);
+ int16x8_t v3887 = vmlaq_n_s16(v3887_tmp, v3886, 2);
+ int16x8_t v3888 = vaddq_s16(v3885, v3887);
+ int16x8_t v3889 = vsubq_s16(v3110, v3115);
+ int16x8_t v3890 = vsubq_s16(v3120, v3125);
+ int16x8_t v3891_tmp = vqrdmulhq_n_s16(v3890, 3588);
+ int16x8_t v3891 = vmlaq_n_s16(v3891_tmp, v3890, 2);
+ int16x8_t v3892 = vaddq_s16(v3889, v3891);
+ int16x8_t v3893 = vsubq_s16(v3088, v3093);
+ int16x8_t v3894 = vsubq_s16(v3098, v3103);
+ int16x8_t v3895_tmp = vqrdmulhq_n_s16(v3894, 7255);
+ int16x8_t v3895 = vmlaq_n_s16(v3895_tmp, v3894, 2);
+ int16x8_t v3896 = vaddq_s16(v3893, v3895);
+ int16x8_t v3897 = vsubq_s16(v3066, v3071);
+ int16x8_t v3898 = vsubq_s16(v3076, v3081);
+ int16x8_t v3899_tmp = vqrdmulhq_n_s16(v3898, 11344);
+ int16x8_t v3899 = vmlaq_n_s16(v3899_tmp, v3898, 2);
+ int16x8_t v3900 = vaddq_s16(v3897, v3899);
+ int16x8_t v3901 = vsubq_s16(v3044, v3049);
+ int16x8_t v3902 = vsubq_s16(v3054, v3059);
+ int16x8_t v3903_tmp = vqrdmulhq_n_s16(v3902, 15934);
+ int16x8_t v3903 = vmlaq_n_s16(v3903_tmp, v3902, 2);
+ int16x8_t v3904 = vaddq_s16(v3901, v3903);
+ int16x8_t v3905 = vsubq_s16(v3004, v3015);
+ int16x8_t v3906 = vsubq_s16(v3026, v3037);
+ int16x8_t v3907_tmp = vqrdmulhq_n_s16(v3906, 21120);
+ int16x8_t v3907 = vmlaq_n_s16(v3907_tmp, v3906, 2);
+ int16x8_t v3908 = vaddq_s16(v3905, v3907);
+ int16x8_t v3909 = vsubq_s16(v2958, v2969);
+ int16x8_t v3910 = vsubq_s16(v2980, v2991);
+ int16x8_t v3911_tmp = vqrdmulhq_n_s16(v3910, 27027);
+ int16x8_t v3911 = vmlaq_n_s16(v3911_tmp, v3910, 2);
+ int16x8_t v3912 = vaddq_s16(v3909, v3911);
+ int16x8_t v3913 = vsubq_s16(v2912, v2923);
+ int16x8_t v3914 = vsubq_s16(v2934, v2945);
+ int16x8_t v3915_tmp = vqrdmulhq_n_s16(v3914, 1045);
+ int16x8_t v3915 = vmlaq_n_s16(v3915_tmp, v3914, 3);
+ int16x8_t v3916 = vaddq_s16(v3913, v3915);
+ int16x8_t v3917 = vsubq_s16(v2866, v2877);
+ int16x8_t v3918 = vsubq_s16(v2888, v2899);
+ int16x8_t v3919_tmp = vqrdmulhq_n_s16(v3918, 8923);
+ int16x8_t v3919 = vmlaq_n_s16(v3919_tmp, v3918, 3);
+ int16x8_t v3920 = vaddq_s16(v3917, v3919);
+ int16x8_t v3921 = vsubq_s16(v2820, v2831);
+ int16x8_t v3922 = vsubq_s16(v2842, v2853);
+ int16x8_t v3923_tmp = vqrdmulhq_n_s16(v3922, 18177);
+ int16x8_t v3923 = vmlaq_n_s16(v3923_tmp, v3922, 3);
+ int16x8_t v3924 = vaddq_s16(v3921, v3923);
+ int16x8_t v3925 = vsubq_s16(v2774, v2785);
+ int16x8_t v3926 = vsubq_s16(v2796, v2807);
+ int16x8_t v3927_tmp = vqrdmulhq_n_s16(v3926, 29200);
+ int16x8_t v3927 = vmlaq_n_s16(v3927_tmp, v3926, 3);
+ int16x8_t v3928 = vaddq_s16(v3925, v3927);
+ int16x8_t v3929 = vsubq_s16(v2728, v2739);
+ int16x8_t v3930 = vsubq_s16(v2750, v2761);
+ int16x8_t v3931_tmp = vqrdmulhq_n_s16(v3930, 9782);
+ int16x8_t v3931 = vmlaq_n_s16(v3931_tmp, v3930, 4);
+ int16x8_t v3932 = vaddq_s16(v3929, v3931);
+ int16x8_t v3933 = vsubq_s16(v2682, v2693);
+ int16x8_t v3934 = vsubq_s16(v2704, v2715);
+ int16x8_t v3935_tmp = vqrdmulhq_n_s16(v3934, 26282);
+ int16x8_t v3935 = vmlaq_n_s16(v3935_tmp, v3934, 4);
+ int16x8_t v3936 = vaddq_s16(v3933, v3935);
+ int16x8_t v3937 = vsubq_s16(v2600, v2623);
+ int16x8_t v3938 = vsubq_s16(v2646, v2669);
+ int16x8_t v3939_tmp = vqrdmulhq_n_s16(v3938, 14423);
+ int16x8_t v3939 = vmlaq_n_s16(v3939_tmp, v3938, 5);
+ int16x8_t v3940 = vaddq_s16(v3937, v3939);
+ int16x8_t v3941 = vsubq_s16(v2506, v2529);
+ int16x8_t v3942 = vsubq_s16(v2552, v2575);
+ int16x8_t v3943_tmp = vqrdmulhq_n_s16(v3942, 9008);
+ int16x8_t v3943 = vmlaq_n_s16(v3943_tmp, v3942, 6);
+ int16x8_t v3944 = vaddq_s16(v3941, v3943);
+ int16x8_t v3945 = vsubq_s16(v2411, v2434);
+ int16x8_t v3946 = vsubq_s16(v2457, v2481);
+ int16x8_t v3947_tmp = vqrdmulhq_n_s16(v3946, 13552);
+ int16x8_t v3947 = vmlaq_n_s16(v3947_tmp, v3946, 7);
+ int16x8_t v3948 = vaddq_s16(v3945, v3947);
+ int16x8_t v3949 = vsubq_s16(v2317, v2340);
+ int16x8_t v3950 = vsubq_s16(v2363, v2386);
+ int16x8_t v3951_tmp = vqrdmulhq_n_s16(v3950, 1925);
+ int16x8_t v3951 = vmlaq_n_s16(v3951_tmp, v3950, 9);
+ int16x8_t v3952 = vaddq_s16(v3949, v3951);
+ int16x8_t v3953 = vsubq_s16(v2151, v2198);
+ int16x8_t v3954 = vsubq_s16(v2245, v2292);
+ int16x8_t v3955_tmp = vqrdmulhq_n_s16(v3954, 21123);
+ int16x8_t v3955 = vmlaq_n_s16(v3955_tmp, v3954, 11);
+ int16x8_t v3956 = vaddq_s16(v3953, v3955);
+ int16x8_t v3957 = vsubq_s16(v1961, v2008);
+ int16x8_t v3958 = vsubq_s16(v2055, v2102);
+ int16x8_t v3959_tmp = vqrdmulhq_n_s16(v3958, 9831);
+ int16x8_t v3959 = vmlaq_n_s16(v3959_tmp, v3958, 16);
+ int16x8_t v3960 = vaddq_s16(v3957, v3959);
+ int16x8_t v3961 = vsubq_s16(v1627, v1722);
+ int16x8_t v3962 = vsubq_s16(v1817, v1912);
+ int16x8_t v3963_tmp = vqrdmulhq_n_s16(v3962, 5373);
+ int16x8_t v3963 = vmlaq_n_s16(v3963_tmp, v3962, 27);
+ int16x8_t v3964 = vaddq_s16(v3961, v3963);
+ int16x8_t v3965 = vsubq_s16(v317, v700);
+ int16x8_t v3966 = vsubq_s16(v1146, v1530);
+ int16x8_t v3967_tmp = vqrdmulhq_n_s16(v3966, 15986);
+ int16x8_t v3967 = vmlaq_n_s16(v3967_tmp, v3966, 81);
+ int16x8_t v3968 = vaddq_s16(v3965, v3967);
+ int16x8_t v3969 = vsubq_s16(v3965, v3967);
+ int16x8_t v3970 = vsubq_s16(v3961, v3963);
+ int16x8_t v3971 = vsubq_s16(v3957, v3959);
+ int16x8_t v3972 = vsubq_s16(v3953, v3955);
+ int16x8_t v3973 = vsubq_s16(v3949, v3951);
+ int16x8_t v3974 = vsubq_s16(v3945, v3947);
+ int16x8_t v3975 = vsubq_s16(v3941, v3943);
+ int16x8_t v3976 = vsubq_s16(v3937, v3939);
+ int16x8_t v3977 = vsubq_s16(v3933, v3935);
+ int16x8_t v3978 = vsubq_s16(v3929, v3931);
+ int16x8_t v3979 = vsubq_s16(v3925, v3927);
+ int16x8_t v3980 = vsubq_s16(v3921, v3923);
+ int16x8_t v3981 = vsubq_s16(v3917, v3919);
+ int16x8_t v3982 = vsubq_s16(v3913, v3915);
+ int16x8_t v3983 = vsubq_s16(v3909, v3911);
+ int16x8_t v3984 = vsubq_s16(v3905, v3907);
+ int16x8_t v3985 = vsubq_s16(v3901, v3903);
+ int16x8_t v3986 = vsubq_s16(v3897, v3899);
+ int16x8_t v3987 = vsubq_s16(v3893, v3895);
+ int16x8_t v3988 = vsubq_s16(v3889, v3891);
+ int16x8_t v3989 = vsubq_s16(v3885, v3887);
+ int16x8_t v3990 = vsubq_s16(v3881, v3883);
+ int16x8_t v3991 = vsubq_s16(v3877, v3879);
+ int16x8_t v3992 = vsubq_s16(v3873, v3875);
+ int16x8_t v3993 = vsubq_s16(v3869, v3871);
+ int16x8_t v3994 = vsubq_s16(v3865, v3867);
+ int16x8_t v3995 = vsubq_s16(v3861, v3863);
+ int16x8_t v3996 = vsubq_s16(v3857, v3859);
+ int16x8_t v3997 = vsubq_s16(v3853, v3855);
+ int16x8_t v3998 = vsubq_s16(v3849, v3851);
+ int16x8_t v3999 = vsubq_s16(v3845, v3847);
+ int16x8_t v4000 = vsubq_s16(v3841, v3843);
+ int16x8_t v4001 = vsubq_s16(v3837, v3839);
+ int16x8_t v4002 = vsubq_s16(v3833, v3835);
+ int16x8_t v4003 = vsubq_s16(v3829, v3831);
+ int16x8_t v4004 = vsubq_s16(v3825, v3827);
+ int16x8_t v4005 = vsubq_s16(v3821, v3823);
+ int16x8_t v4006 = vsubq_s16(v3817, v3819);
+ int16x8_t v4007 = vsubq_s16(v3813, v3815);
+ int16x8_t v4008 = vsubq_s16(v3809, v3811);
+ int16x8_t v4009 = vsubq_s16(v3805, v3807);
+ int16x8_t v4010 = vsubq_s16(v3801, v3803);
+ int16x8_t v4011 = vsubq_s16(v3797, v3799);
+ int16x8_t v4012 = vsubq_s16(v3793, v3795);
+ int16x8_t v4013 = vsubq_s16(v3789, v3791);
+ int16x8_t v4014 = vsubq_s16(v3785, v3787);
+ int16x8_t v4015 = vsubq_s16(v3781, v3783);
+ int16x8_t v4016 = vsubq_s16(v3777, v3779);
+ int16x8_t v4017 = vsubq_s16(v3773, v3775);
+ int16x8_t v4018 = vsubq_s16(v3769, v3771);
+ int16x8_t v4019 = vsubq_s16(v3765, v3767);
+ int16x8_t v4020 = vsubq_s16(v3761, v3763);
+ int16x8_t v4021 = vsubq_s16(v3757, v3759);
+ int16x8_t v4022 = vsubq_s16(v3753, v3755);
+ int16x8_t v4023 = vsubq_s16(v3749, v3751);
+ int16x8_t v4024 = vsubq_s16(v3745, v3747);
+ int16x8_t v4025 = vsubq_s16(v3741, v3743);
+ int16x8_t v4026 = vsubq_s16(v3737, v3739);
+ int16x8_t v4027 = vsubq_s16(v3733, v3735);
+ int16x8_t v4028 = vsubq_s16(v3729, v3731);
+ int16x8_t v4029 = vsubq_s16(v3725, v3727);
+ int16x8_t v4030 = vsubq_s16(v3721, v3723);
+ int16x8_t v4031 = vsubq_s16(v3717, v3719);
+ int16x8_t v4032 = vsubq_s16(v3713, v3715);
+ int16x8_t v4033 = vsubq_s16(v3706, v3711);
+ int16x8_t v4034 = vsubq_s16(v3696, v3701);
+ int16x8_t v4035 = vsubq_s16(v3686, v3691);
+ int16x8_t v4036 = vsubq_s16(v3676, v3681);
+ int16x8_t v4037 = vsubq_s16(v3666, v3671);
+ int16x8_t v4038 = vsubq_s16(v3656, v3661);
+ int16x8_t v4039 = vsubq_s16(v3646, v3651);
+ int16x8_t v4040 = vsubq_s16(v3636, v3641);
+ int16x8_t v4041 = vsubq_s16(v3626, v3631);
+ int16x8_t v4042 = vsubq_s16(v3616, v3621);
+ int16x8_t v4043 = vsubq_s16(v3606, v3611);
+ int16x8_t v4044 = vsubq_s16(v3596, v3601);
+ int16x8_t v4045 = vsubq_s16(v3586, v3591);
+ int16x8_t v4046 = vsubq_s16(v3576, v3581);
+ int16x8_t v4047 = vsubq_s16(v3566, v3571);
+ int16x8_t v4048 = vsubq_s16(v3556, v3561);
+ int16x8_t v4049 = vsubq_s16(v3546, v3551);
+ int16x8_t v4050 = vsubq_s16(v3536, v3541);
+ int16x8_t v4051 = vsubq_s16(v3526, v3531);
+ int16x8_t v4052 = vsubq_s16(v3516, v3521);
+ int16x8_t v4053 = vsubq_s16(v3506, v3511);
+ int16x8_t v4054 = vsubq_s16(v3496, v3501);
+ int16x8_t v4055 = vsubq_s16(v3486, v3491);
+ int16x8_t v4056 = vsubq_s16(v3476, v3481);
+ int16x8_t v4057 = vsubq_s16(v3466, v3471);
+ int16x8_t v4058 = vsubq_s16(v3456, v3461);
+ int16x8_t v4059 = vsubq_s16(v3446, v3451);
+ int16x8_t v4060 = vsubq_s16(v3436, v3441);
+ int16x8_t v4061 = vsubq_s16(v3426, v3431);
+ int16x8_t v4062 = vsubq_s16(v3416, v3421);
+ int16x8_t v4063 = vsubq_s16(v3406, v3411);
+ int16x8_t v4064 = vsubq_s16(v3396, v3401);
+ int16x8_t v4065 = vsubq_s16(v3380, v3391);
+ int16x8_t v4066 = vsubq_s16(v3358, v3369);
+ int16x8_t v4067 = vsubq_s16(v3336, v3347);
+ int16x8_t v4068 = vsubq_s16(v3314, v3325);
+ int16x8_t v4069 = vsubq_s16(v3292, v3303);
+ int16x8_t v4070 = vsubq_s16(v3270, v3281);
+ int16x8_t v4071 = vsubq_s16(v3248, v3259);
+ int16x8_t v4072 = vsubq_s16(v3226, v3237);
+ int16x8_t v4073 = vsubq_s16(v3204, v3215);
+ int16x8_t v4074 = vsubq_s16(v3182, v3193);
+ int16x8_t v4075 = vsubq_s16(v3160, v3171);
+ int16x8_t v4076 = vsubq_s16(v3138, v3149);
+ int16x8_t v4077 = vsubq_s16(v3116, v3127);
+ int16x8_t v4078 = vsubq_s16(v3094, v3105);
+ int16x8_t v4079 = vsubq_s16(v3072, v3083);
+ int16x8_t v4080 = vsubq_s16(v3050, v3061);
+ int16x8_t v4081 = vsubq_s16(v3016, v3039);
+ int16x8_t v4082 = vsubq_s16(v2970, v2993);
+ int16x8_t v4083 = vsubq_s16(v2924, v2947);
+ int16x8_t v4084 = vsubq_s16(v2878, v2901);
+ int16x8_t v4085 = vsubq_s16(v2832, v2855);
+ int16x8_t v4086 = vsubq_s16(v2786, v2809);
+ int16x8_t v4087 = vsubq_s16(v2740, v2763);
+ int16x8_t v4088 = vsubq_s16(v2694, v2717);
+ int16x8_t v4089 = vsubq_s16(v2624, v2671);
+ int16x8_t v4090 = vsubq_s16(v2530, v2577);
+ int16x8_t v4091 = vsubq_s16(v2435, v2483);
+ int16x8_t v4092 = vsubq_s16(v2341, v2388);
+ int16x8_t v4093 = vsubq_s16(v2199, v2294);
+ int16x8_t v4094 = vsubq_s16(v2009, v2104);
+ int16x8_t v4095 = vsubq_s16(v1723, v1914);
+ int16x8_t v4096 = vsubq_s16(v701, v1532);
+ vst1q_s16(out + out_stride * 0 + i, v1533);
+ vst1q_s16(out + out_stride * 1 + i, v1915);
+ vst1q_s16(out + out_stride * 2 + i, v2105);
+ vst1q_s16(out + out_stride * 3 + i, v2295);
+ vst1q_s16(out + out_stride * 4 + i, v2389);
+ vst1q_s16(out + out_stride * 5 + i, v2484);
+ vst1q_s16(out + out_stride * 6 + i, v2578);
+ vst1q_s16(out + out_stride * 7 + i, v2672);
+ vst1q_s16(out + out_stride * 8 + i, v2718);
+ vst1q_s16(out + out_stride * 9 + i, v2764);
+ vst1q_s16(out + out_stride * 10 + i, v2810);
+ vst1q_s16(out + out_stride * 11 + i, v2856);
+ vst1q_s16(out + out_stride * 12 + i, v2902);
+ vst1q_s16(out + out_stride * 13 + i, v2948);
+ vst1q_s16(out + out_stride * 14 + i, v2994);
+ vst1q_s16(out + out_stride * 15 + i, v3040);
+ vst1q_s16(out + out_stride * 16 + i, v3062);
+ vst1q_s16(out + out_stride * 17 + i, v3084);
+ vst1q_s16(out + out_stride * 18 + i, v3106);
+ vst1q_s16(out + out_stride * 19 + i, v3128);
+ vst1q_s16(out + out_stride * 20 + i, v3150);
+ vst1q_s16(out + out_stride * 21 + i, v3172);
+ vst1q_s16(out + out_stride * 22 + i, v3194);
+ vst1q_s16(out + out_stride * 23 + i, v3216);
+ vst1q_s16(out + out_stride * 24 + i, v3238);
+ vst1q_s16(out + out_stride * 25 + i, v3260);
+ vst1q_s16(out + out_stride * 26 + i, v3282);
+ vst1q_s16(out + out_stride * 27 + i, v3304);
+ vst1q_s16(out + out_stride * 28 + i, v3326);
+ vst1q_s16(out + out_stride * 29 + i, v3348);
+ vst1q_s16(out + out_stride * 30 + i, v3370);
+ vst1q_s16(out + out_stride * 31 + i, v3392);
+ vst1q_s16(out + out_stride * 32 + i, v3402);
+ vst1q_s16(out + out_stride * 33 + i, v3412);
+ vst1q_s16(out + out_stride * 34 + i, v3422);
+ vst1q_s16(out + out_stride * 35 + i, v3432);
+ vst1q_s16(out + out_stride * 36 + i, v3442);
+ vst1q_s16(out + out_stride * 37 + i, v3452);
+ vst1q_s16(out + out_stride * 38 + i, v3462);
+ vst1q_s16(out + out_stride * 39 + i, v3472);
+ vst1q_s16(out + out_stride * 40 + i, v3482);
+ vst1q_s16(out + out_stride * 41 + i, v3492);
+ vst1q_s16(out + out_stride * 42 + i, v3502);
+ vst1q_s16(out + out_stride * 43 + i, v3512);
+ vst1q_s16(out + out_stride * 44 + i, v3522);
+ vst1q_s16(out + out_stride * 45 + i, v3532);
+ vst1q_s16(out + out_stride * 46 + i, v3542);
+ vst1q_s16(out + out_stride * 47 + i, v3552);
+ vst1q_s16(out + out_stride * 48 + i, v3562);
+ vst1q_s16(out + out_stride * 49 + i, v3572);
+ vst1q_s16(out + out_stride * 50 + i, v3582);
+ vst1q_s16(out + out_stride * 51 + i, v3592);
+ vst1q_s16(out + out_stride * 52 + i, v3602);
+ vst1q_s16(out + out_stride * 53 + i, v3612);
+ vst1q_s16(out + out_stride * 54 + i, v3622);
+ vst1q_s16(out + out_stride * 55 + i, v3632);
+ vst1q_s16(out + out_stride * 56 + i, v3642);
+ vst1q_s16(out + out_stride * 57 + i, v3652);
+ vst1q_s16(out + out_stride * 58 + i, v3662);
+ vst1q_s16(out + out_stride * 59 + i, v3672);
+ vst1q_s16(out + out_stride * 60 + i, v3682);
+ vst1q_s16(out + out_stride * 61 + i, v3692);
+ vst1q_s16(out + out_stride * 62 + i, v3702);
+ vst1q_s16(out + out_stride * 63 + i, v3712);
+ vst1q_s16(out + out_stride * 64 + i, v3716);
+ vst1q_s16(out + out_stride * 65 + i, v3720);
+ vst1q_s16(out + out_stride * 66 + i, v3724);
+ vst1q_s16(out + out_stride * 67 + i, v3728);
+ vst1q_s16(out + out_stride * 68 + i, v3732);
+ vst1q_s16(out + out_stride * 69 + i, v3736);
+ vst1q_s16(out + out_stride * 70 + i, v3740);
+ vst1q_s16(out + out_stride * 71 + i, v3744);
+ vst1q_s16(out + out_stride * 72 + i, v3748);
+ vst1q_s16(out + out_stride * 73 + i, v3752);
+ vst1q_s16(out + out_stride * 74 + i, v3756);
+ vst1q_s16(out + out_stride * 75 + i, v3760);
+ vst1q_s16(out + out_stride * 76 + i, v3764);
+ vst1q_s16(out + out_stride * 77 + i, v3768);
+ vst1q_s16(out + out_stride * 78 + i, v3772);
+ vst1q_s16(out + out_stride * 79 + i, v3776);
+ vst1q_s16(out + out_stride * 80 + i, v3780);
+ vst1q_s16(out + out_stride * 81 + i, v3784);
+ vst1q_s16(out + out_stride * 82 + i, v3788);
+ vst1q_s16(out + out_stride * 83 + i, v3792);
+ vst1q_s16(out + out_stride * 84 + i, v3796);
+ vst1q_s16(out + out_stride * 85 + i, v3800);
+ vst1q_s16(out + out_stride * 86 + i, v3804);
+ vst1q_s16(out + out_stride * 87 + i, v3808);
+ vst1q_s16(out + out_stride * 88 + i, v3812);
+ vst1q_s16(out + out_stride * 89 + i, v3816);
+ vst1q_s16(out + out_stride * 90 + i, v3820);
+ vst1q_s16(out + out_stride * 91 + i, v3824);
+ vst1q_s16(out + out_stride * 92 + i, v3828);
+ vst1q_s16(out + out_stride * 93 + i, v3832);
+ vst1q_s16(out + out_stride * 94 + i, v3836);
+ vst1q_s16(out + out_stride * 95 + i, v3840);
+ vst1q_s16(out + out_stride * 96 + i, v3844);
+ vst1q_s16(out + out_stride * 97 + i, v3848);
+ vst1q_s16(out + out_stride * 98 + i, v3852);
+ vst1q_s16(out + out_stride * 99 + i, v3856);
+ vst1q_s16(out + out_stride * 100 + i, v3860);
+ vst1q_s16(out + out_stride * 101 + i, v3864);
+ vst1q_s16(out + out_stride * 102 + i, v3868);
+ vst1q_s16(out + out_stride * 103 + i, v3872);
+ vst1q_s16(out + out_stride * 104 + i, v3876);
+ vst1q_s16(out + out_stride * 105 + i, v3880);
+ vst1q_s16(out + out_stride * 106 + i, v3884);
+ vst1q_s16(out + out_stride * 107 + i, v3888);
+ vst1q_s16(out + out_stride * 108 + i, v3892);
+ vst1q_s16(out + out_stride * 109 + i, v3896);
+ vst1q_s16(out + out_stride * 110 + i, v3900);
+ vst1q_s16(out + out_stride * 111 + i, v3904);
+ vst1q_s16(out + out_stride * 112 + i, v3908);
+ vst1q_s16(out + out_stride * 113 + i, v3912);
+ vst1q_s16(out + out_stride * 114 + i, v3916);
+ vst1q_s16(out + out_stride * 115 + i, v3920);
+ vst1q_s16(out + out_stride * 116 + i, v3924);
+ vst1q_s16(out + out_stride * 117 + i, v3928);
+ vst1q_s16(out + out_stride * 118 + i, v3932);
+ vst1q_s16(out + out_stride * 119 + i, v3936);
+ vst1q_s16(out + out_stride * 120 + i, v3940);
+ vst1q_s16(out + out_stride * 121 + i, v3944);
+ vst1q_s16(out + out_stride * 122 + i, v3948);
+ vst1q_s16(out + out_stride * 123 + i, v3952);
+ vst1q_s16(out + out_stride * 124 + i, v3956);
+ vst1q_s16(out + out_stride * 125 + i, v3960);
+ vst1q_s16(out + out_stride * 126 + i, v3964);
+ vst1q_s16(out + out_stride * 127 + i, v3968);
+ vst1q_s16(out + out_stride * 128 + i, v3969);
+ vst1q_s16(out + out_stride * 129 + i, v3970);
+ vst1q_s16(out + out_stride * 130 + i, v3971);
+ vst1q_s16(out + out_stride * 131 + i, v3972);
+ vst1q_s16(out + out_stride * 132 + i, v3973);
+ vst1q_s16(out + out_stride * 133 + i, v3974);
+ vst1q_s16(out + out_stride * 134 + i, v3975);
+ vst1q_s16(out + out_stride * 135 + i, v3976);
+ vst1q_s16(out + out_stride * 136 + i, v3977);
+ vst1q_s16(out + out_stride * 137 + i, v3978);
+ vst1q_s16(out + out_stride * 138 + i, v3979);
+ vst1q_s16(out + out_stride * 139 + i, v3980);
+ vst1q_s16(out + out_stride * 140 + i, v3981);
+ vst1q_s16(out + out_stride * 141 + i, v3982);
+ vst1q_s16(out + out_stride * 142 + i, v3983);
+ vst1q_s16(out + out_stride * 143 + i, v3984);
+ vst1q_s16(out + out_stride * 144 + i, v3985);
+ vst1q_s16(out + out_stride * 145 + i, v3986);
+ vst1q_s16(out + out_stride * 146 + i, v3987);
+ vst1q_s16(out + out_stride * 147 + i, v3988);
+ vst1q_s16(out + out_stride * 148 + i, v3989);
+ vst1q_s16(out + out_stride * 149 + i, v3990);
+ vst1q_s16(out + out_stride * 150 + i, v3991);
+ vst1q_s16(out + out_stride * 151 + i, v3992);
+ vst1q_s16(out + out_stride * 152 + i, v3993);
+ vst1q_s16(out + out_stride * 153 + i, v3994);
+ vst1q_s16(out + out_stride * 154 + i, v3995);
+ vst1q_s16(out + out_stride * 155 + i, v3996);
+ vst1q_s16(out + out_stride * 156 + i, v3997);
+ vst1q_s16(out + out_stride * 157 + i, v3998);
+ vst1q_s16(out + out_stride * 158 + i, v3999);
+ vst1q_s16(out + out_stride * 159 + i, v4000);
+ vst1q_s16(out + out_stride * 160 + i, v4001);
+ vst1q_s16(out + out_stride * 161 + i, v4002);
+ vst1q_s16(out + out_stride * 162 + i, v4003);
+ vst1q_s16(out + out_stride * 163 + i, v4004);
+ vst1q_s16(out + out_stride * 164 + i, v4005);
+ vst1q_s16(out + out_stride * 165 + i, v4006);
+ vst1q_s16(out + out_stride * 166 + i, v4007);
+ vst1q_s16(out + out_stride * 167 + i, v4008);
+ vst1q_s16(out + out_stride * 168 + i, v4009);
+ vst1q_s16(out + out_stride * 169 + i, v4010);
+ vst1q_s16(out + out_stride * 170 + i, v4011);
+ vst1q_s16(out + out_stride * 171 + i, v4012);
+ vst1q_s16(out + out_stride * 172 + i, v4013);
+ vst1q_s16(out + out_stride * 173 + i, v4014);
+ vst1q_s16(out + out_stride * 174 + i, v4015);
+ vst1q_s16(out + out_stride * 175 + i, v4016);
+ vst1q_s16(out + out_stride * 176 + i, v4017);
+ vst1q_s16(out + out_stride * 177 + i, v4018);
+ vst1q_s16(out + out_stride * 178 + i, v4019);
+ vst1q_s16(out + out_stride * 179 + i, v4020);
+ vst1q_s16(out + out_stride * 180 + i, v4021);
+ vst1q_s16(out + out_stride * 181 + i, v4022);
+ vst1q_s16(out + out_stride * 182 + i, v4023);
+ vst1q_s16(out + out_stride * 183 + i, v4024);
+ vst1q_s16(out + out_stride * 184 + i, v4025);
+ vst1q_s16(out + out_stride * 185 + i, v4026);
+ vst1q_s16(out + out_stride * 186 + i, v4027);
+ vst1q_s16(out + out_stride * 187 + i, v4028);
+ vst1q_s16(out + out_stride * 188 + i, v4029);
+ vst1q_s16(out + out_stride * 189 + i, v4030);
+ vst1q_s16(out + out_stride * 190 + i, v4031);
+ vst1q_s16(out + out_stride * 191 + i, v4032);
+ vst1q_s16(out + out_stride * 192 + i, v4033);
+ vst1q_s16(out + out_stride * 193 + i, v4034);
+ vst1q_s16(out + out_stride * 194 + i, v4035);
+ vst1q_s16(out + out_stride * 195 + i, v4036);
+ vst1q_s16(out + out_stride * 196 + i, v4037);
+ vst1q_s16(out + out_stride * 197 + i, v4038);
+ vst1q_s16(out + out_stride * 198 + i, v4039);
+ vst1q_s16(out + out_stride * 199 + i, v4040);
+ vst1q_s16(out + out_stride * 200 + i, v4041);
+ vst1q_s16(out + out_stride * 201 + i, v4042);
+ vst1q_s16(out + out_stride * 202 + i, v4043);
+ vst1q_s16(out + out_stride * 203 + i, v4044);
+ vst1q_s16(out + out_stride * 204 + i, v4045);
+ vst1q_s16(out + out_stride * 205 + i, v4046);
+ vst1q_s16(out + out_stride * 206 + i, v4047);
+ vst1q_s16(out + out_stride * 207 + i, v4048);
+ vst1q_s16(out + out_stride * 208 + i, v4049);
+ vst1q_s16(out + out_stride * 209 + i, v4050);
+ vst1q_s16(out + out_stride * 210 + i, v4051);
+ vst1q_s16(out + out_stride * 211 + i, v4052);
+ vst1q_s16(out + out_stride * 212 + i, v4053);
+ vst1q_s16(out + out_stride * 213 + i, v4054);
+ vst1q_s16(out + out_stride * 214 + i, v4055);
+ vst1q_s16(out + out_stride * 215 + i, v4056);
+ vst1q_s16(out + out_stride * 216 + i, v4057);
+ vst1q_s16(out + out_stride * 217 + i, v4058);
+ vst1q_s16(out + out_stride * 218 + i, v4059);
+ vst1q_s16(out + out_stride * 219 + i, v4060);
+ vst1q_s16(out + out_stride * 220 + i, v4061);
+ vst1q_s16(out + out_stride * 221 + i, v4062);
+ vst1q_s16(out + out_stride * 222 + i, v4063);
+ vst1q_s16(out + out_stride * 223 + i, v4064);
+ vst1q_s16(out + out_stride * 224 + i, v4065);
+ vst1q_s16(out + out_stride * 225 + i, v4066);
+ vst1q_s16(out + out_stride * 226 + i, v4067);
+ vst1q_s16(out + out_stride * 227 + i, v4068);
+ vst1q_s16(out + out_stride * 228 + i, v4069);
+ vst1q_s16(out + out_stride * 229 + i, v4070);
+ vst1q_s16(out + out_stride * 230 + i, v4071);
+ vst1q_s16(out + out_stride * 231 + i, v4072);
+ vst1q_s16(out + out_stride * 232 + i, v4073);
+ vst1q_s16(out + out_stride * 233 + i, v4074);
+ vst1q_s16(out + out_stride * 234 + i, v4075);
+ vst1q_s16(out + out_stride * 235 + i, v4076);
+ vst1q_s16(out + out_stride * 236 + i, v4077);
+ vst1q_s16(out + out_stride * 237 + i, v4078);
+ vst1q_s16(out + out_stride * 238 + i, v4079);
+ vst1q_s16(out + out_stride * 239 + i, v4080);
+ vst1q_s16(out + out_stride * 240 + i, v4081);
+ vst1q_s16(out + out_stride * 241 + i, v4082);
+ vst1q_s16(out + out_stride * 242 + i, v4083);
+ vst1q_s16(out + out_stride * 243 + i, v4084);
+ vst1q_s16(out + out_stride * 244 + i, v4085);
+ vst1q_s16(out + out_stride * 245 + i, v4086);
+ vst1q_s16(out + out_stride * 246 + i, v4087);
+ vst1q_s16(out + out_stride * 247 + i, v4088);
+ vst1q_s16(out + out_stride * 248 + i, v4089);
+ vst1q_s16(out + out_stride * 249 + i, v4090);
+ vst1q_s16(out + out_stride * 250 + i, v4091);
+ vst1q_s16(out + out_stride * 251 + i, v4092);
+ vst1q_s16(out + out_stride * 252 + i, v4093);
+ vst1q_s16(out + out_stride * 253 + i, v4094);
+ vst1q_s16(out + out_stride * 254 + i, v4095);
+ vst1q_s16(out + out_stride * 255 + i, v4096);
+ }
+}
diff --git a/third_party/jpeg-xl/lib/jxl/fast_dct32-inl.h b/third_party/jpeg-xl/lib/jxl/fast_dct32-inl.h
new file mode 100644
index 0000000000..0f3b31cfea
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/fast_dct32-inl.h
@@ -0,0 +1,419 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/* This file is automatically generated. Do not modify it directly. */
+#if HWY_TARGET != HWY_NEON
+#error "only include this file from fast_dct-inl.h"
+#endif
+
+constexpr size_t FastIDCTIntegerBits(FastDCTTag<32>) { return 1; }
+
+void FastIDCT(FastDCTTag<32>, const int16_t* in, size_t in_stride, int16_t* out,
+ size_t out_stride, size_t count) {
+ JXL_ASSERT(count % 8 == 0);
+ for (size_t i = 0; i < count; i += 8) {
+ int16x8_t v0 = vld1q_s16(in + in_stride * 0 + i);
+ int16x8_t v1 = vld1q_s16(in + in_stride * 16 + i);
+ int16x8_t v2 = vaddq_s16(v0, v1);
+ int16x8_t v3 = vld1q_s16(in + in_stride * 8 + i);
+ int16x8_t v4_tmp = vqrdmulhq_n_s16(v3, 13573);
+ int16x8_t v4 = vaddq_s16(v4_tmp, v3);
+ int16x8_t v5 = vld1q_s16(in + in_stride * 24 + i);
+ int16x8_t v6 = vaddq_s16(v5, v3);
+ int16x8_t v7 = vaddq_s16(v4, v6);
+ int16x8_t v8 = vqrdmulhq_n_s16(v7, 17734);
+ int16x8_t v9 = vaddq_s16(v2, v8);
+ int16x8_t v10 = vld1q_s16(in + in_stride * 4 + i);
+ int16x8_t v11_tmp = vqrdmulhq_n_s16(v10, 13573);
+ int16x8_t v11 = vaddq_s16(v11_tmp, v10);
+ int16x8_t v12 = vld1q_s16(in + in_stride * 20 + i);
+ int16x8_t v13 = vld1q_s16(in + in_stride * 12 + i);
+ int16x8_t v14 = vaddq_s16(v12, v13);
+ int16x8_t v15 = vaddq_s16(v11, v14);
+ int16x8_t v16 = vld1q_s16(in + in_stride * 28 + i);
+ int16x8_t v17 = vaddq_s16(v16, v12);
+ int16x8_t v18 = vaddq_s16(v13, v10);
+ int16x8_t v19 = vaddq_s16(v17, v18);
+ int16x8_t v20 = vqrdmulhq_n_s16(v19, 17734);
+ int16x8_t v21 = vqrdmulhq_n_s16(v18, 25080);
+ int16x8_t v22 = vaddq_s16(v20, v21);
+ int16x8_t v23 = vaddq_s16(v15, v22);
+ int16x8_t v24 = vqrdmulhq_n_s16(v23, 16705);
+ int16x8_t v25 = vaddq_s16(v9, v24);
+ int16x8_t v26 = vld1q_s16(in + in_stride * 2 + i);
+ int16x8_t v27_tmp = vqrdmulhq_n_s16(v26, 13573);
+ int16x8_t v27 = vaddq_s16(v27_tmp, v26);
+ int16x8_t v28 = vld1q_s16(in + in_stride * 18 + i);
+ int16x8_t v29 = vld1q_s16(in + in_stride * 14 + i);
+ int16x8_t v30 = vaddq_s16(v28, v29);
+ int16x8_t v31 = vaddq_s16(v27, v30);
+ int16x8_t v32 = vld1q_s16(in + in_stride * 10 + i);
+ int16x8_t v33 = vld1q_s16(in + in_stride * 6 + i);
+ int16x8_t v34 = vaddq_s16(v32, v33);
+ int16x8_t v35 = vqrdmulhq_n_s16(v34, 25080);
+ int16x8_t v36 = vld1q_s16(in + in_stride * 26 + i);
+ int16x8_t v37 = vld1q_s16(in + in_stride * 22 + i);
+ int16x8_t v38 = vaddq_s16(v36, v37);
+ int16x8_t v39 = vaddq_s16(v38, v34);
+ int16x8_t v40 = vqrdmulhq_n_s16(v39, 17734);
+ int16x8_t v41 = vaddq_s16(v35, v40);
+ int16x8_t v42 = vaddq_s16(v31, v41);
+ int16x8_t v43 = vaddq_s16(v33, v26);
+ int16x8_t v44_tmp = vqrdmulhq_n_s16(v43, 13573);
+ int16x8_t v44 = vaddq_s16(v44_tmp, v43);
+ int16x8_t v45 = vaddq_s16(v29, v32);
+ int16x8_t v46 = vaddq_s16(v37, v28);
+ int16x8_t v47 = vaddq_s16(v45, v46);
+ int16x8_t v48 = vaddq_s16(v44, v47);
+ int16x8_t v49 = vqrdmulhq_n_s16(v48, 16705);
+ int16x8_t v50 = vld1q_s16(in + in_stride * 30 + i);
+ int16x8_t v51 = vaddq_s16(v50, v36);
+ int16x8_t v52 = vaddq_s16(v51, v46);
+ int16x8_t v53 = vqrdmulhq_n_s16(v52, 17734);
+ int16x8_t v54 = vaddq_s16(v45, v43);
+ int16x8_t v55_tmp = vqrdmulhq_n_s16(v54, 10045);
+ int16x8_t v55 = vaddq_s16(v55_tmp, v54);
+ int16x8_t v56 = vaddq_s16(v53, v55);
+ int16x8_t v57 = vqrdmulhq_n_s16(v56, 16705);
+ int16x8_t v58 = vaddq_s16(v49, v57);
+ int16x8_t v59 = vaddq_s16(v42, v58);
+ int16x8_t v60 = vqrdmulhq_n_s16(v59, 16463);
+ int16x8_t v61 = vaddq_s16(v25, v60);
+ int16x8_t v62 = vld1q_s16(in + in_stride * 13 + i);
+ int16x8_t v63 = vld1q_s16(in + in_stride * 11 + i);
+ int16x8_t v64 = vaddq_s16(v62, v63);
+ int16x8_t v65 = vld1q_s16(in + in_stride * 5 + i);
+ int16x8_t v66 = vld1q_s16(in + in_stride * 3 + i);
+ int16x8_t v67 = vaddq_s16(v65, v66);
+ int16x8_t v68 = vaddq_s16(v64, v67);
+ int16x8_t v69_tmp = vqrdmulhq_n_s16(v68, 10045);
+ int16x8_t v69 = vaddq_s16(v69_tmp, v68);
+ int16x8_t v70 = vld1q_s16(in + in_stride * 21 + i);
+ int16x8_t v71 = vld1q_s16(in + in_stride * 19 + i);
+ int16x8_t v72 = vaddq_s16(v70, v71);
+ int16x8_t v73 = vld1q_s16(in + in_stride * 29 + i);
+ int16x8_t v74 = vld1q_s16(in + in_stride * 27 + i);
+ int16x8_t v75 = vaddq_s16(v73, v74);
+ int16x8_t v76 = vaddq_s16(v72, v75);
+ int16x8_t v77 = vqrdmulhq_n_s16(v76, 17734);
+ int16x8_t v78 = vaddq_s16(v69, v77);
+ int16x8_t v79 = vqrdmulhq_n_s16(v78, 16705);
+ int16x8_t v80_tmp = vqrdmulhq_n_s16(v67, 13573);
+ int16x8_t v80 = vaddq_s16(v80_tmp, v67);
+ int16x8_t v81 = vaddq_s16(v64, v72);
+ int16x8_t v82 = vaddq_s16(v80, v81);
+ int16x8_t v83 = vqrdmulhq_n_s16(v82, 16705);
+ int16x8_t v84 = vaddq_s16(v79, v83);
+ int16x8_t v85 = vld1q_s16(in + in_stride * 1 + i);
+ int16x8_t v86_tmp = vqrdmulhq_n_s16(v85, 13573);
+ int16x8_t v86 = vaddq_s16(v86_tmp, v85);
+ int16x8_t v87 = vld1q_s16(in + in_stride * 17 + i);
+ int16x8_t v88 = vld1q_s16(in + in_stride * 15 + i);
+ int16x8_t v89 = vaddq_s16(v87, v88);
+ int16x8_t v90 = vaddq_s16(v86, v89);
+ int16x8_t v91 = vld1q_s16(in + in_stride * 9 + i);
+ int16x8_t v92 = vld1q_s16(in + in_stride * 7 + i);
+ int16x8_t v93 = vaddq_s16(v91, v92);
+ int16x8_t v94 = vqrdmulhq_n_s16(v93, 25080);
+ int16x8_t v95 = vld1q_s16(in + in_stride * 25 + i);
+ int16x8_t v96 = vld1q_s16(in + in_stride * 23 + i);
+ int16x8_t v97 = vaddq_s16(v95, v96);
+ int16x8_t v98 = vaddq_s16(v97, v93);
+ int16x8_t v99 = vqrdmulhq_n_s16(v98, 17734);
+ int16x8_t v100 = vaddq_s16(v94, v99);
+ int16x8_t v101 = vaddq_s16(v90, v100);
+ int16x8_t v102 = vaddq_s16(v84, v101);
+ int16x8_t v103 = vaddq_s16(v92, v65);
+ int16x8_t v104 = vaddq_s16(v66, v85);
+ int16x8_t v105 = vaddq_s16(v103, v104);
+ int16x8_t v106_tmp = vqrdmulhq_n_s16(v105, 13573);
+ int16x8_t v106 = vaddq_s16(v106_tmp, v105);
+ int16x8_t v107 = vaddq_s16(v96, v70);
+ int16x8_t v108 = vaddq_s16(v71, v87);
+ int16x8_t v109 = vaddq_s16(v107, v108);
+ int16x8_t v110 = vaddq_s16(v63, v91);
+ int16x8_t v111 = vaddq_s16(v88, v62);
+ int16x8_t v112 = vaddq_s16(v110, v111);
+ int16x8_t v113 = vaddq_s16(v109, v112);
+ int16x8_t v114 = vaddq_s16(v106, v113);
+ int16x8_t v115 = vqrdmulhq_n_s16(v114, 16705);
+ int16x8_t v116 = vaddq_s16(v112, v105);
+ int16x8_t v117 = vqrdmulhq_n_s16(v116, 25080);
+ int16x8_t v118 = vqrdmulhq_n_s16(v116, 17734);
+ int16x8_t v119 = vaddq_s16(v74, v95);
+ int16x8_t v120 = vld1q_s16(in + in_stride * 31 + i);
+ int16x8_t v121 = vaddq_s16(v120, v73);
+ int16x8_t v122 = vaddq_s16(v119, v121);
+ int16x8_t v123 = vaddq_s16(v122, v109);
+ int16x8_t v124 = vqrdmulhq_n_s16(v123, 17734);
+ int16x8_t v125 = vaddq_s16(v118, v124);
+ int16x8_t v126 = vaddq_s16(v117, v125);
+ int16x8_t v127 = vqrdmulhq_n_s16(v126, 16705);
+ int16x8_t v128 = vaddq_s16(v115, v127);
+ int16x8_t v129 = vqrdmulhq_n_s16(v128, 16463);
+ int16x8_t v130_tmp = vqrdmulhq_n_s16(v104, 13573);
+ int16x8_t v130 = vaddq_s16(v130_tmp, v104);
+ int16x8_t v131 = vaddq_s16(v108, v111);
+ int16x8_t v132 = vaddq_s16(v130, v131);
+ int16x8_t v133 = vaddq_s16(v119, v107);
+ int16x8_t v134 = vqrdmulhq_n_s16(v133, 17734);
+ int16x8_t v135 = vaddq_s16(v110, v103);
+ int16x8_t v136_tmp = vqrdmulhq_n_s16(v135, 10045);
+ int16x8_t v136 = vaddq_s16(v136_tmp, v135);
+ int16x8_t v137 = vaddq_s16(v134, v136);
+ int16x8_t v138 = vaddq_s16(v132, v137);
+ int16x8_t v139 = vqrdmulhq_n_s16(v138, 16463);
+ int16x8_t v140 = vaddq_s16(v129, v139);
+ int16x8_t v141 = vaddq_s16(v102, v140);
+ int16x8_t v142 = vqrdmulhq_n_s16(v141, 16404);
+ int16x8_t v143 = vaddq_s16(v61, v142);
+ int16x8_t v144 = vsubq_s16(v0, v1);
+ int16x8_t v145 = vsubq_s16(v4, v6);
+ int16x8_t v146_tmp = vqrdmulhq_n_s16(v145, 10045);
+ int16x8_t v146 = vaddq_s16(v146_tmp, v145);
+ int16x8_t v147 = vaddq_s16(v144, v146);
+ int16x8_t v148 = vsubq_s16(v11, v14);
+ int16x8_t v149 = vqrdmulhq_n_s16(v18, 17734);
+ int16x8_t v150_tmp = vqrdmulhq_n_s16(v17, 10045);
+ int16x8_t v150 = vaddq_s16(v150_tmp, v17);
+ int16x8_t v151 = vsubq_s16(v149, v150);
+ int16x8_t v152 = vaddq_s16(v148, v151);
+ int16x8_t v153 = vqrdmulhq_n_s16(v152, 19705);
+ int16x8_t v154 = vaddq_s16(v147, v153);
+ int16x8_t v155 = vsubq_s16(v27, v30);
+ int16x8_t v156 = vqrdmulhq_n_s16(v34, 17734);
+ int16x8_t v157_tmp = vqrdmulhq_n_s16(v38, 10045);
+ int16x8_t v157 = vaddq_s16(v157_tmp, v38);
+ int16x8_t v158 = vsubq_s16(v156, v157);
+ int16x8_t v159 = vaddq_s16(v155, v158);
+ int16x8_t v160 = vqrdmulhq_n_s16(v54, 13573);
+ int16x8_t v161 = vsubq_s16(v160, v52);
+ int16x8_t v162 = vqrdmulhq_n_s16(v161, 25746);
+ int16x8_t v163 = vsubq_s16(v44, v47);
+ int16x8_t v164 = vqrdmulhq_n_s16(v163, 19705);
+ int16x8_t v165 = vaddq_s16(v162, v164);
+ int16x8_t v166 = vaddq_s16(v159, v165);
+ int16x8_t v167 = vqrdmulhq_n_s16(v166, 17121);
+ int16x8_t v168 = vaddq_s16(v154, v167);
+ int16x8_t v169 = vsubq_s16(v86, v89);
+ int16x8_t v170 = vqrdmulhq_n_s16(v93, 17734);
+ int16x8_t v171_tmp = vqrdmulhq_n_s16(v97, 10045);
+ int16x8_t v171 = vaddq_s16(v171_tmp, v97);
+ int16x8_t v172 = vsubq_s16(v170, v171);
+ int16x8_t v173 = vaddq_s16(v169, v172);
+ int16x8_t v174 = vsubq_s16(v80, v81);
+ int16x8_t v175 = vqrdmulhq_n_s16(v174, 19705);
+ int16x8_t v176 = vqrdmulhq_n_s16(v68, 13573);
+ int16x8_t v177 = vsubq_s16(v176, v76);
+ int16x8_t v178 = vqrdmulhq_n_s16(v177, 25746);
+ int16x8_t v179 = vaddq_s16(v175, v178);
+ int16x8_t v180 = vaddq_s16(v173, v179);
+ int16x8_t v181 = vsubq_s16(v130, v131);
+ int16x8_t v182 = vqrdmulhq_n_s16(v135, 13573);
+ int16x8_t v183 = vsubq_s16(v182, v133);
+ int16x8_t v184_tmp = vqrdmulhq_n_s16(v183, 10045);
+ int16x8_t v184 = vaddq_s16(v184_tmp, v183);
+ int16x8_t v185 = vaddq_s16(v181, v184);
+ int16x8_t v186 = vqrdmulhq_n_s16(v185, 17121);
+ int16x8_t v187 = vqrdmulhq_n_s16(v105, 27867);
+ int16x8_t v188 = vqrdmulhq_n_s16(v113, 19705);
+ int16x8_t v189 = vsubq_s16(v187, v188);
+ int16x8_t v190 = vqrdmulhq_n_s16(v116, 13573);
+ int16x8_t v191 = vsubq_s16(v190, v123);
+ int16x8_t v192 = vqrdmulhq_n_s16(v191, 25746);
+ int16x8_t v193 = vaddq_s16(v189, v192);
+ int16x8_t v194 = vqrdmulhq_n_s16(v193, 17121);
+ int16x8_t v195 = vaddq_s16(v186, v194);
+ int16x8_t v196 = vaddq_s16(v180, v195);
+ int16x8_t v197 = vqrdmulhq_n_s16(v196, 16563);
+ int16x8_t v198 = vaddq_s16(v168, v197);
+ int16x8_t v199 = vsubq_s16(v144, v146);
+ int16x8_t v200 = vsubq_s16(v148, v151);
+ int16x8_t v201 = vqrdmulhq_n_s16(v200, 29490);
+ int16x8_t v202 = vaddq_s16(v199, v201);
+ int16x8_t v203 = vsubq_s16(v155, v158);
+ int16x8_t v204 = vqrdmulhq_n_s16(v163, 29490);
+ int16x8_t v205_tmp = vqrdmulhq_n_s16(v161, 5763);
+ int16x8_t v205 = vaddq_s16(v205_tmp, v161);
+ int16x8_t v206 = vsubq_s16(v204, v205);
+ int16x8_t v207 = vaddq_s16(v203, v206);
+ int16x8_t v208 = vqrdmulhq_n_s16(v207, 18578);
+ int16x8_t v209 = vaddq_s16(v202, v208);
+ int16x8_t v210 = vsubq_s16(v169, v172);
+ int16x8_t v211 = vqrdmulhq_n_s16(v174, 29490);
+ int16x8_t v212_tmp = vqrdmulhq_n_s16(v177, 5763);
+ int16x8_t v212 = vaddq_s16(v212_tmp, v177);
+ int16x8_t v213 = vsubq_s16(v211, v212);
+ int16x8_t v214 = vaddq_s16(v210, v213);
+ int16x8_t v215 = vsubq_s16(v181, v184);
+ int16x8_t v216 = vqrdmulhq_n_s16(v215, 18578);
+ int16x8_t v217 = vqrdmulhq_n_s16(v189, 27803);
+ int16x8_t v218 = vqrdmulhq_n_s16(v191, 21845);
+ int16x8_t v219 = vsubq_s16(v217, v218);
+ int16x8_t v220 = vaddq_s16(v216, v219);
+ int16x8_t v221 = vaddq_s16(v214, v220);
+ int16x8_t v222 = vqrdmulhq_n_s16(v221, 16890);
+ int16x8_t v223 = vaddq_s16(v209, v222);
+ int16x8_t v224 = vsubq_s16(v2, v8);
+ int16x8_t v225 = vsubq_s16(v15, v22);
+ int16x8_t v226_tmp = vqrdmulhq_n_s16(v225, 18446);
+ int16x8_t v226 = vmlaq_n_s16(v226_tmp, v225, 2);
+ int16x8_t v227 = vaddq_s16(v224, v226);
+ int16x8_t v228 = vsubq_s16(v31, v41);
+ int16x8_t v229 = vsubq_s16(v48, v56);
+ int16x8_t v230_tmp = vqrdmulhq_n_s16(v229, 18446);
+ int16x8_t v230 = vmlaq_n_s16(v230_tmp, v229, 2);
+ int16x8_t v231 = vaddq_s16(v228, v230);
+ int16x8_t v232 = vqrdmulhq_n_s16(v231, 21195);
+ int16x8_t v233 = vaddq_s16(v227, v232);
+ int16x8_t v234 = vsubq_s16(v82, v78);
+ int16x8_t v235_tmp = vqrdmulhq_n_s16(v234, 18446);
+ int16x8_t v235 = vmlaq_n_s16(v235_tmp, v234, 2);
+ int16x8_t v236 = vsubq_s16(v90, v100);
+ int16x8_t v237 = vaddq_s16(v235, v236);
+ int16x8_t v238 = vsubq_s16(v132, v137);
+ int16x8_t v239 = vsubq_s16(v114, v126);
+ int16x8_t v240_tmp = vqrdmulhq_n_s16(v239, 18446);
+ int16x8_t v240 = vmlaq_n_s16(v240_tmp, v239, 2);
+ int16x8_t v241 = vaddq_s16(v238, v240);
+ int16x8_t v242 = vqrdmulhq_n_s16(v241, 21195);
+ int16x8_t v243 = vaddq_s16(v237, v242);
+ int16x8_t v244 = vqrdmulhq_n_s16(v243, 17401);
+ int16x8_t v245 = vaddq_s16(v233, v244);
+ int16x8_t v246 = vsubq_s16(v228, v230);
+ int16x8_t v247 = vqrdmulhq_n_s16(v246, 25826);
+ int16x8_t v248 = vsubq_s16(v224, v226);
+ int16x8_t v249 = vaddq_s16(v247, v248);
+ int16x8_t v250 = vsubq_s16(v238, v240);
+ int16x8_t v251 = vqrdmulhq_n_s16(v250, 25826);
+ int16x8_t v252 = vsubq_s16(v236, v235);
+ int16x8_t v253 = vaddq_s16(v251, v252);
+ int16x8_t v254 = vqrdmulhq_n_s16(v253, 18124);
+ int16x8_t v255 = vaddq_s16(v249, v254);
+ int16x8_t v256 = vsubq_s16(v199, v201);
+ int16x8_t v257 = vsubq_s16(v203, v206);
+ int16x8_t v258_tmp = vqrdmulhq_n_s16(v257, 1988);
+ int16x8_t v258 = vaddq_s16(v258_tmp, v257);
+ int16x8_t v259 = vaddq_s16(v256, v258);
+ int16x8_t v260 = vsubq_s16(v210, v213);
+ int16x8_t v261_tmp = vqrdmulhq_n_s16(v219, 25030);
+ int16x8_t v261 = vaddq_s16(v261_tmp, v219);
+ int16x8_t v262 = vsubq_s16(v215, v261);
+ int16x8_t v263_tmp = vqrdmulhq_n_s16(v262, 1988);
+ int16x8_t v263 = vaddq_s16(v263_tmp, v262);
+ int16x8_t v264 = vaddq_s16(v260, v263);
+ int16x8_t v265 = vqrdmulhq_n_s16(v264, 19102);
+ int16x8_t v266 = vaddq_s16(v259, v265);
+ int16x8_t v267 = vsubq_s16(v147, v153);
+ int16x8_t v268 = vsubq_s16(v159, v165);
+ int16x8_t v269_tmp = vqrdmulhq_n_s16(v268, 23673);
+ int16x8_t v269 = vaddq_s16(v269_tmp, v268);
+ int16x8_t v270 = vaddq_s16(v267, v269);
+ int16x8_t v271 = vsubq_s16(v173, v179);
+ int16x8_t v272 = vsubq_s16(v185, v193);
+ int16x8_t v273_tmp = vqrdmulhq_n_s16(v272, 23673);
+ int16x8_t v273 = vaddq_s16(v273_tmp, v272);
+ int16x8_t v274 = vaddq_s16(v271, v273);
+ int16x8_t v275 = vqrdmulhq_n_s16(v274, 20398);
+ int16x8_t v276 = vaddq_s16(v270, v275);
+ int16x8_t v277 = vsubq_s16(v9, v24);
+ int16x8_t v278 = vsubq_s16(v42, v58);
+ int16x8_t v279_tmp = vqrdmulhq_n_s16(v278, 3314);
+ int16x8_t v279 = vmlaq_n_s16(v279_tmp, v278, 5);
+ int16x8_t v280 = vaddq_s16(v277, v279);
+ int16x8_t v281 = vsubq_s16(v138, v128);
+ int16x8_t v282_tmp = vqrdmulhq_n_s16(v281, 3314);
+ int16x8_t v282 = vmlaq_n_s16(v282_tmp, v281, 5);
+ int16x8_t v283 = vsubq_s16(v101, v84);
+ int16x8_t v284 = vaddq_s16(v282, v283);
+ int16x8_t v285 = vqrdmulhq_n_s16(v284, 22112);
+ int16x8_t v286 = vaddq_s16(v280, v285);
+ int16x8_t v287 = vsubq_s16(v277, v279);
+ int16x8_t v288 = vsubq_s16(v283, v282);
+ int16x8_t v289 = vqrdmulhq_n_s16(v288, 24397);
+ int16x8_t v290 = vaddq_s16(v287, v289);
+ int16x8_t v291 = vsubq_s16(v267, v269);
+ int16x8_t v292 = vsubq_s16(v271, v273);
+ int16x8_t v293 = vqrdmulhq_n_s16(v292, 27504);
+ int16x8_t v294 = vaddq_s16(v291, v293);
+ int16x8_t v295 = vsubq_s16(v260, v263);
+ int16x8_t v296 = vqrdmulhq_n_s16(v295, 31869);
+ int16x8_t v297 = vsubq_s16(v256, v258);
+ int16x8_t v298 = vaddq_s16(v296, v297);
+ int16x8_t v299 = vsubq_s16(v248, v247);
+ int16x8_t v300 = vsubq_s16(v252, v251);
+ int16x8_t v301_tmp = vqrdmulhq_n_s16(v300, 5552);
+ int16x8_t v301 = vaddq_s16(v301_tmp, v300);
+ int16x8_t v302 = vaddq_s16(v299, v301);
+ int16x8_t v303 = vsubq_s16(v227, v232);
+ int16x8_t v304 = vsubq_s16(v237, v242);
+ int16x8_t v305_tmp = vqrdmulhq_n_s16(v304, 15865);
+ int16x8_t v305 = vaddq_s16(v305_tmp, v304);
+ int16x8_t v306 = vaddq_s16(v303, v305);
+ int16x8_t v307 = vsubq_s16(v202, v208);
+ int16x8_t v308 = vsubq_s16(v214, v220);
+ int16x8_t v309_tmp = vqrdmulhq_n_s16(v308, 1893);
+ int16x8_t v309 = vmlaq_n_s16(v309_tmp, v308, 2);
+ int16x8_t v310 = vaddq_s16(v307, v309);
+ int16x8_t v311 = vsubq_s16(v154, v167);
+ int16x8_t v312 = vsubq_s16(v180, v195);
+ int16x8_t v313_tmp = vqrdmulhq_n_s16(v312, 13357);
+ int16x8_t v313 = vmlaq_n_s16(v313_tmp, v312, 3);
+ int16x8_t v314 = vaddq_s16(v311, v313);
+ int16x8_t v315 = vsubq_s16(v102, v140);
+ int16x8_t v316_tmp = vqrdmulhq_n_s16(v315, 6226);
+ int16x8_t v316 = vmlaq_n_s16(v316_tmp, v315, 10);
+ int16x8_t v317 = vsubq_s16(v25, v60);
+ int16x8_t v318 = vaddq_s16(v316, v317);
+ int16x8_t v319 = vsubq_s16(v317, v316);
+ int16x8_t v320 = vsubq_s16(v311, v313);
+ int16x8_t v321 = vsubq_s16(v307, v309);
+ int16x8_t v322 = vsubq_s16(v303, v305);
+ int16x8_t v323 = vsubq_s16(v299, v301);
+ int16x8_t v324 = vsubq_s16(v297, v296);
+ int16x8_t v325 = vsubq_s16(v291, v293);
+ int16x8_t v326 = vsubq_s16(v287, v289);
+ int16x8_t v327 = vsubq_s16(v280, v285);
+ int16x8_t v328 = vsubq_s16(v270, v275);
+ int16x8_t v329 = vsubq_s16(v259, v265);
+ int16x8_t v330 = vsubq_s16(v249, v254);
+ int16x8_t v331 = vsubq_s16(v233, v244);
+ int16x8_t v332 = vsubq_s16(v209, v222);
+ int16x8_t v333 = vsubq_s16(v168, v197);
+ int16x8_t v334 = vsubq_s16(v61, v142);
+ vst1q_s16(out + out_stride * 0 + i, v143);
+ vst1q_s16(out + out_stride * 1 + i, v198);
+ vst1q_s16(out + out_stride * 2 + i, v223);
+ vst1q_s16(out + out_stride * 3 + i, v245);
+ vst1q_s16(out + out_stride * 4 + i, v255);
+ vst1q_s16(out + out_stride * 5 + i, v266);
+ vst1q_s16(out + out_stride * 6 + i, v276);
+ vst1q_s16(out + out_stride * 7 + i, v286);
+ vst1q_s16(out + out_stride * 8 + i, v290);
+ vst1q_s16(out + out_stride * 9 + i, v294);
+ vst1q_s16(out + out_stride * 10 + i, v298);
+ vst1q_s16(out + out_stride * 11 + i, v302);
+ vst1q_s16(out + out_stride * 12 + i, v306);
+ vst1q_s16(out + out_stride * 13 + i, v310);
+ vst1q_s16(out + out_stride * 14 + i, v314);
+ vst1q_s16(out + out_stride * 15 + i, v318);
+ vst1q_s16(out + out_stride * 16 + i, v319);
+ vst1q_s16(out + out_stride * 17 + i, v320);
+ vst1q_s16(out + out_stride * 18 + i, v321);
+ vst1q_s16(out + out_stride * 19 + i, v322);
+ vst1q_s16(out + out_stride * 20 + i, v323);
+ vst1q_s16(out + out_stride * 21 + i, v324);
+ vst1q_s16(out + out_stride * 22 + i, v325);
+ vst1q_s16(out + out_stride * 23 + i, v326);
+ vst1q_s16(out + out_stride * 24 + i, v327);
+ vst1q_s16(out + out_stride * 25 + i, v328);
+ vst1q_s16(out + out_stride * 26 + i, v329);
+ vst1q_s16(out + out_stride * 27 + i, v330);
+ vst1q_s16(out + out_stride * 28 + i, v331);
+ vst1q_s16(out + out_stride * 29 + i, v332);
+ vst1q_s16(out + out_stride * 30 + i, v333);
+ vst1q_s16(out + out_stride * 31 + i, v334);
+ }
+}
diff --git a/third_party/jpeg-xl/lib/jxl/fast_dct64-inl.h b/third_party/jpeg-xl/lib/jxl/fast_dct64-inl.h
new file mode 100644
index 0000000000..400da1a9de
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/fast_dct64-inl.h
@@ -0,0 +1,985 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/* This file is automatically generated. Do not modify it directly. */
+#if HWY_TARGET != HWY_NEON
+#error "only include this file from fast_dct-inl.h"
+#endif
+
+constexpr size_t FastIDCTIntegerBits(FastDCTTag<64>) { return 1; }
+
+void FastIDCT(FastDCTTag<64>, const int16_t* in, size_t in_stride, int16_t* out,
+ size_t out_stride, size_t count) {
+ JXL_ASSERT(count % 8 == 0);
+ for (size_t i = 0; i < count; i += 8) {
+ int16x8_t v0 = vld1q_s16(in + in_stride * 0 + i);
+ int16x8_t v1 = vld1q_s16(in + in_stride * 32 + i);
+ int16x8_t v2 = vaddq_s16(v0, v1);
+ int16x8_t v3 = vld1q_s16(in + in_stride * 16 + i);
+ int16x8_t v4_tmp = vqrdmulhq_n_s16(v3, 13573);
+ int16x8_t v4 = vaddq_s16(v4_tmp, v3);
+ int16x8_t v5 = vld1q_s16(in + in_stride * 48 + i);
+ int16x8_t v6 = vaddq_s16(v5, v3);
+ int16x8_t v7 = vaddq_s16(v4, v6);
+ int16x8_t v8 = vqrdmulhq_n_s16(v7, 17734);
+ int16x8_t v9 = vaddq_s16(v2, v8);
+ int16x8_t v10 = vld1q_s16(in + in_stride * 8 + i);
+ int16x8_t v11_tmp = vqrdmulhq_n_s16(v10, 13573);
+ int16x8_t v11 = vaddq_s16(v11_tmp, v10);
+ int16x8_t v12 = vld1q_s16(in + in_stride * 40 + i);
+ int16x8_t v13 = vld1q_s16(in + in_stride * 24 + i);
+ int16x8_t v14 = vaddq_s16(v12, v13);
+ int16x8_t v15 = vaddq_s16(v11, v14);
+ int16x8_t v16 = vld1q_s16(in + in_stride * 56 + i);
+ int16x8_t v17 = vaddq_s16(v16, v12);
+ int16x8_t v18 = vaddq_s16(v13, v10);
+ int16x8_t v19 = vaddq_s16(v17, v18);
+ int16x8_t v20 = vqrdmulhq_n_s16(v19, 17734);
+ int16x8_t v21 = vqrdmulhq_n_s16(v18, 25080);
+ int16x8_t v22 = vaddq_s16(v20, v21);
+ int16x8_t v23 = vaddq_s16(v15, v22);
+ int16x8_t v24 = vqrdmulhq_n_s16(v23, 16705);
+ int16x8_t v25 = vaddq_s16(v9, v24);
+ int16x8_t v26 = vld1q_s16(in + in_stride * 4 + i);
+ int16x8_t v27_tmp = vqrdmulhq_n_s16(v26, 13573);
+ int16x8_t v27 = vaddq_s16(v27_tmp, v26);
+ int16x8_t v28 = vld1q_s16(in + in_stride * 36 + i);
+ int16x8_t v29 = vld1q_s16(in + in_stride * 28 + i);
+ int16x8_t v30 = vaddq_s16(v28, v29);
+ int16x8_t v31 = vaddq_s16(v27, v30);
+ int16x8_t v32 = vld1q_s16(in + in_stride * 20 + i);
+ int16x8_t v33 = vld1q_s16(in + in_stride * 12 + i);
+ int16x8_t v34 = vaddq_s16(v32, v33);
+ int16x8_t v35 = vqrdmulhq_n_s16(v34, 25080);
+ int16x8_t v36 = vld1q_s16(in + in_stride * 52 + i);
+ int16x8_t v37 = vld1q_s16(in + in_stride * 44 + i);
+ int16x8_t v38 = vaddq_s16(v36, v37);
+ int16x8_t v39 = vaddq_s16(v38, v34);
+ int16x8_t v40 = vqrdmulhq_n_s16(v39, 17734);
+ int16x8_t v41 = vaddq_s16(v35, v40);
+ int16x8_t v42 = vaddq_s16(v31, v41);
+ int16x8_t v43 = vaddq_s16(v33, v26);
+ int16x8_t v44_tmp = vqrdmulhq_n_s16(v43, 13573);
+ int16x8_t v44 = vaddq_s16(v44_tmp, v43);
+ int16x8_t v45 = vaddq_s16(v37, v28);
+ int16x8_t v46 = vaddq_s16(v29, v32);
+ int16x8_t v47 = vaddq_s16(v45, v46);
+ int16x8_t v48 = vaddq_s16(v44, v47);
+ int16x8_t v49 = vqrdmulhq_n_s16(v48, 16705);
+ int16x8_t v50 = vaddq_s16(v46, v43);
+ int16x8_t v51_tmp = vqrdmulhq_n_s16(v50, 10045);
+ int16x8_t v51 = vaddq_s16(v51_tmp, v50);
+ int16x8_t v52 = vld1q_s16(in + in_stride * 60 + i);
+ int16x8_t v53 = vaddq_s16(v52, v36);
+ int16x8_t v54 = vaddq_s16(v53, v45);
+ int16x8_t v55 = vqrdmulhq_n_s16(v54, 17734);
+ int16x8_t v56 = vaddq_s16(v51, v55);
+ int16x8_t v57 = vqrdmulhq_n_s16(v56, 16705);
+ int16x8_t v58 = vaddq_s16(v49, v57);
+ int16x8_t v59 = vaddq_s16(v42, v58);
+ int16x8_t v60 = vqrdmulhq_n_s16(v59, 16463);
+ int16x8_t v61 = vaddq_s16(v25, v60);
+ int16x8_t v62 = vld1q_s16(in + in_stride * 2 + i);
+ int16x8_t v63_tmp = vqrdmulhq_n_s16(v62, 13573);
+ int16x8_t v63 = vaddq_s16(v63_tmp, v62);
+ int16x8_t v64 = vld1q_s16(in + in_stride * 34 + i);
+ int16x8_t v65 = vld1q_s16(in + in_stride * 30 + i);
+ int16x8_t v66 = vaddq_s16(v64, v65);
+ int16x8_t v67 = vaddq_s16(v63, v66);
+ int16x8_t v68 = vld1q_s16(in + in_stride * 18 + i);
+ int16x8_t v69 = vld1q_s16(in + in_stride * 14 + i);
+ int16x8_t v70 = vaddq_s16(v68, v69);
+ int16x8_t v71 = vqrdmulhq_n_s16(v70, 25080);
+ int16x8_t v72 = vld1q_s16(in + in_stride * 50 + i);
+ int16x8_t v73 = vld1q_s16(in + in_stride * 46 + i);
+ int16x8_t v74 = vaddq_s16(v72, v73);
+ int16x8_t v75 = vaddq_s16(v74, v70);
+ int16x8_t v76 = vqrdmulhq_n_s16(v75, 17734);
+ int16x8_t v77 = vaddq_s16(v71, v76);
+ int16x8_t v78 = vaddq_s16(v67, v77);
+ int16x8_t v79 = vld1q_s16(in + in_stride * 10 + i);
+ int16x8_t v80 = vld1q_s16(in + in_stride * 6 + i);
+ int16x8_t v81 = vaddq_s16(v79, v80);
+ int16x8_t v82_tmp = vqrdmulhq_n_s16(v81, 13573);
+ int16x8_t v82 = vaddq_s16(v82_tmp, v81);
+ int16x8_t v83 = vld1q_s16(in + in_stride * 42 + i);
+ int16x8_t v84 = vld1q_s16(in + in_stride * 38 + i);
+ int16x8_t v85 = vaddq_s16(v83, v84);
+ int16x8_t v86 = vld1q_s16(in + in_stride * 26 + i);
+ int16x8_t v87 = vld1q_s16(in + in_stride * 22 + i);
+ int16x8_t v88 = vaddq_s16(v86, v87);
+ int16x8_t v89 = vaddq_s16(v85, v88);
+ int16x8_t v90 = vaddq_s16(v82, v89);
+ int16x8_t v91 = vqrdmulhq_n_s16(v90, 16705);
+ int16x8_t v92 = vaddq_s16(v88, v81);
+ int16x8_t v93_tmp = vqrdmulhq_n_s16(v92, 10045);
+ int16x8_t v93 = vaddq_s16(v93_tmp, v92);
+ int16x8_t v94 = vld1q_s16(in + in_stride * 58 + i);
+ int16x8_t v95 = vld1q_s16(in + in_stride * 54 + i);
+ int16x8_t v96 = vaddq_s16(v94, v95);
+ int16x8_t v97 = vaddq_s16(v96, v85);
+ int16x8_t v98 = vqrdmulhq_n_s16(v97, 17734);
+ int16x8_t v99 = vaddq_s16(v93, v98);
+ int16x8_t v100 = vqrdmulhq_n_s16(v99, 16705);
+ int16x8_t v101 = vaddq_s16(v91, v100);
+ int16x8_t v102 = vaddq_s16(v78, v101);
+ int16x8_t v103 = vaddq_s16(v69, v79);
+ int16x8_t v104 = vaddq_s16(v80, v62);
+ int16x8_t v105 = vaddq_s16(v103, v104);
+ int16x8_t v106_tmp = vqrdmulhq_n_s16(v105, 13573);
+ int16x8_t v106 = vaddq_s16(v106_tmp, v105);
+ int16x8_t v107 = vaddq_s16(v73, v83);
+ int16x8_t v108 = vaddq_s16(v84, v64);
+ int16x8_t v109 = vaddq_s16(v107, v108);
+ int16x8_t v110 = vaddq_s16(v65, v86);
+ int16x8_t v111 = vaddq_s16(v87, v68);
+ int16x8_t v112 = vaddq_s16(v110, v111);
+ int16x8_t v113 = vaddq_s16(v109, v112);
+ int16x8_t v114 = vaddq_s16(v106, v113);
+ int16x8_t v115 = vqrdmulhq_n_s16(v114, 16705);
+ int16x8_t v116 = vaddq_s16(v112, v105);
+ int16x8_t v117 = vqrdmulhq_n_s16(v116, 25080);
+ int16x8_t v118 = vqrdmulhq_n_s16(v116, 17734);
+ int16x8_t v119 = vld1q_s16(in + in_stride * 62 + i);
+ int16x8_t v120 = vaddq_s16(v119, v94);
+ int16x8_t v121 = vaddq_s16(v95, v72);
+ int16x8_t v122 = vaddq_s16(v120, v121);
+ int16x8_t v123 = vaddq_s16(v122, v109);
+ int16x8_t v124 = vqrdmulhq_n_s16(v123, 17734);
+ int16x8_t v125 = vaddq_s16(v118, v124);
+ int16x8_t v126 = vaddq_s16(v117, v125);
+ int16x8_t v127 = vqrdmulhq_n_s16(v126, 16705);
+ int16x8_t v128 = vaddq_s16(v115, v127);
+ int16x8_t v129 = vqrdmulhq_n_s16(v128, 16463);
+ int16x8_t v130_tmp = vqrdmulhq_n_s16(v104, 13573);
+ int16x8_t v130 = vaddq_s16(v130_tmp, v104);
+ int16x8_t v131 = vaddq_s16(v108, v110);
+ int16x8_t v132 = vaddq_s16(v130, v131);
+ int16x8_t v133 = vaddq_s16(v111, v103);
+ int16x8_t v134_tmp = vqrdmulhq_n_s16(v133, 10045);
+ int16x8_t v134 = vaddq_s16(v134_tmp, v133);
+ int16x8_t v135 = vaddq_s16(v121, v107);
+ int16x8_t v136 = vqrdmulhq_n_s16(v135, 17734);
+ int16x8_t v137 = vaddq_s16(v134, v136);
+ int16x8_t v138 = vaddq_s16(v132, v137);
+ int16x8_t v139 = vqrdmulhq_n_s16(v138, 16463);
+ int16x8_t v140 = vaddq_s16(v129, v139);
+ int16x8_t v141 = vaddq_s16(v102, v140);
+ int16x8_t v142 = vqrdmulhq_n_s16(v141, 16404);
+ int16x8_t v143 = vaddq_s16(v61, v142);
+ int16x8_t v144 = vld1q_s16(in + in_stride * 1 + i);
+ int16x8_t v145_tmp = vqrdmulhq_n_s16(v144, 13573);
+ int16x8_t v145 = vaddq_s16(v145_tmp, v144);
+ int16x8_t v146 = vld1q_s16(in + in_stride * 33 + i);
+ int16x8_t v147 = vld1q_s16(in + in_stride * 31 + i);
+ int16x8_t v148 = vaddq_s16(v146, v147);
+ int16x8_t v149 = vaddq_s16(v145, v148);
+ int16x8_t v150 = vld1q_s16(in + in_stride * 17 + i);
+ int16x8_t v151 = vld1q_s16(in + in_stride * 15 + i);
+ int16x8_t v152 = vaddq_s16(v150, v151);
+ int16x8_t v153 = vqrdmulhq_n_s16(v152, 25080);
+ int16x8_t v154 = vld1q_s16(in + in_stride * 49 + i);
+ int16x8_t v155 = vld1q_s16(in + in_stride * 47 + i);
+ int16x8_t v156 = vaddq_s16(v154, v155);
+ int16x8_t v157 = vaddq_s16(v156, v152);
+ int16x8_t v158 = vqrdmulhq_n_s16(v157, 17734);
+ int16x8_t v159 = vaddq_s16(v153, v158);
+ int16x8_t v160 = vaddq_s16(v149, v159);
+ int16x8_t v161 = vld1q_s16(in + in_stride * 9 + i);
+ int16x8_t v162 = vld1q_s16(in + in_stride * 7 + i);
+ int16x8_t v163 = vaddq_s16(v161, v162);
+ int16x8_t v164_tmp = vqrdmulhq_n_s16(v163, 13573);
+ int16x8_t v164 = vaddq_s16(v164_tmp, v163);
+ int16x8_t v165 = vld1q_s16(in + in_stride * 41 + i);
+ int16x8_t v166 = vld1q_s16(in + in_stride * 39 + i);
+ int16x8_t v167 = vaddq_s16(v165, v166);
+ int16x8_t v168 = vld1q_s16(in + in_stride * 25 + i);
+ int16x8_t v169 = vld1q_s16(in + in_stride * 23 + i);
+ int16x8_t v170 = vaddq_s16(v168, v169);
+ int16x8_t v171 = vaddq_s16(v167, v170);
+ int16x8_t v172 = vaddq_s16(v164, v171);
+ int16x8_t v173 = vqrdmulhq_n_s16(v172, 16705);
+ int16x8_t v174 = vaddq_s16(v170, v163);
+ int16x8_t v175_tmp = vqrdmulhq_n_s16(v174, 10045);
+ int16x8_t v175 = vaddq_s16(v175_tmp, v174);
+ int16x8_t v176 = vld1q_s16(in + in_stride * 57 + i);
+ int16x8_t v177 = vld1q_s16(in + in_stride * 55 + i);
+ int16x8_t v178 = vaddq_s16(v176, v177);
+ int16x8_t v179 = vaddq_s16(v178, v167);
+ int16x8_t v180 = vqrdmulhq_n_s16(v179, 17734);
+ int16x8_t v181 = vaddq_s16(v175, v180);
+ int16x8_t v182 = vqrdmulhq_n_s16(v181, 16705);
+ int16x8_t v183 = vaddq_s16(v173, v182);
+ int16x8_t v184 = vaddq_s16(v160, v183);
+ int16x8_t v185 = vld1q_s16(in + in_stride * 37 + i);
+ int16x8_t v186 = vld1q_s16(in + in_stride * 35 + i);
+ int16x8_t v187 = vaddq_s16(v185, v186);
+ int16x8_t v188 = vld1q_s16(in + in_stride * 45 + i);
+ int16x8_t v189 = vld1q_s16(in + in_stride * 43 + i);
+ int16x8_t v190 = vaddq_s16(v188, v189);
+ int16x8_t v191 = vaddq_s16(v187, v190);
+ int16x8_t v192 = vld1q_s16(in + in_stride * 29 + i);
+ int16x8_t v193 = vld1q_s16(in + in_stride * 27 + i);
+ int16x8_t v194 = vaddq_s16(v192, v193);
+ int16x8_t v195 = vld1q_s16(in + in_stride * 21 + i);
+ int16x8_t v196 = vld1q_s16(in + in_stride * 19 + i);
+ int16x8_t v197 = vaddq_s16(v195, v196);
+ int16x8_t v198 = vaddq_s16(v194, v197);
+ int16x8_t v199 = vaddq_s16(v191, v198);
+ int16x8_t v200 = vld1q_s16(in + in_stride * 5 + i);
+ int16x8_t v201 = vld1q_s16(in + in_stride * 3 + i);
+ int16x8_t v202 = vaddq_s16(v200, v201);
+ int16x8_t v203 = vld1q_s16(in + in_stride * 13 + i);
+ int16x8_t v204 = vld1q_s16(in + in_stride * 11 + i);
+ int16x8_t v205 = vaddq_s16(v203, v204);
+ int16x8_t v206 = vaddq_s16(v202, v205);
+ int16x8_t v207_tmp = vqrdmulhq_n_s16(v206, 13573);
+ int16x8_t v207 = vaddq_s16(v207_tmp, v206);
+ int16x8_t v208 = vaddq_s16(v199, v207);
+ int16x8_t v209 = vqrdmulhq_n_s16(v208, 16705);
+ int16x8_t v210 = vaddq_s16(v198, v206);
+ int16x8_t v211 = vqrdmulhq_n_s16(v210, 25080);
+ int16x8_t v212 = vqrdmulhq_n_s16(v210, 17734);
+ int16x8_t v213 = vld1q_s16(in + in_stride * 53 + i);
+ int16x8_t v214 = vld1q_s16(in + in_stride * 51 + i);
+ int16x8_t v215 = vaddq_s16(v213, v214);
+ int16x8_t v216 = vld1q_s16(in + in_stride * 61 + i);
+ int16x8_t v217 = vld1q_s16(in + in_stride * 59 + i);
+ int16x8_t v218 = vaddq_s16(v216, v217);
+ int16x8_t v219 = vaddq_s16(v215, v218);
+ int16x8_t v220 = vaddq_s16(v219, v191);
+ int16x8_t v221 = vqrdmulhq_n_s16(v220, 17734);
+ int16x8_t v222 = vaddq_s16(v212, v221);
+ int16x8_t v223 = vaddq_s16(v211, v222);
+ int16x8_t v224 = vqrdmulhq_n_s16(v223, 16705);
+ int16x8_t v225 = vaddq_s16(v209, v224);
+ int16x8_t v226 = vqrdmulhq_n_s16(v225, 16463);
+ int16x8_t v227_tmp = vqrdmulhq_n_s16(v202, 13573);
+ int16x8_t v227 = vaddq_s16(v227_tmp, v202);
+ int16x8_t v228 = vaddq_s16(v187, v194);
+ int16x8_t v229 = vaddq_s16(v227, v228);
+ int16x8_t v230 = vaddq_s16(v215, v190);
+ int16x8_t v231 = vqrdmulhq_n_s16(v230, 17734);
+ int16x8_t v232 = vaddq_s16(v197, v205);
+ int16x8_t v233_tmp = vqrdmulhq_n_s16(v232, 10045);
+ int16x8_t v233 = vaddq_s16(v233_tmp, v232);
+ int16x8_t v234 = vaddq_s16(v231, v233);
+ int16x8_t v235 = vaddq_s16(v229, v234);
+ int16x8_t v236 = vqrdmulhq_n_s16(v235, 16463);
+ int16x8_t v237 = vaddq_s16(v226, v236);
+ int16x8_t v238 = vaddq_s16(v184, v237);
+ int16x8_t v239 = vaddq_s16(v201, v144);
+ int16x8_t v240_tmp = vqrdmulhq_n_s16(v239, 13573);
+ int16x8_t v240 = vaddq_s16(v240_tmp, v239);
+ int16x8_t v241 = vaddq_s16(v186, v146);
+ int16x8_t v242 = vaddq_s16(v147, v192);
+ int16x8_t v243 = vaddq_s16(v241, v242);
+ int16x8_t v244 = vaddq_s16(v240, v243);
+ int16x8_t v245 = vaddq_s16(v196, v150);
+ int16x8_t v246 = vaddq_s16(v151, v203);
+ int16x8_t v247 = vaddq_s16(v245, v246);
+ int16x8_t v248_tmp = vqrdmulhq_n_s16(v247, 10045);
+ int16x8_t v248 = vaddq_s16(v248_tmp, v247);
+ int16x8_t v249 = vaddq_s16(v155, v188);
+ int16x8_t v250 = vaddq_s16(v214, v154);
+ int16x8_t v251 = vaddq_s16(v249, v250);
+ int16x8_t v252 = vqrdmulhq_n_s16(v251, 17734);
+ int16x8_t v253 = vaddq_s16(v248, v252);
+ int16x8_t v254 = vaddq_s16(v244, v253);
+ int16x8_t v255 = vaddq_s16(v204, v161);
+ int16x8_t v256 = vaddq_s16(v162, v200);
+ int16x8_t v257 = vaddq_s16(v255, v256);
+ int16x8_t v258_tmp = vqrdmulhq_n_s16(v257, 13573);
+ int16x8_t v258 = vaddq_s16(v258_tmp, v257);
+ int16x8_t v259 = vaddq_s16(v189, v165);
+ int16x8_t v260 = vaddq_s16(v166, v185);
+ int16x8_t v261 = vaddq_s16(v259, v260);
+ int16x8_t v262 = vaddq_s16(v169, v195);
+ int16x8_t v263 = vaddq_s16(v193, v168);
+ int16x8_t v264 = vaddq_s16(v262, v263);
+ int16x8_t v265 = vaddq_s16(v261, v264);
+ int16x8_t v266 = vaddq_s16(v258, v265);
+ int16x8_t v267 = vqrdmulhq_n_s16(v266, 16705);
+ int16x8_t v268 = vaddq_s16(v264, v257);
+ int16x8_t v269 = vqrdmulhq_n_s16(v268, 25080);
+ int16x8_t v270 = vaddq_s16(v217, v176);
+ int16x8_t v271 = vaddq_s16(v177, v213);
+ int16x8_t v272 = vaddq_s16(v270, v271);
+ int16x8_t v273 = vaddq_s16(v272, v261);
+ int16x8_t v274 = vqrdmulhq_n_s16(v273, 17734);
+ int16x8_t v275 = vqrdmulhq_n_s16(v268, 17734);
+ int16x8_t v276 = vaddq_s16(v274, v275);
+ int16x8_t v277 = vaddq_s16(v269, v276);
+ int16x8_t v278 = vqrdmulhq_n_s16(v277, 16705);
+ int16x8_t v279 = vaddq_s16(v267, v278);
+ int16x8_t v280 = vaddq_s16(v254, v279);
+ int16x8_t v281 = vqrdmulhq_n_s16(v280, 16404);
+ int16x8_t v282 = vaddq_s16(v256, v239);
+ int16x8_t v283_tmp = vqrdmulhq_n_s16(v282, 13573);
+ int16x8_t v283 = vaddq_s16(v283_tmp, v282);
+ int16x8_t v284 = vaddq_s16(v260, v241);
+ int16x8_t v285 = vaddq_s16(v242, v263);
+ int16x8_t v286 = vaddq_s16(v284, v285);
+ int16x8_t v287 = vaddq_s16(v283, v286);
+ int16x8_t v288 = vaddq_s16(v262, v245);
+ int16x8_t v289 = vaddq_s16(v246, v255);
+ int16x8_t v290 = vaddq_s16(v288, v289);
+ int16x8_t v291 = vqrdmulhq_n_s16(v290, 25080);
+ int16x8_t v292 = vqrdmulhq_n_s16(v290, 17734);
+ int16x8_t v293 = vaddq_s16(v271, v250);
+ int16x8_t v294 = vaddq_s16(v249, v259);
+ int16x8_t v295 = vaddq_s16(v293, v294);
+ int16x8_t v296 = vqrdmulhq_n_s16(v295, 17734);
+ int16x8_t v297 = vaddq_s16(v292, v296);
+ int16x8_t v298 = vaddq_s16(v291, v297);
+ int16x8_t v299 = vaddq_s16(v287, v298);
+ int16x8_t v300 = vqrdmulhq_n_s16(v299, 16463);
+ int16x8_t v301 = vaddq_s16(v289, v282);
+ int16x8_t v302 = vqrdmulhq_n_s16(v301, 23624);
+ int16x8_t v303 = vaddq_s16(v294, v284);
+ int16x8_t v304 = vqrdmulhq_n_s16(v303, 19705);
+ int16x8_t v305 = vaddq_s16(v285, v288);
+ int16x8_t v306 = vqrdmulhq_n_s16(v305, 19705);
+ int16x8_t v307 = vaddq_s16(v304, v306);
+ int16x8_t v308 = vqrdmulhq_n_s16(v307, 27779);
+ int16x8_t v309 = vaddq_s16(v302, v308);
+ int16x8_t v310 = vaddq_s16(v305, v301);
+ int16x8_t v311 = vqrdmulhq_n_s16(v310, 25080);
+ int16x8_t v312 = vqrdmulhq_n_s16(v310, 17734);
+ int16x8_t v313 = vld1q_s16(in + in_stride * 63 + i);
+ int16x8_t v314 = vaddq_s16(v313, v216);
+ int16x8_t v315 = vaddq_s16(v314, v270);
+ int16x8_t v316 = vaddq_s16(v315, v293);
+ int16x8_t v317 = vqrdmulhq_n_s16(v316, 25746);
+ int16x8_t v318 = vqrdmulhq_n_s16(v303, 25746);
+ int16x8_t v319 = vaddq_s16(v317, v318);
+ int16x8_t v320 = vqrdmulhq_n_s16(v319, 22571);
+ int16x8_t v321 = vaddq_s16(v312, v320);
+ int16x8_t v322 = vaddq_s16(v311, v321);
+ int16x8_t v323 = vqrdmulhq_n_s16(v322, 16705);
+ int16x8_t v324 = vaddq_s16(v309, v323);
+ int16x8_t v325 = vqrdmulhq_n_s16(v324, 16463);
+ int16x8_t v326 = vaddq_s16(v300, v325);
+ int16x8_t v327 = vqrdmulhq_n_s16(v326, 16404);
+ int16x8_t v328 = vaddq_s16(v281, v327);
+ int16x8_t v329 = vaddq_s16(v238, v328);
+ int16x8_t v330 = vqrdmulhq_n_s16(v329, 16389);
+ int16x8_t v331 = vaddq_s16(v143, v330);
+ int16x8_t v332 = vsubq_s16(v82, v89);
+ int16x8_t v333 = vqrdmulhq_n_s16(v332, 19705);
+ int16x8_t v334 = vqrdmulhq_n_s16(v92, 13573);
+ int16x8_t v335 = vsubq_s16(v334, v97);
+ int16x8_t v336 = vqrdmulhq_n_s16(v335, 25746);
+ int16x8_t v337 = vaddq_s16(v333, v336);
+ int16x8_t v338 = vsubq_s16(v63, v66);
+ int16x8_t v339 = vqrdmulhq_n_s16(v70, 17734);
+ int16x8_t v340_tmp = vqrdmulhq_n_s16(v74, 10045);
+ int16x8_t v340 = vaddq_s16(v340_tmp, v74);
+ int16x8_t v341 = vsubq_s16(v339, v340);
+ int16x8_t v342 = vaddq_s16(v338, v341);
+ int16x8_t v343 = vaddq_s16(v337, v342);
+ int16x8_t v344 = vsubq_s16(v130, v131);
+ int16x8_t v345 = vqrdmulhq_n_s16(v133, 13573);
+ int16x8_t v346 = vsubq_s16(v345, v135);
+ int16x8_t v347_tmp = vqrdmulhq_n_s16(v346, 10045);
+ int16x8_t v347 = vaddq_s16(v347_tmp, v346);
+ int16x8_t v348 = vaddq_s16(v344, v347);
+ int16x8_t v349 = vqrdmulhq_n_s16(v348, 17121);
+ int16x8_t v350 = vqrdmulhq_n_s16(v105, 27867);
+ int16x8_t v351 = vqrdmulhq_n_s16(v113, 19705);
+ int16x8_t v352 = vsubq_s16(v350, v351);
+ int16x8_t v353 = vqrdmulhq_n_s16(v116, 13573);
+ int16x8_t v354 = vsubq_s16(v353, v123);
+ int16x8_t v355 = vqrdmulhq_n_s16(v354, 25746);
+ int16x8_t v356 = vaddq_s16(v352, v355);
+ int16x8_t v357 = vqrdmulhq_n_s16(v356, 17121);
+ int16x8_t v358 = vaddq_s16(v349, v357);
+ int16x8_t v359 = vaddq_s16(v343, v358);
+ int16x8_t v360 = vqrdmulhq_n_s16(v359, 16563);
+ int16x8_t v361 = vsubq_s16(v27, v30);
+ int16x8_t v362 = vqrdmulhq_n_s16(v34, 17734);
+ int16x8_t v363_tmp = vqrdmulhq_n_s16(v38, 10045);
+ int16x8_t v363 = vaddq_s16(v363_tmp, v38);
+ int16x8_t v364 = vsubq_s16(v362, v363);
+ int16x8_t v365 = vaddq_s16(v361, v364);
+ int16x8_t v366 = vsubq_s16(v44, v47);
+ int16x8_t v367 = vqrdmulhq_n_s16(v366, 19705);
+ int16x8_t v368 = vqrdmulhq_n_s16(v50, 13573);
+ int16x8_t v369 = vsubq_s16(v368, v54);
+ int16x8_t v370 = vqrdmulhq_n_s16(v369, 25746);
+ int16x8_t v371 = vaddq_s16(v367, v370);
+ int16x8_t v372 = vaddq_s16(v365, v371);
+ int16x8_t v373 = vqrdmulhq_n_s16(v372, 17121);
+ int16x8_t v374 = vsubq_s16(v0, v1);
+ int16x8_t v375 = vsubq_s16(v4, v6);
+ int16x8_t v376_tmp = vqrdmulhq_n_s16(v375, 10045);
+ int16x8_t v376 = vaddq_s16(v376_tmp, v375);
+ int16x8_t v377 = vaddq_s16(v374, v376);
+ int16x8_t v378 = vsubq_s16(v11, v14);
+ int16x8_t v379 = vqrdmulhq_n_s16(v18, 17734);
+ int16x8_t v380_tmp = vqrdmulhq_n_s16(v17, 10045);
+ int16x8_t v380 = vaddq_s16(v380_tmp, v17);
+ int16x8_t v381 = vsubq_s16(v379, v380);
+ int16x8_t v382 = vaddq_s16(v378, v381);
+ int16x8_t v383 = vqrdmulhq_n_s16(v382, 19705);
+ int16x8_t v384 = vaddq_s16(v377, v383);
+ int16x8_t v385 = vaddq_s16(v373, v384);
+ int16x8_t v386 = vaddq_s16(v360, v385);
+ int16x8_t v387 = vsubq_s16(v145, v148);
+ int16x8_t v388 = vqrdmulhq_n_s16(v152, 17734);
+ int16x8_t v389_tmp = vqrdmulhq_n_s16(v156, 10045);
+ int16x8_t v389 = vaddq_s16(v389_tmp, v156);
+ int16x8_t v390 = vsubq_s16(v388, v389);
+ int16x8_t v391 = vaddq_s16(v387, v390);
+ int16x8_t v392 = vsubq_s16(v164, v171);
+ int16x8_t v393 = vqrdmulhq_n_s16(v392, 19705);
+ int16x8_t v394 = vqrdmulhq_n_s16(v174, 13573);
+ int16x8_t v395 = vsubq_s16(v394, v179);
+ int16x8_t v396 = vqrdmulhq_n_s16(v395, 25746);
+ int16x8_t v397 = vaddq_s16(v393, v396);
+ int16x8_t v398 = vaddq_s16(v391, v397);
+ int16x8_t v399 = vsubq_s16(v227, v228);
+ int16x8_t v400 = vqrdmulhq_n_s16(v232, 13573);
+ int16x8_t v401 = vsubq_s16(v400, v230);
+ int16x8_t v402_tmp = vqrdmulhq_n_s16(v401, 10045);
+ int16x8_t v402 = vaddq_s16(v402_tmp, v401);
+ int16x8_t v403 = vaddq_s16(v399, v402);
+ int16x8_t v404 = vqrdmulhq_n_s16(v403, 17121);
+ int16x8_t v405 = vqrdmulhq_n_s16(v206, 27867);
+ int16x8_t v406 = vqrdmulhq_n_s16(v199, 19705);
+ int16x8_t v407 = vsubq_s16(v405, v406);
+ int16x8_t v408 = vqrdmulhq_n_s16(v210, 13573);
+ int16x8_t v409 = vsubq_s16(v408, v220);
+ int16x8_t v410 = vqrdmulhq_n_s16(v409, 25746);
+ int16x8_t v411 = vaddq_s16(v407, v410);
+ int16x8_t v412 = vqrdmulhq_n_s16(v411, 17121);
+ int16x8_t v413 = vaddq_s16(v404, v412);
+ int16x8_t v414 = vaddq_s16(v398, v413);
+ int16x8_t v415 = vsubq_s16(v240, v243);
+ int16x8_t v416 = vqrdmulhq_n_s16(v247, 13573);
+ int16x8_t v417 = vsubq_s16(v416, v251);
+ int16x8_t v418_tmp = vqrdmulhq_n_s16(v417, 10045);
+ int16x8_t v418 = vaddq_s16(v418_tmp, v417);
+ int16x8_t v419 = vaddq_s16(v415, v418);
+ int16x8_t v420 = vqrdmulhq_n_s16(v257, 27867);
+ int16x8_t v421 = vqrdmulhq_n_s16(v265, 19705);
+ int16x8_t v422 = vsubq_s16(v420, v421);
+ int16x8_t v423 = vqrdmulhq_n_s16(v268, 13573);
+ int16x8_t v424 = vsubq_s16(v423, v273);
+ int16x8_t v425 = vqrdmulhq_n_s16(v424, 25746);
+ int16x8_t v426 = vaddq_s16(v422, v425);
+ int16x8_t v427 = vaddq_s16(v419, v426);
+ int16x8_t v428 = vqrdmulhq_n_s16(v427, 16563);
+ int16x8_t v429 = vqrdmulhq_n_s16(v301, 27867);
+ int16x8_t v430 = vsubq_s16(v429, v307);
+ int16x8_t v431 = vqrdmulhq_n_s16(v310, 10664);
+ int16x8_t v432 = vsubq_s16(v431, v319);
+ int16x8_t v433 = vaddq_s16(v430, v432);
+ int16x8_t v434 = vqrdmulhq_n_s16(v433, 17121);
+ int16x8_t v435 = vsubq_s16(v283, v286);
+ int16x8_t v436 = vqrdmulhq_n_s16(v290, 13573);
+ int16x8_t v437 = vsubq_s16(v436, v295);
+ int16x8_t v438_tmp = vqrdmulhq_n_s16(v437, 10045);
+ int16x8_t v438 = vaddq_s16(v438_tmp, v437);
+ int16x8_t v439 = vaddq_s16(v435, v438);
+ int16x8_t v440 = vqrdmulhq_n_s16(v439, 17121);
+ int16x8_t v441 = vaddq_s16(v434, v440);
+ int16x8_t v442 = vqrdmulhq_n_s16(v441, 16563);
+ int16x8_t v443 = vaddq_s16(v428, v442);
+ int16x8_t v444 = vaddq_s16(v414, v443);
+ int16x8_t v445 = vqrdmulhq_n_s16(v444, 16429);
+ int16x8_t v446 = vaddq_s16(v386, v445);
+ int16x8_t v447 = vsubq_s16(v374, v376);
+ int16x8_t v448 = vsubq_s16(v378, v381);
+ int16x8_t v449 = vqrdmulhq_n_s16(v448, 29490);
+ int16x8_t v450 = vaddq_s16(v447, v449);
+ int16x8_t v451 = vsubq_s16(v361, v364);
+ int16x8_t v452 = vqrdmulhq_n_s16(v366, 29490);
+ int16x8_t v453_tmp = vqrdmulhq_n_s16(v369, 5763);
+ int16x8_t v453 = vaddq_s16(v453_tmp, v369);
+ int16x8_t v454 = vsubq_s16(v452, v453);
+ int16x8_t v455 = vaddq_s16(v451, v454);
+ int16x8_t v456 = vqrdmulhq_n_s16(v455, 18578);
+ int16x8_t v457 = vaddq_s16(v450, v456);
+ int16x8_t v458 = vsubq_s16(v338, v341);
+ int16x8_t v459 = vqrdmulhq_n_s16(v332, 29490);
+ int16x8_t v460_tmp = vqrdmulhq_n_s16(v335, 5763);
+ int16x8_t v460 = vaddq_s16(v460_tmp, v335);
+ int16x8_t v461 = vsubq_s16(v459, v460);
+ int16x8_t v462 = vaddq_s16(v458, v461);
+ int16x8_t v463 = vqrdmulhq_n_s16(v352, 27803);
+ int16x8_t v464 = vqrdmulhq_n_s16(v354, 21845);
+ int16x8_t v465 = vsubq_s16(v463, v464);
+ int16x8_t v466 = vsubq_s16(v344, v347);
+ int16x8_t v467 = vqrdmulhq_n_s16(v466, 18578);
+ int16x8_t v468 = vaddq_s16(v465, v467);
+ int16x8_t v469 = vaddq_s16(v462, v468);
+ int16x8_t v470 = vqrdmulhq_n_s16(v469, 16890);
+ int16x8_t v471 = vaddq_s16(v457, v470);
+ int16x8_t v472 = vsubq_s16(v415, v418);
+ int16x8_t v473_tmp = vqrdmulhq_n_s16(v422, 16273);
+ int16x8_t v473 = vaddq_s16(v473_tmp, v422);
+ int16x8_t v474_tmp = vqrdmulhq_n_s16(v424, 5763);
+ int16x8_t v474 = vaddq_s16(v474_tmp, v424);
+ int16x8_t v475 = vsubq_s16(v473, v474);
+ int16x8_t v476 = vaddq_s16(v472, v475);
+ int16x8_t v477 = vqrdmulhq_n_s16(v476, 16890);
+ int16x8_t v478 = vqrdmulhq_n_s16(v435, 20261);
+ int16x8_t v479 = vqrdmulhq_n_s16(v437, 26472);
+ int16x8_t v480 = vsubq_s16(v478, v479);
+ int16x8_t v481 = vqrdmulhq_n_s16(v480, 30046);
+ int16x8_t v482 = vqrdmulhq_n_s16(v430, 30322);
+ int16x8_t v483 = vqrdmulhq_n_s16(v432, 30322);
+ int16x8_t v484 = vsubq_s16(v482, v483);
+ int16x8_t v485 = vqrdmulhq_n_s16(v484, 30046);
+ int16x8_t v486 = vaddq_s16(v481, v485);
+ int16x8_t v487 = vqrdmulhq_n_s16(v486, 16890);
+ int16x8_t v488 = vaddq_s16(v477, v487);
+ int16x8_t v489 = vsubq_s16(v387, v390);
+ int16x8_t v490 = vqrdmulhq_n_s16(v392, 29490);
+ int16x8_t v491_tmp = vqrdmulhq_n_s16(v395, 5763);
+ int16x8_t v491 = vaddq_s16(v491_tmp, v395);
+ int16x8_t v492 = vsubq_s16(v490, v491);
+ int16x8_t v493 = vaddq_s16(v489, v492);
+ int16x8_t v494 = vsubq_s16(v399, v402);
+ int16x8_t v495 = vqrdmulhq_n_s16(v494, 18578);
+ int16x8_t v496 = vqrdmulhq_n_s16(v407, 27803);
+ int16x8_t v497 = vqrdmulhq_n_s16(v409, 21845);
+ int16x8_t v498 = vsubq_s16(v496, v497);
+ int16x8_t v499 = vaddq_s16(v495, v498);
+ int16x8_t v500 = vaddq_s16(v493, v499);
+ int16x8_t v501 = vaddq_s16(v488, v500);
+ int16x8_t v502 = vqrdmulhq_n_s16(v501, 16508);
+ int16x8_t v503 = vaddq_s16(v471, v502);
+ int16x8_t v504 = vsubq_s16(v2, v8);
+ int16x8_t v505 = vsubq_s16(v15, v22);
+ int16x8_t v506_tmp = vqrdmulhq_n_s16(v505, 18446);
+ int16x8_t v506 = vmlaq_n_s16(v506_tmp, v505, 2);
+ int16x8_t v507 = vaddq_s16(v504, v506);
+ int16x8_t v508 = vsubq_s16(v31, v41);
+ int16x8_t v509 = vsubq_s16(v48, v56);
+ int16x8_t v510_tmp = vqrdmulhq_n_s16(v509, 18446);
+ int16x8_t v510 = vmlaq_n_s16(v510_tmp, v509, 2);
+ int16x8_t v511 = vaddq_s16(v508, v510);
+ int16x8_t v512 = vqrdmulhq_n_s16(v511, 21195);
+ int16x8_t v513 = vaddq_s16(v507, v512);
+ int16x8_t v514 = vsubq_s16(v67, v77);
+ int16x8_t v515 = vsubq_s16(v90, v99);
+ int16x8_t v516_tmp = vqrdmulhq_n_s16(v515, 18446);
+ int16x8_t v516 = vmlaq_n_s16(v516_tmp, v515, 2);
+ int16x8_t v517 = vaddq_s16(v514, v516);
+ int16x8_t v518 = vsubq_s16(v114, v126);
+ int16x8_t v519_tmp = vqrdmulhq_n_s16(v518, 18446);
+ int16x8_t v519 = vmlaq_n_s16(v519_tmp, v518, 2);
+ int16x8_t v520 = vsubq_s16(v132, v137);
+ int16x8_t v521 = vaddq_s16(v519, v520);
+ int16x8_t v522 = vqrdmulhq_n_s16(v521, 21195);
+ int16x8_t v523 = vaddq_s16(v517, v522);
+ int16x8_t v524 = vqrdmulhq_n_s16(v523, 17401);
+ int16x8_t v525 = vaddq_s16(v513, v524);
+ int16x8_t v526 = vsubq_s16(v172, v181);
+ int16x8_t v527_tmp = vqrdmulhq_n_s16(v526, 18446);
+ int16x8_t v527 = vmlaq_n_s16(v527_tmp, v526, 2);
+ int16x8_t v528 = vsubq_s16(v149, v159);
+ int16x8_t v529 = vaddq_s16(v527, v528);
+ int16x8_t v530 = vsubq_s16(v229, v234);
+ int16x8_t v531 = vsubq_s16(v208, v223);
+ int16x8_t v532_tmp = vqrdmulhq_n_s16(v531, 18446);
+ int16x8_t v532 = vmlaq_n_s16(v532_tmp, v531, 2);
+ int16x8_t v533 = vaddq_s16(v530, v532);
+ int16x8_t v534 = vqrdmulhq_n_s16(v533, 21195);
+ int16x8_t v535 = vaddq_s16(v529, v534);
+ int16x8_t v536 = vsubq_s16(v244, v253);
+ int16x8_t v537 = vsubq_s16(v266, v277);
+ int16x8_t v538_tmp = vqrdmulhq_n_s16(v537, 18446);
+ int16x8_t v538 = vmlaq_n_s16(v538_tmp, v537, 2);
+ int16x8_t v539 = vaddq_s16(v536, v538);
+ int16x8_t v540 = vqrdmulhq_n_s16(v539, 17401);
+ int16x8_t v541 = vqrdmulhq_n_s16(v287, 25826);
+ int16x8_t v542 = vqrdmulhq_n_s16(v298, 25826);
+ int16x8_t v543 = vsubq_s16(v541, v542);
+ int16x8_t v544 = vqrdmulhq_n_s16(v543, 14281);
+ int16x8_t v545_tmp = vqrdmulhq_n_s16(v309, 31509);
+ int16x8_t v545 = vaddq_s16(v545_tmp, v309);
+ int16x8_t v546 = vsubq_s16(v545, v322);
+ int16x8_t v547 = vqrdmulhq_n_s16(v546, 28847);
+ int16x8_t v548 = vaddq_s16(v544, v547);
+ int16x8_t v549 = vaddq_s16(v540, v548);
+ int16x8_t v550 = vaddq_s16(v535, v549);
+ int16x8_t v551 = vqrdmulhq_n_s16(v550, 16629);
+ int16x8_t v552 = vaddq_s16(v525, v551);
+ int16x8_t v553 = vsubq_s16(v504, v506);
+ int16x8_t v554 = vsubq_s16(v508, v510);
+ int16x8_t v555 = vqrdmulhq_n_s16(v554, 25826);
+ int16x8_t v556 = vaddq_s16(v553, v555);
+ int16x8_t v557 = vsubq_s16(v514, v516);
+ int16x8_t v558 = vsubq_s16(v520, v519);
+ int16x8_t v559 = vqrdmulhq_n_s16(v558, 25826);
+ int16x8_t v560 = vaddq_s16(v557, v559);
+ int16x8_t v561 = vqrdmulhq_n_s16(v560, 18124);
+ int16x8_t v562 = vaddq_s16(v556, v561);
+ int16x8_t v563 = vsubq_s16(v528, v527);
+ int16x8_t v564 = vsubq_s16(v530, v532);
+ int16x8_t v565 = vqrdmulhq_n_s16(v564, 25826);
+ int16x8_t v566 = vaddq_s16(v563, v565);
+ int16x8_t v567 = vsubq_s16(v536, v538);
+ int16x8_t v568 = vqrdmulhq_n_s16(v567, 18124);
+ int16x8_t v569_tmp = vqrdmulhq_n_s16(v546, 654);
+ int16x8_t v569 = vmlaq_n_s16(v569_tmp, v546, 2);
+ int16x8_t v570 = vsubq_s16(v543, v569);
+ int16x8_t v571 = vqrdmulhq_n_s16(v570, 18124);
+ int16x8_t v572 = vaddq_s16(v568, v571);
+ int16x8_t v573 = vaddq_s16(v566, v572);
+ int16x8_t v574 = vqrdmulhq_n_s16(v573, 16792);
+ int16x8_t v575 = vaddq_s16(v562, v574);
+ int16x8_t v576 = vsubq_s16(v458, v461);
+ int16x8_t v577_tmp = vqrdmulhq_n_s16(v465, 25030);
+ int16x8_t v577 = vaddq_s16(v577_tmp, v465);
+ int16x8_t v578 = vsubq_s16(v466, v577);
+ int16x8_t v579_tmp = vqrdmulhq_n_s16(v578, 1988);
+ int16x8_t v579 = vaddq_s16(v579_tmp, v578);
+ int16x8_t v580 = vaddq_s16(v576, v579);
+ int16x8_t v581 = vqrdmulhq_n_s16(v580, 19102);
+ int16x8_t v582 = vsubq_s16(v447, v449);
+ int16x8_t v583 = vsubq_s16(v451, v454);
+ int16x8_t v584_tmp = vqrdmulhq_n_s16(v583, 1988);
+ int16x8_t v584 = vaddq_s16(v584_tmp, v583);
+ int16x8_t v585 = vaddq_s16(v582, v584);
+ int16x8_t v586 = vaddq_s16(v581, v585);
+ int16x8_t v587 = vsubq_s16(v489, v492);
+ int16x8_t v588_tmp = vqrdmulhq_n_s16(v498, 25030);
+ int16x8_t v588 = vaddq_s16(v588_tmp, v498);
+ int16x8_t v589 = vsubq_s16(v494, v588);
+ int16x8_t v590_tmp = vqrdmulhq_n_s16(v589, 1988);
+ int16x8_t v590 = vaddq_s16(v590_tmp, v589);
+ int16x8_t v591 = vaddq_s16(v587, v590);
+ int16x8_t v592 = vsubq_s16(v472, v475);
+ int16x8_t v593 = vqrdmulhq_n_s16(v592, 19102);
+ int16x8_t v594 = vsubq_s16(v480, v484);
+ int16x8_t v595 = vaddq_s16(v593, v594);
+ int16x8_t v596 = vaddq_s16(v591, v595);
+ int16x8_t v597 = vqrdmulhq_n_s16(v596, 17000);
+ int16x8_t v598 = vaddq_s16(v586, v597);
+ int16x8_t v599 = vsubq_s16(v365, v371);
+ int16x8_t v600_tmp = vqrdmulhq_n_s16(v599, 23673);
+ int16x8_t v600 = vaddq_s16(v600_tmp, v599);
+ int16x8_t v601 = vsubq_s16(v377, v383);
+ int16x8_t v602 = vaddq_s16(v600, v601);
+ int16x8_t v603 = vsubq_s16(v348, v356);
+ int16x8_t v604_tmp = vqrdmulhq_n_s16(v603, 23673);
+ int16x8_t v604 = vaddq_s16(v604_tmp, v603);
+ int16x8_t v605 = vsubq_s16(v342, v337);
+ int16x8_t v606 = vaddq_s16(v604, v605);
+ int16x8_t v607 = vqrdmulhq_n_s16(v606, 20398);
+ int16x8_t v608 = vaddq_s16(v602, v607);
+ int16x8_t v609 = vsubq_s16(v391, v397);
+ int16x8_t v610 = vsubq_s16(v403, v411);
+ int16x8_t v611_tmp = vqrdmulhq_n_s16(v610, 23673);
+ int16x8_t v611 = vaddq_s16(v611_tmp, v610);
+ int16x8_t v612 = vaddq_s16(v609, v611);
+ int16x8_t v613 = vsubq_s16(v419, v426);
+ int16x8_t v614 = vqrdmulhq_n_s16(v613, 20398);
+ int16x8_t v615 = vsubq_s16(v439, v433);
+ int16x8_t v616_tmp = vqrdmulhq_n_s16(v615, 2367);
+ int16x8_t v616 = vaddq_s16(v616_tmp, v615);
+ int16x8_t v617 = vaddq_s16(v614, v616);
+ int16x8_t v618 = vaddq_s16(v612, v617);
+ int16x8_t v619 = vqrdmulhq_n_s16(v618, 17255);
+ int16x8_t v620 = vaddq_s16(v608, v619);
+ int16x8_t v621 = vsubq_s16(v160, v183);
+ int16x8_t v622 = vsubq_s16(v235, v225);
+ int16x8_t v623_tmp = vqrdmulhq_n_s16(v622, 3314);
+ int16x8_t v623 = vmlaq_n_s16(v623_tmp, v622, 5);
+ int16x8_t v624 = vaddq_s16(v621, v623);
+ int16x8_t v625 = vsubq_s16(v254, v279);
+ int16x8_t v626 = vsubq_s16(v299, v324);
+ int16x8_t v627_tmp = vqrdmulhq_n_s16(v626, 3314);
+ int16x8_t v627 = vmlaq_n_s16(v627_tmp, v626, 5);
+ int16x8_t v628 = vaddq_s16(v625, v627);
+ int16x8_t v629 = vqrdmulhq_n_s16(v628, 22112);
+ int16x8_t v630 = vaddq_s16(v624, v629);
+ int16x8_t v631 = vqrdmulhq_n_s16(v630, 17561);
+ int16x8_t v632 = vsubq_s16(v9, v24);
+ int16x8_t v633 = vsubq_s16(v42, v58);
+ int16x8_t v634_tmp = vqrdmulhq_n_s16(v633, 3314);
+ int16x8_t v634 = vmlaq_n_s16(v634_tmp, v633, 5);
+ int16x8_t v635 = vaddq_s16(v632, v634);
+ int16x8_t v636 = vsubq_s16(v78, v101);
+ int16x8_t v637 = vsubq_s16(v138, v128);
+ int16x8_t v638_tmp = vqrdmulhq_n_s16(v637, 3314);
+ int16x8_t v638 = vmlaq_n_s16(v638_tmp, v637, 5);
+ int16x8_t v639 = vaddq_s16(v636, v638);
+ int16x8_t v640 = vqrdmulhq_n_s16(v639, 22112);
+ int16x8_t v641 = vaddq_s16(v635, v640);
+ int16x8_t v642 = vaddq_s16(v631, v641);
+ int16x8_t v643 = vsubq_s16(v632, v634);
+ int16x8_t v644 = vsubq_s16(v636, v638);
+ int16x8_t v645 = vqrdmulhq_n_s16(v644, 24397);
+ int16x8_t v646 = vaddq_s16(v643, v645);
+ int16x8_t v647 = vsubq_s16(v621, v623);
+ int16x8_t v648 = vsubq_s16(v625, v627);
+ int16x8_t v649 = vqrdmulhq_n_s16(v648, 24397);
+ int16x8_t v650 = vaddq_s16(v647, v649);
+ int16x8_t v651 = vqrdmulhq_n_s16(v650, 17921);
+ int16x8_t v652 = vaddq_s16(v646, v651);
+ int16x8_t v653 = vsubq_s16(v601, v600);
+ int16x8_t v654 = vsubq_s16(v605, v604);
+ int16x8_t v655 = vqrdmulhq_n_s16(v654, 27504);
+ int16x8_t v656 = vaddq_s16(v653, v655);
+ int16x8_t v657 = vsubq_s16(v609, v611);
+ int16x8_t v658 = vqrdmulhq_n_s16(v613, 27504);
+ int16x8_t v659_tmp = vqrdmulhq_n_s16(v615, 14606);
+ int16x8_t v659 = vaddq_s16(v659_tmp, v615);
+ int16x8_t v660 = vsubq_s16(v658, v659);
+ int16x8_t v661 = vaddq_s16(v657, v660);
+ int16x8_t v662 = vqrdmulhq_n_s16(v661, 18343);
+ int16x8_t v663 = vaddq_s16(v656, v662);
+ int16x8_t v664 = vsubq_s16(v582, v584);
+ int16x8_t v665 = vsubq_s16(v576, v579);
+ int16x8_t v666 = vqrdmulhq_n_s16(v665, 31869);
+ int16x8_t v667 = vaddq_s16(v664, v666);
+ int16x8_t v668 = vsubq_s16(v587, v590);
+ int16x8_t v669_tmp = vqrdmulhq_n_s16(v594, 23444);
+ int16x8_t v669 = vaddq_s16(v669_tmp, v594);
+ int16x8_t v670 = vsubq_s16(v592, v669);
+ int16x8_t v671 = vqrdmulhq_n_s16(v670, 31869);
+ int16x8_t v672 = vaddq_s16(v668, v671);
+ int16x8_t v673 = vqrdmulhq_n_s16(v672, 18830);
+ int16x8_t v674 = vaddq_s16(v667, v673);
+ int16x8_t v675 = vsubq_s16(v553, v555);
+ int16x8_t v676 = vsubq_s16(v557, v559);
+ int16x8_t v677_tmp = vqrdmulhq_n_s16(v676, 5552);
+ int16x8_t v677 = vaddq_s16(v677_tmp, v676);
+ int16x8_t v678 = vaddq_s16(v675, v677);
+ int16x8_t v679 = vsubq_s16(v563, v565);
+ int16x8_t v680 = vsubq_s16(v567, v570);
+ int16x8_t v681_tmp = vqrdmulhq_n_s16(v680, 5552);
+ int16x8_t v681 = vaddq_s16(v681_tmp, v680);
+ int16x8_t v682 = vaddq_s16(v679, v681);
+ int16x8_t v683 = vqrdmulhq_n_s16(v682, 19393);
+ int16x8_t v684 = vaddq_s16(v678, v683);
+ int16x8_t v685 = vsubq_s16(v507, v512);
+ int16x8_t v686 = vsubq_s16(v517, v522);
+ int16x8_t v687_tmp = vqrdmulhq_n_s16(v686, 15865);
+ int16x8_t v687 = vaddq_s16(v687_tmp, v686);
+ int16x8_t v688 = vaddq_s16(v685, v687);
+ int16x8_t v689 = vsubq_s16(v529, v534);
+ int16x8_t v690_tmp = vqrdmulhq_n_s16(v548, 28937);
+ int16x8_t v690 = vaddq_s16(v690_tmp, v548);
+ int16x8_t v691 = vsubq_s16(v539, v690);
+ int16x8_t v692_tmp = vqrdmulhq_n_s16(v691, 15865);
+ int16x8_t v692 = vaddq_s16(v692_tmp, v691);
+ int16x8_t v693 = vaddq_s16(v689, v692);
+ int16x8_t v694 = vqrdmulhq_n_s16(v693, 20040);
+ int16x8_t v695 = vaddq_s16(v688, v694);
+ int16x8_t v696 = vsubq_s16(v476, v486);
+ int16x8_t v697_tmp = vqrdmulhq_n_s16(v696, 1893);
+ int16x8_t v697 = vmlaq_n_s16(v697_tmp, v696, 2);
+ int16x8_t v698 = vsubq_s16(v493, v499);
+ int16x8_t v699 = vaddq_s16(v697, v698);
+ int16x8_t v700 = vqrdmulhq_n_s16(v699, 20783);
+ int16x8_t v701 = vsubq_s16(v450, v456);
+ int16x8_t v702 = vsubq_s16(v462, v468);
+ int16x8_t v703_tmp = vqrdmulhq_n_s16(v702, 1893);
+ int16x8_t v703 = vmlaq_n_s16(v703_tmp, v702, 2);
+ int16x8_t v704 = vaddq_s16(v701, v703);
+ int16x8_t v705 = vaddq_s16(v700, v704);
+ int16x8_t v706 = vsubq_s16(v384, v373);
+ int16x8_t v707 = vsubq_s16(v343, v358);
+ int16x8_t v708_tmp = vqrdmulhq_n_s16(v707, 13357);
+ int16x8_t v708 = vmlaq_n_s16(v708_tmp, v707, 3);
+ int16x8_t v709 = vaddq_s16(v706, v708);
+ int16x8_t v710 = vsubq_s16(v398, v413);
+ int16x8_t v711 = vsubq_s16(v427, v441);
+ int16x8_t v712_tmp = vqrdmulhq_n_s16(v711, 13357);
+ int16x8_t v712 = vmlaq_n_s16(v712_tmp, v711, 3);
+ int16x8_t v713 = vaddq_s16(v710, v712);
+ int16x8_t v714 = vqrdmulhq_n_s16(v713, 21637);
+ int16x8_t v715 = vaddq_s16(v709, v714);
+ int16x8_t v716 = vsubq_s16(v25, v60);
+ int16x8_t v717 = vsubq_s16(v102, v140);
+ int16x8_t v718_tmp = vqrdmulhq_n_s16(v717, 6226);
+ int16x8_t v718 = vmlaq_n_s16(v718_tmp, v717, 10);
+ int16x8_t v719 = vaddq_s16(v716, v718);
+ int16x8_t v720 = vsubq_s16(v280, v326);
+ int16x8_t v721_tmp = vqrdmulhq_n_s16(v720, 6226);
+ int16x8_t v721 = vmlaq_n_s16(v721_tmp, v720, 10);
+ int16x8_t v722 = vsubq_s16(v184, v237);
+ int16x8_t v723 = vaddq_s16(v721, v722);
+ int16x8_t v724 = vqrdmulhq_n_s16(v723, 22622);
+ int16x8_t v725 = vaddq_s16(v719, v724);
+ int16x8_t v726 = vsubq_s16(v716, v718);
+ int16x8_t v727 = vsubq_s16(v722, v721);
+ int16x8_t v728 = vqrdmulhq_n_s16(v727, 23761);
+ int16x8_t v729 = vaddq_s16(v726, v728);
+ int16x8_t v730 = vsubq_s16(v706, v708);
+ int16x8_t v731 = vsubq_s16(v710, v712);
+ int16x8_t v732 = vqrdmulhq_n_s16(v731, 25084);
+ int16x8_t v733 = vaddq_s16(v730, v732);
+ int16x8_t v734 = vsubq_s16(v701, v703);
+ int16x8_t v735 = vsubq_s16(v698, v697);
+ int16x8_t v736 = vqrdmulhq_n_s16(v735, 26631);
+ int16x8_t v737 = vaddq_s16(v734, v736);
+ int16x8_t v738 = vsubq_s16(v685, v687);
+ int16x8_t v739 = vsubq_s16(v689, v692);
+ int16x8_t v740 = vqrdmulhq_n_s16(v739, 28454);
+ int16x8_t v741 = vaddq_s16(v738, v740);
+ int16x8_t v742 = vsubq_s16(v675, v677);
+ int16x8_t v743 = vsubq_s16(v679, v681);
+ int16x8_t v744 = vqrdmulhq_n_s16(v743, 30624);
+ int16x8_t v745 = vaddq_s16(v742, v744);
+ int16x8_t v746 = vsubq_s16(v664, v666);
+ int16x8_t v747 = vsubq_s16(v668, v671);
+ int16x8_t v748_tmp = vqrdmulhq_n_s16(v747, 472);
+ int16x8_t v748 = vaddq_s16(v748_tmp, v747);
+ int16x8_t v749 = vaddq_s16(v746, v748);
+ int16x8_t v750 = vsubq_s16(v653, v655);
+ int16x8_t v751 = vsubq_s16(v657, v660);
+ int16x8_t v752_tmp = vqrdmulhq_n_s16(v751, 3672);
+ int16x8_t v752 = vaddq_s16(v752_tmp, v751);
+ int16x8_t v753 = vaddq_s16(v750, v752);
+ int16x8_t v754 = vsubq_s16(v643, v645);
+ int16x8_t v755 = vsubq_s16(v647, v649);
+ int16x8_t v756_tmp = vqrdmulhq_n_s16(v755, 7662);
+ int16x8_t v756 = vaddq_s16(v756_tmp, v755);
+ int16x8_t v757 = vaddq_s16(v754, v756);
+ int16x8_t v758 = vsubq_s16(v635, v640);
+ int16x8_t v759 = vsubq_s16(v624, v629);
+ int16x8_t v760_tmp = vqrdmulhq_n_s16(v759, 12756);
+ int16x8_t v760 = vaddq_s16(v760_tmp, v759);
+ int16x8_t v761 = vaddq_s16(v758, v760);
+ int16x8_t v762 = vsubq_s16(v602, v607);
+ int16x8_t v763 = vsubq_s16(v612, v617);
+ int16x8_t v764_tmp = vqrdmulhq_n_s16(v763, 19463);
+ int16x8_t v764 = vaddq_s16(v764_tmp, v763);
+ int16x8_t v765 = vaddq_s16(v762, v764);
+ int16x8_t v766 = vsubq_s16(v585, v581);
+ int16x8_t v767 = vsubq_s16(v591, v595);
+ int16x8_t v768_tmp = vqrdmulhq_n_s16(v767, 28661);
+ int16x8_t v768 = vaddq_s16(v768_tmp, v767);
+ int16x8_t v769 = vaddq_s16(v766, v768);
+ int16x8_t v770 = vsubq_s16(v556, v561);
+ int16x8_t v771 = vsubq_s16(v566, v572);
+ int16x8_t v772_tmp = vqrdmulhq_n_s16(v771, 9242);
+ int16x8_t v772 = vmlaq_n_s16(v772_tmp, v771, 2);
+ int16x8_t v773 = vaddq_s16(v770, v772);
+ int16x8_t v774 = vsubq_s16(v513, v524);
+ int16x8_t v775 = vsubq_s16(v535, v549);
+ int16x8_t v776_tmp = vqrdmulhq_n_s16(v775, 30298);
+ int16x8_t v776 = vmlaq_n_s16(v776_tmp, v775, 2);
+ int16x8_t v777 = vaddq_s16(v774, v776);
+ int16x8_t v778 = vsubq_s16(v457, v470);
+ int16x8_t v779 = vsubq_s16(v500, v488);
+ int16x8_t v780_tmp = vqrdmulhq_n_s16(v779, 2773);
+ int16x8_t v780 = vmlaq_n_s16(v780_tmp, v779, 4);
+ int16x8_t v781 = vaddq_s16(v778, v780);
+ int16x8_t v782 = vsubq_s16(v385, v360);
+ int16x8_t v783 = vsubq_s16(v414, v443);
+ int16x8_t v784_tmp = vqrdmulhq_n_s16(v783, 26108);
+ int16x8_t v784 = vmlaq_n_s16(v784_tmp, v783, 6);
+ int16x8_t v785 = vaddq_s16(v782, v784);
+ int16x8_t v786 = vsubq_s16(v61, v142);
+ int16x8_t v787 = vsubq_s16(v238, v328);
+ int16x8_t v788_tmp = vqrdmulhq_n_s16(v787, 12251);
+ int16x8_t v788 = vmlaq_n_s16(v788_tmp, v787, 20);
+ int16x8_t v789 = vaddq_s16(v786, v788);
+ int16x8_t v790 = vsubq_s16(v786, v788);
+ int16x8_t v791 = vsubq_s16(v782, v784);
+ int16x8_t v792 = vsubq_s16(v778, v780);
+ int16x8_t v793 = vsubq_s16(v774, v776);
+ int16x8_t v794 = vsubq_s16(v770, v772);
+ int16x8_t v795 = vsubq_s16(v766, v768);
+ int16x8_t v796 = vsubq_s16(v762, v764);
+ int16x8_t v797 = vsubq_s16(v758, v760);
+ int16x8_t v798 = vsubq_s16(v754, v756);
+ int16x8_t v799 = vsubq_s16(v750, v752);
+ int16x8_t v800 = vsubq_s16(v746, v748);
+ int16x8_t v801 = vsubq_s16(v742, v744);
+ int16x8_t v802 = vsubq_s16(v738, v740);
+ int16x8_t v803 = vsubq_s16(v734, v736);
+ int16x8_t v804 = vsubq_s16(v730, v732);
+ int16x8_t v805 = vsubq_s16(v726, v728);
+ int16x8_t v806 = vsubq_s16(v719, v724);
+ int16x8_t v807 = vsubq_s16(v709, v714);
+ int16x8_t v808 = vsubq_s16(v704, v700);
+ int16x8_t v809 = vsubq_s16(v688, v694);
+ int16x8_t v810 = vsubq_s16(v678, v683);
+ int16x8_t v811 = vsubq_s16(v667, v673);
+ int16x8_t v812 = vsubq_s16(v656, v662);
+ int16x8_t v813 = vsubq_s16(v646, v651);
+ int16x8_t v814 = vsubq_s16(v641, v631);
+ int16x8_t v815 = vsubq_s16(v608, v619);
+ int16x8_t v816 = vsubq_s16(v586, v597);
+ int16x8_t v817 = vsubq_s16(v562, v574);
+ int16x8_t v818 = vsubq_s16(v525, v551);
+ int16x8_t v819 = vsubq_s16(v471, v502);
+ int16x8_t v820 = vsubq_s16(v386, v445);
+ int16x8_t v821 = vsubq_s16(v143, v330);
+ vst1q_s16(out + out_stride * 0 + i, v331);
+ vst1q_s16(out + out_stride * 1 + i, v446);
+ vst1q_s16(out + out_stride * 2 + i, v503);
+ vst1q_s16(out + out_stride * 3 + i, v552);
+ vst1q_s16(out + out_stride * 4 + i, v575);
+ vst1q_s16(out + out_stride * 5 + i, v598);
+ vst1q_s16(out + out_stride * 6 + i, v620);
+ vst1q_s16(out + out_stride * 7 + i, v642);
+ vst1q_s16(out + out_stride * 8 + i, v652);
+ vst1q_s16(out + out_stride * 9 + i, v663);
+ vst1q_s16(out + out_stride * 10 + i, v674);
+ vst1q_s16(out + out_stride * 11 + i, v684);
+ vst1q_s16(out + out_stride * 12 + i, v695);
+ vst1q_s16(out + out_stride * 13 + i, v705);
+ vst1q_s16(out + out_stride * 14 + i, v715);
+ vst1q_s16(out + out_stride * 15 + i, v725);
+ vst1q_s16(out + out_stride * 16 + i, v729);
+ vst1q_s16(out + out_stride * 17 + i, v733);
+ vst1q_s16(out + out_stride * 18 + i, v737);
+ vst1q_s16(out + out_stride * 19 + i, v741);
+ vst1q_s16(out + out_stride * 20 + i, v745);
+ vst1q_s16(out + out_stride * 21 + i, v749);
+ vst1q_s16(out + out_stride * 22 + i, v753);
+ vst1q_s16(out + out_stride * 23 + i, v757);
+ vst1q_s16(out + out_stride * 24 + i, v761);
+ vst1q_s16(out + out_stride * 25 + i, v765);
+ vst1q_s16(out + out_stride * 26 + i, v769);
+ vst1q_s16(out + out_stride * 27 + i, v773);
+ vst1q_s16(out + out_stride * 28 + i, v777);
+ vst1q_s16(out + out_stride * 29 + i, v781);
+ vst1q_s16(out + out_stride * 30 + i, v785);
+ vst1q_s16(out + out_stride * 31 + i, v789);
+ vst1q_s16(out + out_stride * 32 + i, v790);
+ vst1q_s16(out + out_stride * 33 + i, v791);
+ vst1q_s16(out + out_stride * 34 + i, v792);
+ vst1q_s16(out + out_stride * 35 + i, v793);
+ vst1q_s16(out + out_stride * 36 + i, v794);
+ vst1q_s16(out + out_stride * 37 + i, v795);
+ vst1q_s16(out + out_stride * 38 + i, v796);
+ vst1q_s16(out + out_stride * 39 + i, v797);
+ vst1q_s16(out + out_stride * 40 + i, v798);
+ vst1q_s16(out + out_stride * 41 + i, v799);
+ vst1q_s16(out + out_stride * 42 + i, v800);
+ vst1q_s16(out + out_stride * 43 + i, v801);
+ vst1q_s16(out + out_stride * 44 + i, v802);
+ vst1q_s16(out + out_stride * 45 + i, v803);
+ vst1q_s16(out + out_stride * 46 + i, v804);
+ vst1q_s16(out + out_stride * 47 + i, v805);
+ vst1q_s16(out + out_stride * 48 + i, v806);
+ vst1q_s16(out + out_stride * 49 + i, v807);
+ vst1q_s16(out + out_stride * 50 + i, v808);
+ vst1q_s16(out + out_stride * 51 + i, v809);
+ vst1q_s16(out + out_stride * 52 + i, v810);
+ vst1q_s16(out + out_stride * 53 + i, v811);
+ vst1q_s16(out + out_stride * 54 + i, v812);
+ vst1q_s16(out + out_stride * 55 + i, v813);
+ vst1q_s16(out + out_stride * 56 + i, v814);
+ vst1q_s16(out + out_stride * 57 + i, v815);
+ vst1q_s16(out + out_stride * 58 + i, v816);
+ vst1q_s16(out + out_stride * 59 + i, v817);
+ vst1q_s16(out + out_stride * 60 + i, v818);
+ vst1q_s16(out + out_stride * 61 + i, v819);
+ vst1q_s16(out + out_stride * 62 + i, v820);
+ vst1q_s16(out + out_stride * 63 + i, v821);
+ }
+}
diff --git a/third_party/jpeg-xl/lib/jxl/fast_dct8-inl.h b/third_party/jpeg-xl/lib/jxl/fast_dct8-inl.h
new file mode 100644
index 0000000000..946ace4a0c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/fast_dct8-inl.h
@@ -0,0 +1,80 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/* This file is automatically generated. Do not modify it directly. */
+#if HWY_TARGET != HWY_NEON
+#error "only include this file from fast_dct-inl.h"
+#endif
+
+constexpr size_t FastIDCTIntegerBits(FastDCTTag<8>) { return 1; }
+
+void FastIDCT(FastDCTTag<8>, const int16_t* in, size_t in_stride, int16_t* out,
+ size_t out_stride, size_t count) {
+ JXL_ASSERT(count % 8 == 0);
+ for (size_t i = 0; i < count; i += 8) {
+ int16x8_t v0 = vld1q_s16(in + in_stride * 0 + i);
+ int16x8_t v1 = vld1q_s16(in + in_stride * 4 + i);
+ int16x8_t v2 = vaddq_s16(v0, v1);
+ int16x8_t v3 = vld1q_s16(in + in_stride * 2 + i);
+ int16x8_t v4_tmp = vqrdmulhq_n_s16(v3, 13573);
+ int16x8_t v4 = vaddq_s16(v4_tmp, v3);
+ int16x8_t v5 = vld1q_s16(in + in_stride * 6 + i);
+ int16x8_t v6 = vaddq_s16(v5, v3);
+ int16x8_t v7 = vaddq_s16(v4, v6);
+ int16x8_t v8 = vqrdmulhq_n_s16(v7, 17734);
+ int16x8_t v9 = vaddq_s16(v2, v8);
+ int16x8_t v10 = vld1q_s16(in + in_stride * 1 + i);
+ int16x8_t v11_tmp = vqrdmulhq_n_s16(v10, 13573);
+ int16x8_t v11 = vaddq_s16(v11_tmp, v10);
+ int16x8_t v12 = vld1q_s16(in + in_stride * 5 + i);
+ int16x8_t v13 = vld1q_s16(in + in_stride * 3 + i);
+ int16x8_t v14 = vaddq_s16(v12, v13);
+ int16x8_t v15 = vaddq_s16(v11, v14);
+ int16x8_t v16 = vaddq_s16(v13, v10);
+ int16x8_t v17 = vqrdmulhq_n_s16(v16, 25080);
+ int16x8_t v18 = vld1q_s16(in + in_stride * 7 + i);
+ int16x8_t v19 = vaddq_s16(v18, v12);
+ int16x8_t v20 = vaddq_s16(v16, v19);
+ int16x8_t v21 = vqrdmulhq_n_s16(v20, 17734);
+ int16x8_t v22 = vaddq_s16(v17, v21);
+ int16x8_t v23 = vaddq_s16(v15, v22);
+ int16x8_t v24 = vqrdmulhq_n_s16(v23, 16705);
+ int16x8_t v25 = vaddq_s16(v9, v24);
+ int16x8_t v26 = vsubq_s16(v0, v1);
+ int16x8_t v27 = vsubq_s16(v4, v6);
+ int16x8_t v28_tmp = vqrdmulhq_n_s16(v27, 10045);
+ int16x8_t v28 = vaddq_s16(v28_tmp, v27);
+ int16x8_t v29 = vaddq_s16(v26, v28);
+ int16x8_t v30 = vsubq_s16(v11, v14);
+ int16x8_t v31 = vqrdmulhq_n_s16(v16, 17734);
+ int16x8_t v32_tmp = vqrdmulhq_n_s16(v19, 10045);
+ int16x8_t v32 = vaddq_s16(v32_tmp, v19);
+ int16x8_t v33 = vsubq_s16(v31, v32);
+ int16x8_t v34 = vaddq_s16(v30, v33);
+ int16x8_t v35 = vqrdmulhq_n_s16(v34, 19705);
+ int16x8_t v36 = vaddq_s16(v29, v35);
+ int16x8_t v37 = vsubq_s16(v26, v28);
+ int16x8_t v38 = vsubq_s16(v30, v33);
+ int16x8_t v39 = vqrdmulhq_n_s16(v38, 29490);
+ int16x8_t v40 = vaddq_s16(v37, v39);
+ int16x8_t v41 = vsubq_s16(v2, v8);
+ int16x8_t v42 = vsubq_s16(v15, v22);
+ int16x8_t v43_tmp = vqrdmulhq_n_s16(v42, 18446);
+ int16x8_t v43 = vmlaq_n_s16(v43_tmp, v42, 2);
+ int16x8_t v44 = vaddq_s16(v41, v43);
+ int16x8_t v45 = vsubq_s16(v41, v43);
+ int16x8_t v46 = vsubq_s16(v37, v39);
+ int16x8_t v47 = vsubq_s16(v29, v35);
+ int16x8_t v48 = vsubq_s16(v9, v24);
+ vst1q_s16(out + out_stride * 0 + i, v25);
+ vst1q_s16(out + out_stride * 1 + i, v36);
+ vst1q_s16(out + out_stride * 2 + i, v40);
+ vst1q_s16(out + out_stride * 3 + i, v44);
+ vst1q_s16(out + out_stride * 4 + i, v45);
+ vst1q_s16(out + out_stride * 5 + i, v46);
+ vst1q_s16(out + out_stride * 6 + i, v47);
+ vst1q_s16(out + out_stride * 7 + i, v48);
+ }
+}
diff --git a/third_party/jpeg-xl/lib/jxl/fast_dct_test.cc b/third_party/jpeg-xl/lib/jxl/fast_dct_test.cc
new file mode 100644
index 0000000000..5bb1a79cc5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/fast_dct_test.cc
@@ -0,0 +1,378 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include <numeric>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/fast_dct_test.cc"
+#include <hwy/foreach_target.h>
+
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/dct-inl.h"
+#include "lib/jxl/fast_dct-inl.h"
+#include "lib/jxl/fast_dct.h"
+#include "lib/jxl/transpose-inl.h"
+
+// Test utils
+#include <hwy/highway.h>
+#include <hwy/tests/test_util-inl.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+template <size_t N, size_t M>
+HWY_NOINLINE void TestFastTranspose() {
+#if HWY_TARGET == HWY_NEON
+ auto array_mem = hwy::AllocateAligned<int16_t>(N * M);
+ int16_t* array = array_mem.get();
+ auto transposed_mem = hwy::AllocateAligned<int16_t>(N * M);
+ int16_t* transposed = transposed_mem.get();
+ std::iota(array, array + N * M, 0);
+ for (size_t j = 0; j < 100000000 / (N * M); j++) {
+ FastTransposeBlock(array, M, N, M, transposed, N);
+ }
+ for (size_t i = 0; i < M; i++) {
+ for (size_t j = 0; j < N; j++) {
+ EXPECT_EQ(array[j * M + i], transposed[i * N + j]);
+ }
+ }
+#endif
+}
+
+template <size_t N, size_t M>
+HWY_NOINLINE void TestFloatTranspose() {
+ auto array_mem = hwy::AllocateAligned<float>(N * M);
+ float* array = array_mem.get();
+ auto transposed_mem = hwy::AllocateAligned<float>(N * M);
+ float* transposed = transposed_mem.get();
+ std::iota(array, array + N * M, 0);
+ for (size_t j = 0; j < 100000000 / (N * M); j++) {
+ Transpose<N, M>::Run(DCTFrom(array, M), DCTTo(transposed, N));
+ }
+ for (size_t i = 0; i < M; i++) {
+ for (size_t j = 0; j < N; j++) {
+ EXPECT_EQ(array[j * M + i], transposed[i * N + j]);
+ }
+ }
+}
+
+// TODO(sboukortt): re-enable the FloatIDCT tests once we find out why they fail
+// in ASAN mode in the CI runners and seemingly not locally.
+
+HWY_NOINLINE void TestFastTranspose8x8() { TestFastTranspose<8, 8>(); }
+HWY_NOINLINE void TestFloatTranspose8x8() { TestFloatTranspose<8, 8>(); }
+HWY_NOINLINE void TestFastIDCT8x8() { TestFastIDCT<8, 8>(); }
+HWY_NOINLINE void TestFloatIDCT8x8() {
+#if HWY_TARGET == HWY_SCALAR && \
+ (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+ GTEST_SKIP();
+#else
+ TestFloatIDCT<8, 8>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose8x16() { TestFastTranspose<8, 16>(); }
+HWY_NOINLINE void TestFloatTranspose8x16() { TestFloatTranspose<8, 16>(); }
+HWY_NOINLINE void TestFastIDCT8x16() { TestFastIDCT<8, 16>(); }
+HWY_NOINLINE void TestFloatIDCT8x16() {
+#if HWY_TARGET == HWY_SCALAR && \
+ (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+ GTEST_SKIP();
+#else
+ TestFloatIDCT<8, 16>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose8x32() { TestFastTranspose<8, 32>(); }
+HWY_NOINLINE void TestFloatTranspose8x32() { TestFloatTranspose<8, 32>(); }
+HWY_NOINLINE void TestFastIDCT8x32() { TestFastIDCT<8, 32>(); }
+HWY_NOINLINE void TestFloatIDCT8x32() {
+#if HWY_TARGET == HWY_SCALAR && \
+ (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+ GTEST_SKIP();
+#else
+ TestFloatIDCT<8, 32>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose16x8() { TestFastTranspose<16, 8>(); }
+HWY_NOINLINE void TestFloatTranspose16x8() { TestFloatTranspose<16, 8>(); }
+HWY_NOINLINE void TestFastIDCT16x8() { TestFastIDCT<16, 8>(); }
+HWY_NOINLINE void TestFloatIDCT16x8() {
+#if HWY_TARGET == HWY_SCALAR && \
+ (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+ GTEST_SKIP();
+#else
+ TestFloatIDCT<16, 8>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose16x16() { TestFastTranspose<16, 16>(); }
+HWY_NOINLINE void TestFloatTranspose16x16() { TestFloatTranspose<16, 16>(); }
+HWY_NOINLINE void TestFastIDCT16x16() { TestFastIDCT<16, 16>(); }
+HWY_NOINLINE void TestFloatIDCT16x16() {
+#if HWY_TARGET == HWY_SCALAR && \
+ (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+ GTEST_SKIP();
+#else
+ TestFloatIDCT<16, 16>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose16x32() { TestFastTranspose<16, 32>(); }
+HWY_NOINLINE void TestFloatTranspose16x32() { TestFloatTranspose<16, 32>(); }
+HWY_NOINLINE void TestFastIDCT16x32() { TestFastIDCT<16, 32>(); }
+HWY_NOINLINE void TestFloatIDCT16x32() {
+#if HWY_TARGET == HWY_SCALAR && \
+ (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+ GTEST_SKIP();
+#else
+ TestFloatIDCT<16, 32>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose32x8() { TestFastTranspose<32, 8>(); }
+HWY_NOINLINE void TestFloatTranspose32x8() { TestFloatTranspose<32, 8>(); }
+HWY_NOINLINE void TestFastIDCT32x8() { TestFastIDCT<32, 8>(); }
+HWY_NOINLINE void TestFloatIDCT32x8() {
+#if HWY_TARGET == HWY_SCALAR && \
+ (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+ GTEST_SKIP();
+#else
+ TestFloatIDCT<32, 8>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose32x16() { TestFastTranspose<32, 16>(); }
+HWY_NOINLINE void TestFloatTranspose32x16() { TestFloatTranspose<32, 16>(); }
+HWY_NOINLINE void TestFastIDCT32x16() { TestFastIDCT<32, 16>(); }
+HWY_NOINLINE void TestFloatIDCT32x16() {
+#if HWY_TARGET == HWY_SCALAR && \
+ (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+ GTEST_SKIP();
+#else
+ TestFloatIDCT<32, 16>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose32x32() { TestFastTranspose<32, 32>(); }
+HWY_NOINLINE void TestFloatTranspose32x32() { TestFloatTranspose<32, 32>(); }
+HWY_NOINLINE void TestFastIDCT32x32() { TestFastIDCT<32, 32>(); }
+HWY_NOINLINE void TestFloatIDCT32x32() {
+#if HWY_TARGET == HWY_SCALAR && \
+ (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+ GTEST_SKIP();
+#else
+ TestFloatIDCT<32, 32>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose32x64() { TestFastTranspose<32, 64>(); }
+HWY_NOINLINE void TestFloatTranspose32x64() { TestFloatTranspose<32, 64>(); }
+HWY_NOINLINE void TestFastIDCT32x64() { TestFastIDCT<32, 64>(); }
+HWY_NOINLINE void TestFloatIDCT32x64() {
+#if HWY_TARGET == HWY_SCALAR && \
+ (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+ GTEST_SKIP();
+#else
+ TestFloatIDCT<32, 64>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose64x32() { TestFastTranspose<64, 32>(); }
+HWY_NOINLINE void TestFloatTranspose64x32() { TestFloatTranspose<64, 32>(); }
+HWY_NOINLINE void TestFastIDCT64x32() { TestFastIDCT<64, 32>(); }
+HWY_NOINLINE void TestFloatIDCT64x32() {
+#if HWY_TARGET == HWY_SCALAR && \
+ (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+ GTEST_SKIP();
+#else
+ TestFloatIDCT<64, 32>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose64x64() { TestFastTranspose<64, 64>(); }
+HWY_NOINLINE void TestFloatTranspose64x64() { TestFloatTranspose<64, 64>(); }
+HWY_NOINLINE void TestFastIDCT64x64() { TestFastIDCT<64, 64>(); }
+HWY_NOINLINE void TestFloatIDCT64x64() {
+#if HWY_TARGET == HWY_SCALAR && \
+ (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+ GTEST_SKIP();
+#else
+ TestFloatIDCT<64, 64>();
+#endif
+}
+HWY_NOINLINE void TestFastTranspose64x128() { TestFastTranspose<64, 128>(); }
+HWY_NOINLINE void TestFloatTranspose64x128() { TestFloatTranspose<64, 128>(); }
+/*
+HWY_NOINLINE void TestFastIDCT64x128() { TestFastIDCT<64, 128>(); }
+HWY_NOINLINE void TestFloatIDCT64x128() {
+#if HWY_TARGET == HWY_SCALAR && \
+ (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+ GTEST_SKIP();
+#else
+ TestFloatIDCT<64, 128>();
+#endif
+}
+*/
+HWY_NOINLINE void TestFastTranspose128x64() { TestFastTranspose<128, 64>(); }
+HWY_NOINLINE void TestFloatTranspose128x64() { TestFloatTranspose<128, 64>(); }
+/*
+HWY_NOINLINE void TestFastIDCT128x64() { TestFastIDCT<128, 64>(); }
+HWY_NOINLINE void TestFloatIDCT128x64() {
+#if HWY_TARGET == HWY_SCALAR && \
+ (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+ GTEST_SKIP();
+#else
+ TestFloatIDCT<128, 64>();
+#endif
+}
+*/
+HWY_NOINLINE void TestFastTranspose128x128() { TestFastTranspose<128, 128>(); }
+HWY_NOINLINE void TestFloatTranspose128x128() {
+ TestFloatTranspose<128, 128>();
+}
+/*
+HWY_NOINLINE void TestFastIDCT128x128() { TestFastIDCT<128, 128>(); }
+HWY_NOINLINE void TestFloatIDCT128x128() {
+#if HWY_TARGET == HWY_SCALAR && \
+ (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+ GTEST_SKIP();
+#else
+ TestFloatIDCT<128, 128>();
+#endif
+}
+*/
+HWY_NOINLINE void TestFastTranspose128x256() { TestFastTranspose<128, 256>(); }
+HWY_NOINLINE void TestFloatTranspose128x256() {
+ TestFloatTranspose<128, 256>();
+}
+/*
+HWY_NOINLINE void TestFastIDCT128x256() { TestFastIDCT<128, 256>(); }
+HWY_NOINLINE void TestFloatIDCT128x256() {
+#if HWY_TARGET == HWY_SCALAR && \
+ (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+ GTEST_SKIP();
+#else
+ TestFloatIDCT<128, 256>();
+#endif
+}
+*/
+HWY_NOINLINE void TestFastTranspose256x128() { TestFastTranspose<256, 128>(); }
+HWY_NOINLINE void TestFloatTranspose256x128() {
+ TestFloatTranspose<256, 128>();
+}
+/*
+HWY_NOINLINE void TestFastIDCT256x128() { TestFastIDCT<256, 128>(); }
+HWY_NOINLINE void TestFloatIDCT256x128() {
+#if HWY_TARGET == HWY_SCALAR && \
+ (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+ GTEST_SKIP();
+#else
+ TestFloatIDCT<256, 128>();
+#endif
+}
+*/
+HWY_NOINLINE void TestFastTranspose256x256() { TestFastTranspose<256, 256>(); }
+HWY_NOINLINE void TestFloatTranspose256x256() {
+ TestFloatTranspose<256, 256>();
+}
+/*
+HWY_NOINLINE void TestFastIDCT256x256() { TestFastIDCT<256, 256>(); }
+HWY_NOINLINE void TestFloatIDCT256x256() {
+#if HWY_TARGET == HWY_SCALAR && \
+ (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER))
+ GTEST_SKIP();
+#else
+ TestFloatIDCT<256, 256>();
+#endif
+}
+*/
+
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+class FastDCTTargetTest : public hwy::TestWithParamTarget {};
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(FastDCTTargetTest);
+
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose8x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose8x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose8x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose8x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose8x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose8x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose16x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose16x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose16x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose16x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose16x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose16x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose64x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose64x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose64x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose64x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose64x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose64x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose128x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose128x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose128x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose128x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose128x256);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose128x256);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose256x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose256x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose256x256);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose256x256);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT8x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT8x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT8x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT8x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT8x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT8x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT16x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT16x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT16x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT16x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT16x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT16x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x8);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x16);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT64x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT64x32);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT64x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT64x64);
+/*
+ * DCT-128 and above have very large errors just by rounding inputs.
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT64x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT64x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT128x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT128x64);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT128x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT128x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT128x256);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT128x256);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT256x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT256x128);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT256x256);
+HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT256x256);
+*/
+
+TEST(FastDCTTest, TestWrapperFloat) { BenchmarkFloatIDCT32x32(); }
+TEST(FastDCTTest, TestWrapperFast) { BenchmarkFastIDCT32x32(); }
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/fast_math-inl.h b/third_party/jpeg-xl/lib/jxl/fast_math-inl.h
new file mode 100644
index 0000000000..5c48034290
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/fast_math-inl.h
@@ -0,0 +1,236 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Fast SIMD math ops (log2, encoder only, cos, erf for splines)
+
+#if defined(LIB_JXL_FAST_MATH_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_FAST_MATH_INL_H_
+#undef LIB_JXL_FAST_MATH_INL_H_
+#else
+#define LIB_JXL_FAST_MATH_INL_H_
+#endif
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/common.h"
+#include "lib/jxl/rational_polynomial-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Eq;
+using hwy::HWY_NAMESPACE::Floor;
+using hwy::HWY_NAMESPACE::Ge;
+using hwy::HWY_NAMESPACE::GetLane;
+using hwy::HWY_NAMESPACE::IfThenElse;
+using hwy::HWY_NAMESPACE::IfThenZeroElse;
+using hwy::HWY_NAMESPACE::Le;
+using hwy::HWY_NAMESPACE::Min;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::NegMulAdd;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::ShiftLeft;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::Xor;
+
+// Computes base-2 logarithm like std::log2. Undefined if negative / NaN.
+// L1 error ~3.9E-6
+template <class DF, class V>
+V FastLog2f(const DF df, V x) {
+ // 2,2 rational polynomial approximation of std::log1p(x) / std::log(2).
+ HWY_ALIGN const float p[4 * (2 + 1)] = {HWY_REP4(-1.8503833400518310E-06f),
+ HWY_REP4(1.4287160470083755E+00f),
+ HWY_REP4(7.4245873327820566E-01f)};
+ HWY_ALIGN const float q[4 * (2 + 1)] = {HWY_REP4(9.9032814277590719E-01f),
+ HWY_REP4(1.0096718572241148E+00f),
+ HWY_REP4(1.7409343003366853E-01f)};
+
+ const Rebind<int32_t, DF> di;
+ const auto x_bits = BitCast(di, x);
+
+ // Range reduction to [-1/3, 1/3] - 3 integer, 2 float ops
+ const auto exp_bits = Sub(x_bits, Set(di, 0x3f2aaaab)); // = 2/3
+ // Shifted exponent = log2; also used to clear mantissa.
+ const auto exp_shifted = ShiftRight<23>(exp_bits);
+ const auto mantissa = BitCast(df, Sub(x_bits, ShiftLeft<23>(exp_shifted)));
+ const auto exp_val = ConvertTo(df, exp_shifted);
+ return Add(EvalRationalPolynomial(df, Sub(mantissa, Set(df, 1.0f)), p, q),
+ exp_val);
+}
+
+// max relative error ~3e-7
+template <class DF, class V>
+V FastPow2f(const DF df, V x) {
+ const Rebind<int32_t, DF> di;
+ auto floorx = Floor(x);
+ auto exp =
+ BitCast(df, ShiftLeft<23>(Add(ConvertTo(di, floorx), Set(di, 127))));
+ auto frac = Sub(x, floorx);
+ auto num = Add(frac, Set(df, 1.01749063e+01));
+ num = MulAdd(num, frac, Set(df, 4.88687798e+01));
+ num = MulAdd(num, frac, Set(df, 9.85506591e+01));
+ num = Mul(num, exp);
+ auto den = MulAdd(frac, Set(df, 2.10242958e-01), Set(df, -2.22328856e-02));
+ den = MulAdd(den, frac, Set(df, -1.94414990e+01));
+ den = MulAdd(den, frac, Set(df, 9.85506633e+01));
+ return Div(num, den);
+}
+
+// max relative error ~3e-5
+template <class DF, class V>
+V FastPowf(const DF df, V base, V exponent) {
+ return FastPow2f(df, Mul(FastLog2f(df, base), exponent));
+}
+
+// Computes cosine like std::cos.
+// L1 error 7e-5.
+template <class DF, class V>
+V FastCosf(const DF df, V x) {
+ // Step 1: range reduction to [0, 2pi)
+ const auto pi2 = Set(df, kPi * 2.0f);
+ const auto pi2_inv = Set(df, 0.5f / kPi);
+ const auto npi2 = Mul(Floor(Mul(x, pi2_inv)), pi2);
+ const auto xmodpi2 = Sub(x, npi2);
+ // Step 2: range reduction to [0, pi]
+ const auto x_pi = Min(xmodpi2, Sub(pi2, xmodpi2));
+ // Step 3: range reduction to [0, pi/2]
+ const auto above_pihalf = Ge(x_pi, Set(df, kPi / 2.0f));
+ const auto x_pihalf = IfThenElse(above_pihalf, Sub(Set(df, kPi), x_pi), x_pi);
+ // Step 4: Taylor-like approximation, scaled by 2**0.75 to make angle
+ // duplication steps faster, on x/4.
+ const auto xs = Mul(x_pihalf, Set(df, 0.25f));
+ const auto x2 = Mul(xs, xs);
+ const auto x4 = Mul(x2, x2);
+ const auto cosx_prescaling =
+ MulAdd(x4, Set(df, 0.06960438),
+ MulAdd(x2, Set(df, -0.84087373), Set(df, 1.68179268)));
+ // Step 5: angle duplication.
+ const auto cosx_scale1 =
+ MulAdd(cosx_prescaling, cosx_prescaling, Set(df, -1.414213562));
+ const auto cosx_scale2 = MulAdd(cosx_scale1, cosx_scale1, Set(df, -1));
+ // Step 6: change sign if needed.
+ const Rebind<uint32_t, DF> du;
+ auto signbit = ShiftLeft<31>(BitCast(du, VecFromMask(df, above_pihalf)));
+ return BitCast(df, Xor(signbit, BitCast(du, cosx_scale2)));
+}
+
+// Computes the error function like std::erf.
+// L1 error 7e-4.
+template <class DF, class V>
+V FastErff(const DF df, V x) {
+ // Formula from
+ // https://en.wikipedia.org/wiki/Error_function#Numerical_approximations
+ // but constants have been recomputed.
+ const auto xle0 = Le(x, Zero(df));
+ const auto absx = Abs(x);
+ // Compute 1 - 1 / ((((x * a + b) * x + c) * x + d) * x + 1)**4
+ const auto denom1 =
+ MulAdd(absx, Set(df, 7.77394369e-02), Set(df, 2.05260015e-04));
+ const auto denom2 = MulAdd(denom1, absx, Set(df, 2.32120216e-01));
+ const auto denom3 = MulAdd(denom2, absx, Set(df, 2.77820801e-01));
+ const auto denom4 = MulAdd(denom3, absx, Set(df, 1.0f));
+ const auto denom5 = Mul(denom4, denom4);
+ const auto inv_denom5 = Div(Set(df, 1.0f), denom5);
+ const auto result = NegMulAdd(inv_denom5, inv_denom5, Set(df, 1.0f));
+ // Change sign if needed.
+ const Rebind<uint32_t, DF> du;
+ auto signbit = ShiftLeft<31>(BitCast(du, VecFromMask(df, xle0)));
+ return BitCast(df, Xor(signbit, BitCast(du, result)));
+}
+
+inline float FastLog2f(float f) {
+ HWY_CAPPED(float, 1) D;
+ return GetLane(FastLog2f(D, Set(D, f)));
+}
+
+inline float FastPow2f(float f) {
+ HWY_CAPPED(float, 1) D;
+ return GetLane(FastPow2f(D, Set(D, f)));
+}
+
+inline float FastPowf(float b, float e) {
+ HWY_CAPPED(float, 1) D;
+ return GetLane(FastPowf(D, Set(D, b), Set(D, e)));
+}
+
+inline float FastCosf(float f) {
+ HWY_CAPPED(float, 1) D;
+ return GetLane(FastCosf(D, Set(D, f)));
+}
+
+inline float FastErff(float f) {
+ HWY_CAPPED(float, 1) D;
+ return GetLane(FastErff(D, Set(D, f)));
+}
+
+// Returns cbrt(x) + add with 6 ulp max error.
+// Modified from vectormath_exp.h, Apache 2 license.
+// https://www.agner.org/optimize/vectorclass.zip
+template <class V>
+V CubeRootAndAdd(const V x, const V add) {
+ const HWY_FULL(float) df;
+ const HWY_FULL(int32_t) di;
+
+ const auto kExpBias = Set(di, 0x54800000); // cast(1.) + cast(1.) / 3
+ const auto kExpMul = Set(di, 0x002AAAAA); // shifted 1/3
+ const auto k1_3 = Set(df, 1.0f / 3);
+ const auto k4_3 = Set(df, 4.0f / 3);
+
+ const auto xa = x; // assume inputs never negative
+ const auto xa_3 = Mul(k1_3, xa);
+
+ // Multiply exponent by -1/3
+ const auto m1 = BitCast(di, xa);
+ // Special case for 0. 0 is represented with an exponent of 0, so the
+ // "kExpBias - 1/3 * exp" below gives the wrong result. The IfThenZeroElse()
+ // sets those values as 0, which prevents having NaNs in the computations
+ // below.
+ // TODO(eustas): use fused op
+ const auto m2 = IfThenZeroElse(
+ Eq(m1, Zero(di)), Sub(kExpBias, Mul((ShiftRight<23>(m1)), kExpMul)));
+ auto r = BitCast(df, m2);
+
+ // Newton-Raphson iterations
+ for (int i = 0; i < 3; i++) {
+ const auto r2 = Mul(r, r);
+ r = NegMulAdd(xa_3, Mul(r2, r2), Mul(k4_3, r));
+ }
+ // Final iteration
+ auto r2 = Mul(r, r);
+ r = MulAdd(k1_3, NegMulAdd(xa, Mul(r2, r2), r), r);
+ r2 = Mul(r, r);
+ r = MulAdd(r2, x, add);
+
+ return r;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif // LIB_JXL_FAST_MATH_INL_H_
+
+#if HWY_ONCE
+#ifndef FAST_MATH_ONCE
+#define FAST_MATH_ONCE
+
+namespace jxl {
+inline float FastLog2f(float f) { return HWY_STATIC_DISPATCH(FastLog2f)(f); }
+inline float FastPow2f(float f) { return HWY_STATIC_DISPATCH(FastPow2f)(f); }
+inline float FastPowf(float b, float e) {
+ return HWY_STATIC_DISPATCH(FastPowf)(b, e);
+}
+inline float FastCosf(float f) { return HWY_STATIC_DISPATCH(FastCosf)(f); }
+inline float FastErff(float f) { return HWY_STATIC_DISPATCH(FastErff)(f); }
+} // namespace jxl
+
+#endif // FAST_MATH_ONCE
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/fast_math_test.cc b/third_party/jpeg-xl/lib/jxl/fast_math_test.cc
new file mode 100644
index 0000000000..897aadc120
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/fast_math_test.cc
@@ -0,0 +1,288 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/fast_math_test.cc"
+#include <hwy/foreach_target.h>
+
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/dec_xyb-inl.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+// Test utils
+#include <hwy/highway.h>
+#include <hwy/tests/test_util-inl.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+HWY_NOINLINE void TestFastLog2() {
+ constexpr size_t kNumTrials = 1 << 23;
+ Rng rng(1);
+ float max_abs_err = 0;
+ HWY_FULL(float) d;
+ for (size_t i = 0; i < kNumTrials; i++) {
+ const float f = rng.UniformF(1e-7f, 1e3f);
+ const auto actual_v = FastLog2f(d, Set(d, f));
+ const float actual = GetLane(actual_v);
+ const float abs_err = std::abs(std::log2(f) - actual);
+ EXPECT_LT(abs_err, 3.1E-6) << "f = " << f;
+ max_abs_err = std::max(max_abs_err, abs_err);
+ }
+ printf("max abs err %e\n", static_cast<double>(max_abs_err));
+}
+
+HWY_NOINLINE void TestFastPow2() {
+ constexpr size_t kNumTrials = 1 << 23;
+ Rng rng(1);
+ float max_rel_err = 0;
+ HWY_FULL(float) d;
+ for (size_t i = 0; i < kNumTrials; i++) {
+ const float f = rng.UniformF(-100, 100);
+ const auto actual_v = FastPow2f(d, Set(d, f));
+ const float actual = GetLane(actual_v);
+ const float expected = std::pow(2, f);
+ const float rel_err = std::abs(expected - actual) / expected;
+ EXPECT_LT(rel_err, 3.1E-6) << "f = " << f;
+ max_rel_err = std::max(max_rel_err, rel_err);
+ }
+ printf("max rel err %e\n", static_cast<double>(max_rel_err));
+}
+
+HWY_NOINLINE void TestFastPow() {
+ constexpr size_t kNumTrials = 1 << 23;
+ Rng rng(1);
+ float max_rel_err = 0;
+ HWY_FULL(float) d;
+ for (size_t i = 0; i < kNumTrials; i++) {
+ const float b = rng.UniformF(1e-3f, 1e3f);
+ const float e = rng.UniformF(-10, 10);
+ const auto actual_v = FastPowf(d, Set(d, b), Set(d, e));
+ const float actual = GetLane(actual_v);
+ const float expected = std::pow(b, e);
+ const float rel_err = std::abs(expected - actual) / expected;
+ EXPECT_LT(rel_err, 3E-5) << "b = " << b << " e = " << e;
+ max_rel_err = std::max(max_rel_err, rel_err);
+ }
+ printf("max rel err %e\n", static_cast<double>(max_rel_err));
+}
+
+HWY_NOINLINE void TestFastCos() {
+ constexpr size_t kNumTrials = 1 << 23;
+ Rng rng(1);
+ float max_abs_err = 0;
+ HWY_FULL(float) d;
+ for (size_t i = 0; i < kNumTrials; i++) {
+ const float f = rng.UniformF(-1e3f, 1e3f);
+ const auto actual_v = FastCosf(d, Set(d, f));
+ const float actual = GetLane(actual_v);
+ const float abs_err = std::abs(std::cos(f) - actual);
+ EXPECT_LT(abs_err, 7E-5) << "f = " << f;
+ max_abs_err = std::max(max_abs_err, abs_err);
+ }
+ printf("max abs err %e\n", static_cast<double>(max_abs_err));
+}
+
+HWY_NOINLINE void TestFastErf() {
+ constexpr size_t kNumTrials = 1 << 23;
+ Rng rng(1);
+ float max_abs_err = 0;
+ HWY_FULL(float) d;
+ for (size_t i = 0; i < kNumTrials; i++) {
+ const float f = rng.UniformF(-5.f, 5.f);
+ const auto actual_v = FastErff(d, Set(d, f));
+ const float actual = GetLane(actual_v);
+ const float abs_err = std::abs(std::erf(f) - actual);
+ EXPECT_LT(abs_err, 7E-4) << "f = " << f;
+ max_abs_err = std::max(max_abs_err, abs_err);
+ }
+ printf("max abs err %e\n", static_cast<double>(max_abs_err));
+}
+
+HWY_NOINLINE void TestCubeRoot() {
+ const HWY_FULL(float) d;
+ for (uint64_t x5 = 0; x5 < 2000000; x5++) {
+ const float x = x5 * 1E-5f;
+ const float expected = cbrtf(x);
+ HWY_ALIGN float approx[MaxLanes(d)];
+ Store(CubeRootAndAdd(Set(d, x), Zero(d)), d, approx);
+
+ // All lanes are same
+ for (size_t i = 1; i < Lanes(d); ++i) {
+ EXPECT_NEAR(approx[0], approx[i], 5E-7f);
+ }
+ EXPECT_NEAR(approx[0], expected, 8E-7f);
+ }
+}
+
+HWY_NOINLINE void TestFastSRGB() {
+ constexpr size_t kNumTrials = 1 << 23;
+ Rng rng(1);
+ float max_abs_err = 0;
+ HWY_FULL(float) d;
+ for (size_t i = 0; i < kNumTrials; i++) {
+ const float f = rng.UniformF(0.0f, 1.0f);
+ const auto actual_v = FastLinearToSRGB(d, Set(d, f));
+ const float actual = GetLane(actual_v);
+ const float expected = GetLane(TF_SRGB().EncodedFromDisplay(d, Set(d, f)));
+ const float abs_err = std::abs(expected - actual);
+ EXPECT_LT(abs_err, 1.2E-4) << "f = " << f;
+ max_abs_err = std::max(max_abs_err, abs_err);
+ }
+ printf("max abs err %e\n", static_cast<double>(max_abs_err));
+}
+
+HWY_NOINLINE void TestFastPQEFD() {
+ constexpr size_t kNumTrials = 1 << 23;
+ Rng rng(1);
+ float max_abs_err = 0;
+ HWY_FULL(float) d;
+ for (size_t i = 0; i < kNumTrials; i++) {
+ const float f = rng.UniformF(0.0f, 1.0f);
+ const float actual = GetLane(TF_PQ().EncodedFromDisplay(d, Set(d, f)));
+ const float expected = TF_PQ().EncodedFromDisplay(f);
+ const float abs_err = std::abs(expected - actual);
+ EXPECT_LT(abs_err, 7e-7) << "f = " << f;
+ max_abs_err = std::max(max_abs_err, abs_err);
+ }
+ printf("max abs err %e\n", static_cast<double>(max_abs_err));
+}
+
+HWY_NOINLINE void TestFastHLGEFD() {
+ constexpr size_t kNumTrials = 1 << 23;
+ Rng rng(1);
+ float max_abs_err = 0;
+ HWY_FULL(float) d;
+ for (size_t i = 0; i < kNumTrials; i++) {
+ const float f = rng.UniformF(0.0f, 1.0f);
+ const float actual = GetLane(TF_HLG().EncodedFromDisplay(d, Set(d, f)));
+ const float expected = TF_HLG().EncodedFromDisplay(f);
+ const float abs_err = std::abs(expected - actual);
+ EXPECT_LT(abs_err, 5e-7) << "f = " << f;
+ max_abs_err = std::max(max_abs_err, abs_err);
+ }
+ printf("max abs err %e\n", static_cast<double>(max_abs_err));
+}
+
+HWY_NOINLINE void TestFast709EFD() {
+ constexpr size_t kNumTrials = 1 << 23;
+ Rng rng(1);
+ float max_abs_err = 0;
+ HWY_FULL(float) d;
+ for (size_t i = 0; i < kNumTrials; i++) {
+ const float f = rng.UniformF(0.0f, 1.0f);
+ const float actual = GetLane(TF_709().EncodedFromDisplay(d, Set(d, f)));
+ const float expected = TF_709().EncodedFromDisplay(f);
+ const float abs_err = std::abs(expected - actual);
+ EXPECT_LT(abs_err, 2e-6) << "f = " << f;
+ max_abs_err = std::max(max_abs_err, abs_err);
+ }
+ printf("max abs err %e\n", static_cast<double>(max_abs_err));
+}
+
+HWY_NOINLINE void TestFastPQDFE() {
+ constexpr size_t kNumTrials = 1 << 23;
+ Rng rng(1);
+ float max_abs_err = 0;
+ HWY_FULL(float) d;
+ for (size_t i = 0; i < kNumTrials; i++) {
+ const float f = rng.UniformF(0.0f, 1.0f);
+ const float actual = GetLane(TF_PQ().DisplayFromEncoded(d, Set(d, f)));
+ const float expected = TF_PQ().DisplayFromEncoded(f);
+ const float abs_err = std::abs(expected - actual);
+ EXPECT_LT(abs_err, 3E-6) << "f = " << f;
+ max_abs_err = std::max(max_abs_err, abs_err);
+ }
+ printf("max abs err %e\n", static_cast<double>(max_abs_err));
+}
+
+HWY_NOINLINE void TestFastXYB() {
+ if (!HasFastXYBTosRGB8()) return;
+ ImageMetadata metadata;
+ ImageBundle ib(&metadata);
+ int scaling = 1;
+ int n = 256 * scaling;
+ float inv_scaling = 1.0f / scaling;
+ int kChunk = 32;
+ // The image is divided in chunks to reduce total memory usage.
+ for (int cr = 0; cr < n; cr += kChunk) {
+ for (int cg = 0; cg < n; cg += kChunk) {
+ for (int cb = 0; cb < n; cb += kChunk) {
+ Image3F chunk(kChunk * kChunk, kChunk);
+ for (int ir = 0; ir < kChunk; ir++) {
+ for (int ig = 0; ig < kChunk; ig++) {
+ for (int ib = 0; ib < kChunk; ib++) {
+ float r = (cr + ir) * inv_scaling;
+ float g = (cg + ig) * inv_scaling;
+ float b = (cb + ib) * inv_scaling;
+ chunk.PlaneRow(0, ir)[ig * kChunk + ib] = r * (1.0f / 255);
+ chunk.PlaneRow(1, ir)[ig * kChunk + ib] = g * (1.0f / 255);
+ chunk.PlaneRow(2, ir)[ig * kChunk + ib] = b * (1.0f / 255);
+ }
+ }
+ }
+ ib.SetFromImage(std::move(chunk), ColorEncoding::SRGB());
+ Image3F xyb(kChunk * kChunk, kChunk);
+ std::vector<uint8_t> roundtrip(kChunk * kChunk * kChunk * 3);
+ ToXYB(ib, nullptr, &xyb, GetJxlCms());
+ for (int y = 0; y < kChunk; y++) {
+ const float* xyba[4] = {xyb.PlaneRow(0, y), xyb.PlaneRow(1, y),
+ xyb.PlaneRow(2, y), nullptr};
+ jxl::HWY_NAMESPACE::FastXYBTosRGB8(
+ xyba, roundtrip.data() + 3 * xyb.xsize() * y, false, xyb.xsize());
+ }
+ for (int ir = 0; ir < kChunk; ir++) {
+ for (int ig = 0; ig < kChunk; ig++) {
+ for (int ib = 0; ib < kChunk; ib++) {
+ float r = (cr + ir) * inv_scaling;
+ float g = (cg + ig) * inv_scaling;
+ float b = (cb + ib) * inv_scaling;
+ size_t idx = ir * kChunk * kChunk + ig * kChunk + ib;
+ int rr = roundtrip[3 * idx];
+ int rg = roundtrip[3 * idx + 1];
+ int rb = roundtrip[3 * idx + 2];
+ EXPECT_LT(abs(r - rr), 2) << "expected " << r << " got " << rr;
+ EXPECT_LT(abs(g - rg), 2) << "expected " << g << " got " << rg;
+ EXPECT_LT(abs(b - rb), 2) << "expected " << b << " got " << rb;
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+class FastMathTargetTest : public hwy::TestWithParamTarget {};
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(FastMathTargetTest);
+
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastLog2);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastPow2);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastPow);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastCos);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastErf);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestCubeRoot);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastSRGB);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastPQDFE);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastPQEFD);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastHLGEFD);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFast709EFD);
+HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastXYB);
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/field_encodings.h b/third_party/jpeg-xl/lib/jxl/field_encodings.h
new file mode 100644
index 0000000000..613e8fad33
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/field_encodings.h
@@ -0,0 +1,134 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_FIELD_ENCODINGS_H_
+#define LIB_JXL_FIELD_ENCODINGS_H_
+
+// Constants needed to encode/decode fields; avoids including the full fields.h.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <hwy/base.h>
+#include <vector>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Macro to define the Fields' derived class Name when compiling with debug
+// names.
+#if JXL_IS_DEBUG_BUILD
+#define JXL_FIELDS_NAME(X) \
+ const char* Name() const override { return #X; }
+#else
+#define JXL_FIELDS_NAME(X)
+#endif // JXL_IS_DEBUG_BUILD
+
+class Visitor;
+class Fields {
+ public:
+ virtual ~Fields() = default;
+#if JXL_IS_DEBUG_BUILD
+ virtual const char* Name() const = 0;
+#endif // JXL_IS_DEBUG_BUILD
+ virtual Status VisitFields(Visitor* JXL_RESTRICT visitor) = 0;
+};
+
+// Distribution of U32 values for one particular selector. Represents either a
+// power of two-sized range, or a single value. A separate type ensures this is
+// only passed to the U32Enc ctor.
+struct U32Distr {
+ // No need to validate - all `d` are legitimate.
+ constexpr explicit U32Distr(uint32_t d) : d(d) {}
+
+ static constexpr uint32_t kDirect = 0x80000000u;
+
+ constexpr bool IsDirect() const { return (d & kDirect) != 0; }
+
+ // Only call if IsDirect().
+ constexpr uint32_t Direct() const { return d & (kDirect - 1); }
+
+ // Only call if !IsDirect().
+ constexpr size_t ExtraBits() const { return (d & 0x1F) + 1; }
+ uint32_t Offset() const { return (d >> 5) & 0x3FFFFFF; }
+
+ uint32_t d;
+};
+
+// A direct-coded 31-bit value occupying 2 bits in the bitstream.
+constexpr U32Distr Val(uint32_t value) {
+ return U32Distr(value | U32Distr::kDirect);
+}
+
+// Value - `offset` will be signaled in `bits` extra bits.
+constexpr U32Distr BitsOffset(uint32_t bits, uint32_t offset) {
+ return U32Distr(((bits - 1) & 0x1F) + ((offset & 0x3FFFFFF) << 5));
+}
+
+// Value will be signaled in `bits` extra bits.
+constexpr U32Distr Bits(uint32_t bits) { return BitsOffset(bits, 0); }
+
+// See U32Coder documentation in fields.h.
+class U32Enc {
+ public:
+ constexpr U32Enc(const U32Distr d0, const U32Distr d1, const U32Distr d2,
+ const U32Distr d3)
+ : d_{d0, d1, d2, d3} {}
+
+ // Returns the U32Distr at `selector` = 0..3, least-significant first.
+ U32Distr GetDistr(const uint32_t selector) const {
+ JXL_ASSERT(selector < 4);
+ return d_[selector];
+ }
+
+ private:
+ U32Distr d_[4];
+};
+
+// Returns bit with the given `index` (0 = least significant).
+template <typename T>
+static inline constexpr uint64_t MakeBit(T index) {
+ return 1ULL << static_cast<uint32_t>(index);
+}
+
+// Returns vector of all possible values of an Enum type. Relies on each Enum
+// providing an overload of EnumBits() that returns a bit array of its values,
+// which implies values must be in [0, 64).
+template <typename Enum>
+std::vector<Enum> Values() {
+ uint64_t bits = EnumBits(Enum());
+
+ std::vector<Enum> values;
+ values.reserve(hwy::PopCount(bits));
+
+ // For each 1-bit in bits: add its index as value
+ while (bits != 0) {
+ const int index = Num0BitsBelowLS1Bit_Nonzero(bits);
+ values.push_back(static_cast<Enum>(index));
+ bits &= bits - 1; // clear least-significant bit
+ }
+ return values;
+}
+
+// Returns true if value is one of Values<Enum>().
+template <class Enum>
+Status EnumValid(const Enum value) {
+ if (static_cast<uint32_t>(value) >= 64) {
+ return JXL_FAILURE("Value %u too large for %s\n",
+ static_cast<uint32_t>(value), EnumName(Enum()));
+ }
+ const uint64_t bit = MakeBit(value);
+ if ((EnumBits(Enum()) & bit) == 0) {
+ return JXL_FAILURE("Invalid value %u for %s\n",
+ static_cast<uint32_t>(value), EnumName(Enum()));
+ }
+ return true;
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_FIELD_ENCODINGS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/fields.cc b/third_party/jpeg-xl/lib/jxl/fields.cc
new file mode 100644
index 0000000000..cd1e72bd94
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/fields.cc
@@ -0,0 +1,642 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/fields.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <cmath>
+#include <hwy/base.h>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/printf_macros.h"
+
+namespace jxl {
+
+namespace {
+
+using ::jxl::fields_internal::VisitorBase;
+
+struct InitVisitor : public VisitorBase {
+ Status Bits(const size_t /*unused*/, const uint32_t default_value,
+ uint32_t* JXL_RESTRICT value) override {
+ *value = default_value;
+ return true;
+ }
+
+ Status U32(const U32Enc /*unused*/, const uint32_t default_value,
+ uint32_t* JXL_RESTRICT value) override {
+ *value = default_value;
+ return true;
+ }
+
+ Status U64(const uint64_t default_value,
+ uint64_t* JXL_RESTRICT value) override {
+ *value = default_value;
+ return true;
+ }
+
+ Status Bool(bool default_value, bool* JXL_RESTRICT value) override {
+ *value = default_value;
+ return true;
+ }
+
+ Status F16(const float default_value, float* JXL_RESTRICT value) override {
+ *value = default_value;
+ return true;
+ }
+
+ // Always visit conditional fields to ensure they are initialized.
+ Status Conditional(bool /*condition*/) override { return true; }
+
+ Status AllDefault(const Fields& /*fields*/,
+ bool* JXL_RESTRICT all_default) override {
+ // Just initialize this field and don't skip initializing others.
+ JXL_RETURN_IF_ERROR(Bool(true, all_default));
+ return false;
+ }
+
+ Status VisitNested(Fields* /*fields*/) override {
+ // Avoid re-initializing nested bundles (their ctors already called
+ // Bundle::Init for their fields).
+ return true;
+ }
+};
+
+// Similar to InitVisitor, but also initializes nested fields.
+struct SetDefaultVisitor : public VisitorBase {
+ Status Bits(const size_t /*unused*/, const uint32_t default_value,
+ uint32_t* JXL_RESTRICT value) override {
+ *value = default_value;
+ return true;
+ }
+
+ Status U32(const U32Enc /*unused*/, const uint32_t default_value,
+ uint32_t* JXL_RESTRICT value) override {
+ *value = default_value;
+ return true;
+ }
+
+ Status U64(const uint64_t default_value,
+ uint64_t* JXL_RESTRICT value) override {
+ *value = default_value;
+ return true;
+ }
+
+ Status Bool(bool default_value, bool* JXL_RESTRICT value) override {
+ *value = default_value;
+ return true;
+ }
+
+ Status F16(const float default_value, float* JXL_RESTRICT value) override {
+ *value = default_value;
+ return true;
+ }
+
+ // Always visit conditional fields to ensure they are initialized.
+ Status Conditional(bool /*condition*/) override { return true; }
+
+ Status AllDefault(const Fields& /*fields*/,
+ bool* JXL_RESTRICT all_default) override {
+ // Just initialize this field and don't skip initializing others.
+ JXL_RETURN_IF_ERROR(Bool(true, all_default));
+ return false;
+ }
+};
+
+class AllDefaultVisitor : public VisitorBase {
+ public:
+ explicit AllDefaultVisitor() : VisitorBase() {}
+
+ Status Bits(const size_t bits, const uint32_t default_value,
+ uint32_t* JXL_RESTRICT value) override {
+ all_default_ &= *value == default_value;
+ return true;
+ }
+
+ Status U32(const U32Enc /*unused*/, const uint32_t default_value,
+ uint32_t* JXL_RESTRICT value) override {
+ all_default_ &= *value == default_value;
+ return true;
+ }
+
+ Status U64(const uint64_t default_value,
+ uint64_t* JXL_RESTRICT value) override {
+ all_default_ &= *value == default_value;
+ return true;
+ }
+
+ Status F16(const float default_value, float* JXL_RESTRICT value) override {
+ all_default_ &= std::abs(*value - default_value) < 1E-6f;
+ return true;
+ }
+
+ Status AllDefault(const Fields& /*fields*/,
+ bool* JXL_RESTRICT /*all_default*/) override {
+ // Visit all fields so we can compute the actual all_default_ value.
+ return false;
+ }
+
+ bool AllDefault() const { return all_default_; }
+
+ private:
+ bool all_default_ = true;
+};
+
+class ReadVisitor : public VisitorBase {
+ public:
+ explicit ReadVisitor(BitReader* reader) : VisitorBase(), reader_(reader) {}
+
+ Status Bits(const size_t bits, const uint32_t /*default_value*/,
+ uint32_t* JXL_RESTRICT value) override {
+ *value = BitsCoder::Read(bits, reader_);
+ if (!reader_->AllReadsWithinBounds()) {
+ return JXL_STATUS(StatusCode::kNotEnoughBytes,
+ "Not enough bytes for header");
+ }
+ return true;
+ }
+
+ Status U32(const U32Enc dist, const uint32_t /*default_value*/,
+ uint32_t* JXL_RESTRICT value) override {
+ *value = U32Coder::Read(dist, reader_);
+ if (!reader_->AllReadsWithinBounds()) {
+ return JXL_STATUS(StatusCode::kNotEnoughBytes,
+ "Not enough bytes for header");
+ }
+ return true;
+ }
+
+ Status U64(const uint64_t /*default_value*/,
+ uint64_t* JXL_RESTRICT value) override {
+ *value = U64Coder::Read(reader_);
+ if (!reader_->AllReadsWithinBounds()) {
+ return JXL_STATUS(StatusCode::kNotEnoughBytes,
+ "Not enough bytes for header");
+ }
+ return true;
+ }
+
+ Status F16(const float /*default_value*/,
+ float* JXL_RESTRICT value) override {
+ ok_ &= F16Coder::Read(reader_, value);
+ if (!reader_->AllReadsWithinBounds()) {
+ return JXL_STATUS(StatusCode::kNotEnoughBytes,
+ "Not enough bytes for header");
+ }
+ return true;
+ }
+
+ void SetDefault(Fields* fields) override { Bundle::SetDefault(fields); }
+
+ bool IsReading() const override { return true; }
+
+ // This never fails because visitors are expected to keep reading until
+ // EndExtensions, see comment there.
+ Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) override {
+ JXL_QUIET_RETURN_IF_ERROR(VisitorBase::BeginExtensions(extensions));
+ if (*extensions == 0) return true;
+
+ // For each nonzero bit, i.e. extension that is present:
+ for (uint64_t remaining_extensions = *extensions; remaining_extensions != 0;
+ remaining_extensions &= remaining_extensions - 1) {
+ const size_t idx_extension =
+ Num0BitsBelowLS1Bit_Nonzero(remaining_extensions);
+ // Read additional U64 (one per extension) indicating the number of bits
+ // (allows skipping individual extensions).
+ JXL_RETURN_IF_ERROR(U64(0, &extension_bits_[idx_extension]));
+ if (!SafeAdd(total_extension_bits_, extension_bits_[idx_extension],
+ total_extension_bits_)) {
+ return JXL_FAILURE("Extension bits overflowed, invalid codestream");
+ }
+ }
+ // Used by EndExtensions to skip past any _remaining_ extensions.
+ pos_after_ext_size_ = reader_->TotalBitsConsumed();
+ JXL_ASSERT(pos_after_ext_size_ != 0);
+ return true;
+ }
+
+ Status EndExtensions() override {
+ JXL_QUIET_RETURN_IF_ERROR(VisitorBase::EndExtensions());
+ // Happens if extensions == 0: don't read size, done.
+ if (pos_after_ext_size_ == 0) return true;
+
+ // Not enough bytes as set by BeginExtensions or earlier. Do not return
+ // this as a JXL_FAILURE or false (which can also propagate to error
+ // through e.g. JXL_RETURN_IF_ERROR), since this may be used while
+ // silently checking whether there are enough bytes. If this case must be
+ // treated as an error, reader_>Close() will do this, just like is already
+ // done for non-extension fields.
+ if (!enough_bytes_) return true;
+
+ // Skip new fields this (old?) decoder didn't know about, if any.
+ const size_t bits_read = reader_->TotalBitsConsumed();
+ uint64_t end;
+ if (!SafeAdd(pos_after_ext_size_, total_extension_bits_, end)) {
+ return JXL_FAILURE("Invalid extension size, caused overflow");
+ }
+ if (bits_read > end) {
+ return JXL_FAILURE("Read more extension bits than budgeted");
+ }
+ const size_t remaining_bits = end - bits_read;
+ if (remaining_bits != 0) {
+ JXL_WARNING("Skipping %" PRIuS "-bit extension(s)", remaining_bits);
+ reader_->SkipBits(remaining_bits);
+ if (!reader_->AllReadsWithinBounds()) {
+ return JXL_STATUS(StatusCode::kNotEnoughBytes,
+ "Not enough bytes for header");
+ }
+ }
+ return true;
+ }
+
+ Status OK() const { return ok_; }
+
+ private:
+ // Whether any error other than not enough bytes occurred.
+ bool ok_ = true;
+
+ // Whether there are enough input bytes to read from.
+ bool enough_bytes_ = true;
+ BitReader* const reader_;
+ // May be 0 even if the corresponding extension is present.
+ uint64_t extension_bits_[Bundle::kMaxExtensions] = {0};
+ uint64_t total_extension_bits_ = 0;
+ size_t pos_after_ext_size_ = 0; // 0 iff extensions == 0.
+};
+
+class MaxBitsVisitor : public VisitorBase {
+ public:
+ Status Bits(const size_t bits, const uint32_t /*default_value*/,
+ uint32_t* JXL_RESTRICT /*value*/) override {
+ max_bits_ += BitsCoder::MaxEncodedBits(bits);
+ return true;
+ }
+
+ Status U32(const U32Enc enc, const uint32_t /*default_value*/,
+ uint32_t* JXL_RESTRICT /*value*/) override {
+ max_bits_ += U32Coder::MaxEncodedBits(enc);
+ return true;
+ }
+
+ Status U64(const uint64_t /*default_value*/,
+ uint64_t* JXL_RESTRICT /*value*/) override {
+ max_bits_ += U64Coder::MaxEncodedBits();
+ return true;
+ }
+
+ Status F16(const float /*default_value*/,
+ float* JXL_RESTRICT /*value*/) override {
+ max_bits_ += F16Coder::MaxEncodedBits();
+ return true;
+ }
+
+ Status AllDefault(const Fields& /*fields*/,
+ bool* JXL_RESTRICT all_default) override {
+ JXL_RETURN_IF_ERROR(Bool(true, all_default));
+ return false; // For max bits, assume nothing is default
+ }
+
+ // Always visit conditional fields to get a (loose) upper bound.
+ Status Conditional(bool /*condition*/) override { return true; }
+
+ Status BeginExtensions(uint64_t* JXL_RESTRICT /*extensions*/) override {
+ // Skip - extensions are not included in "MaxBits" because their length
+ // is potentially unbounded.
+ return true;
+ }
+
+ Status EndExtensions() override { return true; }
+
+ size_t MaxBits() const { return max_bits_; }
+
+ private:
+ size_t max_bits_ = 0;
+};
+
+class CanEncodeVisitor : public VisitorBase {
+ public:
+ explicit CanEncodeVisitor() : VisitorBase() {}
+
+ Status Bits(const size_t bits, const uint32_t /*default_value*/,
+ uint32_t* JXL_RESTRICT value) override {
+ size_t encoded_bits = 0;
+ ok_ &= BitsCoder::CanEncode(bits, *value, &encoded_bits);
+ encoded_bits_ += encoded_bits;
+ return true;
+ }
+
+ Status U32(const U32Enc enc, const uint32_t /*default_value*/,
+ uint32_t* JXL_RESTRICT value) override {
+ size_t encoded_bits = 0;
+ ok_ &= U32Coder::CanEncode(enc, *value, &encoded_bits);
+ encoded_bits_ += encoded_bits;
+ return true;
+ }
+
+ Status U64(const uint64_t /*default_value*/,
+ uint64_t* JXL_RESTRICT value) override {
+ size_t encoded_bits = 0;
+ ok_ &= U64Coder::CanEncode(*value, &encoded_bits);
+ encoded_bits_ += encoded_bits;
+ return true;
+ }
+
+ Status F16(const float /*default_value*/,
+ float* JXL_RESTRICT value) override {
+ size_t encoded_bits = 0;
+ ok_ &= F16Coder::CanEncode(*value, &encoded_bits);
+ encoded_bits_ += encoded_bits;
+ return true;
+ }
+
+ Status AllDefault(const Fields& fields,
+ bool* JXL_RESTRICT all_default) override {
+ *all_default = Bundle::AllDefault(fields);
+ JXL_RETURN_IF_ERROR(Bool(true, all_default));
+ return *all_default;
+ }
+
+ Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) override {
+ JXL_QUIET_RETURN_IF_ERROR(VisitorBase::BeginExtensions(extensions));
+ extensions_ = *extensions;
+ if (*extensions != 0) {
+ JXL_ASSERT(pos_after_ext_ == 0);
+ pos_after_ext_ = encoded_bits_;
+ JXL_ASSERT(pos_after_ext_ != 0); // visited "extensions"
+ }
+ return true;
+ }
+ // EndExtensions = default.
+
+ Status GetSizes(size_t* JXL_RESTRICT extension_bits,
+ size_t* JXL_RESTRICT total_bits) {
+ JXL_RETURN_IF_ERROR(ok_);
+ *extension_bits = 0;
+ *total_bits = encoded_bits_;
+ // Only if extension field was nonzero will we encode their sizes.
+ if (pos_after_ext_ != 0) {
+ JXL_ASSERT(encoded_bits_ >= pos_after_ext_);
+ *extension_bits = encoded_bits_ - pos_after_ext_;
+ // Also need to encode *extension_bits and bill it to *total_bits.
+ size_t encoded_bits = 0;
+ ok_ &= U64Coder::CanEncode(*extension_bits, &encoded_bits);
+ *total_bits += encoded_bits;
+
+ // TODO(janwas): support encoding individual extension sizes. We
+ // currently ascribe all bits to the first and send zeros for the
+ // others.
+ for (size_t i = 1; i < hwy::PopCount(extensions_); ++i) {
+ encoded_bits = 0;
+ ok_ &= U64Coder::CanEncode(0, &encoded_bits);
+ *total_bits += encoded_bits;
+ }
+ }
+ return true;
+ }
+
+ private:
+ bool ok_ = true;
+ size_t encoded_bits_ = 0;
+ uint64_t extensions_ = 0;
+ // Snapshot of encoded_bits_ after visiting the extension field, but NOT
+ // including the hidden extension sizes.
+ uint64_t pos_after_ext_ = 0;
+};
+} // namespace
+
+void Bundle::Init(Fields* fields) {
+ InitVisitor visitor;
+ if (!visitor.Visit(fields)) {
+ JXL_ABORT("Init should never fail");
+ }
+}
+void Bundle::SetDefault(Fields* fields) {
+ SetDefaultVisitor visitor;
+ if (!visitor.Visit(fields)) {
+ JXL_ABORT("SetDefault should never fail");
+ }
+}
+bool Bundle::AllDefault(const Fields& fields) {
+ AllDefaultVisitor visitor;
+ if (!visitor.VisitConst(fields)) {
+ JXL_ABORT("AllDefault should never fail");
+ }
+ return visitor.AllDefault();
+}
+size_t Bundle::MaxBits(const Fields& fields) {
+ MaxBitsVisitor visitor;
+#if JXL_ENABLE_ASSERT
+ Status ret =
+#else
+ (void)
+#endif // JXL_ENABLE_ASSERT
+ visitor.VisitConst(fields);
+ JXL_ASSERT(ret);
+ return visitor.MaxBits();
+}
+Status Bundle::CanEncode(const Fields& fields, size_t* extension_bits,
+ size_t* total_bits) {
+ CanEncodeVisitor visitor;
+ JXL_QUIET_RETURN_IF_ERROR(visitor.VisitConst(fields));
+ JXL_QUIET_RETURN_IF_ERROR(visitor.GetSizes(extension_bits, total_bits));
+ return true;
+}
+Status Bundle::Read(BitReader* reader, Fields* fields) {
+ ReadVisitor visitor(reader);
+ JXL_RETURN_IF_ERROR(visitor.Visit(fields));
+ return visitor.OK();
+}
+bool Bundle::CanRead(BitReader* reader, Fields* fields) {
+ ReadVisitor visitor(reader);
+ Status status = visitor.Visit(fields);
+ // We are only checking here whether there are enough bytes. We still return
+ // true for other errors because it means there are enough bytes to determine
+ // there's an error. Use Read() to determine which error it is.
+ return status.code() != StatusCode::kNotEnoughBytes;
+}
+
+size_t BitsCoder::MaxEncodedBits(const size_t bits) { return bits; }
+
+Status BitsCoder::CanEncode(const size_t bits, const uint32_t value,
+ size_t* JXL_RESTRICT encoded_bits) {
+ *encoded_bits = bits;
+ if (value >= (1ULL << bits)) {
+ return JXL_FAILURE("Value %u too large for %" PRIu64 " bits", value,
+ static_cast<uint64_t>(bits));
+ }
+ return true;
+}
+
+uint32_t BitsCoder::Read(const size_t bits, BitReader* JXL_RESTRICT reader) {
+ return reader->ReadBits(bits);
+}
+
+size_t U32Coder::MaxEncodedBits(const U32Enc enc) {
+ size_t extra_bits = 0;
+ for (uint32_t selector = 0; selector < 4; ++selector) {
+ const U32Distr d = enc.GetDistr(selector);
+ if (d.IsDirect()) {
+ continue;
+ } else {
+ extra_bits = std::max<size_t>(extra_bits, d.ExtraBits());
+ }
+ }
+ return 2 + extra_bits;
+}
+
+Status U32Coder::CanEncode(const U32Enc enc, const uint32_t value,
+ size_t* JXL_RESTRICT encoded_bits) {
+ uint32_t selector;
+ size_t total_bits;
+ const Status ok = ChooseSelector(enc, value, &selector, &total_bits);
+ *encoded_bits = ok ? total_bits : 0;
+ return ok;
+}
+
+uint32_t U32Coder::Read(const U32Enc enc, BitReader* JXL_RESTRICT reader) {
+ const uint32_t selector = reader->ReadFixedBits<2>();
+ const U32Distr d = enc.GetDistr(selector);
+ if (d.IsDirect()) {
+ return d.Direct();
+ } else {
+ return reader->ReadBits(d.ExtraBits()) + d.Offset();
+ }
+}
+
+Status U32Coder::ChooseSelector(const U32Enc enc, const uint32_t value,
+ uint32_t* JXL_RESTRICT selector,
+ size_t* JXL_RESTRICT total_bits) {
+#if JXL_ENABLE_ASSERT
+ const size_t bits_required = 32 - Num0BitsAboveMS1Bit(value);
+#endif // JXL_ENABLE_ASSERT
+ JXL_ASSERT(bits_required <= 32);
+
+ *selector = 0;
+ *total_bits = 0;
+
+ // It is difficult to verify whether Dist32Byte are sorted, so check all
+ // selectors and keep the one with the fewest total_bits.
+ *total_bits = 64; // more than any valid encoding
+ for (uint32_t s = 0; s < 4; ++s) {
+ const U32Distr d = enc.GetDistr(s);
+ if (d.IsDirect()) {
+ if (d.Direct() == value) {
+ *selector = s;
+ *total_bits = 2;
+ return true; // Done, direct is always the best possible.
+ }
+ continue;
+ }
+ const size_t extra_bits = d.ExtraBits();
+ const uint32_t offset = d.Offset();
+ if (value < offset || value >= offset + (1ULL << extra_bits)) continue;
+
+ // Better than prior encoding, remember it:
+ if (2 + extra_bits < *total_bits) {
+ *selector = s;
+ *total_bits = 2 + extra_bits;
+ }
+ }
+
+ if (*total_bits == 64) {
+ return JXL_FAILURE("No feasible selector for %u", value);
+ }
+
+ return true;
+}
+
+uint64_t U64Coder::Read(BitReader* JXL_RESTRICT reader) {
+ uint64_t selector = reader->ReadFixedBits<2>();
+ if (selector == 0) {
+ return 0;
+ }
+ if (selector == 1) {
+ return 1 + reader->ReadFixedBits<4>();
+ }
+ if (selector == 2) {
+ return 17 + reader->ReadFixedBits<8>();
+ }
+
+ // selector 3, varint, groups have first 12, then 8, and last 4 bits.
+ uint64_t result = reader->ReadFixedBits<12>();
+
+ uint64_t shift = 12;
+ while (reader->ReadFixedBits<1>()) {
+ if (shift == 60) {
+ result |= static_cast<uint64_t>(reader->ReadFixedBits<4>()) << shift;
+ break;
+ }
+ result |= static_cast<uint64_t>(reader->ReadFixedBits<8>()) << shift;
+ shift += 8;
+ }
+
+ return result;
+}
+
+// Can always encode, but useful because it also returns bit size.
+Status U64Coder::CanEncode(uint64_t value, size_t* JXL_RESTRICT encoded_bits) {
+ if (value == 0) {
+ *encoded_bits = 2; // 2 selector bits
+ } else if (value <= 16) {
+ *encoded_bits = 2 + 4; // 2 selector bits + 4 payload bits
+ } else if (value <= 272) {
+ *encoded_bits = 2 + 8; // 2 selector bits + 8 payload bits
+ } else {
+ *encoded_bits = 2 + 12; // 2 selector bits + 12 payload bits
+ value >>= 12;
+ int shift = 12;
+ while (value > 0 && shift < 60) {
+ *encoded_bits += 1 + 8; // 1 continuation bit + 8 payload bits
+ value >>= 8;
+ shift += 8;
+ }
+ if (value > 0) {
+ // This only could happen if shift == N - 4.
+ *encoded_bits += 1 + 4; // 1 continuation bit + 4 payload bits
+ } else {
+ *encoded_bits += 1; // 1 stop bit
+ }
+ }
+
+ return true;
+}
+
+Status F16Coder::Read(BitReader* JXL_RESTRICT reader,
+ float* JXL_RESTRICT value) {
+ const uint32_t bits16 = reader->ReadFixedBits<16>();
+ const uint32_t sign = bits16 >> 15;
+ const uint32_t biased_exp = (bits16 >> 10) & 0x1F;
+ const uint32_t mantissa = bits16 & 0x3FF;
+
+ if (JXL_UNLIKELY(biased_exp == 31)) {
+ return JXL_FAILURE("F16 infinity or NaN are not supported");
+ }
+
+ // Subnormal or zero
+ if (JXL_UNLIKELY(biased_exp == 0)) {
+ *value = (1.0f / 16384) * (mantissa * (1.0f / 1024));
+ if (sign) *value = -*value;
+ return true;
+ }
+
+ // Normalized: convert the representation directly (faster than ldexp/tables).
+ const uint32_t biased_exp32 = biased_exp + (127 - 15);
+ const uint32_t mantissa32 = mantissa << (23 - 10);
+ const uint32_t bits32 = (sign << 31) | (biased_exp32 << 23) | mantissa32;
+ memcpy(value, &bits32, sizeof(bits32));
+ return true;
+}
+
+Status F16Coder::CanEncode(float value, size_t* JXL_RESTRICT encoded_bits) {
+ *encoded_bits = MaxEncodedBits();
+ if (std::isnan(value) || std::isinf(value)) {
+ return JXL_FAILURE("Should not attempt to store NaN and infinity");
+ }
+ return std::abs(value) <= 65504.0f;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/fields.h b/third_party/jpeg-xl/lib/jxl/fields.h
new file mode 100644
index 0000000000..10d0b7aa30
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/fields.h
@@ -0,0 +1,377 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_FIELDS_H_
+#define LIB_JXL_FIELDS_H_
+
+// Forward/backward-compatible 'bundles' with auto-serialized 'fields'.
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <cinttypes>
+#include <cmath> // abs
+#include <cstdarg>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/field_encodings.h"
+
+namespace jxl {
+
+struct AuxOut;
+struct BitWriter;
+
+// Integer coders: BitsCoder (raw), U32Coder (table), U64Coder (varint).
+
+// Reads/writes a given (fixed) number of bits <= 32.
+namespace BitsCoder {
+size_t MaxEncodedBits(size_t bits);
+
+Status CanEncode(size_t bits, uint32_t value,
+ size_t* JXL_RESTRICT encoded_bits);
+
+uint32_t Read(size_t bits, BitReader* JXL_RESTRICT reader);
+
+// Returns false if the value is too large to encode.
+Status Write(size_t bits, uint32_t value, BitWriter* JXL_RESTRICT writer);
+} // namespace BitsCoder
+
+// Encodes u32 using a lookup table and/or extra bits, governed by a per-field
+// encoding `enc` which consists of four distributions `d` chosen via a 2-bit
+// selector (least significant = 0). Each d may have two modes:
+// - direct: if d.IsDirect(), the value is d.Direct();
+// - offset: the value is derived from d.ExtraBits() extra bits plus d.Offset();
+// This encoding is denser than Exp-Golomb or Gamma codes when both small and
+// large values occur.
+//
+// Examples:
+// Direct: U32Enc(Val(8), Val(16), Val(32), Bits(6)), value 32 => 10b.
+// Offset: U32Enc(Val(0), BitsOffset(1, 1), BitsOffset(2, 3), BitsOffset(8, 8))
+// defines the following prefix code:
+// 00 -> 0
+// 01x -> 1..2
+// 10xx -> 3..7
+// 11xxxxxxxx -> 8..263
+namespace U32Coder {
+size_t MaxEncodedBits(U32Enc enc);
+Status CanEncode(U32Enc enc, uint32_t value, size_t* JXL_RESTRICT encoded_bits);
+uint32_t Read(U32Enc enc, BitReader* JXL_RESTRICT reader);
+
+// Returns false if the value is too large to encode.
+Status Write(U32Enc enc, uint32_t value, BitWriter* JXL_RESTRICT writer);
+
+// "private"
+Status ChooseSelector(U32Enc enc, uint32_t value,
+ uint32_t* JXL_RESTRICT selector,
+ size_t* JXL_RESTRICT total_bits);
+} // namespace U32Coder
+
+// Encodes 64-bit unsigned integers with a fixed distribution, taking 2 bits
+// to encode 0, 6 bits to encode 1 to 16, 10 bits to encode 17 to 272, 15 bits
+// to encode up to 4095, and on the order of log2(value) * 1.125 bits for
+// larger values.
+namespace U64Coder {
+constexpr size_t MaxEncodedBits() { return 2 + 12 + 6 * (8 + 1) + (4 + 1); }
+
+uint64_t Read(BitReader* JXL_RESTRICT reader);
+
+// Returns false if the value is too large to encode.
+Status Write(uint64_t value, BitWriter* JXL_RESTRICT writer);
+
+// Can always encode, but useful because it also returns bit size.
+Status CanEncode(uint64_t value, size_t* JXL_RESTRICT encoded_bits);
+} // namespace U64Coder
+
+// IEEE 754 half-precision (binary16). Refuses to read/write NaN/Inf.
+namespace F16Coder {
+constexpr size_t MaxEncodedBits() { return 16; }
+
+// Returns false if the bit representation is NaN or infinity
+Status Read(BitReader* JXL_RESTRICT reader, float* JXL_RESTRICT value);
+
+// Returns false if the value is too large to encode.
+Status Write(float value, BitWriter* JXL_RESTRICT writer);
+Status CanEncode(float value, size_t* JXL_RESTRICT encoded_bits);
+} // namespace F16Coder
+
+// A "bundle" is a forward- and backward compatible collection of fields.
+// They are used for SizeHeader/FrameHeader/GroupHeader. Bundles can be
+// extended by appending(!) fields. Optional fields may be omitted from the
+// bitstream by conditionally visiting them. When reading new bitstreams with
+// old code, we skip unknown fields at the end of the bundle. This requires
+// storing the amount of extra appended bits, and that fields are visited in
+// chronological order of being added to the format, because old decoders
+// cannot skip some future fields and resume reading old fields. Similarly,
+// new readers query bits in an "extensions" field to skip (groups of) fields
+// not present in old bitstreams. Note that each bundle must include an
+// "extensions" field prior to freezing the format, otherwise it cannot be
+// extended.
+//
+// To ensure interoperability, there will be no opaque fields.
+//
+// HOWTO:
+// - basic usage: define a struct with member variables ("fields") and a
+// VisitFields(v) member function that calls v->U32/Bool etc. for each
+// field, specifying their default values. The ctor must call
+// Bundle::Init(this).
+//
+// - print a trace of visitors: ensure each bundle has a static Name() member
+// function, and change Bundle::Print* to return true.
+//
+// - optional fields: in VisitFields, add if (v->Conditional(your_condition))
+// { v->Bool(default, &field); }. This prevents reading/writing field
+// if !your_condition, which is typically computed from a prior field.
+// WARNING: to ensure all fields are initialized, do not add an else branch;
+// instead add another if (v->Conditional(!your_condition)).
+//
+// - repeated fields: for dynamic sizes, use e.g. std::vector and in
+// VisitFields, if (v->IsReading()) field.resize(size) before accessing field.
+// For static or bounded sizes, use an array or std::array. In all cases,
+// simply visit each array element as if it were a normal field.
+//
+// - nested bundles: add a bundle as a normal field and in VisitFields call
+// JXL_RETURN_IF_ERROR(v->VisitNested(&nested));
+//
+// - allow future extensions: define a "uint64_t extensions" field and call
+// v->BeginExtensions(&extensions) after visiting all non-extension fields,
+// and `return v->EndExtensions();` after the last extension field.
+//
+// - encode an entire bundle in one bit if ALL its fields equal their default
+// values: add a "mutable bool all_default" field and as the first visitor:
+// if (v->AllDefault(*this, &all_default)) {
+// // Overwrite all serialized fields, but not any nonserialized_*.
+// v->SetDefault(this);
+// return true;
+// }
+// Note: if extensions are present, AllDefault() == false.
+
+namespace Bundle {
+constexpr size_t kMaxExtensions = 64; // bits in u64
+
+// Initializes fields to the default values. It is not recursive to nested
+// fields, this function is intended to be called in the constructors so
+// each nested field will already Init itself.
+void Init(Fields* JXL_RESTRICT fields);
+
+// Similar to Init, but recursive to nested fields.
+void SetDefault(Fields* JXL_RESTRICT fields);
+
+// Returns whether ALL fields (including `extensions`, if present) are equal
+// to their default value.
+bool AllDefault(const Fields& fields);
+
+// Returns max number of bits required to encode a T.
+size_t MaxBits(const Fields& fields);
+
+// Returns whether a header's fields can all be encoded, i.e. they have a
+// valid representation. If so, "*total_bits" is the exact number of bits
+// required. Called by Write.
+Status CanEncode(const Fields& fields, size_t* JXL_RESTRICT extension_bits,
+ size_t* JXL_RESTRICT total_bits);
+
+Status Read(BitReader* reader, Fields* JXL_RESTRICT fields);
+
+// Returns whether enough bits are available to fully read this bundle using
+// Read. Also returns true in case of a codestream error (other than not being
+// large enough): that means enough bits are available to determine there's an
+// error, use Read to get such error status.
+// NOTE: this advances the BitReader, a different one pointing back at the
+// original bit position in the codestream must be created to use Read after
+// this.
+bool CanRead(BitReader* reader, Fields* JXL_RESTRICT fields);
+
+Status Write(const Fields& fields, BitWriter* JXL_RESTRICT writer, size_t layer,
+ AuxOut* aux_out);
+} // namespace Bundle
+
+// Different subclasses of Visitor are passed to implementations of Fields
+// throughout their lifetime. Templates used to be used for this but dynamic
+// polymorphism produces more compact executables than template reification did.
+class Visitor {
+ public:
+ virtual ~Visitor() = default;
+ virtual Status Visit(Fields* fields) = 0;
+
+ virtual Status Bool(bool default_value, bool* JXL_RESTRICT value) = 0;
+ virtual Status U32(U32Enc, uint32_t, uint32_t*) = 0;
+
+ // Helper to construct U32Enc from U32Distr.
+ Status U32(const U32Distr d0, const U32Distr d1, const U32Distr d2,
+ const U32Distr d3, const uint32_t default_value,
+ uint32_t* JXL_RESTRICT value) {
+ return U32(U32Enc(d0, d1, d2, d3), default_value, value);
+ }
+
+ template <typename EnumT>
+ Status Enum(const EnumT default_value, EnumT* JXL_RESTRICT value) {
+ uint32_t u32 = static_cast<uint32_t>(*value);
+ // 00 -> 0
+ // 01 -> 1
+ // 10xxxx -> 2..17
+ // 11yyyyyy -> 18..81
+ JXL_RETURN_IF_ERROR(U32(Val(0), Val(1), BitsOffset(4, 2), BitsOffset(6, 18),
+ static_cast<uint32_t>(default_value), &u32));
+ *value = static_cast<EnumT>(u32);
+ return EnumValid(*value);
+ }
+
+ virtual Status Bits(size_t bits, uint32_t default_value,
+ uint32_t* JXL_RESTRICT value) = 0;
+ virtual Status U64(uint64_t default_value, uint64_t* JXL_RESTRICT value) = 0;
+ virtual Status F16(float default_value, float* JXL_RESTRICT value) = 0;
+
+ // Returns whether VisitFields should visit some subsequent fields.
+ // "condition" is typically from prior fields, e.g. flags.
+ // Overridden by InitVisitor and MaxBitsVisitor.
+ virtual Status Conditional(bool condition) { return condition; }
+
+ // Overridden by InitVisitor, AllDefaultVisitor and CanEncodeVisitor.
+ virtual Status AllDefault(const Fields& /*fields*/,
+ bool* JXL_RESTRICT all_default) {
+ JXL_RETURN_IF_ERROR(Bool(true, all_default));
+ return *all_default;
+ }
+
+ virtual void SetDefault(Fields* /*fields*/) {
+ // Do nothing by default, this is overridden by ReadVisitor.
+ }
+
+ // Returns the result of visiting a nested Bundle.
+ // Overridden by InitVisitor.
+ virtual Status VisitNested(Fields* fields) { return Visit(fields); }
+
+ // Overridden by ReadVisitor. Enables dynamically-sized fields.
+ virtual bool IsReading() const { return false; }
+
+ virtual Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) = 0;
+ virtual Status EndExtensions() = 0;
+};
+
+namespace fields_internal {
+// A bundle can be in one of three states concerning extensions: not-begun,
+// active, ended. Bundles may be nested, so we need a stack of states.
+class ExtensionStates {
+ public:
+ void Push() {
+ // Initial state = not-begun.
+ begun_ <<= 1;
+ ended_ <<= 1;
+ }
+
+ // Clears current state; caller must check IsEnded beforehand.
+ void Pop() {
+ begun_ >>= 1;
+ ended_ >>= 1;
+ }
+
+ // Returns true if state == active || state == ended.
+ Status IsBegun() const { return (begun_ & 1) != 0; }
+ // Returns true if state != not-begun && state != active.
+ Status IsEnded() const { return (ended_ & 1) != 0; }
+
+ void Begin() {
+ JXL_ASSERT(!IsBegun());
+ JXL_ASSERT(!IsEnded());
+ begun_ += 1;
+ }
+
+ void End() {
+ JXL_ASSERT(IsBegun());
+ JXL_ASSERT(!IsEnded());
+ ended_ += 1;
+ }
+
+ private:
+ // Current state := least-significant bit of begun_ and ended_.
+ uint64_t begun_ = 0;
+ uint64_t ended_ = 0;
+};
+
+// Visitors generate Init/AllDefault/Read/Write logic for all fields. Each
+// bundle's VisitFields member function calls visitor->U32 etc. We do not
+// overload operator() because a function name is easier to search for.
+
+class VisitorBase : public Visitor {
+ public:
+ explicit VisitorBase() {}
+ ~VisitorBase() override { JXL_ASSERT(depth_ == 0); }
+
+ // This is the only call site of Fields::VisitFields.
+ // Ensures EndExtensions was called.
+ Status Visit(Fields* fields) override {
+ depth_ += 1;
+ JXL_ASSERT(depth_ <= Bundle::kMaxExtensions);
+ extension_states_.Push();
+
+ const Status ok = fields->VisitFields(this);
+
+ if (ok) {
+ // If VisitFields called BeginExtensions, must also call
+ // EndExtensions.
+ JXL_ASSERT(!extension_states_.IsBegun() || extension_states_.IsEnded());
+ } else {
+ // Failed, undefined state: don't care whether EndExtensions was
+ // called.
+ }
+
+ extension_states_.Pop();
+ JXL_ASSERT(depth_ != 0);
+ depth_ -= 1;
+
+ return ok;
+ }
+
+ // For visitors accepting a const Visitor, need to const-cast so we can call
+ // the non-const Visitor::VisitFields. NOTE: C is not modified except the
+ // `all_default` field by CanEncodeVisitor.
+ Status VisitConst(const Fields& t) { return Visit(const_cast<Fields*>(&t)); }
+
+ // Derived types (overridden by InitVisitor because it is unsafe to read
+ // from *value there)
+
+ Status Bool(bool default_value, bool* JXL_RESTRICT value) override {
+ uint32_t bits = *value ? 1 : 0;
+ JXL_RETURN_IF_ERROR(Bits(1, static_cast<uint32_t>(default_value), &bits));
+ JXL_DASSERT(bits <= 1);
+ *value = bits == 1;
+ return true;
+ }
+
+ // Overridden by ReadVisitor and WriteVisitor.
+ // Called before any conditional visit based on "extensions".
+ // Overridden by ReadVisitor, CanEncodeVisitor and WriteVisitor.
+ Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) override {
+ JXL_RETURN_IF_ERROR(U64(0, extensions));
+
+ extension_states_.Begin();
+ return true;
+ }
+
+ // Called after all extension fields (if any). Although non-extension
+ // fields could be visited afterward, we prefer the convention that
+ // extension fields are always the last to be visited. Overridden by
+ // ReadVisitor.
+ Status EndExtensions() override {
+ extension_states_.End();
+ return true;
+ }
+
+ private:
+ size_t depth_ = 0; // to check nesting
+ ExtensionStates extension_states_;
+};
+} // namespace fields_internal
+
+} // namespace jxl
+
+#endif // LIB_JXL_FIELDS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/fields_test.cc b/third_party/jpeg-xl/lib/jxl/fields_test.cc
new file mode 100644
index 0000000000..cf54c780ea
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/fields_test.cc
@@ -0,0 +1,429 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/fields.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+#include <utility>
+
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+// Ensures `value` round-trips and in exactly `expected_bits_written`.
+void TestU32Coder(const uint32_t value, const size_t expected_bits_written) {
+ const U32Enc enc(Val(0), Bits(4), Val(0x7FFFFFFF), Bits(32));
+
+ BitWriter writer;
+ BitWriter::Allotment allotment(
+ &writer, RoundUpBitsToByteMultiple(U32Coder::MaxEncodedBits(enc)));
+
+ size_t precheck_pos;
+ EXPECT_TRUE(U32Coder::CanEncode(enc, value, &precheck_pos));
+ EXPECT_EQ(expected_bits_written, precheck_pos);
+
+ EXPECT_TRUE(U32Coder::Write(enc, value, &writer));
+ EXPECT_EQ(expected_bits_written, writer.BitsWritten());
+ writer.ZeroPadToByte();
+ allotment.ReclaimAndCharge(&writer, 0, nullptr);
+
+ BitReader reader(writer.GetSpan());
+ const uint32_t decoded_value = U32Coder::Read(enc, &reader);
+ EXPECT_EQ(value, decoded_value);
+ EXPECT_TRUE(reader.Close());
+}
+
+TEST(FieldsTest, U32CoderTest) {
+ TestU32Coder(0, 2);
+ TestU32Coder(1, 6);
+ TestU32Coder(15, 6);
+ TestU32Coder(0x7FFFFFFF, 2);
+ TestU32Coder(128, 34);
+ TestU32Coder(0x7FFFFFFEu, 34);
+ TestU32Coder(0x80000000u, 34);
+ TestU32Coder(0xFFFFFFFFu, 34);
+}
+
+void TestU64Coder(const uint64_t value, const size_t expected_bits_written) {
+ BitWriter writer;
+ BitWriter::Allotment allotment(
+ &writer, RoundUpBitsToByteMultiple(U64Coder::MaxEncodedBits()));
+
+ size_t precheck_pos;
+ EXPECT_TRUE(U64Coder::CanEncode(value, &precheck_pos));
+ EXPECT_EQ(expected_bits_written, precheck_pos);
+
+ EXPECT_TRUE(U64Coder::Write(value, &writer));
+ EXPECT_EQ(expected_bits_written, writer.BitsWritten());
+
+ writer.ZeroPadToByte();
+ allotment.ReclaimAndCharge(&writer, 0, nullptr);
+
+ BitReader reader(writer.GetSpan());
+ const uint64_t decoded_value = U64Coder::Read(&reader);
+ EXPECT_EQ(value, decoded_value);
+ EXPECT_TRUE(reader.Close());
+}
+
+TEST(FieldsTest, U64CoderTest) {
+ // Values that should take 2 bits (selector 00): 0
+ TestU64Coder(0, 2);
+
+ // Values that should take 6 bits (2 for selector, 4 for value): 1..16
+ TestU64Coder(1, 6);
+ TestU64Coder(2, 6);
+ TestU64Coder(8, 6);
+ TestU64Coder(15, 6);
+ TestU64Coder(16, 6);
+
+ // Values that should take 10 bits (2 for selector, 8 for value): 17..272
+ TestU64Coder(17, 10);
+ TestU64Coder(18, 10);
+ TestU64Coder(100, 10);
+ TestU64Coder(271, 10);
+ TestU64Coder(272, 10);
+
+ // Values that should take 15 bits (2 for selector, 12 for value, 1 for varint
+ // end): (0)..273..4095
+ TestU64Coder(273, 15);
+ TestU64Coder(274, 15);
+ TestU64Coder(1000, 15);
+ TestU64Coder(4094, 15);
+ TestU64Coder(4095, 15);
+
+ // Take 24 bits (of which 20 actual value): (0)..4096..1048575
+ TestU64Coder(4096, 24);
+ TestU64Coder(4097, 24);
+ TestU64Coder(10000, 24);
+ TestU64Coder(1048574, 24);
+ TestU64Coder(1048575, 24);
+
+ // Take 33 bits (of which 28 actual value): (0)..1048576..268435455
+ TestU64Coder(1048576, 33);
+ TestU64Coder(1048577, 33);
+ TestU64Coder(10000000, 33);
+ TestU64Coder(268435454, 33);
+ TestU64Coder(268435455, 33);
+
+ // Take 42 bits (of which 36 actual value): (0)..268435456..68719476735
+ TestU64Coder(268435456ull, 42);
+ TestU64Coder(268435457ull, 42);
+ TestU64Coder(1000000000ull, 42);
+ TestU64Coder(68719476734ull, 42);
+ TestU64Coder(68719476735ull, 42);
+
+ // Take 51 bits (of which 44 actual value): (0)..68719476736..17592186044415
+ TestU64Coder(68719476736ull, 51);
+ TestU64Coder(68719476737ull, 51);
+ TestU64Coder(1000000000000ull, 51);
+ TestU64Coder(17592186044414ull, 51);
+ TestU64Coder(17592186044415ull, 51);
+
+ // Take 60 bits (of which 52 actual value):
+ // (0)..17592186044416..4503599627370495
+ TestU64Coder(17592186044416ull, 60);
+ TestU64Coder(17592186044417ull, 60);
+ TestU64Coder(100000000000000ull, 60);
+ TestU64Coder(4503599627370494ull, 60);
+ TestU64Coder(4503599627370495ull, 60);
+
+ // Take 69 bits (of which 60 actual value):
+ // (0)..4503599627370496..1152921504606846975
+ TestU64Coder(4503599627370496ull, 69);
+ TestU64Coder(4503599627370497ull, 69);
+ TestU64Coder(10000000000000000ull, 69);
+ TestU64Coder(1152921504606846974ull, 69);
+ TestU64Coder(1152921504606846975ull, 69);
+
+ // Take 73 bits (of which 64 actual value):
+ // (0)..1152921504606846976..18446744073709551615
+ TestU64Coder(1152921504606846976ull, 73);
+ TestU64Coder(1152921504606846977ull, 73);
+ TestU64Coder(10000000000000000000ull, 73);
+ TestU64Coder(18446744073709551614ull, 73);
+ TestU64Coder(18446744073709551615ull, 73);
+}
+
+Status TestF16Coder(const float value) {
+ size_t max_encoded_bits;
+ // It is not a fatal error if it can't be encoded.
+ if (!F16Coder::CanEncode(value, &max_encoded_bits)) return false;
+ EXPECT_EQ(F16Coder::MaxEncodedBits(), max_encoded_bits);
+
+ BitWriter writer;
+ BitWriter::Allotment allotment(&writer,
+ RoundUpBitsToByteMultiple(max_encoded_bits));
+
+ EXPECT_TRUE(F16Coder::Write(value, &writer));
+ EXPECT_EQ(F16Coder::MaxEncodedBits(), writer.BitsWritten());
+ writer.ZeroPadToByte();
+ allotment.ReclaimAndCharge(&writer, 0, nullptr);
+
+ BitReader reader(writer.GetSpan());
+ float decoded_value;
+ EXPECT_TRUE(F16Coder::Read(&reader, &decoded_value));
+ // All values we test can be represented exactly.
+ EXPECT_EQ(value, decoded_value);
+ EXPECT_TRUE(reader.Close());
+ return true;
+}
+
+TEST(FieldsTest, F16CoderTest) {
+ for (float sign : {-1.0f, 1.0f}) {
+ // (anything less than 1E-3 are subnormals)
+ for (float mag : {0.0f, 0.5f, 1.0f, 2.0f, 2.5f, 16.015625f, 1.0f / 4096,
+ 1.0f / 16384, 65504.0f}) {
+ EXPECT_TRUE(TestF16Coder(sign * mag));
+ }
+ }
+
+ // Out of range
+ EXPECT_FALSE(TestF16Coder(65504.01f));
+ EXPECT_FALSE(TestF16Coder(-65505.0f));
+}
+
+// Ensures Read(Write()) returns the same fields.
+TEST(FieldsTest, TestRoundtripSize) {
+ for (int i = 0; i < 8; i++) {
+ SizeHeader size;
+ ASSERT_TRUE(size.Set(123 + 77 * i, 7 + i));
+
+ size_t extension_bits = 999, total_bits = 999; // Initialize as garbage.
+ ASSERT_TRUE(Bundle::CanEncode(size, &extension_bits, &total_bits));
+ EXPECT_EQ(0u, extension_bits);
+
+ BitWriter writer;
+ ASSERT_TRUE(WriteSizeHeader(size, &writer, 0, nullptr));
+ EXPECT_EQ(total_bits, writer.BitsWritten());
+ writer.ZeroPadToByte();
+
+ SizeHeader size2;
+ BitReader reader(writer.GetSpan());
+ ASSERT_TRUE(ReadSizeHeader(&reader, &size2));
+ EXPECT_EQ(total_bits, reader.TotalBitsConsumed());
+ EXPECT_TRUE(reader.Close());
+
+ EXPECT_EQ(size.xsize(), size2.xsize());
+ EXPECT_EQ(size.ysize(), size2.ysize());
+ }
+}
+
+// Ensure all values can be reached by the encoding.
+TEST(FieldsTest, TestCropRect) {
+ CodecMetadata metadata;
+ for (int32_t i = -999; i < 19000; ++i) {
+ FrameHeader f(&metadata);
+ f.custom_size_or_origin = true;
+ f.frame_origin.x0 = i;
+ f.frame_origin.y0 = i;
+ f.frame_size.xsize = 1000 + i;
+ f.frame_size.ysize = 1000 + i;
+ size_t extension_bits = 0, total_bits = 0;
+ ASSERT_TRUE(Bundle::CanEncode(f, &extension_bits, &total_bits));
+ EXPECT_EQ(0u, extension_bits);
+ EXPECT_GE(total_bits, 9u);
+ }
+}
+TEST(FieldsTest, TestPreview) {
+ // (div8 cannot represent 4360, but !div8 can go a little higher)
+ for (uint32_t i = 1; i < 4360; ++i) {
+ PreviewHeader p;
+ ASSERT_TRUE(p.Set(i, i));
+ size_t extension_bits = 0, total_bits = 0;
+ ASSERT_TRUE(Bundle::CanEncode(p, &extension_bits, &total_bits));
+ EXPECT_EQ(0u, extension_bits);
+ EXPECT_GE(total_bits, 6u);
+ }
+}
+
+// Ensures Read(Write()) returns the same fields.
+TEST(FieldsTest, TestRoundtripFrame) {
+ CodecMetadata metadata;
+ FrameHeader h(&metadata);
+ h.extensions = 0x800;
+
+ size_t extension_bits = 999, total_bits = 999; // Initialize as garbage.
+ ASSERT_TRUE(Bundle::CanEncode(h, &extension_bits, &total_bits));
+ EXPECT_EQ(0u, extension_bits);
+ BitWriter writer;
+ ASSERT_TRUE(WriteFrameHeader(h, &writer, nullptr));
+ EXPECT_EQ(total_bits, writer.BitsWritten());
+ writer.ZeroPadToByte();
+
+ FrameHeader h2(&metadata);
+ BitReader reader(writer.GetSpan());
+ ASSERT_TRUE(ReadFrameHeader(&reader, &h2));
+ EXPECT_EQ(total_bits, reader.TotalBitsConsumed());
+ EXPECT_TRUE(reader.Close());
+
+ EXPECT_EQ(h.extensions, h2.extensions);
+ EXPECT_EQ(h.flags, h2.flags);
+}
+
+#ifndef JXL_CRASH_ON_ERROR
+// Ensure out-of-bounds values cause an error.
+TEST(FieldsTest, TestOutOfRange) {
+ SizeHeader h;
+ ASSERT_TRUE(h.Set(0xFFFFFFFFull, 0xFFFFFFFFull));
+ size_t extension_bits = 999, total_bits = 999; // Initialize as garbage.
+ ASSERT_FALSE(Bundle::CanEncode(h, &extension_bits, &total_bits));
+}
+#endif
+
+struct OldBundle : public Fields {
+ OldBundle() { Bundle::Init(this); }
+ JXL_FIELDS_NAME(OldBundle)
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Val(1), Bits(2), Bits(3), Bits(4), 1, &old_small));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->F16(1.125f, &old_f));
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Bits(7), Bits(12), Bits(16), Bits(32), 0, &old_large));
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions));
+ return visitor->EndExtensions();
+ }
+
+ uint32_t old_small;
+ float old_f;
+ uint32_t old_large;
+ uint64_t extensions;
+};
+
+struct NewBundle : public Fields {
+ NewBundle() { Bundle::Init(this); }
+ JXL_FIELDS_NAME(NewBundle)
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Val(1), Bits(2), Bits(3), Bits(4), 1, &old_small));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->F16(1.125f, &old_f));
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Bits(7), Bits(12), Bits(16), Bits(32), 0, &old_large));
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions));
+ if (visitor->Conditional(extensions & 1)) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Val(2), Bits(2), Bits(3), Bits(4), 2, &new_small));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->F16(-2.0f, &new_f));
+ }
+ if (visitor->Conditional(extensions & 2)) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Bits(9), Bits(12), Bits(16), Bits(32), 0, &new_large));
+ }
+ return visitor->EndExtensions();
+ }
+
+ uint32_t old_small;
+ float old_f;
+ uint32_t old_large;
+ uint64_t extensions;
+
+ // If extensions & 1
+ uint32_t new_small = 2;
+ float new_f = -2.0f;
+ // If extensions & 2
+ uint32_t new_large = 0;
+};
+
+TEST(FieldsTest, TestNewDecoderOldData) {
+ OldBundle old_bundle;
+ old_bundle.old_large = 123;
+ old_bundle.old_f = 3.75f;
+ old_bundle.extensions = 0;
+
+ // Write to bit stream
+ const size_t kMaxOutBytes = 999;
+ BitWriter writer;
+ // Make sure values are initialized by code under test.
+ size_t extension_bits = 12345, total_bits = 12345;
+ ASSERT_TRUE(Bundle::CanEncode(old_bundle, &extension_bits, &total_bits));
+ ASSERT_LE(total_bits, kMaxOutBytes * kBitsPerByte);
+ EXPECT_EQ(0u, extension_bits);
+ AuxOut aux_out;
+ ASSERT_TRUE(Bundle::Write(old_bundle, &writer, kLayerHeader, &aux_out));
+
+ BitWriter::Allotment allotment(&writer,
+ kMaxOutBytes * kBitsPerByte - total_bits);
+ writer.Write(20, 0xA55A); // sentinel
+ writer.ZeroPadToByte();
+ allotment.ReclaimAndCharge(&writer, kLayerHeader, nullptr);
+
+ ASSERT_LE(writer.GetSpan().size(), kMaxOutBytes);
+ BitReader reader(writer.GetSpan());
+ NewBundle new_bundle;
+ ASSERT_TRUE(Bundle::Read(&reader, &new_bundle));
+ EXPECT_EQ(reader.TotalBitsConsumed(),
+ aux_out.layers[kLayerHeader].total_bits);
+ EXPECT_EQ(reader.ReadBits(20), 0xA55Au);
+ EXPECT_TRUE(reader.Close());
+
+ // Old fields are the same in both
+ EXPECT_EQ(old_bundle.extensions, new_bundle.extensions);
+ EXPECT_EQ(old_bundle.old_small, new_bundle.old_small);
+ EXPECT_EQ(old_bundle.old_f, new_bundle.old_f);
+ EXPECT_EQ(old_bundle.old_large, new_bundle.old_large);
+ // New fields match their defaults
+ EXPECT_EQ(2u, new_bundle.new_small);
+ EXPECT_EQ(-2.0f, new_bundle.new_f);
+ EXPECT_EQ(0u, new_bundle.new_large);
+}
+
+TEST(FieldsTest, TestOldDecoderNewData) {
+ NewBundle new_bundle;
+ new_bundle.old_large = 123;
+ new_bundle.extensions = 3;
+ new_bundle.new_f = 999.0f;
+ new_bundle.new_large = 456;
+
+ // Write to bit stream
+ constexpr size_t kMaxOutBytes = 999;
+ BitWriter writer;
+ // Make sure values are initialized by code under test.
+ size_t extension_bits = 12345, total_bits = 12345;
+ ASSERT_TRUE(Bundle::CanEncode(new_bundle, &extension_bits, &total_bits));
+ EXPECT_NE(0u, extension_bits);
+ AuxOut aux_out;
+ ASSERT_TRUE(Bundle::Write(new_bundle, &writer, kLayerHeader, &aux_out));
+ ASSERT_LE(aux_out.layers[kLayerHeader].total_bits,
+ kMaxOutBytes * kBitsPerByte);
+
+ BitWriter::Allotment allotment(
+ &writer,
+ kMaxOutBytes * kBitsPerByte - aux_out.layers[kLayerHeader].total_bits);
+ // Ensure Read skips the additional fields
+ writer.Write(20, 0xA55A); // sentinel
+ writer.ZeroPadToByte();
+ allotment.ReclaimAndCharge(&writer, kLayerHeader, nullptr);
+
+ BitReader reader(writer.GetSpan());
+ OldBundle old_bundle;
+ ASSERT_TRUE(Bundle::Read(&reader, &old_bundle));
+ EXPECT_EQ(reader.TotalBitsConsumed(),
+ aux_out.layers[kLayerHeader].total_bits);
+ EXPECT_EQ(reader.ReadBits(20), 0xA55Au);
+ EXPECT_TRUE(reader.Close());
+
+ // Old fields are the same in both
+ EXPECT_EQ(new_bundle.extensions, old_bundle.extensions);
+ EXPECT_EQ(new_bundle.old_small, old_bundle.old_small);
+ EXPECT_EQ(new_bundle.old_f, old_bundle.old_f);
+ EXPECT_EQ(new_bundle.old_large, old_bundle.old_large);
+ // (Can't check new fields because old decoder doesn't know about them)
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/frame_header.cc b/third_party/jpeg-xl/lib/jxl/frame_header.cc
new file mode 100644
index 0000000000..475ce8e05e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/frame_header.cc
@@ -0,0 +1,494 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/frame_header.h"
+
+#include <sstream>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+
+constexpr uint8_t YCbCrChromaSubsampling::kHShift[] = {0, 1, 1, 0};
+constexpr uint8_t YCbCrChromaSubsampling::kVShift[] = {0, 1, 0, 1};
+
+static Status VisitBlendMode(Visitor* JXL_RESTRICT visitor,
+ BlendMode default_value, BlendMode* blend_mode) {
+ uint32_t encoded = static_cast<uint32_t>(*blend_mode);
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+ Val(static_cast<uint32_t>(BlendMode::kReplace)),
+ Val(static_cast<uint32_t>(BlendMode::kAdd)),
+ Val(static_cast<uint32_t>(BlendMode::kBlend)), BitsOffset(2, 3),
+ static_cast<uint32_t>(default_value), &encoded));
+ if (encoded > 4) {
+ return JXL_FAILURE("Invalid blend_mode");
+ }
+ *blend_mode = static_cast<BlendMode>(encoded);
+ return true;
+}
+
+static Status VisitFrameType(Visitor* JXL_RESTRICT visitor,
+ FrameType default_value, FrameType* frame_type) {
+ uint32_t encoded = static_cast<uint32_t>(*frame_type);
+
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Val(static_cast<uint32_t>(FrameType::kRegularFrame)),
+ Val(static_cast<uint32_t>(FrameType::kDCFrame)),
+ Val(static_cast<uint32_t>(FrameType::kReferenceOnly)),
+ Val(static_cast<uint32_t>(FrameType::kSkipProgressive)),
+ static_cast<uint32_t>(default_value), &encoded));
+ *frame_type = static_cast<FrameType>(encoded);
+ return true;
+}
+
+BlendingInfo::BlendingInfo() { Bundle::Init(this); }
+
+Status BlendingInfo::VisitFields(Visitor* JXL_RESTRICT visitor) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ VisitBlendMode(visitor, BlendMode::kReplace, &mode));
+ if (visitor->Conditional(nonserialized_num_extra_channels > 0 &&
+ (mode == BlendMode::kBlend ||
+ mode == BlendMode::kAlphaWeightedAdd))) {
+ // Up to 11 alpha channels for blending.
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+ Val(0), Val(1), Val(2), BitsOffset(3, 3), 0, &alpha_channel));
+ if (visitor->IsReading() &&
+ alpha_channel >= nonserialized_num_extra_channels) {
+ return JXL_FAILURE("Invalid alpha channel for blending");
+ }
+ }
+ if (visitor->Conditional((nonserialized_num_extra_channels > 0 &&
+ (mode == BlendMode::kBlend ||
+ mode == BlendMode::kAlphaWeightedAdd)) ||
+ mode == BlendMode::kMul)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &clamp));
+ }
+ // 'old' frame for blending. Only necessary if this is not a full frame, or
+ // blending is not kReplace.
+ if (visitor->Conditional(mode != BlendMode::kReplace ||
+ nonserialized_is_partial_frame)) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Val(0), Val(1), Val(2), Val(3), 0, &source));
+ }
+ return true;
+}
+
+std::string BlendingInfo::DebugString() const {
+ std::ostringstream os;
+ os << (mode == BlendMode::kReplace ? "Replace"
+ : mode == BlendMode::kAdd ? "Add"
+ : mode == BlendMode::kBlend ? "Blend"
+ : mode == BlendMode::kAlphaWeightedAdd ? "AlphaWeightedAdd"
+ : "Mul");
+ if (nonserialized_num_extra_channels > 0 &&
+ (mode == BlendMode::kBlend || mode == BlendMode::kAlphaWeightedAdd)) {
+ os << ",alpha=" << alpha_channel << ",clamp=" << clamp;
+ } else if (mode == BlendMode::kMul) {
+ os << ",clamp=" << clamp;
+ }
+ if (mode != BlendMode::kReplace || nonserialized_is_partial_frame) {
+ os << ",source=" << source;
+ }
+ return os.str();
+}
+
+AnimationFrame::AnimationFrame(const CodecMetadata* metadata)
+ : nonserialized_metadata(metadata) {
+ Bundle::Init(this);
+}
+Status AnimationFrame::VisitFields(Visitor* JXL_RESTRICT visitor) {
+ if (visitor->Conditional(nonserialized_metadata != nullptr &&
+ nonserialized_metadata->m.have_animation)) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Val(0), Val(1), Bits(8), Bits(32), 0, &duration));
+ }
+
+ if (visitor->Conditional(
+ nonserialized_metadata != nullptr &&
+ nonserialized_metadata->m.animation.have_timecodes)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(32, 0, &timecode));
+ }
+ return true;
+}
+
+YCbCrChromaSubsampling::YCbCrChromaSubsampling() { Bundle::Init(this); }
+Passes::Passes() { Bundle::Init(this); }
+Status Passes::VisitFields(Visitor* JXL_RESTRICT visitor) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Val(1), Val(2), Val(3), BitsOffset(3, 4), 1, &num_passes));
+ JXL_ASSERT(num_passes <= kMaxNumPasses); // Cannot happen when reading
+
+ if (visitor->Conditional(num_passes != 1)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+ Val(0), Val(1), Val(2), BitsOffset(1, 3), 0, &num_downsample));
+ JXL_ASSERT(num_downsample <= 4); // 1,2,4,8
+ if (num_downsample > num_passes) {
+ return JXL_FAILURE("num_downsample %u > num_passes %u", num_downsample,
+ num_passes);
+ }
+
+ for (uint32_t i = 0; i < num_passes - 1; i++) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(2, 0, &shift[i]));
+ }
+ shift[num_passes - 1] = 0;
+
+ for (uint32_t i = 0; i < num_downsample; ++i) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Val(1), Val(2), Val(4), Val(8), 1, &downsample[i]));
+ if (i > 0 && downsample[i] >= downsample[i - 1]) {
+ return JXL_FAILURE("downsample sequence should be decreasing");
+ }
+ }
+ for (uint32_t i = 0; i < num_downsample; ++i) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Val(0), Val(1), Val(2), Bits(3), 0, &last_pass[i]));
+ if (i > 0 && last_pass[i] <= last_pass[i - 1]) {
+ return JXL_FAILURE("last_pass sequence should be increasing");
+ }
+ if (last_pass[i] >= num_passes) {
+ return JXL_FAILURE("last_pass %u >= num_passes %u", last_pass[i],
+ num_passes);
+ }
+ }
+ }
+
+ return true;
+}
+
+std::string Passes::DebugString() const {
+ std::ostringstream os;
+ os << "p=" << num_passes;
+ if (num_downsample) {
+ os << ",ds=";
+ for (uint32_t i = 0; i < num_downsample; ++i) {
+ os << last_pass[i] << ":" << downsample[i];
+ if (i + 1 < num_downsample) os << ";";
+ }
+ }
+ bool have_shifts = false;
+ for (uint32_t i = 0; i < num_passes; ++i) {
+ if (shift[i]) have_shifts = true;
+ }
+ if (have_shifts) {
+ os << ",shifts=";
+ for (uint32_t i = 0; i < num_passes; ++i) {
+ os << shift[i];
+ if (i + 1 < num_passes) os << ";";
+ }
+ }
+ return os.str();
+}
+
+FrameHeader::FrameHeader(const CodecMetadata* metadata)
+ : animation_frame(metadata), nonserialized_metadata(metadata) {
+ Bundle::Init(this);
+}
+
+Status ReadFrameHeader(BitReader* JXL_RESTRICT reader,
+ FrameHeader* JXL_RESTRICT frame) {
+ return Bundle::Read(reader, frame);
+}
+
+Status FrameHeader::VisitFields(Visitor* JXL_RESTRICT visitor) {
+ if (visitor->AllDefault(*this, &all_default)) {
+ // Overwrite all serialized fields, but not any nonserialized_*.
+ visitor->SetDefault(this);
+ return true;
+ }
+
+ JXL_QUIET_RETURN_IF_ERROR(
+ VisitFrameType(visitor, FrameType::kRegularFrame, &frame_type));
+ if (visitor->IsReading() && nonserialized_is_preview &&
+ frame_type != kRegularFrame) {
+ return JXL_FAILURE("Only regular frame could be a preview");
+ }
+
+ // FrameEncoding.
+ bool is_modular = (encoding == FrameEncoding::kModular);
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &is_modular));
+ encoding = (is_modular ? FrameEncoding::kModular : FrameEncoding::kVarDCT);
+
+ // Flags
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U64(0, &flags));
+
+ // Color transform
+ bool xyb_encoded = nonserialized_metadata == nullptr ||
+ nonserialized_metadata->m.xyb_encoded;
+
+ if (xyb_encoded) {
+ color_transform = ColorTransform::kXYB;
+ } else {
+ // Alternate if kYCbCr.
+ bool alternate = color_transform == ColorTransform::kYCbCr;
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &alternate));
+ color_transform =
+ (alternate ? ColorTransform::kYCbCr : ColorTransform::kNone);
+ }
+
+ // Chroma subsampling for YCbCr, if no DC frame is used.
+ if (visitor->Conditional(color_transform == ColorTransform::kYCbCr &&
+ ((flags & kUseDcFrame) == 0))) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&chroma_subsampling));
+ }
+
+ size_t num_extra_channels =
+ nonserialized_metadata != nullptr
+ ? nonserialized_metadata->m.extra_channel_info.size()
+ : 0;
+
+ // Upsampling
+ if (visitor->Conditional((flags & kUseDcFrame) == 0)) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Val(1), Val(2), Val(4), Val(8), 1, &upsampling));
+ if (nonserialized_metadata != nullptr &&
+ visitor->Conditional(num_extra_channels != 0)) {
+ const std::vector<ExtraChannelInfo>& extra_channels =
+ nonserialized_metadata->m.extra_channel_info;
+ extra_channel_upsampling.resize(extra_channels.size(), 1);
+ for (size_t i = 0; i < extra_channels.size(); ++i) {
+ uint32_t dim_shift =
+ nonserialized_metadata->m.extra_channel_info[i].dim_shift;
+ uint32_t& ec_upsampling = extra_channel_upsampling[i];
+ ec_upsampling >>= dim_shift;
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Val(1), Val(2), Val(4), Val(8), 1, &ec_upsampling));
+ ec_upsampling <<= dim_shift;
+ if (ec_upsampling < upsampling) {
+ return JXL_FAILURE(
+ "EC upsampling (%u) < color upsampling (%u), which is invalid.",
+ ec_upsampling, upsampling);
+ }
+ if (ec_upsampling > 8) {
+ return JXL_FAILURE("EC upsampling too large (%u)", ec_upsampling);
+ }
+ }
+ } else {
+ extra_channel_upsampling.clear();
+ }
+ }
+
+ // Modular- or VarDCT-specific data.
+ if (visitor->Conditional(encoding == FrameEncoding::kModular)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(2, 1, &group_size_shift));
+ }
+ if (visitor->Conditional(encoding == FrameEncoding::kVarDCT &&
+ color_transform == ColorTransform::kXYB)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 3, &x_qm_scale));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 2, &b_qm_scale));
+ } else {
+ x_qm_scale = b_qm_scale = 2; // noop
+ }
+
+ // Not useful for kPatchSource
+ if (visitor->Conditional(frame_type != FrameType::kReferenceOnly)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&passes));
+ }
+
+ if (visitor->Conditional(frame_type == FrameType::kDCFrame)) {
+ // Up to 4 pyramid levels - for up to 16384x downsampling.
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Val(1), Val(2), Val(3), Val(4), 1, &dc_level));
+ }
+ if (frame_type != FrameType::kDCFrame) {
+ dc_level = 0;
+ }
+
+ bool is_partial_frame = false;
+ if (visitor->Conditional(frame_type != FrameType::kDCFrame)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &custom_size_or_origin));
+ if (visitor->Conditional(custom_size_or_origin)) {
+ const U32Enc enc(Bits(8), BitsOffset(11, 256), BitsOffset(14, 2304),
+ BitsOffset(30, 18688));
+ // Frame offset, only if kRegularFrame or kSkipProgressive.
+ if (visitor->Conditional(frame_type == FrameType::kRegularFrame ||
+ frame_type == FrameType::kSkipProgressive)) {
+ uint32_t ux0 = PackSigned(frame_origin.x0);
+ uint32_t uy0 = PackSigned(frame_origin.y0);
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(enc, 0, &ux0));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(enc, 0, &uy0));
+ frame_origin.x0 = UnpackSigned(ux0);
+ frame_origin.y0 = UnpackSigned(uy0);
+ }
+ // Frame size
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(enc, 0, &frame_size.xsize));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(enc, 0, &frame_size.ysize));
+ if (custom_size_or_origin &&
+ (frame_size.xsize == 0 || frame_size.ysize == 0)) {
+ return JXL_FAILURE(
+ "Invalid crop dimensions for frame: zero width or height");
+ }
+ int32_t image_xsize = default_xsize();
+ int32_t image_ysize = default_ysize();
+ if (frame_type == FrameType::kRegularFrame ||
+ frame_type == FrameType::kSkipProgressive) {
+ is_partial_frame |= frame_origin.x0 > 0;
+ is_partial_frame |= frame_origin.y0 > 0;
+ is_partial_frame |= (static_cast<int32_t>(frame_size.xsize) +
+ frame_origin.x0) < image_xsize;
+ is_partial_frame |= (static_cast<int32_t>(frame_size.ysize) +
+ frame_origin.y0) < image_ysize;
+ }
+ }
+ }
+
+ // Blending info, animation info and whether this is the last frame or not.
+ if (visitor->Conditional(frame_type == FrameType::kRegularFrame ||
+ frame_type == FrameType::kSkipProgressive)) {
+ blending_info.nonserialized_num_extra_channels = num_extra_channels;
+ blending_info.nonserialized_is_partial_frame = is_partial_frame;
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&blending_info));
+ bool replace_all = (blending_info.mode == BlendMode::kReplace);
+ extra_channel_blending_info.resize(num_extra_channels);
+ for (size_t i = 0; i < num_extra_channels; i++) {
+ auto& ec_blending_info = extra_channel_blending_info[i];
+ ec_blending_info.nonserialized_is_partial_frame = is_partial_frame;
+ ec_blending_info.nonserialized_num_extra_channels = num_extra_channels;
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&ec_blending_info));
+ replace_all &= (ec_blending_info.mode == BlendMode::kReplace);
+ }
+ if (visitor->IsReading() && nonserialized_is_preview) {
+ if (!replace_all || custom_size_or_origin) {
+ return JXL_FAILURE("Preview is not compatible with blending");
+ }
+ }
+ if (visitor->Conditional(nonserialized_metadata != nullptr &&
+ nonserialized_metadata->m.have_animation)) {
+ animation_frame.nonserialized_metadata = nonserialized_metadata;
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&animation_frame));
+ }
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(true, &is_last));
+ }
+ if (frame_type != FrameType::kRegularFrame) {
+ is_last = false;
+ }
+
+ // ID of that can be used to refer to this frame. 0 for a non-zero-duration
+ // frame means that it will not be referenced. Not necessary for the last
+ // frame.
+ if (visitor->Conditional(frame_type != kDCFrame && !is_last)) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Val(0), Val(1), Val(2), Val(3), 0, &save_as_reference));
+ }
+
+ // If this frame is not blended on another frame post-color-transform, it may
+ // be stored for being referenced either before or after the color transform.
+ // If it is blended post-color-transform, it must be blended after. It must
+ // also be blended after if this is a kRegular frame that does not cover the
+ // full frame, as samples outside the partial region are from a
+ // post-color-transform frame.
+ if (frame_type != FrameType::kDCFrame) {
+ if (visitor->Conditional(CanBeReferenced() &&
+ blending_info.mode == BlendMode::kReplace &&
+ !is_partial_frame &&
+ (frame_type == FrameType::kRegularFrame ||
+ frame_type == FrameType::kSkipProgressive))) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->Bool(false, &save_before_color_transform));
+ } else if (visitor->Conditional(frame_type == FrameType::kReferenceOnly)) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->Bool(true, &save_before_color_transform));
+ if (!save_before_color_transform &&
+ (frame_size.xsize < nonserialized_metadata->xsize() ||
+ frame_size.ysize < nonserialized_metadata->ysize() ||
+ frame_origin.x0 != 0 || frame_origin.y0 != 0)) {
+ return JXL_FAILURE(
+ "non-patch reference frame with invalid crop: %" PRIuS "x%" PRIuS
+ "%+d%+d",
+ static_cast<size_t>(frame_size.xsize),
+ static_cast<size_t>(frame_size.ysize),
+ static_cast<int>(frame_origin.x0),
+ static_cast<int>(frame_origin.y0));
+ }
+ }
+ } else {
+ save_before_color_transform = true;
+ }
+
+ JXL_QUIET_RETURN_IF_ERROR(VisitNameString(visitor, &name));
+
+ loop_filter.nonserialized_is_modular = is_modular;
+ JXL_RETURN_IF_ERROR(visitor->VisitNested(&loop_filter));
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions));
+ // Extensions: in chronological order of being added to the format.
+ return visitor->EndExtensions();
+}
+
+std::string FrameHeader::DebugString() const {
+ std::ostringstream os;
+ os << (encoding == FrameEncoding::kVarDCT ? "VarDCT" : "Modular");
+ os << ",";
+ os << (frame_type == FrameType::kRegularFrame ? "Regular"
+ : frame_type == FrameType::kDCFrame ? "DC"
+ : frame_type == FrameType::kReferenceOnly ? "Reference"
+ : "SkipProgressive");
+ if (frame_type == FrameType::kDCFrame) {
+ os << "(lv" << dc_level << ")";
+ }
+
+ if (flags) {
+ os << ",";
+ uint32_t remaining = flags;
+
+#define TEST_FLAG(name) \
+ if (flags & Flags::k##name) { \
+ remaining &= ~Flags::k##name; \
+ os << #name; \
+ if (remaining) os << "|"; \
+ }
+ TEST_FLAG(Noise);
+ TEST_FLAG(Patches);
+ TEST_FLAG(Splines);
+ TEST_FLAG(UseDcFrame);
+ TEST_FLAG(SkipAdaptiveDCSmoothing);
+#undef TEST_FLAG
+ }
+
+ os << ",";
+ os << (color_transform == ColorTransform::kXYB ? "XYB"
+ : color_transform == ColorTransform::kYCbCr ? "YCbCr"
+ : "None");
+
+ if (encoding == FrameEncoding::kModular) {
+ os << ",shift=" << group_size_shift;
+ } else if (color_transform == ColorTransform::kXYB) {
+ os << ",qm=" << x_qm_scale << ";" << b_qm_scale;
+ }
+ if (frame_type != FrameType::kReferenceOnly) {
+ os << "," << passes.DebugString();
+ }
+ if (custom_size_or_origin) {
+ os << ",xs=" << frame_size.xsize;
+ os << ",ys=" << frame_size.ysize;
+ if (frame_type == FrameType::kRegularFrame ||
+ frame_type == FrameType::kSkipProgressive) {
+ os << ",x0=" << frame_origin.x0;
+ os << ",y0=" << frame_origin.y0;
+ }
+ }
+ if (upsampling > 1) os << ",up=" << upsampling;
+ if (loop_filter.gab) os << ",Gaborish";
+ if (loop_filter.epf_iters > 0) os << ",epf=" << loop_filter.epf_iters;
+ if (animation_frame.duration > 0) os << ",dur=" << animation_frame.duration;
+ if (frame_type == FrameType::kRegularFrame ||
+ frame_type == FrameType::kSkipProgressive) {
+ os << ",";
+ os << blending_info.DebugString();
+ for (size_t i = 0; i < extra_channel_blending_info.size(); ++i) {
+ os << (i == 0 ? "[" : ";");
+ os << extra_channel_blending_info[i].DebugString();
+ if (i + 1 == extra_channel_blending_info.size()) os << "]";
+ }
+ }
+ if (save_as_reference > 0) os << ",ref=" << save_as_reference;
+ os << "," << (save_before_color_transform ? "before" : "after") << "_ct";
+ if (is_last) os << ",last";
+ return os.str();
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/frame_header.h b/third_party/jpeg-xl/lib/jxl/frame_header.h
new file mode 100644
index 0000000000..5580bcd6fe
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/frame_header.h
@@ -0,0 +1,503 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_FRAME_HEADER_H_
+#define LIB_JXL_FRAME_HEADER_H_
+
+// Frame header with backward and forward-compatible extension capability and
+// compressed integer fields.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image_metadata.h"
+#include "lib/jxl/loop_filter.h"
+
+namespace jxl {
+
+// TODO(eustas): move to proper place?
+// Also used by extra channel names.
+static inline Status VisitNameString(Visitor* JXL_RESTRICT visitor,
+ std::string* name) {
+ uint32_t name_length = static_cast<uint32_t>(name->length());
+ // Allows layer name lengths up to 1071 bytes
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(0), Bits(4), BitsOffset(5, 16),
+ BitsOffset(10, 48), 0, &name_length));
+ if (visitor->IsReading()) {
+ name->resize(name_length);
+ }
+ for (size_t i = 0; i < name_length; i++) {
+ uint32_t c = static_cast<uint8_t>((*name)[i]);
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(8, 0, &c));
+ (*name)[i] = static_cast<char>(c);
+ }
+ return true;
+}
+
+enum class FrameEncoding : uint32_t {
+ kVarDCT,
+ kModular,
+};
+
+enum class ColorTransform : uint32_t {
+ kXYB, // Values are encoded with XYB. May only be used if
+ // ImageBundle::xyb_encoded.
+ kNone, // Values are encoded according to the attached color profile. May
+ // only be used if !ImageBundle::xyb_encoded.
+ kYCbCr, // Values are encoded according to the attached color profile, but
+ // transformed to YCbCr. May only be used if
+ // !ImageBundle::xyb_encoded.
+};
+
+inline std::array<int, 3> JpegOrder(ColorTransform ct, bool is_gray) {
+ if (is_gray) {
+ return {{0, 0, 0}};
+ }
+ JXL_ASSERT(ct != ColorTransform::kXYB);
+ if (ct == ColorTransform::kYCbCr) {
+ return {{1, 0, 2}};
+ } else {
+ return {{0, 1, 2}};
+ }
+}
+
+struct YCbCrChromaSubsampling : public Fields {
+ YCbCrChromaSubsampling();
+ JXL_FIELDS_NAME(YCbCrChromaSubsampling)
+ size_t HShift(size_t c) const { return maxhs_ - kHShift[channel_mode_[c]]; }
+ size_t VShift(size_t c) const { return maxvs_ - kVShift[channel_mode_[c]]; }
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override {
+ // TODO(veluca): consider allowing 4x downsamples
+ for (size_t i = 0; i < 3; i++) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(2, 0, &channel_mode_[i]));
+ }
+ Recompute();
+ return true;
+ }
+
+ uint8_t MaxHShift() const { return maxhs_; }
+ uint8_t MaxVShift() const { return maxvs_; }
+
+ uint8_t RawHShift(size_t c) const { return kHShift[channel_mode_[c]]; }
+ uint8_t RawVShift(size_t c) const { return kVShift[channel_mode_[c]]; }
+
+ // Uses JPEG channel order (Y, Cb, Cr).
+ Status Set(const uint8_t* hsample, const uint8_t* vsample) {
+ for (size_t c = 0; c < 3; c++) {
+ size_t cjpeg = c < 2 ? c ^ 1 : c;
+ size_t i = 0;
+ for (; i < 4; i++) {
+ if (1 << kHShift[i] == hsample[cjpeg] &&
+ 1 << kVShift[i] == vsample[cjpeg]) {
+ channel_mode_[c] = i;
+ break;
+ }
+ }
+ if (i == 4) {
+ return JXL_FAILURE("Invalid subsample mode");
+ }
+ }
+ Recompute();
+ return true;
+ }
+
+ bool Is444() const {
+ return HShift(0) == 0 && VShift(0) == 0 && // Cb
+ HShift(2) == 0 && VShift(2) == 0 && // Cr
+ HShift(1) == 0 && VShift(1) == 0; // Y
+ }
+
+ bool Is420() const {
+ return HShift(0) == 1 && VShift(0) == 1 && // Cb
+ HShift(2) == 1 && VShift(2) == 1 && // Cr
+ HShift(1) == 0 && VShift(1) == 0; // Y
+ }
+
+ bool Is422() const {
+ return HShift(0) == 1 && VShift(0) == 0 && // Cb
+ HShift(2) == 1 && VShift(2) == 0 && // Cr
+ HShift(1) == 0 && VShift(1) == 0; // Y
+ }
+
+ bool Is440() const {
+ return HShift(0) == 0 && VShift(0) == 1 && // Cb
+ HShift(2) == 0 && VShift(2) == 1 && // Cr
+ HShift(1) == 0 && VShift(1) == 0; // Y
+ }
+
+ std::string DebugString() const {
+ if (Is444()) return "444";
+ if (Is420()) return "420";
+ if (Is422()) return "422";
+ if (Is440()) return "440";
+ return "cs" + std::to_string(channel_mode_[0]) +
+ std::to_string(channel_mode_[1]) + std::to_string(channel_mode_[2]);
+ }
+
+ private:
+ void Recompute() {
+ maxhs_ = 0;
+ maxvs_ = 0;
+ for (size_t i = 0; i < 3; i++) {
+ maxhs_ = std::max(maxhs_, kHShift[channel_mode_[i]]);
+ maxvs_ = std::max(maxvs_, kVShift[channel_mode_[i]]);
+ }
+ }
+ static const uint8_t kHShift[4];
+ static const uint8_t kVShift[4];
+ uint32_t channel_mode_[3];
+ uint8_t maxhs_;
+ uint8_t maxvs_;
+};
+
+// Indicates how to combine the current frame with a previously-saved one. Can
+// be independently controlled for color and extra channels. Formulas are
+// indicative and treat alpha as if it is in range 0.0-1.0. In descriptions
+// below, alpha channel is the extra channel of type alpha used for blending
+// according to the blend_channel, or fully opaque if there is no alpha channel.
+// The blending specified here is used for performing blending *after* color
+// transforms - in linear sRGB if blending a XYB-encoded frame on another
+// XYB-encoded frame, in sRGB if blending a frame with kColorSpace == kSRGB, or
+// in the original colorspace otherwise. Blending in XYB or YCbCr is done by
+// using patches.
+enum class BlendMode {
+ // The new values (in the crop) replace the old ones: sample = new
+ kReplace = 0,
+ // The new values (in the crop) get added to the old ones: sample = old + new
+ kAdd = 1,
+ // The new values (in the crop) replace the old ones if alpha>0:
+ // For the alpha channel that is used as source:
+ // alpha = old + new * (1 - old)
+ // For other channels if !alpha_associated:
+ // sample = ((1 - new_alpha) * old * old_alpha + new_alpha * new) / alpha
+ // For other channels if alpha_associated:
+ // sample = (1 - new_alpha) * old + new
+ // The alpha formula applies to the alpha used for the division in the other
+ // channels formula, and applies to the alpha channel itself if its
+ // blend_channel value matches itself.
+ kBlend = 2,
+ // The new values (in the crop) are added to the old ones if alpha>0:
+ // For the alpha channel that is used as source:
+ // sample = sample = old + new * (1 - old)
+ // For other channels: sample = old + alpha * new
+ kAlphaWeightedAdd = 3,
+ // The new values (in the crop) get multiplied by the old ones:
+ // sample = old * new
+ // The range of the new value matters for multiplication purposes, and its
+ // nominal range of 0..1 is computed the same way as this is done for the
+ // alpha values in kBlend and kAlphaWeightedAdd.
+ // If using kMul as a blend mode for color channels, no color transform is
+ // performed on the current frame.
+ kMul = 4,
+};
+
+struct BlendingInfo : public Fields {
+ BlendingInfo();
+ JXL_FIELDS_NAME(BlendingInfo)
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+ BlendMode mode;
+ // Which extra channel to use as alpha channel for blending, only encoded
+ // for blend modes that involve alpha and if there are more than 1 extra
+ // channels.
+ uint32_t alpha_channel;
+ // Clamp alpha or channel values to 0-1 range.
+ bool clamp;
+ // Frame ID to copy from (0-3). Only encoded if blend_mode is not kReplace.
+ uint32_t source;
+
+ std::string DebugString() const;
+
+ size_t nonserialized_num_extra_channels = 0;
+ bool nonserialized_is_partial_frame = false;
+};
+
+// Origin of the current frame. Not present for frames of type
+// kOnlyPatches.
+struct FrameOrigin {
+ int32_t x0, y0; // can be negative.
+};
+
+// Size of the current frame.
+struct FrameSize {
+ uint32_t xsize, ysize;
+};
+
+// AnimationFrame defines duration of animation frames.
+struct AnimationFrame : public Fields {
+ explicit AnimationFrame(const CodecMetadata* metadata);
+ JXL_FIELDS_NAME(AnimationFrame)
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+ // How long to wait [in ticks, see Animation{}] after rendering.
+ // May be 0 if the current frame serves as a foundation for another frame.
+ uint32_t duration;
+
+ uint32_t timecode; // 0xHHMMSSFF
+
+ // Must be set to the one ImageMetadata acting as the full codestream header,
+ // with correct xyb_encoded, list of extra channels, etc...
+ const CodecMetadata* nonserialized_metadata = nullptr;
+};
+
+// For decoding to lower resolutions. Only used for kRegular frames.
+struct Passes : public Fields {
+ Passes();
+ JXL_FIELDS_NAME(Passes)
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+ void GetDownsamplingBracket(size_t pass, int& minShift, int& maxShift) const {
+ maxShift = 2;
+ minShift = 3;
+ for (size_t i = 0;; i++) {
+ for (uint32_t j = 0; j < num_downsample; ++j) {
+ if (i == last_pass[j]) {
+ if (downsample[j] == 8) minShift = 3;
+ if (downsample[j] == 4) minShift = 2;
+ if (downsample[j] == 2) minShift = 1;
+ if (downsample[j] == 1) minShift = 0;
+ }
+ }
+ if (i == num_passes - 1) minShift = 0;
+ if (i == pass) return;
+ maxShift = minShift - 1;
+ }
+ }
+
+ uint32_t GetDownsamplingTargetForCompletedPasses(uint32_t num_p) const {
+ if (num_p >= num_passes) return 1;
+ uint32_t retval = 8;
+ for (uint32_t i = 0; i < num_downsample; ++i) {
+ if (num_p > last_pass[i]) {
+ retval = std::min(retval, downsample[i]);
+ }
+ }
+ return retval;
+ }
+
+ std::string DebugString() const;
+
+ uint32_t num_passes; // <= kMaxNumPasses
+ uint32_t num_downsample; // <= num_passes
+
+ // Array of num_downsample pairs. downsample=1/last_pass=num_passes-1 and
+ // downsample=8/last_pass=0 need not be specified; they are implicit.
+ uint32_t downsample[kMaxNumPasses];
+ uint32_t last_pass[kMaxNumPasses];
+ // Array of shift values for each pass. It is implicitly assumed to be 0 for
+ // the last pass.
+ uint32_t shift[kMaxNumPasses];
+};
+
+enum FrameType {
+ // A "regular" frame: might be a crop, and will be blended on a previous
+ // frame, if any, and displayed or blended in future frames.
+ kRegularFrame = 0,
+ // A DC frame: this frame is downsampled and will be *only* used as the DC of
+ // a future frame and, possibly, for previews. Cannot be cropped, blended, or
+ // referenced by patches or blending modes. Frames that *use* a DC frame
+ // cannot have non-default sizes either.
+ kDCFrame = 1,
+ // A PatchesSource frame: this frame will be only used as a source frame for
+ // taking patches. Can be cropped, but cannot have non-(0, 0) x0 and y0.
+ kReferenceOnly = 2,
+ // Same as kRegularFrame, but not used for progressive rendering. This also
+ // implies no early display of DC.
+ kSkipProgressive = 3,
+};
+
+// Image/frame := one of more of these, where the last has is_last = true.
+// Starts at a byte-aligned address "a"; the next pass starts at "a + size".
+struct FrameHeader : public Fields {
+ // Optional postprocessing steps. These flags are the source of truth;
+ // Override must set/clear them rather than change their meaning. Values
+ // chosen such that typical flags == 0 (encoded in only two bits).
+ enum Flags {
+ // Often but not always off => low bit value:
+
+ // Inject noise into decoded output.
+ kNoise = 1,
+
+ // Overlay patches.
+ kPatches = 2,
+
+ // 4, 8 = reserved for future sometimes-off
+
+ // Overlay splines.
+ kSplines = 16,
+
+ kUseDcFrame = 32, // Implies kSkipAdaptiveDCSmoothing.
+
+ // 64 = reserved for future often-off
+
+ // Almost always on => negated:
+
+ kSkipAdaptiveDCSmoothing = 128,
+ };
+
+ explicit FrameHeader(const CodecMetadata* metadata);
+ JXL_FIELDS_NAME(FrameHeader)
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+ // Sets/clears `flag` based upon `condition`.
+ void UpdateFlag(const bool condition, const uint64_t flag) {
+ if (condition) {
+ flags |= flag;
+ } else {
+ flags &= ~flag;
+ }
+ }
+
+ // Returns true if this frame is supposed to be saved for future usage by
+ // other frames.
+ bool CanBeReferenced() const {
+ // DC frames cannot be referenced. The last frame cannot be referenced. A
+ // duration 0 frame makes little sense if it is not referenced. A
+ // non-duration 0 frame may or may not be referenced.
+ return !is_last && frame_type != FrameType::kDCFrame &&
+ (animation_frame.duration == 0 || save_as_reference != 0);
+ }
+
+ mutable bool all_default;
+
+ // Always present
+ FrameEncoding encoding;
+ // Some versions of UBSAN complain in VisitFrameType if not initialized.
+ FrameType frame_type = FrameType::kRegularFrame;
+
+ uint64_t flags;
+
+ ColorTransform color_transform;
+ YCbCrChromaSubsampling chroma_subsampling;
+
+ uint32_t group_size_shift; // only if encoding == kModular;
+
+ uint32_t x_qm_scale; // only if VarDCT and color_transform == kXYB
+ uint32_t b_qm_scale; // only if VarDCT and color_transform == kXYB
+
+ std::string name;
+
+ // Skipped for kReferenceOnly.
+ Passes passes;
+
+ // Skipped for kDCFrame
+ bool custom_size_or_origin;
+ FrameSize frame_size;
+
+ // upsampling factors for color and extra channels.
+ // Upsampling is always performed before applying any inverse color transform.
+ // Skipped (1) if kUseDCFrame
+ uint32_t upsampling;
+ std::vector<uint32_t> extra_channel_upsampling;
+
+ // Only for kRegular frames.
+ FrameOrigin frame_origin;
+
+ BlendingInfo blending_info;
+ std::vector<BlendingInfo> extra_channel_blending_info;
+
+ // Animation info for this frame.
+ AnimationFrame animation_frame;
+
+ // This is the last frame.
+ bool is_last;
+
+ // ID to refer to this frame with. 0-3, not present if kDCFrame.
+ // 0 has a special meaning for kRegular frames of nonzero duration: it defines
+ // a frame that will not be referenced in the future.
+ uint32_t save_as_reference;
+
+ // Whether to save this frame before or after the color transform. A frame
+ // that is saved before the color tansform can only be used for blending
+ // through patches. On the contrary, a frame that is saved after the color
+ // transform can only be used for blending through blending modes.
+ // Irrelevant for extra channel blending. Can only be true if
+ // blending_info.mode == kReplace and this is not a partial kRegularFrame; if
+ // this is a DC frame, it is always true.
+ bool save_before_color_transform;
+
+ uint32_t dc_level; // 1-4 if kDCFrame (0 otherwise).
+
+ // Must be set to the one ImageMetadata acting as the full codestream header,
+ // with correct xyb_encoded, list of extra channels, etc...
+ const CodecMetadata* nonserialized_metadata = nullptr;
+
+ // NOTE: This is ignored by AllDefault.
+ LoopFilter loop_filter;
+
+ bool nonserialized_is_preview = false;
+
+ size_t default_xsize() const {
+ if (!nonserialized_metadata) return 0;
+ if (nonserialized_is_preview) {
+ return nonserialized_metadata->m.preview_size.xsize();
+ }
+ return nonserialized_metadata->xsize();
+ }
+
+ size_t default_ysize() const {
+ if (!nonserialized_metadata) return 0;
+ if (nonserialized_is_preview) {
+ return nonserialized_metadata->m.preview_size.ysize();
+ }
+ return nonserialized_metadata->ysize();
+ }
+
+ FrameDimensions ToFrameDimensions() const {
+ size_t xsize = default_xsize();
+ size_t ysize = default_ysize();
+
+ xsize = frame_size.xsize ? frame_size.xsize : xsize;
+ ysize = frame_size.ysize ? frame_size.ysize : ysize;
+
+ if (dc_level != 0) {
+ xsize = DivCeil(xsize, 1 << (3 * dc_level));
+ ysize = DivCeil(ysize, 1 << (3 * dc_level));
+ }
+
+ FrameDimensions frame_dim;
+ frame_dim.Set(xsize, ysize, group_size_shift,
+ chroma_subsampling.MaxHShift(),
+ chroma_subsampling.MaxVShift(),
+ encoding == FrameEncoding::kModular, upsampling);
+ return frame_dim;
+ }
+
+ // True if a color transform should be applied to this frame.
+ bool needs_color_transform() const {
+ return !save_before_color_transform ||
+ frame_type == FrameType::kRegularFrame ||
+ frame_type == FrameType::kSkipProgressive;
+ }
+
+ std::string DebugString() const;
+
+ uint64_t extensions;
+};
+
+Status ReadFrameHeader(BitReader* JXL_RESTRICT reader,
+ FrameHeader* JXL_RESTRICT frame);
+
+// Shared by enc/dec. 5F and 13 are by far the most common for d1/2/4/8, 0
+// ensures low overhead for small images.
+static constexpr U32Enc kOrderEnc =
+ U32Enc(Val(0x5F), Val(0x13), Val(0), Bits(kNumOrders));
+
+} // namespace jxl
+
+#endif // LIB_JXL_FRAME_HEADER_H_
diff --git a/third_party/jpeg-xl/lib/jxl/gamma_correct_test.cc b/third_party/jpeg-xl/lib/jxl/gamma_correct_test.cc
new file mode 100644
index 0000000000..131ec4fa83
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/gamma_correct_test.cc
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdlib.h>
+
+#include <algorithm>
+
+#include "lib/jxl/enc_gamma_correct.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(GammaCorrectTest, TestLinearToSrgbEdgeCases) {
+ EXPECT_EQ(0, LinearToSrgb8Direct(0.0));
+ EXPECT_NEAR(0, LinearToSrgb8Direct(1E-6f), 2E-5);
+ EXPECT_EQ(0, LinearToSrgb8Direct(-1E-6f));
+ EXPECT_EQ(0, LinearToSrgb8Direct(-1E6));
+ EXPECT_NEAR(1, LinearToSrgb8Direct(1 - 1E-6f), 1E-5);
+ EXPECT_EQ(1, LinearToSrgb8Direct(1 + 1E-6f));
+ EXPECT_EQ(1, LinearToSrgb8Direct(1E6));
+}
+
+TEST(GammaCorrectTest, TestRoundTrip) {
+ // NOLINTNEXTLINE(clang-analyzer-security.FloatLoopCounter)
+ for (double linear = 0.0; linear <= 1.0; linear += 1E-7) {
+ const double srgb = LinearToSrgb8Direct(linear);
+ const double linear2 = Srgb8ToLinearDirect(srgb);
+ ASSERT_LT(std::abs(linear - linear2), 2E-13)
+ << "linear = " << linear << ", linear2 = " << linear2;
+ }
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/gauss_blur.cc b/third_party/jpeg-xl/lib/jxl/gauss_blur.cc
new file mode 100644
index 0000000000..82384e4c64
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/gauss_blur.cc
@@ -0,0 +1,623 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/gauss_blur.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <cmath>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/gauss_blur.cc"
+#include <hwy/cache_control.h>
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/matrix_ops.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Broadcast;
+using hwy::HWY_NAMESPACE::GetLane;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::NegMulSub;
+#if HWY_TARGET != HWY_SCALAR
+using hwy::HWY_NAMESPACE::ShiftLeftLanes;
+#endif
+using hwy::HWY_NAMESPACE::Vec;
+
+void FastGaussian1D(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+ const float* JXL_RESTRICT in, intptr_t width,
+ float* JXL_RESTRICT out) {
+ // Although the current output depends on the previous output, we can unroll
+ // up to 4x by precomputing up to fourth powers of the constants. Beyond that,
+ // numerical precision might become a problem. Macro because this is tested
+ // in #if alongside HWY_TARGET.
+#define JXL_GAUSS_MAX_LANES 4
+ using D = HWY_CAPPED(float, JXL_GAUSS_MAX_LANES);
+ using V = Vec<D>;
+ const D d;
+ const V mul_in_1 = Load(d, rg->mul_in + 0 * 4);
+ const V mul_in_3 = Load(d, rg->mul_in + 1 * 4);
+ const V mul_in_5 = Load(d, rg->mul_in + 2 * 4);
+ const V mul_prev_1 = Load(d, rg->mul_prev + 0 * 4);
+ const V mul_prev_3 = Load(d, rg->mul_prev + 1 * 4);
+ const V mul_prev_5 = Load(d, rg->mul_prev + 2 * 4);
+ const V mul_prev2_1 = Load(d, rg->mul_prev2 + 0 * 4);
+ const V mul_prev2_3 = Load(d, rg->mul_prev2 + 1 * 4);
+ const V mul_prev2_5 = Load(d, rg->mul_prev2 + 2 * 4);
+ V prev_1 = Zero(d);
+ V prev_3 = Zero(d);
+ V prev_5 = Zero(d);
+ V prev2_1 = Zero(d);
+ V prev2_3 = Zero(d);
+ V prev2_5 = Zero(d);
+
+ const intptr_t N = rg->radius;
+
+ intptr_t n = -N + 1;
+ // Left side with bounds checks and only write output after n >= 0.
+ const intptr_t first_aligned = RoundUpTo(N + 1, Lanes(d));
+ for (; n < std::min(first_aligned, width); ++n) {
+ const intptr_t left = n - N - 1;
+ const intptr_t right = n + N - 1;
+ const float left_val = left >= 0 ? in[left] : 0.0f;
+ const float right_val = right < width ? in[right] : 0.0f;
+ const V sum = Set(d, left_val + right_val);
+
+ // (Only processing a single lane here, no need to broadcast)
+ V out_1 = Mul(sum, mul_in_1);
+ V out_3 = Mul(sum, mul_in_3);
+ V out_5 = Mul(sum, mul_in_5);
+
+ out_1 = MulAdd(mul_prev2_1, prev2_1, out_1);
+ out_3 = MulAdd(mul_prev2_3, prev2_3, out_3);
+ out_5 = MulAdd(mul_prev2_5, prev2_5, out_5);
+ prev2_1 = prev_1;
+ prev2_3 = prev_3;
+ prev2_5 = prev_5;
+
+ out_1 = MulAdd(mul_prev_1, prev_1, out_1);
+ out_3 = MulAdd(mul_prev_3, prev_3, out_3);
+ out_5 = MulAdd(mul_prev_5, prev_5, out_5);
+ prev_1 = out_1;
+ prev_3 = out_3;
+ prev_5 = out_5;
+
+ if (n >= 0) {
+ out[n] = GetLane(Add(out_1, Add(out_3, out_5)));
+ }
+ }
+
+ // The above loop is effectively scalar but it is convenient to use the same
+ // prev/prev2 variables, so broadcast to each lane before the unrolled loop.
+#if HWY_TARGET != HWY_SCALAR && JXL_GAUSS_MAX_LANES > 1
+ prev2_1 = Broadcast<0>(prev2_1);
+ prev2_3 = Broadcast<0>(prev2_3);
+ prev2_5 = Broadcast<0>(prev2_5);
+ prev_1 = Broadcast<0>(prev_1);
+ prev_3 = Broadcast<0>(prev_3);
+ prev_5 = Broadcast<0>(prev_5);
+#endif
+
+ // Unrolled, no bounds checking needed.
+ for (; n < width - N + 1 - (JXL_GAUSS_MAX_LANES - 1); n += Lanes(d)) {
+ const V sum = Add(LoadU(d, in + n - N - 1), LoadU(d, in + n + N - 1));
+
+ // To get a vector of output(s), we multiply broadcasted vectors (of each
+ // input plus the two previous outputs) and add them all together.
+ // Incremental broadcasting and shifting is expected to be cheaper than
+ // horizontal adds or transposing 4x4 values because they run on a different
+ // port, concurrently with the FMA.
+ const V in0 = Broadcast<0>(sum);
+ V out_1 = Mul(in0, mul_in_1);
+ V out_3 = Mul(in0, mul_in_3);
+ V out_5 = Mul(in0, mul_in_5);
+
+#if HWY_TARGET != HWY_SCALAR && JXL_GAUSS_MAX_LANES >= 2
+ const V in1 = Broadcast<1>(sum);
+ out_1 = MulAdd(ShiftLeftLanes<1>(mul_in_1), in1, out_1);
+ out_3 = MulAdd(ShiftLeftLanes<1>(mul_in_3), in1, out_3);
+ out_5 = MulAdd(ShiftLeftLanes<1>(mul_in_5), in1, out_5);
+
+#if JXL_GAUSS_MAX_LANES >= 4
+ const V in2 = Broadcast<2>(sum);
+ out_1 = MulAdd(ShiftLeftLanes<2>(mul_in_1), in2, out_1);
+ out_3 = MulAdd(ShiftLeftLanes<2>(mul_in_3), in2, out_3);
+ out_5 = MulAdd(ShiftLeftLanes<2>(mul_in_5), in2, out_5);
+
+ const V in3 = Broadcast<3>(sum);
+ out_1 = MulAdd(ShiftLeftLanes<3>(mul_in_1), in3, out_1);
+ out_3 = MulAdd(ShiftLeftLanes<3>(mul_in_3), in3, out_3);
+ out_5 = MulAdd(ShiftLeftLanes<3>(mul_in_5), in3, out_5);
+#endif
+#endif
+
+ out_1 = MulAdd(mul_prev2_1, prev2_1, out_1);
+ out_3 = MulAdd(mul_prev2_3, prev2_3, out_3);
+ out_5 = MulAdd(mul_prev2_5, prev2_5, out_5);
+
+ out_1 = MulAdd(mul_prev_1, prev_1, out_1);
+ out_3 = MulAdd(mul_prev_3, prev_3, out_3);
+ out_5 = MulAdd(mul_prev_5, prev_5, out_5);
+#if HWY_TARGET == HWY_SCALAR || JXL_GAUSS_MAX_LANES == 1
+ prev2_1 = prev_1;
+ prev2_3 = prev_3;
+ prev2_5 = prev_5;
+ prev_1 = out_1;
+ prev_3 = out_3;
+ prev_5 = out_5;
+#else
+ prev2_1 = Broadcast<JXL_GAUSS_MAX_LANES - 2>(out_1);
+ prev2_3 = Broadcast<JXL_GAUSS_MAX_LANES - 2>(out_3);
+ prev2_5 = Broadcast<JXL_GAUSS_MAX_LANES - 2>(out_5);
+ prev_1 = Broadcast<JXL_GAUSS_MAX_LANES - 1>(out_1);
+ prev_3 = Broadcast<JXL_GAUSS_MAX_LANES - 1>(out_3);
+ prev_5 = Broadcast<JXL_GAUSS_MAX_LANES - 1>(out_5);
+#endif
+
+ Store(Add(out_1, Add(out_3, out_5)), d, out + n);
+ }
+
+ // Remainder handling with bounds checks
+ for (; n < width; ++n) {
+ const intptr_t left = n - N - 1;
+ const intptr_t right = n + N - 1;
+ const float left_val = left >= 0 ? in[left] : 0.0f;
+ const float right_val = right < width ? in[right] : 0.0f;
+ const V sum = Set(d, left_val + right_val);
+
+ // (Only processing a single lane here, no need to broadcast)
+ V out_1 = Mul(sum, mul_in_1);
+ V out_3 = Mul(sum, mul_in_3);
+ V out_5 = Mul(sum, mul_in_5);
+
+ out_1 = MulAdd(mul_prev2_1, prev2_1, out_1);
+ out_3 = MulAdd(mul_prev2_3, prev2_3, out_3);
+ out_5 = MulAdd(mul_prev2_5, prev2_5, out_5);
+ prev2_1 = prev_1;
+ prev2_3 = prev_3;
+ prev2_5 = prev_5;
+
+ out_1 = MulAdd(mul_prev_1, prev_1, out_1);
+ out_3 = MulAdd(mul_prev_3, prev_3, out_3);
+ out_5 = MulAdd(mul_prev_5, prev_5, out_5);
+ prev_1 = out_1;
+ prev_3 = out_3;
+ prev_5 = out_5;
+
+ out[n] = GetLane(Add(out_1, Add(out_3, out_5)));
+ }
+}
+
+// Ring buffer is for n, n-1, n-2; round up to 4 for faster modulo.
+constexpr size_t kMod = 4;
+
+// Avoids an unnecessary store during warmup.
+struct OutputNone {
+ template <class V>
+ void operator()(const V& /*unused*/, float* JXL_RESTRICT /*pos*/,
+ ptrdiff_t /*offset*/) const {}
+};
+
+// Common case: write output vectors in all VerticalBlock except warmup.
+struct OutputStore {
+ template <class V>
+ void operator()(const V& out, float* JXL_RESTRICT pos,
+ ptrdiff_t offset) const {
+ // Stream helps for large images but is slower for images that fit in cache.
+ Store(out, HWY_FULL(float)(), pos + offset);
+ }
+};
+
+// At top/bottom borders, we don't have two inputs to load, so avoid addition.
+// pos may even point to all zeros if the row is outside the input image.
+class SingleInput {
+ public:
+ explicit SingleInput(const float* pos) : pos_(pos) {}
+ Vec<HWY_FULL(float)> operator()(const size_t offset) const {
+ return Load(HWY_FULL(float)(), pos_ + offset);
+ }
+ const float* pos_;
+};
+
+// In the middle of the image, we need to load from a row above and below, and
+// return the sum.
+class TwoInputs {
+ public:
+ TwoInputs(const float* pos1, const float* pos2) : pos1_(pos1), pos2_(pos2) {}
+ Vec<HWY_FULL(float)> operator()(const size_t offset) const {
+ const auto in1 = Load(HWY_FULL(float)(), pos1_ + offset);
+ const auto in2 = Load(HWY_FULL(float)(), pos2_ + offset);
+ return Add(in1, in2);
+ }
+
+ private:
+ const float* pos1_;
+ const float* pos2_;
+};
+
+// Block := kVectors consecutive full vectors (one cache line except on the
+// right boundary, where we can only rely on having one vector). Unrolling to
+// the cache line size improves cache utilization.
+template <size_t kVectors, class V, class Input, class Output>
+void VerticalBlock(const V& d1_1, const V& d1_3, const V& d1_5, const V& n2_1,
+ const V& n2_3, const V& n2_5, const Input& input,
+ size_t& ctr, float* ring_buffer, const Output output,
+ float* JXL_RESTRICT out_pos) {
+ const HWY_FULL(float) d;
+ constexpr size_t kVN = MaxLanes(d);
+ // More cache-friendly to process an entirely cache line at a time
+ constexpr size_t kLanes = kVectors * kVN;
+
+ float* JXL_RESTRICT y_1 = ring_buffer + 0 * kLanes * kMod;
+ float* JXL_RESTRICT y_3 = ring_buffer + 1 * kLanes * kMod;
+ float* JXL_RESTRICT y_5 = ring_buffer + 2 * kLanes * kMod;
+
+ const size_t n_0 = (++ctr) % kMod;
+ const size_t n_1 = (ctr - 1) % kMod;
+ const size_t n_2 = (ctr - 2) % kMod;
+
+ for (size_t idx_vec = 0; idx_vec < kVectors; ++idx_vec) {
+ const V sum = input(idx_vec * kVN);
+
+ const V y_n1_1 = Load(d, y_1 + kLanes * n_1 + idx_vec * kVN);
+ const V y_n1_3 = Load(d, y_3 + kLanes * n_1 + idx_vec * kVN);
+ const V y_n1_5 = Load(d, y_5 + kLanes * n_1 + idx_vec * kVN);
+ const V y_n2_1 = Load(d, y_1 + kLanes * n_2 + idx_vec * kVN);
+ const V y_n2_3 = Load(d, y_3 + kLanes * n_2 + idx_vec * kVN);
+ const V y_n2_5 = Load(d, y_5 + kLanes * n_2 + idx_vec * kVN);
+ // (35)
+ const V y1 = MulAdd(n2_1, sum, NegMulSub(d1_1, y_n1_1, y_n2_1));
+ const V y3 = MulAdd(n2_3, sum, NegMulSub(d1_3, y_n1_3, y_n2_3));
+ const V y5 = MulAdd(n2_5, sum, NegMulSub(d1_5, y_n1_5, y_n2_5));
+ Store(y1, d, y_1 + kLanes * n_0 + idx_vec * kVN);
+ Store(y3, d, y_3 + kLanes * n_0 + idx_vec * kVN);
+ Store(y5, d, y_5 + kLanes * n_0 + idx_vec * kVN);
+ output(Add(y1, Add(y3, y5)), out_pos, idx_vec * kVN);
+ }
+ // NOTE: flushing cache line out_pos hurts performance - less so with
+ // clflushopt than clflush but still a significant slowdown.
+}
+
+// Reads/writes one block (kVectors full vectors) in each row.
+template <size_t kVectors>
+void VerticalStrip(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+ const ImageF& in, const size_t x, ImageF* JXL_RESTRICT out) {
+ // We're iterating vertically, so use multiple full-length vectors (each lane
+ // is one column of row n).
+ using D = HWY_FULL(float);
+ using V = Vec<D>;
+ const D d;
+ constexpr size_t kVN = MaxLanes(d);
+ // More cache-friendly to process an entirely cache line at a time
+ constexpr size_t kLanes = kVectors * kVN;
+#if HWY_TARGET == HWY_SCALAR
+ const V d1_1 = Set(d, rg->d1[0 * 4]);
+ const V d1_3 = Set(d, rg->d1[1 * 4]);
+ const V d1_5 = Set(d, rg->d1[2 * 4]);
+ const V n2_1 = Set(d, rg->n2[0 * 4]);
+ const V n2_3 = Set(d, rg->n2[1 * 4]);
+ const V n2_5 = Set(d, rg->n2[2 * 4]);
+#else
+ const V d1_1 = LoadDup128(d, rg->d1 + 0 * 4);
+ const V d1_3 = LoadDup128(d, rg->d1 + 1 * 4);
+ const V d1_5 = LoadDup128(d, rg->d1 + 2 * 4);
+ const V n2_1 = LoadDup128(d, rg->n2 + 0 * 4);
+ const V n2_3 = LoadDup128(d, rg->n2 + 1 * 4);
+ const V n2_5 = LoadDup128(d, rg->n2 + 2 * 4);
+#endif
+
+ const size_t N = rg->radius;
+ const size_t ysize = in.ysize();
+
+ size_t ctr = 0;
+ HWY_ALIGN float ring_buffer[3 * kLanes * kMod] = {0};
+ HWY_ALIGN static constexpr float zero[kLanes] = {0};
+
+ // Warmup: top is out of bounds (zero padded), bottom is usually in-bounds.
+ ssize_t n = -static_cast<ssize_t>(N) + 1;
+ for (; n < 0; ++n) {
+ // bottom is always non-negative since n is initialized in -N + 1.
+ const size_t bottom = n + N - 1;
+ VerticalBlock<kVectors>(
+ d1_1, d1_3, d1_5, n2_1, n2_3, n2_5,
+ SingleInput(bottom < ysize ? in.ConstRow(bottom) + x : zero), ctr,
+ ring_buffer, OutputNone(), nullptr);
+ }
+ JXL_DASSERT(n >= 0);
+
+ // Start producing output; top is still out of bounds.
+ for (; static_cast<size_t>(n) < std::min(N + 1, ysize); ++n) {
+ const size_t bottom = n + N - 1;
+ VerticalBlock<kVectors>(
+ d1_1, d1_3, d1_5, n2_1, n2_3, n2_5,
+ SingleInput(bottom < ysize ? in.ConstRow(bottom) + x : zero), ctr,
+ ring_buffer, OutputStore(), out->Row(n) + x);
+ }
+
+ // Interior outputs with prefetching and without bounds checks.
+ constexpr size_t kPrefetchRows = 8;
+ for (; n < static_cast<ssize_t>(ysize - N + 1 - kPrefetchRows); ++n) {
+ const size_t top = n - N - 1;
+ const size_t bottom = n + N - 1;
+ VerticalBlock<kVectors>(
+ d1_1, d1_3, d1_5, n2_1, n2_3, n2_5,
+ TwoInputs(in.ConstRow(top) + x, in.ConstRow(bottom) + x), ctr,
+ ring_buffer, OutputStore(), out->Row(n) + x);
+ hwy::Prefetch(in.ConstRow(top + kPrefetchRows) + x);
+ hwy::Prefetch(in.ConstRow(bottom + kPrefetchRows) + x);
+ }
+
+ // Bottom border without prefetching and with bounds checks.
+ for (; static_cast<size_t>(n) < ysize; ++n) {
+ const size_t top = n - N - 1;
+ const size_t bottom = n + N - 1;
+ VerticalBlock<kVectors>(
+ d1_1, d1_3, d1_5, n2_1, n2_3, n2_5,
+ TwoInputs(in.ConstRow(top) + x,
+ bottom < ysize ? in.ConstRow(bottom) + x : zero),
+ ctr, ring_buffer, OutputStore(), out->Row(n) + x);
+ }
+}
+
+// Apply 1D vertical scan to multiple columns (one per vector lane).
+// Not yet parallelized.
+void FastGaussianVertical(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+ const ImageF& in, ThreadPool* /*pool*/,
+ ImageF* JXL_RESTRICT out) {
+ PROFILER_FUNC;
+ JXL_CHECK(SameSize(in, *out));
+
+ constexpr size_t kCacheLineLanes = 64 / sizeof(float);
+ constexpr size_t kVN = MaxLanes(HWY_FULL(float)());
+ constexpr size_t kCacheLineVectors =
+ (kVN < kCacheLineLanes) ? (kCacheLineLanes / kVN) : 4;
+ constexpr size_t kFastPace = kCacheLineVectors * kVN;
+
+ size_t x = 0;
+ for (; x + kFastPace <= in.xsize(); x += kFastPace) {
+ VerticalStrip<kCacheLineVectors>(rg, in, x, out);
+ }
+ for (; x < in.xsize(); x += kVN) {
+ VerticalStrip<1>(rg, in, x, out);
+ }
+}
+
+// TODO(veluca): consider replacing with FastGaussian.
+ImageF ConvolveXSampleAndTranspose(const ImageF& in,
+ const std::vector<float>& kernel,
+ const size_t res) {
+ JXL_ASSERT(kernel.size() % 2 == 1);
+ JXL_ASSERT(in.xsize() % res == 0);
+ const size_t offset = res / 2;
+ const size_t out_xsize = in.xsize() / res;
+ ImageF out(in.ysize(), out_xsize);
+ const int r = kernel.size() / 2;
+ HWY_FULL(float) df;
+ std::vector<float> row_tmp(in.xsize() + 2 * r + Lanes(df));
+ float* const JXL_RESTRICT rowp = &row_tmp[r];
+ std::vector<float> padded_k = kernel;
+ padded_k.resize(padded_k.size() + Lanes(df));
+ const float* const kernelp = &padded_k[r];
+ for (size_t y = 0; y < in.ysize(); ++y) {
+ ExtrapolateBorders(in.Row(y), rowp, in.xsize(), r);
+ size_t x = offset, ox = 0;
+ for (; x < static_cast<uint32_t>(r) && x < in.xsize(); x += res, ++ox) {
+ float sum = 0.0f;
+ for (int i = -r; i <= r; ++i) {
+ sum += rowp[std::max<int>(
+ 0, std::min<int>(static_cast<int>(x) + i, in.xsize()))] *
+ kernelp[i];
+ }
+ out.Row(ox)[y] = sum;
+ }
+ for (; x + r < in.xsize(); x += res, ++ox) {
+ auto sum = Zero(df);
+ for (int i = -r; i <= r; i += Lanes(df)) {
+ sum = MulAdd(LoadU(df, rowp + x + i), LoadU(df, kernelp + i), sum);
+ }
+ out.Row(ox)[y] = GetLane(SumOfLanes(df, sum));
+ }
+ for (; x < in.xsize(); x += res, ++ox) {
+ float sum = 0.0f;
+ for (int i = -r; i <= r; ++i) {
+ sum += rowp[std::max<int>(
+ 0, std::min<int>(static_cast<int>(x) + i, in.xsize()))] *
+ kernelp[i];
+ }
+ out.Row(ox)[y] = sum;
+ }
+ }
+ return out;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(FastGaussian1D);
+HWY_EXPORT(ConvolveXSampleAndTranspose);
+void FastGaussian1D(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+ const float* JXL_RESTRICT in, intptr_t width,
+ float* JXL_RESTRICT out) {
+ return HWY_DYNAMIC_DISPATCH(FastGaussian1D)(rg, in, width, out);
+}
+
+HWY_EXPORT(FastGaussianVertical); // Local function.
+
+void ExtrapolateBorders(const float* const JXL_RESTRICT row_in,
+ float* const JXL_RESTRICT row_out, const int xsize,
+ const int radius) {
+ const int lastcol = xsize - 1;
+ for (int x = 1; x <= radius; ++x) {
+ row_out[-x] = row_in[std::min(x, xsize - 1)];
+ }
+ memcpy(row_out, row_in, xsize * sizeof(row_out[0]));
+ for (int x = 1; x <= radius; ++x) {
+ row_out[lastcol + x] = row_in[std::max(0, lastcol - x)];
+ }
+}
+
+ImageF ConvolveXSampleAndTranspose(const ImageF& in,
+ const std::vector<float>& kernel,
+ const size_t res) {
+ return HWY_DYNAMIC_DISPATCH(ConvolveXSampleAndTranspose)(in, kernel, res);
+}
+
+Image3F ConvolveXSampleAndTranspose(const Image3F& in,
+ const std::vector<float>& kernel,
+ const size_t res) {
+ return Image3F(ConvolveXSampleAndTranspose(in.Plane(0), kernel, res),
+ ConvolveXSampleAndTranspose(in.Plane(1), kernel, res),
+ ConvolveXSampleAndTranspose(in.Plane(2), kernel, res));
+}
+
+ImageF ConvolveAndSample(const ImageF& in, const std::vector<float>& kernel,
+ const size_t res) {
+ ImageF tmp = ConvolveXSampleAndTranspose(in, kernel, res);
+ return ConvolveXSampleAndTranspose(tmp, kernel, res);
+}
+
+// Implements "Recursive Implementation of the Gaussian Filter Using Truncated
+// Cosine Functions" by Charalampidis [2016].
+hwy::AlignedUniquePtr<RecursiveGaussian> CreateRecursiveGaussian(double sigma) {
+ PROFILER_FUNC;
+ auto rg = hwy::MakeUniqueAligned<RecursiveGaussian>();
+ constexpr double kPi = 3.141592653589793238;
+
+ const double radius = roundf(3.2795 * sigma + 0.2546); // (57), "N"
+
+ // Table I, first row
+ const double pi_div_2r = kPi / (2.0 * radius);
+ const double omega[3] = {pi_div_2r, 3.0 * pi_div_2r, 5.0 * pi_div_2r};
+
+ // (37), k={1,3,5}
+ const double p_1 = +1.0 / std::tan(0.5 * omega[0]);
+ const double p_3 = -1.0 / std::tan(0.5 * omega[1]);
+ const double p_5 = +1.0 / std::tan(0.5 * omega[2]);
+
+ // (44), k={1,3,5}
+ const double r_1 = +p_1 * p_1 / std::sin(omega[0]);
+ const double r_3 = -p_3 * p_3 / std::sin(omega[1]);
+ const double r_5 = +p_5 * p_5 / std::sin(omega[2]);
+
+ // (50), k={1,3,5}
+ const double neg_half_sigma2 = -0.5 * sigma * sigma;
+ const double recip_radius = 1.0 / radius;
+ double rho[3];
+ for (size_t i = 0; i < 3; ++i) {
+ rho[i] = std::exp(neg_half_sigma2 * omega[i] * omega[i]) * recip_radius;
+ }
+
+ // second part of (52), k1,k2 = 1,3; 3,5; 5,1
+ const double D_13 = p_1 * r_3 - r_1 * p_3;
+ const double D_35 = p_3 * r_5 - r_3 * p_5;
+ const double D_51 = p_5 * r_1 - r_5 * p_1;
+
+ // (52), k=5
+ const double recip_d13 = 1.0 / D_13;
+ const double zeta_15 = D_35 * recip_d13;
+ const double zeta_35 = D_51 * recip_d13;
+
+ double A[9] = {p_1, p_3, p_5, //
+ r_1, r_3, r_5, // (56)
+ zeta_15, zeta_35, 1};
+ JXL_CHECK(Inv3x3Matrix(A));
+ const double gamma[3] = {1, radius * radius - sigma * sigma, // (55)
+ zeta_15 * rho[0] + zeta_35 * rho[1] + rho[2]};
+ double beta[3];
+ Mul3x3Vector(A, gamma, beta); // (53)
+
+ // Sanity check: correctly solved for beta (IIR filter weights are normalized)
+ const double sum = beta[0] * p_1 + beta[1] * p_3 + beta[2] * p_5; // (39)
+ JXL_ASSERT(std::abs(sum - 1) < 1E-12);
+ (void)sum;
+
+ rg->radius = static_cast<int>(radius);
+
+ double n2[3];
+ double d1[3];
+ for (size_t i = 0; i < 3; ++i) {
+ n2[i] = -beta[i] * std::cos(omega[i] * (radius + 1.0)); // (33)
+ d1[i] = -2.0 * std::cos(omega[i]); // (33)
+
+ for (size_t lane = 0; lane < 4; ++lane) {
+ rg->n2[4 * i + lane] = static_cast<float>(n2[i]);
+ rg->d1[4 * i + lane] = static_cast<float>(d1[i]);
+ }
+
+ const double d_2 = d1[i] * d1[i];
+
+ // Obtained by expanding (35) for four consecutive outputs via sympy:
+ // n, d, p, pp = symbols('n d p pp')
+ // i0, i1, i2, i3 = symbols('i0 i1 i2 i3')
+ // o0, o1, o2, o3 = symbols('o0 o1 o2 o3')
+ // o0 = n*i0 - d*p - pp
+ // o1 = n*i1 - d*o0 - p
+ // o2 = n*i2 - d*o1 - o0
+ // o3 = n*i3 - d*o2 - o1
+ // Then expand(o3) and gather terms for p(prev), pp(prev2) etc.
+ rg->mul_prev[4 * i + 0] = -d1[i];
+ rg->mul_prev[4 * i + 1] = d_2 - 1.0;
+ rg->mul_prev[4 * i + 2] = -d_2 * d1[i] + 2.0 * d1[i];
+ rg->mul_prev[4 * i + 3] = d_2 * d_2 - 3.0 * d_2 + 1.0;
+ rg->mul_prev2[4 * i + 0] = -1.0;
+ rg->mul_prev2[4 * i + 1] = d1[i];
+ rg->mul_prev2[4 * i + 2] = -d_2 + 1.0;
+ rg->mul_prev2[4 * i + 3] = d_2 * d1[i] - 2.0 * d1[i];
+ rg->mul_in[4 * i + 0] = n2[i];
+ rg->mul_in[4 * i + 1] = -d1[i] * n2[i];
+ rg->mul_in[4 * i + 2] = d_2 * n2[i] - n2[i];
+ rg->mul_in[4 * i + 3] = -d_2 * d1[i] * n2[i] + 2.0 * d1[i] * n2[i];
+ }
+ return rg;
+}
+
+namespace {
+
+// Apply 1D horizontal scan to each row.
+void FastGaussianHorizontal(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+ const ImageF& in, ThreadPool* pool,
+ ImageF* JXL_RESTRICT out) {
+ PROFILER_FUNC;
+ JXL_CHECK(SameSize(in, *out));
+
+ const intptr_t xsize = in.xsize();
+ JXL_CHECK(RunOnPool(
+ pool, 0, in.ysize(), ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /*thread*/) {
+ const size_t y = task;
+ const float* row_in = in.ConstRow(y);
+ float* JXL_RESTRICT row_out = out->Row(y);
+ FastGaussian1D(rg, row_in, xsize, row_out);
+ },
+ "FastGaussianHorizontal"));
+}
+
+} // namespace
+
+void FastGaussian(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+ const ImageF& in, ThreadPool* pool, ImageF* JXL_RESTRICT temp,
+ ImageF* JXL_RESTRICT out) {
+ FastGaussianHorizontal(rg, in, pool, temp);
+ HWY_DYNAMIC_DISPATCH(FastGaussianVertical)(rg, *temp, pool, out);
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/gauss_blur.h b/third_party/jpeg-xl/lib/jxl/gauss_blur.h
new file mode 100644
index 0000000000..fb4741f03a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/gauss_blur.h
@@ -0,0 +1,94 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_GAUSS_BLUR_H_
+#define LIB_JXL_GAUSS_BLUR_H_
+
+#include <stddef.h>
+
+#include <cmath>
+#include <hwy/aligned_allocator.h>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+template <typename T>
+std::vector<T> GaussianKernel(int radius, T sigma) {
+ JXL_ASSERT(sigma > 0.0);
+ std::vector<T> kernel(2 * radius + 1);
+ const T scaler = -1.0 / (2 * sigma * sigma);
+ double sum = 0.0;
+ for (int i = -radius; i <= radius; ++i) {
+ const T val = std::exp(scaler * i * i);
+ kernel[i + radius] = val;
+ sum += val;
+ }
+ for (size_t i = 0; i < kernel.size(); ++i) {
+ kernel[i] /= sum;
+ }
+ return kernel;
+}
+
+// All convolution functions below apply mirroring of the input on the borders
+// in the following way:
+//
+// input: [a0 a1 a2 ... aN]
+// mirrored input: [aR ... a1 | a0 a1 a2 .... aN | aN-1 ... aN-R]
+//
+// where R is the radius of the kernel (i.e. kernel size is 2*R+1).
+
+// REQUIRES: in.xsize() and in.ysize() are integer multiples of res.
+ImageF ConvolveAndSample(const ImageF& in, const std::vector<float>& kernel,
+ const size_t res);
+
+// Private, used by test.
+void ExtrapolateBorders(const float* const JXL_RESTRICT row_in,
+ float* const JXL_RESTRICT row_out, const int xsize,
+ const int radius);
+
+// Only for use by CreateRecursiveGaussian and FastGaussian*.
+#pragma pack(push, 1)
+struct RecursiveGaussian {
+ // For k={1,3,5} in that order, each broadcasted 4x for LoadDup128. Used only
+ // for vertical passes.
+ float n2[3 * 4];
+ float d1[3 * 4];
+
+ // We unroll horizontal passes 4x - one output per lane. These are each lane's
+ // multiplier for the previous output (relative to the first of the four
+ // outputs). Indexing: 4 * 0..2 (for {1,3,5}) + 0..3 for the lane index.
+ float mul_prev[3 * 4];
+ // Ditto for the second to last output.
+ float mul_prev2[3 * 4];
+
+ // We multiply a vector of inputs 0..3 by a vector shifted from this array.
+ // in=0 uses all 4 (nonzero) terms; for in=3, the lower three lanes are 0.
+ float mul_in[3 * 4];
+
+ size_t radius;
+};
+#pragma pack(pop)
+
+// Precomputation for FastGaussian*; users may use the same pointer/storage in
+// subsequent calls to FastGaussian* with the same sigma.
+hwy::AlignedUniquePtr<RecursiveGaussian> CreateRecursiveGaussian(double sigma);
+
+// 1D Gaussian with zero-pad boundary handling and runtime independent of sigma.
+void FastGaussian1D(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+ const float* JXL_RESTRICT in, intptr_t width,
+ float* JXL_RESTRICT out);
+
+// 2D Gaussian with zero-pad boundary handling and runtime independent of sigma.
+void FastGaussian(const hwy::AlignedUniquePtr<RecursiveGaussian>& rg,
+ const ImageF& in, ThreadPool* pool, ImageF* JXL_RESTRICT temp,
+ ImageF* JXL_RESTRICT out);
+
+} // namespace jxl
+
+#endif // LIB_JXL_GAUSS_BLUR_H_
diff --git a/third_party/jpeg-xl/lib/jxl/gauss_blur_gbench.cc b/third_party/jpeg-xl/lib/jxl/gauss_blur_gbench.cc
new file mode 100644
index 0000000000..b1bb64abc5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/gauss_blur_gbench.cc
@@ -0,0 +1,126 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <hwy/targets.h>
+
+#include "benchmark/benchmark.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/gauss_blur.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+namespace {
+
+JXL_MAYBE_UNUSED ImageF Convolve(const ImageF& in,
+ const std::vector<float>& kernel) {
+ return ConvolveAndSample(in, kernel, 1);
+}
+
+void BM_GaussBlur1d(benchmark::State& state) {
+ // Uncomment to disable SIMD and force and scalar implementation
+ // hwy::DisableTargets(~HWY_SCALAR);
+ // Uncomment to run AVX2
+ // hwy::DisableTargets(HWY_AVX3);
+
+ const size_t length = state.range();
+ const double sigma = 7.0; // (from Butteraugli application)
+ ImageF in(length, 1);
+ const float expected = length;
+ FillImage(expected, &in);
+
+ ImageF temp(length, 1);
+ ImageF out(length, 1);
+ const auto rg = CreateRecursiveGaussian(sigma);
+ for (auto _ : state) {
+ FastGaussian1D(rg, in.Row(0), length, out.Row(0));
+ // Prevent optimizing out
+ JXL_ASSERT(std::abs(out.ConstRow(0)[length / 2] - expected) / expected <
+ 9E-5);
+ }
+ state.SetItemsProcessed(length * state.iterations());
+}
+
+void BM_GaussBlur2d(benchmark::State& state) {
+ // See GaussBlur1d for SIMD changes.
+
+ const size_t xsize = state.range();
+ const size_t ysize = xsize;
+ const double sigma = 7.0; // (from Butteraugli application)
+ ImageF in(xsize, ysize);
+ const float expected = xsize + ysize;
+ FillImage(expected, &in);
+
+ ImageF temp(xsize, ysize);
+ ImageF out(xsize, ysize);
+ ThreadPool* null_pool = nullptr;
+ const auto rg = CreateRecursiveGaussian(sigma);
+ for (auto _ : state) {
+ FastGaussian(rg, in, null_pool, &temp, &out);
+ // Prevent optimizing out
+ JXL_ASSERT(std::abs(out.ConstRow(ysize / 2)[xsize / 2] - expected) /
+ expected <
+ 9E-5);
+ }
+ state.SetItemsProcessed(xsize * ysize * state.iterations());
+}
+
+void BM_GaussBlurFir(benchmark::State& state) {
+ // See GaussBlur1d for SIMD changes.
+
+ const size_t xsize = state.range();
+ const size_t ysize = xsize;
+ const double sigma = 7.0; // (from Butteraugli application)
+ ImageF in(xsize, ysize);
+ const float expected = xsize + ysize;
+ FillImage(expected, &in);
+
+ ImageF temp(xsize, ysize);
+ ImageF out(xsize, ysize);
+ const std::vector<float> kernel =
+ GaussianKernel(static_cast<int>(4 * sigma), static_cast<float>(sigma));
+ for (auto _ : state) {
+ // Prevent optimizing out
+ JXL_ASSERT(std::abs(Convolve(in, kernel).ConstRow(ysize / 2)[xsize / 2] -
+ expected) /
+ expected <
+ 9E-5);
+ }
+ state.SetItemsProcessed(xsize * ysize * state.iterations());
+}
+
+void BM_GaussBlurSep7(benchmark::State& state) {
+ // See GaussBlur1d for SIMD changes.
+
+ const size_t xsize = state.range();
+ const size_t ysize = xsize;
+ ImageF in(xsize, ysize);
+ const float expected = xsize + ysize;
+ FillImage(expected, &in);
+
+ ImageF temp(xsize, ysize);
+ ImageF out(xsize, ysize);
+ ThreadPool* null_pool = nullptr;
+ // Gaussian with sigma 1
+ const WeightsSeparable7 weights = {{HWY_REP4(0.383103f), HWY_REP4(0.241843f),
+ HWY_REP4(0.060626f), HWY_REP4(0.00598f)},
+ {HWY_REP4(0.383103f), HWY_REP4(0.241843f),
+ HWY_REP4(0.060626f), HWY_REP4(0.00598f)}};
+ for (auto _ : state) {
+ Separable7(in, Rect(in), weights, null_pool, &out);
+ // Prevent optimizing out
+ JXL_ASSERT(std::abs(out.ConstRow(ysize / 2)[xsize / 2] - expected) /
+ expected <
+ 9E-5);
+ }
+ state.SetItemsProcessed(xsize * ysize * state.iterations());
+}
+
+BENCHMARK(BM_GaussBlur1d)->Range(1 << 8, 1 << 14);
+BENCHMARK(BM_GaussBlur2d)->Range(1 << 7, 1 << 10);
+BENCHMARK(BM_GaussBlurFir)->Range(1 << 7, 1 << 10);
+BENCHMARK(BM_GaussBlurSep7)->Range(1 << 7, 1 << 10);
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/gauss_blur_test.cc b/third_party/jpeg-xl/lib/jxl/gauss_blur_test.cc
new file mode 100644
index 0000000000..097c1aa8df
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/gauss_blur_test.cc
@@ -0,0 +1,453 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/gauss_blur.h"
+
+#include <cmath>
+#include <hwy/targets.h>
+#include <vector>
+
+#include "lib/extras/time.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+
+bool NearEdge(const int64_t width, const int64_t peak) {
+ // When around 3*sigma from the edge, there is negligible truncation.
+ return peak < 10 || peak > width - 10;
+}
+
+// Follow the curve downwards by scanning right from `peak` and verifying
+// identical values at the same offset to the left.
+void VerifySymmetric(const int64_t width, const int64_t peak,
+ const float* out) {
+ const double tolerance = NearEdge(width, peak) ? 0.015 : 6E-7;
+ for (int64_t i = 1;; ++i) {
+ // Stop if we passed either end of the array
+ if (peak - i < 0 || peak + i >= width) break;
+ EXPECT_GT(out[peak + i - 1] + tolerance, out[peak + i]); // descending
+ EXPECT_NEAR(out[peak - i], out[peak + i], tolerance); // symmetric
+ }
+}
+
+void TestImpulseResponse(size_t width, size_t peak) {
+ const auto rg3 = CreateRecursiveGaussian(3.0);
+ const auto rg4 = CreateRecursiveGaussian(4.0);
+ const auto rg5 = CreateRecursiveGaussian(5.0);
+
+ // Extra padding for 4x unrolling
+ auto in = hwy::AllocateAligned<float>(width + 3);
+ memset(in.get(), 0, sizeof(float) * (width + 3));
+ in[peak] = 1.0f;
+
+ auto out3 = hwy::AllocateAligned<float>(width + 3);
+ auto out4 = hwy::AllocateAligned<float>(width + 3);
+ auto out5 = hwy::AllocateAligned<float>(width + 3);
+ FastGaussian1D(rg3, in.get(), width, out3.get());
+ FastGaussian1D(rg4, out3.get(), width, out4.get());
+ FastGaussian1D(rg5, in.get(), width, out5.get());
+
+ VerifySymmetric(width, peak, out3.get());
+ VerifySymmetric(width, peak, out4.get());
+ VerifySymmetric(width, peak, out5.get());
+
+ // Wider kernel has flatter peak
+ EXPECT_LT(out5[peak] + 0.05, out3[peak]);
+
+ // Gauss3 o Gauss4 ~= Gauss5
+ const double tolerance = NearEdge(width, peak) ? 0.04 : 0.01;
+ for (size_t i = 0; i < width; ++i) {
+ EXPECT_NEAR(out4[i], out5[i], tolerance);
+ }
+}
+
+void TestImpulseResponseForWidth(size_t width) {
+ for (size_t i = 0; i < width; ++i) {
+ TestImpulseResponse(width, i);
+ }
+}
+
+TEST(GaussBlurTest, ImpulseResponse) {
+ TestImpulseResponseForWidth(10); // tiny even
+ TestImpulseResponseForWidth(15); // small odd
+ TestImpulseResponseForWidth(32); // power of two
+ TestImpulseResponseForWidth(31); // power of two - 1
+ TestImpulseResponseForWidth(33); // power of two + 1
+}
+
+ImageF Convolve(const ImageF& in, const std::vector<float>& kernel) {
+ return ConvolveAndSample(in, kernel, 1);
+}
+
+// Higher-precision version for accuracy test.
+ImageF ConvolveAndTransposeF64(const ImageF& in,
+ const std::vector<double>& kernel) {
+ JXL_ASSERT(kernel.size() % 2 == 1);
+ ImageF out(in.ysize(), in.xsize());
+ const int r = kernel.size() / 2;
+ std::vector<float> row_tmp(in.xsize() + 2 * r);
+ float* const JXL_RESTRICT rowp = &row_tmp[r];
+ const double* const kernelp = &kernel[r];
+ for (size_t y = 0; y < in.ysize(); ++y) {
+ ExtrapolateBorders(in.Row(y), rowp, in.xsize(), r);
+ for (size_t x = 0, ox = 0; x < in.xsize(); ++x, ++ox) {
+ double sum = 0.0;
+ for (int i = -r; i <= r; ++i) {
+ sum += rowp[std::max<int>(
+ 0, std::min<int>(static_cast<int>(x) + i, in.xsize()))] *
+ kernelp[i];
+ }
+ out.Row(ox)[y] = static_cast<float>(sum);
+ }
+ }
+ return out;
+}
+
+ImageF ConvolveF64(const ImageF& in, const std::vector<double>& kernel) {
+ ImageF tmp = ConvolveAndTransposeF64(in, kernel);
+ return ConvolveAndTransposeF64(tmp, kernel);
+}
+
+void TestDirac2D(size_t xsize, size_t ysize, double sigma) {
+ ImageF in(xsize, ysize);
+ ZeroFillImage(&in);
+ // We anyway ignore the border below, so might as well choose the middle.
+ in.Row(ysize / 2)[xsize / 2] = 1.0f;
+
+ ImageF temp(xsize, ysize);
+ ImageF out(xsize, ysize);
+ const auto rg = CreateRecursiveGaussian(sigma);
+ ThreadPool* null_pool = nullptr;
+ FastGaussian(rg, in, null_pool, &temp, &out);
+
+ const std::vector<float> kernel =
+ GaussianKernel(static_cast<int>(4 * sigma), static_cast<float>(sigma));
+ const ImageF expected = Convolve(in, kernel);
+
+ const double max_l1 = sigma < 1.5 ? 5E-3 : 6E-4;
+ const size_t border = 2 * sigma;
+
+ JXL_ASSERT_OK(VerifyRelativeError(expected, out, max_l1, 1E-8, _, border));
+}
+
+TEST(GaussBlurTest, Test2D) {
+ const std::vector<int> dimensions{6, 15, 17, 64, 50, 49};
+ for (int xsize : dimensions) {
+ for (int ysize : dimensions) {
+ for (double sigma : {1.0, 2.5, 3.6, 7.0}) {
+ TestDirac2D(static_cast<size_t>(xsize), static_cast<size_t>(ysize),
+ sigma);
+ }
+ }
+ }
+}
+
+// Slow (44 sec). To run, remove the disabled prefix.
+TEST(GaussBlurTest, DISABLED_SlowTestDirac1D) {
+ const double sigma = 7.0;
+ const auto rg = CreateRecursiveGaussian(sigma);
+
+ // IPOL accuracy test uses 10^-15 tolerance, this is 2*10^-11.
+ const size_t radius = static_cast<size_t>(7 * sigma);
+ const std::vector<double> kernel = GaussianKernel(radius, sigma);
+
+ const size_t length = 16384;
+ ImageF inputs(length, 1);
+ ZeroFillImage(&inputs);
+
+ auto outputs = hwy::AllocateAligned<float>(length);
+
+ // One per center position
+ auto sum_abs_err = hwy::AllocateAligned<double>(length);
+ std::fill(sum_abs_err.get(), sum_abs_err.get() + length, 0.0);
+
+ for (size_t center = radius; center < length - radius; ++center) {
+ inputs.Row(0)[center - 1] = 0.0f; // reset last peak, entire array now 0
+ inputs.Row(0)[center] = 1.0f;
+ FastGaussian1D(rg, inputs.Row(0), length, outputs.get());
+
+ const ImageF outputs_fir = ConvolveF64(inputs, kernel);
+
+ for (size_t i = 0; i < length; ++i) {
+ const float abs_err = std::abs(outputs[i] - outputs_fir.Row(0)[i]);
+ sum_abs_err[i] += static_cast<double>(abs_err);
+ }
+ }
+
+ const double max_abs_err =
+ *std::max_element(sum_abs_err.get(), sum_abs_err.get() + length);
+ printf("Max abs err: %.8e\n", max_abs_err);
+}
+
+void TestRandom(size_t xsize, size_t ysize, float min, float max, double sigma,
+ double max_l1, double max_rel) {
+ printf("%4" PRIuS " x %4" PRIuS " %4.1f %4.1f sigma %.1f\n", xsize, ysize,
+ min, max, sigma);
+ ImageF in(xsize, ysize);
+ RandomFillImage(&in, min, max, 65537 + xsize * 129 + ysize);
+ // FastGaussian/Convolve handle borders differently, so keep those pixels 0.
+ const size_t border = 4 * sigma;
+ SetBorder(border, 0.0f, &in);
+
+ ImageF temp(xsize, ysize);
+ ImageF out(xsize, ysize);
+ const auto rg = CreateRecursiveGaussian(sigma);
+ ThreadPool* null_pool = nullptr;
+ FastGaussian(rg, in, null_pool, &temp, &out);
+
+ const std::vector<float> kernel =
+ GaussianKernel(static_cast<int>(4 * sigma), static_cast<float>(sigma));
+ const ImageF expected = Convolve(in, kernel);
+
+ JXL_ASSERT_OK(VerifyRelativeError(expected, out, max_l1, max_rel, _, border));
+}
+
+void TestRandomForSizes(float min, float max, double sigma) {
+ double max_l1 = 6E-3;
+ double max_rel = 3E-3;
+ TestRandom(128, 1, min, max, sigma, max_l1, max_rel);
+ TestRandom(1, 128, min, max, sigma, max_l1, max_rel);
+ TestRandom(30, 201, min, max, sigma, max_l1 * 1.6, max_rel * 1.2);
+ TestRandom(201, 30, min, max, sigma, max_l1 * 1.6, max_rel * 1.2);
+ TestRandom(201, 201, min, max, sigma, max_l1 * 2.0, max_rel * 1.2);
+}
+
+TEST(GaussBlurTest, TestRandom) {
+ // small non-negative
+ TestRandomForSizes(0.0f, 10.0f, 3.0f);
+ TestRandomForSizes(0.0f, 10.0f, 7.0f);
+
+ // small negative
+ TestRandomForSizes(-4.0f, -1.0f, 3.0f);
+ TestRandomForSizes(-4.0f, -1.0f, 7.0f);
+
+ // mixed positive/negative
+ TestRandomForSizes(-6.0f, 6.0f, 3.0f);
+ TestRandomForSizes(-6.0f, 6.0f, 7.0f);
+}
+
+TEST(GaussBlurTest, TestSign) {
+ const size_t xsize = 500;
+ const size_t ysize = 606;
+ ImageF in(xsize, ysize);
+
+ ZeroFillImage(&in);
+ const float center[33 * 33] = {
+ -0.128445f, -0.098473f, -0.121883f, -0.093601f, 0.095665f, -0.271332f,
+ -0.705475f, -1.324005f, -2.020741f, -1.329464f, 1.834064f, 4.787300f,
+ 5.834560f, 5.272720f, 3.967960f, 3.547935f, 3.432732f, 3.383015f,
+ 3.239326f, 3.290806f, 3.298954f, 3.397808f, 3.359730f, 3.533844f,
+ 3.511856f, 3.436787f, 3.428310f, 3.460209f, 3.550011f, 3.590942f,
+ 3.593109f, 3.560005f, 3.443165f, 0.089741f, 0.179230f, -0.032997f,
+ -0.182610f, 0.005669f, -0.244759f, -0.395123f, -0.514961f, -1.003529f,
+ -1.798656f, -2.377975f, 0.222191f, 3.957664f, 5.946804f, 5.543129f,
+ 4.290096f, 3.621010f, 3.407257f, 3.392494f, 3.345367f, 3.391903f,
+ 3.441605f, 3.429260f, 3.444969f, 3.507130f, 3.518612f, 3.443111f,
+ 3.475948f, 3.536148f, 3.470333f, 3.628311f, 3.600243f, 3.292892f,
+ -0.226730f, -0.573616f, -0.762165f, -0.398739f, -0.189842f, -0.275921f,
+ -0.446739f, -0.550037f, -0.461033f, -0.724792f, -1.448349f, -1.814064f,
+ -0.491032f, 2.817703f, 5.213242f, 5.675629f, 4.864548f, 3.876324f,
+ 3.535587f, 3.530312f, 3.413765f, 3.386261f, 3.404854f, 3.383472f,
+ 3.420830f, 3.326496f, 3.257877f, 3.362152f, 3.489609f, 3.619587f,
+ 3.555805f, 3.423164f, 3.309708f, -0.483940f, -0.502926f, -0.592983f,
+ -0.492527f, -0.413616f, -0.482555f, -0.475506f, -0.447990f, -0.338120f,
+ -0.189072f, -0.376427f, -0.910828f, -1.878044f, -1.937927f, 1.423218f,
+ 4.871609f, 5.767548f, 5.103741f, 3.983868f, 3.633003f, 3.458263f,
+ 3.507309f, 3.247021f, 3.220612f, 3.326061f, 3.352814f, 3.291061f,
+ 3.322739f, 3.444302f, 3.506207f, 3.556839f, 3.529575f, 3.457024f,
+ -0.408161f, -0.431343f, -0.454369f, -0.356419f, -0.380924f, -0.399452f,
+ -0.439476f, -0.412189f, -0.306816f, -0.008213f, -0.325813f, -0.537842f,
+ -0.984100f, -1.805332f, -2.028198f, 0.773205f, 4.423046f, 5.604839f,
+ 5.231617f, 4.080299f, 3.603008f, 3.498741f, 3.517010f, 3.333897f,
+ 3.381336f, 3.342617f, 3.369686f, 3.434155f, 3.490452f, 3.607029f,
+ 3.555298f, 3.702297f, 3.618679f, -0.503609f, -0.578564f, -0.419014f,
+ -0.239883f, 0.269836f, 0.022984f, -0.455067f, -0.621777f, -0.304176f,
+ -0.163792f, -0.490250f, -0.466637f, -0.391792f, -0.657940f, -1.498035f,
+ -1.895836f, 0.036537f, 3.462456f, 5.586445f, 5.658791f, 4.434784f,
+ 3.423435f, 3.318848f, 3.202328f, 3.532764f, 3.436687f, 3.354881f,
+ 3.356941f, 3.382645f, 3.503902f, 3.512867f, 3.632366f, 3.537312f,
+ -0.274734f, -0.658829f, -0.726532f, -0.281254f, 0.053196f, -0.064991f,
+ -0.608517f, -0.720966f, -0.070602f, -0.111320f, -0.440956f, -0.492180f,
+ -0.488762f, -0.569283f, -1.012741f, -1.582779f, -2.101479f, -1.392380f,
+ 2.451153f, 5.555855f, 6.096313f, 5.230045f, 4.068172f, 3.404274f,
+ 3.392586f, 3.326065f, 3.156670f, 3.284828f, 3.347012f, 3.319252f,
+ 3.352310f, 3.610790f, 3.499847f, -0.150600f, -0.314445f, -0.093575f,
+ -0.057384f, 0.053688f, -0.189255f, -0.263515f, -0.318653f, 0.053246f,
+ 0.080627f, -0.119553f, -0.152454f, -0.305420f, -0.404869f, -0.385944f,
+ -0.689949f, -1.204914f, -1.985748f, -1.711361f, 1.260658f, 4.626896f,
+ 5.888351f, 5.450989f, 4.070587f, 3.539200f, 3.383492f, 3.296318f,
+ 3.267334f, 3.436028f, 3.463005f, 3.502625f, 3.522282f, 3.403763f,
+ -0.348049f, -0.302303f, -0.137016f, -0.041737f, -0.164001f, -0.358849f,
+ -0.469627f, -0.428291f, -0.375797f, -0.246346f, -0.118950f, -0.084229f,
+ -0.205681f, -0.241199f, -0.391796f, -0.323151f, -0.241211f, -0.834137f,
+ -1.684219f, -1.972137f, 0.448399f, 4.019985f, 5.648144f, 5.647846f,
+ 4.295094f, 3.641884f, 3.374790f, 3.197342f, 3.425545f, 3.507481f,
+ 3.478065f, 3.430889f, 3.341900f, -1.016304f, -0.959221f, -0.909466f,
+ -0.810715f, -0.590729f, -0.594467f, -0.646721f, -0.629364f, -0.528561f,
+ -0.551819f, -0.301086f, -0.149101f, -0.060146f, -0.162220f, -0.326210f,
+ -0.156548f, -0.036293f, -0.426098f, -1.145470f, -1.628998f, -2.003052f,
+ -1.142891f, 2.885162f, 5.652863f, 5.718426f, 4.911140f, 3.234222f,
+ 3.473373f, 3.577183f, 3.271603f, 3.410435f, 3.505489f, 3.434032f,
+ -0.508911f, -0.438797f, -0.437450f, -0.627426f, -0.511745f, -0.304874f,
+ -0.274246f, -0.261841f, -0.228466f, -0.342491f, -0.528206f, -0.490082f,
+ -0.516350f, -0.361694f, -0.398514f, -0.276020f, -0.210369f, -0.355938f,
+ -0.402622f, -0.538864f, -1.249573f, -2.100105f, -0.996178f, 1.886410f,
+ 4.929745f, 5.630871f, 5.444199f, 4.042740f, 3.739189f, 3.691399f,
+ 3.391956f, 3.469696f, 3.431232f, 0.204849f, 0.205433f, -0.131927f,
+ -0.367908f, -0.374378f, -0.126820f, -0.186951f, -0.228565f, -0.081776f,
+ -0.143143f, -0.379230f, -0.598701f, -0.458019f, -0.295586f, -0.407730f,
+ -0.245853f, -0.043140f, 0.024242f, -0.038998f, -0.044151f, -0.425991f,
+ -1.240753f, -1.943146f, -2.174755f, 0.523415f, 4.376751f, 5.956558f,
+ 5.850082f, 4.403152f, 3.517399f, 3.560753f, 3.554836f, 3.471985f,
+ -0.508503f, -0.109783f, 0.057747f, 0.190079f, -0.257153f, -0.591980f,
+ -0.666771f, -0.525391f, -0.293060f, -0.489731f, -0.304855f, -0.259644f,
+ -0.367825f, -0.346977f, -0.292889f, -0.215652f, -0.120705f, -0.176010f,
+ -0.422905f, -0.114647f, -0.289749f, -0.374203f, -0.606754f, -1.127949f,
+ -1.994583f, -0.588058f, 3.415840f, 5.603470f, 5.811581f, 4.959423f,
+ 3.721760f, 3.710499f, 3.785461f, -0.554588f, -0.565517f, -0.434578f,
+ -0.012482f, -0.284660f, -0.699795f, -0.957535f, -0.755135f, -0.382034f,
+ -0.321552f, -0.287571f, -0.279537f, -0.314972f, -0.256287f, -0.372818f,
+ -0.316017f, -0.287975f, -0.365639f, -0.512589f, -0.420692f, -0.436485f,
+ -0.295353f, -0.451958f, -0.755459f, -1.272358f, -2.301353f, -1.776161f,
+ 1.572483f, 4.826286f, 5.741898f, 5.162853f, 4.028049f, 3.686325f,
+ -0.495590f, -0.664413f, -0.760044f, -0.152634f, -0.286480f, -0.340462f,
+ 0.076477f, 0.187706f, -0.068787f, -0.293491f, -0.361145f, -0.292515f,
+ -0.140671f, -0.190723f, -0.333302f, -0.368168f, -0.192581f, -0.154499f,
+ -0.236544f, -0.124405f, -0.208321f, -0.465607f, -0.883080f, -1.104813f,
+ -1.210567f, -1.415665f, -1.924683f, -1.634758f, 0.601017f, 4.276672f,
+ 5.501350f, 5.331257f, 3.809288f, -0.727722f, -0.533619f, -0.511524f,
+ -0.470688f, -0.610710f, -0.575130f, -0.311115f, -0.090420f, -0.297676f,
+ -0.646118f, -0.742805f, -0.485050f, -0.330910f, -0.275417f, -0.357037f,
+ -0.425598f, -0.481876f, -0.488941f, -0.393551f, -0.051105f, -0.090755f,
+ -0.328674f, -0.536369f, -0.533684f, -0.336960f, -0.689194f, -1.187195f,
+ -1.860954f, -2.290253f, -0.424774f, 3.050060f, 5.083332f, 5.291920f,
+ -0.343605f, -0.190975f, -0.303692f, -0.456512f, -0.681820f, -0.690693f,
+ -0.416729f, -0.286446f, -0.442055f, -0.709148f, -0.569160f, -0.382423f,
+ -0.402321f, -0.383362f, -0.366413f, -0.290718f, -0.110069f, -0.220280f,
+ -0.279018f, -0.255424f, -0.262081f, -0.487556f, -0.444492f, -0.250500f,
+ -0.119583f, -0.291557f, -0.537781f, -1.104073f, -1.737091f, -1.697441f,
+ -0.323456f, 2.042049f, 4.605103f, -0.310631f, -0.279568f, -0.012695f,
+ -0.160130f, -0.358746f, -0.421101f, -0.559677f, -0.474136f, -0.416565f,
+ -0.561817f, -0.534672f, -0.519157f, -0.767197f, -0.605831f, -0.186523f,
+ 0.219872f, 0.264984f, -0.193432f, -0.363182f, -0.467472f, -0.462009f,
+ -0.571053f, -0.522476f, -0.315903f, -0.237427f, -0.147320f, -0.100201f,
+ -0.237568f, -0.763435f, -1.242043f, -2.135159f, -1.409485f, 1.236370f,
+ -0.474247f, -0.517906f, -0.410217f, -0.542244f, -0.795986f, -0.590004f,
+ -0.388863f, -0.462921f, -0.810627f, -0.778637f, -0.512486f, -0.718025f,
+ -0.710854f, -0.482513f, -0.318233f, -0.194962f, -0.220116f, -0.421673f,
+ -0.534233f, -0.403339f, -0.389332f, -0.407303f, -0.437355f, -0.469730f,
+ -0.359600f, -0.352745f, -0.466755f, -0.414585f, -0.430756f, -0.656822f,
+ -1.237038f, -2.046097f, -1.574898f, -0.593815f, -0.582165f, -0.336098f,
+ -0.372612f, -0.554386f, -0.410603f, -0.428276f, -0.647644f, -0.640720f,
+ -0.582207f, -0.414112f, -0.435547f, -0.435505f, -0.332561f, -0.248116f,
+ -0.340221f, -0.277855f, -0.352699f, -0.377319f, -0.230850f, -0.313267f,
+ -0.446270f, -0.346237f, -0.420422f, -0.530781f, -0.400341f, -0.463661f,
+ -0.209091f, -0.056705f, -0.011772f, -0.169388f, -0.736275f, -1.463017f,
+ -0.752701f, -0.668865f, -0.329765f, -0.299347f, -0.245667f, -0.286999f,
+ -0.520420f, -0.675438f, -0.255753f, 0.141357f, -0.079639f, -0.419476f,
+ -0.374069f, -0.046253f, 0.116116f, -0.145847f, -0.380371f, -0.563412f,
+ -0.638634f, -0.310116f, -0.260914f, -0.508404f, -0.465508f, -0.527824f,
+ -0.370979f, -0.305595f, -0.244694f, -0.254490f, 0.009968f, -0.050201f,
+ -0.331219f, -0.614960f, -0.788208f, -0.483242f, -0.367516f, -0.186951f,
+ -0.180031f, 0.129711f, -0.127811f, -0.384750f, -0.499542f, -0.418613f,
+ -0.121635f, 0.203197f, -0.167290f, -0.397270f, -0.355461f, -0.218746f,
+ -0.376785f, -0.521698f, -0.721581f, -0.845741f, -0.535439f, -0.220882f,
+ -0.309067f, -0.555248f, -0.690342f, -0.664948f, -0.390102f, 0.020355f,
+ -0.130447f, -0.173252f, -0.170059f, -0.633663f, -0.956001f, -0.621696f,
+ -0.388302f, -0.342262f, -0.244370f, -0.386948f, -0.401421f, -0.172979f,
+ -0.206163f, -0.450058f, -0.525789f, -0.549274f, -0.349251f, -0.474613f,
+ -0.667976f, -0.435600f, -0.175369f, -0.196877f, -0.202976f, -0.242481f,
+ -0.258369f, -0.189133f, -0.395397f, -0.765499f, -0.944016f, -0.850967f,
+ -0.631561f, -0.152493f, -0.046432f, -0.262066f, -0.195919f, 0.048218f,
+ 0.084972f, 0.039902f, 0.000618f, -0.404430f, -0.447456f, -0.418076f,
+ -0.631935f, -0.717415f, -0.502888f, -0.530514f, -0.747826f, -0.704041f,
+ -0.674969f, -0.516853f, -0.418446f, -0.327740f, -0.308815f, -0.481636f,
+ -0.440083f, -0.481720f, -0.341053f, -0.283897f, -0.324368f, -0.352829f,
+ -0.434349f, -0.545589f, -0.533104f, -0.472755f, -0.570496f, -0.557735f,
+ -0.708176f, -0.493332f, -0.194416f, -0.186249f, -0.256710f, -0.271835f,
+ -0.304752f, -0.431267f, -0.422398f, -0.646725f, -0.680801f, -0.249031f,
+ -0.058567f, -0.213890f, -0.383949f, -0.540291f, -0.549877f, -0.225567f,
+ -0.037174f, -0.499874f, -0.641010f, -0.628044f, -0.390549f, -0.311497f,
+ -0.542313f, -0.569565f, -0.473408f, -0.331245f, -0.357197f, -0.285599f,
+ -0.200157f, -0.201866f, -0.124428f, -0.346016f, -0.392311f, -0.264496f,
+ -0.285370f, -0.436974f, -0.523483f, -0.410461f, -0.267925f, -0.055016f,
+ -0.382458f, -0.319771f, -0.049927f, 0.124329f, 0.266102f, -0.106606f,
+ -0.773647f, -0.973053f, -0.708206f, -0.486137f, -0.319923f, -0.493900f,
+ -0.490860f, -0.324986f, -0.147346f, -0.146088f, -0.161758f, -0.084396f,
+ -0.379494f, 0.041626f, -0.113361f, -0.277767f, 0.083366f, 0.126476f,
+ 0.139057f, 0.038040f, 0.038162f, -0.242126f, -0.411736f, -0.370049f,
+ -0.455357f, -0.039257f, 0.264442f, -0.271492f, -0.425346f, -0.514847f,
+ -0.448650f, -0.580399f, -0.652603f, -0.774803f, -0.692524f, -0.579578f,
+ -0.465206f, -0.386265f, -0.458012f, -0.446594f, -0.284893f, -0.345448f,
+ -0.350876f, -0.440350f, -0.360378f, -0.270428f, 0.237213f, -0.063602f,
+ -0.364529f, -0.179867f, 0.078197f, 0.117947f, -0.093410f, -0.359119f,
+ -0.480961f, -0.540638f, -0.436287f, -0.598576f, -0.253735f, -0.060093f,
+ -0.549145f, -0.808327f, -0.698593f, -0.595764f, -0.582508f, -0.497353f,
+ -0.480892f, -0.584240f, -0.665791f, -0.690903f, -0.743446f, -0.796677f,
+ -0.782391f, -0.649010f, -0.628139f, -0.880848f, -0.829361f, -0.373272f,
+ -0.223667f, 0.174572f, -0.348743f, -0.798901f, -0.692307f, -0.607609f,
+ -0.401455f, -0.480919f, -0.450798f, -0.435413f, -0.322338f, -0.228382f,
+ -0.450466f, -0.504440f, -0.477402f, -0.662224f, -0.583397f, -0.217445f,
+ -0.157459f, -0.079584f, -0.226168f, -0.488720f, -0.669624f, -0.666878f,
+ -0.565311f, -0.549625f, -0.364601f, -0.497627f, -0.736897f, -0.763023f,
+ -0.741020f, -0.404503f, 0.184814f, -0.075315f, -0.281513f, -0.532906f,
+ -0.405800f, -0.313438f, -0.536652f, -0.403381f, 0.011967f, 0.103310f,
+ -0.269848f, -0.508656f, -0.445923f, -0.644859f, -0.617870f, -0.500927f,
+ -0.371559f, -0.125580f, 0.028625f, -0.154713f, -0.442024f, -0.492764f,
+ -0.199371f, 0.236305f, 0.225925f, 0.075577f, -0.285812f, -0.437145f,
+ -0.374260f, -0.156693f, -0.129635f, -0.243206f, -0.123058f, 0.162148f,
+ -0.313152f, -0.337982f, -0.358421f, 0.040070f, 0.038925f, -0.333313f,
+ -0.351662f, 0.023014f, 0.091362f, -0.282890f, -0.373253f, -0.389050f,
+ -0.532707f, -0.423347f, -0.349968f, -0.287045f, -0.202442f, -0.308430f,
+ -0.222801f, -0.106323f, -0.056358f, 0.027222f, 0.390732f, 0.033558f,
+ -0.160088f, -0.382217f, -0.535282f, -0.515900f, -0.022736f, 0.165665f,
+ -0.111408f, -0.233784f, -0.312357f, -0.541885f, -0.480022f, -0.482513f,
+ -0.246254f, 0.132244f, 0.090134f, 0.234634f, -0.089249f, -0.460854f,
+ -0.515457f, -0.450874f, -0.311031f, -0.387680f, -0.360554f, -0.179241f,
+ -0.283817f, -0.475815f, -0.246399f, -0.388958f, -0.551140f, -0.496239f,
+ -0.559879f, -0.379761f, -0.254288f, -0.395111f, -0.613018f, -0.459427f,
+ -0.263580f, -0.268929f, 0.080826f, 0.115616f, -0.097324f, -0.325310f,
+ -0.480450f, -0.313286f, -0.310371f, -0.517361f, -0.288288f, -0.112679f,
+ -0.173241f, -0.221664f, -0.039452f, -0.107578f, -0.089630f, -0.483768f,
+ -0.571087f, -0.497108f, -0.321533f, -0.375492f, -0.540363f, -0.406815f,
+ -0.388512f, -0.514561f, -0.540192f, -0.402412f, -0.232246f, -0.304749f,
+ -0.383724f, -0.679596f, -0.685463f, -0.694538f, -0.642937f, -0.425789f,
+ 0.103271f, -0.194862f, -0.487999f, -0.717281f, -0.681850f, -0.709286f,
+ -0.615398f, -0.554245f, -0.254681f, -0.049950f, -0.002914f, -0.095383f,
+ -0.370911f, -0.564224f, -0.242714f};
+ const size_t xtest = xsize / 2;
+ const size_t ytest = ysize / 2;
+
+ for (intptr_t dy = -16; dy <= 16; ++dy) {
+ float* row = in.Row(ytest + dy);
+ for (intptr_t dx = -16; dx <= 16; ++dx)
+ row[xtest + dx] = center[(dy + 16) * 33 + (dx + 16)];
+ }
+
+ const double sigma = 7.155933;
+
+ ImageF temp(xsize, ysize);
+ ImageF out_rg(xsize, ysize);
+ const auto rg = CreateRecursiveGaussian(sigma);
+ ThreadPool* null_pool = nullptr;
+ FastGaussian(rg, in, null_pool, &temp, &out_rg);
+
+ ImageF out_old;
+ {
+ const std::vector<float> kernel =
+ GaussianKernel(static_cast<int>(4 * sigma), static_cast<float>(sigma));
+ printf("old kernel size %" PRIuS "\n", kernel.size());
+ out_old = Convolve(in, kernel);
+ }
+
+ printf("rg %.4f old %.4f\n", out_rg.Row(ytest)[xtest],
+ out_old.Row(ytest)[xtest]);
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/gradient_test.cc b/third_party/jpeg-xl/lib/jxl/gradient_test.cc
new file mode 100644
index 0000000000..282fe89f0a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/gradient_test.cc
@@ -0,0 +1,207 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <math.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <utility>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+namespace {
+
+// Returns distance of point p to line p0..p1, the result is signed and is not
+// normalized.
+double PointLineDist(double x0, double y0, double x1, double y1, double x,
+ double y) {
+ return (y1 - y0) * x - (x1 - x0) * y + x1 * y0 - y1 * x0;
+}
+
+// Generates a test image with a gradient from one color to another.
+// Angle in degrees, colors can be given in hex as 0xRRGGBB. The angle is the
+// angle in which the change direction happens.
+Image3F GenerateTestGradient(uint32_t color0, uint32_t color1, double angle,
+ size_t xsize, size_t ysize) {
+ Image3F image(xsize, ysize);
+
+ double x0 = xsize / 2;
+ double y0 = ysize / 2;
+ double x1 = x0 + std::sin(angle / 360.0 * 2.0 * kPi);
+ double y1 = y0 + std::cos(angle / 360.0 * 2.0 * kPi);
+
+ double maxdist =
+ std::max<double>(fabs(PointLineDist(x0, y0, x1, y1, 0, 0)),
+ fabs(PointLineDist(x0, y0, x1, y1, xsize, 0)));
+
+ for (size_t c = 0; c < 3; ++c) {
+ float c0 = ((color0 >> (8 * (2 - c))) & 255);
+ float c1 = ((color1 >> (8 * (2 - c))) & 255);
+ for (size_t y = 0; y < ysize; ++y) {
+ float* row = image.PlaneRow(c, y);
+ for (size_t x = 0; x < xsize; ++x) {
+ double dist = PointLineDist(x0, y0, x1, y1, x, y);
+ double v = ((dist / maxdist) + 1.0) / 2.0;
+ float color = c0 * (1.0 - v) + c1 * v;
+ row[x] = color;
+ }
+ }
+ }
+
+ return image;
+}
+
+// Computes the max of the horizontal and vertical second derivative for each
+// pixel, where second derivative means absolute value of difference of left
+// delta and right delta (top/bottom for vertical direction).
+// The radius over which the derivative is computed is only 1 pixel and it only
+// checks two angles (hor and ver), but this approximation works well enough.
+static ImageF Gradient2(const ImageF& image) {
+ size_t xsize = image.xsize();
+ size_t ysize = image.ysize();
+ ImageF image2(image.xsize(), image.ysize());
+ for (size_t y = 1; y + 1 < ysize; y++) {
+ const auto* JXL_RESTRICT row0 = image.Row(y - 1);
+ const auto* JXL_RESTRICT row1 = image.Row(y);
+ const auto* JXL_RESTRICT row2 = image.Row(y + 1);
+ auto* row_out = image2.Row(y);
+ for (size_t x = 1; x + 1 < xsize; x++) {
+ float ddx = (row1[x] - row1[x - 1]) - (row1[x + 1] - row1[x]);
+ float ddy = (row1[x] - row0[x]) - (row2[x] - row1[x]);
+ row_out[x] = std::max(fabsf(ddx), fabsf(ddy));
+ }
+ }
+ // Copy to the borders
+ if (ysize > 2) {
+ auto* JXL_RESTRICT row0 = image2.Row(0);
+ const auto* JXL_RESTRICT row1 = image2.Row(1);
+ const auto* JXL_RESTRICT row2 = image2.Row(ysize - 2);
+ auto* JXL_RESTRICT row3 = image2.Row(ysize - 1);
+ for (size_t x = 1; x + 1 < xsize; x++) {
+ row0[x] = row1[x];
+ row3[x] = row2[x];
+ }
+ } else {
+ const auto* row0_in = image.Row(0);
+ const auto* row1_in = image.Row(ysize - 1);
+ auto* row0_out = image2.Row(0);
+ auto* row1_out = image2.Row(ysize - 1);
+ for (size_t x = 1; x + 1 < xsize; x++) {
+ // Image too narrow, take first derivative instead
+ row0_out[x] = row1_out[x] = fabsf(row0_in[x] - row1_in[x]);
+ }
+ }
+ if (xsize > 2) {
+ for (size_t y = 0; y < ysize; y++) {
+ auto* row = image2.Row(y);
+ row[0] = row[1];
+ row[xsize - 1] = row[xsize - 2];
+ }
+ } else {
+ for (size_t y = 0; y < ysize; y++) {
+ const auto* JXL_RESTRICT row_in = image.Row(y);
+ auto* row_out = image2.Row(y);
+ // Image too narrow, take first derivative instead
+ row_out[0] = row_out[xsize - 1] = fabsf(row_in[0] - row_in[xsize - 1]);
+ }
+ }
+ return image2;
+}
+
+static Image3F Gradient2(const Image3F& image) {
+ return Image3F(Gradient2(image.Plane(0)), Gradient2(image.Plane(1)),
+ Gradient2(image.Plane(2)));
+}
+
+/*
+Tests if roundtrip with jxl on a gradient image doesn't cause banding.
+Only tests if use_gradient is true. Set to false for debugging to see the
+distance values.
+Angle in degrees, colors can be given in hex as 0xRRGGBB.
+*/
+void TestGradient(ThreadPool* pool, uint32_t color0, uint32_t color1,
+ size_t xsize, size_t ysize, float angle, bool fast_mode,
+ float butteraugli_distance, bool use_gradient = true) {
+ CompressParams cparams;
+ cparams.butteraugli_distance = butteraugli_distance;
+ if (fast_mode) {
+ cparams.speed_tier = SpeedTier::kSquirrel;
+ }
+ Image3F gradient = GenerateTestGradient(color0, color1, angle, xsize, ysize);
+
+ CodecInOut io;
+ io.metadata.m.SetUintSamples(8);
+ io.metadata.m.color_encoding = ColorEncoding::SRGB();
+ io.SetFromImage(std::move(gradient), io.metadata.m.color_encoding);
+
+ CodecInOut io2;
+
+ PaddedBytes compressed;
+ AuxOut* aux_out = nullptr;
+ PassesEncoderState enc_state;
+ EXPECT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
+ aux_out, pool));
+ EXPECT_TRUE(
+ test::DecodeFile({}, Span<const uint8_t>(compressed), &io2, pool));
+ EXPECT_TRUE(
+ io2.Main().TransformTo(io2.metadata.m.color_encoding, GetJxlCms(), pool));
+
+ if (use_gradient) {
+ // Test that the gradient map worked. For that, we take a second derivative
+ // of the image with Gradient2 to measure how linear the change is in x and
+ // y direction. For a well handled gradient, we expect max values around
+ // 0.1, while if there is noticeable banding, which means the gradient map
+ // failed, the values are around 0.5-1.0 (regardless of
+ // butteraugli_distance).
+ Image3F gradient2 = Gradient2(*io2.Main().color());
+
+ std::array<float, 3> image_max;
+ Image3Max(gradient2, &image_max);
+
+ // TODO(jyrki): These values used to work with 0.2, 0.2, 0.2.
+ EXPECT_LE(image_max[0], 3.15);
+ EXPECT_LE(image_max[1], 1.72);
+ EXPECT_LE(image_max[2], 5.05);
+ }
+}
+
+static constexpr bool fast_mode = true;
+
+TEST(GradientTest, SteepGradient) {
+ test::ThreadPoolForTests pool(8);
+ // Relatively steep gradients, colors from the sky of stp.png
+ TestGradient(&pool, 0xd99d58, 0x889ab1, 512, 512, 90, fast_mode, 3.0);
+}
+
+TEST(GradientTest, SubtleGradient) {
+ test::ThreadPoolForTests pool(8);
+ // Very subtle gradient
+ TestGradient(&pool, 0xb89b7b, 0xa89b8d, 512, 512, 90, fast_mode, 4.0);
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/headers.cc b/third_party/jpeg-xl/lib/jxl/headers.cc
new file mode 100644
index 0000000000..dc53726385
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/headers.cc
@@ -0,0 +1,194 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/headers.h"
+
+#include "lib/jxl/common.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+namespace {
+
+struct Rational {
+ constexpr explicit Rational(uint32_t num, uint32_t den)
+ : num(num), den(den) {}
+
+ // Returns floor(multiplicand * rational).
+ constexpr uint32_t MulTruncate(uint32_t multiplicand) const {
+ return uint64_t(multiplicand) * num / den;
+ }
+
+ uint32_t num;
+ uint32_t den;
+};
+
+Rational FixedAspectRatios(uint32_t ratio) {
+ JXL_ASSERT(0 != ratio && ratio < 8);
+ // Other candidates: 5/4, 7/5, 14/9, 16/10, 5/3, 21/9, 12/5
+ constexpr Rational kRatios[7] = {Rational(1, 1), // square
+ Rational(12, 10), //
+ Rational(4, 3), // camera
+ Rational(3, 2), // mobile camera
+ Rational(16, 9), // camera/display
+ Rational(5, 4), //
+ Rational(2, 1)}; //
+ return kRatios[ratio - 1];
+}
+
+uint32_t FindAspectRatio(uint32_t xsize, uint32_t ysize) {
+ for (uint32_t r = 1; r < 8; ++r) {
+ if (xsize == FixedAspectRatios(r).MulTruncate(ysize)) {
+ return r;
+ }
+ }
+ return 0; // Must send xsize instead
+}
+
+} // namespace
+
+size_t SizeHeader::xsize() const {
+ if (ratio_ != 0) {
+ return FixedAspectRatios(ratio_).MulTruncate(
+ static_cast<uint32_t>(ysize()));
+ }
+ return small_ ? ((xsize_div8_minus_1_ + 1) * 8) : xsize_;
+}
+
+Status SizeHeader::Set(size_t xsize64, size_t ysize64) {
+ if (xsize64 > 0xFFFFFFFFull || ysize64 > 0xFFFFFFFFull) {
+ return JXL_FAILURE("Image too large");
+ }
+ const uint32_t xsize32 = static_cast<uint32_t>(xsize64);
+ const uint32_t ysize32 = static_cast<uint32_t>(ysize64);
+ if (xsize64 == 0 || ysize64 == 0) return JXL_FAILURE("Empty image");
+ ratio_ = FindAspectRatio(xsize32, ysize32);
+ small_ = ysize64 <= 256 && (ysize64 % kBlockDim) == 0 &&
+ (ratio_ != 0 || (xsize64 <= 256 && (xsize64 % kBlockDim) == 0));
+ if (small_) {
+ ysize_div8_minus_1_ = ysize32 / 8 - 1;
+ } else {
+ ysize_ = ysize32;
+ }
+
+ if (ratio_ == 0) {
+ if (small_) {
+ xsize_div8_minus_1_ = xsize32 / 8 - 1;
+ } else {
+ xsize_ = xsize32;
+ }
+ }
+ JXL_ASSERT(xsize() == xsize64);
+ JXL_ASSERT(ysize() == ysize64);
+ return true;
+}
+
+Status PreviewHeader::Set(size_t xsize64, size_t ysize64) {
+ const uint32_t xsize32 = static_cast<uint32_t>(xsize64);
+ const uint32_t ysize32 = static_cast<uint32_t>(ysize64);
+ if (xsize64 == 0 || ysize64 == 0) return JXL_FAILURE("Empty preview");
+ div8_ = (xsize64 % kBlockDim) == 0 && (ysize64 % kBlockDim) == 0;
+ if (div8_) {
+ ysize_div8_ = ysize32 / 8;
+ } else {
+ ysize_ = ysize32;
+ }
+
+ ratio_ = FindAspectRatio(xsize32, ysize32);
+ if (ratio_ == 0) {
+ if (div8_) {
+ xsize_div8_ = xsize32 / 8;
+ } else {
+ xsize_ = xsize32;
+ }
+ }
+ JXL_ASSERT(xsize() == xsize64);
+ JXL_ASSERT(ysize() == ysize64);
+ return true;
+}
+
+size_t PreviewHeader::xsize() const {
+ if (ratio_ != 0) {
+ return FixedAspectRatios(ratio_).MulTruncate(
+ static_cast<uint32_t>(ysize()));
+ }
+ return div8_ ? (xsize_div8_ * 8) : xsize_;
+}
+
+SizeHeader::SizeHeader() { Bundle::Init(this); }
+Status SizeHeader::VisitFields(Visitor* JXL_RESTRICT visitor) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &small_));
+
+ if (visitor->Conditional(small_)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(5, 0, &ysize_div8_minus_1_));
+ }
+ if (visitor->Conditional(!small_)) {
+ // (Could still be small, but non-multiple of 8.)
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(BitsOffset(9, 1), BitsOffset(13, 1),
+ BitsOffset(18, 1), BitsOffset(30, 1),
+ 1, &ysize_));
+ }
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 0, &ratio_));
+ if (visitor->Conditional(ratio_ == 0 && small_)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(5, 0, &xsize_div8_minus_1_));
+ }
+ if (visitor->Conditional(ratio_ == 0 && !small_)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(BitsOffset(9, 1), BitsOffset(13, 1),
+ BitsOffset(18, 1), BitsOffset(30, 1),
+ 1, &xsize_));
+ }
+
+ return true;
+}
+
+PreviewHeader::PreviewHeader() { Bundle::Init(this); }
+Status PreviewHeader::VisitFields(Visitor* JXL_RESTRICT visitor) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &div8_));
+
+ if (visitor->Conditional(div8_)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(16), Val(32), BitsOffset(5, 1),
+ BitsOffset(9, 33), 1, &ysize_div8_));
+ }
+ if (visitor->Conditional(!div8_)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(BitsOffset(6, 1), BitsOffset(8, 65),
+ BitsOffset(10, 321),
+ BitsOffset(12, 1345), 1, &ysize_));
+ }
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 0, &ratio_));
+ if (visitor->Conditional(ratio_ == 0 && div8_)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(16), Val(32), BitsOffset(5, 1),
+ BitsOffset(9, 33), 1, &xsize_div8_));
+ }
+ if (visitor->Conditional(ratio_ == 0 && !div8_)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(BitsOffset(6, 1), BitsOffset(8, 65),
+ BitsOffset(10, 321),
+ BitsOffset(12, 1345), 1, &xsize_));
+ }
+
+ return true;
+}
+
+AnimationHeader::AnimationHeader() { Bundle::Init(this); }
+Status AnimationHeader::VisitFields(Visitor* JXL_RESTRICT visitor) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(100), Val(1000), BitsOffset(10, 1),
+ BitsOffset(30, 1), 1, &tps_numerator));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(1), Val(1001), BitsOffset(8, 1),
+ BitsOffset(10, 1), 1,
+ &tps_denominator));
+
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Val(0), Bits(3), Bits(16), Bits(32), 0, &num_loops));
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_timecodes));
+ return true;
+}
+
+Status ReadSizeHeader(BitReader* JXL_RESTRICT reader,
+ SizeHeader* JXL_RESTRICT size) {
+ return Bundle::Read(reader, size);
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/headers.h b/third_party/jpeg-xl/lib/jxl/headers.h
new file mode 100644
index 0000000000..3cce84dabc
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/headers.h
@@ -0,0 +1,97 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_HEADERS_H_
+#define LIB_JXL_HEADERS_H_
+
+// Codestream headers.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/field_encodings.h"
+
+namespace jxl {
+
+// Reserved by ISO/IEC 10918-1. LF causes files opened in text mode to be
+// rejected because the marker changes to 0x0D instead. The 0xFF prefix also
+// ensures there were no 7-bit transmission limitations.
+static constexpr uint8_t kCodestreamMarker = 0x0A;
+
+// Compact representation of image dimensions (best case: 9 bits) so decoders
+// can preallocate early.
+class SizeHeader : public Fields {
+ public:
+ SizeHeader();
+ JXL_FIELDS_NAME(SizeHeader)
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+ Status Set(size_t xsize, size_t ysize);
+
+ size_t xsize() const;
+ size_t ysize() const {
+ return small_ ? ((ysize_div8_minus_1_ + 1) * 8) : ysize_;
+ }
+
+ private:
+ bool small_; // xsize and ysize <= 256 and divisible by 8.
+
+ uint32_t ysize_div8_minus_1_;
+ uint32_t ysize_;
+
+ uint32_t ratio_;
+ uint32_t xsize_div8_minus_1_;
+ uint32_t xsize_;
+};
+
+// (Similar to SizeHeader but different encoding because previews are smaller)
+class PreviewHeader : public Fields {
+ public:
+ PreviewHeader();
+ JXL_FIELDS_NAME(PreviewHeader)
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+ Status Set(size_t xsize, size_t ysize);
+
+ size_t xsize() const;
+ size_t ysize() const { return div8_ ? (ysize_div8_ * 8) : ysize_; }
+
+ private:
+ bool div8_; // xsize and ysize divisible by 8.
+
+ uint32_t ysize_div8_;
+ uint32_t ysize_;
+
+ uint32_t ratio_;
+ uint32_t xsize_div8_;
+ uint32_t xsize_;
+};
+
+struct AnimationHeader : public Fields {
+ AnimationHeader();
+ JXL_FIELDS_NAME(AnimationHeader)
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+ // Ticks per second (expressed as rational number to support NTSC)
+ uint32_t tps_numerator;
+ uint32_t tps_denominator;
+
+ uint32_t num_loops; // 0 means to repeat infinitely.
+
+ bool have_timecodes;
+};
+
+Status ReadSizeHeader(BitReader* JXL_RESTRICT reader,
+ SizeHeader* JXL_RESTRICT size);
+
+} // namespace jxl
+
+#endif // LIB_JXL_HEADERS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/huffman_table.cc b/third_party/jpeg-xl/lib/jxl/huffman_table.cc
new file mode 100644
index 0000000000..9ae7865af6
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/huffman_table.cc
@@ -0,0 +1,161 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/huffman_table.h"
+
+#include <cstring> /* for memcpy */
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/dec_huffman.h"
+
+namespace jxl {
+
+/* Returns reverse(reverse(key, len) + 1, len), where reverse(key, len) is the
+ bit-wise reversal of the len least significant bits of key. */
+static inline int GetNextKey(int key, int len) {
+ int step = 1u << (len - 1);
+ while (key & step) {
+ step >>= 1;
+ }
+ return (key & (step - 1)) + step;
+}
+
+/* Stores code in table[0], table[step], table[2*step], ..., table[end] */
+/* Assumes that end is an integer multiple of step */
+static inline void ReplicateValue(HuffmanCode* table, int step, int end,
+ HuffmanCode code) {
+ do {
+ end -= step;
+ table[end] = code;
+ } while (end > 0);
+}
+
+/* Returns the table width of the next 2nd level table. count is the histogram
+ of bit lengths for the remaining symbols, len is the code length of the next
+ processed symbol */
+static inline size_t NextTableBitSize(const uint16_t* const count, size_t len,
+ int root_bits) {
+ size_t left = 1u << (len - root_bits);
+ while (len < PREFIX_MAX_BITS) {
+ if (left <= count[len]) break;
+ left -= count[len];
+ ++len;
+ left <<= 1;
+ }
+ return len - root_bits;
+}
+
+uint32_t BuildHuffmanTable(HuffmanCode* root_table, int root_bits,
+ const uint8_t* const code_lengths,
+ size_t code_lengths_size, uint16_t* count) {
+ HuffmanCode code; /* current table entry */
+ HuffmanCode* table; /* next available space in table */
+ size_t len; /* current code length */
+ size_t symbol; /* symbol index in original or sorted table */
+ int key; /* reversed prefix code */
+ int step; /* step size to replicate values in current table */
+ int low; /* low bits for current root entry */
+ int mask; /* mask for low bits */
+ size_t table_bits; /* key length of current table */
+ int table_size; /* size of current table */
+ int total_size; /* sum of root table size and 2nd level table sizes */
+ /* offsets in sorted table for each length */
+ uint16_t offset[PREFIX_MAX_BITS + 1];
+ size_t max_length = 1;
+
+ if (code_lengths_size > 1u << PREFIX_MAX_BITS) return 0;
+
+ /* symbols sorted by code length */
+ std::vector<uint16_t> sorted_storage(code_lengths_size);
+ uint16_t* sorted = sorted_storage.data();
+
+ /* generate offsets into sorted symbol table by code length */
+ {
+ uint16_t sum = 0;
+ for (len = 1; len <= PREFIX_MAX_BITS; len++) {
+ offset[len] = sum;
+ if (count[len]) {
+ sum = static_cast<uint16_t>(sum + count[len]);
+ max_length = len;
+ }
+ }
+ }
+
+ /* sort symbols by length, by symbol order within each length */
+ for (symbol = 0; symbol < code_lengths_size; symbol++) {
+ if (code_lengths[symbol] != 0) {
+ sorted[offset[code_lengths[symbol]]++] = symbol;
+ }
+ }
+
+ table = root_table;
+ table_bits = root_bits;
+ table_size = 1u << table_bits;
+ total_size = table_size;
+
+ /* special case code with only one value */
+ if (offset[PREFIX_MAX_BITS] == 1) {
+ code.bits = 0;
+ code.value = static_cast<uint16_t>(sorted[0]);
+ for (key = 0; key < total_size; ++key) {
+ table[key] = code;
+ }
+ return total_size;
+ }
+
+ /* fill in root table */
+ /* let's reduce the table size to a smaller size if possible, and */
+ /* create the repetitions by memcpy if possible in the coming loop */
+ if (table_bits > max_length) {
+ table_bits = max_length;
+ table_size = 1u << table_bits;
+ }
+ key = 0;
+ symbol = 0;
+ code.bits = 1;
+ step = 2;
+ do {
+ for (; count[code.bits] != 0; --count[code.bits]) {
+ code.value = static_cast<uint16_t>(sorted[symbol++]);
+ ReplicateValue(&table[key], step, table_size, code);
+ key = GetNextKey(key, code.bits);
+ }
+ step <<= 1;
+ } while (++code.bits <= table_bits);
+
+ /* if root_bits != table_bits we only created one fraction of the */
+ /* table, and we need to replicate it now. */
+ while (total_size != table_size) {
+ memcpy(&table[table_size], &table[0], table_size * sizeof(table[0]));
+ table_size <<= 1;
+ }
+
+ /* fill in 2nd level tables and add pointers to root table */
+ mask = total_size - 1;
+ low = -1;
+ for (len = root_bits + 1, step = 2; len <= max_length; ++len, step <<= 1) {
+ for (; count[len] != 0; --count[len]) {
+ if ((key & mask) != low) {
+ table += table_size;
+ table_bits = NextTableBitSize(count, len, root_bits);
+ table_size = 1u << table_bits;
+ total_size += table_size;
+ low = key & mask;
+ root_table[low].bits = static_cast<uint8_t>(table_bits + root_bits);
+ root_table[low].value =
+ static_cast<uint16_t>((table - root_table) - low);
+ }
+ code.bits = static_cast<uint8_t>(len - root_bits);
+ code.value = static_cast<uint16_t>(sorted[symbol++]);
+ ReplicateValue(&table[key >> root_bits], step, table_size, code);
+ key = GetNextKey(key, len);
+ }
+ }
+
+ return total_size;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/huffman_table.h b/third_party/jpeg-xl/lib/jxl/huffman_table.h
new file mode 100644
index 0000000000..11cdb2fc45
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/huffman_table.h
@@ -0,0 +1,28 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_HUFFMAN_TABLE_H_
+#define LIB_JXL_HUFFMAN_TABLE_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+namespace jxl {
+
+struct HuffmanCode {
+ uint8_t bits; /* number of bits used for this symbol */
+ uint16_t value; /* symbol value or table offset */
+};
+
+/* Builds Huffman lookup table assuming code lengths are in symbol order. */
+/* Returns 0 in case of error (invalid tree or memory error), otherwise
+ populated size of table. */
+uint32_t BuildHuffmanTable(HuffmanCode* root_table, int root_bits,
+ const uint8_t* code_lengths,
+ size_t code_lengths_size, uint16_t* count);
+
+} // namespace jxl
+
+#endif // LIB_JXL_HUFFMAN_TABLE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/iaca_test.cc b/third_party/jpeg-xl/lib/jxl/iaca_test.cc
new file mode 100644
index 0000000000..e25d9316d5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/iaca_test.cc
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/iaca.h"
+
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(IacaTest, MarkersDefaultToDisabledAndDoNotCrash) {
+ BeginIACA();
+ EndIACA();
+}
+
+TEST(IacaTest, ScopeDefaultToDisabledAndDoNotCrash) { ScopeIACA iaca; }
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/icc_codec.cc b/third_party/jpeg-xl/lib/jxl/icc_codec.cc
new file mode 100644
index 0000000000..f367461c0f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/icc_codec.cc
@@ -0,0 +1,389 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/icc_codec.h"
+
+#include <stdint.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/icc_codec_common.h"
+
+namespace jxl {
+namespace {
+
+// Shuffles or interleaves bytes, for example with width 2, turns "ABCDabcd"
+// into "AaBbCcDc". Transposes a matrix of ceil(size / width) columns and
+// width rows. There are size elements, size may be < width * height, if so the
+// last elements of the rightmost column are missing, the missing spots are
+// transposed along with the filled spots, and the result has the missing
+// elements at the end of the bottom row. The input is the input matrix in
+// scanline order but with missing elements skipped (which may occur in multiple
+// locations), the output is the result matrix in scanline order (with
+// no need to skip missing elements as they are past the end of the data).
+void Shuffle(uint8_t* data, size_t size, size_t width) {
+ size_t height = (size + width - 1) / width; // amount of rows of output
+ PaddedBytes result(size);
+ // i = output index, j input index
+ size_t s = 0, j = 0;
+ for (size_t i = 0; i < size; i++) {
+ result[i] = data[j];
+ j += height;
+ if (j >= size) j = ++s;
+ }
+
+ for (size_t i = 0; i < size; i++) {
+ data[i] = result[i];
+ }
+}
+
+// TODO(eustas): should be 20, or even 18, once DecodeVarInt is improved;
+// currently DecodeVarInt does not signal the errors, and marks
+// 11 bytes as used even if only 10 are used (and 9 is enough for
+// 63-bit values).
+constexpr const size_t kPreambleSize = 22; // enough for reading 2 VarInts
+
+} // namespace
+
+// Mimics the beginning of UnpredictICC for quick validity check.
+// At least kPreambleSize bytes of data should be valid at invocation time.
+Status CheckPreamble(const PaddedBytes& data, size_t enc_size,
+ size_t output_limit) {
+ const uint8_t* enc = data.data();
+ size_t size = data.size();
+ size_t pos = 0;
+ uint64_t osize = DecodeVarInt(enc, size, &pos);
+ JXL_RETURN_IF_ERROR(CheckIs32Bit(osize));
+ if (pos >= size) return JXL_FAILURE("Out of bounds");
+ uint64_t csize = DecodeVarInt(enc, size, &pos);
+ JXL_RETURN_IF_ERROR(CheckIs32Bit(csize));
+ JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, csize, size));
+ // We expect that UnpredictICC inflates input, not the other way round.
+ if (osize + 65536 < enc_size) return JXL_FAILURE("Malformed ICC");
+ if (output_limit && osize > output_limit) {
+ return JXL_FAILURE("Decoded ICC is too large");
+ }
+ return true;
+}
+
+// Decodes the result of PredictICC back to a valid ICC profile.
+Status UnpredictICC(const uint8_t* enc, size_t size, PaddedBytes* result) {
+ if (!result->empty()) return JXL_FAILURE("result must be empty initially");
+ size_t pos = 0;
+ // TODO(lode): technically speaking we need to check that the entire varint
+ // decoding never goes out of bounds, not just the first byte. This requires
+ // a DecodeVarInt function that returns an error code. It is safe to use
+ // DecodeVarInt with out of bounds values, it silently returns, but the
+ // specification requires an error. Idem for all DecodeVarInt below.
+ if (pos >= size) return JXL_FAILURE("Out of bounds");
+ uint64_t osize = DecodeVarInt(enc, size, &pos); // Output size
+ JXL_RETURN_IF_ERROR(CheckIs32Bit(osize));
+ if (pos >= size) return JXL_FAILURE("Out of bounds");
+ uint64_t csize = DecodeVarInt(enc, size, &pos); // Commands size
+ // Every command is translated to at least on byte.
+ JXL_RETURN_IF_ERROR(CheckIs32Bit(csize));
+ size_t cpos = pos; // pos in commands stream
+ JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, csize, size));
+ size_t commands_end = cpos + csize;
+ pos = commands_end; // pos in data stream
+
+ // Header
+ PaddedBytes header = ICCInitialHeaderPrediction();
+ EncodeUint32(0, osize, &header);
+ for (size_t i = 0; i <= kICCHeaderSize; i++) {
+ if (result->size() == osize) {
+ if (cpos != commands_end) return JXL_FAILURE("Not all commands used");
+ if (pos != size) return JXL_FAILURE("Not all data used");
+ return true; // Valid end
+ }
+ if (i == kICCHeaderSize) break; // Done
+ ICCPredictHeader(result->data(), result->size(), header.data(), i);
+ if (pos >= size) return JXL_FAILURE("Out of bounds");
+ result->push_back(enc[pos++] + header[i]);
+ }
+ if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+
+ // Tag list
+ uint64_t numtags = DecodeVarInt(enc, size, &cpos);
+
+ if (numtags != 0) {
+ numtags--;
+ JXL_RETURN_IF_ERROR(CheckIs32Bit(numtags));
+ AppendUint32(numtags, result);
+ uint64_t prevtagstart = kICCHeaderSize + numtags * 12;
+ uint64_t prevtagsize = 0;
+ for (;;) {
+ if (result->size() > osize) return JXL_FAILURE("Invalid result size");
+ if (cpos > commands_end) return JXL_FAILURE("Out of bounds");
+ if (cpos == commands_end) break; // Valid end
+ uint8_t command = enc[cpos++];
+ uint8_t tagcode = command & 63;
+ Tag tag;
+ if (tagcode == 0) {
+ break;
+ } else if (tagcode == kCommandTagUnknown) {
+ JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, 4, size));
+ tag = DecodeKeyword(enc, size, pos);
+ pos += 4;
+ } else if (tagcode == kCommandTagTRC) {
+ tag = kRtrcTag;
+ } else if (tagcode == kCommandTagXYZ) {
+ tag = kRxyzTag;
+ } else {
+ if (tagcode - kCommandTagStringFirst >= kNumTagStrings) {
+ return JXL_FAILURE("Unknown tagcode");
+ }
+ tag = *kTagStrings[tagcode - kCommandTagStringFirst];
+ }
+ AppendKeyword(tag, result);
+
+ uint64_t tagstart;
+ uint64_t tagsize = prevtagsize;
+ if (tag == kRxyzTag || tag == kGxyzTag || tag == kBxyzTag ||
+ tag == kKxyzTag || tag == kWtptTag || tag == kBkptTag ||
+ tag == kLumiTag) {
+ tagsize = 20;
+ }
+
+ if (command & kFlagBitOffset) {
+ if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+ tagstart = DecodeVarInt(enc, size, &cpos);
+ } else {
+ JXL_RETURN_IF_ERROR(CheckIs32Bit(prevtagstart));
+ tagstart = prevtagstart + prevtagsize;
+ }
+ JXL_RETURN_IF_ERROR(CheckIs32Bit(tagstart));
+ AppendUint32(tagstart, result);
+ if (command & kFlagBitSize) {
+ if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+ tagsize = DecodeVarInt(enc, size, &cpos);
+ }
+ JXL_RETURN_IF_ERROR(CheckIs32Bit(tagsize));
+ AppendUint32(tagsize, result);
+ prevtagstart = tagstart;
+ prevtagsize = tagsize;
+
+ if (tagcode == kCommandTagTRC) {
+ AppendKeyword(kGtrcTag, result);
+ AppendUint32(tagstart, result);
+ AppendUint32(tagsize, result);
+ AppendKeyword(kBtrcTag, result);
+ AppendUint32(tagstart, result);
+ AppendUint32(tagsize, result);
+ }
+
+ if (tagcode == kCommandTagXYZ) {
+ JXL_RETURN_IF_ERROR(CheckIs32Bit(tagstart + tagsize * 2));
+ AppendKeyword(kGxyzTag, result);
+ AppendUint32(tagstart + tagsize, result);
+ AppendUint32(tagsize, result);
+ AppendKeyword(kBxyzTag, result);
+ AppendUint32(tagstart + tagsize * 2, result);
+ AppendUint32(tagsize, result);
+ }
+ }
+ }
+
+ // Main Content
+ for (;;) {
+ if (result->size() > osize) return JXL_FAILURE("Invalid result size");
+ if (cpos > commands_end) return JXL_FAILURE("Out of bounds");
+ if (cpos == commands_end) break; // Valid end
+ uint8_t command = enc[cpos++];
+ if (command == kCommandInsert) {
+ if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+ uint64_t num = DecodeVarInt(enc, size, &cpos);
+ JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, num, size));
+ for (size_t i = 0; i < num; i++) {
+ result->push_back(enc[pos++]);
+ }
+ } else if (command == kCommandShuffle2 || command == kCommandShuffle4) {
+ if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+ uint64_t num = DecodeVarInt(enc, size, &cpos);
+ JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, num, size));
+ PaddedBytes shuffled(num);
+ for (size_t i = 0; i < num; i++) {
+ shuffled[i] = enc[pos + i];
+ }
+ if (command == kCommandShuffle2) {
+ Shuffle(shuffled.data(), num, 2);
+ } else if (command == kCommandShuffle4) {
+ Shuffle(shuffled.data(), num, 4);
+ }
+ for (size_t i = 0; i < num; i++) {
+ result->push_back(shuffled[i]);
+ pos++;
+ }
+ } else if (command == kCommandPredict) {
+ JXL_RETURN_IF_ERROR(CheckOutOfBounds(cpos, 2, commands_end));
+ uint8_t flags = enc[cpos++];
+
+ size_t width = (flags & 3) + 1;
+ if (width == 3) return JXL_FAILURE("Invalid width");
+
+ int order = (flags & 12) >> 2;
+ if (order == 3) return JXL_FAILURE("Invalid order");
+
+ uint64_t stride = width;
+ if (flags & 16) {
+ if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+ stride = DecodeVarInt(enc, size, &cpos);
+ if (stride < width) {
+ return JXL_FAILURE("Invalid stride");
+ }
+ }
+ // If stride * 4 >= result->size(), return failure. The check
+ // "size == 0 || ((size - 1) >> 2) < stride" corresponds to
+ // "stride * 4 >= size", but does not suffer from integer overflow.
+ // This check is more strict than necessary but follows the specification
+ // and the encoder should ensure this is followed.
+ if (result->empty() || ((result->size() - 1u) >> 2u) < stride) {
+ return JXL_FAILURE("Invalid stride");
+ }
+
+ if (cpos >= commands_end) return JXL_FAILURE("Out of bounds");
+ uint64_t num = DecodeVarInt(enc, size, &cpos); // in bytes
+ JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, num, size));
+
+ PaddedBytes shuffled(num);
+ for (size_t i = 0; i < num; i++) {
+ shuffled[i] = enc[pos + i];
+ }
+ if (width > 1) Shuffle(shuffled.data(), num, width);
+
+ size_t start = result->size();
+ for (size_t i = 0; i < num; i++) {
+ uint8_t predicted = LinearPredictICCValue(result->data(), start, i,
+ stride, width, order);
+ result->push_back(predicted + shuffled[i]);
+ }
+ pos += num;
+ } else if (command == kCommandXYZ) {
+ AppendKeyword(kXyz_Tag, result);
+ for (int i = 0; i < 4; i++) result->push_back(0);
+ JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, 12, size));
+ for (size_t i = 0; i < 12; i++) {
+ result->push_back(enc[pos++]);
+ }
+ } else if (command >= kCommandTypeStartFirst &&
+ command < kCommandTypeStartFirst + kNumTypeStrings) {
+ AppendKeyword(*kTypeStrings[command - kCommandTypeStartFirst], result);
+ for (size_t i = 0; i < 4; i++) {
+ result->push_back(0);
+ }
+ } else {
+ return JXL_FAILURE("Unknown command");
+ }
+ }
+
+ if (pos != size) return JXL_FAILURE("Not all data used");
+ if (result->size() != osize) return JXL_FAILURE("Invalid result size");
+
+ return true;
+}
+
+Status ICCReader::Init(BitReader* reader, size_t output_limit) {
+ JXL_RETURN_IF_ERROR(CheckEOI(reader));
+ used_bits_base_ = reader->TotalBitsConsumed();
+ if (bits_to_skip_ == 0) {
+ enc_size_ = U64Coder::Read(reader);
+ if (enc_size_ > 268435456) {
+ // Avoid too large memory allocation for invalid file.
+ return JXL_FAILURE("Too large encoded profile");
+ }
+ JXL_RETURN_IF_ERROR(
+ DecodeHistograms(reader, kNumICCContexts, &code_, &context_map_));
+ ans_reader_ = ANSSymbolReader(&code_, reader);
+ i_ = 0;
+ decompressed_.resize(std::min<size_t>(i_ + 0x400, enc_size_));
+ for (; i_ < std::min<size_t>(2, enc_size_); i_++) {
+ decompressed_[i_] = ans_reader_.ReadHybridUint(
+ ICCANSContext(i_, i_ > 0 ? decompressed_[i_ - 1] : 0,
+ i_ > 1 ? decompressed_[i_ - 2] : 0),
+ reader, context_map_);
+ }
+ if (enc_size_ > kPreambleSize) {
+ for (; i_ < kPreambleSize; i_++) {
+ decompressed_[i_] = ans_reader_.ReadHybridUint(
+ ICCANSContext(i_, decompressed_[i_ - 1], decompressed_[i_ - 2]),
+ reader, context_map_);
+ }
+ JXL_RETURN_IF_ERROR(CheckEOI(reader));
+ JXL_RETURN_IF_ERROR(
+ CheckPreamble(decompressed_, enc_size_, output_limit));
+ }
+ bits_to_skip_ = reader->TotalBitsConsumed() - used_bits_base_;
+ } else {
+ reader->SkipBits(bits_to_skip_);
+ }
+ return true;
+}
+
+Status ICCReader::Process(BitReader* reader, PaddedBytes* icc) {
+ ANSSymbolReader::Checkpoint checkpoint;
+ size_t saved_i = 0;
+ auto save = [&]() {
+ ans_reader_.Save(&checkpoint);
+ bits_to_skip_ = reader->TotalBitsConsumed() - used_bits_base_;
+ saved_i = i_;
+ };
+ save();
+ auto check_and_restore = [&]() {
+ Status status = CheckEOI(reader);
+ if (!status) {
+ // not enough bytes.
+ ans_reader_.Restore(checkpoint);
+ i_ = saved_i;
+ return status;
+ }
+ return Status(true);
+ };
+ for (; i_ < enc_size_; i_++) {
+ if (i_ % ANSSymbolReader::kMaxCheckpointInterval == 0 && i_ > 0) {
+ JXL_RETURN_IF_ERROR(check_and_restore());
+ save();
+ if ((i_ > 0) && (((i_ & 0xFFFF) == 0))) {
+ float used_bytes =
+ (reader->TotalBitsConsumed() - used_bits_base_) / 8.0f;
+ if (i_ > used_bytes * 256) return JXL_FAILURE("Corrupted stream");
+ }
+ decompressed_.resize(std::min<size_t>(i_ + 0x400, enc_size_));
+ }
+ JXL_DASSERT(i_ >= 2);
+ decompressed_[i_] = ans_reader_.ReadHybridUint(
+ ICCANSContext(i_, decompressed_[i_ - 1], decompressed_[i_ - 2]), reader,
+ context_map_);
+ }
+ JXL_RETURN_IF_ERROR(check_and_restore());
+ bits_to_skip_ = reader->TotalBitsConsumed() - used_bits_base_;
+ if (!ans_reader_.CheckANSFinalState()) {
+ return JXL_FAILURE("Corrupted ICC profile");
+ }
+
+ icc->clear();
+ return UnpredictICC(decompressed_.data(), decompressed_.size(), icc);
+}
+
+Status ICCReader::CheckEOI(BitReader* reader) {
+ if (reader->AllReadsWithinBounds()) return true;
+ return JXL_STATUS(StatusCode::kNotEnoughBytes,
+ "Not enough bytes for reading ICC profile");
+}
+
+Status ReadICC(BitReader* JXL_RESTRICT reader, PaddedBytes* JXL_RESTRICT icc,
+ size_t output_limit) {
+ ICCReader icc_reader;
+ JXL_RETURN_IF_ERROR(icc_reader.Init(reader, output_limit));
+ JXL_RETURN_IF_ERROR(icc_reader.Process(reader, icc));
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/icc_codec.h b/third_party/jpeg-xl/lib/jxl/icc_codec.h
new file mode 100644
index 0000000000..a6c7477c60
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/icc_codec.h
@@ -0,0 +1,57 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ICC_CODEC_H_
+#define LIB_JXL_ICC_CODEC_H_
+
+// Compressed representation of ICC profiles.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+
+namespace jxl {
+
+struct ICCReader {
+ Status Init(BitReader* reader, size_t output_limit);
+ Status Process(BitReader* reader, PaddedBytes* icc);
+ void Reset() {
+ bits_to_skip_ = 0;
+ decompressed_.clear();
+ }
+
+ private:
+ Status CheckEOI(BitReader* reader);
+ size_t i_ = 0;
+ size_t bits_to_skip_ = 0;
+ size_t used_bits_base_ = 0;
+ uint64_t enc_size_ = 0;
+ std::vector<uint8_t> context_map_;
+ ANSCode code_;
+ ANSSymbolReader ans_reader_;
+ PaddedBytes decompressed_;
+};
+
+// `icc` may be empty afterwards - if so, call CreateProfile. Does not append,
+// clears any original data that was in icc.
+// If `output_limit` is not 0, then returns error if resulting profile would be
+// longer than `output_limit`
+Status ReadICC(BitReader* JXL_RESTRICT reader, PaddedBytes* JXL_RESTRICT icc,
+ size_t output_limit = 0);
+
+// Exposed only for testing
+Status PredictICC(const uint8_t* icc, size_t size, PaddedBytes* result);
+
+// Exposed only for testing
+Status UnpredictICC(const uint8_t* enc, size_t size, PaddedBytes* result);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ICC_CODEC_H_
diff --git a/third_party/jpeg-xl/lib/jxl/icc_codec_common.cc b/third_party/jpeg-xl/lib/jxl/icc_codec_common.cc
new file mode 100644
index 0000000000..212387e78f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/icc_codec_common.cc
@@ -0,0 +1,190 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/icc_codec_common.h"
+
+#include <stdint.h>
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+namespace {
+static uint8_t ByteKind1(uint8_t b) {
+ if ('a' <= b && b <= 'z') return 0;
+ if ('A' <= b && b <= 'Z') return 0;
+ if ('0' <= b && b <= '9') return 1;
+ if (b == '.' || b == ',') return 1;
+ if (b == 0) return 2;
+ if (b == 1) return 3;
+ if (b < 16) return 4;
+ if (b == 255) return 6;
+ if (b > 240) return 5;
+ return 7;
+}
+
+static uint8_t ByteKind2(uint8_t b) {
+ if ('a' <= b && b <= 'z') return 0;
+ if ('A' <= b && b <= 'Z') return 0;
+ if ('0' <= b && b <= '9') return 1;
+ if (b == '.' || b == ',') return 1;
+ if (b < 16) return 2;
+ if (b > 240) return 3;
+ return 4;
+}
+
+template <typename T>
+T PredictValue(T p1, T p2, T p3, int order) {
+ if (order == 0) return p1;
+ if (order == 1) return 2 * p1 - p2;
+ if (order == 2) return 3 * p1 - 3 * p2 + p3;
+ return 0;
+}
+} // namespace
+
+uint32_t DecodeUint32(const uint8_t* data, size_t size, size_t pos) {
+ return pos + 4 > size ? 0 : LoadBE32(data + pos);
+}
+
+void EncodeUint32(size_t pos, uint32_t value, PaddedBytes* data) {
+ if (pos + 4 > data->size()) return;
+ StoreBE32(value, data->data() + pos);
+}
+
+void AppendUint32(uint32_t value, PaddedBytes* data) {
+ data->resize(data->size() + 4);
+ EncodeUint32(data->size() - 4, value, data);
+}
+
+typedef std::array<uint8_t, 4> Tag;
+
+Tag DecodeKeyword(const uint8_t* data, size_t size, size_t pos) {
+ if (pos + 4 > size) return {{' ', ' ', ' ', ' '}};
+ return {{data[pos], data[pos + 1], data[pos + 2], data[pos + 3]}};
+}
+
+void EncodeKeyword(const Tag& keyword, uint8_t* data, size_t size, size_t pos) {
+ if (keyword.size() != 4 || pos + 3 >= size) return;
+ for (size_t i = 0; i < 4; ++i) data[pos + i] = keyword[i];
+}
+
+void AppendKeyword(const Tag& keyword, PaddedBytes* data) {
+ JXL_ASSERT(keyword.size() == 4);
+ data->append(keyword);
+}
+
+// Checks if a + b > size, taking possible integer overflow into account.
+Status CheckOutOfBounds(size_t a, size_t b, size_t size) {
+ size_t pos = a + b;
+ if (pos > size) return JXL_FAILURE("Out of bounds");
+ if (pos < a) return JXL_FAILURE("Out of bounds"); // overflow happened
+ return true;
+}
+
+Status CheckIs32Bit(uint64_t v) {
+ static constexpr const uint64_t kUpper32 = ~static_cast<uint64_t>(0xFFFFFFFF);
+ if ((v & kUpper32) != 0) return JXL_FAILURE("32-bit value expected");
+ return true;
+}
+
+PaddedBytes ICCInitialHeaderPrediction() {
+ PaddedBytes result(kICCHeaderSize);
+ for (size_t i = 0; i < kICCHeaderSize; i++) {
+ result[i] = 0;
+ }
+ result[8] = 4;
+ EncodeKeyword(kMntrTag, result.data(), result.size(), 12);
+ EncodeKeyword(kRgb_Tag, result.data(), result.size(), 16);
+ EncodeKeyword(kXyz_Tag, result.data(), result.size(), 20);
+ EncodeKeyword(kAcspTag, result.data(), result.size(), 36);
+ result[68] = 0;
+ result[69] = 0;
+ result[70] = 246;
+ result[71] = 214;
+ result[72] = 0;
+ result[73] = 1;
+ result[74] = 0;
+ result[75] = 0;
+ result[76] = 0;
+ result[77] = 0;
+ result[78] = 211;
+ result[79] = 45;
+ return result;
+}
+
+void ICCPredictHeader(const uint8_t* icc, size_t size, uint8_t* header,
+ size_t pos) {
+ if (pos == 8 && size >= 8) {
+ header[80] = icc[4];
+ header[81] = icc[5];
+ header[82] = icc[6];
+ header[83] = icc[7];
+ }
+ if (pos == 41 && size >= 41) {
+ if (icc[40] == 'A') {
+ header[41] = 'P';
+ header[42] = 'P';
+ header[43] = 'L';
+ }
+ if (icc[40] == 'M') {
+ header[41] = 'S';
+ header[42] = 'F';
+ header[43] = 'T';
+ }
+ }
+ if (pos == 42 && size >= 42) {
+ if (icc[40] == 'S' && icc[41] == 'G') {
+ header[42] = 'I';
+ header[43] = ' ';
+ }
+ if (icc[40] == 'S' && icc[41] == 'U') {
+ header[42] = 'N';
+ header[43] = 'W';
+ }
+ }
+}
+
+// Predicts a value with linear prediction of given order (0-2), for integers
+// with width bytes and given stride in bytes between values.
+// The start position is at start + i, and the relevant modulus of i describes
+// which byte of the multi-byte integer is being handled.
+// The value start + i must be at least stride * 4.
+uint8_t LinearPredictICCValue(const uint8_t* data, size_t start, size_t i,
+ size_t stride, size_t width, int order) {
+ size_t pos = start + i;
+ if (width == 1) {
+ uint8_t p1 = data[pos - stride];
+ uint8_t p2 = data[pos - stride * 2];
+ uint8_t p3 = data[pos - stride * 3];
+ return PredictValue(p1, p2, p3, order);
+ } else if (width == 2) {
+ size_t p = start + (i & ~1);
+ uint16_t p1 = (data[p - stride * 1] << 8) + data[p - stride * 1 + 1];
+ uint16_t p2 = (data[p - stride * 2] << 8) + data[p - stride * 2 + 1];
+ uint16_t p3 = (data[p - stride * 3] << 8) + data[p - stride * 3 + 1];
+ uint16_t pred = PredictValue(p1, p2, p3, order);
+ return (i & 1) ? (pred & 255) : ((pred >> 8) & 255);
+ } else {
+ size_t p = start + (i & ~3);
+ uint32_t p1 = DecodeUint32(data, pos, p - stride);
+ uint32_t p2 = DecodeUint32(data, pos, p - stride * 2);
+ uint32_t p3 = DecodeUint32(data, pos, p - stride * 3);
+ uint32_t pred = PredictValue(p1, p2, p3, order);
+ unsigned shiftbytes = 3 - (i & 3);
+ return (pred >> (shiftbytes * 8)) & 255;
+ }
+}
+
+size_t ICCANSContext(size_t i, size_t b1, size_t b2) {
+ if (i <= 128) return 0;
+ return 1 + ByteKind1(b1) + ByteKind2(b2) * 8;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/icc_codec_common.h b/third_party/jpeg-xl/lib/jxl/icc_codec_common.h
new file mode 100644
index 0000000000..e91e908669
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/icc_codec_common.h
@@ -0,0 +1,106 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ICC_CODEC_COMMON_H_
+#define LIB_JXL_ICC_CODEC_COMMON_H_
+
+// Compressed representation of ICC profiles.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+static constexpr size_t kICCHeaderSize = 128;
+
+typedef std::array<uint8_t, 4> Tag;
+
+static const Tag kAcspTag = {{'a', 'c', 's', 'p'}};
+static const Tag kBkptTag = {{'b', 'k', 'p', 't'}};
+static const Tag kBtrcTag = {{'b', 'T', 'R', 'C'}};
+static const Tag kBxyzTag = {{'b', 'X', 'Y', 'Z'}};
+static const Tag kChadTag = {{'c', 'h', 'a', 'd'}};
+static const Tag kChrmTag = {{'c', 'h', 'r', 'm'}};
+static const Tag kCprtTag = {{'c', 'p', 'r', 't'}};
+static const Tag kCurvTag = {{'c', 'u', 'r', 'v'}};
+static const Tag kDescTag = {{'d', 'e', 's', 'c'}};
+static const Tag kDmddTag = {{'d', 'm', 'd', 'd'}};
+static const Tag kDmndTag = {{'d', 'm', 'n', 'd'}};
+static const Tag kGbd_Tag = {{'g', 'b', 'd', ' '}};
+static const Tag kGtrcTag = {{'g', 'T', 'R', 'C'}};
+static const Tag kGxyzTag = {{'g', 'X', 'Y', 'Z'}};
+static const Tag kKtrcTag = {{'k', 'T', 'R', 'C'}};
+static const Tag kKxyzTag = {{'k', 'X', 'Y', 'Z'}};
+static const Tag kLumiTag = {{'l', 'u', 'm', 'i'}};
+static const Tag kMab_Tag = {{'m', 'A', 'B', ' '}};
+static const Tag kMba_Tag = {{'m', 'B', 'A', ' '}};
+static const Tag kMlucTag = {{'m', 'l', 'u', 'c'}};
+static const Tag kMntrTag = {{'m', 'n', 't', 'r'}};
+static const Tag kParaTag = {{'p', 'a', 'r', 'a'}};
+static const Tag kRgb_Tag = {{'R', 'G', 'B', ' '}};
+static const Tag kRtrcTag = {{'r', 'T', 'R', 'C'}};
+static const Tag kRxyzTag = {{'r', 'X', 'Y', 'Z'}};
+static const Tag kSf32Tag = {{'s', 'f', '3', '2'}};
+static const Tag kTextTag = {{'t', 'e', 'x', 't'}};
+static const Tag kVcgtTag = {{'v', 'c', 'g', 't'}};
+static const Tag kWtptTag = {{'w', 't', 'p', 't'}};
+static const Tag kXyz_Tag = {{'X', 'Y', 'Z', ' '}};
+
+// Tag names focused on RGB and GRAY monitor profiles
+static constexpr size_t kNumTagStrings = 17;
+static constexpr const Tag* kTagStrings[kNumTagStrings] = {
+ &kCprtTag, &kWtptTag, &kBkptTag, &kRxyzTag, &kGxyzTag, &kBxyzTag,
+ &kKxyzTag, &kRtrcTag, &kGtrcTag, &kBtrcTag, &kKtrcTag, &kChadTag,
+ &kDescTag, &kChrmTag, &kDmndTag, &kDmddTag, &kLumiTag};
+
+static constexpr size_t kCommandTagUnknown = 1;
+static constexpr size_t kCommandTagTRC = 2;
+static constexpr size_t kCommandTagXYZ = 3;
+static constexpr size_t kCommandTagStringFirst = 4;
+
+// Tag types focused on RGB and GRAY monitor profiles
+static constexpr size_t kNumTypeStrings = 8;
+static constexpr const Tag* kTypeStrings[kNumTypeStrings] = {
+ &kXyz_Tag, &kDescTag, &kTextTag, &kMlucTag,
+ &kParaTag, &kCurvTag, &kSf32Tag, &kGbd_Tag};
+
+static constexpr size_t kCommandInsert = 1;
+static constexpr size_t kCommandShuffle2 = 2;
+static constexpr size_t kCommandShuffle4 = 3;
+static constexpr size_t kCommandPredict = 4;
+static constexpr size_t kCommandXYZ = 10;
+static constexpr size_t kCommandTypeStartFirst = 16;
+
+static constexpr size_t kFlagBitOffset = 64;
+static constexpr size_t kFlagBitSize = 128;
+
+static constexpr size_t kNumICCContexts = 41;
+
+uint32_t DecodeUint32(const uint8_t* data, size_t size, size_t pos);
+void EncodeUint32(size_t pos, uint32_t value, PaddedBytes* data);
+void AppendUint32(uint32_t value, PaddedBytes* data);
+Tag DecodeKeyword(const uint8_t* data, size_t size, size_t pos);
+void EncodeKeyword(const Tag& keyword, uint8_t* data, size_t size, size_t pos);
+void AppendKeyword(const Tag& keyword, PaddedBytes* data);
+
+// Checks if a + b > size, taking possible integer overflow into account.
+Status CheckOutOfBounds(size_t a, size_t b, size_t size);
+Status CheckIs32Bit(uint64_t v);
+
+PaddedBytes ICCInitialHeaderPrediction();
+void ICCPredictHeader(const uint8_t* icc, size_t size, uint8_t* header,
+ size_t pos);
+uint8_t LinearPredictICCValue(const uint8_t* data, size_t start, size_t i,
+ size_t stride, size_t width, int order);
+size_t ICCANSContext(size_t i, size_t b1, size_t b2);
+
+} // namespace jxl
+
+#endif // LIB_JXL_ICC_CODEC_COMMON_H_
diff --git a/third_party/jpeg-xl/lib/jxl/icc_codec_test.cc b/third_party/jpeg-xl/lib/jxl/icc_codec_test.cc
new file mode 100644
index 0000000000..af02094e99
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/icc_codec_test.cc
@@ -0,0 +1,207 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/icc_codec.h"
+
+#include <string>
+
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/enc_icc_codec.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+void TestProfile(const PaddedBytes& icc) {
+ BitWriter writer;
+ ASSERT_TRUE(WriteICC(icc, &writer, 0, nullptr));
+ writer.ZeroPadToByte();
+ PaddedBytes dec;
+ BitReader reader(writer.GetSpan());
+ ASSERT_TRUE(ReadICC(&reader, &dec));
+ ASSERT_TRUE(reader.Close());
+ EXPECT_EQ(icc.size(), dec.size());
+ if (icc.size() == dec.size()) {
+ for (size_t i = 0; i < icc.size(); i++) {
+ EXPECT_EQ(icc[i], dec[i]);
+ if (icc[i] != dec[i]) break; // One output is enough
+ }
+ }
+}
+
+void TestProfile(const std::string& icc) {
+ PaddedBytes bytes(icc.size());
+ for (size_t i = 0; i < icc.size(); i++) {
+ bytes[i] = icc[i];
+ }
+ TestProfile(bytes);
+}
+
+// Valid profile from one of the images output by the decoder.
+static const unsigned char kTestProfile[] = {
+ 0x00, 0x00, 0x03, 0x80, 0x6c, 0x63, 0x6d, 0x73, 0x04, 0x30, 0x00, 0x00,
+ 0x6d, 0x6e, 0x74, 0x72, 0x52, 0x47, 0x42, 0x20, 0x58, 0x59, 0x5a, 0x20,
+ 0x07, 0xe3, 0x00, 0x04, 0x00, 0x1d, 0x00, 0x0f, 0x00, 0x32, 0x00, 0x2e,
+ 0x61, 0x63, 0x73, 0x70, 0x41, 0x50, 0x50, 0x4c, 0x00, 0x00, 0x00, 0x01,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0xf6, 0xd6,
+ 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d, 0x6c, 0x63, 0x6d, 0x73,
+ 0x5f, 0x07, 0x0d, 0x3e, 0x4d, 0x32, 0xf2, 0x6e, 0x5d, 0x77, 0x26, 0xcc,
+ 0x23, 0xb0, 0x6a, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d,
+ 0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x42,
+ 0x63, 0x70, 0x72, 0x74, 0x00, 0x00, 0x01, 0x64, 0x00, 0x00, 0x01, 0x00,
+ 0x77, 0x74, 0x70, 0x74, 0x00, 0x00, 0x02, 0x64, 0x00, 0x00, 0x00, 0x14,
+ 0x63, 0x68, 0x61, 0x64, 0x00, 0x00, 0x02, 0x78, 0x00, 0x00, 0x00, 0x2c,
+ 0x72, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x02, 0xa4, 0x00, 0x00, 0x00, 0x14,
+ 0x62, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x02, 0xb8, 0x00, 0x00, 0x00, 0x14,
+ 0x67, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x02, 0xcc, 0x00, 0x00, 0x00, 0x14,
+ 0x72, 0x54, 0x52, 0x43, 0x00, 0x00, 0x02, 0xe0, 0x00, 0x00, 0x00, 0x20,
+ 0x67, 0x54, 0x52, 0x43, 0x00, 0x00, 0x02, 0xe0, 0x00, 0x00, 0x00, 0x20,
+ 0x62, 0x54, 0x52, 0x43, 0x00, 0x00, 0x02, 0xe0, 0x00, 0x00, 0x00, 0x20,
+ 0x63, 0x68, 0x72, 0x6d, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x24,
+ 0x64, 0x6d, 0x6e, 0x64, 0x00, 0x00, 0x03, 0x24, 0x00, 0x00, 0x00, 0x28,
+ 0x64, 0x6d, 0x64, 0x64, 0x00, 0x00, 0x03, 0x4c, 0x00, 0x00, 0x00, 0x32,
+ 0x6d, 0x6c, 0x75, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+ 0x00, 0x00, 0x00, 0x0c, 0x65, 0x6e, 0x55, 0x53, 0x00, 0x00, 0x00, 0x26,
+ 0x00, 0x00, 0x00, 0x1c, 0x00, 0x52, 0x00, 0x47, 0x00, 0x42, 0x00, 0x5f,
+ 0x00, 0x44, 0x00, 0x36, 0x00, 0x35, 0x00, 0x5f, 0x00, 0x53, 0x00, 0x52,
+ 0x00, 0x47, 0x00, 0x5f, 0x00, 0x52, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x5f,
+ 0x00, 0x37, 0x00, 0x30, 0x00, 0x39, 0x00, 0x00, 0x6d, 0x6c, 0x75, 0x63,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c,
+ 0x65, 0x6e, 0x55, 0x53, 0x00, 0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x1c,
+ 0x00, 0x43, 0x00, 0x6f, 0x00, 0x70, 0x00, 0x79, 0x00, 0x72, 0x00, 0x69,
+ 0x00, 0x67, 0x00, 0x68, 0x00, 0x74, 0x00, 0x20, 0x00, 0x32, 0x00, 0x30,
+ 0x00, 0x31, 0x00, 0x38, 0x00, 0x20, 0x00, 0x47, 0x00, 0x6f, 0x00, 0x6f,
+ 0x00, 0x67, 0x00, 0x6c, 0x00, 0x65, 0x00, 0x20, 0x00, 0x4c, 0x00, 0x4c,
+ 0x00, 0x43, 0x00, 0x2c, 0x00, 0x20, 0x00, 0x43, 0x00, 0x43, 0x00, 0x2d,
+ 0x00, 0x42, 0x00, 0x59, 0x00, 0x2d, 0x00, 0x53, 0x00, 0x41, 0x00, 0x20,
+ 0x00, 0x33, 0x00, 0x2e, 0x00, 0x30, 0x00, 0x20, 0x00, 0x55, 0x00, 0x6e,
+ 0x00, 0x70, 0x00, 0x6f, 0x00, 0x72, 0x00, 0x74, 0x00, 0x65, 0x00, 0x64,
+ 0x00, 0x20, 0x00, 0x6c, 0x00, 0x69, 0x00, 0x63, 0x00, 0x65, 0x00, 0x6e,
+ 0x00, 0x73, 0x00, 0x65, 0x00, 0x28, 0x00, 0x68, 0x00, 0x74, 0x00, 0x74,
+ 0x00, 0x70, 0x00, 0x73, 0x00, 0x3a, 0x00, 0x2f, 0x00, 0x2f, 0x00, 0x63,
+ 0x00, 0x72, 0x00, 0x65, 0x00, 0x61, 0x00, 0x74, 0x00, 0x69, 0x00, 0x76,
+ 0x00, 0x65, 0x00, 0x63, 0x00, 0x6f, 0x00, 0x6d, 0x00, 0x6d, 0x00, 0x6f,
+ 0x00, 0x6e, 0x00, 0x73, 0x00, 0x2e, 0x00, 0x6f, 0x00, 0x72, 0x00, 0x67,
+ 0x00, 0x2f, 0x00, 0x6c, 0x00, 0x69, 0x00, 0x63, 0x00, 0x65, 0x00, 0x6e,
+ 0x00, 0x73, 0x00, 0x65, 0x00, 0x73, 0x00, 0x2f, 0x00, 0x62, 0x00, 0x79,
+ 0x00, 0x2d, 0x00, 0x73, 0x00, 0x61, 0x00, 0x2f, 0x00, 0x33, 0x00, 0x2e,
+ 0x00, 0x30, 0x00, 0x2f, 0x00, 0x6c, 0x00, 0x65, 0x00, 0x67, 0x00, 0x61,
+ 0x00, 0x6c, 0x00, 0x63, 0x00, 0x6f, 0x00, 0x64, 0x00, 0x65, 0x00, 0x29,
+ 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf6, 0xd6,
+ 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d, 0x73, 0x66, 0x33, 0x32,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x0c, 0x42, 0x00, 0x00, 0x05, 0xde,
+ 0xff, 0xff, 0xf3, 0x25, 0x00, 0x00, 0x07, 0x93, 0x00, 0x00, 0xfd, 0x90,
+ 0xff, 0xff, 0xfb, 0xa1, 0xff, 0xff, 0xfd, 0xa2, 0x00, 0x00, 0x03, 0xdc,
+ 0x00, 0x00, 0xc0, 0x6e, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x6f, 0xa0, 0x00, 0x00, 0x38, 0xf5, 0x00, 0x00, 0x03, 0x90,
+ 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x9f,
+ 0x00, 0x00, 0x0f, 0x84, 0x00, 0x00, 0xb6, 0xc4, 0x58, 0x59, 0x5a, 0x20,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x62, 0x97, 0x00, 0x00, 0xb7, 0x87,
+ 0x00, 0x00, 0x18, 0xd9, 0x70, 0x61, 0x72, 0x61, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x38, 0xe4, 0x00, 0x00, 0xe8, 0xf0,
+ 0x00, 0x00, 0x17, 0x10, 0x00, 0x00, 0x38, 0xe4, 0x00, 0x00, 0x14, 0xbc,
+ 0x63, 0x68, 0x72, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00,
+ 0x00, 0x00, 0xa3, 0xd7, 0x00, 0x00, 0x54, 0x7c, 0x00, 0x00, 0x4c, 0xcd,
+ 0x00, 0x00, 0x99, 0x9a, 0x00, 0x00, 0x26, 0x67, 0x00, 0x00, 0x0f, 0x5c,
+ 0x6d, 0x6c, 0x75, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
+ 0x00, 0x00, 0x00, 0x0c, 0x65, 0x6e, 0x55, 0x53, 0x00, 0x00, 0x00, 0x0c,
+ 0x00, 0x00, 0x00, 0x1c, 0x00, 0x47, 0x00, 0x6f, 0x00, 0x6f, 0x00, 0x67,
+ 0x00, 0x6c, 0x00, 0x65, 0x6d, 0x6c, 0x75, 0x63, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x65, 0x6e, 0x55, 0x53,
+ 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x49, 0x00, 0x6d,
+ 0x00, 0x61, 0x00, 0x67, 0x00, 0x65, 0x00, 0x20, 0x00, 0x63, 0x00, 0x6f,
+ 0x00, 0x64, 0x00, 0x65, 0x00, 0x63, 0x00, 0x00,
+};
+
+} // namespace
+
+TEST(IccCodecTest, Icc) {
+ // Empty string cannot be tested, encoder checks against writing it.
+ TestProfile("a");
+ TestProfile("ab");
+ TestProfile("aaaa");
+
+ {
+ // Exactly the ICC header size
+ PaddedBytes profile(128);
+ for (size_t i = 0; i < 128; i++) {
+ profile[i] = 0;
+ }
+ TestProfile(profile);
+ }
+
+ {
+ PaddedBytes profile;
+ profile.append(kTestProfile, kTestProfile + sizeof(kTestProfile));
+ TestProfile(profile);
+ }
+
+ // Test substrings of full profile
+ {
+ PaddedBytes profile;
+ for (size_t i = 0; i <= 256; i++) {
+ profile.push_back(kTestProfile[i]);
+ TestProfile(profile);
+ }
+ }
+}
+
+// kTestProfile after encoding with the ICC codec
+static const unsigned char kEncodedTestProfile[] = {
+ 0x1f, 0x8b, 0x1, 0x13, 0x10, 0x0, 0x0, 0x0, 0x20, 0x4c, 0xcc, 0x3,
+ 0xe7, 0xa0, 0xa5, 0xa2, 0x90, 0xa4, 0x27, 0xe8, 0x79, 0x1d, 0xe3, 0x26,
+ 0x57, 0x54, 0xef, 0x0, 0xe8, 0x97, 0x2, 0xce, 0xa1, 0xd7, 0x85, 0x16,
+ 0xb4, 0x29, 0x94, 0x58, 0xf2, 0x56, 0xc0, 0x76, 0xea, 0x23, 0xec, 0x7c,
+ 0x73, 0x51, 0x41, 0x40, 0x23, 0x21, 0x95, 0x4, 0x75, 0x12, 0xc9, 0xcc,
+ 0x16, 0xbd, 0xb6, 0x99, 0xad, 0xf8, 0x75, 0x35, 0xb6, 0x42, 0xae, 0xae,
+ 0xae, 0x86, 0x56, 0xf8, 0xcc, 0x16, 0x30, 0xb3, 0x45, 0xad, 0xd, 0x40,
+ 0xd6, 0xd1, 0xd6, 0x99, 0x40, 0xbe, 0xe2, 0xdc, 0x31, 0x7, 0xa6, 0xb9,
+ 0x27, 0x92, 0x38, 0x0, 0x3, 0x5e, 0x2c, 0xbe, 0xe6, 0xfb, 0x19, 0xbf,
+ 0xf3, 0x6d, 0xbc, 0x4d, 0x64, 0xe5, 0xba, 0x76, 0xde, 0x31, 0x65, 0x66,
+ 0x14, 0xa6, 0x3a, 0xc5, 0x8f, 0xb1, 0xb4, 0xba, 0x1f, 0xb1, 0xb8, 0xd4,
+ 0x75, 0xba, 0x18, 0x86, 0x95, 0x3c, 0x26, 0xf6, 0x25, 0x62, 0x53, 0xfd,
+ 0x9c, 0x94, 0x76, 0xf6, 0x95, 0x2c, 0xb1, 0xfd, 0xdc, 0xc0, 0xe4, 0x3f,
+ 0xb3, 0xff, 0x67, 0xde, 0xd5, 0x94, 0xcc, 0xb0, 0x83, 0x2f, 0x28, 0x93,
+ 0x92, 0x3, 0xa1, 0x41, 0x64, 0x60, 0x62, 0x70, 0x80, 0x87, 0xaf, 0xe7,
+ 0x60, 0x4a, 0x20, 0x23, 0xb3, 0x11, 0x7, 0x38, 0x38, 0xd4, 0xa, 0x66,
+ 0xb5, 0x93, 0x41, 0x90, 0x19, 0x17, 0x18, 0x60, 0xa5, 0xb, 0x7a, 0x24,
+ 0xaa, 0x20, 0x81, 0xac, 0xa9, 0xa1, 0x70, 0xa6, 0x12, 0x8a, 0x4a, 0xa3,
+ 0xa0, 0xf9, 0x9a, 0x97, 0xe7, 0xa8, 0xac, 0x8, 0xa8, 0xc4, 0x2a, 0x86,
+ 0xa7, 0x69, 0x1e, 0x67, 0xe6, 0xbe, 0xa4, 0xd3, 0xff, 0x91, 0x61, 0xf6,
+ 0x8a, 0xe6, 0xb5, 0xb3, 0x61, 0x9f, 0x19, 0x17, 0x98, 0x27, 0x6b, 0xe9,
+ 0x8, 0x98, 0xe1, 0x21, 0x4a, 0x9, 0xb5, 0xd7, 0xca, 0xfa, 0x94, 0xd0,
+ 0x69, 0x1a, 0xeb, 0x52, 0x1, 0x4e, 0xf5, 0xf6, 0xdf, 0x7f, 0xe7, 0x29,
+ 0x70, 0xee, 0x4, 0xda, 0x2f, 0xa4, 0xff, 0xfe, 0xbb, 0x6f, 0xa8, 0xff,
+ 0xfe, 0xdb, 0xaf, 0x8, 0xf6, 0x72, 0xa1, 0x40, 0x5d, 0xf0, 0x2d, 0x8,
+ 0x82, 0x5b, 0x87, 0xbd, 0x10, 0x8, 0xe9, 0x7, 0xee, 0x4b, 0x80, 0xda,
+ 0x4a, 0x4, 0xc5, 0x5e, 0xa0, 0xb7, 0x1e, 0x60, 0xb0, 0x59, 0x76, 0x60,
+ 0xb, 0x2e, 0x19, 0x8a, 0x2e, 0x1c, 0xe6, 0x6, 0x20, 0xb8, 0x64, 0x18,
+ 0x2a, 0xcf, 0x51, 0x94, 0xd4, 0xee, 0xc3, 0xfe, 0x39, 0x74, 0xd4, 0x2b,
+ 0x48, 0xc9, 0x83, 0x4c, 0x9b, 0xd0, 0x4c, 0x35, 0x10, 0xe3, 0x9, 0xf7,
+ 0x72, 0xf0, 0x7a, 0xe, 0xbf, 0x7d, 0x36, 0x2e, 0x19, 0x7e, 0x3f, 0xc,
+ 0xf7, 0x93, 0xe7, 0xf4, 0x1d, 0x32, 0xc6, 0xb0, 0x89, 0xad, 0xe0, 0x28,
+ 0xc1, 0xa7, 0x59, 0xe3, 0x0,
+};
+
+// Tests that the decoded kEncodedTestProfile matches kTestProfile.
+TEST(IccCodecTest, EncodedIccProfile) {
+ jxl::BitReader reader(jxl::Span<const uint8_t>(kEncodedTestProfile,
+ sizeof(kEncodedTestProfile)));
+ jxl::PaddedBytes dec;
+ ASSERT_TRUE(ReadICC(&reader, &dec));
+ ASSERT_TRUE(reader.Close());
+ EXPECT_EQ(sizeof(kTestProfile), dec.size());
+ if (sizeof(kTestProfile) == dec.size()) {
+ for (size_t i = 0; i < dec.size(); i++) {
+ EXPECT_EQ(kTestProfile[i], dec[i]);
+ if (kTestProfile[i] != dec[i]) break; // One output is enough
+ }
+ }
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/image.cc b/third_party/jpeg-xl/lib/jxl/image.cc
new file mode 100644
index 0000000000..3faff6aefb
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/image.cc
@@ -0,0 +1,251 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/image.h"
+
+#include <algorithm> // swap
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/image.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/sanitizers.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+
+namespace HWY_NAMESPACE {
+size_t GetVectorSize() { return HWY_LANES(uint8_t); }
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+namespace {
+
+HWY_EXPORT(GetVectorSize); // Local function.
+
+// Returns distance [bytes] between the start of two consecutive rows, a
+// multiple of vector/cache line size but NOT CacheAligned::kAlias - see below.
+size_t BytesPerRow(const size_t xsize, const size_t sizeof_t) {
+ const size_t vec_size = VectorSize();
+ size_t valid_bytes = xsize * sizeof_t;
+
+ // Allow unaligned accesses starting at the last valid value - this may raise
+ // msan errors unless the user calls InitializePaddingForUnalignedAccesses.
+ // Skip for the scalar case because no extra lanes will be loaded.
+ if (vec_size != 0) {
+ valid_bytes += vec_size - sizeof_t;
+ }
+
+ // Round up to vector and cache line size.
+ const size_t align = std::max(vec_size, CacheAligned::kAlignment);
+ size_t bytes_per_row = RoundUpTo(valid_bytes, align);
+
+ // During the lengthy window before writes are committed to memory, CPUs
+ // guard against read after write hazards by checking the address, but
+ // only the lower 11 bits. We avoid a false dependency between writes to
+ // consecutive rows by ensuring their sizes are not multiples of 2 KiB.
+ // Avoid2K prevents the same problem for the planes of an Image3.
+ if (bytes_per_row % CacheAligned::kAlias == 0) {
+ bytes_per_row += align;
+ }
+
+ JXL_ASSERT(bytes_per_row % align == 0);
+ return bytes_per_row;
+}
+
+} // namespace
+
+size_t VectorSize() {
+ static size_t bytes = HWY_DYNAMIC_DISPATCH(GetVectorSize)();
+ return bytes;
+}
+
+PlaneBase::PlaneBase(const size_t xsize, const size_t ysize,
+ const size_t sizeof_t)
+ : xsize_(static_cast<uint32_t>(xsize)),
+ ysize_(static_cast<uint32_t>(ysize)),
+ orig_xsize_(static_cast<uint32_t>(xsize)),
+ orig_ysize_(static_cast<uint32_t>(ysize)) {
+ // (Can't profile CacheAligned itself because it is used by profiler.h)
+ PROFILER_FUNC;
+
+ JXL_CHECK(xsize == xsize_);
+ JXL_CHECK(ysize == ysize_);
+
+ JXL_ASSERT(sizeof_t == 1 || sizeof_t == 2 || sizeof_t == 4 || sizeof_t == 8);
+
+ bytes_per_row_ = 0;
+ // Dimensions can be zero, e.g. for lazily-allocated images. Only allocate
+ // if nonzero, because "zero" bytes still have padding/bookkeeping overhead.
+ if (xsize != 0 && ysize != 0) {
+ bytes_per_row_ = BytesPerRow(xsize, sizeof_t);
+ bytes_ = AllocateArray(bytes_per_row_ * ysize);
+ JXL_CHECK(bytes_.get());
+ InitializePadding(sizeof_t, Padding::kRoundUp);
+ }
+}
+
+void PlaneBase::InitializePadding(const size_t sizeof_t, Padding padding) {
+#if defined(MEMORY_SANITIZER) || HWY_IDE
+ if (xsize_ == 0 || ysize_ == 0) return;
+
+ const size_t vec_size = VectorSize();
+ if (vec_size == 0) return; // Scalar mode: no padding needed
+
+ const size_t valid_size = xsize_ * sizeof_t;
+ const size_t initialize_size = padding == Padding::kRoundUp
+ ? RoundUpTo(valid_size, vec_size)
+ : valid_size + vec_size - sizeof_t;
+ if (valid_size == initialize_size) return;
+
+ for (size_t y = 0; y < ysize_; ++y) {
+ uint8_t* JXL_RESTRICT row = static_cast<uint8_t*>(VoidRow(y));
+#if defined(__clang__) && \
+ ((!defined(__apple_build_version__) && __clang_major__ <= 6) || \
+ (defined(__apple_build_version__) && \
+ __apple_build_version__ <= 10001145))
+ // There's a bug in msan in clang-6 when handling AVX2 operations. This
+ // workaround allows tests to pass on msan, although it is slower and
+ // prevents msan warnings from uninitialized images.
+ std::fill(row, msan::kSanitizerSentinelByte, initialize_size);
+#else
+ memset(row + valid_size, msan::kSanitizerSentinelByte,
+ initialize_size - valid_size);
+#endif // clang6
+ }
+#endif // MEMORY_SANITIZER
+}
+
+void PlaneBase::Swap(PlaneBase& other) {
+ std::swap(xsize_, other.xsize_);
+ std::swap(ysize_, other.ysize_);
+ std::swap(orig_xsize_, other.orig_xsize_);
+ std::swap(orig_ysize_, other.orig_ysize_);
+ std::swap(bytes_per_row_, other.bytes_per_row_);
+ std::swap(bytes_, other.bytes_);
+}
+
+Image3F PadImageMirror(const Image3F& in, const size_t xborder,
+ const size_t yborder) {
+ size_t xsize = in.xsize();
+ size_t ysize = in.ysize();
+ Image3F out(xsize + 2 * xborder, ysize + 2 * yborder);
+ if (xborder > xsize || yborder > ysize) {
+ for (size_t c = 0; c < 3; c++) {
+ for (int32_t y = 0; y < static_cast<int32_t>(out.ysize()); y++) {
+ float* row_out = out.PlaneRow(c, y);
+ const float* row_in = in.PlaneRow(
+ c, Mirror(y - static_cast<int32_t>(yborder), in.ysize()));
+ for (int32_t x = 0; x < static_cast<int32_t>(out.xsize()); x++) {
+ int32_t xin = Mirror(x - static_cast<int32_t>(xborder), in.xsize());
+ row_out[x] = row_in[xin];
+ }
+ }
+ }
+ return out;
+ }
+ CopyImageTo(in, Rect(xborder, yborder, xsize, ysize), &out);
+ for (size_t c = 0; c < 3; c++) {
+ // Horizontal pad.
+ for (size_t y = 0; y < ysize; y++) {
+ for (size_t x = 0; x < xborder; x++) {
+ out.PlaneRow(c, y + yborder)[x] =
+ in.ConstPlaneRow(c, y)[xborder - x - 1];
+ out.PlaneRow(c, y + yborder)[x + xsize + xborder] =
+ in.ConstPlaneRow(c, y)[xsize - 1 - x];
+ }
+ }
+ // Vertical pad.
+ for (size_t y = 0; y < yborder; y++) {
+ memcpy(out.PlaneRow(c, y), out.ConstPlaneRow(c, 2 * yborder - 1 - y),
+ out.xsize() * sizeof(float));
+ memcpy(out.PlaneRow(c, y + ysize + yborder),
+ out.ConstPlaneRow(c, ysize + yborder - 1 - y),
+ out.xsize() * sizeof(float));
+ }
+ }
+ return out;
+}
+
+void PadImageToBlockMultipleInPlace(Image3F* JXL_RESTRICT in,
+ size_t block_dim) {
+ PROFILER_FUNC;
+ const size_t xsize_orig = in->xsize();
+ const size_t ysize_orig = in->ysize();
+ const size_t xsize = RoundUpTo(xsize_orig, block_dim);
+ const size_t ysize = RoundUpTo(ysize_orig, block_dim);
+ // Expands image size to the originally-allocated size.
+ in->ShrinkTo(xsize, ysize);
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t y = 0; y < ysize_orig; y++) {
+ float* JXL_RESTRICT row = in->PlaneRow(c, y);
+ for (size_t x = xsize_orig; x < xsize; x++) {
+ row[x] = row[xsize_orig - 1];
+ }
+ }
+ const float* JXL_RESTRICT row_src = in->ConstPlaneRow(c, ysize_orig - 1);
+ for (size_t y = ysize_orig; y < ysize; y++) {
+ memcpy(in->PlaneRow(c, y), row_src, xsize * sizeof(float));
+ }
+ }
+}
+
+static void DownsampleImage(const ImageF& input, size_t factor,
+ ImageF* output) {
+ JXL_ASSERT(factor != 1);
+ output->ShrinkTo(DivCeil(input.xsize(), factor),
+ DivCeil(input.ysize(), factor));
+ size_t in_stride = input.PixelsPerRow();
+ for (size_t y = 0; y < output->ysize(); y++) {
+ float* row_out = output->Row(y);
+ const float* row_in = input.Row(factor * y);
+ for (size_t x = 0; x < output->xsize(); x++) {
+ size_t cnt = 0;
+ float sum = 0;
+ for (size_t iy = 0; iy < factor && iy + factor * y < input.ysize();
+ iy++) {
+ for (size_t ix = 0; ix < factor && ix + factor * x < input.xsize();
+ ix++) {
+ sum += row_in[iy * in_stride + x * factor + ix];
+ cnt++;
+ }
+ }
+ row_out[x] = sum / cnt;
+ }
+ }
+}
+
+void DownsampleImage(ImageF* image, size_t factor) {
+ // Allocate extra space to avoid a reallocation when padding.
+ ImageF downsampled(DivCeil(image->xsize(), factor) + kBlockDim,
+ DivCeil(image->ysize(), factor) + kBlockDim);
+ DownsampleImage(*image, factor, &downsampled);
+ *image = std::move(downsampled);
+}
+
+void DownsampleImage(Image3F* opsin, size_t factor) {
+ JXL_ASSERT(factor != 1);
+ // Allocate extra space to avoid a reallocation when padding.
+ Image3F downsampled(DivCeil(opsin->xsize(), factor) + kBlockDim,
+ DivCeil(opsin->ysize(), factor) + kBlockDim);
+ downsampled.ShrinkTo(downsampled.xsize() - kBlockDim,
+ downsampled.ysize() - kBlockDim);
+ for (size_t c = 0; c < 3; c++) {
+ DownsampleImage(opsin->Plane(c), factor, &downsampled.Plane(c));
+ }
+ *opsin = std::move(downsampled);
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/image.h b/third_party/jpeg-xl/lib/jxl/image.h
new file mode 100644
index 0000000000..e66534220c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/image.h
@@ -0,0 +1,497 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_IMAGE_H_
+#define LIB_JXL_IMAGE_H_
+
+// SIMD/multicore-friendly planar image representation with row accessors.
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <sstream>
+#include <utility> // std::move
+
+#include "lib/jxl/base/cache_aligned.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+
+namespace jxl {
+
+// Helper function to create rows that are multiples of SIMD vector size.
+size_t VectorSize();
+
+// Type-independent parts of Plane<> - reduces code duplication and facilitates
+// moving member function implementations to cc file.
+struct PlaneBase {
+ PlaneBase()
+ : xsize_(0),
+ ysize_(0),
+ orig_xsize_(0),
+ orig_ysize_(0),
+ bytes_per_row_(0),
+ bytes_(nullptr) {}
+ PlaneBase(size_t xsize, size_t ysize, size_t sizeof_t);
+
+ // Copy construction/assignment is forbidden to avoid inadvertent copies,
+ // which can be very expensive. Use CopyImageTo() instead.
+ PlaneBase(const PlaneBase& other) = delete;
+ PlaneBase& operator=(const PlaneBase& other) = delete;
+
+ // Move constructor (required for returning Image from function)
+ PlaneBase(PlaneBase&& other) noexcept = default;
+
+ // Move assignment (required for std::vector)
+ PlaneBase& operator=(PlaneBase&& other) noexcept = default;
+
+ void Swap(PlaneBase& other);
+
+ // Useful for pre-allocating image with some padding for alignment purposes
+ // and later reporting the actual valid dimensions. May also be used to
+ // un-shrink the image. Caller is responsible for ensuring xsize/ysize are <=
+ // the original dimensions.
+ void ShrinkTo(const size_t xsize, const size_t ysize) {
+ JXL_CHECK(xsize <= orig_xsize_);
+ JXL_CHECK(ysize <= orig_ysize_);
+ xsize_ = static_cast<uint32_t>(xsize);
+ ysize_ = static_cast<uint32_t>(ysize);
+ // NOTE: we can't recompute bytes_per_row for more compact storage and
+ // better locality because that would invalidate the image contents.
+ }
+
+ // How many pixels.
+ JXL_INLINE size_t xsize() const { return xsize_; }
+ JXL_INLINE size_t ysize() const { return ysize_; }
+
+ // NOTE: do not use this for copying rows - the valid xsize may be much less.
+ JXL_INLINE size_t bytes_per_row() const { return bytes_per_row_; }
+
+ // Raw access to byte contents, for interfacing with other libraries.
+ // Unsigned char instead of char to avoid surprises (sign extension).
+ JXL_INLINE uint8_t* bytes() {
+ void* p = bytes_.get();
+ return static_cast<uint8_t * JXL_RESTRICT>(JXL_ASSUME_ALIGNED(p, 64));
+ }
+ JXL_INLINE const uint8_t* bytes() const {
+ const void* p = bytes_.get();
+ return static_cast<const uint8_t * JXL_RESTRICT>(JXL_ASSUME_ALIGNED(p, 64));
+ }
+
+ protected:
+ // Returns pointer to the start of a row.
+ JXL_INLINE void* VoidRow(const size_t y) const {
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+ defined(THREAD_SANITIZER)
+ if (y >= ysize_) {
+ JXL_ABORT("Row(%" PRIu64 ") in (%u x %u) image\n", (uint64_t)y, xsize_,
+ ysize_);
+ }
+#endif
+
+ void* row = bytes_.get() + y * bytes_per_row_;
+ return JXL_ASSUME_ALIGNED(row, 64);
+ }
+
+ enum class Padding {
+ // Allow Load(d, row + x) for x = 0; x < xsize(); x += Lanes(d). Default.
+ kRoundUp,
+ // Allow LoadU(d, row + x) for x = xsize() - 1. This requires an extra
+ // vector to be initialized. If done by default, this would suppress
+ // legitimate msan warnings. We therefore require users to explicitly call
+ // InitializePadding before using unaligned loads (e.g. convolution).
+ kUnaligned
+ };
+
+ // Initializes the minimum bytes required to suppress msan warnings from
+ // legitimate (according to Padding mode) vector loads/stores on the right
+ // border, where some lanes are uninitialized and assumed to be unused.
+ void InitializePadding(size_t sizeof_t, Padding padding);
+
+ // (Members are non-const to enable assignment during move-assignment.)
+ uint32_t xsize_; // In valid pixels, not including any padding.
+ uint32_t ysize_;
+ uint32_t orig_xsize_;
+ uint32_t orig_ysize_;
+ size_t bytes_per_row_; // Includes padding.
+ CacheAlignedUniquePtr bytes_;
+};
+
+// Single channel, aligned rows separated by padding. T must be POD.
+//
+// 'Single channel' (one 2D array per channel) simplifies vectorization
+// (repeating the same operation on multiple adjacent components) without the
+// complexity of a hybrid layout (8 R, 8 G, 8 B, ...). In particular, clients
+// can easily iterate over all components in a row and Image requires no
+// knowledge of the pixel format beyond the component type "T".
+//
+// 'Aligned' means each row is aligned to the L1 cache line size. This prevents
+// false sharing between two threads operating on adjacent rows.
+//
+// 'Padding' is still relevant because vectors could potentially be larger than
+// a cache line. By rounding up row sizes to the vector size, we allow
+// reading/writing ALIGNED vectors whose first lane is a valid sample. This
+// avoids needing a separate loop to handle remaining unaligned lanes.
+//
+// This image layout could also be achieved with a vector and a row accessor
+// function, but a class wrapper with support for "deleter" allows wrapping
+// existing memory allocated by clients without copying the pixels. It also
+// provides convenient accessors for xsize/ysize, which shortens function
+// argument lists. Supports move-construction so it can be stored in containers.
+template <typename ComponentType>
+class Plane : public PlaneBase {
+ public:
+ using T = ComponentType;
+ static constexpr size_t kNumPlanes = 1;
+
+ Plane() = default;
+ Plane(const size_t xsize, const size_t ysize)
+ : PlaneBase(xsize, ysize, sizeof(T)) {}
+
+ void InitializePaddingForUnalignedAccesses() {
+ InitializePadding(sizeof(T), Padding::kUnaligned);
+ }
+
+ JXL_INLINE T* Row(const size_t y) { return static_cast<T*>(VoidRow(y)); }
+
+ // Returns pointer to const (see above).
+ JXL_INLINE const T* Row(const size_t y) const {
+ return static_cast<const T*>(VoidRow(y));
+ }
+
+ // Documents that the access is const.
+ JXL_INLINE const T* ConstRow(const size_t y) const {
+ return static_cast<const T*>(VoidRow(y));
+ }
+
+ // Returns number of pixels (some of which are padding) per row. Useful for
+ // computing other rows via pointer arithmetic. WARNING: this must
+ // NOT be used to determine xsize.
+ JXL_INLINE intptr_t PixelsPerRow() const {
+ return static_cast<intptr_t>(bytes_per_row_ / sizeof(T));
+ }
+};
+
+using ImageSB = Plane<int8_t>;
+using ImageB = Plane<uint8_t>;
+using ImageS = Plane<int16_t>; // signed integer or half-float
+using ImageU = Plane<uint16_t>;
+using ImageI = Plane<int32_t>;
+using ImageF = Plane<float>;
+using ImageD = Plane<double>;
+
+// Also works for Image3 and mixed argument types.
+template <class Image1, class Image2>
+bool SameSize(const Image1& image1, const Image2& image2) {
+ return image1.xsize() == image2.xsize() && image1.ysize() == image2.ysize();
+}
+
+template <typename T>
+class Image3;
+
+// Rectangular region in image(s). Factoring this out of Image instead of
+// shifting the pointer by x0/y0 allows this to apply to multiple images with
+// different resolutions (e.g. color transform and quantization field).
+// Can compare using SameSize(rect1, rect2).
+template <typename T>
+class RectT {
+ public:
+ // Most windows are xsize_max * ysize_max, except those on the borders where
+ // begin + size_max > end.
+ constexpr RectT(T xbegin, T ybegin, size_t xsize_max, size_t ysize_max,
+ T xend, T yend)
+ : x0_(xbegin),
+ y0_(ybegin),
+ xsize_(ClampedSize(xbegin, xsize_max, xend)),
+ ysize_(ClampedSize(ybegin, ysize_max, yend)) {}
+
+ // Construct with origin and known size (typically from another Rect).
+ constexpr RectT(T xbegin, T ybegin, size_t xsize, size_t ysize)
+ : x0_(xbegin), y0_(ybegin), xsize_(xsize), ysize_(ysize) {}
+
+ // Construct a rect that covers a whole image/plane/ImageBundle etc.
+ template <typename ImageT>
+ explicit RectT(const ImageT& image)
+ : RectT(0, 0, image.xsize(), image.ysize()) {}
+
+ RectT() : RectT(0, 0, 0, 0) {}
+
+ RectT(const RectT&) = default;
+ RectT& operator=(const RectT&) = default;
+
+ // Construct a subrect that resides in an image/plane/ImageBundle etc.
+ template <typename ImageT>
+ RectT Crop(const ImageT& image) const {
+ return Intersection(RectT(image));
+ }
+
+ // Construct a subrect that resides in the [0, ysize) x [0, xsize) region of
+ // the current rect.
+ RectT Crop(size_t area_xsize, size_t area_ysize) const {
+ return Intersection(RectT(0, 0, area_xsize, area_ysize));
+ }
+
+ // Returns a rect that only contains `num` lines with offset `y` from `y0()`.
+ RectT Lines(size_t y, size_t num) const {
+ JXL_DASSERT(y + num <= ysize_);
+ return RectT(x0_, y0_ + y, xsize_, num);
+ }
+
+ RectT Line(size_t y) const { return Lines(y, 1); }
+
+ JXL_MUST_USE_RESULT RectT Intersection(const RectT& other) const {
+ return RectT(std::max(x0_, other.x0_), std::max(y0_, other.y0_), xsize_,
+ ysize_, std::min(x1(), other.x1()),
+ std::min(y1(), other.y1()));
+ }
+
+ JXL_MUST_USE_RESULT RectT Translate(int64_t x_offset,
+ int64_t y_offset) const {
+ return RectT(x0_ + x_offset, y0_ + y_offset, xsize_, ysize_);
+ }
+
+ template <typename V>
+ V* Row(Plane<V>* image, size_t y) const {
+ JXL_DASSERT(y + y0_ >= 0);
+ return image->Row(y + y0_) + x0_;
+ }
+
+ template <typename V>
+ const V* Row(const Plane<V>* image, size_t y) const {
+ JXL_DASSERT(y + y0_ >= 0);
+ return image->Row(y + y0_) + x0_;
+ }
+
+ template <typename V>
+ V* PlaneRow(Image3<V>* image, const size_t c, size_t y) const {
+ JXL_DASSERT(y + y0_ >= 0);
+ return image->PlaneRow(c, y + y0_) + x0_;
+ }
+
+ template <typename V>
+ const V* ConstRow(const Plane<V>& image, size_t y) const {
+ JXL_DASSERT(y + y0_ >= 0);
+ return image.ConstRow(y + y0_) + x0_;
+ }
+
+ template <typename V>
+ const V* ConstPlaneRow(const Image3<V>& image, size_t c, size_t y) const {
+ JXL_DASSERT(y + y0_ >= 0);
+ return image.ConstPlaneRow(c, y + y0_) + x0_;
+ }
+
+ bool IsInside(const RectT& other) const {
+ return x0_ >= other.x0() && x1() <= other.x1() && y0_ >= other.y0() &&
+ y1() <= other.y1();
+ }
+
+ // Returns true if this Rect fully resides in the given image. ImageT could be
+ // Plane<T> or Image3<T>; however if ImageT is Rect, results are nonsensical.
+ template <class ImageT>
+ bool IsInside(const ImageT& image) const {
+ return IsInside(RectT(image));
+ }
+
+ T x0() const { return x0_; }
+ T y0() const { return y0_; }
+ size_t xsize() const { return xsize_; }
+ size_t ysize() const { return ysize_; }
+ T x1() const { return x0_ + xsize_; }
+ T y1() const { return y0_ + ysize_; }
+
+ RectT<T> ShiftLeft(size_t shiftx, size_t shifty) const {
+ return RectT<T>(x0_ * (1 << shiftx), y0_ * (1 << shifty), xsize_ << shiftx,
+ ysize_ << shifty);
+ }
+ RectT<T> ShiftLeft(size_t shift) const { return ShiftLeft(shift, shift); }
+
+ // Requires x0(), y0() to be multiples of 1<<shiftx, 1<<shifty.
+ RectT<T> CeilShiftRight(size_t shiftx, size_t shifty) const {
+ JXL_ASSERT(x0_ % (1 << shiftx) == 0);
+ JXL_ASSERT(y0_ % (1 << shifty) == 0);
+ return RectT<T>(x0_ / (1 << shiftx), y0_ / (1 << shifty),
+ DivCeil(xsize_, T{1} << shiftx),
+ DivCeil(ysize_, T{1} << shifty));
+ }
+ RectT<T> CeilShiftRight(std::pair<size_t, size_t> shift) const {
+ return CeilShiftRight(shift.first, shift.second);
+ }
+ RectT<T> CeilShiftRight(size_t shift) const {
+ return CeilShiftRight(shift, shift);
+ }
+
+ template <typename U>
+ RectT<U> As() const {
+ return RectT<U>(U(x0_), U(y0_), U(xsize_), U(ysize_));
+ }
+
+ private:
+ // Returns size_max, or whatever is left in [begin, end).
+ static constexpr size_t ClampedSize(T begin, size_t size_max, T end) {
+ return (static_cast<T>(begin + size_max) <= end)
+ ? size_max
+ : (end > begin ? end - begin : 0);
+ }
+
+ T x0_;
+ T y0_;
+
+ size_t xsize_;
+ size_t ysize_;
+};
+
+template <typename T>
+std::string Description(RectT<T> r) {
+ std::ostringstream os;
+ os << "[" << r.x0() << ".." << r.x1() << ")x"
+ << "[" << r.y0() << ".." << r.y1() << ")";
+ return os.str();
+}
+
+using Rect = RectT<size_t>;
+
+// Currently, we abuse Image to either refer to an image that owns its storage
+// or one that doesn't. In similar vein, we abuse Image* function parameters to
+// either mean "assign to me" or "fill the provided image with data".
+// Hopefully, the "assign to me" meaning will go away and most images in the
+// codebase will not be backed by own storage. When this happens we can redesign
+// Image to be a non-storage-holding view class and introduce BackedImage in
+// those places that actually need it.
+
+// NOTE: we can't use Image as a view because invariants are violated
+// (alignment and the presence of padding before/after each "row").
+
+// A bundle of 3 same-sized images. Typically constructed by moving from three
+// rvalue references to Image. To overwrite an existing Image3 using
+// single-channel producers, we also need access to Image*. Constructing
+// temporary non-owning Image pointing to one plane of an existing Image3 risks
+// dangling references, especially if the wrapper is moved. Therefore, we
+// store an array of Image (which are compact enough that size is not a concern)
+// and provide Plane+Row accessors.
+template <typename ComponentType>
+class Image3 {
+ public:
+ using T = ComponentType;
+ using PlaneT = jxl::Plane<T>;
+ static constexpr size_t kNumPlanes = 3;
+
+ Image3() : planes_{PlaneT(), PlaneT(), PlaneT()} {}
+
+ Image3(const size_t xsize, const size_t ysize)
+ : planes_{PlaneT(xsize, ysize), PlaneT(xsize, ysize),
+ PlaneT(xsize, ysize)} {}
+
+ Image3(Image3&& other) noexcept {
+ for (size_t i = 0; i < kNumPlanes; i++) {
+ planes_[i] = std::move(other.planes_[i]);
+ }
+ }
+
+ Image3(PlaneT&& plane0, PlaneT&& plane1, PlaneT&& plane2) {
+ JXL_CHECK(SameSize(plane0, plane1));
+ JXL_CHECK(SameSize(plane0, plane2));
+ planes_[0] = std::move(plane0);
+ planes_[1] = std::move(plane1);
+ planes_[2] = std::move(plane2);
+ }
+
+ // Copy construction/assignment is forbidden to avoid inadvertent copies,
+ // which can be very expensive. Use CopyImageTo instead.
+ Image3(const Image3& other) = delete;
+ Image3& operator=(const Image3& other) = delete;
+
+ Image3& operator=(Image3&& other) noexcept {
+ for (size_t i = 0; i < kNumPlanes; i++) {
+ planes_[i] = std::move(other.planes_[i]);
+ }
+ return *this;
+ }
+
+ // Returns row pointer; usage: PlaneRow(idx_plane, y)[x] = val.
+ JXL_INLINE T* PlaneRow(const size_t c, const size_t y) {
+ // Custom implementation instead of calling planes_[c].Row ensures only a
+ // single multiplication is needed for PlaneRow(0..2, y).
+ PlaneRowBoundsCheck(c, y);
+ const size_t row_offset = y * planes_[0].bytes_per_row();
+ void* row = planes_[c].bytes() + row_offset;
+ return static_cast<T * JXL_RESTRICT>(JXL_ASSUME_ALIGNED(row, 64));
+ }
+
+ // Returns const row pointer; usage: val = PlaneRow(idx_plane, y)[x].
+ JXL_INLINE const T* PlaneRow(const size_t c, const size_t y) const {
+ PlaneRowBoundsCheck(c, y);
+ const size_t row_offset = y * planes_[0].bytes_per_row();
+ const void* row = planes_[c].bytes() + row_offset;
+ return static_cast<const T * JXL_RESTRICT>(JXL_ASSUME_ALIGNED(row, 64));
+ }
+
+ // Returns const row pointer, even if called from a non-const Image3.
+ JXL_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) const {
+ PlaneRowBoundsCheck(c, y);
+ return PlaneRow(c, y);
+ }
+
+ JXL_INLINE const PlaneT& Plane(size_t idx) const { return planes_[idx]; }
+
+ JXL_INLINE PlaneT& Plane(size_t idx) { return planes_[idx]; }
+
+ void Swap(Image3& other) {
+ for (size_t c = 0; c < 3; ++c) {
+ other.planes_[c].Swap(planes_[c]);
+ }
+ }
+
+ // Useful for pre-allocating image with some padding for alignment purposes
+ // and later reporting the actual valid dimensions. May also be used to
+ // un-shrink the image. Caller is responsible for ensuring xsize/ysize are <=
+ // the original dimensions.
+ void ShrinkTo(const size_t xsize, const size_t ysize) {
+ for (PlaneT& plane : planes_) {
+ plane.ShrinkTo(xsize, ysize);
+ }
+ }
+
+ // Sizes of all three images are guaranteed to be equal.
+ JXL_INLINE size_t xsize() const { return planes_[0].xsize(); }
+ JXL_INLINE size_t ysize() const { return planes_[0].ysize(); }
+ // Returns offset [bytes] from one row to the next row of the same plane.
+ // WARNING: this must NOT be used to determine xsize, nor for copying rows -
+ // the valid xsize may be much less.
+ JXL_INLINE size_t bytes_per_row() const { return planes_[0].bytes_per_row(); }
+ // Returns number of pixels (some of which are padding) per row. Useful for
+ // computing other rows via pointer arithmetic. WARNING: this must NOT be used
+ // to determine xsize.
+ JXL_INLINE intptr_t PixelsPerRow() const { return planes_[0].PixelsPerRow(); }
+
+ private:
+ void PlaneRowBoundsCheck(const size_t c, const size_t y) const {
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+ defined(THREAD_SANITIZER)
+ if (c >= kNumPlanes || y >= ysize()) {
+ JXL_ABORT("PlaneRow(%" PRIu64 ", %" PRIu64 ") in (%" PRIu64 " x %" PRIu64
+ ") image\n",
+ static_cast<uint64_t>(c), static_cast<uint64_t>(y),
+ static_cast<uint64_t>(xsize()), static_cast<uint64_t>(ysize()));
+ }
+#endif
+ }
+
+ private:
+ PlaneT planes_[kNumPlanes];
+};
+
+using Image3B = Image3<uint8_t>;
+using Image3S = Image3<int16_t>;
+using Image3U = Image3<uint16_t>;
+using Image3I = Image3<int32_t>;
+using Image3F = Image3<float>;
+using Image3D = Image3<double>;
+
+} // namespace jxl
+
+#endif // LIB_JXL_IMAGE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/image_bundle.cc b/third_party/jpeg-xl/lib/jxl/image_bundle.cc
new file mode 100644
index 0000000000..7e7051b608
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/image_bundle.cc
@@ -0,0 +1,125 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/image_bundle.h"
+
+#include <limits>
+#include <utility>
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+
+void ImageBundle::ShrinkTo(size_t xsize, size_t ysize) {
+ if (HasColor()) color_.ShrinkTo(xsize, ysize);
+ for (ImageF& ec : extra_channels_) {
+ ec.ShrinkTo(xsize, ysize);
+ }
+}
+
+// Called by all other SetFrom*.
+void ImageBundle::SetFromImage(Image3F&& color,
+ const ColorEncoding& c_current) {
+ JXL_CHECK(color.xsize() != 0 && color.ysize() != 0);
+ JXL_CHECK(metadata_->color_encoding.IsGray() == c_current.IsGray());
+ color_ = std::move(color);
+ c_current_ = c_current;
+ VerifySizes();
+}
+
+void ImageBundle::VerifyMetadata() const {
+ JXL_CHECK(!c_current_.ICC().empty());
+ JXL_CHECK(metadata_->color_encoding.IsGray() == IsGray());
+
+ if (metadata_->HasAlpha() && alpha().xsize() == 0) {
+ JXL_ABORT("MD alpha_bits %u IB alpha %" PRIuS " x %" PRIuS "\n",
+ metadata_->GetAlphaBits(), alpha().xsize(), alpha().ysize());
+ }
+ const uint32_t alpha_bits = metadata_->GetAlphaBits();
+ JXL_CHECK(alpha_bits <= 32);
+
+ // metadata_->num_extra_channels may temporarily differ from
+ // extra_channels_.size(), e.g. after SetAlpha. They are synced by the next
+ // call to VisitFields.
+}
+
+void ImageBundle::VerifySizes() const {
+ const size_t xs = xsize();
+ const size_t ys = ysize();
+
+ if (HasExtraChannels()) {
+ JXL_CHECK(xs != 0 && ys != 0);
+ for (const ImageF& ec : extra_channels_) {
+ JXL_CHECK(ec.xsize() == xs);
+ JXL_CHECK(ec.ysize() == ys);
+ }
+ }
+}
+
+size_t ImageBundle::DetectRealBitdepth() const {
+ return metadata_->bit_depth.bits_per_sample;
+
+ // TODO(lode): let this function return lower bit depth if possible, e.g.
+ // return 8 bits in case the original image came from a 16-bit PNG that
+ // was in fact representable as 8-bit PNG. Ensure that the implementation
+ // returns 16 if e.g. two consecutive 16-bit values appeared in the original
+ // image (such as 32768 and 32769), take into account that e.g. the values
+ // 3-bit can represent is not a superset of the values 2-bit can represent,
+ // and there may be slight imprecisions in the floating point image.
+}
+
+const ImageF& ImageBundle::black() const {
+ JXL_ASSERT(HasBlack());
+ const size_t ec = metadata_->Find(ExtraChannel::kBlack) -
+ metadata_->extra_channel_info.data();
+ JXL_ASSERT(ec < extra_channels_.size());
+ return extra_channels_[ec];
+}
+const ImageF& ImageBundle::alpha() const {
+ JXL_ASSERT(HasAlpha());
+ const size_t ec = metadata_->Find(ExtraChannel::kAlpha) -
+ metadata_->extra_channel_info.data();
+ JXL_ASSERT(ec < extra_channels_.size());
+ return extra_channels_[ec];
+}
+ImageF* ImageBundle::alpha() {
+ JXL_ASSERT(HasAlpha());
+ const size_t ec = metadata_->Find(ExtraChannel::kAlpha) -
+ metadata_->extra_channel_info.data();
+ JXL_ASSERT(ec < extra_channels_.size());
+ return &extra_channels_[ec];
+}
+
+void ImageBundle::SetAlpha(ImageF&& alpha) {
+ const ExtraChannelInfo* eci = metadata_->Find(ExtraChannel::kAlpha);
+ // Must call SetAlphaBits first, otherwise we don't know which channel index
+ JXL_CHECK(eci != nullptr);
+ JXL_CHECK(alpha.xsize() != 0 && alpha.ysize() != 0);
+ if (extra_channels_.size() < metadata_->extra_channel_info.size()) {
+ // TODO(jon): get rid of this case
+ extra_channels_.insert(
+ extra_channels_.begin() + (eci - metadata_->extra_channel_info.data()),
+ std::move(alpha));
+ } else {
+ extra_channels_[eci - metadata_->extra_channel_info.data()] =
+ std::move(alpha);
+ }
+ // num_extra_channels is automatically set in visitor
+ VerifySizes();
+}
+
+void ImageBundle::SetExtraChannels(std::vector<ImageF>&& extra_channels) {
+ for (const ImageF& plane : extra_channels) {
+ JXL_CHECK(plane.xsize() != 0 && plane.ysize() != 0);
+ }
+ extra_channels_ = std::move(extra_channels);
+ VerifySizes();
+}
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/image_bundle.h b/third_party/jpeg-xl/lib/jxl/image_bundle.h
new file mode 100644
index 0000000000..c7b812b59a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/image_bundle.h
@@ -0,0 +1,254 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_IMAGE_BUNDLE_H_
+#define LIB_JXL_IMAGE_BUNDLE_H_
+
+// The main image or frame consists of a bundle of associated images.
+
+#include <jxl/cms_interface.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_metadata.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/quantizer.h"
+
+namespace jxl {
+
+// A bundle of color/alpha/depth/plane images.
+class ImageBundle {
+ public:
+ // Uninitialized state for use as output parameter.
+ ImageBundle() : metadata_(nullptr) {}
+ // Caller is responsible for setting metadata before calling Set*.
+ explicit ImageBundle(const ImageMetadata* metadata) : metadata_(metadata) {}
+
+ // Move-only (allows storing in std::vector).
+ ImageBundle(ImageBundle&&) = default;
+ ImageBundle& operator=(ImageBundle&&) = default;
+
+ ImageBundle Copy() const {
+ ImageBundle copy(metadata_);
+ copy.color_ = CopyImage(color_);
+ copy.c_current_ = c_current_;
+ copy.extra_channels_.reserve(extra_channels_.size());
+ for (const ImageF& plane : extra_channels_) {
+ copy.extra_channels_.emplace_back(CopyImage(plane));
+ }
+
+ copy.jpeg_data =
+ jpeg_data ? make_unique<jpeg::JPEGData>(*jpeg_data) : nullptr;
+ copy.color_transform = color_transform;
+ copy.chroma_subsampling = chroma_subsampling;
+
+ return copy;
+ }
+
+ // -- SIZE
+
+ size_t xsize() const {
+ if (IsJPEG()) return jpeg_data->width;
+ if (color_.xsize() != 0) return color_.xsize();
+ return extra_channels_.empty() ? 0 : extra_channels_[0].xsize();
+ }
+ size_t ysize() const {
+ if (IsJPEG()) return jpeg_data->height;
+ if (color_.ysize() != 0) return color_.ysize();
+ return extra_channels_.empty() ? 0 : extra_channels_[0].ysize();
+ }
+ void ShrinkTo(size_t xsize, size_t ysize);
+
+ // sizes taking orientation into account
+ size_t oriented_xsize() const {
+ if (static_cast<uint32_t>(metadata_->GetOrientation()) > 4) {
+ return ysize();
+ } else {
+ return xsize();
+ }
+ }
+ size_t oriented_ysize() const {
+ if (static_cast<uint32_t>(metadata_->GetOrientation()) > 4) {
+ return xsize();
+ } else {
+ return ysize();
+ }
+ }
+
+ // -- COLOR
+
+ // Whether color() is valid/usable. Returns true in most cases. Even images
+ // with spot colors (one example of when !planes().empty()) typically have a
+ // part that can be converted to RGB.
+ bool HasColor() const { return color_.xsize() != 0; }
+
+ // For resetting the size when switching from a reference to main frame.
+ void RemoveColor() { color_ = Image3F(); }
+
+ // Do not use if !HasColor().
+ const Image3F& color() const {
+ // If this fails, Set* was not called - perhaps because decoding failed?
+ JXL_DASSERT(HasColor());
+ return color_;
+ }
+
+ // Do not use if !HasColor().
+ Image3F* color() {
+ JXL_DASSERT(HasColor());
+ return &color_;
+ }
+
+ // If c_current.IsGray(), all planes must be identical. NOTE: c_current is
+ // independent of metadata()->color_encoding, which is the original, whereas
+ // a decoder might return pixels in a different c_current.
+ // This only sets the color channels, you must also make extra channels
+ // match the amount that is in the metadata.
+ void SetFromImage(Image3F&& color, const ColorEncoding& c_current);
+
+ // -- COLOR ENCODING
+
+ const ColorEncoding& c_current() const { return c_current_; }
+
+ // Returns whether the color image has identical planes. Once established by
+ // Set*, remains unchanged until a subsequent Set* or TransformTo.
+ bool IsGray() const { return c_current_.IsGray(); }
+
+ bool IsSRGB() const { return c_current_.IsSRGB(); }
+ bool IsLinearSRGB() const {
+ return c_current_.white_point == WhitePoint::kD65 &&
+ c_current_.primaries == Primaries::kSRGB && c_current_.tf.IsLinear();
+ }
+
+ // Set the c_current profile without doing any transformation, e.g. if the
+ // transformation was already applied.
+ void OverrideProfile(const ColorEncoding& new_c_current) {
+ c_current_ = new_c_current;
+ }
+
+ // TODO(lode): TransformTo and CopyTo are implemented in enc_image_bundle.cc,
+ // move these functions out of this header file and class, to
+ // enc_image_bundle.h.
+
+ // Transforms color to c_desired and sets c_current to c_desired. Alpha and
+ // metadata remains unchanged.
+ Status TransformTo(const ColorEncoding& c_desired, const JxlCmsInterface& cms,
+ ThreadPool* pool = nullptr);
+ // Copies this:rect, converts to c_desired, and allocates+fills out.
+ Status CopyTo(const Rect& rect, const ColorEncoding& c_desired,
+ const JxlCmsInterface& cms, Image3F* out,
+ ThreadPool* pool = nullptr) const;
+
+ // Detect 'real' bit depth, which can be lower than nominal bit depth
+ // (this is common in PNG), returns 'real' bit depth
+ size_t DetectRealBitdepth() const;
+
+ // -- ALPHA
+
+ void SetAlpha(ImageF&& alpha);
+ bool HasAlpha() const {
+ return metadata_->Find(ExtraChannel::kAlpha) != nullptr;
+ }
+ bool AlphaIsPremultiplied() const {
+ const ExtraChannelInfo* eci = metadata_->Find(ExtraChannel::kAlpha);
+ return (eci == nullptr) ? false : eci->alpha_associated;
+ }
+ const ImageF& alpha() const;
+ ImageF* alpha();
+
+ // -- EXTRA CHANNELS
+ bool HasBlack() const {
+ return metadata_->Find(ExtraChannel::kBlack) != nullptr;
+ }
+ const ImageF& black() const;
+
+ // Extra channels of unknown interpretation (e.g. spot colors).
+ void SetExtraChannels(std::vector<ImageF>&& extra_channels);
+ void ClearExtraChannels() { extra_channels_.clear(); }
+ bool HasExtraChannels() const { return !extra_channels_.empty(); }
+ const std::vector<ImageF>& extra_channels() const { return extra_channels_; }
+ std::vector<ImageF>& extra_channels() { return extra_channels_; }
+
+ const ImageMetadata* metadata() const { return metadata_; }
+
+ void VerifyMetadata() const;
+
+ void SetDecodedBytes(size_t decoded_bytes) { decoded_bytes_ = decoded_bytes; }
+ size_t decoded_bytes() const { return decoded_bytes_; }
+
+ // -- JPEG transcoding:
+
+ // Returns true if image does or will represent quantized DCT-8 coefficients,
+ // stored in 8x8 pixel regions.
+ bool IsJPEG() const {
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ return jpeg_data != nullptr;
+#else // JPEGXL_ENABLE_TRANSCODE_JPEG
+ return false;
+#endif // JPEGXL_ENABLE_TRANSCODE_JPEG
+ }
+
+ std::unique_ptr<jpeg::JPEGData> jpeg_data;
+ // these fields are used to signal the input JPEG color space
+ // NOTE: JPEG doesn't actually provide a way to determine whether YCbCr was
+ // applied or not.
+ ColorTransform color_transform = ColorTransform::kNone;
+ YCbCrChromaSubsampling chroma_subsampling;
+
+ FrameOrigin origin{0, 0};
+
+ // Animation-related information, corresponding to the timecode and duration
+ // fields of the jxl::AnimationFrame of the jxl::FrameHeader.
+ // TODO(lode): ImageBundle is used here to carry the information from
+ // jxl::FrameHeader, consider instead passing a jxl::FrameHeader directly to
+ // EncodeFrame or having a field of that type here.
+ uint32_t duration = 0;
+ uint32_t timecode = 0;
+
+ // TODO(lode): these fields do not match the JXL frame header, it should be
+ // possible to specify up to 4 (3 if nonzero duration) slots to save this
+ // frame as reference (see save_as_reference).
+ bool use_for_next_frame = false;
+ bool blend = false;
+ BlendMode blendmode = BlendMode::kBlend;
+
+ std::string name;
+
+ private:
+ // Called after any Set* to ensure their sizes are compatible.
+ void VerifySizes() const;
+
+ // Required for TransformTo so that an ImageBundle is self-sufficient. Always
+ // points to the same thing, but cannot be const-pointer because that prevents
+ // the compiler from generating a move ctor.
+ const ImageMetadata* metadata_;
+
+ // Initialized by Set*:
+ Image3F color_; // If empty, planes_ is not; all planes equal if IsGray().
+ ColorEncoding c_current_; // of color_
+
+ // Initialized by SetPlanes; size = ImageMetadata.num_extra_channels
+ std::vector<ImageF> extra_channels_;
+
+ // How many bytes of the input were actually read.
+ size_t decoded_bytes_ = 0;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_IMAGE_BUNDLE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/image_bundle_test.cc b/third_party/jpeg-xl/lib/jxl/image_bundle_test.cc
new file mode 100644
index 0000000000..1a10598fe2
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/image_bundle_test.cc
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/image_bundle.h"
+
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(ImageBundleTest, ExtraChannelName) {
+ AuxOut aux_out;
+ BitWriter writer;
+ BitWriter::Allotment allotment(&writer, 99);
+
+ ImageMetadata metadata;
+ ExtraChannelInfo eci;
+ eci.type = ExtraChannel::kBlack;
+ eci.name = "testK";
+ metadata.extra_channel_info.push_back(std::move(eci));
+ ASSERT_TRUE(WriteImageMetadata(metadata, &writer, /*layer=*/0, &aux_out));
+ writer.ZeroPadToByte();
+ allotment.ReclaimAndCharge(&writer, /*layer=*/0, &aux_out);
+
+ BitReader reader(writer.GetSpan());
+ ImageMetadata metadata_out;
+ ASSERT_TRUE(ReadImageMetadata(&reader, &metadata_out));
+ EXPECT_TRUE(reader.Close());
+ EXPECT_EQ("testK", metadata_out.Find(ExtraChannel::kBlack)->name);
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/image_metadata.cc b/third_party/jpeg-xl/lib/jxl/image_metadata.cc
new file mode 100644
index 0000000000..20b0d6f95a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/image_metadata.cc
@@ -0,0 +1,472 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/image_metadata.h"
+
+#include <limits>
+#include <utility>
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/quantizer.h"
+
+namespace jxl {
+BitDepth::BitDepth() { Bundle::Init(this); }
+Status BitDepth::VisitFields(Visitor* JXL_RESTRICT visitor) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &floating_point_sample));
+ // The same fields (bits_per_sample and exponent_bits_per_sample) are read
+ // in a different way depending on floating_point_sample's value. It's still
+ // default-initialized correctly so using visitor->Conditional is not
+ // required.
+ if (!floating_point_sample) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+ Val(8), Val(10), Val(12), BitsOffset(6, 1), 8, &bits_per_sample));
+ exponent_bits_per_sample = 0;
+ } else {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+ Val(32), Val(16), Val(24), BitsOffset(6, 1), 32, &bits_per_sample));
+ // The encoded value is exponent_bits_per_sample - 1, encoded in 3 bits
+ // so the value can be in range [1, 8].
+ const uint32_t offset = 1;
+ exponent_bits_per_sample -= offset;
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->Bits(4, 8 - offset, &exponent_bits_per_sample));
+ exponent_bits_per_sample += offset;
+ }
+
+ // Error-checking for floating point ranges.
+ if (floating_point_sample) {
+ if (exponent_bits_per_sample < 2 || exponent_bits_per_sample > 8) {
+ return JXL_FAILURE("Invalid exponent_bits_per_sample: %u",
+ exponent_bits_per_sample);
+ }
+ int mantissa_bits =
+ static_cast<int>(bits_per_sample) - exponent_bits_per_sample - 1;
+ if (mantissa_bits < 2 || mantissa_bits > 23) {
+ return JXL_FAILURE("Invalid bits_per_sample: %u", bits_per_sample);
+ }
+ } else {
+ if (bits_per_sample > 31) {
+ return JXL_FAILURE("Invalid bits_per_sample: %u", bits_per_sample);
+ }
+ }
+ return true;
+}
+
+std::string BitDepth::DebugString() const {
+ std::ostringstream os;
+ os << (floating_point_sample ? "F" : "U");
+ os << bits_per_sample;
+ if (floating_point_sample) os << "." << exponent_bits_per_sample;
+ return os.str();
+}
+
+CustomTransformData::CustomTransformData() { Bundle::Init(this); }
+Status CustomTransformData::VisitFields(Visitor* JXL_RESTRICT visitor) {
+ if (visitor->AllDefault(*this, &all_default)) {
+ // Overwrite all serialized fields, but not any nonserialized_*.
+ visitor->SetDefault(this);
+ return true;
+ }
+ if (visitor->Conditional(nonserialized_xyb_encoded)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&opsin_inverse_matrix));
+ }
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 0, &custom_weights_mask));
+ if (visitor->Conditional((custom_weights_mask & 0x1) != 0)) {
+ // 4 5x5 kernels, but all of them can be obtained by symmetry from one,
+ // which is symmetric along its main diagonal. The top-left kernel is
+ // defined by
+ //
+ // 0 1 2 3 4
+ // 1 5 6 7 8
+ // 2 6 9 10 11
+ // 3 7 10 12 13
+ // 4 8 11 13 14
+ float constexpr kWeights2[15] = {
+ -0.01716200f, -0.03452303f, -0.04022174f, -0.02921014f, -0.00624645f,
+ 0.14111091f, 0.28896755f, 0.00278718f, -0.01610267f, 0.56661550f,
+ 0.03777607f, -0.01986694f, -0.03144731f, -0.01185068f, -0.00213539f};
+ for (size_t i = 0; i < 15; i++) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->F16(kWeights2[i], &upsampling2_weights[i]));
+ }
+ }
+ if (visitor->Conditional((custom_weights_mask & 0x2) != 0)) {
+ // 16 5x5 kernels, but all of them can be obtained by symmetry from
+ // three, two of which are symmetric along their main diagonals. The top
+ // left 4 kernels are defined by
+ //
+ // 0 1 2 3 4 5 6 7 8 9
+ // 1 10 11 12 13 14 15 16 17 18
+ // 2 11 19 20 21 22 23 24 25 26
+ // 3 12 20 27 28 29 30 31 32 33
+ // 4 13 21 28 34 35 36 37 38 39
+ //
+ // 5 14 22 29 35 40 41 42 43 44
+ // 6 15 23 30 36 41 45 46 47 48
+ // 7 16 24 31 37 42 46 49 50 51
+ // 8 17 25 32 38 43 47 50 52 53
+ // 9 18 26 33 39 44 48 51 53 54
+ constexpr float kWeights4[55] = {
+ -0.02419067f, -0.03491987f, -0.03693351f, -0.03094285f, -0.00529785f,
+ -0.01663432f, -0.03556863f, -0.03888905f, -0.03516850f, -0.00989469f,
+ 0.23651958f, 0.33392945f, -0.01073543f, -0.01313181f, -0.03556694f,
+ 0.13048175f, 0.40103025f, 0.03951150f, -0.02077584f, 0.46914198f,
+ -0.00209270f, -0.01484589f, -0.04064806f, 0.18942530f, 0.56279892f,
+ 0.06674400f, -0.02335494f, -0.03551682f, -0.00754830f, -0.02267919f,
+ -0.02363578f, 0.00315804f, -0.03399098f, -0.01359519f, -0.00091653f,
+ -0.00335467f, -0.01163294f, -0.01610294f, -0.00974088f, -0.00191622f,
+ -0.01095446f, -0.03198464f, -0.04455121f, -0.02799790f, -0.00645912f,
+ 0.06390599f, 0.22963888f, 0.00630981f, -0.01897349f, 0.67537268f,
+ 0.08483369f, -0.02534994f, -0.02205197f, -0.01667999f, -0.00384443f};
+ for (size_t i = 0; i < 55; i++) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->F16(kWeights4[i], &upsampling4_weights[i]));
+ }
+ }
+ if (visitor->Conditional((custom_weights_mask & 0x4) != 0)) {
+ // 64 5x5 kernels, all of them can be obtained by symmetry from
+ // 10, 4 of which are symmetric along their main diagonals. The top
+ // left 16 kernels are defined by
+ // 0 1 2 3 4 5 6 7 8 9 a b c d e f 10 11 12 13
+ // 1 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26
+ // 2 15 27 28 29 2a 2b 2c 2d 2e 2f 30 31 32 33 34 35 36 37 38
+ // 3 16 28 39 3a 3b 3c 3d 3e 3f 40 41 42 43 44 45 46 47 48 49
+ // 4 17 29 3a 4a 4b 4c 4d 4e 4f 50 51 52 53 54 55 56 57 58 59
+
+ // 5 18 2a 3b 4b 5a 5b 5c 5d 5e 5f 60 61 62 63 64 65 66 67 68
+ // 6 19 2b 3c 4c 5b 69 6a 6b 6c 6d 6e 6f 70 71 72 73 74 75 76
+ // 7 1a 2c 3d 4d 5c 6a 77 78 79 7a 7b 7c 7d 7e 7f 80 81 82 83
+ // 8 1b 2d 3e 4e 5d 6b 78 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f
+ // 9 1c 2e 3f 4f 5e 6c 79 85 90 91 92 93 94 95 96 97 98 99 9a
+
+ // a 1d 2f 40 50 5f 6d 7a 86 91 9b 9c 9d 9e 9f a0 a1 a2 a3 a4
+ // b 1e 30 41 51 60 6e 7b 87 92 9c a5 a6 a7 a8 a9 aa ab ac ad
+ // c 1f 31 42 52 61 6f 7c 88 93 9d a6 ae af b0 b1 b2 b3 b4 b5
+ // d 20 32 43 53 62 70 7d 89 94 9e a7 af b6 b7 b8 b9 ba bb bc
+ // e 21 33 44 54 63 71 7e 8a 95 9f a8 b0 b7 bd be bf c0 c1 c2
+
+ // f 22 34 45 55 64 72 7f 8b 96 a0 a9 b1 b8 be c3 c4 c5 c6 c7
+ // 10 23 35 46 56 65 73 80 8c 97 a1 aa b2 b9 bf c4 c8 c9 ca cb
+ // 11 24 36 47 57 66 74 81 8d 98 a2 ab b3 ba c0 c5 c9 cc cd ce
+ // 12 25 37 48 58 67 75 82 8e 99 a3 ac b4 bb c1 c6 ca cd cf d0
+ // 13 26 38 49 59 68 76 83 8f 9a a4 ad b5 bc c2 c7 cb ce d0 d1
+ constexpr float kWeights8[210] = {
+ -0.02928613f, -0.03706353f, -0.03783812f, -0.03324558f, -0.00447632f,
+ -0.02519406f, -0.03752601f, -0.03901508f, -0.03663285f, -0.00646649f,
+ -0.02066407f, -0.03838633f, -0.04002101f, -0.03900035f, -0.00901973f,
+ -0.01626393f, -0.03954148f, -0.04046620f, -0.03979621f, -0.01224485f,
+ 0.29895328f, 0.35757708f, -0.02447552f, -0.01081748f, -0.04314594f,
+ 0.23903219f, 0.41119301f, -0.00573046f, -0.01450239f, -0.04246845f,
+ 0.17567618f, 0.45220643f, 0.02287757f, -0.01936783f, -0.03583255f,
+ 0.11572472f, 0.47416733f, 0.06284440f, -0.02685066f, 0.42720050f,
+ -0.02248939f, -0.01155273f, -0.04562755f, 0.28689496f, 0.49093869f,
+ -0.00007891f, -0.01545926f, -0.04562659f, 0.21238920f, 0.53980934f,
+ 0.03369474f, -0.02070211f, -0.03866988f, 0.14229550f, 0.56593398f,
+ 0.08045181f, -0.02888298f, -0.03680918f, -0.00542229f, -0.02920477f,
+ -0.02788574f, -0.02118180f, -0.03942402f, -0.00775547f, -0.02433614f,
+ -0.03193943f, -0.02030828f, -0.04044014f, -0.01074016f, -0.01930822f,
+ -0.03620399f, -0.01974125f, -0.03919545f, -0.01456093f, -0.00045072f,
+ -0.00360110f, -0.01020207f, -0.01231907f, -0.00638988f, -0.00071592f,
+ -0.00279122f, -0.00957115f, -0.01288327f, -0.00730937f, -0.00107783f,
+ -0.00210156f, -0.00890705f, -0.01317668f, -0.00813895f, -0.00153491f,
+ -0.02128481f, -0.04173044f, -0.04831487f, -0.03293190f, -0.00525260f,
+ -0.01720322f, -0.04052736f, -0.05045706f, -0.03607317f, -0.00738030f,
+ -0.01341764f, -0.03965629f, -0.05151616f, -0.03814886f, -0.01005819f,
+ 0.18968273f, 0.33063684f, -0.01300105f, -0.01372950f, -0.04017465f,
+ 0.13727832f, 0.36402234f, 0.01027890f, -0.01832107f, -0.03365072f,
+ 0.08734506f, 0.38194295f, 0.04338228f, -0.02525993f, 0.56408126f,
+ 0.00458352f, -0.01648227f, -0.04887868f, 0.24585519f, 0.62026135f,
+ 0.04314807f, -0.02213737f, -0.04158014f, 0.16637289f, 0.65027023f,
+ 0.09621636f, -0.03101388f, -0.04082742f, -0.00904519f, -0.02790922f,
+ -0.02117818f, 0.00798662f, -0.03995711f, -0.01243427f, -0.02231705f,
+ -0.02946266f, 0.00992055f, -0.03600283f, -0.01684920f, -0.00111684f,
+ -0.00411204f, -0.01297130f, -0.01723725f, -0.01022545f, -0.00165306f,
+ -0.00313110f, -0.01218016f, -0.01763266f, -0.01125620f, -0.00231663f,
+ -0.01374149f, -0.03797620f, -0.05142937f, -0.03117307f, -0.00581914f,
+ -0.01064003f, -0.03608089f, -0.05272168f, -0.03375670f, -0.00795586f,
+ 0.09628104f, 0.27129991f, -0.00353779f, -0.01734151f, -0.03153981f,
+ 0.05686230f, 0.28500998f, 0.02230594f, -0.02374955f, 0.68214326f,
+ 0.05018048f, -0.02320852f, -0.04383616f, 0.18459474f, 0.71517975f,
+ 0.10805613f, -0.03263677f, -0.03637639f, -0.01394373f, -0.02511203f,
+ -0.01728636f, 0.05407331f, -0.02867568f, -0.01893131f, -0.00240854f,
+ -0.00446511f, -0.01636187f, -0.02377053f, -0.01522848f, -0.00333334f,
+ -0.00819975f, -0.02964169f, -0.04499287f, -0.02745350f, -0.00612408f,
+ 0.02727416f, 0.19446600f, 0.00159832f, -0.02232473f, 0.74982506f,
+ 0.11452620f, -0.03348048f, -0.01605681f, -0.02070339f, -0.00458223f};
+ for (size_t i = 0; i < 210; i++) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->F16(kWeights8[i], &upsampling8_weights[i]));
+ }
+ }
+ return true;
+}
+
+ExtraChannelInfo::ExtraChannelInfo() { Bundle::Init(this); }
+Status ExtraChannelInfo::VisitFields(Visitor* JXL_RESTRICT visitor) {
+ if (visitor->AllDefault(*this, &all_default)) {
+ // Overwrite all serialized fields, but not any nonserialized_*.
+ visitor->SetDefault(this);
+ return true;
+ }
+
+ // General
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Enum(ExtraChannel::kAlpha, &type));
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&bit_depth));
+
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Val(0), Val(3), Val(4), BitsOffset(3, 1), 0, &dim_shift));
+ if ((1U << dim_shift) > 8) {
+ return JXL_FAILURE("dim_shift %u too large", dim_shift);
+ }
+
+ JXL_QUIET_RETURN_IF_ERROR(VisitNameString(visitor, &name));
+
+ // Conditional
+ if (visitor->Conditional(type == ExtraChannel::kAlpha)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &alpha_associated));
+ }
+ if (visitor->Conditional(type == ExtraChannel::kSpotColor)) {
+ for (float& c : spot_color) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0, &c));
+ }
+ }
+ if (visitor->Conditional(type == ExtraChannel::kCFA)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(1), Bits(2), BitsOffset(4, 3),
+ BitsOffset(8, 19), 1, &cfa_channel));
+ }
+
+ if (type == ExtraChannel::kUnknown ||
+ (int(ExtraChannel::kReserved0) <= int(type) &&
+ int(type) <= int(ExtraChannel::kReserved7))) {
+ return JXL_FAILURE("Unknown extra channel (bits %u, shift %u, name '%s')\n",
+ bit_depth.bits_per_sample, dim_shift, name.c_str());
+ }
+ return true;
+}
+
+std::string ExtraChannelInfo::DebugString() const {
+ std::ostringstream os;
+ os << (type == ExtraChannel::kAlpha ? "Alpha"
+ : type == ExtraChannel::kDepth ? "Depth"
+ : type == ExtraChannel::kSpotColor ? "Spot"
+ : type == ExtraChannel::kSelectionMask ? "Mask"
+ : type == ExtraChannel::kBlack ? "Black"
+ : type == ExtraChannel::kCFA ? "CFA"
+ : type == ExtraChannel::kThermal ? "Thermal"
+ : "Unknown");
+ if (type == ExtraChannel::kAlpha && alpha_associated) os << "(premul)";
+ os << " " << bit_depth.DebugString();
+ os << " shift: " << dim_shift;
+ return os.str();
+}
+
+ImageMetadata::ImageMetadata() { Bundle::Init(this); }
+Status ImageMetadata::VisitFields(Visitor* JXL_RESTRICT visitor) {
+ if (visitor->AllDefault(*this, &all_default)) {
+ // Overwrite all serialized fields, but not any nonserialized_*.
+ visitor->SetDefault(this);
+ return true;
+ }
+
+ // Bundle::AllDefault does not allow usage when reading (it may abort the
+ // program when a codestream has invalid values), but when reading we
+ // overwrite the extra_fields value, so do not need to call AllDefault.
+ bool tone_mapping_default =
+ visitor->IsReading() ? false : Bundle::AllDefault(tone_mapping);
+
+ bool extra_fields = (orientation != 1 || have_preview || have_animation ||
+ have_intrinsic_size || !tone_mapping_default);
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &extra_fields));
+ if (visitor->Conditional(extra_fields)) {
+ orientation--;
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 0, &orientation));
+ orientation++;
+ // (No need for bounds checking because we read exactly 3 bits)
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_intrinsic_size));
+ if (visitor->Conditional(have_intrinsic_size)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&intrinsic_size));
+ }
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_preview));
+ if (visitor->Conditional(have_preview)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&preview_size));
+ }
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_animation));
+ if (visitor->Conditional(have_animation)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&animation));
+ }
+ } else {
+ orientation = 1; // identity
+ have_intrinsic_size = false;
+ have_preview = false;
+ have_animation = false;
+ }
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&bit_depth));
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->Bool(true, &modular_16_bit_buffer_sufficient));
+
+ num_extra_channels = extra_channel_info.size();
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(0), Val(1), BitsOffset(4, 2),
+ BitsOffset(12, 1), 0,
+ &num_extra_channels));
+
+ if (visitor->Conditional(num_extra_channels != 0)) {
+ if (visitor->IsReading()) {
+ extra_channel_info.resize(num_extra_channels);
+ }
+ for (ExtraChannelInfo& eci : extra_channel_info) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&eci));
+ }
+ }
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(true, &xyb_encoded));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&color_encoding));
+ if (visitor->Conditional(extra_fields)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&tone_mapping));
+ }
+
+ // Treat as if only the fields up to extra channels exist.
+ if (visitor->IsReading() && nonserialized_only_parse_basic_info) {
+ return true;
+ }
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions));
+ // Extensions: in chronological order of being added to the format.
+ return visitor->EndExtensions();
+}
+
+OpsinInverseMatrix::OpsinInverseMatrix() { Bundle::Init(this); }
+Status OpsinInverseMatrix::VisitFields(Visitor* JXL_RESTRICT visitor) {
+ if (visitor->AllDefault(*this, &all_default)) {
+ // Overwrite all serialized fields, but not any nonserialized_*.
+ visitor->SetDefault(this);
+ return true;
+ }
+ for (int i = 0; i < 9; ++i) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->F16(
+ DefaultInverseOpsinAbsorbanceMatrix()[i], &inverse_matrix[i]));
+ }
+ for (int i = 0; i < 3; ++i) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->F16(kNegOpsinAbsorbanceBiasRGB[i], &opsin_biases[i]));
+ }
+ for (int i = 0; i < 4; ++i) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->F16(kDefaultQuantBias[i], &quant_biases[i]));
+ }
+ return true;
+}
+
+ToneMapping::ToneMapping() { Bundle::Init(this); }
+Status ToneMapping::VisitFields(Visitor* JXL_RESTRICT visitor) {
+ if (visitor->AllDefault(*this, &all_default)) {
+ // Overwrite all serialized fields, but not any nonserialized_*.
+ visitor->SetDefault(this);
+ return true;
+ }
+
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->F16(kDefaultIntensityTarget, &intensity_target));
+ if (intensity_target <= 0.f) {
+ return JXL_FAILURE("invalid intensity target");
+ }
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.0f, &min_nits));
+ if (min_nits < 0.f || min_nits > intensity_target) {
+ return JXL_FAILURE("invalid min %f vs max %f", min_nits, intensity_target);
+ }
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &relative_to_max_display));
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.0f, &linear_below));
+ if (linear_below < 0 || (relative_to_max_display && linear_below > 1.0f)) {
+ return JXL_FAILURE("invalid linear_below %f (%s)", linear_below,
+ relative_to_max_display ? "relative" : "absolute");
+ }
+
+ return true;
+}
+
+Status ReadImageMetadata(BitReader* JXL_RESTRICT reader,
+ ImageMetadata* JXL_RESTRICT metadata) {
+ return Bundle::Read(reader, metadata);
+}
+
+void ImageMetadata::SetAlphaBits(uint32_t bits, bool alpha_is_premultiplied) {
+ std::vector<ExtraChannelInfo>& eciv = extra_channel_info;
+ ExtraChannelInfo* alpha = Find(ExtraChannel::kAlpha);
+ if (bits == 0) {
+ if (alpha != nullptr) {
+ // Remove the alpha channel from the extra channel info. It's
+ // theoretically possible that there are multiple, remove all in that
+ // case. This ensure a next HasAlpha() will return false.
+ const auto is_alpha = [](const ExtraChannelInfo& eci) {
+ return eci.type == ExtraChannel::kAlpha;
+ };
+ eciv.erase(std::remove_if(eciv.begin(), eciv.end(), is_alpha),
+ eciv.end());
+ }
+ } else {
+ if (alpha == nullptr) {
+ ExtraChannelInfo info;
+ info.type = ExtraChannel::kAlpha;
+ info.bit_depth.bits_per_sample = bits;
+ info.dim_shift = 0;
+ info.alpha_associated = alpha_is_premultiplied;
+ // Prepend rather than append: in case there already are other extra
+ // channels, prefer alpha channel to be listed first.
+ eciv.insert(eciv.begin(), info);
+ } else {
+ // Ignores potential extra alpha channels, only sets to first one.
+ alpha->bit_depth.bits_per_sample = bits;
+ alpha->bit_depth.floating_point_sample = false;
+ alpha->bit_depth.exponent_bits_per_sample = 0;
+ alpha->alpha_associated = alpha_is_premultiplied;
+ }
+ }
+ num_extra_channels = extra_channel_info.size();
+ if (bits > 12) modular_16_bit_buffer_sufficient = false;
+}
+
+std::string ImageMetadata::DebugString() const {
+ std::ostringstream os;
+ os << bit_depth.DebugString();
+ if (modular_16_bit_buffer_sufficient) {
+ os << " (modular 16)";
+ }
+ os << (xyb_encoded ? " xyb encoded" : " orig profile");
+ os << " " << Description(color_encoding);
+ if (num_extra_channels > 0) {
+ os << " extra channels:";
+ for (size_t i = 0; i < num_extra_channels; ++i) {
+ os << " (" << extra_channel_info[i].DebugString() << ")";
+ if (i + 1 < num_extra_channels) os << ",";
+ }
+ }
+ if (have_preview) {
+ os << " preview: " << preview_size.xsize() << "x" << preview_size.ysize();
+ }
+ if (orientation != 1) {
+ os << " orientation: " << orientation;
+ }
+ return os.str();
+}
+
+std::string CodecMetadata::DebugString() const {
+ std::ostringstream os;
+ os << size.xsize() << "x" << size.ysize();
+ os << " " << m.DebugString();
+ return os.str();
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/image_metadata.h b/third_party/jpeg-xl/lib/jxl/image_metadata.h
new file mode 100644
index 0000000000..ca69eb3a3d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/image_metadata.h
@@ -0,0 +1,425 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Main codestream header bundles, the metadata that applies to all frames.
+// Enums must align with the C API definitions in codestream_header.h.
+
+#ifndef LIB_JXL_IMAGE_METADATA_H_
+#define LIB_JXL_IMAGE_METADATA_H_
+
+#include <jxl/codestream_header.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+#include "lib/jxl/opsin_params.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+// EXIF orientation of the image. This field overrides any field present in
+// actual EXIF metadata. The value tells which transformation the decoder must
+// apply after decoding to display the image with the correct orientation.
+enum class Orientation : uint32_t {
+ // Values 1..8 match the EXIF definitions.
+ kIdentity = JXL_ORIENT_IDENTITY,
+ kFlipHorizontal = JXL_ORIENT_FLIP_HORIZONTAL,
+ kRotate180 = JXL_ORIENT_ROTATE_180,
+ kFlipVertical = JXL_ORIENT_FLIP_VERTICAL,
+ kTranspose = JXL_ORIENT_TRANSPOSE,
+ kRotate90 = JXL_ORIENT_ROTATE_90_CW,
+ kAntiTranspose = JXL_ORIENT_ANTI_TRANSPOSE,
+ kRotate270 = JXL_ORIENT_ROTATE_90_CCW,
+};
+// Don't need an EnumBits because Orientation is not read via Enum().
+
+enum class ExtraChannel : uint32_t {
+ // First two enumerators (most common) are cheaper to encode
+ kAlpha = JXL_CHANNEL_ALPHA,
+ kDepth = JXL_CHANNEL_DEPTH,
+
+ kSpotColor = JXL_CHANNEL_SPOT_COLOR,
+ kSelectionMask = JXL_CHANNEL_SELECTION_MASK,
+ kBlack = JXL_CHANNEL_BLACK, // for CMYK
+ kCFA = JXL_CHANNEL_CFA, // Bayer channel
+ kThermal = JXL_CHANNEL_THERMAL,
+ kReserved0 = JXL_CHANNEL_RESERVED0,
+ kReserved1 = JXL_CHANNEL_RESERVED1,
+ kReserved2 = JXL_CHANNEL_RESERVED2,
+ kReserved3 = JXL_CHANNEL_RESERVED3,
+ kReserved4 = JXL_CHANNEL_RESERVED4,
+ kReserved5 = JXL_CHANNEL_RESERVED5,
+ kReserved6 = JXL_CHANNEL_RESERVED6,
+ kReserved7 = JXL_CHANNEL_RESERVED7,
+ // disambiguated via name string, raise warning if unsupported
+ kUnknown = JXL_CHANNEL_UNKNOWN,
+ // like kUnknown but can silently be ignored
+ kOptional = JXL_CHANNEL_OPTIONAL
+};
+static inline const char* EnumName(ExtraChannel /*unused*/) {
+ return "ExtraChannel";
+}
+static inline constexpr uint64_t EnumBits(ExtraChannel /*unused*/) {
+ using EC = ExtraChannel;
+ return MakeBit(EC::kAlpha) | MakeBit(EC::kDepth) | MakeBit(EC::kSpotColor) |
+ MakeBit(EC::kSelectionMask) | MakeBit(EC::kBlack) | MakeBit(EC::kCFA) |
+ MakeBit(EC::kThermal) | MakeBit(EC::kUnknown) | MakeBit(EC::kOptional);
+}
+
+// Used in ImageMetadata and ExtraChannelInfo.
+struct BitDepth : public Fields {
+ BitDepth();
+ JXL_FIELDS_NAME(BitDepth)
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+ std::string DebugString() const;
+
+ // Whether the original (uncompressed) samples are floating point or
+ // unsigned integer.
+ bool floating_point_sample;
+
+ // Bit depth of the original (uncompressed) image samples. Must be in the
+ // range [1, 32].
+ uint32_t bits_per_sample;
+
+ // Floating point exponent bits of the original (uncompressed) image samples,
+ // only used if floating_point_sample is true.
+ // If used, the samples are floating point with:
+ // - 1 sign bit
+ // - exponent_bits_per_sample exponent bits
+ // - (bits_per_sample - exponent_bits_per_sample - 1) mantissa bits
+ // If used, exponent_bits_per_sample must be in the range
+ // [2, 8] and amount of mantissa bits must be in the range [2, 23].
+ // NOTE: exponent_bits_per_sample is 8 for single precision binary32
+ // point, 5 for half precision binary16, 7 for fp24.
+ uint32_t exponent_bits_per_sample;
+};
+
+// Describes one extra channel.
+struct ExtraChannelInfo : public Fields {
+ ExtraChannelInfo();
+ JXL_FIELDS_NAME(ExtraChannelInfo)
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+ std::string DebugString() const;
+
+ mutable bool all_default;
+
+ ExtraChannel type;
+ BitDepth bit_depth;
+ uint32_t dim_shift; // downsampled by 2^dim_shift on each axis
+
+ std::string name; // UTF-8
+
+ // Conditional:
+ bool alpha_associated; // i.e. premultiplied
+ float spot_color[4]; // spot color in linear RGBA
+ uint32_t cfa_channel;
+};
+
+struct OpsinInverseMatrix : public Fields {
+ OpsinInverseMatrix();
+ JXL_FIELDS_NAME(OpsinInverseMatrix)
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+ mutable bool all_default;
+
+ float inverse_matrix[9];
+ float opsin_biases[3];
+ float quant_biases[4];
+};
+
+// Information useful for mapping HDR images to lower dynamic range displays.
+struct ToneMapping : public Fields {
+ ToneMapping();
+ JXL_FIELDS_NAME(ToneMapping)
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+ mutable bool all_default;
+
+ // Upper bound on the intensity level present in the image. For unsigned
+ // integer pixel encodings, this is the brightness of the largest
+ // representable value. The image does not necessarily contain a pixel
+ // actually this bright. An encoder is allowed to set 255 for SDR images
+ // without computing a histogram.
+ float intensity_target; // [nits]
+
+ // Lower bound on the intensity level present in the image. This may be
+ // loose, i.e. lower than the actual darkest pixel. When tone mapping, a
+ // decoder will map [min_nits, intensity_target] to the display range.
+ float min_nits;
+
+ bool relative_to_max_display; // see below
+ // The tone mapping will leave unchanged (linear mapping) any pixels whose
+ // brightness is strictly below this. The interpretation depends on
+ // relative_to_max_display. If true, this is a ratio [0, 1] of the maximum
+ // display brightness [nits], otherwise an absolute brightness [nits].
+ float linear_below;
+};
+
+// Contains weights to customize some trasnforms - in particular, XYB and
+// upsampling.
+struct CustomTransformData : public Fields {
+ CustomTransformData();
+ JXL_FIELDS_NAME(CustomTransformData)
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+ // Must be set before calling VisitFields. Must equal xyb_encoded of
+ // ImageMetadata, should be set by ImageMetadata during VisitFields.
+ bool nonserialized_xyb_encoded = false;
+
+ mutable bool all_default;
+
+ OpsinInverseMatrix opsin_inverse_matrix;
+
+ uint32_t custom_weights_mask;
+ float upsampling2_weights[15];
+ float upsampling4_weights[55];
+ float upsampling8_weights[210];
+};
+
+// Properties of the original image bundle. This enables Encode(Decode()) to
+// re-create an equivalent image without user input.
+struct ImageMetadata : public Fields {
+ ImageMetadata();
+ JXL_FIELDS_NAME(ImageMetadata)
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+ // Returns bit depth of the JPEG XL compressed alpha channel, or 0 if no alpha
+ // channel present. In the theoretical case that there are multiple alpha
+ // channels, returns the bit depht of the first.
+ uint32_t GetAlphaBits() const {
+ const ExtraChannelInfo* alpha = Find(ExtraChannel::kAlpha);
+ if (alpha == nullptr) return 0;
+ JXL_ASSERT(alpha->bit_depth.bits_per_sample != 0);
+ return alpha->bit_depth.bits_per_sample;
+ }
+
+ // Sets bit depth of alpha channel, adding extra channel if needed, or
+ // removing all alpha channels if bits is 0.
+ // Assumes integer alpha channel and not designed to support multiple
+ // alpha channels (it's possible to use those features by manipulating
+ // extra_channel_info directly).
+ //
+ // Callers must insert the actual channel image at the same index before any
+ // further modifications to extra_channel_info.
+ void SetAlphaBits(uint32_t bits, bool alpha_is_premultiplied = false);
+
+ bool HasAlpha() const { return GetAlphaBits() != 0; }
+
+ // Sets the original bit depth fields to indicate unsigned integer of the
+ // given bit depth.
+ // TODO(lode): move function to BitDepth
+ void SetUintSamples(uint32_t bits) {
+ bit_depth.bits_per_sample = bits;
+ bit_depth.exponent_bits_per_sample = 0;
+ bit_depth.floating_point_sample = false;
+ // RCT / Squeeze may add one bit each, and this is about int16_t,
+ // so uint13 should still be OK but limiting it to 12 seems safer.
+ // TODO(jon): figure out a better way to set this header field.
+ // (in particular, if modular mode is not used it doesn't matter,
+ // and if transforms are restricted, up to 15-bit could be done)
+ if (bits > 12) modular_16_bit_buffer_sufficient = false;
+ }
+ // Sets the original bit depth fields to indicate single precision floating
+ // point.
+ // TODO(lode): move function to BitDepth
+ void SetFloat32Samples() {
+ bit_depth.bits_per_sample = 32;
+ bit_depth.exponent_bits_per_sample = 8;
+ bit_depth.floating_point_sample = true;
+ modular_16_bit_buffer_sufficient = false;
+ }
+
+ void SetFloat16Samples() {
+ bit_depth.bits_per_sample = 16;
+ bit_depth.exponent_bits_per_sample = 5;
+ bit_depth.floating_point_sample = true;
+ modular_16_bit_buffer_sufficient = false;
+ }
+
+ void SetIntensityTarget(float intensity_target) {
+ tone_mapping.intensity_target = intensity_target;
+ }
+ float IntensityTarget() const {
+ JXL_ASSERT(tone_mapping.intensity_target != 0);
+ return tone_mapping.intensity_target;
+ }
+
+ // Returns first ExtraChannelInfo of the given type, or nullptr if none.
+ const ExtraChannelInfo* Find(ExtraChannel type) const {
+ for (const ExtraChannelInfo& eci : extra_channel_info) {
+ if (eci.type == type) return &eci;
+ }
+ return nullptr;
+ }
+
+ // Returns first ExtraChannelInfo of the given type, or nullptr if none.
+ ExtraChannelInfo* Find(ExtraChannel type) {
+ for (ExtraChannelInfo& eci : extra_channel_info) {
+ if (eci.type == type) return &eci;
+ }
+ return nullptr;
+ }
+
+ Orientation GetOrientation() const {
+ return static_cast<Orientation>(orientation);
+ }
+
+ bool ExtraFieldsDefault() const;
+
+ std::string DebugString() const;
+
+ mutable bool all_default;
+
+ BitDepth bit_depth;
+ bool modular_16_bit_buffer_sufficient; // otherwise 32 is.
+
+ // Whether the colors values of the pixels of frames are encoded in the
+ // codestream using the absolute XYB color space, or the using values that
+ // follow the color space defined by the ColorEncoding or ICC profile. This
+ // determines when or whether a CMS (Color Management System) is needed to get
+ // the pixels in a desired color space. In one case, the pixels have one known
+ // color space and a CMS is needed to convert them to the original image's
+ // color space, in the other case the pixels have the color space of the
+ // original image and a CMS is required if a different display space, or a
+ // single known consistent color space for multiple decoded images, is
+ // desired. In all cases, the color space of all frames from a single image is
+ // the same, both VarDCT and modular frames.
+ //
+ // If true: then frames can be decoded to XYB (which can also be converted to
+ // linear and non-linear sRGB with the built in conversion without CMS). The
+ // attached ColorEncoding or ICC profile has no effect on the meaning of the
+ // pixel's color values, but instead indicates what the color profile of the
+ // original image was, and what color profile one should convert to when
+ // decoding to integers to prevent clipping and precision loss. To do that
+ // conversion requires a CMS.
+ //
+ // If false: then the color values of decoded frames are in the space defined
+ // by the attached ColorEncoding or ICC profile. To instead get the pixels in
+ // a chosen known color space, such as sRGB, requires a CMS, since the
+ // attached ColorEncoding or ICC profile could be any arbitrary color space.
+ // This mode is typically used for lossless images encoded as integers.
+ // Frames can also use YCbCr encoding, some frames may and some may not, but
+ // this is not a different color space but a certain encoding of the RGB
+ // values.
+ //
+ // Note: if !xyb_encoded, but the attached color profile indicates XYB (which
+ // can happen either if it's a ColorEncoding with color_space_ ==
+ // ColorSpace::kXYB, or if it's an ICC Profile that has been crafted to
+ // represent XYB), then the frames still may not use ColorEncoding kXYB, they
+ // must still use kNone (or kYCbCr, which would mean applying the YCbCr
+ // transform to the 3-channel XYB data), since with !xyb_encoded, the 3
+ // channels are stored as-is, no matter what meaning the color profile assigns
+ // to them. To use ColorEncoding::kXYB, xyb_encoded must be true.
+ //
+ // This value is defined in image metadata because this is the global
+ // codestream header. This value does not affect the image itself, so is not
+ // image metadata per se, it only affects the encoding, and what color space
+ // the decoder can receive the pixels in without needing a CMS.
+ bool xyb_encoded;
+
+ ColorEncoding color_encoding;
+
+ // These values are initialized to defaults such that the 'extra_fields'
+ // condition in VisitFields uses correctly initialized values.
+ uint32_t orientation = 1;
+ bool have_preview = false;
+ bool have_animation = false;
+ bool have_intrinsic_size = false;
+
+ // If present, the stored image has the dimensions of the first SizeHeader,
+ // but decoders are advised to resample or display per `intrinsic_size`.
+ SizeHeader intrinsic_size; // only if have_intrinsic_size
+
+ ToneMapping tone_mapping;
+
+ // When reading: deserialized. When writing: automatically set from vector.
+ uint32_t num_extra_channels;
+ std::vector<ExtraChannelInfo> extra_channel_info;
+
+ // Only present if m.have_preview.
+ PreviewHeader preview_size;
+ // Only present if m.have_animation.
+ AnimationHeader animation;
+
+ uint64_t extensions;
+
+ // Option to stop parsing after basic info, and treat as if the later
+ // fields do not participate. Use to parse only basic image information
+ // excluding the final larger or variable sized data.
+ bool nonserialized_only_parse_basic_info = false;
+};
+
+Status ReadImageMetadata(BitReader* JXL_RESTRICT reader,
+ ImageMetadata* JXL_RESTRICT metadata);
+
+Status WriteImageMetadata(const ImageMetadata& metadata,
+ BitWriter* JXL_RESTRICT writer, size_t layer,
+ AuxOut* aux_out);
+
+// All metadata applicable to the entire codestream (dimensions, extra channels,
+// ...)
+struct CodecMetadata {
+ // TODO(lode): use the preview and animation fields too, in place of the
+ // nonserialized_ ones in ImageMetadata.
+ ImageMetadata m;
+ // The size of the codestream: this is the nominal size applicable to all
+ // frames, although some frames can have a different effective size through
+ // crop, dc_level or representing a the preview.
+ SizeHeader size;
+ // Often default.
+ CustomTransformData transform_data;
+
+ size_t xsize() const { return size.xsize(); }
+ size_t ysize() const { return size.ysize(); }
+ size_t oriented_xsize(bool keep_orientation) const {
+ if (static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation) {
+ return ysize();
+ } else {
+ return xsize();
+ }
+ }
+ size_t oriented_preview_xsize(bool keep_orientation) const {
+ if (static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation) {
+ return m.preview_size.ysize();
+ } else {
+ return m.preview_size.xsize();
+ }
+ }
+ size_t oriented_ysize(bool keep_orientation) const {
+ if (static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation) {
+ return xsize();
+ } else {
+ return ysize();
+ }
+ }
+ size_t oriented_preview_ysize(bool keep_orientation) const {
+ if (static_cast<uint32_t>(m.GetOrientation()) > 4 && !keep_orientation) {
+ return m.preview_size.xsize();
+ } else {
+ return m.preview_size.ysize();
+ }
+ }
+
+ std::string DebugString() const;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_IMAGE_METADATA_H_
diff --git a/third_party/jpeg-xl/lib/jxl/image_ops.h b/third_party/jpeg-xl/lib/jxl/image_ops.h
new file mode 100644
index 0000000000..c025007e95
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/image_ops.h
@@ -0,0 +1,805 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_IMAGE_OPS_H_
+#define LIB_JXL_IMAGE_OPS_H_
+
+// Operations on images.
+
+#include <algorithm>
+#include <array>
+#include <limits>
+#include <vector>
+
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+template <typename T>
+void CopyImageTo(const Plane<T>& from, Plane<T>* JXL_RESTRICT to) {
+ PROFILER_ZONE("CopyImage1");
+ JXL_ASSERT(SameSize(from, *to));
+ if (from.ysize() == 0 || from.xsize() == 0) return;
+ for (size_t y = 0; y < from.ysize(); ++y) {
+ const T* JXL_RESTRICT row_from = from.ConstRow(y);
+ T* JXL_RESTRICT row_to = to->Row(y);
+ memcpy(row_to, row_from, from.xsize() * sizeof(T));
+ }
+}
+
+// DEPRECATED - prefer to preallocate result.
+template <typename T>
+Plane<T> CopyImage(const Plane<T>& from) {
+ Plane<T> to(from.xsize(), from.ysize());
+ CopyImageTo(from, &to);
+ return to;
+}
+
+// Copies `from:rect_from` to `to:rect_to`.
+template <typename T>
+void CopyImageTo(const Rect& rect_from, const Plane<T>& from,
+ const Rect& rect_to, Plane<T>* JXL_RESTRICT to) {
+ PROFILER_ZONE("CopyImageR");
+ JXL_DASSERT(SameSize(rect_from, rect_to));
+ JXL_DASSERT(rect_from.IsInside(from));
+ JXL_DASSERT(rect_to.IsInside(*to));
+ if (rect_from.xsize() == 0) return;
+ for (size_t y = 0; y < rect_from.ysize(); ++y) {
+ const T* JXL_RESTRICT row_from = rect_from.ConstRow(from, y);
+ T* JXL_RESTRICT row_to = rect_to.Row(to, y);
+ memcpy(row_to, row_from, rect_from.xsize() * sizeof(T));
+ }
+}
+
+// DEPRECATED - Returns a copy of the "image" pixels that lie in "rect".
+template <typename T>
+Plane<T> CopyImage(const Rect& rect, const Plane<T>& image) {
+ Plane<T> copy(rect.xsize(), rect.ysize());
+ CopyImageTo(rect, image, &copy);
+ return copy;
+}
+
+// Copies `from:rect_from` to `to:rect_to`.
+template <typename T>
+void CopyImageTo(const Rect& rect_from, const Image3<T>& from,
+ const Rect& rect_to, Image3<T>* JXL_RESTRICT to) {
+ PROFILER_ZONE("CopyImageR");
+ JXL_ASSERT(SameSize(rect_from, rect_to));
+ for (size_t c = 0; c < 3; c++) {
+ CopyImageTo(rect_from, from.Plane(c), rect_to, &to->Plane(c));
+ }
+}
+
+template <typename T, typename U>
+void ConvertPlaneAndClamp(const Rect& rect_from, const Plane<T>& from,
+ const Rect& rect_to, Plane<U>* JXL_RESTRICT to) {
+ PROFILER_ZONE("ConvertPlane");
+ JXL_ASSERT(SameSize(rect_from, rect_to));
+ using M = decltype(T() + U());
+ for (size_t y = 0; y < rect_to.ysize(); ++y) {
+ const T* JXL_RESTRICT row_from = rect_from.ConstRow(from, y);
+ U* JXL_RESTRICT row_to = rect_to.Row(to, y);
+ for (size_t x = 0; x < rect_to.xsize(); ++x) {
+ row_to[x] =
+ std::min<M>(std::max<M>(row_from[x], std::numeric_limits<U>::min()),
+ std::numeric_limits<U>::max());
+ }
+ }
+}
+
+// Copies `from` to `to`.
+template <typename T>
+void CopyImageTo(const T& from, T* JXL_RESTRICT to) {
+ return CopyImageTo(Rect(from), from, Rect(*to), to);
+}
+
+// Copies `from:rect_from` to `to`.
+template <typename T>
+void CopyImageTo(const Rect& rect_from, const T& from, T* JXL_RESTRICT to) {
+ return CopyImageTo(rect_from, from, Rect(*to), to);
+}
+
+// Copies `from` to `to:rect_to`.
+template <typename T>
+void CopyImageTo(const T& from, const Rect& rect_to, T* JXL_RESTRICT to) {
+ return CopyImageTo(Rect(from), from, rect_to, to);
+}
+
+// Copies `from:rect_from` to `to:rect_to`; also copies `padding` pixels of
+// border around `from:rect_from`, in all directions, whenever they are inside
+// the first image.
+template <typename T>
+void CopyImageToWithPadding(const Rect& from_rect, const T& from,
+ size_t padding, const Rect& to_rect, T* to) {
+ size_t xextra0 = std::min(padding, from_rect.x0());
+ size_t xextra1 =
+ std::min(padding, from.xsize() - from_rect.x0() - from_rect.xsize());
+ size_t yextra0 = std::min(padding, from_rect.y0());
+ size_t yextra1 =
+ std::min(padding, from.ysize() - from_rect.y0() - from_rect.ysize());
+ JXL_DASSERT(to_rect.x0() >= xextra0);
+ JXL_DASSERT(to_rect.y0() >= yextra0);
+
+ return CopyImageTo(Rect(from_rect.x0() - xextra0, from_rect.y0() - yextra0,
+ from_rect.xsize() + xextra0 + xextra1,
+ from_rect.ysize() + yextra0 + yextra1),
+ from,
+ Rect(to_rect.x0() - xextra0, to_rect.y0() - yextra0,
+ to_rect.xsize() + xextra0 + xextra1,
+ to_rect.ysize() + yextra0 + yextra1),
+ to);
+}
+
+// DEPRECATED - prefer to preallocate result.
+template <typename T>
+Image3<T> CopyImage(const Image3<T>& from) {
+ Image3<T> copy(from.xsize(), from.ysize());
+ CopyImageTo(from, &copy);
+ return copy;
+}
+
+// DEPRECATED - prefer to preallocate result.
+template <typename T>
+Image3<T> CopyImage(const Rect& rect, const Image3<T>& from) {
+ Image3<T> to(rect.xsize(), rect.ysize());
+ CopyImageTo(rect, from.Plane(0), to.Plane(0));
+ CopyImageTo(rect, from.Plane(1), to.Plane(1));
+ CopyImageTo(rect, from.Plane(2), to.Plane(2));
+ return to;
+}
+
+// Sets "thickness" pixels on each border to "value". This is faster than
+// initializing the entire image and overwriting valid/interior pixels.
+template <typename T>
+void SetBorder(const size_t thickness, const T value, Image3<T>* image) {
+ const size_t xsize = image->xsize();
+ const size_t ysize = image->ysize();
+ // Top: fill entire row
+ for (size_t c = 0; c < 3; ++c) {
+ for (size_t y = 0; y < std::min(thickness, ysize); ++y) {
+ T* JXL_RESTRICT row = image->PlaneRow(c, y);
+ std::fill(row, row + xsize, value);
+ }
+
+ // Bottom: fill entire row
+ for (size_t y = ysize - thickness; y < ysize; ++y) {
+ T* JXL_RESTRICT row = image->PlaneRow(c, y);
+ std::fill(row, row + xsize, value);
+ }
+
+ // Left/right: fill the 'columns' on either side, but only if the image is
+ // big enough that they don't already belong to the top/bottom rows.
+ if (ysize >= 2 * thickness) {
+ for (size_t y = thickness; y < ysize - thickness; ++y) {
+ T* JXL_RESTRICT row = image->PlaneRow(c, y);
+ std::fill(row, row + thickness, value);
+ std::fill(row + xsize - thickness, row + xsize, value);
+ }
+ }
+ }
+}
+
+template <class ImageIn, class ImageOut>
+void Subtract(const ImageIn& image1, const ImageIn& image2, ImageOut* out) {
+ using T = typename ImageIn::T;
+ const size_t xsize = image1.xsize();
+ const size_t ysize = image1.ysize();
+ JXL_CHECK(xsize == image2.xsize());
+ JXL_CHECK(ysize == image2.ysize());
+
+ for (size_t y = 0; y < ysize; ++y) {
+ const T* const JXL_RESTRICT row1 = image1.Row(y);
+ const T* const JXL_RESTRICT row2 = image2.Row(y);
+ T* const JXL_RESTRICT row_out = out->Row(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ row_out[x] = row1[x] - row2[x];
+ }
+ }
+}
+
+// In-place.
+template <typename Tin, typename Tout>
+void SubtractFrom(const Plane<Tin>& what, Plane<Tout>* to) {
+ const size_t xsize = what.xsize();
+ const size_t ysize = what.ysize();
+ for (size_t y = 0; y < ysize; ++y) {
+ const Tin* JXL_RESTRICT row_what = what.ConstRow(y);
+ Tout* JXL_RESTRICT row_to = to->Row(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ row_to[x] -= row_what[x];
+ }
+ }
+}
+
+// In-place.
+template <typename Tin, typename Tout>
+void AddTo(const Plane<Tin>& what, Plane<Tout>* to) {
+ const size_t xsize = what.xsize();
+ const size_t ysize = what.ysize();
+ for (size_t y = 0; y < ysize; ++y) {
+ const Tin* JXL_RESTRICT row_what = what.ConstRow(y);
+ Tout* JXL_RESTRICT row_to = to->Row(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ row_to[x] += row_what[x];
+ }
+ }
+}
+
+template <typename Tin, typename Tout>
+void AddTo(Rect rectFrom, const Plane<Tin>& what, Rect rectTo,
+ Plane<Tout>* to) {
+ JXL_ASSERT(SameSize(rectFrom, rectTo));
+ const size_t xsize = rectTo.xsize();
+ const size_t ysize = rectTo.ysize();
+ for (size_t y = 0; y < ysize; ++y) {
+ const Tin* JXL_RESTRICT row_what = rectFrom.ConstRow(what, y);
+ Tout* JXL_RESTRICT row_to = rectTo.Row(to, y);
+ for (size_t x = 0; x < xsize; ++x) {
+ row_to[x] += row_what[x];
+ }
+ }
+}
+
+// Returns linear combination of two grayscale images.
+template <typename T>
+Plane<T> LinComb(const T lambda1, const Plane<T>& image1, const T lambda2,
+ const Plane<T>& image2) {
+ const size_t xsize = image1.xsize();
+ const size_t ysize = image1.ysize();
+ JXL_CHECK(xsize == image2.xsize());
+ JXL_CHECK(ysize == image2.ysize());
+ Plane<T> out(xsize, ysize);
+ for (size_t y = 0; y < ysize; ++y) {
+ const T* const JXL_RESTRICT row1 = image1.Row(y);
+ const T* const JXL_RESTRICT row2 = image2.Row(y);
+ T* const JXL_RESTRICT row_out = out.Row(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ row_out[x] = lambda1 * row1[x] + lambda2 * row2[x];
+ }
+ }
+ return out;
+}
+
+// Returns a pixel-by-pixel multiplication of image by lambda.
+template <typename T>
+Plane<T> ScaleImage(const T lambda, const Plane<T>& image) {
+ Plane<T> out(image.xsize(), image.ysize());
+ for (size_t y = 0; y < image.ysize(); ++y) {
+ const T* const JXL_RESTRICT row = image.Row(y);
+ T* const JXL_RESTRICT row_out = out.Row(y);
+ for (size_t x = 0; x < image.xsize(); ++x) {
+ row_out[x] = lambda * row[x];
+ }
+ }
+ return out;
+}
+
+// Multiplies image by lambda in-place
+template <typename T>
+void ScaleImage(const T lambda, Plane<T>* image) {
+ for (size_t y = 0; y < image->ysize(); ++y) {
+ T* const JXL_RESTRICT row = image->Row(y);
+ for (size_t x = 0; x < image->xsize(); ++x) {
+ row[x] = lambda * row[x];
+ }
+ }
+}
+
+template <typename T>
+Plane<T> Product(const Plane<T>& a, const Plane<T>& b) {
+ Plane<T> c(a.xsize(), a.ysize());
+ for (size_t y = 0; y < a.ysize(); ++y) {
+ const T* const JXL_RESTRICT row_a = a.Row(y);
+ const T* const JXL_RESTRICT row_b = b.Row(y);
+ T* const JXL_RESTRICT row_c = c.Row(y);
+ for (size_t x = 0; x < a.xsize(); ++x) {
+ row_c[x] = row_a[x] * row_b[x];
+ }
+ }
+ return c;
+}
+
+template <typename T>
+void FillImage(const T value, Plane<T>* image) {
+ for (size_t y = 0; y < image->ysize(); ++y) {
+ T* const JXL_RESTRICT row = image->Row(y);
+ for (size_t x = 0; x < image->xsize(); ++x) {
+ row[x] = value;
+ }
+ }
+}
+
+template <typename T>
+void ZeroFillImage(Plane<T>* image) {
+ if (image->xsize() == 0) return;
+ for (size_t y = 0; y < image->ysize(); ++y) {
+ T* const JXL_RESTRICT row = image->Row(y);
+ memset(row, 0, image->xsize() * sizeof(T));
+ }
+}
+
+// Mirrors out of bounds coordinates and returns valid coordinates unchanged.
+// We assume the radius (distance outside the image) is small compared to the
+// image size, otherwise this might not terminate.
+// The mirror is outside the last column (border pixel is also replicated).
+static inline int64_t Mirror(int64_t x, const int64_t xsize) {
+ JXL_DASSERT(xsize != 0);
+
+ // TODO(janwas): replace with branchless version
+ while (x < 0 || x >= xsize) {
+ if (x < 0) {
+ x = -x - 1;
+ } else {
+ x = 2 * xsize - 1 - x;
+ }
+ }
+ return x;
+}
+
+// Wrap modes for ensuring X/Y coordinates are in the valid range [0, size):
+
+// Mirrors (repeating the edge pixel once). Useful for convolutions.
+struct WrapMirror {
+ JXL_INLINE int64_t operator()(const int64_t coord, const int64_t size) const {
+ return Mirror(coord, size);
+ }
+};
+
+// Returns the same coordinate: required for TFNode with Border(), or useful
+// when we know "coord" is already valid (e.g. interior of an image).
+struct WrapUnchanged {
+ JXL_INLINE int64_t operator()(const int64_t coord, int64_t /*size*/) const {
+ return coord;
+ }
+};
+
+// Similar to Wrap* but for row pointers (reduces Row() multiplications).
+
+class WrapRowMirror {
+ public:
+ template <class ImageOrView>
+ WrapRowMirror(const ImageOrView& image, size_t ysize)
+ : first_row_(image.ConstRow(0)), last_row_(image.ConstRow(ysize - 1)) {}
+
+ const float* operator()(const float* const JXL_RESTRICT row,
+ const int64_t stride) const {
+ if (row < first_row_) {
+ const int64_t num_before = first_row_ - row;
+ // Mirrored; one row before => row 0, two before = row 1, ...
+ return first_row_ + num_before - stride;
+ }
+ if (row > last_row_) {
+ const int64_t num_after = row - last_row_;
+ // Mirrored; one row after => last row, two after = last - 1, ...
+ return last_row_ - num_after + stride;
+ }
+ return row;
+ }
+
+ private:
+ const float* const JXL_RESTRICT first_row_;
+ const float* const JXL_RESTRICT last_row_;
+};
+
+struct WrapRowUnchanged {
+ JXL_INLINE const float* operator()(const float* const JXL_RESTRICT row,
+ int64_t /*stride*/) const {
+ return row;
+ }
+};
+
+// Sets "thickness" pixels on each border to "value". This is faster than
+// initializing the entire image and overwriting valid/interior pixels.
+template <typename T>
+void SetBorder(const size_t thickness, const T value, Plane<T>* image) {
+ const size_t xsize = image->xsize();
+ const size_t ysize = image->ysize();
+ // Top: fill entire row
+ for (size_t y = 0; y < std::min(thickness, ysize); ++y) {
+ T* const JXL_RESTRICT row = image->Row(y);
+ std::fill(row, row + xsize, value);
+ }
+
+ // Bottom: fill entire row
+ for (size_t y = ysize - thickness; y < ysize; ++y) {
+ T* const JXL_RESTRICT row = image->Row(y);
+ std::fill(row, row + xsize, value);
+ }
+
+ // Left/right: fill the 'columns' on either side, but only if the image is
+ // big enough that they don't already belong to the top/bottom rows.
+ if (ysize >= 2 * thickness) {
+ for (size_t y = thickness; y < ysize - thickness; ++y) {
+ T* const JXL_RESTRICT row = image->Row(y);
+ std::fill(row, row + thickness, value);
+ std::fill(row + xsize - thickness, row + xsize, value);
+ }
+ }
+}
+
+// Computes the minimum and maximum pixel value.
+template <typename T>
+void ImageMinMax(const Plane<T>& image, T* const JXL_RESTRICT min,
+ T* const JXL_RESTRICT max) {
+ *min = std::numeric_limits<T>::max();
+ *max = std::numeric_limits<T>::lowest();
+ for (size_t y = 0; y < image.ysize(); ++y) {
+ const T* const JXL_RESTRICT row = image.Row(y);
+ for (size_t x = 0; x < image.xsize(); ++x) {
+ *min = std::min(*min, row[x]);
+ *max = std::max(*max, row[x]);
+ }
+ }
+}
+
+// Copies pixels, scaling their value relative to the "from" min/max by
+// "to_range". Example: U8 [0, 255] := [0.0, 1.0], to_range = 1.0 =>
+// outputs [0.0, 1.0].
+template <typename FromType, typename ToType>
+void ImageConvert(const Plane<FromType>& from, const float to_range,
+ Plane<ToType>* const JXL_RESTRICT to) {
+ JXL_ASSERT(SameSize(from, *to));
+ FromType min_from, max_from;
+ ImageMinMax(from, &min_from, &max_from);
+ const float scale = to_range / (max_from - min_from);
+ for (size_t y = 0; y < from.ysize(); ++y) {
+ const FromType* const JXL_RESTRICT row_from = from.Row(y);
+ ToType* const JXL_RESTRICT row_to = to->Row(y);
+ for (size_t x = 0; x < from.xsize(); ++x) {
+ row_to[x] = static_cast<ToType>((row_from[x] - min_from) * scale);
+ }
+ }
+}
+
+template <typename From>
+Plane<float> ConvertToFloat(const Plane<From>& from) {
+ float factor = 1.0f / std::numeric_limits<From>::max();
+ if (std::is_same<From, double>::value || std::is_same<From, float>::value) {
+ factor = 1.0f;
+ }
+ Plane<float> to(from.xsize(), from.ysize());
+ for (size_t y = 0; y < from.ysize(); ++y) {
+ const From* const JXL_RESTRICT row_from = from.Row(y);
+ float* const JXL_RESTRICT row_to = to.Row(y);
+ for (size_t x = 0; x < from.xsize(); ++x) {
+ row_to[x] = row_from[x] * factor;
+ }
+ }
+ return to;
+}
+
+template <typename T>
+Plane<T> ImageFromPacked(const std::vector<T>& packed, const size_t xsize,
+ const size_t ysize) {
+ Plane<T> out(xsize, ysize);
+ for (size_t y = 0; y < ysize; ++y) {
+ T* const JXL_RESTRICT row = out.Row(y);
+ const T* const JXL_RESTRICT packed_row = &packed[y * xsize];
+ memcpy(row, packed_row, xsize * sizeof(T));
+ }
+ return out;
+}
+
+// Computes independent minimum and maximum values for each plane.
+template <typename T>
+void Image3MinMax(const Image3<T>& image, const Rect& rect,
+ std::array<T, 3>* out_min, std::array<T, 3>* out_max) {
+ for (size_t c = 0; c < 3; ++c) {
+ T min = std::numeric_limits<T>::max();
+ T max = std::numeric_limits<T>::min();
+ for (size_t y = 0; y < rect.ysize(); ++y) {
+ const T* JXL_RESTRICT row = rect.ConstPlaneRow(image, c, y);
+ for (size_t x = 0; x < rect.xsize(); ++x) {
+ min = std::min(min, row[x]);
+ max = std::max(max, row[x]);
+ }
+ }
+ (*out_min)[c] = min;
+ (*out_max)[c] = max;
+ }
+}
+
+// Computes independent minimum and maximum values for each plane.
+template <typename T>
+void Image3MinMax(const Image3<T>& image, std::array<T, 3>* out_min,
+ std::array<T, 3>* out_max) {
+ Image3MinMax(image, Rect(image), out_min, out_max);
+}
+
+template <typename T>
+void Image3Max(const Image3<T>& image, std::array<T, 3>* out_max) {
+ for (size_t c = 0; c < 3; ++c) {
+ T max = std::numeric_limits<T>::min();
+ for (size_t y = 0; y < image.ysize(); ++y) {
+ const T* JXL_RESTRICT row = image.ConstPlaneRow(c, y);
+ for (size_t x = 0; x < image.xsize(); ++x) {
+ max = std::max(max, row[x]);
+ }
+ }
+ (*out_max)[c] = max;
+ }
+}
+
+// Computes the sum of the pixels in `rect`.
+template <typename T>
+T ImageSum(const Plane<T>& image, const Rect& rect) {
+ T result = 0;
+ for (size_t y = 0; y < rect.ysize(); ++y) {
+ const T* JXL_RESTRICT row = rect.ConstRow(image, y);
+ for (size_t x = 0; x < rect.xsize(); ++x) {
+ result += row[x];
+ }
+ }
+ return result;
+}
+
+template <typename T>
+T ImageSum(const Plane<T>& image) {
+ return ImageSum(image, Rect(image));
+}
+
+template <typename T>
+std::array<T, 3> Image3Sum(const Image3<T>& image, const Rect& rect) {
+ std::array<T, 3> out_sum = 0;
+ for (size_t c = 0; c < 3; ++c) {
+ (out_sum)[c] = ImageSum(image.Plane(c), rect);
+ }
+ return out_sum;
+}
+
+template <typename T>
+std::array<T, 3> Image3Sum(const Image3<T>& image) {
+ return Image3Sum(image, Rect(image));
+}
+
+template <typename T>
+std::vector<T> PackedFromImage(const Plane<T>& image, const Rect& rect) {
+ const size_t xsize = rect.xsize();
+ const size_t ysize = rect.ysize();
+ std::vector<T> packed(xsize * ysize);
+ for (size_t y = 0; y < rect.ysize(); ++y) {
+ memcpy(&packed[y * xsize], rect.ConstRow(image, y), xsize * sizeof(T));
+ }
+ return packed;
+}
+
+template <typename T>
+std::vector<T> PackedFromImage(const Plane<T>& image) {
+ return PackedFromImage(image, Rect(image));
+}
+
+// Computes the median pixel value.
+template <typename T>
+T ImageMedian(const Plane<T>& image, const Rect& rect) {
+ std::vector<T> pixels = PackedFromImage(image, rect);
+ return Median(&pixels);
+}
+
+template <typename T>
+T ImageMedian(const Plane<T>& image) {
+ return ImageMedian(image, Rect(image));
+}
+
+template <typename T>
+std::array<T, 3> Image3Median(const Image3<T>& image, const Rect& rect) {
+ std::array<T, 3> out_median;
+ for (size_t c = 0; c < 3; ++c) {
+ (out_median)[c] = ImageMedian(image.Plane(c), rect);
+ }
+ return out_median;
+}
+
+template <typename T>
+std::array<T, 3> Image3Median(const Image3<T>& image) {
+ return Image3Median(image, Rect(image));
+}
+
+template <typename FromType, typename ToType>
+void Image3Convert(const Image3<FromType>& from, const float to_range,
+ Image3<ToType>* const JXL_RESTRICT to) {
+ JXL_ASSERT(SameSize(from, *to));
+ std::array<FromType, 3> min_from, max_from;
+ Image3MinMax(from, &min_from, &max_from);
+ float scales[3];
+ for (size_t c = 0; c < 3; ++c) {
+ scales[c] = to_range / (max_from[c] - min_from[c]);
+ }
+ float scale = std::min(scales[0], std::min(scales[1], scales[2]));
+ for (size_t c = 0; c < 3; ++c) {
+ for (size_t y = 0; y < from.ysize(); ++y) {
+ const FromType* JXL_RESTRICT row_from = from.ConstPlaneRow(c, y);
+ ToType* JXL_RESTRICT row_to = to->PlaneRow(c, y);
+ for (size_t x = 0; x < from.xsize(); ++x) {
+ const float to = (row_from[x] - min_from[c]) * scale;
+ row_to[x] = static_cast<ToType>(to);
+ }
+ }
+ }
+}
+
+template <typename From>
+Image3F ConvertToFloat(const Image3<From>& from) {
+ return Image3F(ConvertToFloat(from.Plane(0)), ConvertToFloat(from.Plane(1)),
+ ConvertToFloat(from.Plane(2)));
+}
+
+template <typename Tin, typename Tout>
+void Subtract(const Image3<Tin>& image1, const Image3<Tin>& image2,
+ Image3<Tout>* out) {
+ const size_t xsize = image1.xsize();
+ const size_t ysize = image1.ysize();
+ JXL_CHECK(xsize == image2.xsize());
+ JXL_CHECK(ysize == image2.ysize());
+
+ for (size_t c = 0; c < 3; ++c) {
+ for (size_t y = 0; y < ysize; ++y) {
+ const Tin* const JXL_RESTRICT row1 = image1.ConstPlaneRow(c, y);
+ const Tin* const JXL_RESTRICT row2 = image2.ConstPlaneRow(c, y);
+ Tout* const JXL_RESTRICT row_out = out->PlaneRow(c, y);
+ for (size_t x = 0; x < xsize; ++x) {
+ row_out[x] = row1[x] - row2[x];
+ }
+ }
+ }
+}
+
+template <typename Tin, typename Tout>
+void SubtractFrom(const Image3<Tin>& what, Image3<Tout>* to) {
+ const size_t xsize = what.xsize();
+ const size_t ysize = what.ysize();
+ for (size_t c = 0; c < 3; ++c) {
+ for (size_t y = 0; y < ysize; ++y) {
+ const Tin* JXL_RESTRICT row_what = what.ConstPlaneRow(c, y);
+ Tout* JXL_RESTRICT row_to = to->PlaneRow(c, y);
+ for (size_t x = 0; x < xsize; ++x) {
+ row_to[x] -= row_what[x];
+ }
+ }
+ }
+}
+
+template <typename Tin, typename Tout>
+void AddTo(const Image3<Tin>& what, Image3<Tout>* to) {
+ const size_t xsize = what.xsize();
+ const size_t ysize = what.ysize();
+ for (size_t c = 0; c < 3; ++c) {
+ for (size_t y = 0; y < ysize; ++y) {
+ const Tin* JXL_RESTRICT row_what = what.ConstPlaneRow(c, y);
+ Tout* JXL_RESTRICT row_to = to->PlaneRow(c, y);
+ for (size_t x = 0; x < xsize; ++x) {
+ row_to[x] += row_what[x];
+ }
+ }
+ }
+}
+
+// Adds `what` of the size of `rect` to `to` in the position of `rect`.
+template <typename Tin, typename Tout>
+void AddTo(const Rect& rect, const Image3<Tin>& what, Image3<Tout>* to) {
+ const size_t xsize = what.xsize();
+ const size_t ysize = what.ysize();
+ JXL_ASSERT(xsize == rect.xsize());
+ JXL_ASSERT(ysize == rect.ysize());
+ for (size_t c = 0; c < 3; ++c) {
+ for (size_t y = 0; y < ysize; ++y) {
+ const Tin* JXL_RESTRICT row_what = what.ConstPlaneRow(c, y);
+ Tout* JXL_RESTRICT row_to = rect.PlaneRow(to, c, y);
+ for (size_t x = 0; x < xsize; ++x) {
+ row_to[x] += row_what[x];
+ }
+ }
+ }
+}
+
+template <typename T>
+Image3<T> ScaleImage(const T lambda, const Image3<T>& image) {
+ Image3<T> out(image.xsize(), image.ysize());
+ for (size_t c = 0; c < 3; ++c) {
+ for (size_t y = 0; y < image.ysize(); ++y) {
+ const T* JXL_RESTRICT row = image.ConstPlaneRow(c, y);
+ T* JXL_RESTRICT row_out = out.PlaneRow(c, y);
+ for (size_t x = 0; x < image.xsize(); ++x) {
+ row_out[x] = lambda * row[x];
+ }
+ }
+ }
+ return out;
+}
+
+// Multiplies image by lambda in-place
+template <typename T>
+void ScaleImage(const T lambda, Image3<T>* image) {
+ for (size_t c = 0; c < 3; ++c) {
+ for (size_t y = 0; y < image->ysize(); ++y) {
+ T* const JXL_RESTRICT row = image->PlaneRow(c, y);
+ for (size_t x = 0; x < image->xsize(); ++x) {
+ row[x] = lambda * row[x];
+ }
+ }
+ }
+}
+
+// Initializes all planes to the same "value".
+template <typename T>
+void FillImage(const T value, Image3<T>* image) {
+ for (size_t c = 0; c < 3; ++c) {
+ for (size_t y = 0; y < image->ysize(); ++y) {
+ T* JXL_RESTRICT row = image->PlaneRow(c, y);
+ for (size_t x = 0; x < image->xsize(); ++x) {
+ row[x] = value;
+ }
+ }
+ }
+}
+
+template <typename T>
+void FillPlane(const T value, Plane<T>* image) {
+ for (size_t y = 0; y < image->ysize(); ++y) {
+ T* JXL_RESTRICT row = image->Row(y);
+ for (size_t x = 0; x < image->xsize(); ++x) {
+ row[x] = value;
+ }
+ }
+}
+
+template <typename T>
+void FillImage(const T value, Image3<T>* image, Rect rect) {
+ for (size_t c = 0; c < 3; ++c) {
+ for (size_t y = 0; y < rect.ysize(); ++y) {
+ T* JXL_RESTRICT row = rect.PlaneRow(image, c, y);
+ for (size_t x = 0; x < rect.xsize(); ++x) {
+ row[x] = value;
+ }
+ }
+ }
+}
+
+template <typename T>
+void FillPlane(const T value, Plane<T>* image, Rect rect) {
+ for (size_t y = 0; y < rect.ysize(); ++y) {
+ T* JXL_RESTRICT row = rect.Row(image, y);
+ for (size_t x = 0; x < rect.xsize(); ++x) {
+ row[x] = value;
+ }
+ }
+}
+
+template <typename T>
+void ZeroFillImage(Image3<T>* image) {
+ for (size_t c = 0; c < 3; ++c) {
+ for (size_t y = 0; y < image->ysize(); ++y) {
+ T* JXL_RESTRICT row = image->PlaneRow(c, y);
+ if (image->xsize() != 0) memset(row, 0, image->xsize() * sizeof(T));
+ }
+ }
+}
+
+template <typename T>
+void ZeroFillPlane(Plane<T>* image, Rect rect) {
+ for (size_t y = 0; y < rect.ysize(); ++y) {
+ T* JXL_RESTRICT row = rect.Row(image, y);
+ memset(row, 0, rect.xsize() * sizeof(T));
+ }
+}
+
+// Pad an image with xborder columns on each vertical side and yboder rows
+// above and below, mirroring the image.
+Image3F PadImageMirror(const Image3F& in, size_t xborder, size_t yborder);
+
+// Same as above, but operates in-place. Assumes that the `in` image was
+// allocated large enough.
+void PadImageToBlockMultipleInPlace(Image3F* JXL_RESTRICT in,
+ size_t block_dim = kBlockDim);
+
+// Downsamples an image by a given factor.
+void DownsampleImage(Image3F* opsin, size_t factor);
+void DownsampleImage(ImageF* image, size_t factor);
+
+} // namespace jxl
+
+#endif // LIB_JXL_IMAGE_OPS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/image_ops_test.cc b/third_party/jpeg-xl/lib/jxl/image_ops_test.cc
new file mode 100644
index 0000000000..44c021513d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/image_ops_test.cc
@@ -0,0 +1,164 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/image_ops.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <utility>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+template <typename T>
+void TestPacked(const size_t xsize, const size_t ysize) {
+ Plane<T> image1(xsize, ysize);
+ RandomFillImage(&image1);
+ const std::vector<T>& packed = PackedFromImage(image1);
+ const Plane<T>& image2 = ImageFromPacked(packed, xsize, ysize);
+ JXL_EXPECT_OK(SamePixels(image1, image2, _));
+}
+
+TEST(ImageTest, TestPacked) {
+ TestPacked<uint8_t>(1, 1);
+ TestPacked<uint8_t>(7, 1);
+ TestPacked<uint8_t>(1, 7);
+
+ TestPacked<int16_t>(1, 1);
+ TestPacked<int16_t>(7, 1);
+ TestPacked<int16_t>(1, 7);
+
+ TestPacked<uint16_t>(1, 1);
+ TestPacked<uint16_t>(7, 1);
+ TestPacked<uint16_t>(1, 7);
+
+ TestPacked<float>(1, 1);
+ TestPacked<float>(7, 1);
+ TestPacked<float>(1, 7);
+}
+
+// Ensure entire payload is readable/writable for various size/offset combos.
+TEST(ImageTest, TestAllocator) {
+ Rng rng(0);
+ const size_t k32 = 32;
+ const size_t kAlign = CacheAligned::kAlignment;
+ for (size_t size : {k32 * 1, k32 * 2, k32 * 3, k32 * 4, k32 * 5,
+ CacheAligned::kAlias, 2 * CacheAligned::kAlias + 4}) {
+ for (size_t offset = 0; offset <= CacheAligned::kAlias; offset += kAlign) {
+ uint8_t* bytes =
+ static_cast<uint8_t*>(CacheAligned::Allocate(size, offset));
+ JXL_CHECK(reinterpret_cast<uintptr_t>(bytes) % kAlign == 0);
+ // Ensure we can write/read the last byte. Use RNG to fool the compiler
+ // into thinking the write is necessary.
+ memset(bytes, 0, size);
+ bytes[size - 1] = 1; // greatest element
+ uint32_t pos = rng.UniformU(0, size - 1); // random but != greatest
+ JXL_CHECK(bytes[pos] < bytes[size - 1]);
+
+ CacheAligned::Free(bytes);
+ }
+ }
+}
+
+template <typename T>
+void TestFillImpl(Image3<T>* img, const char* layout) {
+ FillImage(T(1), img);
+ for (size_t y = 0; y < img->ysize(); ++y) {
+ for (size_t c = 0; c < 3; ++c) {
+ T* JXL_RESTRICT row = img->PlaneRow(c, y);
+ for (size_t x = 0; x < img->xsize(); ++x) {
+ if (row[x] != T(1)) {
+ printf("Not 1 at c=%" PRIuS " %" PRIuS ", %" PRIuS " (%" PRIuS
+ " x %" PRIuS ") (%s)\n",
+ c, x, y, img->xsize(), img->ysize(), layout);
+ abort();
+ }
+ row[x] = T(2);
+ }
+ }
+ }
+
+ // Same for ZeroFillImage and swapped c/y loop ordering.
+ ZeroFillImage(img);
+ for (size_t c = 0; c < 3; ++c) {
+ for (size_t y = 0; y < img->ysize(); ++y) {
+ T* JXL_RESTRICT row = img->PlaneRow(c, y);
+ for (size_t x = 0; x < img->xsize(); ++x) {
+ if (row[x] != T(0)) {
+ printf("Not 0 at c=%" PRIuS " %" PRIuS ", %" PRIuS " (%" PRIuS
+ " x %" PRIuS ") (%s)\n",
+ c, x, y, img->xsize(), img->ysize(), layout);
+ abort();
+ }
+ row[x] = T(3);
+ }
+ }
+ }
+}
+
+template <typename T>
+void TestFillT() {
+ for (uint32_t xsize : {0, 1, 15, 16, 31, 32}) {
+ for (uint32_t ysize : {0, 1, 15, 16, 31, 32}) {
+ Image3<T> image(xsize, ysize);
+ TestFillImpl(&image, "size ctor");
+
+ Image3<T> planar(Plane<T>(xsize, ysize), Plane<T>(xsize, ysize),
+ Plane<T>(xsize, ysize));
+ TestFillImpl(&planar, "planar");
+ }
+ }
+}
+
+// Ensure y/c/x and c/y/x loops visit pixels no more than once.
+TEST(ImageTest, TestFill) {
+ TestFillT<uint8_t>();
+ TestFillT<int16_t>();
+ TestFillT<float>();
+ TestFillT<double>();
+}
+
+TEST(ImageTest, CopyImageToWithPaddingTest) {
+ Plane<uint32_t> src(100, 61);
+ for (size_t y = 0; y < src.ysize(); y++) {
+ for (size_t x = 0; x < src.xsize(); x++) {
+ src.Row(y)[x] = x * 1000 + y;
+ }
+ }
+ Rect src_rect(10, 20, 30, 40);
+ EXPECT_TRUE(src_rect.IsInside(src));
+
+ Plane<uint32_t> dst(60, 50);
+ FillImage(0u, &dst);
+ Rect dst_rect(20, 5, 30, 40);
+ EXPECT_TRUE(dst_rect.IsInside(dst));
+
+ CopyImageToWithPadding(src_rect, src, /*padding=*/2, dst_rect, &dst);
+
+ // ysize is + 3 instead of + 4 because we are at the y image boundary on the
+ // source image.
+ Rect padded_dst_rect(20 - 2, 5 - 2, 30 + 4, 40 + 3);
+ for (size_t y = 0; y < dst.ysize(); y++) {
+ for (size_t x = 0; x < dst.xsize(); x++) {
+ if (Rect(x, y, 1, 1).IsInside(padded_dst_rect)) {
+ EXPECT_EQ((x - dst_rect.x0() + src_rect.x0()) * 1000 +
+ (y - dst_rect.y0() + src_rect.y0()),
+ dst.Row(y)[x]);
+ } else {
+ EXPECT_EQ(0u, dst.Row(y)[x]);
+ }
+ }
+ }
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/image_test_utils.h b/third_party/jpeg-xl/lib/jxl/image_test_utils.h
new file mode 100644
index 0000000000..e7d72285e6
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/image_test_utils.h
@@ -0,0 +1,257 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_IMAGE_TEST_UTILS_H_
+#define LIB_JXL_IMAGE_TEST_UTILS_H_
+
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS
+#endif
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <cmath>
+#include <limits>
+#include <sstream>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+template <typename T>
+bool SamePixels(const Plane<T>& image1, const Plane<T>& image2,
+ std::stringstream& failures) {
+ const Rect rect(image1);
+ JXL_CHECK(SameSize(image1, image2));
+ size_t mismatches = 0;
+ for (size_t y = rect.y0(); y < rect.ysize(); ++y) {
+ const T* const JXL_RESTRICT row1 = image1.Row(y);
+ const T* const JXL_RESTRICT row2 = image2.Row(y);
+ for (size_t x = rect.x0(); x < rect.xsize(); ++x) {
+ if (row1[x] != row2[x]) {
+ failures << "pixel mismatch" << x << ", " << y << ": "
+ << double(row1[x]) << " != " << double(row2[x]) << "\n";
+ if (++mismatches > 4) {
+ return false;
+ }
+ }
+ }
+ }
+ return mismatches == 0;
+}
+
+template <typename T>
+bool SamePixels(const Image3<T>& image1, const Image3<T>& image2,
+ std::stringstream& failures) {
+ JXL_CHECK(SameSize(image1, image2));
+ for (size_t c = 0; c < 3; ++c) {
+ if (!SamePixels(image1.Plane(c), image2.Plane(c), failures)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+// Use for floating-point images with fairly large numbers; tolerates small
+// absolute errors and/or small relative errors.
+template <typename T>
+bool VerifyRelativeError(const Plane<T>& expected, const Plane<T>& actual,
+ const double threshold_l1,
+ const double threshold_relative,
+ std::stringstream& failures, const intptr_t border = 0,
+ const size_t c = 0) {
+ JXL_CHECK(SameSize(expected, actual));
+ const intptr_t xsize = expected.xsize();
+ const intptr_t ysize = expected.ysize();
+
+ // Max over current scanline to give a better idea whether there are
+ // systematic errors or just one outlier. Invalid if negative.
+ double max_l1 = -1;
+ double max_relative = -1;
+ bool any_bad = false;
+ for (intptr_t y = border; y < ysize - border; ++y) {
+ const T* const JXL_RESTRICT row_expected = expected.Row(y);
+ const T* const JXL_RESTRICT row_actual = actual.Row(y);
+ for (intptr_t x = border; x < xsize - border; ++x) {
+ const double l1 = std::abs(row_expected[x] - row_actual[x]);
+
+ // Cannot compute relative, only check/update L1.
+ if (std::abs(row_expected[x]) < 1E-10) {
+ if (l1 > threshold_l1) {
+ any_bad = true;
+ max_l1 = std::max(max_l1, l1);
+ }
+ } else {
+ const double relative = l1 / std::abs(double(row_expected[x]));
+ if (l1 > threshold_l1 && relative > threshold_relative) {
+ // Fails both tolerances => will exit below, update max_*.
+ any_bad = true;
+ max_l1 = std::max(max_l1, l1);
+ max_relative = std::max(max_relative, relative);
+ }
+ }
+ }
+ }
+ if (!any_bad) {
+ return true;
+ }
+ // Never had a valid relative value, don't print it.
+ if (max_relative < 0) {
+ fprintf(stderr, "c=%" PRIu64 ": max +/- %E exceeds +/- %.2E\n",
+ static_cast<uint64_t>(c), max_l1, threshold_l1);
+ } else {
+ fprintf(stderr,
+ "c=%" PRIu64 ": max +/- %E, x %E exceeds +/- %.2E, x %.2E\n",
+ static_cast<uint64_t>(c), max_l1, max_relative, threshold_l1,
+ threshold_relative);
+ }
+ // Dump the expected image and actual image if the region is small enough.
+ const intptr_t kMaxTestDumpSize = 16;
+ if (xsize <= kMaxTestDumpSize + 2 * border &&
+ ysize <= kMaxTestDumpSize + 2 * border) {
+ fprintf(stderr, "Expected image:\n");
+ for (intptr_t y = border; y < ysize - border; ++y) {
+ const T* const JXL_RESTRICT row_expected = expected.Row(y);
+ for (intptr_t x = border; x < xsize - border; ++x) {
+ fprintf(stderr, "%10lf ", static_cast<double>(row_expected[x]));
+ }
+ fprintf(stderr, "\n");
+ }
+
+ fprintf(stderr, "Actual image:\n");
+ for (intptr_t y = border; y < ysize - border; ++y) {
+ const T* const JXL_RESTRICT row_expected = expected.Row(y);
+ const T* const JXL_RESTRICT row_actual = actual.Row(y);
+ for (intptr_t x = border; x < xsize - border; ++x) {
+ const double l1 = std::abs(row_expected[x] - row_actual[x]);
+
+ bool bad = l1 > threshold_l1;
+ if (row_expected[x] > 1E-10) {
+ const double relative = l1 / std::abs(double(row_expected[x]));
+ bad &= relative > threshold_relative;
+ }
+ if (bad) {
+ fprintf(stderr, "%10lf ", static_cast<double>(row_actual[x]));
+ } else {
+ fprintf(stderr, "%10s ", "==");
+ }
+ }
+ fprintf(stderr, "\n");
+ }
+ }
+
+ // Find first failing x for further debugging.
+ for (intptr_t y = border; y < ysize - border; ++y) {
+ const T* const JXL_RESTRICT row_expected = expected.Row(y);
+ const T* const JXL_RESTRICT row_actual = actual.Row(y);
+
+ for (intptr_t x = border; x < xsize - border; ++x) {
+ const double l1 = std::abs(row_expected[x] - row_actual[x]);
+
+ bool bad = l1 > threshold_l1;
+ if (row_expected[x] > 1E-10) {
+ const double relative = l1 / std::abs(double(row_expected[x]));
+ bad &= relative > threshold_relative;
+ }
+ if (bad) {
+ failures << x << ", " << y << " (" << expected.xsize() << " x "
+ << expected.ysize() << ") expected "
+ << static_cast<double>(row_expected[x]) << " actual "
+ << static_cast<double>(row_actual[x]);
+ return false;
+ }
+ }
+ }
+ return false;
+}
+
+template <typename T>
+bool VerifyRelativeError(const Image3<T>& expected, const Image3<T>& actual,
+ const float threshold_l1,
+ const float threshold_relative,
+ std::stringstream& failures,
+ const intptr_t border = 0) {
+ for (size_t c = 0; c < 3; ++c) {
+ bool ok =
+ VerifyRelativeError(expected.Plane(c), actual.Plane(c), threshold_l1,
+ threshold_relative, failures, border, c);
+ if (!ok) {
+ return false;
+ }
+ }
+ return true;
+}
+
+template <typename T, typename U = T>
+void GenerateImage(Rng& rng, Plane<T>* image, U begin, U end) {
+ for (size_t y = 0; y < image->ysize(); ++y) {
+ T* const JXL_RESTRICT row = image->Row(y);
+ for (size_t x = 0; x < image->xsize(); ++x) {
+ if (std::is_same<T, float>::value || std::is_same<T, double>::value) {
+ row[x] = rng.UniformF(begin, end);
+ } else if (std::is_signed<T>::value) {
+ row[x] = rng.UniformI(begin, end);
+ } else {
+ row[x] = rng.UniformU(begin, end);
+ }
+ }
+ }
+}
+
+template <typename T>
+void RandomFillImage(Plane<T>* image, const T begin, const T end,
+ const int seed = 129) {
+ Rng rng(seed);
+ GenerateImage(rng, image, begin, end);
+}
+
+template <typename T>
+typename std::enable_if<std::is_integral<T>::value>::type RandomFillImage(
+ Plane<T>* image) {
+ Rng rng(129);
+ GenerateImage(rng, image, int64_t(0),
+ int64_t(std::numeric_limits<T>::max()) + 1);
+}
+
+JXL_INLINE void RandomFillImage(Plane<float>* image) {
+ Rng rng(129);
+ GenerateImage(rng, image, 0.0f, std::numeric_limits<float>::max());
+}
+
+template <typename T, typename U>
+void GenerateImage(Rng& rng, Image3<T>* image, U begin, U end) {
+ for (size_t c = 0; c < 3; ++c) {
+ GenerateImage(rng, &image->Plane(c), begin, end);
+ }
+}
+
+template <typename T>
+typename std::enable_if<std::is_integral<T>::value>::type RandomFillImage(
+ Image3<T>* image) {
+ Rng rng(129);
+ GenerateImage(rng, image, int64_t(0),
+ int64_t(std::numeric_limits<T>::max()) + 1);
+}
+
+JXL_INLINE void RandomFillImage(Image3F* image) {
+ Rng rng(129);
+ GenerateImage(rng, image, 0.0f, std::numeric_limits<float>::max());
+}
+
+template <typename T, typename U>
+void RandomFillImage(Image3<T>* image, const U begin, const U end,
+ const int seed = 129) {
+ Rng rng(seed);
+ GenerateImage(rng, image, begin, end);
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_IMAGE_TEST_UTILS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/inverse_mtf-inl.h b/third_party/jpeg-xl/lib/jxl/inverse_mtf-inl.h
new file mode 100644
index 0000000000..fcb01d7396
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/inverse_mtf-inl.h
@@ -0,0 +1,90 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// SIMDified inverse-move-to-front transform.
+
+#if defined(LIB_JXL_INVERSE_MTF_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_INVERSE_MTF_INL_H_
+#undef LIB_JXL_INVERSE_MTF_INL_H_
+#else
+#define LIB_JXL_INVERSE_MTF_INL_H_
+#endif
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/sanitizers.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::FirstN;
+using hwy::HWY_NAMESPACE::IfThenElse;
+using hwy::HWY_NAMESPACE::Load;
+using hwy::HWY_NAMESPACE::LoadU;
+using hwy::HWY_NAMESPACE::StoreU;
+
+inline void MoveToFront(uint8_t* v, uint8_t index) {
+ uint8_t value = v[index];
+ uint8_t i = index;
+ if (i < 4) {
+ for (; i; --i) v[i] = v[i - 1];
+ } else {
+ const HWY_CAPPED(uint8_t, 64) d;
+ int tail = i & (Lanes(d) - 1);
+ if (tail) {
+ i -= tail;
+ const auto vec = Load(d, v + i);
+ const auto prev = LoadU(d, v + i + 1);
+ StoreU(IfThenElse(FirstN(d, tail), vec, prev), d, v + i + 1);
+ }
+ while (i) {
+ i -= Lanes(d);
+ const auto vec = Load(d, v + i);
+ StoreU(vec, d, v + i + 1);
+ }
+ }
+ v[0] = value;
+}
+
+inline void InverseMoveToFrontTransform(uint8_t* v, int v_len) {
+ HWY_ALIGN uint8_t mtf[256 + 64];
+ int i;
+ for (i = 0; i < 256; ++i) {
+ mtf[i] = static_cast<uint8_t>(i);
+ }
+#if JXL_MEMORY_SANITIZER
+ const HWY_CAPPED(uint8_t, 64) d;
+ for (size_t j = 0; j < Lanes(d); ++j) {
+ mtf[256 + j] = 0;
+ }
+#endif // JXL_MEMORY_SANITIZER
+ for (i = 0; i < v_len; ++i) {
+ uint8_t index = v[i];
+ v[i] = mtf[index];
+ if (index) MoveToFront(mtf, index);
+ }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif // LIB_JXL_INVERSE_MTF_INL_H_
+
+#if HWY_ONCE
+#ifndef INVERSE_MTF_ONCE
+#define INVERSE_MTF_ONCE
+
+namespace jxl {
+inline void InverseMoveToFrontTransform(uint8_t* v, int v_len) {
+ return HWY_STATIC_DISPATCH(InverseMoveToFrontTransform)(v, v_len);
+}
+} // namespace jxl
+
+#endif // INVERSE_MTF_ONCE
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_data.cc b/third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_data.cc
new file mode 100644
index 0000000000..db49a1c215
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_data.cc
@@ -0,0 +1,145 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/jpeg/dec_jpeg_data.h"
+
+#include <brotli/decode.h>
+
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+namespace jpeg {
+Status DecodeJPEGData(Span<const uint8_t> encoded, JPEGData* jpeg_data) {
+ Status ret = true;
+ const uint8_t* in = encoded.data();
+ size_t available_in = encoded.size();
+ {
+ BitReader br(encoded);
+ BitReaderScopedCloser br_closer(&br, &ret);
+ JXL_RETURN_IF_ERROR(Bundle::Read(&br, jpeg_data));
+ JXL_RETURN_IF_ERROR(br.JumpToByteBoundary());
+ in += br.TotalBitsConsumed() / 8;
+ available_in -= br.TotalBitsConsumed() / 8;
+ }
+ JXL_RETURN_IF_ERROR(ret);
+
+ BrotliDecoderState* brotli_dec =
+ BrotliDecoderCreateInstance(nullptr, nullptr, nullptr);
+
+ struct BrotliDecDeleter {
+ BrotliDecoderState* brotli_dec;
+ ~BrotliDecDeleter() { BrotliDecoderDestroyInstance(brotli_dec); }
+ } brotli_dec_deleter{brotli_dec};
+
+ BrotliDecoderResult result =
+ BrotliDecoderResult::BROTLI_DECODER_RESULT_SUCCESS;
+
+ auto br_read = [&](std::vector<uint8_t>& data) -> Status {
+ size_t available_out = data.size();
+ uint8_t* out = data.data();
+ while (available_out != 0) {
+ if (BrotliDecoderIsFinished(brotli_dec)) {
+ return JXL_FAILURE("Not enough decompressed output");
+ }
+ uint8_t* next_out_before = out;
+ size_t avail_out_before = available_out;
+ msan::MemoryIsInitialized(in, available_in);
+ result = BrotliDecoderDecompressStream(brotli_dec, &available_in, &in,
+ &available_out, &out, nullptr);
+ if (result !=
+ BrotliDecoderResult::BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT &&
+ result != BrotliDecoderResult::BROTLI_DECODER_RESULT_SUCCESS) {
+ return JXL_FAILURE(
+ "Brotli decoding error: %s\n",
+ BrotliDecoderErrorString(BrotliDecoderGetErrorCode(brotli_dec)));
+ }
+ msan::UnpoisonMemory(next_out_before, avail_out_before - available_out);
+ }
+ return true;
+ };
+ size_t num_icc = 0;
+ for (size_t i = 0; i < jpeg_data->app_data.size(); i++) {
+ auto& marker = jpeg_data->app_data[i];
+ if (jpeg_data->app_marker_type[i] != AppMarkerType::kUnknown) {
+ // Set the size of the marker.
+ size_t size_minus_1 = marker.size() - 1;
+ marker[1] = size_minus_1 >> 8;
+ marker[2] = size_minus_1 & 0xFF;
+ if (jpeg_data->app_marker_type[i] == AppMarkerType::kICC) {
+ if (marker.size() < 17) {
+ return JXL_FAILURE("ICC markers must be at least 17 bytes");
+ }
+ marker[0] = 0xE2;
+ memcpy(&marker[3], kIccProfileTag, sizeof kIccProfileTag);
+ marker[15] = ++num_icc;
+ }
+ } else {
+ JXL_RETURN_IF_ERROR(br_read(marker));
+ if (marker[1] * 256u + marker[2] + 1u != marker.size()) {
+ return JXL_FAILURE("Incorrect marker size");
+ }
+ }
+ }
+ for (size_t i = 0; i < jpeg_data->app_data.size(); i++) {
+ auto& marker = jpeg_data->app_data[i];
+ if (jpeg_data->app_marker_type[i] == AppMarkerType::kICC) {
+ marker[16] = num_icc;
+ }
+ if (jpeg_data->app_marker_type[i] == AppMarkerType::kExif) {
+ marker[0] = 0xE1;
+ if (marker.size() < 3 + sizeof kExifTag) {
+ return JXL_FAILURE("Incorrect Exif marker size");
+ }
+ memcpy(&marker[3], kExifTag, sizeof kExifTag);
+ }
+ if (jpeg_data->app_marker_type[i] == AppMarkerType::kXMP) {
+ marker[0] = 0xE1;
+ if (marker.size() < 3 + sizeof kXMPTag) {
+ return JXL_FAILURE("Incorrect XMP marker size");
+ }
+ memcpy(&marker[3], kXMPTag, sizeof kXMPTag);
+ }
+ }
+ // TODO(eustas): actually inject ICC profile and check it fits perfectly.
+ for (size_t i = 0; i < jpeg_data->com_data.size(); i++) {
+ auto& marker = jpeg_data->com_data[i];
+ JXL_RETURN_IF_ERROR(br_read(marker));
+ if (marker[1] * 256u + marker[2] + 1u != marker.size()) {
+ return JXL_FAILURE("Incorrect marker size");
+ }
+ }
+ for (size_t i = 0; i < jpeg_data->inter_marker_data.size(); i++) {
+ JXL_RETURN_IF_ERROR(br_read(jpeg_data->inter_marker_data[i]));
+ }
+ JXL_RETURN_IF_ERROR(br_read(jpeg_data->tail_data));
+
+ // Check if there is more decompressed output.
+ size_t available_out = 1;
+ uint64_t dummy;
+ uint8_t* next_out = reinterpret_cast<uint8_t*>(&dummy);
+ result = BrotliDecoderDecompressStream(brotli_dec, &available_in, &in,
+ &available_out, &next_out, nullptr);
+ if (available_out == 0 ||
+ result == BrotliDecoderResult::BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
+ return JXL_FAILURE("Excess data in compressed stream");
+ }
+ if (result == BrotliDecoderResult::BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
+ return JXL_FAILURE("Incomplete brotli-stream");
+ }
+ if (!BrotliDecoderIsFinished(brotli_dec) ||
+ result != BrotliDecoderResult::BROTLI_DECODER_RESULT_SUCCESS) {
+ return JXL_FAILURE("Corrupted brotli-stream");
+ }
+ if (available_in != 0) {
+ return JXL_FAILURE("Unused data after brotli stream");
+ }
+
+ return true;
+}
+} // namespace jpeg
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_data.h b/third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_data.h
new file mode 100644
index 0000000000..b9d50bf9f8
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_data.h
@@ -0,0 +1,19 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_JPEG_DEC_JPEG_DATA_H_
+#define LIB_JXL_JPEG_DEC_JPEG_DATA_H_
+
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+Status DecodeJPEGData(Span<const uint8_t> encoded, JPEGData* jpeg_data);
+}
+} // namespace jxl
+
+#endif // LIB_JXL_JPEG_DEC_JPEG_DATA_H_
diff --git a/third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_data_writer.cc b/third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_data_writer.cc
new file mode 100644
index 0000000000..f9ae755789
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_data_writer.cc
@@ -0,0 +1,1050 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/jpeg/dec_jpeg_data_writer.h"
+
+#include <stdlib.h>
+#include <string.h> /* for memset, memcpy */
+
+#include <deque>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/jpeg/dec_jpeg_serialization_state.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+
+namespace {
+
+enum struct SerializationStatus {
+ NEEDS_MORE_INPUT,
+ NEEDS_MORE_OUTPUT,
+ ERROR,
+ DONE
+};
+
+const int kJpegPrecision = 8;
+
+// JpegBitWriter: buffer size
+const size_t kJpegBitWriterChunkSize = 16384;
+
+// DCTCodingState: maximum number of correction bits to buffer
+const int kJPEGMaxCorrectionBits = 1u << 16;
+
+// Returns non-zero if and only if x has a zero byte, i.e. one of
+// x & 0xff, x & 0xff00, ..., x & 0xff00000000000000 is zero.
+static JXL_INLINE uint64_t HasZeroByte(uint64_t x) {
+ return (x - 0x0101010101010101ULL) & ~x & 0x8080808080808080ULL;
+}
+
+void JpegBitWriterInit(JpegBitWriter* bw,
+ std::deque<OutputChunk>* output_queue) {
+ bw->output = output_queue;
+ bw->chunk = OutputChunk(kJpegBitWriterChunkSize);
+ bw->pos = 0;
+ bw->put_buffer = 0;
+ bw->put_bits = 64;
+ bw->healthy = true;
+ bw->data = bw->chunk.buffer->data();
+}
+
+static JXL_NOINLINE void SwapBuffer(JpegBitWriter* bw) {
+ bw->chunk.len = bw->pos;
+ bw->output->emplace_back(std::move(bw->chunk));
+ bw->chunk = OutputChunk(kJpegBitWriterChunkSize);
+ bw->data = bw->chunk.buffer->data();
+ bw->pos = 0;
+}
+
+static JXL_INLINE void Reserve(JpegBitWriter* bw, size_t n_bytes) {
+ if (JXL_UNLIKELY((bw->pos + n_bytes) > kJpegBitWriterChunkSize)) {
+ SwapBuffer(bw);
+ }
+}
+
+/**
+ * Writes the given byte to the output, writes an extra zero if byte is 0xFF.
+ *
+ * This method is "careless" - caller must make sure that there is enough
+ * space in the output buffer. Emits up to 2 bytes to buffer.
+ */
+static JXL_INLINE void EmitByte(JpegBitWriter* bw, int byte) {
+ bw->data[bw->pos++] = byte;
+ if (byte == 0xFF) bw->data[bw->pos++] = 0;
+}
+
+static JXL_INLINE void DischargeBitBuffer(JpegBitWriter* bw) {
+ // At this point we are ready to emit the most significant 6 bytes of
+ // put_buffer_ to the output.
+ // The JPEG format requires that after every 0xff byte in the entropy
+ // coded section, there is a zero byte, therefore we first check if any of
+ // the 6 most significant bytes of put_buffer_ is 0xFF.
+ Reserve(bw, 12);
+ if (HasZeroByte(~bw->put_buffer | 0xFFFF)) {
+ // We have a 0xFF byte somewhere, examine each byte and append a zero
+ // byte if necessary.
+ EmitByte(bw, (bw->put_buffer >> 56) & 0xFF);
+ EmitByte(bw, (bw->put_buffer >> 48) & 0xFF);
+ EmitByte(bw, (bw->put_buffer >> 40) & 0xFF);
+ EmitByte(bw, (bw->put_buffer >> 32) & 0xFF);
+ EmitByte(bw, (bw->put_buffer >> 24) & 0xFF);
+ EmitByte(bw, (bw->put_buffer >> 16) & 0xFF);
+ } else {
+ // We don't have any 0xFF bytes, output all 6 bytes without checking.
+ bw->data[bw->pos] = (bw->put_buffer >> 56) & 0xFF;
+ bw->data[bw->pos + 1] = (bw->put_buffer >> 48) & 0xFF;
+ bw->data[bw->pos + 2] = (bw->put_buffer >> 40) & 0xFF;
+ bw->data[bw->pos + 3] = (bw->put_buffer >> 32) & 0xFF;
+ bw->data[bw->pos + 4] = (bw->put_buffer >> 24) & 0xFF;
+ bw->data[bw->pos + 5] = (bw->put_buffer >> 16) & 0xFF;
+ bw->pos += 6;
+ }
+ bw->put_buffer <<= 48;
+ bw->put_bits += 48;
+}
+
+static JXL_INLINE void WriteBits(JpegBitWriter* bw, int nbits, uint64_t bits) {
+ // This is an optimization; if everything goes well,
+ // then |nbits| is positive; if non-existing Huffman symbol is going to be
+ // encoded, its length should be zero; later encoder could check the
+ // "health" of JpegBitWriter.
+ if (nbits == 0) {
+ bw->healthy = false;
+ return;
+ }
+ bw->put_bits -= nbits;
+ bw->put_buffer |= (bits << bw->put_bits);
+ if (bw->put_bits <= 16) DischargeBitBuffer(bw);
+}
+
+void EmitMarker(JpegBitWriter* bw, int marker) {
+ Reserve(bw, 2);
+ JXL_DASSERT(marker != 0xFF);
+ bw->data[bw->pos++] = 0xFF;
+ bw->data[bw->pos++] = marker;
+}
+
+bool JumpToByteBoundary(JpegBitWriter* bw, const uint8_t** pad_bits,
+ const uint8_t* pad_bits_end) {
+ size_t n_bits = bw->put_bits & 7u;
+ uint8_t pad_pattern;
+ if (*pad_bits == nullptr) {
+ pad_pattern = (1u << n_bits) - 1;
+ } else {
+ pad_pattern = 0;
+ const uint8_t* src = *pad_bits;
+ // TODO(eustas): bitwise reading looks insanely ineffective...
+ while (n_bits--) {
+ pad_pattern <<= 1;
+ if (src >= pad_bits_end) return false;
+ // TODO(eustas): DCHECK *src == {0, 1}
+ pad_pattern |= !!*(src++);
+ }
+ *pad_bits = src;
+ }
+
+ Reserve(bw, 16);
+
+ while (bw->put_bits <= 56) {
+ int c = (bw->put_buffer >> 56) & 0xFF;
+ EmitByte(bw, c);
+ bw->put_buffer <<= 8;
+ bw->put_bits += 8;
+ }
+ if (bw->put_bits < 64) {
+ int pad_mask = 0xFFu >> (64 - bw->put_bits);
+ int c = ((bw->put_buffer >> 56) & ~pad_mask) | pad_pattern;
+ EmitByte(bw, c);
+ }
+ bw->put_buffer = 0;
+ bw->put_bits = 64;
+
+ return true;
+}
+
+void JpegBitWriterFinish(JpegBitWriter* bw) {
+ if (bw->pos == 0) return;
+ bw->chunk.len = bw->pos;
+ bw->output->emplace_back(std::move(bw->chunk));
+ bw->chunk = OutputChunk(nullptr, 0);
+ bw->data = nullptr;
+ bw->pos = 0;
+}
+
+void DCTCodingStateInit(DCTCodingState* s) {
+ s->eob_run_ = 0;
+ s->cur_ac_huff_ = nullptr;
+ s->refinement_bits_.clear();
+ s->refinement_bits_.reserve(kJPEGMaxCorrectionBits);
+}
+
+enum OutputModes {
+ kModeHistogram,
+ kModeWrite,
+};
+
+template <int kOutputMode>
+static JXL_INLINE void WriteSymbol(int symbol, HuffmanCodeTable* table,
+ JpegBitWriter* bw) {
+ if (kOutputMode == OutputModes::kModeHistogram) {
+ ++table->depth[symbol];
+ } else {
+ WriteBits(bw, table->depth[symbol], table->code[symbol]);
+ }
+}
+
+// Emit all buffered data to the bit stream using the given Huffman code and
+// bit writer.
+template <int kOutputMode>
+static JXL_INLINE void Flush(DCTCodingState* s, JpegBitWriter* bw) {
+ if (s->eob_run_ > 0) {
+ int nbits = FloorLog2Nonzero<uint32_t>(s->eob_run_);
+ int symbol = nbits << 4u;
+ WriteSymbol<kOutputMode>(symbol, s->cur_ac_huff_, bw);
+ if (nbits > 0) {
+ WriteBits(bw, nbits, s->eob_run_ & ((1 << nbits) - 1));
+ }
+ s->eob_run_ = 0;
+ }
+ for (size_t i = 0; i < s->refinement_bits_.size(); ++i) {
+ WriteBits(bw, 1, s->refinement_bits_[i]);
+ }
+ s->refinement_bits_.clear();
+}
+
+// Buffer some more data at the end-of-band (the last non-zero or newly
+// non-zero coefficient within the [Ss, Se] spectral band).
+template <int kOutputMode>
+static JXL_INLINE void BufferEndOfBand(DCTCodingState* s,
+ HuffmanCodeTable* ac_huff,
+ const std::vector<int>* new_bits,
+ JpegBitWriter* bw) {
+ if (s->eob_run_ == 0) {
+ s->cur_ac_huff_ = ac_huff;
+ }
+ ++s->eob_run_;
+ if (new_bits) {
+ s->refinement_bits_.insert(s->refinement_bits_.end(), new_bits->begin(),
+ new_bits->end());
+ }
+ if (s->eob_run_ == 0x7FFF ||
+ s->refinement_bits_.size() > kJPEGMaxCorrectionBits - kDCTBlockSize + 1) {
+ Flush<kOutputMode>(s, bw);
+ }
+}
+
+bool BuildHuffmanCodeTable(const JPEGHuffmanCode& huff,
+ HuffmanCodeTable* table) {
+ int huff_code[kJpegHuffmanAlphabetSize];
+ // +1 for a sentinel element.
+ uint32_t huff_size[kJpegHuffmanAlphabetSize + 1];
+ int p = 0;
+ for (size_t l = 1; l <= kJpegHuffmanMaxBitLength; ++l) {
+ int i = huff.counts[l];
+ if (p + i > kJpegHuffmanAlphabetSize + 1) {
+ return false;
+ }
+ while (i--) huff_size[p++] = l;
+ }
+
+ if (p == 0) {
+ return true;
+ }
+
+ // Reuse sentinel element.
+ int last_p = p - 1;
+ huff_size[last_p] = 0;
+
+ int code = 0;
+ uint32_t si = huff_size[0];
+ p = 0;
+ while (huff_size[p]) {
+ while ((huff_size[p]) == si) {
+ huff_code[p++] = code;
+ code++;
+ }
+ code <<= 1;
+ si++;
+ }
+ for (p = 0; p < last_p; p++) {
+ int i = huff.values[p];
+ table->depth[i] = huff_size[p];
+ table->code[i] = huff_code[p];
+ }
+ return true;
+}
+
+bool EncodeSOI(SerializationState* state) {
+ state->output_queue.push_back(OutputChunk({0xFF, 0xD8}));
+ return true;
+}
+
+bool EncodeEOI(const JPEGData& jpg, SerializationState* state) {
+ state->output_queue.push_back(OutputChunk({0xFF, 0xD9}));
+ state->output_queue.emplace_back(jpg.tail_data);
+ return true;
+}
+
+bool EncodeSOF(const JPEGData& jpg, uint8_t marker, SerializationState* state) {
+ if (marker <= 0xC2) state->is_progressive = (marker == 0xC2);
+
+ const size_t n_comps = jpg.components.size();
+ const size_t marker_len = 8 + 3 * n_comps;
+ state->output_queue.emplace_back(marker_len + 2);
+ uint8_t* data = state->output_queue.back().buffer->data();
+ size_t pos = 0;
+ data[pos++] = 0xFF;
+ data[pos++] = marker;
+ data[pos++] = marker_len >> 8u;
+ data[pos++] = marker_len & 0xFFu;
+ data[pos++] = kJpegPrecision;
+ data[pos++] = jpg.height >> 8u;
+ data[pos++] = jpg.height & 0xFFu;
+ data[pos++] = jpg.width >> 8u;
+ data[pos++] = jpg.width & 0xFFu;
+ data[pos++] = n_comps;
+ for (size_t i = 0; i < n_comps; ++i) {
+ data[pos++] = jpg.components[i].id;
+ data[pos++] = ((jpg.components[i].h_samp_factor << 4u) |
+ (jpg.components[i].v_samp_factor));
+ const size_t quant_idx = jpg.components[i].quant_idx;
+ if (quant_idx >= jpg.quant.size()) return false;
+ data[pos++] = jpg.quant[quant_idx].index;
+ }
+ return true;
+}
+
+bool EncodeSOS(const JPEGData& jpg, const JPEGScanInfo& scan_info,
+ SerializationState* state) {
+ const size_t n_scans = scan_info.num_components;
+ const size_t marker_len = 6 + 2 * n_scans;
+ state->output_queue.emplace_back(marker_len + 2);
+ uint8_t* data = state->output_queue.back().buffer->data();
+ size_t pos = 0;
+ data[pos++] = 0xFF;
+ data[pos++] = 0xDA;
+ data[pos++] = marker_len >> 8u;
+ data[pos++] = marker_len & 0xFFu;
+ data[pos++] = n_scans;
+ for (size_t i = 0; i < n_scans; ++i) {
+ const JPEGComponentScanInfo& si = scan_info.components[i];
+ if (si.comp_idx >= jpg.components.size()) return false;
+ data[pos++] = jpg.components[si.comp_idx].id;
+ data[pos++] = (si.dc_tbl_idx << 4u) + si.ac_tbl_idx;
+ }
+ data[pos++] = scan_info.Ss;
+ data[pos++] = scan_info.Se;
+ data[pos++] = ((scan_info.Ah << 4u) | (scan_info.Al));
+ return true;
+}
+
+bool EncodeDHT(const JPEGData& jpg, SerializationState* state) {
+ const std::vector<JPEGHuffmanCode>& huffman_code = jpg.huffman_code;
+
+ size_t marker_len = 2;
+ for (size_t i = state->dht_index; i < huffman_code.size(); ++i) {
+ const JPEGHuffmanCode& huff = huffman_code[i];
+ marker_len += kJpegHuffmanMaxBitLength;
+ for (size_t j = 0; j < huff.counts.size(); ++j) {
+ marker_len += huff.counts[j];
+ }
+ if (huff.is_last) break;
+ }
+ state->output_queue.emplace_back(marker_len + 2);
+ uint8_t* data = state->output_queue.back().buffer->data();
+ size_t pos = 0;
+ data[pos++] = 0xFF;
+ data[pos++] = 0xC4;
+ data[pos++] = marker_len >> 8u;
+ data[pos++] = marker_len & 0xFFu;
+ while (true) {
+ const size_t huffman_code_index = state->dht_index++;
+ if (huffman_code_index >= huffman_code.size()) {
+ return false;
+ }
+ const JPEGHuffmanCode& huff = huffman_code[huffman_code_index];
+ size_t index = huff.slot_id;
+ HuffmanCodeTable* huff_table;
+ if (index & 0x10) {
+ index -= 0x10;
+ huff_table = &state->ac_huff_table[index];
+ } else {
+ huff_table = &state->dc_huff_table[index];
+ }
+ // TODO(eustas): cache
+ // TODO(eustas): set up non-existing symbols
+ if (!BuildHuffmanCodeTable(huff, huff_table)) {
+ return false;
+ }
+ size_t total_count = 0;
+ size_t max_length = 0;
+ for (size_t i = 0; i < huff.counts.size(); ++i) {
+ if (huff.counts[i] != 0) {
+ max_length = i;
+ }
+ total_count += huff.counts[i];
+ }
+ --total_count;
+ data[pos++] = huff.slot_id;
+ for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) {
+ data[pos++] = (i == max_length ? huff.counts[i] - 1 : huff.counts[i]);
+ }
+ for (size_t i = 0; i < total_count; ++i) {
+ data[pos++] = huff.values[i];
+ }
+ if (huff.is_last) break;
+ }
+ return true;
+}
+
+bool EncodeDQT(const JPEGData& jpg, SerializationState* state) {
+ int marker_len = 2;
+ for (size_t i = state->dqt_index; i < jpg.quant.size(); ++i) {
+ const JPEGQuantTable& table = jpg.quant[i];
+ marker_len += 1 + (table.precision ? 2 : 1) * kDCTBlockSize;
+ if (table.is_last) break;
+ }
+ state->output_queue.emplace_back(marker_len + 2);
+ uint8_t* data = state->output_queue.back().buffer->data();
+ size_t pos = 0;
+ data[pos++] = 0xFF;
+ data[pos++] = 0xDB;
+ data[pos++] = marker_len >> 8u;
+ data[pos++] = marker_len & 0xFFu;
+ while (true) {
+ const size_t idx = state->dqt_index++;
+ if (idx >= jpg.quant.size()) {
+ return false; // corrupt input
+ }
+ const JPEGQuantTable& table = jpg.quant[idx];
+ data[pos++] = (table.precision << 4u) + table.index;
+ for (size_t i = 0; i < kDCTBlockSize; ++i) {
+ int val_idx = kJPEGNaturalOrder[i];
+ int val = table.values[val_idx];
+ if (table.precision) {
+ data[pos++] = val >> 8u;
+ }
+ data[pos++] = val & 0xFFu;
+ }
+ if (table.is_last) break;
+ }
+ return true;
+}
+
+bool EncodeDRI(const JPEGData& jpg, SerializationState* state) {
+ state->seen_dri_marker = true;
+ OutputChunk dri_marker = {0xFF,
+ 0xDD,
+ 0,
+ 4,
+ static_cast<uint8_t>(jpg.restart_interval >> 8),
+ static_cast<uint8_t>(jpg.restart_interval & 0xFF)};
+ state->output_queue.push_back(std::move(dri_marker));
+ return true;
+}
+
+bool EncodeRestart(uint8_t marker, SerializationState* state) {
+ state->output_queue.push_back(OutputChunk({0xFF, marker}));
+ return true;
+}
+
+bool EncodeAPP(const JPEGData& jpg, uint8_t marker, SerializationState* state) {
+ // TODO(eustas): check that marker corresponds to payload?
+ (void)marker;
+
+ size_t app_index = state->app_index++;
+ if (app_index >= jpg.app_data.size()) return false;
+ state->output_queue.push_back(OutputChunk({0xFF}));
+ state->output_queue.emplace_back(jpg.app_data[app_index]);
+ return true;
+}
+
+bool EncodeCOM(const JPEGData& jpg, SerializationState* state) {
+ size_t com_index = state->com_index++;
+ if (com_index >= jpg.com_data.size()) return false;
+ state->output_queue.push_back(OutputChunk({0xFF}));
+ state->output_queue.emplace_back(jpg.com_data[com_index]);
+ return true;
+}
+
+bool EncodeInterMarkerData(const JPEGData& jpg, SerializationState* state) {
+ size_t index = state->data_index++;
+ if (index >= jpg.inter_marker_data.size()) return false;
+ state->output_queue.emplace_back(jpg.inter_marker_data[index]);
+ return true;
+}
+
+template <int kOutputMode>
+bool EncodeDCTBlockSequential(const coeff_t* coeffs, HuffmanCodeTable* dc_huff,
+ HuffmanCodeTable* ac_huff, int num_zero_runs,
+ coeff_t* last_dc_coeff, JpegBitWriter* bw) {
+ coeff_t temp2;
+ coeff_t temp;
+ temp2 = coeffs[0];
+ temp = temp2 - *last_dc_coeff;
+ *last_dc_coeff = temp2;
+ temp2 = temp;
+ if (temp < 0) {
+ temp = -temp;
+ if (temp < 0) return false;
+ temp2--;
+ }
+ int dc_nbits = (temp == 0) ? 0 : (FloorLog2Nonzero<uint32_t>(temp) + 1);
+ WriteSymbol<kOutputMode>(dc_nbits, dc_huff, bw);
+ if (dc_nbits >= 12) return false;
+ if (dc_nbits > 0) {
+ WriteBits(bw, dc_nbits, temp2 & ((1u << dc_nbits) - 1));
+ }
+ int r = 0;
+ for (int k = 1; k < 64; ++k) {
+ if ((temp = coeffs[kJPEGNaturalOrder[k]]) == 0) {
+ r++;
+ continue;
+ }
+ if (temp < 0) {
+ temp = -temp;
+ if (temp < 0) return false;
+ temp2 = ~temp;
+ } else {
+ temp2 = temp;
+ }
+ while (r > 15) {
+ WriteSymbol<kOutputMode>(0xf0, ac_huff, bw);
+ r -= 16;
+ }
+ int ac_nbits = FloorLog2Nonzero<uint32_t>(temp) + 1;
+ if (ac_nbits >= 16) return false;
+ int symbol = (r << 4u) + ac_nbits;
+ WriteSymbol<kOutputMode>(symbol, ac_huff, bw);
+ WriteBits(bw, ac_nbits, temp2 & ((1 << ac_nbits) - 1));
+ r = 0;
+ }
+ for (int i = 0; i < num_zero_runs; ++i) {
+ WriteSymbol<kOutputMode>(0xf0, ac_huff, bw);
+ r -= 16;
+ }
+ if (r > 0) {
+ WriteSymbol<kOutputMode>(0, ac_huff, bw);
+ }
+ return true;
+}
+
+template <int kOutputMode>
+bool EncodeDCTBlockProgressive(const coeff_t* coeffs, HuffmanCodeTable* dc_huff,
+ HuffmanCodeTable* ac_huff, int Ss, int Se,
+ int Al, int num_zero_runs,
+ DCTCodingState* coding_state,
+ coeff_t* last_dc_coeff, JpegBitWriter* bw) {
+ bool eob_run_allowed = Ss > 0;
+ coeff_t temp2;
+ coeff_t temp;
+ if (Ss == 0) {
+ temp2 = coeffs[0] >> Al;
+ temp = temp2 - *last_dc_coeff;
+ *last_dc_coeff = temp2;
+ temp2 = temp;
+ if (temp < 0) {
+ temp = -temp;
+ if (temp < 0) return false;
+ temp2--;
+ }
+ int nbits = (temp == 0) ? 0 : (FloorLog2Nonzero<uint32_t>(temp) + 1);
+ WriteSymbol<kOutputMode>(nbits, dc_huff, bw);
+ if (nbits > 0) {
+ WriteBits(bw, nbits, temp2 & ((1 << nbits) - 1));
+ }
+ ++Ss;
+ }
+ if (Ss > Se) {
+ return true;
+ }
+ int r = 0;
+ for (int k = Ss; k <= Se; ++k) {
+ if ((temp = coeffs[kJPEGNaturalOrder[k]]) == 0) {
+ r++;
+ continue;
+ }
+ if (temp < 0) {
+ temp = -temp;
+ if (temp < 0) return false;
+ temp >>= Al;
+ temp2 = ~temp;
+ } else {
+ temp >>= Al;
+ temp2 = temp;
+ }
+ if (temp == 0) {
+ r++;
+ continue;
+ }
+ Flush<kOutputMode>(coding_state, bw);
+ while (r > 15) {
+ WriteSymbol<kOutputMode>(0xf0, ac_huff, bw);
+ r -= 16;
+ }
+ int nbits = FloorLog2Nonzero<uint32_t>(temp) + 1;
+ int symbol = (r << 4u) + nbits;
+ WriteSymbol<kOutputMode>(symbol, ac_huff, bw);
+ WriteBits(bw, nbits, temp2 & ((1 << nbits) - 1));
+ r = 0;
+ }
+ if (num_zero_runs > 0) {
+ Flush<kOutputMode>(coding_state, bw);
+ for (int i = 0; i < num_zero_runs; ++i) {
+ WriteSymbol<kOutputMode>(0xf0, ac_huff, bw);
+ r -= 16;
+ }
+ }
+ if (r > 0) {
+ BufferEndOfBand<kOutputMode>(coding_state, ac_huff, nullptr, bw);
+ if (!eob_run_allowed) {
+ Flush<kOutputMode>(coding_state, bw);
+ }
+ }
+ return true;
+}
+
+template <int kOutputMode>
+bool EncodeRefinementBits(const coeff_t* coeffs, HuffmanCodeTable* ac_huff,
+ int Ss, int Se, int Al, DCTCodingState* coding_state,
+ JpegBitWriter* bw) {
+ bool eob_run_allowed = Ss > 0;
+ if (Ss == 0) {
+ // Emit next bit of DC component.
+ WriteBits(bw, 1, (coeffs[0] >> Al) & 1);
+ ++Ss;
+ }
+ if (Ss > Se) {
+ return true;
+ }
+ int abs_values[kDCTBlockSize];
+ int eob = 0;
+ for (int k = Ss; k <= Se; k++) {
+ const coeff_t abs_val = std::abs(coeffs[kJPEGNaturalOrder[k]]);
+ abs_values[k] = abs_val >> Al;
+ if (abs_values[k] == 1) {
+ eob = k;
+ }
+ }
+ int r = 0;
+ std::vector<int> refinement_bits;
+ refinement_bits.reserve(kDCTBlockSize);
+ for (int k = Ss; k <= Se; k++) {
+ if (abs_values[k] == 0) {
+ r++;
+ continue;
+ }
+ while (r > 15 && k <= eob) {
+ Flush<kOutputMode>(coding_state, bw);
+ WriteSymbol<kOutputMode>(0xf0, ac_huff, bw);
+ r -= 16;
+ for (int bit : refinement_bits) {
+ WriteBits(bw, 1, bit);
+ }
+ refinement_bits.clear();
+ }
+ if (abs_values[k] > 1) {
+ refinement_bits.push_back(abs_values[k] & 1u);
+ continue;
+ }
+ Flush<kOutputMode>(coding_state, bw);
+ int symbol = (r << 4u) + 1;
+ int new_non_zero_bit = (coeffs[kJPEGNaturalOrder[k]] < 0) ? 0 : 1;
+ WriteSymbol<kOutputMode>(symbol, ac_huff, bw);
+ WriteBits(bw, 1, new_non_zero_bit);
+ for (int bit : refinement_bits) {
+ WriteBits(bw, 1, bit);
+ }
+ refinement_bits.clear();
+ r = 0;
+ }
+ if (r > 0 || !refinement_bits.empty()) {
+ BufferEndOfBand<kOutputMode>(coding_state, ac_huff, &refinement_bits, bw);
+ if (!eob_run_allowed) {
+ Flush<kOutputMode>(coding_state, bw);
+ }
+ }
+ return true;
+}
+
+size_t NumHistograms(const JPEGData& jpg) {
+ size_t num = 0;
+ for (const auto& si : jpg.scan_info) {
+ num += si.num_components;
+ }
+ return num;
+}
+
+size_t HistogramIndex(const JPEGData& jpg, size_t scan_index,
+ size_t component_index) {
+ size_t idx = 0;
+ for (size_t i = 0; i < scan_index; ++i) {
+ idx += jpg.scan_info[i].num_components;
+ }
+ return idx + component_index;
+}
+
+template <int kMode, int kOutputMode>
+SerializationStatus JXL_NOINLINE DoEncodeScan(const JPEGData& jpg,
+ SerializationState* state) {
+ const JPEGScanInfo& scan_info = jpg.scan_info[state->scan_index];
+ EncodeScanState& ss = state->scan_state;
+
+ const int restart_interval =
+ state->seen_dri_marker ? jpg.restart_interval : 0;
+
+ const auto get_next_extra_zero_run_index = [&ss, &scan_info]() -> int {
+ if (ss.extra_zero_runs_pos < scan_info.extra_zero_runs.size()) {
+ return scan_info.extra_zero_runs[ss.extra_zero_runs_pos].block_idx;
+ } else {
+ return -1;
+ }
+ };
+
+ const auto get_next_reset_point = [&ss, &scan_info]() -> int {
+ if (ss.next_reset_point_pos < scan_info.reset_points.size()) {
+ return scan_info.reset_points[ss.next_reset_point_pos++];
+ } else {
+ return -1;
+ }
+ };
+
+ if (ss.stage == EncodeScanState::HEAD) {
+ if (!EncodeSOS(jpg, scan_info, state)) return SerializationStatus::ERROR;
+ JpegBitWriterInit(&ss.bw, &state->output_queue);
+ DCTCodingStateInit(&ss.coding_state);
+ ss.restarts_to_go = restart_interval;
+ ss.next_restart_marker = 0;
+ ss.block_scan_index = 0;
+ ss.extra_zero_runs_pos = 0;
+ ss.next_extra_zero_run_index = get_next_extra_zero_run_index();
+ ss.next_reset_point_pos = 0;
+ ss.next_reset_point = get_next_reset_point();
+ ss.mcu_y = 0;
+ memset(ss.last_dc_coeff, 0, sizeof(ss.last_dc_coeff));
+ ss.stage = EncodeScanState::BODY;
+ }
+ JpegBitWriter* bw = &ss.bw;
+ DCTCodingState* coding_state = &ss.coding_state;
+
+ JXL_DASSERT(ss.stage == EncodeScanState::BODY);
+
+ // "Non-interleaved" means color data comes in separate scans, in other words
+ // each scan can contain only one color component.
+ const bool is_interleaved = (scan_info.num_components > 1);
+ int MCUs_per_row = 0;
+ int MCU_rows = 0;
+ jpg.CalculateMcuSize(scan_info, &MCUs_per_row, &MCU_rows);
+ const bool is_progressive = state->is_progressive;
+ const int Al = is_progressive ? scan_info.Al : 0;
+ const int Ss = is_progressive ? scan_info.Ss : 0;
+ const int Se = is_progressive ? scan_info.Se : 63;
+
+ // DC-only is defined by [0..0] spectral range.
+ const bool want_ac = ((Ss != 0) || (Se != 0));
+ // TODO: support streaming decoding again.
+ const bool complete_ac = true;
+ const bool has_ac = true;
+ if (want_ac && !has_ac) return SerializationStatus::NEEDS_MORE_INPUT;
+
+ // |has_ac| implies |complete_dc| but not vice versa; for the sake of
+ // simplicity we pretend they are equal, because they are separated by just a
+ // few bytes of input.
+ const bool complete_dc = has_ac;
+ const bool complete = want_ac ? complete_ac : complete_dc;
+ // When "incomplete" |ac_dc| tracks information about current ("incomplete")
+ // band parsing progress.
+
+ // FIXME: Is this always complete?
+ // const int last_mcu_y =
+ // complete ? MCU_rows : parsing_state.internal->ac_dc.next_mcu_y *
+ // v_group;
+ (void)complete;
+ const int last_mcu_y = complete ? MCU_rows : 0;
+
+ for (; ss.mcu_y < last_mcu_y; ++ss.mcu_y) {
+ for (int mcu_x = 0; mcu_x < MCUs_per_row; ++mcu_x) {
+ // Possibly emit a restart marker.
+ if (restart_interval > 0 && ss.restarts_to_go == 0) {
+ Flush<kOutputMode>(coding_state, bw);
+ if (!JumpToByteBoundary(bw, &state->pad_bits, state->pad_bits_end)) {
+ return SerializationStatus::ERROR;
+ }
+ EmitMarker(bw, 0xD0 + ss.next_restart_marker);
+ ss.next_restart_marker += 1;
+ ss.next_restart_marker &= 0x7;
+ ss.restarts_to_go = restart_interval;
+ memset(ss.last_dc_coeff, 0, sizeof(ss.last_dc_coeff));
+ }
+ // Encode one MCU
+ for (size_t i = 0; i < scan_info.num_components; ++i) {
+ const JPEGComponentScanInfo& si = scan_info.components[i];
+ const JPEGComponent& c = jpg.components[si.comp_idx];
+ size_t dc_tbl_idx = (kOutputMode == OutputModes::kModeHistogram
+ ? HistogramIndex(jpg, state->scan_index, i)
+ : si.dc_tbl_idx);
+ size_t ac_tbl_idx = (kOutputMode == OutputModes::kModeHistogram
+ ? HistogramIndex(jpg, state->scan_index, i)
+ : si.ac_tbl_idx);
+ HuffmanCodeTable* dc_huff = &state->dc_huff_table[dc_tbl_idx];
+ HuffmanCodeTable* ac_huff = &state->ac_huff_table[ac_tbl_idx];
+ int n_blocks_y = is_interleaved ? c.v_samp_factor : 1;
+ int n_blocks_x = is_interleaved ? c.h_samp_factor : 1;
+ for (int iy = 0; iy < n_blocks_y; ++iy) {
+ for (int ix = 0; ix < n_blocks_x; ++ix) {
+ int block_y = ss.mcu_y * n_blocks_y + iy;
+ int block_x = mcu_x * n_blocks_x + ix;
+ int block_idx = block_y * c.width_in_blocks + block_x;
+ if (ss.block_scan_index == ss.next_reset_point) {
+ Flush<kOutputMode>(coding_state, bw);
+ ss.next_reset_point = get_next_reset_point();
+ }
+ int num_zero_runs = 0;
+ if (ss.block_scan_index == ss.next_extra_zero_run_index) {
+ num_zero_runs = scan_info.extra_zero_runs[ss.extra_zero_runs_pos]
+ .num_extra_zero_runs;
+ ++ss.extra_zero_runs_pos;
+ ss.next_extra_zero_run_index = get_next_extra_zero_run_index();
+ }
+ const coeff_t* coeffs = &c.coeffs[block_idx << 6];
+ bool ok;
+ if (kMode == 0) {
+ ok = EncodeDCTBlockSequential<kOutputMode>(
+ coeffs, dc_huff, ac_huff, num_zero_runs,
+ ss.last_dc_coeff + si.comp_idx, bw);
+ } else if (kMode == 1) {
+ ok = EncodeDCTBlockProgressive<kOutputMode>(
+ coeffs, dc_huff, ac_huff, Ss, Se, Al, num_zero_runs,
+ coding_state, ss.last_dc_coeff + si.comp_idx, bw);
+ } else {
+ ok = EncodeRefinementBits<kOutputMode>(coeffs, ac_huff, Ss, Se,
+ Al, coding_state, bw);
+ }
+ if (!ok) return SerializationStatus::ERROR;
+ ++ss.block_scan_index;
+ }
+ }
+ }
+ --ss.restarts_to_go;
+ }
+ }
+ if (ss.mcu_y < MCU_rows) {
+ if (!bw->healthy) return SerializationStatus::ERROR;
+ return SerializationStatus::NEEDS_MORE_INPUT;
+ }
+ Flush<kOutputMode>(coding_state, bw);
+ if (!JumpToByteBoundary(bw, &state->pad_bits, state->pad_bits_end)) {
+ return SerializationStatus::ERROR;
+ }
+ JpegBitWriterFinish(bw);
+ ss.stage = EncodeScanState::HEAD;
+ state->scan_index++;
+ if (!bw->healthy) return SerializationStatus::ERROR;
+
+ return SerializationStatus::DONE;
+}
+
+template <int kOutputMode>
+static SerializationStatus JXL_INLINE EncodeScan(const JPEGData& jpg,
+ SerializationState* state) {
+ const JPEGScanInfo& scan_info = jpg.scan_info[state->scan_index];
+ const bool is_progressive = state->is_progressive;
+ const int Al = is_progressive ? scan_info.Al : 0;
+ const int Ah = is_progressive ? scan_info.Ah : 0;
+ const int Ss = is_progressive ? scan_info.Ss : 0;
+ const int Se = is_progressive ? scan_info.Se : 63;
+ const bool need_sequential =
+ !is_progressive || (Ah == 0 && Al == 0 && Ss == 0 && Se == 63);
+ if (need_sequential) {
+ return DoEncodeScan<0, kOutputMode>(jpg, state);
+ } else if (Ah == 0) {
+ return DoEncodeScan<1, kOutputMode>(jpg, state);
+ } else {
+ return DoEncodeScan<2, kOutputMode>(jpg, state);
+ }
+}
+
+template <int kOutputMode>
+SerializationStatus SerializeSection(uint8_t marker, SerializationState* state,
+ const JPEGData& jpg) {
+ const auto to_status = [](bool result) {
+ return result ? SerializationStatus::DONE : SerializationStatus::ERROR;
+ };
+ // TODO(eustas): add and use marker enum
+ switch (marker) {
+ case 0xC0:
+ case 0xC1:
+ case 0xC2:
+ case 0xC9:
+ case 0xCA:
+ return to_status(EncodeSOF(jpg, marker, state));
+
+ case 0xC4:
+ return to_status((kOutputMode == OutputModes::kModeHistogram) ||
+ EncodeDHT(jpg, state));
+
+ case 0xD0:
+ case 0xD1:
+ case 0xD2:
+ case 0xD3:
+ case 0xD4:
+ case 0xD5:
+ case 0xD6:
+ case 0xD7:
+ return to_status(EncodeRestart(marker, state));
+
+ case 0xD9:
+ return to_status(EncodeEOI(jpg, state));
+
+ case 0xDA:
+ return EncodeScan<kOutputMode>(jpg, state);
+
+ case 0xDB:
+ return to_status(EncodeDQT(jpg, state));
+
+ case 0xDD:
+ return to_status(EncodeDRI(jpg, state));
+
+ case 0xE0:
+ case 0xE1:
+ case 0xE2:
+ case 0xE3:
+ case 0xE4:
+ case 0xE5:
+ case 0xE6:
+ case 0xE7:
+ case 0xE8:
+ case 0xE9:
+ case 0xEA:
+ case 0xEB:
+ case 0xEC:
+ case 0xED:
+ case 0xEE:
+ case 0xEF:
+ return to_status(EncodeAPP(jpg, marker, state));
+
+ case 0xFE:
+ return to_status(EncodeCOM(jpg, state));
+
+ case 0xFF:
+ return to_status(EncodeInterMarkerData(jpg, state));
+
+ default:
+ return SerializationStatus::ERROR;
+ }
+}
+
+// TODO(veluca): add streaming support again.
+template <int kOutputMode>
+Status WriteJpegInternal(const JPEGData& jpg, const JPEGOutput& out,
+ SerializationState* ss) {
+ const auto maybe_push_output = [&]() -> Status {
+ if (ss->stage != SerializationState::STAGE_ERROR) {
+ while (!ss->output_queue.empty()) {
+ auto& chunk = ss->output_queue.front();
+ size_t num_written = out(chunk.next, chunk.len);
+ if (num_written == 0 && chunk.len > 0) {
+ return StatusMessage(Status(StatusCode::kNotEnoughBytes),
+ "Failed to write output");
+ }
+ chunk.len -= num_written;
+ if (chunk.len == 0) {
+ ss->output_queue.pop_front();
+ }
+ }
+ }
+ return true;
+ };
+
+ while (true) {
+ switch (ss->stage) {
+ case SerializationState::STAGE_INIT: {
+ // Valid Brunsli requires, at least, 0xD9 marker.
+ // This might happen on corrupted stream, or on unconditioned JPEGData.
+ // TODO(eustas): check D9 in the only one and is the last one.
+ if (jpg.marker_order.empty()) {
+ ss->stage = SerializationState::STAGE_ERROR;
+ break;
+ }
+ if (kOutputMode == OutputModes::kModeHistogram) {
+ size_t num_histo = NumHistograms(jpg);
+ ss->dc_huff_table.resize(num_histo);
+ ss->ac_huff_table.resize(num_histo);
+ for (size_t i = 0; i < num_histo; ++i) {
+ ss->dc_huff_table[i].InitDepths();
+ ss->ac_huff_table[i].InitDepths();
+ }
+ } else {
+ ss->dc_huff_table.resize(kMaxHuffmanTables);
+ ss->ac_huff_table.resize(kMaxHuffmanTables);
+ }
+ if (jpg.has_zero_padding_bit) {
+ ss->pad_bits = jpg.padding_bits.data();
+ ss->pad_bits_end = ss->pad_bits + jpg.padding_bits.size();
+ }
+
+ EncodeSOI(ss);
+ JXL_QUIET_RETURN_IF_ERROR(maybe_push_output());
+ ss->stage = SerializationState::STAGE_SERIALIZE_SECTION;
+ break;
+ }
+
+ case SerializationState::STAGE_SERIALIZE_SECTION: {
+ if (ss->section_index >= jpg.marker_order.size()) {
+ ss->stage = SerializationState::STAGE_DONE;
+ break;
+ }
+ uint8_t marker = jpg.marker_order[ss->section_index];
+ SerializationStatus status =
+ SerializeSection<kOutputMode>(marker, ss, jpg);
+ if (status == SerializationStatus::ERROR) {
+ JXL_WARNING("Failed to encode marker 0x%.2x", marker);
+ ss->stage = SerializationState::STAGE_ERROR;
+ break;
+ }
+ JXL_QUIET_RETURN_IF_ERROR(maybe_push_output());
+ if (status == SerializationStatus::NEEDS_MORE_INPUT) {
+ return JXL_FAILURE("Incomplete serialization data");
+ } else if (status != SerializationStatus::DONE) {
+ JXL_DASSERT(false);
+ ss->stage = SerializationState::STAGE_ERROR;
+ break;
+ }
+ ++ss->section_index;
+ break;
+ }
+
+ case SerializationState::STAGE_DONE:
+ JXL_ASSERT(ss->output_queue.empty());
+ if (ss->pad_bits != nullptr && ss->pad_bits != ss->pad_bits_end) {
+ return JXL_FAILURE("Invalid number of padding bits.");
+ }
+ return true;
+
+ case SerializationState::STAGE_ERROR:
+ return JXL_FAILURE("JPEG serialization error");
+ }
+ }
+}
+
+} // namespace
+
+Status WriteJpeg(const JPEGData& jpg, const JPEGOutput& out) {
+ SerializationState ss;
+ return WriteJpegInternal<OutputModes::kModeWrite>(jpg, out, &ss);
+}
+
+Status ProcessJpeg(const JPEGData& jpg, SerializationState* ss) {
+ auto nullout = [](const uint8_t* buf, size_t len) { return len; };
+ return WriteJpegInternal<OutputModes::kModeHistogram>(jpg, nullout, ss);
+}
+
+} // namespace jpeg
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_data_writer.h b/third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_data_writer.h
new file mode 100644
index 0000000000..9ccfb749a8
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_data_writer.h
@@ -0,0 +1,35 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Functions for writing a JPEGData object into a jpeg byte stream.
+
+#ifndef LIB_JXL_JPEG_DEC_JPEG_DATA_WRITER_H_
+#define LIB_JXL_JPEG_DEC_JPEG_DATA_WRITER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <functional>
+
+#include "lib/jxl/jpeg/dec_jpeg_serialization_state.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+
+// Function type used to write len bytes into buf. Returns the number of bytes
+// written.
+using JPEGOutput = std::function<size_t(const uint8_t* buf, size_t len)>;
+
+Status WriteJpeg(const JPEGData& jpg, const JPEGOutput& out);
+
+// Same as WriteJpeg, but instead of writing to the output, collects statistics
+// about the bit-stream into `ss`.
+Status ProcessJpeg(const JPEGData& jpg, SerializationState* ss);
+
+} // namespace jpeg
+} // namespace jxl
+
+#endif // LIB_JXL_JPEG_DEC_JPEG_DATA_WRITER_H_
diff --git a/third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_output_chunk.h b/third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_output_chunk.h
new file mode 100644
index 0000000000..e003c04952
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_output_chunk.h
@@ -0,0 +1,72 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_JPEG_DEC_JPEG_OUTPUT_CHUNK_H_
+#define LIB_JXL_JPEG_DEC_JPEG_OUTPUT_CHUNK_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <initializer_list>
+#include <memory>
+#include <vector>
+
+namespace jxl {
+namespace jpeg {
+
+/**
+ * A chunk of output data.
+ *
+ * Data producer creates OutputChunks and adds them to the end output queue.
+ * Once control flow leaves the producer code, it is considered that chunk of
+ * data is final and can not be changed; to underline this fact |next| is a
+ * const-pointer.
+ *
+ * Data consumer removes OutputChunks from the beginning of the output queue.
+ * It is possible to consume OutputChunks partially, by updating |next| and
+ * |len|.
+ *
+ * There are 2 types of output chunks:
+ * - owning: actual data is stored in |buffer| field; producer fills data after
+ * the instance it created; it is legal to reduce |len| to show that not all
+ * the capacity of |buffer| is used
+ * - non-owning: represents the data stored (owned) somewhere else
+ */
+struct OutputChunk {
+ // Non-owning
+ template <typename Bytes>
+ explicit OutputChunk(Bytes& bytes) : len(bytes.size()) {
+ // Deal both with const qualifier and data type.
+ const void* src = bytes.data();
+ next = reinterpret_cast<const uint8_t*>(src);
+ }
+
+ // Non-owning
+ OutputChunk(const uint8_t* data, size_t size) : next(data), len(size) {}
+
+ // Owning
+ explicit OutputChunk(size_t size = 0) {
+ buffer.reset(new std::vector<uint8_t>(size));
+ next = buffer->data();
+ len = size;
+ }
+
+ // Owning
+ OutputChunk(std::initializer_list<uint8_t> bytes) {
+ buffer.reset(new std::vector<uint8_t>(bytes));
+ next = buffer->data();
+ len = bytes.size();
+ }
+
+ const uint8_t* next;
+ size_t len;
+ // TODO(veluca): consider removing the unique_ptr.
+ std::unique_ptr<std::vector<uint8_t>> buffer;
+};
+
+} // namespace jpeg
+} // namespace jxl
+
+#endif // LIB_JXL_JPEG_DEC_JPEG_OUTPUT_CHUNK_H_
diff --git a/third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_serialization_state.h b/third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_serialization_state.h
new file mode 100644
index 0000000000..40ce450a76
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/jpeg/dec_jpeg_serialization_state.h
@@ -0,0 +1,96 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_JPEG_DEC_JPEG_SERIALIZATION_STATE_H_
+#define LIB_JXL_JPEG_DEC_JPEG_SERIALIZATION_STATE_H_
+
+#include <deque>
+#include <vector>
+
+#include "lib/jxl/jpeg/dec_jpeg_output_chunk.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+
+struct HuffmanCodeTable {
+ int depth[256];
+ int code[256];
+ void InitDepths() { std::fill(std::begin(depth), std::end(depth), 0); }
+};
+
+// Handles the packing of bits into output bytes.
+struct JpegBitWriter {
+ bool healthy;
+ std::deque<OutputChunk>* output;
+ OutputChunk chunk;
+ uint8_t* data;
+ size_t pos;
+ uint64_t put_buffer;
+ int put_bits;
+};
+
+// Holds data that is buffered between 8x8 blocks in progressive mode.
+struct DCTCodingState {
+ // The run length of end-of-band symbols in a progressive scan.
+ int eob_run_;
+ // The huffman table to be used when flushing the state.
+ HuffmanCodeTable* cur_ac_huff_;
+ // The sequence of currently buffered refinement bits for a successive
+ // approximation scan (one where Ah > 0).
+ std::vector<int> refinement_bits_;
+};
+
+struct EncodeScanState {
+ enum Stage { HEAD, BODY };
+
+ Stage stage = HEAD;
+
+ int mcu_y;
+ JpegBitWriter bw;
+ coeff_t last_dc_coeff[kMaxComponents] = {0};
+ int restarts_to_go;
+ int next_restart_marker;
+ int block_scan_index;
+ DCTCodingState coding_state;
+ size_t extra_zero_runs_pos;
+ int next_extra_zero_run_index;
+ size_t next_reset_point_pos;
+ int next_reset_point;
+};
+
+struct SerializationState {
+ enum Stage {
+ STAGE_INIT,
+ STAGE_SERIALIZE_SECTION,
+ STAGE_DONE,
+ STAGE_ERROR,
+ };
+
+ Stage stage = STAGE_INIT;
+
+ std::deque<OutputChunk> output_queue;
+
+ size_t section_index = 0;
+ int dht_index = 0;
+ int dqt_index = 0;
+ int app_index = 0;
+ int com_index = 0;
+ int data_index = 0;
+ int scan_index = 0;
+ std::vector<HuffmanCodeTable> dc_huff_table;
+ std::vector<HuffmanCodeTable> ac_huff_table;
+ const uint8_t* pad_bits = nullptr;
+ const uint8_t* pad_bits_end = nullptr;
+ bool seen_dri_marker = false;
+ bool is_progressive = false;
+
+ EncodeScanState scan_state;
+};
+
+} // namespace jpeg
+} // namespace jxl
+
+#endif // LIB_JXL_JPEG_DEC_JPEG_SERIALIZATION_STATE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_data.cc b/third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_data.cc
new file mode 100644
index 0000000000..842612f4ab
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_data.cc
@@ -0,0 +1,384 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/jpeg/enc_jpeg_data.h"
+
+#include <brotli/encode.h>
+#include <stdio.h>
+
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/jpeg/enc_jpeg_data_reader.h"
+#include "lib/jxl/luminance.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+namespace jpeg {
+
+namespace {
+
+constexpr int BITS_IN_JSAMPLE = 8;
+using ByteSpan = Span<const uint8_t>;
+
+// TODO(eustas): move to jpeg_data, to use from codec_jpg as well.
+// See if there is a canonically chunked ICC profile and mark corresponding
+// app-tags with AppMarkerType::kICC.
+Status DetectIccProfile(JPEGData& jpeg_data) {
+ JXL_DASSERT(jpeg_data.app_data.size() == jpeg_data.app_marker_type.size());
+ size_t num_icc = 0;
+ size_t num_icc_jpeg = 0;
+ for (size_t i = 0; i < jpeg_data.app_data.size(); i++) {
+ const auto& app = jpeg_data.app_data[i];
+ size_t pos = 0;
+ if (app[pos++] != 0xE2) continue;
+ // At least APPn + size; otherwise it should be intermarker-data.
+ JXL_DASSERT(app.size() >= 3);
+ size_t tag_length = (app[pos] << 8) + app[pos + 1];
+ pos += 2;
+ JXL_DASSERT(app.size() == tag_length + 1);
+ // Empty payload is 2 bytes for tag length itself + signature
+ if (tag_length < 2 + sizeof kIccProfileTag) continue;
+
+ if (memcmp(&app[pos], kIccProfileTag, sizeof kIccProfileTag) != 0) continue;
+ pos += sizeof kIccProfileTag;
+ uint8_t chunk_id = app[pos++];
+ uint8_t num_chunks = app[pos++];
+ if (chunk_id != num_icc + 1) continue;
+ if (num_icc_jpeg == 0) num_icc_jpeg = num_chunks;
+ if (num_icc_jpeg != num_chunks) continue;
+ num_icc++;
+ jpeg_data.app_marker_type[i] = AppMarkerType::kICC;
+ }
+ if (num_icc != num_icc_jpeg) {
+ return JXL_FAILURE("Invalid ICC chunks");
+ }
+ return true;
+}
+
+bool GetMarkerPayload(const uint8_t* data, size_t size, ByteSpan* payload) {
+ if (size < 3) {
+ return false;
+ }
+ size_t hi = data[1];
+ size_t lo = data[2];
+ size_t internal_size = (hi << 8u) | lo;
+ // Second byte of marker is not counted towards size.
+ if (internal_size != size - 1) {
+ return false;
+ }
+ // cut second marker byte and "length" from payload.
+ *payload = ByteSpan(data, size);
+ payload->remove_prefix(3);
+ return true;
+}
+
+Status DetectBlobs(jpeg::JPEGData& jpeg_data) {
+ JXL_DASSERT(jpeg_data.app_data.size() == jpeg_data.app_marker_type.size());
+ bool have_exif = false, have_xmp = false;
+ for (size_t i = 0; i < jpeg_data.app_data.size(); i++) {
+ auto& marker = jpeg_data.app_data[i];
+ if (marker.empty() || marker[0] != kApp1) {
+ continue;
+ }
+ ByteSpan payload;
+ if (!GetMarkerPayload(marker.data(), marker.size(), &payload)) {
+ // Something is wrong with this marker; does not care.
+ continue;
+ }
+ if (!have_exif && payload.size() >= sizeof kExifTag &&
+ !memcmp(payload.data(), kExifTag, sizeof kExifTag)) {
+ jpeg_data.app_marker_type[i] = AppMarkerType::kExif;
+ have_exif = true;
+ }
+ if (!have_xmp && payload.size() >= sizeof kXMPTag &&
+ !memcmp(payload.data(), kXMPTag, sizeof kXMPTag)) {
+ jpeg_data.app_marker_type[i] = AppMarkerType::kXMP;
+ have_xmp = true;
+ }
+ }
+ return true;
+}
+
+Status ParseChunkedMarker(const jpeg::JPEGData& src, uint8_t marker_type,
+ const ByteSpan& tag, PaddedBytes* output,
+ bool allow_permutations = false) {
+ output->clear();
+
+ std::vector<ByteSpan> chunks;
+ std::vector<bool> presence;
+ size_t expected_number_of_parts = 0;
+ bool is_first_chunk = true;
+ size_t ordinal = 0;
+ for (const auto& marker : src.app_data) {
+ if (marker.empty() || marker[0] != marker_type) {
+ continue;
+ }
+ ByteSpan payload;
+ if (!GetMarkerPayload(marker.data(), marker.size(), &payload)) {
+ // Something is wrong with this marker; does not care.
+ continue;
+ }
+ if ((payload.size() < tag.size()) ||
+ memcmp(payload.data(), tag.data(), tag.size()) != 0) {
+ continue;
+ }
+ payload.remove_prefix(tag.size());
+ if (payload.size() < 2) {
+ return JXL_FAILURE("Chunk is too small.");
+ }
+ uint8_t index = payload[0];
+ uint8_t total = payload[1];
+ ordinal++;
+ if (!allow_permutations) {
+ if (index != ordinal) return JXL_FAILURE("Invalid chunk order.");
+ }
+
+ payload.remove_prefix(2);
+
+ JXL_RETURN_IF_ERROR(total != 0);
+ if (is_first_chunk) {
+ is_first_chunk = false;
+ expected_number_of_parts = total;
+ // 1-based indices; 0-th element is added for convenience.
+ chunks.resize(total + 1);
+ presence.resize(total + 1);
+ } else {
+ JXL_RETURN_IF_ERROR(expected_number_of_parts == total);
+ }
+
+ if (index == 0 || index > total) {
+ return JXL_FAILURE("Invalid chunk index.");
+ }
+
+ if (presence[index]) {
+ return JXL_FAILURE("Duplicate chunk.");
+ }
+ presence[index] = true;
+ chunks[index] = payload;
+ }
+
+ for (size_t i = 0; i < expected_number_of_parts; ++i) {
+ // 0-th element is not used.
+ size_t index = i + 1;
+ if (!presence[index]) {
+ return JXL_FAILURE("Missing chunk.");
+ }
+ output->append(chunks[index]);
+ }
+
+ return true;
+}
+
+Status SetBlobsFromJpegData(const jpeg::JPEGData& jpeg_data, Blobs* blobs) {
+ for (size_t i = 0; i < jpeg_data.app_data.size(); i++) {
+ auto& marker = jpeg_data.app_data[i];
+ if (marker.empty() || marker[0] != kApp1) {
+ continue;
+ }
+ ByteSpan payload;
+ if (!GetMarkerPayload(marker.data(), marker.size(), &payload)) {
+ // Something is wrong with this marker; does not care.
+ continue;
+ }
+ if (payload.size() >= sizeof kExifTag &&
+ !memcmp(payload.data(), kExifTag, sizeof kExifTag)) {
+ if (blobs->exif.empty()) {
+ blobs->exif.resize(payload.size() - sizeof kExifTag);
+ memcpy(blobs->exif.data(), payload.data() + sizeof kExifTag,
+ payload.size() - sizeof kExifTag);
+ } else {
+ JXL_WARNING(
+ "ReJPEG: multiple Exif blobs, storing only first one in the JPEG "
+ "XL container\n");
+ }
+ }
+ if (payload.size() >= sizeof kXMPTag &&
+ !memcmp(payload.data(), kXMPTag, sizeof kXMPTag)) {
+ if (blobs->xmp.empty()) {
+ blobs->xmp.resize(payload.size() - sizeof kXMPTag);
+ memcpy(blobs->xmp.data(), payload.data() + sizeof kXMPTag,
+ payload.size() - sizeof kXMPTag);
+ } else {
+ JXL_WARNING(
+ "ReJPEG: multiple XMP blobs, storing only first one in the JPEG "
+ "XL container\n");
+ }
+ }
+ }
+ return true;
+}
+
+static inline bool IsJPG(const Span<const uint8_t> bytes) {
+ return bytes.size() >= 2 && bytes[0] == 0xFF && bytes[1] == 0xD8;
+}
+
+} // namespace
+
+Status SetColorEncodingFromJpegData(const jpeg::JPEGData& jpg,
+ ColorEncoding* color_encoding) {
+ PaddedBytes icc_profile;
+ if (!ParseChunkedMarker(jpg, kApp2, ByteSpan(kIccProfileTag), &icc_profile)) {
+ JXL_WARNING("ReJPEG: corrupted ICC profile\n");
+ icc_profile.clear();
+ }
+
+ if (icc_profile.empty()) {
+ bool is_gray = (jpg.components.size() == 1);
+ *color_encoding = ColorEncoding::SRGB(is_gray);
+ return true;
+ }
+
+ return color_encoding->SetICC(std::move(icc_profile));
+}
+
+Status EncodeJPEGData(JPEGData& jpeg_data, PaddedBytes* bytes,
+ const CompressParams& cparams) {
+ jpeg_data.app_marker_type.resize(jpeg_data.app_data.size(),
+ AppMarkerType::kUnknown);
+ JXL_RETURN_IF_ERROR(DetectIccProfile(jpeg_data));
+ JXL_RETURN_IF_ERROR(DetectBlobs(jpeg_data));
+ BitWriter writer;
+ JXL_RETURN_IF_ERROR(Bundle::Write(jpeg_data, &writer, 0, nullptr));
+ writer.ZeroPadToByte();
+ *bytes = std::move(writer).TakeBytes();
+ BrotliEncoderState* brotli_enc =
+ BrotliEncoderCreateInstance(nullptr, nullptr, nullptr);
+ int effort = cparams.brotli_effort;
+ if (effort < 0) effort = 11 - static_cast<int>(cparams.speed_tier);
+ BrotliEncoderSetParameter(brotli_enc, BROTLI_PARAM_QUALITY, effort);
+ size_t total_data = 0;
+ for (size_t i = 0; i < jpeg_data.app_data.size(); i++) {
+ if (jpeg_data.app_marker_type[i] != AppMarkerType::kUnknown) {
+ continue;
+ }
+ total_data += jpeg_data.app_data[i].size();
+ }
+ for (size_t i = 0; i < jpeg_data.com_data.size(); i++) {
+ total_data += jpeg_data.com_data[i].size();
+ }
+ for (size_t i = 0; i < jpeg_data.inter_marker_data.size(); i++) {
+ total_data += jpeg_data.inter_marker_data[i].size();
+ }
+ total_data += jpeg_data.tail_data.size();
+ size_t initial_size = bytes->size();
+ size_t brotli_capacity = BrotliEncoderMaxCompressedSize(total_data);
+ BrotliEncoderSetParameter(brotli_enc, BROTLI_PARAM_SIZE_HINT, total_data);
+ bytes->resize(bytes->size() + brotli_capacity);
+ size_t enc_size = 0;
+ auto br_append = [&](const std::vector<uint8_t>& data, bool last) {
+ size_t available_in = data.size();
+ const uint8_t* in = data.data();
+ uint8_t* out = &(*bytes)[initial_size + enc_size];
+ do {
+ uint8_t* out_before = out;
+ msan::MemoryIsInitialized(in, available_in);
+ JXL_CHECK(BrotliEncoderCompressStream(
+ brotli_enc, last ? BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS,
+ &available_in, &in, &brotli_capacity, &out, &enc_size));
+ msan::UnpoisonMemory(out_before, out - out_before);
+ } while (BrotliEncoderHasMoreOutput(brotli_enc) || available_in > 0);
+ };
+
+ for (size_t i = 0; i < jpeg_data.app_data.size(); i++) {
+ if (jpeg_data.app_marker_type[i] != AppMarkerType::kUnknown) {
+ continue;
+ }
+ br_append(jpeg_data.app_data[i], /*last=*/false);
+ }
+ for (size_t i = 0; i < jpeg_data.com_data.size(); i++) {
+ br_append(jpeg_data.com_data[i], /*last=*/false);
+ }
+ for (size_t i = 0; i < jpeg_data.inter_marker_data.size(); i++) {
+ br_append(jpeg_data.inter_marker_data[i], /*last=*/false);
+ }
+ br_append(jpeg_data.tail_data, /*last=*/true);
+ BrotliEncoderDestroyInstance(brotli_enc);
+ bytes->resize(initial_size + enc_size);
+ return true;
+}
+
+Status DecodeImageJPG(const Span<const uint8_t> bytes, CodecInOut* io) {
+ if (!IsJPG(bytes)) return false;
+ io->frames.clear();
+ io->frames.reserve(1);
+ io->frames.emplace_back(&io->metadata.m);
+ io->Main().jpeg_data = make_unique<jpeg::JPEGData>();
+ jpeg::JPEGData* jpeg_data = io->Main().jpeg_data.get();
+ if (!jpeg::ReadJpeg(bytes.data(), bytes.size(), jpeg::JpegReadMode::kReadAll,
+ jpeg_data)) {
+ return JXL_FAILURE("Error reading JPEG");
+ }
+ JXL_RETURN_IF_ERROR(
+ SetColorEncodingFromJpegData(*jpeg_data, &io->metadata.m.color_encoding));
+ JXL_RETURN_IF_ERROR(SetBlobsFromJpegData(*jpeg_data, &io->blobs));
+ size_t nbcomp = jpeg_data->components.size();
+ if (nbcomp != 1 && nbcomp != 3) {
+ return JXL_FAILURE("Cannot recompress JPEGs with neither 1 nor 3 channels");
+ }
+ YCbCrChromaSubsampling cs;
+ if (nbcomp == 3) {
+ uint8_t hsample[3], vsample[3];
+ for (size_t i = 0; i < nbcomp; i++) {
+ hsample[i] = jpeg_data->components[i].h_samp_factor;
+ vsample[i] = jpeg_data->components[i].v_samp_factor;
+ }
+ JXL_RETURN_IF_ERROR(cs.Set(hsample, vsample));
+ } else if (nbcomp == 1) {
+ uint8_t hsample[3], vsample[3];
+ for (size_t i = 0; i < 3; i++) {
+ hsample[i] = jpeg_data->components[0].h_samp_factor;
+ vsample[i] = jpeg_data->components[0].v_samp_factor;
+ }
+ JXL_RETURN_IF_ERROR(cs.Set(hsample, vsample));
+ }
+ bool is_rgb = false;
+ {
+ const auto& markers = jpeg_data->marker_order;
+ // If there is a JFIF marker, this is YCbCr. Otherwise...
+ if (std::find(markers.begin(), markers.end(), 0xE0) == markers.end()) {
+ // Try to find an 'Adobe' marker.
+ size_t app_markers = 0;
+ size_t i = 0;
+ for (; i < markers.size(); i++) {
+ // This is an APP marker.
+ if ((markers[i] & 0xF0) == 0xE0) {
+ JXL_CHECK(app_markers < jpeg_data->app_data.size());
+ // APP14 marker
+ if (markers[i] == 0xEE) {
+ const auto& data = jpeg_data->app_data[app_markers];
+ if (data.size() == 15 && data[3] == 'A' && data[4] == 'd' &&
+ data[5] == 'o' && data[6] == 'b' && data[7] == 'e') {
+ // 'Adobe' marker.
+ is_rgb = data[14] == 0;
+ break;
+ }
+ }
+ app_markers++;
+ }
+ }
+
+ if (i == markers.size()) {
+ // No 'Adobe' marker, guess from component IDs.
+ is_rgb = nbcomp == 3 && jpeg_data->components[0].id == 'R' &&
+ jpeg_data->components[1].id == 'G' &&
+ jpeg_data->components[2].id == 'B';
+ }
+ }
+ }
+
+ io->Main().chroma_subsampling = cs;
+ io->Main().color_transform =
+ (!is_rgb || nbcomp == 1) ? ColorTransform::kYCbCr : ColorTransform::kNone;
+
+ io->metadata.m.SetIntensityTarget(kDefaultIntensityTarget);
+ io->metadata.m.SetUintSamples(BITS_IN_JSAMPLE);
+ io->SetFromImage(Image3F(jpeg_data->width, jpeg_data->height),
+ io->metadata.m.color_encoding);
+ SetIntensityTarget(&io->metadata.m);
+ return true;
+}
+
+} // namespace jpeg
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_data.h b/third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_data.h
new file mode 100644
index 0000000000..806128c465
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_data.h
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_JPEG_ENC_JPEG_DATA_H_
+#define LIB_JXL_JPEG_ENC_JPEG_DATA_H_
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+Status EncodeJPEGData(JPEGData& jpeg_data, PaddedBytes* bytes,
+ const CompressParams& cparams);
+
+Status SetColorEncodingFromJpegData(const jpeg::JPEGData& jpg,
+ ColorEncoding* color_encoding);
+
+/**
+ * Decodes bytes containing JPEG codestream into a CodecInOut as coefficients
+ * only, for lossless JPEG transcoding.
+ */
+Status DecodeImageJPG(Span<const uint8_t> bytes, CodecInOut* io);
+
+} // namespace jpeg
+} // namespace jxl
+
+#endif // LIB_JXL_JPEG_ENC_JPEG_DATA_H_
diff --git a/third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_data_reader.cc b/third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_data_reader.cc
new file mode 100644
index 0000000000..f569b73363
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_data_reader.cc
@@ -0,0 +1,1053 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/jpeg/enc_jpeg_data_reader.h"
+
+#include <inttypes.h>
+#include <string.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/jpeg/enc_jpeg_huffman_decode.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+
+namespace {
+static const int kBrunsliMaxSampling = 15;
+
+// Macros for commonly used error conditions.
+
+#define JXL_JPEG_VERIFY_LEN(n) \
+ if (*pos + (n) > len) { \
+ return JXL_FAILURE("Unexpected end of input: pos=%" PRIuS \
+ " need=%d len=%" PRIuS, \
+ *pos, static_cast<int>(n), len); \
+ }
+
+#define JXL_JPEG_VERIFY_INPUT(var, low, high, code) \
+ if ((var) < (low) || (var) > (high)) { \
+ return JXL_FAILURE("Invalid " #var ": %d", static_cast<int>(var)); \
+ }
+
+#define JXL_JPEG_VERIFY_MARKER_END() \
+ if (start_pos + marker_len != *pos) { \
+ return JXL_FAILURE("Invalid marker length: declared=%" PRIuS \
+ " actual=%" PRIuS, \
+ marker_len, (*pos - start_pos)); \
+ }
+
+#define JXL_JPEG_EXPECT_MARKER() \
+ if (pos + 2 > len || data[pos] != 0xff) { \
+ return JXL_FAILURE( \
+ "Marker byte (0xff) expected, found: 0x%.2x pos=%" PRIuS \
+ " len=%" PRIuS, \
+ (pos < len ? data[pos] : 0), pos, len); \
+ }
+
+inline int ReadUint8(const uint8_t* data, size_t* pos) {
+ return data[(*pos)++];
+}
+
+inline int ReadUint16(const uint8_t* data, size_t* pos) {
+ int v = (data[*pos] << 8) + data[*pos + 1];
+ *pos += 2;
+ return v;
+}
+
+// Reads the Start of Frame (SOF) marker segment and fills in *jpg with the
+// parsed data.
+bool ProcessSOF(const uint8_t* data, const size_t len, JpegReadMode mode,
+ size_t* pos, JPEGData* jpg) {
+ if (jpg->width != 0) {
+ return JXL_FAILURE("Duplicate SOF marker.");
+ }
+ const size_t start_pos = *pos;
+ JXL_JPEG_VERIFY_LEN(8);
+ size_t marker_len = ReadUint16(data, pos);
+ int precision = ReadUint8(data, pos);
+ int height = ReadUint16(data, pos);
+ int width = ReadUint16(data, pos);
+ int num_components = ReadUint8(data, pos);
+ // 'jbrd' is hardcoded for 8bits:
+ JXL_JPEG_VERIFY_INPUT(precision, 8, 8, PRECISION);
+ JXL_JPEG_VERIFY_INPUT(height, 1, kMaxDimPixels, HEIGHT);
+ JXL_JPEG_VERIFY_INPUT(width, 1, kMaxDimPixels, WIDTH);
+ JXL_JPEG_VERIFY_INPUT(num_components, 1, kMaxComponents, NUMCOMP);
+ JXL_JPEG_VERIFY_LEN(3 * num_components);
+ jpg->height = height;
+ jpg->width = width;
+ jpg->components.resize(num_components);
+
+ // Read sampling factors and quant table index for each component.
+ std::vector<bool> ids_seen(256, false);
+ int max_h_samp_factor = 1;
+ int max_v_samp_factor = 1;
+ for (size_t i = 0; i < jpg->components.size(); ++i) {
+ const int id = ReadUint8(data, pos);
+ if (ids_seen[id]) { // (cf. section B.2.2, syntax of Ci)
+ return JXL_FAILURE("Duplicate ID %d in SOF.", id);
+ }
+ ids_seen[id] = true;
+ jpg->components[i].id = id;
+ int factor = ReadUint8(data, pos);
+ int h_samp_factor = factor >> 4;
+ int v_samp_factor = factor & 0xf;
+ JXL_JPEG_VERIFY_INPUT(h_samp_factor, 1, kBrunsliMaxSampling, SAMP_FACTOR);
+ JXL_JPEG_VERIFY_INPUT(v_samp_factor, 1, kBrunsliMaxSampling, SAMP_FACTOR);
+ jpg->components[i].h_samp_factor = h_samp_factor;
+ jpg->components[i].v_samp_factor = v_samp_factor;
+ jpg->components[i].quant_idx = ReadUint8(data, pos);
+ max_h_samp_factor = std::max(max_h_samp_factor, h_samp_factor);
+ max_v_samp_factor = std::max(max_v_samp_factor, v_samp_factor);
+ }
+
+ // We have checked above that none of the sampling factors are 0, so the max
+ // sampling factors can not be 0.
+ int MCU_rows = DivCeil(jpg->height, max_v_samp_factor * 8);
+ int MCU_cols = DivCeil(jpg->width, max_h_samp_factor * 8);
+ // Compute the block dimensions for each component.
+ for (size_t i = 0; i < jpg->components.size(); ++i) {
+ JPEGComponent* c = &jpg->components[i];
+ if (max_h_samp_factor % c->h_samp_factor != 0 ||
+ max_v_samp_factor % c->v_samp_factor != 0) {
+ return JXL_FAILURE("Non-integral subsampling ratios.");
+ }
+ c->width_in_blocks = MCU_cols * c->h_samp_factor;
+ c->height_in_blocks = MCU_rows * c->v_samp_factor;
+ const uint64_t num_blocks =
+ static_cast<uint64_t>(c->width_in_blocks) * c->height_in_blocks;
+ if (mode == JpegReadMode::kReadAll) {
+ c->coeffs.resize(num_blocks * kDCTBlockSize);
+ }
+ }
+ JXL_JPEG_VERIFY_MARKER_END();
+ return true;
+}
+
+// Reads the Start of Scan (SOS) marker segment and fills in *scan_info with the
+// parsed data.
+bool ProcessSOS(const uint8_t* data, const size_t len, size_t* pos,
+ JPEGData* jpg) {
+ const size_t start_pos = *pos;
+ JXL_JPEG_VERIFY_LEN(3);
+ size_t marker_len = ReadUint16(data, pos);
+ size_t comps_in_scan = ReadUint8(data, pos);
+ JXL_JPEG_VERIFY_INPUT(comps_in_scan, 1, jpg->components.size(),
+ COMPS_IN_SCAN);
+
+ JPEGScanInfo scan_info;
+ scan_info.num_components = comps_in_scan;
+ JXL_JPEG_VERIFY_LEN(2 * comps_in_scan);
+ std::vector<bool> ids_seen(256, false);
+ for (size_t i = 0; i < comps_in_scan; ++i) {
+ uint32_t id = ReadUint8(data, pos);
+ if (ids_seen[id]) { // (cf. section B.2.3, regarding CSj)
+ return JXL_FAILURE("Duplicate ID %d in SOS.", id);
+ }
+ ids_seen[id] = true;
+ bool found_index = false;
+ for (size_t j = 0; j < jpg->components.size(); ++j) {
+ if (jpg->components[j].id == id) {
+ scan_info.components[i].comp_idx = j;
+ found_index = true;
+ }
+ }
+ if (!found_index) {
+ return JXL_FAILURE("SOS marker: Could not find component with id %d", id);
+ }
+ int c = ReadUint8(data, pos);
+ int dc_tbl_idx = c >> 4;
+ int ac_tbl_idx = c & 0xf;
+ JXL_JPEG_VERIFY_INPUT(dc_tbl_idx, 0, 3, HUFFMAN_INDEX);
+ JXL_JPEG_VERIFY_INPUT(ac_tbl_idx, 0, 3, HUFFMAN_INDEX);
+ scan_info.components[i].dc_tbl_idx = dc_tbl_idx;
+ scan_info.components[i].ac_tbl_idx = ac_tbl_idx;
+ }
+ JXL_JPEG_VERIFY_LEN(3);
+ scan_info.Ss = ReadUint8(data, pos);
+ scan_info.Se = ReadUint8(data, pos);
+ JXL_JPEG_VERIFY_INPUT(static_cast<int>(scan_info.Ss), 0, 63, START_OF_SCAN);
+ JXL_JPEG_VERIFY_INPUT(scan_info.Se, scan_info.Ss, 63, END_OF_SCAN);
+ int c = ReadUint8(data, pos);
+ scan_info.Ah = c >> 4;
+ scan_info.Al = c & 0xf;
+ if (scan_info.Ah != 0 && scan_info.Al != scan_info.Ah - 1) {
+ // section G.1.1.1.2 : Successive approximation control only improves
+ // by one bit at a time. But it's not always respected, so we just issue
+ // a warning.
+ JXL_WARNING("Invalid progressive parameters: Al=%d Ah=%d", scan_info.Al,
+ scan_info.Ah);
+ }
+ // Check that all the Huffman tables needed for this scan are defined.
+ for (size_t i = 0; i < comps_in_scan; ++i) {
+ bool found_dc_table = false;
+ bool found_ac_table = false;
+ for (size_t j = 0; j < jpg->huffman_code.size(); ++j) {
+ uint32_t slot_id = jpg->huffman_code[j].slot_id;
+ if (slot_id == scan_info.components[i].dc_tbl_idx) {
+ found_dc_table = true;
+ } else if (slot_id == scan_info.components[i].ac_tbl_idx + 16) {
+ found_ac_table = true;
+ }
+ }
+ if (scan_info.Ss == 0 && !found_dc_table) {
+ return JXL_FAILURE(
+ "SOS marker: Could not find DC Huffman table with index %d",
+ scan_info.components[i].dc_tbl_idx);
+ }
+ if (scan_info.Se > 0 && !found_ac_table) {
+ return JXL_FAILURE(
+ "SOS marker: Could not find AC Huffman table with index %d",
+ scan_info.components[i].ac_tbl_idx);
+ }
+ }
+ jpg->scan_info.push_back(scan_info);
+ JXL_JPEG_VERIFY_MARKER_END();
+ return true;
+}
+
+// Reads the Define Huffman Table (DHT) marker segment and fills in *jpg with
+// the parsed data. Builds the Huffman decoding table in either dc_huff_lut or
+// ac_huff_lut, depending on the type and solt_id of Huffman code being read.
+bool ProcessDHT(const uint8_t* data, const size_t len, JpegReadMode mode,
+ std::vector<HuffmanTableEntry>* dc_huff_lut,
+ std::vector<HuffmanTableEntry>* ac_huff_lut, size_t* pos,
+ JPEGData* jpg) {
+ const size_t start_pos = *pos;
+ JXL_JPEG_VERIFY_LEN(2);
+ size_t marker_len = ReadUint16(data, pos);
+ if (marker_len == 2) {
+ return JXL_FAILURE("DHT marker: no Huffman table found");
+ }
+ while (*pos < start_pos + marker_len) {
+ JXL_JPEG_VERIFY_LEN(1 + kJpegHuffmanMaxBitLength);
+ JPEGHuffmanCode huff;
+ huff.slot_id = ReadUint8(data, pos);
+ int huffman_index = huff.slot_id;
+ int is_ac_table = (huff.slot_id & 0x10) != 0;
+ HuffmanTableEntry* huff_lut;
+ if (is_ac_table) {
+ huffman_index -= 0x10;
+ JXL_JPEG_VERIFY_INPUT(huffman_index, 0, 3, HUFFMAN_INDEX);
+ huff_lut = &(*ac_huff_lut)[huffman_index * kJpegHuffmanLutSize];
+ } else {
+ JXL_JPEG_VERIFY_INPUT(huffman_index, 0, 3, HUFFMAN_INDEX);
+ huff_lut = &(*dc_huff_lut)[huffman_index * kJpegHuffmanLutSize];
+ }
+ huff.counts[0] = 0;
+ int total_count = 0;
+ int space = 1 << kJpegHuffmanMaxBitLength;
+ int max_depth = 1;
+ for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) {
+ int count = ReadUint8(data, pos);
+ if (count != 0) {
+ max_depth = i;
+ }
+ huff.counts[i] = count;
+ total_count += count;
+ space -= count * (1 << (kJpegHuffmanMaxBitLength - i));
+ }
+ if (is_ac_table) {
+ JXL_JPEG_VERIFY_INPUT(total_count, 0, kJpegHuffmanAlphabetSize,
+ HUFFMAN_CODE);
+ } else {
+ JXL_JPEG_VERIFY_INPUT(total_count, 0, kJpegDCAlphabetSize, HUFFMAN_CODE);
+ }
+ JXL_JPEG_VERIFY_LEN(total_count);
+ std::vector<bool> values_seen(256, false);
+ for (int i = 0; i < total_count; ++i) {
+ int value = ReadUint8(data, pos);
+ if (!is_ac_table) {
+ JXL_JPEG_VERIFY_INPUT(value, 0, kJpegDCAlphabetSize - 1, HUFFMAN_CODE);
+ }
+ if (values_seen[value]) {
+ return JXL_FAILURE("Duplicate Huffman code value %d", value);
+ }
+ values_seen[value] = true;
+ huff.values[i] = value;
+ }
+ // Add an invalid symbol that will have the all 1 code.
+ ++huff.counts[max_depth];
+ huff.values[total_count] = kJpegHuffmanAlphabetSize;
+ space -= (1 << (kJpegHuffmanMaxBitLength - max_depth));
+ if (space < 0) {
+ return JXL_FAILURE("Invalid Huffman code lengths.");
+ } else if (space > 0 && huff_lut[0].value != 0xffff) {
+ // Re-initialize the values to an invalid symbol so that we can recognize
+ // it when reading the bit stream using a Huffman code with space > 0.
+ for (int i = 0; i < kJpegHuffmanLutSize; ++i) {
+ huff_lut[i].bits = 0;
+ huff_lut[i].value = 0xffff;
+ }
+ }
+ huff.is_last = (*pos == start_pos + marker_len);
+ if (mode == JpegReadMode::kReadAll) {
+ BuildJpegHuffmanTable(&huff.counts[0], &huff.values[0], huff_lut);
+ }
+ jpg->huffman_code.push_back(huff);
+ }
+ JXL_JPEG_VERIFY_MARKER_END();
+ return true;
+}
+
+// Reads the Define Quantization Table (DQT) marker segment and fills in *jpg
+// with the parsed data.
+bool ProcessDQT(const uint8_t* data, const size_t len, size_t* pos,
+ JPEGData* jpg) {
+ const size_t start_pos = *pos;
+ JXL_JPEG_VERIFY_LEN(2);
+ size_t marker_len = ReadUint16(data, pos);
+ if (marker_len == 2) {
+ return JXL_FAILURE("DQT marker: no quantization table found");
+ }
+ while (*pos < start_pos + marker_len && jpg->quant.size() < kMaxQuantTables) {
+ JXL_JPEG_VERIFY_LEN(1);
+ int quant_table_index = ReadUint8(data, pos);
+ int quant_table_precision = quant_table_index >> 4;
+ JXL_JPEG_VERIFY_INPUT(quant_table_precision, 0, 1, QUANT_TBL_PRECISION);
+ quant_table_index &= 0xf;
+ JXL_JPEG_VERIFY_INPUT(quant_table_index, 0, 3, QUANT_TBL_INDEX);
+ JXL_JPEG_VERIFY_LEN((quant_table_precision + 1) * kDCTBlockSize);
+ JPEGQuantTable table;
+ table.index = quant_table_index;
+ table.precision = quant_table_precision;
+ for (size_t i = 0; i < kDCTBlockSize; ++i) {
+ int quant_val =
+ quant_table_precision ? ReadUint16(data, pos) : ReadUint8(data, pos);
+ JXL_JPEG_VERIFY_INPUT(quant_val, 1, 65535, QUANT_VAL);
+ table.values[kJPEGNaturalOrder[i]] = quant_val;
+ }
+ table.is_last = (*pos == start_pos + marker_len);
+ jpg->quant.push_back(table);
+ }
+ JXL_JPEG_VERIFY_MARKER_END();
+ return true;
+}
+
+// Reads the DRI marker and saves the restart interval into *jpg.
+bool ProcessDRI(const uint8_t* data, const size_t len, size_t* pos,
+ bool* found_dri, JPEGData* jpg) {
+ if (*found_dri) {
+ return JXL_FAILURE("Duplicate DRI marker.");
+ }
+ *found_dri = true;
+ const size_t start_pos = *pos;
+ JXL_JPEG_VERIFY_LEN(4);
+ size_t marker_len = ReadUint16(data, pos);
+ int restart_interval = ReadUint16(data, pos);
+ jpg->restart_interval = restart_interval;
+ JXL_JPEG_VERIFY_MARKER_END();
+ return true;
+}
+
+// Saves the APP marker segment as a string to *jpg.
+bool ProcessAPP(const uint8_t* data, const size_t len, size_t* pos,
+ JPEGData* jpg) {
+ JXL_JPEG_VERIFY_LEN(2);
+ size_t marker_len = ReadUint16(data, pos);
+ JXL_JPEG_VERIFY_INPUT(marker_len, 2, 65535, MARKER_LEN);
+ JXL_JPEG_VERIFY_LEN(marker_len - 2);
+ JXL_DASSERT(*pos >= 3);
+ // Save the marker type together with the app data.
+ const uint8_t* app_str_start = data + *pos - 3;
+ std::vector<uint8_t> app_str(app_str_start, app_str_start + marker_len + 1);
+ *pos += marker_len - 2;
+ jpg->app_data.push_back(app_str);
+ return true;
+}
+
+// Saves the COM marker segment as a string to *jpg.
+bool ProcessCOM(const uint8_t* data, const size_t len, size_t* pos,
+ JPEGData* jpg) {
+ JXL_JPEG_VERIFY_LEN(2);
+ size_t marker_len = ReadUint16(data, pos);
+ JXL_JPEG_VERIFY_INPUT(marker_len, 2, 65535, MARKER_LEN);
+ JXL_JPEG_VERIFY_LEN(marker_len - 2);
+ const uint8_t* com_str_start = data + *pos - 3;
+ std::vector<uint8_t> com_str(com_str_start, com_str_start + marker_len + 1);
+ *pos += marker_len - 2;
+ jpg->com_data.push_back(com_str);
+ return true;
+}
+
+// Helper structure to read bits from the entropy coded data segment.
+struct BitReaderState {
+ BitReaderState(const uint8_t* data, const size_t len, size_t pos)
+ : data_(data), len_(len) {
+ Reset(pos);
+ }
+
+ void Reset(size_t pos) {
+ pos_ = pos;
+ val_ = 0;
+ bits_left_ = 0;
+ next_marker_pos_ = len_ - 2;
+ FillBitWindow();
+ }
+
+ // Returns the next byte and skips the 0xff/0x00 escape sequences.
+ uint8_t GetNextByte() {
+ if (pos_ >= next_marker_pos_) {
+ ++pos_;
+ return 0;
+ }
+ uint8_t c = data_[pos_++];
+ if (c == 0xff) {
+ uint8_t escape = data_[pos_];
+ if (escape == 0) {
+ ++pos_;
+ } else {
+ // 0xff was followed by a non-zero byte, which means that we found the
+ // start of the next marker segment.
+ next_marker_pos_ = pos_ - 1;
+ }
+ }
+ return c;
+ }
+
+ void FillBitWindow() {
+ if (bits_left_ <= 16) {
+ while (bits_left_ <= 56) {
+ val_ <<= 8;
+ val_ |= (uint64_t)GetNextByte();
+ bits_left_ += 8;
+ }
+ }
+ }
+
+ int ReadBits(int nbits) {
+ FillBitWindow();
+ uint64_t val = (val_ >> (bits_left_ - nbits)) & ((1ULL << nbits) - 1);
+ bits_left_ -= nbits;
+ return val;
+ }
+
+ // Sets *pos to the next stream position where parsing should continue.
+ // Enqueue the padding bits seen (0 or 1).
+ // Returns false if there is inconsistent or invalid padding or the stream
+ // ended too early.
+ bool FinishStream(JPEGData* jpg, size_t* pos) {
+ int npadbits = bits_left_ & 7;
+ if (npadbits > 0) {
+ uint64_t padmask = (1ULL << npadbits) - 1;
+ uint64_t padbits = (val_ >> (bits_left_ - npadbits)) & padmask;
+ if (padbits != padmask) {
+ jpg->has_zero_padding_bit = true;
+ }
+ for (int i = npadbits - 1; i >= 0; --i) {
+ jpg->padding_bits.push_back((padbits >> i) & 1);
+ }
+ }
+ // Give back some bytes that we did not use.
+ int unused_bytes_left = bits_left_ >> 3;
+ while (unused_bytes_left-- > 0) {
+ --pos_;
+ // If we give back a 0 byte, we need to check if it was a 0xff/0x00 escape
+ // sequence, and if yes, we need to give back one more byte.
+ if (pos_ < next_marker_pos_ && data_[pos_] == 0 &&
+ data_[pos_ - 1] == 0xff) {
+ --pos_;
+ }
+ }
+ if (pos_ > next_marker_pos_) {
+ // Data ran out before the scan was complete.
+ return JXL_FAILURE("Unexpected end of scan.");
+ }
+ *pos = pos_;
+ return true;
+ }
+
+ const uint8_t* data_;
+ const size_t len_;
+ size_t pos_;
+ uint64_t val_;
+ int bits_left_;
+ size_t next_marker_pos_;
+};
+
+// Returns the next Huffman-coded symbol.
+int ReadSymbol(const HuffmanTableEntry* table, BitReaderState* br) {
+ int nbits;
+ br->FillBitWindow();
+ int val = (br->val_ >> (br->bits_left_ - 8)) & 0xff;
+ table += val;
+ nbits = table->bits - 8;
+ if (nbits > 0) {
+ br->bits_left_ -= 8;
+ table += table->value;
+ val = (br->val_ >> (br->bits_left_ - nbits)) & ((1 << nbits) - 1);
+ table += val;
+ }
+ br->bits_left_ -= table->bits;
+ return table->value;
+}
+
+/**
+ * Returns the DC diff or AC value for extra bits value x and prefix code s.
+ *
+ * CCITT Rec. T.81 (1992 E)
+ * Table F.1 – Difference magnitude categories for DC coding
+ * SSSS | DIFF values
+ * ------+--------------------------
+ * 0 | 0
+ * 1 | –1, 1
+ * 2 | –3, –2, 2, 3
+ * 3 | –7..–4, 4..7
+ * ......|..........................
+ * 11 | –2047..–1024, 1024..2047
+ *
+ * CCITT Rec. T.81 (1992 E)
+ * Table F.2 – Categories assigned to coefficient values
+ * [ Same as Table F.1, but does not include SSSS equal to 0 and 11]
+ *
+ *
+ * CCITT Rec. T.81 (1992 E)
+ * F.1.2.1.1 Structure of DC code table
+ * For each category,... additional bits... appended... to uniquely identify
+ * which difference... occurred... When DIFF is positive... SSSS... bits of DIFF
+ * are appended. When DIFF is negative... SSSS... bits of (DIFF – 1) are
+ * appended... Most significant bit... is 0 for negative differences and 1 for
+ * positive differences.
+ *
+ * In other words the upper half of extra bits range represents DIFF as is.
+ * The lower half represents the negative DIFFs with an offset.
+ */
+int HuffExtend(int x, int s) {
+ JXL_DASSERT(s >= 1);
+ int half = 1 << (s - 1);
+ if (x >= half) {
+ JXL_DASSERT(x < (1 << s));
+ return x;
+ } else {
+ return x - (1 << s) + 1;
+ }
+}
+
+// Decodes one 8x8 block of DCT coefficients from the bit stream.
+bool DecodeDCTBlock(const HuffmanTableEntry* dc_huff,
+ const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al,
+ int* eobrun, bool* reset_state, int* num_zero_runs,
+ BitReaderState* br, JPEGData* jpg, coeff_t* last_dc_coeff,
+ coeff_t* coeffs) {
+ // Nowadays multiplication is even faster than variable shift.
+ int Am = 1 << Al;
+ bool eobrun_allowed = Ss > 0;
+ if (Ss == 0) {
+ int s = ReadSymbol(dc_huff, br);
+ if (s >= kJpegDCAlphabetSize) {
+ return JXL_FAILURE("Invalid Huffman symbol %d for DC coefficient.", s);
+ }
+ int diff = 0;
+ if (s > 0) {
+ int bits = br->ReadBits(s);
+ diff = HuffExtend(bits, s);
+ }
+ int coeff = diff + *last_dc_coeff;
+ const int dc_coeff = coeff * Am;
+ coeffs[0] = dc_coeff;
+ // TODO(eustas): is there a more elegant / explicit way to check this?
+ if (dc_coeff != coeffs[0]) {
+ return JXL_FAILURE("Invalid DC coefficient %d", dc_coeff);
+ }
+ *last_dc_coeff = coeff;
+ ++Ss;
+ }
+ if (Ss > Se) {
+ return true;
+ }
+ if (*eobrun > 0) {
+ --(*eobrun);
+ return true;
+ }
+ *num_zero_runs = 0;
+ for (int k = Ss; k <= Se; k++) {
+ int sr = ReadSymbol(ac_huff, br);
+ if (sr >= kJpegHuffmanAlphabetSize) {
+ return JXL_FAILURE("Invalid Huffman symbol %d for AC coefficient %d", sr,
+ k);
+ }
+ int r = sr >> 4;
+ int s = sr & 15;
+ if (s > 0) {
+ k += r;
+ if (k > Se) {
+ return JXL_FAILURE("Out-of-band coefficient %d band was %d-%d", k, Ss,
+ Se);
+ }
+ if (s + Al >= kJpegDCAlphabetSize) {
+ return JXL_FAILURE(
+ "Out of range AC coefficient value: s = %d Al = %d k = %d", s, Al,
+ k);
+ }
+ int bits = br->ReadBits(s);
+ int coeff = HuffExtend(bits, s);
+ coeffs[kJPEGNaturalOrder[k]] = coeff * Am;
+ *num_zero_runs = 0;
+ } else if (r == 15) {
+ k += 15;
+ ++(*num_zero_runs);
+ } else {
+ if (eobrun_allowed && k == Ss && *eobrun == 0) {
+ // We have two end-of-block runs right after each other, so we signal
+ // the jpeg encoder to force a state reset at this point.
+ *reset_state = true;
+ }
+ *eobrun = 1 << r;
+ if (r > 0) {
+ if (!eobrun_allowed) {
+ return JXL_FAILURE("End-of-block run crossing DC coeff.");
+ }
+ *eobrun += br->ReadBits(r);
+ }
+ break;
+ }
+ }
+ --(*eobrun);
+ return true;
+}
+
+bool RefineDCTBlock(const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al,
+ int* eobrun, bool* reset_state, BitReaderState* br,
+ JPEGData* jpg, coeff_t* coeffs) {
+ // Nowadays multiplication is even faster than variable shift.
+ int Am = 1 << Al;
+ bool eobrun_allowed = Ss > 0;
+ if (Ss == 0) {
+ int s = br->ReadBits(1);
+ coeff_t dc_coeff = coeffs[0];
+ dc_coeff |= s * Am;
+ coeffs[0] = dc_coeff;
+ ++Ss;
+ }
+ if (Ss > Se) {
+ return true;
+ }
+ int p1 = Am;
+ int m1 = -Am;
+ int k = Ss;
+ int r;
+ int s;
+ bool in_zero_run = false;
+ if (*eobrun <= 0) {
+ for (; k <= Se; k++) {
+ s = ReadSymbol(ac_huff, br);
+ if (s >= kJpegHuffmanAlphabetSize) {
+ return JXL_FAILURE("Invalid Huffman symbol %d for AC coefficient %d", s,
+ k);
+ }
+ r = s >> 4;
+ s &= 15;
+ if (s) {
+ if (s != 1) {
+ return JXL_FAILURE("Invalid Huffman symbol %d for AC coefficient %d",
+ s, k);
+ }
+ s = br->ReadBits(1) ? p1 : m1;
+ in_zero_run = false;
+ } else {
+ if (r != 15) {
+ if (eobrun_allowed && k == Ss && *eobrun == 0) {
+ // We have two end-of-block runs right after each other, so we
+ // signal the jpeg encoder to force a state reset at this point.
+ *reset_state = true;
+ }
+ *eobrun = 1 << r;
+ if (r > 0) {
+ if (!eobrun_allowed) {
+ return JXL_FAILURE("End-of-block run crossing DC coeff.");
+ }
+ *eobrun += br->ReadBits(r);
+ }
+ break;
+ }
+ in_zero_run = true;
+ }
+ do {
+ coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]];
+ if (thiscoef != 0) {
+ if (br->ReadBits(1)) {
+ if ((thiscoef & p1) == 0) {
+ if (thiscoef >= 0) {
+ thiscoef += p1;
+ } else {
+ thiscoef += m1;
+ }
+ }
+ }
+ coeffs[kJPEGNaturalOrder[k]] = thiscoef;
+ } else {
+ if (--r < 0) {
+ break;
+ }
+ }
+ k++;
+ } while (k <= Se);
+ if (s) {
+ if (k > Se) {
+ return JXL_FAILURE("Out-of-band coefficient %d band was %d-%d", k, Ss,
+ Se);
+ }
+ coeffs[kJPEGNaturalOrder[k]] = s;
+ }
+ }
+ }
+ if (in_zero_run) {
+ return JXL_FAILURE("Extra zero run before end-of-block.");
+ }
+ if (*eobrun > 0) {
+ for (; k <= Se; k++) {
+ coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]];
+ if (thiscoef != 0) {
+ if (br->ReadBits(1)) {
+ if ((thiscoef & p1) == 0) {
+ if (thiscoef >= 0) {
+ thiscoef += p1;
+ } else {
+ thiscoef += m1;
+ }
+ }
+ }
+ coeffs[kJPEGNaturalOrder[k]] = thiscoef;
+ }
+ }
+ }
+ --(*eobrun);
+ return true;
+}
+
+bool ProcessRestart(const uint8_t* data, const size_t len,
+ int* next_restart_marker, BitReaderState* br,
+ JPEGData* jpg) {
+ size_t pos = 0;
+ if (!br->FinishStream(jpg, &pos)) {
+ return JXL_FAILURE("Invalid scan");
+ }
+ int expected_marker = 0xd0 + *next_restart_marker;
+ JXL_JPEG_EXPECT_MARKER();
+ int marker = data[pos + 1];
+ if (marker != expected_marker) {
+ return JXL_FAILURE("Did not find expected restart marker %d actual %d",
+ expected_marker, marker);
+ }
+ br->Reset(pos + 2);
+ *next_restart_marker += 1;
+ *next_restart_marker &= 0x7;
+ return true;
+}
+
+bool ProcessScan(const uint8_t* data, const size_t len,
+ const std::vector<HuffmanTableEntry>& dc_huff_lut,
+ const std::vector<HuffmanTableEntry>& ac_huff_lut,
+ uint16_t scan_progression[kMaxComponents][kDCTBlockSize],
+ bool is_progressive, size_t* pos, JPEGData* jpg) {
+ if (!ProcessSOS(data, len, pos, jpg)) {
+ return false;
+ }
+ JPEGScanInfo* scan_info = &jpg->scan_info.back();
+ bool is_interleaved = (scan_info->num_components > 1);
+ int max_h_samp_factor = 1;
+ int max_v_samp_factor = 1;
+ for (size_t i = 0; i < jpg->components.size(); ++i) {
+ max_h_samp_factor =
+ std::max(max_h_samp_factor, jpg->components[i].h_samp_factor);
+ max_v_samp_factor =
+ std::max(max_v_samp_factor, jpg->components[i].v_samp_factor);
+ }
+
+ int MCU_rows = DivCeil(jpg->height, max_v_samp_factor * 8);
+ int MCUs_per_row = DivCeil(jpg->width, max_h_samp_factor * 8);
+ if (!is_interleaved) {
+ const JPEGComponent& c = jpg->components[scan_info->components[0].comp_idx];
+ MCUs_per_row = DivCeil(jpg->width * c.h_samp_factor, 8 * max_h_samp_factor);
+ MCU_rows = DivCeil(jpg->height * c.v_samp_factor, 8 * max_v_samp_factor);
+ }
+ coeff_t last_dc_coeff[kMaxComponents] = {0};
+ BitReaderState br(data, len, *pos);
+ int restarts_to_go = jpg->restart_interval;
+ int next_restart_marker = 0;
+ int eobrun = -1;
+ int block_scan_index = 0;
+ const int Al = is_progressive ? scan_info->Al : 0;
+ const int Ah = is_progressive ? scan_info->Ah : 0;
+ const int Ss = is_progressive ? scan_info->Ss : 0;
+ const int Se = is_progressive ? scan_info->Se : 63;
+ const uint16_t scan_bitmask = Ah == 0 ? (0xffff << Al) : (1u << Al);
+ const uint16_t refinement_bitmask = (1 << Al) - 1;
+ for (size_t i = 0; i < scan_info->num_components; ++i) {
+ int comp_idx = scan_info->components[i].comp_idx;
+ for (int k = Ss; k <= Se; ++k) {
+ if (scan_progression[comp_idx][k] & scan_bitmask) {
+ return JXL_FAILURE(
+ "Overlapping scans: component=%d k=%d prev_mask: %u cur_mask %u",
+ comp_idx, k, scan_progression[i][k], scan_bitmask);
+ }
+ if (scan_progression[comp_idx][k] & refinement_bitmask) {
+ return JXL_FAILURE(
+ "Invalid scan order, a more refined scan was already done: "
+ "component=%d k=%d prev_mask=%u cur_mask=%u",
+ comp_idx, k, scan_progression[i][k], scan_bitmask);
+ }
+ scan_progression[comp_idx][k] |= scan_bitmask;
+ }
+ }
+ if (Al > 10) {
+ return JXL_FAILURE("Scan parameter Al=%d is not supported.", Al);
+ }
+ for (int mcu_y = 0; mcu_y < MCU_rows; ++mcu_y) {
+ for (int mcu_x = 0; mcu_x < MCUs_per_row; ++mcu_x) {
+ // Handle the restart intervals.
+ if (jpg->restart_interval > 0) {
+ if (restarts_to_go == 0) {
+ if (ProcessRestart(data, len, &next_restart_marker, &br, jpg)) {
+ restarts_to_go = jpg->restart_interval;
+ memset(static_cast<void*>(last_dc_coeff), 0, sizeof(last_dc_coeff));
+ if (eobrun > 0) {
+ return JXL_FAILURE("End-of-block run too long.");
+ }
+ eobrun = -1; // fresh start
+ } else {
+ return JXL_FAILURE("Could not process restart.");
+ }
+ }
+ --restarts_to_go;
+ }
+ // Decode one MCU.
+ for (size_t i = 0; i < scan_info->num_components; ++i) {
+ JPEGComponentScanInfo* si = &scan_info->components[i];
+ JPEGComponent* c = &jpg->components[si->comp_idx];
+ const HuffmanTableEntry* dc_lut =
+ &dc_huff_lut[si->dc_tbl_idx * kJpegHuffmanLutSize];
+ const HuffmanTableEntry* ac_lut =
+ &ac_huff_lut[si->ac_tbl_idx * kJpegHuffmanLutSize];
+ int nblocks_y = is_interleaved ? c->v_samp_factor : 1;
+ int nblocks_x = is_interleaved ? c->h_samp_factor : 1;
+ for (int iy = 0; iy < nblocks_y; ++iy) {
+ for (int ix = 0; ix < nblocks_x; ++ix) {
+ int block_y = mcu_y * nblocks_y + iy;
+ int block_x = mcu_x * nblocks_x + ix;
+ int block_idx = block_y * c->width_in_blocks + block_x;
+ bool reset_state = false;
+ int num_zero_runs = 0;
+ coeff_t* coeffs = &c->coeffs[block_idx * kDCTBlockSize];
+ if (Ah == 0) {
+ if (!DecodeDCTBlock(dc_lut, ac_lut, Ss, Se, Al, &eobrun,
+ &reset_state, &num_zero_runs, &br, jpg,
+ &last_dc_coeff[si->comp_idx], coeffs)) {
+ return false;
+ }
+ } else {
+ if (!RefineDCTBlock(ac_lut, Ss, Se, Al, &eobrun, &reset_state,
+ &br, jpg, coeffs)) {
+ return false;
+ }
+ }
+ if (reset_state) {
+ scan_info->reset_points.emplace_back(block_scan_index);
+ }
+ if (num_zero_runs > 0) {
+ JPEGScanInfo::ExtraZeroRunInfo info;
+ info.block_idx = block_scan_index;
+ info.num_extra_zero_runs = num_zero_runs;
+ scan_info->extra_zero_runs.push_back(info);
+ }
+ ++block_scan_index;
+ }
+ }
+ }
+ }
+ }
+ if (eobrun > 0) {
+ return JXL_FAILURE("End-of-block run too long.");
+ }
+ if (!br.FinishStream(jpg, pos)) {
+ return JXL_FAILURE("Invalid scan.");
+ }
+ if (*pos > len) {
+ return JXL_FAILURE("Unexpected end of file during scan. pos=%" PRIuS
+ " len=%" PRIuS,
+ *pos, len);
+ }
+ return true;
+}
+
+// Changes the quant_idx field of the components to refer to the index of the
+// quant table in the jpg->quant array.
+bool FixupIndexes(JPEGData* jpg) {
+ for (size_t i = 0; i < jpg->components.size(); ++i) {
+ JPEGComponent* c = &jpg->components[i];
+ bool found_index = false;
+ for (size_t j = 0; j < jpg->quant.size(); ++j) {
+ if (jpg->quant[j].index == c->quant_idx) {
+ c->quant_idx = j;
+ found_index = true;
+ break;
+ }
+ }
+ if (!found_index) {
+ return JXL_FAILURE("Quantization table with index %u not found",
+ c->quant_idx);
+ }
+ }
+ return true;
+}
+
+size_t FindNextMarker(const uint8_t* data, const size_t len, size_t pos) {
+ // kIsValidMarker[i] == 1 means (0xc0 + i) is a valid marker.
+ static const uint8_t kIsValidMarker[] = {
+ 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
+ 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
+ };
+ size_t num_skipped = 0;
+ while (pos + 1 < len && (data[pos] != 0xff || data[pos + 1] < 0xc0 ||
+ !kIsValidMarker[data[pos + 1] - 0xc0])) {
+ ++pos;
+ ++num_skipped;
+ }
+ return num_skipped;
+}
+
+} // namespace
+
+bool ReadJpeg(const uint8_t* data, const size_t len, JpegReadMode mode,
+ JPEGData* jpg) {
+ size_t pos = 0;
+ // Check SOI marker.
+ JXL_JPEG_EXPECT_MARKER();
+ int marker = data[pos + 1];
+ pos += 2;
+ if (marker != 0xd8) {
+ return JXL_FAILURE("Did not find expected SOI marker, actual=%d", marker);
+ }
+ int lut_size = kMaxHuffmanTables * kJpegHuffmanLutSize;
+ std::vector<HuffmanTableEntry> dc_huff_lut(lut_size);
+ std::vector<HuffmanTableEntry> ac_huff_lut(lut_size);
+ bool found_sof = false;
+ bool found_dri = false;
+ uint16_t scan_progression[kMaxComponents][kDCTBlockSize] = {{0}};
+
+ jpg->padding_bits.resize(0);
+ bool is_progressive = false; // default
+ do {
+ // Read next marker.
+ size_t num_skipped = FindNextMarker(data, len, pos);
+ if (num_skipped > 0) {
+ // Add a fake marker to indicate arbitrary in-between-markers data.
+ jpg->marker_order.push_back(0xff);
+ jpg->inter_marker_data.emplace_back(data + pos, data + pos + num_skipped);
+ pos += num_skipped;
+ }
+ JXL_JPEG_EXPECT_MARKER();
+ marker = data[pos + 1];
+ pos += 2;
+ bool ok = true;
+ switch (marker) {
+ case 0xc0:
+ case 0xc1:
+ case 0xc2:
+ is_progressive = (marker == 0xc2);
+ ok = ProcessSOF(data, len, mode, &pos, jpg);
+ found_sof = true;
+ break;
+ case 0xc4:
+ ok = ProcessDHT(data, len, mode, &dc_huff_lut, &ac_huff_lut, &pos, jpg);
+ break;
+ case 0xd0:
+ case 0xd1:
+ case 0xd2:
+ case 0xd3:
+ case 0xd4:
+ case 0xd5:
+ case 0xd6:
+ case 0xd7:
+ // RST markers do not have any data.
+ break;
+ case 0xd9:
+ // Found end marker.
+ break;
+ case 0xda:
+ if (mode == JpegReadMode::kReadAll) {
+ ok = ProcessScan(data, len, dc_huff_lut, ac_huff_lut,
+ scan_progression, is_progressive, &pos, jpg);
+ }
+ break;
+ case 0xdb:
+ ok = ProcessDQT(data, len, &pos, jpg);
+ break;
+ case 0xdd:
+ ok = ProcessDRI(data, len, &pos, &found_dri, jpg);
+ break;
+ case 0xe0:
+ case 0xe1:
+ case 0xe2:
+ case 0xe3:
+ case 0xe4:
+ case 0xe5:
+ case 0xe6:
+ case 0xe7:
+ case 0xe8:
+ case 0xe9:
+ case 0xea:
+ case 0xeb:
+ case 0xec:
+ case 0xed:
+ case 0xee:
+ case 0xef:
+ if (mode != JpegReadMode::kReadTables) {
+ ok = ProcessAPP(data, len, &pos, jpg);
+ }
+ break;
+ case 0xfe:
+ if (mode != JpegReadMode::kReadTables) {
+ ok = ProcessCOM(data, len, &pos, jpg);
+ }
+ break;
+ default:
+ return JXL_FAILURE("Unsupported marker: %d pos=%" PRIuS " len=%" PRIuS,
+ marker, pos, len);
+ }
+ if (!ok) {
+ return false;
+ }
+ jpg->marker_order.push_back(marker);
+ if (mode == JpegReadMode::kReadHeader && found_sof) {
+ break;
+ }
+ } while (marker != 0xd9);
+
+ if (!found_sof) {
+ return JXL_FAILURE("Missing SOF marker.");
+ }
+
+ // Supplemental checks.
+ if (mode == JpegReadMode::kReadAll) {
+ if (pos < len) {
+ jpg->tail_data = std::vector<uint8_t>(data + pos, data + len);
+ }
+ if (!FixupIndexes(jpg)) {
+ return false;
+ }
+ if (jpg->huffman_code.empty()) {
+ // Section B.2.4.2: "If a table has never been defined for a particular
+ // destination, then when this destination is specified in a scan header,
+ // the results are unpredictable."
+ return JXL_FAILURE("Need at least one Huffman code table.");
+ }
+ if (jpg->huffman_code.size() >= kMaxDHTMarkers) {
+ return JXL_FAILURE("Too many Huffman tables.");
+ }
+ }
+ return true;
+}
+
+} // namespace jpeg
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_data_reader.h b/third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_data_reader.h
new file mode 100644
index 0000000000..3fad820e9d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_data_reader.h
@@ -0,0 +1,36 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Functions for reading a jpeg byte stream into a JPEGData object.
+
+#ifndef LIB_JXL_JPEG_ENC_JPEG_DATA_READER_H_
+#define LIB_JXL_JPEG_ENC_JPEG_DATA_READER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+
+enum class JpegReadMode {
+ kReadHeader, // only basic headers
+ kReadTables, // headers and tables (quant, Huffman, ...)
+ kReadAll, // everything
+};
+
+// Parses the JPEG stream contained in data[*pos ... len) and fills in *jpg with
+// the parsed information.
+// If mode is kReadHeader, it fills in only the image dimensions in *jpg.
+// Returns false if the data is not valid JPEG, or if it contains an unsupported
+// JPEG feature.
+bool ReadJpeg(const uint8_t* data, const size_t len, JpegReadMode mode,
+ JPEGData* jpg);
+
+} // namespace jpeg
+} // namespace jxl
+
+#endif // LIB_JXL_JPEG_ENC_JPEG_DATA_READER_H_
diff --git a/third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc b/third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc
new file mode 100644
index 0000000000..38282e640a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc
@@ -0,0 +1,103 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/jpeg/enc_jpeg_huffman_decode.h"
+
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+namespace jxl {
+namespace jpeg {
+
+// Returns the table width of the next 2nd level table, count is the histogram
+// of bit lengths for the remaining symbols, len is the code length of the next
+// processed symbol.
+static inline int NextTableBitSize(const int* count, int len) {
+ int left = 1 << (len - kJpegHuffmanRootTableBits);
+ while (len < static_cast<int>(kJpegHuffmanMaxBitLength)) {
+ left -= count[len];
+ if (left <= 0) break;
+ ++len;
+ left <<= 1;
+ }
+ return len - kJpegHuffmanRootTableBits;
+}
+
+void BuildJpegHuffmanTable(const uint32_t* count, const uint32_t* symbols,
+ HuffmanTableEntry* lut) {
+ HuffmanTableEntry code; // current table entry
+ HuffmanTableEntry* table; // next available space in table
+ int len; // current code length
+ int idx; // symbol index
+ int key; // prefix code
+ int reps; // number of replicate key values in current table
+ int low; // low bits for current root entry
+ int table_bits; // key length of current table
+ int table_size; // size of current table
+
+ // Make a local copy of the input bit length histogram.
+ int tmp_count[kJpegHuffmanMaxBitLength + 1] = {0};
+ int total_count = 0;
+ for (len = 1; len <= static_cast<int>(kJpegHuffmanMaxBitLength); ++len) {
+ tmp_count[len] = count[len];
+ total_count += tmp_count[len];
+ }
+
+ table = lut;
+ table_bits = kJpegHuffmanRootTableBits;
+ table_size = 1 << table_bits;
+
+ // Special case code with only one value.
+ if (total_count == 1) {
+ code.bits = 0;
+ code.value = symbols[0];
+ for (key = 0; key < table_size; ++key) {
+ table[key] = code;
+ }
+ return;
+ }
+
+ // Fill in root table.
+ key = 0;
+ idx = 0;
+ for (len = 1; len <= kJpegHuffmanRootTableBits; ++len) {
+ for (; tmp_count[len] > 0; --tmp_count[len]) {
+ code.bits = len;
+ code.value = symbols[idx++];
+ reps = 1 << (kJpegHuffmanRootTableBits - len);
+ while (reps--) {
+ table[key++] = code;
+ }
+ }
+ }
+
+ // Fill in 2nd level tables and add pointers to root table.
+ table += table_size;
+ table_size = 0;
+ low = 0;
+ for (len = kJpegHuffmanRootTableBits + 1;
+ len <= static_cast<int>(kJpegHuffmanMaxBitLength); ++len) {
+ for (; tmp_count[len] > 0; --tmp_count[len]) {
+ // Start a new sub-table if the previous one is full.
+ if (low >= table_size) {
+ table += table_size;
+ table_bits = NextTableBitSize(tmp_count, len);
+ table_size = 1 << table_bits;
+ low = 0;
+ lut[key].bits = table_bits + kJpegHuffmanRootTableBits;
+ lut[key].value = (table - lut) - key;
+ ++key;
+ }
+ code.bits = len - kJpegHuffmanRootTableBits;
+ code.value = symbols[idx++];
+ reps = 1 << (table_bits - code.bits);
+ while (reps--) {
+ table[low++] = code;
+ }
+ }
+ }
+}
+
+} // namespace jpeg
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_huffman_decode.h b/third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_huffman_decode.h
new file mode 100644
index 0000000000..b8a60e4107
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/jpeg/enc_jpeg_huffman_decode.h
@@ -0,0 +1,41 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Utility function for building a Huffman lookup table for the jpeg decoder.
+
+#ifndef LIB_JXL_JPEG_ENC_JPEG_HUFFMAN_DECODE_H_
+#define LIB_JXL_JPEG_ENC_JPEG_HUFFMAN_DECODE_H_
+
+#include <stdint.h>
+
+namespace jxl {
+namespace jpeg {
+
+constexpr int kJpegHuffmanRootTableBits = 8;
+// Maximum huffman lookup table size.
+// According to zlib/examples/enough.c, 758 entries are always enough for
+// an alphabet of 257 symbols (256 + 1 special symbol for the all 1s code) and
+// max bit length 16 if the root table has 8 bits.
+constexpr int kJpegHuffmanLutSize = 758;
+
+struct HuffmanTableEntry {
+ // Initialize the value to an invalid symbol so that we can recognize it
+ // when reading the bit stream using a Huffman code with space > 0.
+ HuffmanTableEntry() : bits(0), value(0xffff) {}
+
+ uint8_t bits; // number of bits used for this symbol
+ uint16_t value; // symbol value or table offset
+};
+
+// Builds jpeg-style Huffman lookup table from the given symbols.
+// The symbols are in order of increasing bit lengths. The number of symbols
+// with bit length n is given in counts[n] for each n >= 1.
+void BuildJpegHuffmanTable(const uint32_t* counts, const uint32_t* symbols,
+ HuffmanTableEntry* lut);
+
+} // namespace jpeg
+} // namespace jxl
+
+#endif // LIB_JXL_JPEG_ENC_JPEG_HUFFMAN_DECODE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/jpeg/jpeg_data.cc b/third_party/jpeg-xl/lib/jxl/jpeg/jpeg_data.cc
new file mode 100644
index 0000000000..430707b9ed
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/jpeg/jpeg_data.cc
@@ -0,0 +1,451 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/jpeg/jpeg_data.h"
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace jpeg {
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+
+namespace {
+enum JPEGComponentType : uint32_t {
+ kGray = 0,
+ kYCbCr = 1,
+ kRGB = 2,
+ kCustom = 3,
+};
+
+struct JPEGInfo {
+ size_t num_app_markers = 0;
+ size_t num_com_markers = 0;
+ size_t num_scans = 0;
+ size_t num_intermarker = 0;
+ bool has_dri = false;
+};
+
+Status VisitMarker(uint8_t* marker, Visitor* visitor, JPEGInfo* info) {
+ uint32_t marker32 = *marker - 0xc0;
+ JXL_RETURN_IF_ERROR(visitor->Bits(6, 0x00, &marker32));
+ *marker = marker32 + 0xc0;
+ if ((*marker & 0xf0) == 0xe0) {
+ info->num_app_markers++;
+ }
+ if (*marker == 0xfe) {
+ info->num_com_markers++;
+ }
+ if (*marker == 0xda) {
+ info->num_scans++;
+ }
+ // We use a fake 0xff marker to signal intermarker data.
+ if (*marker == 0xff) {
+ info->num_intermarker++;
+ }
+ if (*marker == 0xdd) {
+ info->has_dri = true;
+ }
+ return true;
+}
+
+} // namespace
+
+Status JPEGData::VisitFields(Visitor* visitor) {
+ bool is_gray = components.size() == 1;
+ JXL_RETURN_IF_ERROR(visitor->Bool(false, &is_gray));
+ if (visitor->IsReading()) {
+ components.resize(is_gray ? 1 : 3);
+ }
+ JPEGInfo info;
+ if (visitor->IsReading()) {
+ uint8_t marker = 0xc0;
+ do {
+ JXL_RETURN_IF_ERROR(VisitMarker(&marker, visitor, &info));
+ marker_order.push_back(marker);
+ if (marker_order.size() > 16384) {
+ return JXL_FAILURE("Too many markers: %" PRIuS "\n",
+ marker_order.size());
+ }
+ } while (marker != 0xd9);
+ } else {
+ if (marker_order.size() > 16384) {
+ return JXL_FAILURE("Too many markers: %" PRIuS "\n", marker_order.size());
+ }
+ for (size_t i = 0; i < marker_order.size(); i++) {
+ JXL_RETURN_IF_ERROR(VisitMarker(&marker_order[i], visitor, &info));
+ }
+ if (!marker_order.empty()) {
+ // Last marker should always be EOI marker.
+ JXL_CHECK(marker_order.back() == 0xd9);
+ }
+ }
+
+ // Size of the APP and COM markers.
+ if (visitor->IsReading()) {
+ app_data.resize(info.num_app_markers);
+ app_marker_type.resize(info.num_app_markers);
+ com_data.resize(info.num_com_markers);
+ scan_info.resize(info.num_scans);
+ }
+ JXL_ASSERT(app_data.size() == info.num_app_markers);
+ JXL_ASSERT(app_marker_type.size() == info.num_app_markers);
+ JXL_ASSERT(com_data.size() == info.num_com_markers);
+ JXL_ASSERT(scan_info.size() == info.num_scans);
+ for (size_t i = 0; i < app_data.size(); i++) {
+ auto& app = app_data[i];
+ // Encodes up to 8 different values.
+ JXL_RETURN_IF_ERROR(
+ visitor->U32(Val(0), Val(1), BitsOffset(1, 2), BitsOffset(2, 4), 0,
+ reinterpret_cast<uint32_t*>(&app_marker_type[i])));
+ if (app_marker_type[i] != AppMarkerType::kUnknown &&
+ app_marker_type[i] != AppMarkerType::kICC &&
+ app_marker_type[i] != AppMarkerType::kExif &&
+ app_marker_type[i] != AppMarkerType::kXMP) {
+ return JXL_FAILURE("Unknown app marker type %u",
+ static_cast<uint32_t>(app_marker_type[i]));
+ }
+ uint32_t len = app.size() - 1;
+ JXL_RETURN_IF_ERROR(visitor->Bits(16, 0, &len));
+ if (visitor->IsReading()) app.resize(len + 1);
+ if (app.size() < 3) {
+ return JXL_FAILURE("Invalid marker size: %" PRIuS "\n", app.size());
+ }
+ }
+ for (auto& com : com_data) {
+ uint32_t len = com.size() - 1;
+ JXL_RETURN_IF_ERROR(visitor->Bits(16, 0, &len));
+ if (visitor->IsReading()) com.resize(len + 1);
+ if (com.size() < 3) {
+ return JXL_FAILURE("Invalid marker size: %" PRIuS "\n", com.size());
+ }
+ }
+
+ uint32_t num_quant_tables = quant.size();
+ JXL_RETURN_IF_ERROR(
+ visitor->U32(Val(1), Val(2), Val(3), Val(4), 2, &num_quant_tables));
+ if (num_quant_tables == 4) {
+ return JXL_FAILURE("Invalid number of quant tables");
+ }
+ if (visitor->IsReading()) {
+ quant.resize(num_quant_tables);
+ }
+ for (size_t i = 0; i < num_quant_tables; i++) {
+ if (quant[i].precision > 1) {
+ return JXL_FAILURE(
+ "Quant tables with more than 16 bits are not supported");
+ }
+ JXL_RETURN_IF_ERROR(visitor->Bits(1, 0, &quant[i].precision));
+ JXL_RETURN_IF_ERROR(visitor->Bits(2, i, &quant[i].index));
+ JXL_RETURN_IF_ERROR(visitor->Bool(true, &quant[i].is_last));
+ }
+
+ JPEGComponentType component_type =
+ components.size() == 1 && components[0].id == 1 ? JPEGComponentType::kGray
+ : components.size() == 3 && components[0].id == 1 &&
+ components[1].id == 2 && components[2].id == 3
+ ? JPEGComponentType::kYCbCr
+ : components.size() == 3 && components[0].id == 'R' &&
+ components[1].id == 'G' && components[2].id == 'B'
+ ? JPEGComponentType::kRGB
+ : JPEGComponentType::kCustom;
+ JXL_RETURN_IF_ERROR(
+ visitor->Bits(2, JPEGComponentType::kYCbCr,
+ reinterpret_cast<uint32_t*>(&component_type)));
+ uint32_t num_components;
+ if (component_type == JPEGComponentType::kGray) {
+ num_components = 1;
+ } else if (component_type != JPEGComponentType::kCustom) {
+ num_components = 3;
+ } else {
+ num_components = components.size();
+ JXL_RETURN_IF_ERROR(
+ visitor->U32(Val(1), Val(2), Val(3), Val(4), 3, &num_components));
+ if (num_components != 1 && num_components != 3) {
+ return JXL_FAILURE("Invalid number of components: %u", num_components);
+ }
+ }
+ if (visitor->IsReading()) {
+ components.resize(num_components);
+ }
+ if (component_type == JPEGComponentType::kCustom) {
+ for (size_t i = 0; i < components.size(); i++) {
+ JXL_RETURN_IF_ERROR(visitor->Bits(8, 0, &components[i].id));
+ }
+ } else if (component_type == JPEGComponentType::kGray) {
+ components[0].id = 1;
+ } else if (component_type == JPEGComponentType::kRGB) {
+ components[0].id = 'R';
+ components[1].id = 'G';
+ components[2].id = 'B';
+ } else {
+ components[0].id = 1;
+ components[1].id = 2;
+ components[2].id = 3;
+ }
+ size_t used_tables = 0;
+ for (size_t i = 0; i < components.size(); i++) {
+ JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &components[i].quant_idx));
+ if (components[i].quant_idx >= quant.size()) {
+ return JXL_FAILURE("Invalid quant table for component %" PRIuS ": %u\n",
+ i, components[i].quant_idx);
+ }
+ used_tables |= 1U << components[i].quant_idx;
+ }
+ for (size_t i = 0; i < quant.size(); i++) {
+ if (used_tables & (1 << i)) continue;
+ if (i == 0) return JXL_FAILURE("First quant table unused.");
+ // Unused quant table has to be set to copy of previous quant table
+ for (size_t j = 0; j < 64; j++) {
+ if (quant[i].values[j] != quant[i - 1].values[j]) {
+ return JXL_FAILURE("Non-trivial unused quant table");
+ }
+ }
+ }
+
+ uint32_t num_huff = huffman_code.size();
+ JXL_RETURN_IF_ERROR(visitor->U32(Val(4), BitsOffset(3, 2), BitsOffset(4, 10),
+ BitsOffset(6, 26), 4, &num_huff));
+ if (visitor->IsReading()) {
+ huffman_code.resize(num_huff);
+ }
+ for (JPEGHuffmanCode& hc : huffman_code) {
+ bool is_ac = hc.slot_id >> 4;
+ uint32_t id = hc.slot_id & 0xF;
+ JXL_RETURN_IF_ERROR(visitor->Bool(false, &is_ac));
+ JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &id));
+ hc.slot_id = (static_cast<uint32_t>(is_ac) << 4) | id;
+ JXL_RETURN_IF_ERROR(visitor->Bool(true, &hc.is_last));
+ size_t num_symbols = 0;
+ for (size_t i = 0; i <= 16; i++) {
+ JXL_RETURN_IF_ERROR(visitor->U32(Val(0), Val(1), BitsOffset(3, 2),
+ Bits(8), 0, &hc.counts[i]));
+ num_symbols += hc.counts[i];
+ }
+ if (num_symbols < 1) {
+ // Actually, at least 2 symbols are required, since one of them is EOI.
+ return JXL_FAILURE("Empty Huffman table");
+ }
+ if (num_symbols > hc.values.size()) {
+ return JXL_FAILURE("Huffman code too large (%" PRIuS ")", num_symbols);
+ }
+ // Presence flags for 4 * 64 + 1 values.
+ uint64_t value_slots[5] = {};
+ for (size_t i = 0; i < num_symbols; i++) {
+ // Goes up to 256, included. Might have the same symbol appear twice...
+ JXL_RETURN_IF_ERROR(visitor->U32(Bits(2), BitsOffset(2, 4),
+ BitsOffset(4, 8), BitsOffset(8, 1), 0,
+ &hc.values[i]));
+ value_slots[hc.values[i] >> 6] |= (uint64_t)1 << (hc.values[i] & 0x3F);
+ }
+ if (hc.values[num_symbols - 1] != kJpegHuffmanAlphabetSize) {
+ return JXL_FAILURE("Missing EOI symbol");
+ }
+ // Last element, denoting EOI, have to be 1 after the loop.
+ JXL_ASSERT(value_slots[4] == 1);
+ size_t num_values = 1;
+ for (size_t i = 0; i < 4; ++i) num_values += hwy::PopCount(value_slots[i]);
+ if (num_values != num_symbols) {
+ return JXL_FAILURE("Duplicate Huffman symbols");
+ }
+ if (!is_ac) {
+ bool only_dc = ((value_slots[0] >> kJpegDCAlphabetSize) | value_slots[1] |
+ value_slots[2] | value_slots[3]) == 0;
+ if (!only_dc) return JXL_FAILURE("Huffman symbols out of DC range");
+ }
+ }
+
+ for (auto& scan : scan_info) {
+ JXL_RETURN_IF_ERROR(
+ visitor->U32(Val(1), Val(2), Val(3), Val(4), 1, &scan.num_components));
+ if (scan.num_components >= 4) {
+ return JXL_FAILURE("Invalid number of components in SOS marker");
+ }
+ JXL_RETURN_IF_ERROR(visitor->Bits(6, 0, &scan.Ss));
+ JXL_RETURN_IF_ERROR(visitor->Bits(6, 63, &scan.Se));
+ JXL_RETURN_IF_ERROR(visitor->Bits(4, 0, &scan.Al));
+ JXL_RETURN_IF_ERROR(visitor->Bits(4, 0, &scan.Ah));
+ for (size_t i = 0; i < scan.num_components; i++) {
+ JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &scan.components[i].comp_idx));
+ if (scan.components[i].comp_idx >= components.size()) {
+ return JXL_FAILURE("Invalid component idx in SOS marker");
+ }
+ JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &scan.components[i].ac_tbl_idx));
+ JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &scan.components[i].dc_tbl_idx));
+ }
+ // TODO(veluca): actually set and use this value.
+ JXL_RETURN_IF_ERROR(visitor->U32(Val(0), Val(1), Val(2), BitsOffset(3, 3),
+ kMaxNumPasses - 1,
+ &scan.last_needed_pass));
+ }
+
+ // From here on, this is data that is not strictly necessary to get a valid
+ // JPEG, but necessary for bit-exact JPEG reconstruction.
+ if (info.has_dri) {
+ JXL_RETURN_IF_ERROR(visitor->Bits(16, 0, &restart_interval));
+ }
+
+ for (auto& scan : scan_info) {
+ uint32_t num_reset_points = scan.reset_points.size();
+ JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(2, 1), BitsOffset(4, 4),
+ BitsOffset(16, 20), 0, &num_reset_points));
+ if (visitor->IsReading()) {
+ scan.reset_points.resize(num_reset_points);
+ }
+ int last_block_idx = -1;
+ for (auto& block_idx : scan.reset_points) {
+ block_idx -= last_block_idx + 1;
+ JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(3, 1),
+ BitsOffset(5, 9), BitsOffset(28, 41), 0,
+ &block_idx));
+ block_idx += last_block_idx + 1;
+ if (static_cast<int>(block_idx) < last_block_idx + 1) {
+ return JXL_FAILURE("Invalid block ID: %u, last block was %d", block_idx,
+ last_block_idx);
+ }
+ // TODO(eustas): better upper boundary could be given at this point; also
+ // it could be applied during reset_points reading.
+ if (block_idx > (1u << 30)) {
+ // At most 8K x 8K x num_channels blocks are expected. That is,
+ // typically, 1.5 * 2^27. 2^30 should be sufficient for any sane
+ // image.
+ return JXL_FAILURE("Invalid block ID: %u", block_idx);
+ }
+ last_block_idx = block_idx;
+ }
+
+ uint32_t num_extra_zero_runs = scan.extra_zero_runs.size();
+ JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(2, 1), BitsOffset(4, 4),
+ BitsOffset(16, 20), 0,
+ &num_extra_zero_runs));
+ if (visitor->IsReading()) {
+ scan.extra_zero_runs.resize(num_extra_zero_runs);
+ }
+ last_block_idx = -1;
+ for (size_t i = 0; i < scan.extra_zero_runs.size(); ++i) {
+ uint32_t& block_idx = scan.extra_zero_runs[i].block_idx;
+ JXL_RETURN_IF_ERROR(visitor->U32(
+ Val(1), BitsOffset(2, 2), BitsOffset(4, 5), BitsOffset(8, 20), 1,
+ &scan.extra_zero_runs[i].num_extra_zero_runs));
+ block_idx -= last_block_idx + 1;
+ JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(3, 1),
+ BitsOffset(5, 9), BitsOffset(28, 41), 0,
+ &block_idx));
+ block_idx += last_block_idx + 1;
+ if (static_cast<int>(block_idx) < last_block_idx + 1) {
+ return JXL_FAILURE("Invalid block ID: %u, last block was %d", block_idx,
+ last_block_idx);
+ }
+ if (block_idx > (1u << 30)) {
+ // At most 8K x 8K x num_channels blocks are expected. That is,
+ // typically, 1.5 * 2^27. 2^30 should be sufficient for any sane
+ // image.
+ return JXL_FAILURE("Invalid block ID: %u", block_idx);
+ }
+ last_block_idx = block_idx;
+ }
+ }
+ std::vector<uint32_t> inter_marker_data_sizes;
+ inter_marker_data_sizes.reserve(info.num_intermarker);
+ for (size_t i = 0; i < info.num_intermarker; ++i) {
+ uint32_t len = visitor->IsReading() ? 0 : inter_marker_data[i].size();
+ JXL_RETURN_IF_ERROR(visitor->Bits(16, 0, &len));
+ if (visitor->IsReading()) inter_marker_data_sizes.emplace_back(len);
+ }
+ uint32_t tail_data_len = tail_data.size();
+ if (!visitor->IsReading() && tail_data_len > 4260096) {
+ return JXL_FAILURE("Tail data too large (max size = 4260096, size = %u).",
+ tail_data_len);
+ }
+ JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(8, 1),
+ BitsOffset(16, 257), BitsOffset(22, 65793),
+ 0, &tail_data_len));
+
+ JXL_RETURN_IF_ERROR(visitor->Bool(false, &has_zero_padding_bit));
+ if (has_zero_padding_bit) {
+ uint32_t nbit = padding_bits.size();
+ JXL_RETURN_IF_ERROR(visitor->Bits(24, 0, &nbit));
+ if (visitor->IsReading()) {
+ padding_bits.reserve(std::min<uint32_t>(1024u, nbit));
+ for (uint32_t i = 0; i < nbit; i++) {
+ bool bbit = false;
+ JXL_RETURN_IF_ERROR(visitor->Bool(false, &bbit));
+ padding_bits.push_back(bbit);
+ }
+ } else {
+ for (uint8_t& bit : padding_bits) {
+ bool bbit = bit;
+ JXL_RETURN_IF_ERROR(visitor->Bool(false, &bbit));
+ bit = bbit;
+ }
+ }
+ }
+
+ // Apply postponed actions.
+ if (visitor->IsReading()) {
+ tail_data.resize(tail_data_len);
+ JXL_ASSERT(inter_marker_data_sizes.size() == info.num_intermarker);
+ inter_marker_data.reserve(info.num_intermarker);
+ for (size_t i = 0; i < info.num_intermarker; ++i) {
+ inter_marker_data.emplace_back(inter_marker_data_sizes[i]);
+ }
+ }
+
+ return true;
+}
+
+#endif // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+void JPEGData::CalculateMcuSize(const JPEGScanInfo& scan, int* MCUs_per_row,
+ int* MCU_rows) const {
+ const bool is_interleaved = (scan.num_components > 1);
+ const JPEGComponent& base_component = components[scan.components[0].comp_idx];
+ // h_group / v_group act as numerators for converting number of blocks to
+ // number of MCU. In interleaved mode it is 1, so MCU is represented with
+ // max_*_samp_factor blocks. In non-interleaved mode we choose numerator to
+ // be the samping factor, consequently MCU is always represented with single
+ // block.
+ const int h_group = is_interleaved ? 1 : base_component.h_samp_factor;
+ const int v_group = is_interleaved ? 1 : base_component.v_samp_factor;
+ int max_h_samp_factor = 1;
+ int max_v_samp_factor = 1;
+ for (const auto& c : components) {
+ max_h_samp_factor = std::max(c.h_samp_factor, max_h_samp_factor);
+ max_v_samp_factor = std::max(c.v_samp_factor, max_v_samp_factor);
+ }
+ *MCUs_per_row = DivCeil(width * h_group, 8 * max_h_samp_factor);
+ *MCU_rows = DivCeil(height * v_group, 8 * max_v_samp_factor);
+}
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+
+Status SetJPEGDataFromICC(const PaddedBytes& icc, jpeg::JPEGData* jpeg_data) {
+ size_t icc_pos = 0;
+ for (size_t i = 0; i < jpeg_data->app_data.size(); i++) {
+ if (jpeg_data->app_marker_type[i] != jpeg::AppMarkerType::kICC) {
+ continue;
+ }
+ size_t len = jpeg_data->app_data[i].size() - 17;
+ if (icc_pos + len > icc.size()) {
+ return JXL_FAILURE(
+ "ICC length is less than APP markers: requested %" PRIuS
+ " more bytes, "
+ "%" PRIuS " available",
+ len, icc.size() - icc_pos);
+ }
+ memcpy(&jpeg_data->app_data[i][17], icc.data() + icc_pos, len);
+ icc_pos += len;
+ }
+ if (icc_pos != icc.size() && icc_pos != 0) {
+ return JXL_FAILURE("ICC length is more than APP markers");
+ }
+ return true;
+}
+
+#endif // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+} // namespace jpeg
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/jpeg/jpeg_data.h b/third_party/jpeg-xl/lib/jxl/jpeg/jpeg_data.h
new file mode 100644
index 0000000000..70ff4f8e05
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/jpeg/jpeg_data.h
@@ -0,0 +1,216 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Data structures that represent the non-pixel contents of a jpeg file.
+
+#ifndef LIB_JXL_JPEG_JPEG_DATA_H_
+#define LIB_JXL_JPEG_JPEG_DATA_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <array>
+#include <vector>
+
+#include "lib/jxl/common.h" // JPEGXL_ENABLE_TRANSCODE_JPEG
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+namespace jpeg {
+
+constexpr int kMaxComponents = 4;
+constexpr int kMaxQuantTables = 4;
+constexpr int kMaxHuffmanTables = 4;
+constexpr size_t kJpegHuffmanMaxBitLength = 16;
+constexpr int kJpegHuffmanAlphabetSize = 256;
+constexpr int kJpegDCAlphabetSize = 12;
+constexpr int kMaxDHTMarkers = 512;
+constexpr int kMaxDimPixels = 65535;
+constexpr uint8_t kApp1 = 0xE1;
+constexpr uint8_t kApp2 = 0xE2;
+const uint8_t kIccProfileTag[12] = "ICC_PROFILE";
+const uint8_t kExifTag[6] = "Exif\0";
+const uint8_t kXMPTag[29] = "http://ns.adobe.com/xap/1.0/";
+
+/* clang-format off */
+constexpr uint32_t kJPEGNaturalOrder[80] = {
+ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63,
+ // extra entries for safety in decoder
+ 63, 63, 63, 63, 63, 63, 63, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63
+};
+
+constexpr uint32_t kJPEGZigZagOrder[64] = {
+ 0, 1, 5, 6, 14, 15, 27, 28,
+ 2, 4, 7, 13, 16, 26, 29, 42,
+ 3, 8, 12, 17, 25, 30, 41, 43,
+ 9, 11, 18, 24, 31, 40, 44, 53,
+ 10, 19, 23, 32, 39, 45, 52, 54,
+ 20, 22, 33, 38, 46, 51, 55, 60,
+ 21, 34, 37, 47, 50, 56, 59, 61,
+ 35, 36, 48, 49, 57, 58, 62, 63
+};
+/* clang-format on */
+
+// Quantization values for an 8x8 pixel block.
+struct JPEGQuantTable {
+ std::array<int32_t, kDCTBlockSize> values;
+ uint32_t precision = 0;
+ // The index of this quantization table as it was parsed from the input JPEG.
+ // Each DQT marker segment contains an 'index' field, and we save this index
+ // here. Valid values are 0 to 3.
+ uint32_t index = 0;
+ // Set to true if this table is the last one within its marker segment.
+ bool is_last = true;
+};
+
+// Huffman code and decoding lookup table used for DC and AC coefficients.
+struct JPEGHuffmanCode {
+ // Bit length histogram.
+ std::array<uint32_t, kJpegHuffmanMaxBitLength + 1> counts = {};
+ // Symbol values sorted by increasing bit lengths.
+ std::array<uint32_t, kJpegHuffmanAlphabetSize + 1> values = {};
+ // The index of the Huffman code in the current set of Huffman codes. For AC
+ // component Huffman codes, 0x10 is added to the index.
+ int slot_id = 0;
+ // Set to true if this Huffman code is the last one within its marker segment.
+ bool is_last = true;
+};
+
+// Huffman table indexes used for one component of one scan.
+struct JPEGComponentScanInfo {
+ uint32_t comp_idx;
+ uint32_t dc_tbl_idx;
+ uint32_t ac_tbl_idx;
+};
+
+// Contains information that is used in one scan.
+struct JPEGScanInfo {
+ // Parameters used for progressive scans (named the same way as in the spec):
+ // Ss : Start of spectral band in zig-zag sequence.
+ // Se : End of spectral band in zig-zag sequence.
+ // Ah : Successive approximation bit position, high.
+ // Al : Successive approximation bit position, low.
+ uint32_t Ss;
+ uint32_t Se;
+ uint32_t Ah;
+ uint32_t Al;
+ uint32_t num_components = 0;
+ std::array<JPEGComponentScanInfo, 4> components;
+ // Last codestream pass that is needed to write this scan.
+ uint32_t last_needed_pass = 0;
+
+ // Extra information required for bit-precise JPEG file reconstruction.
+
+ // Set of block indexes where the JPEG encoder has to flush the end-of-block
+ // runs and refinement bits.
+ std::vector<uint32_t> reset_points;
+ // The number of extra zero runs (Huffman symbol 0xf0) before the end of
+ // block (if nonzero), indexed by block index.
+ // All of these symbols can be omitted without changing the pixel values, but
+ // some jpeg encoders put these at the end of blocks.
+ typedef struct {
+ uint32_t block_idx;
+ uint32_t num_extra_zero_runs;
+ } ExtraZeroRunInfo;
+ std::vector<ExtraZeroRunInfo> extra_zero_runs;
+};
+
+typedef int16_t coeff_t;
+
+// Represents one component of a jpeg file.
+struct JPEGComponent {
+ JPEGComponent()
+ : id(0),
+ h_samp_factor(1),
+ v_samp_factor(1),
+ quant_idx(0),
+ width_in_blocks(0),
+ height_in_blocks(0) {}
+
+ // One-byte id of the component.
+ uint32_t id;
+ // Horizontal and vertical sampling factors.
+ // In interleaved mode, each minimal coded unit (MCU) has
+ // h_samp_factor x v_samp_factor DCT blocks from this component.
+ int h_samp_factor;
+ int v_samp_factor;
+ // The index of the quantization table used for this component.
+ uint32_t quant_idx;
+ // The dimensions of the component measured in 8x8 blocks.
+ uint32_t width_in_blocks;
+ uint32_t height_in_blocks;
+ // The DCT coefficients of this component, laid out block-by-block, divided
+ // through the quantization matrix values.
+ std::vector<coeff_t> coeffs;
+};
+
+enum class AppMarkerType : uint32_t {
+ kUnknown = 0,
+ kICC = 1,
+ kExif = 2,
+ kXMP = 3,
+};
+
+// Represents a parsed jpeg file.
+struct JPEGData : public Fields {
+ JPEGData()
+ : width(0), height(0), restart_interval(0), has_zero_padding_bit(false) {}
+
+ JXL_FIELDS_NAME(JPEGData)
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ // Doesn't serialize everything - skips brotli-encoded data and what is
+ // already encoded in the codestream.
+ Status VisitFields(Visitor* visitor) override;
+#else
+ Status VisitFields(Visitor* /* visitor */) override {
+ JXL_ABORT("JPEG transcoding support not enabled");
+ }
+#endif // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+ void CalculateMcuSize(const JPEGScanInfo& scan, int* MCUs_per_row,
+ int* MCU_rows) const;
+
+ int width;
+ int height;
+ uint32_t restart_interval;
+ std::vector<std::vector<uint8_t>> app_data;
+ std::vector<AppMarkerType> app_marker_type;
+ std::vector<std::vector<uint8_t>> com_data;
+ std::vector<JPEGQuantTable> quant;
+ std::vector<JPEGHuffmanCode> huffman_code;
+ std::vector<JPEGComponent> components;
+ std::vector<JPEGScanInfo> scan_info;
+ std::vector<uint8_t> marker_order;
+ std::vector<std::vector<uint8_t>> inter_marker_data;
+ std::vector<uint8_t> tail_data;
+
+ // Extra information required for bit-precise JPEG file reconstruction.
+
+ bool has_zero_padding_bit;
+ std::vector<uint8_t> padding_bits;
+};
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+// Set ICC profile in jpeg_data.
+Status SetJPEGDataFromICC(const PaddedBytes& icc, jpeg::JPEGData* jpeg_data);
+#else
+static JXL_INLINE Status SetJPEGDataFromICC(const PaddedBytes& /* icc */,
+ jpeg::JPEGData* /* jpeg_data */) {
+ JXL_ABORT("JPEG transcoding support not enabled");
+}
+#endif // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+} // namespace jpeg
+} // namespace jxl
+
+#endif // LIB_JXL_JPEG_JPEG_DATA_H_
diff --git a/third_party/jpeg-xl/lib/jxl/jxl.syms b/third_party/jpeg-xl/lib/jxl/jxl.syms
new file mode 100644
index 0000000000..0f398d7151
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/jxl.syms
@@ -0,0 +1,5 @@
+{
+ extern "C" {
+ jpegxl_*;
+ };
+};
diff --git a/third_party/jpeg-xl/lib/jxl/jxl.version b/third_party/jpeg-xl/lib/jxl/jxl.version
new file mode 100644
index 0000000000..26b0e9e54d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/jxl.version
@@ -0,0 +1,17 @@
+JXL_0 {
+ global:
+ Jxl*;
+
+ local:
+ # Hide all the std namespace symbols. std namespace is explicitly marked
+ # as visibility(default) and header-only functions or methods (such as those
+ # from templates) should be exposed in shared libraries as weak symbols but
+ # this is only needed when we expose those types in the shared library API
+ # in any way. We don't use C++ std types in the API and we also don't
+ # support exceptions in the library.
+ # See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=36022 for a discussion
+ # about this.
+ extern "C++" {
+ *std::*;
+ };
+};
diff --git a/third_party/jpeg-xl/lib/jxl/jxl_inspection.h b/third_party/jpeg-xl/lib/jxl/jxl_inspection.h
new file mode 100644
index 0000000000..0b70a58523
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/jxl_inspection.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_JXL_INSPECTION_H_
+#define LIB_JXL_JXL_INSPECTION_H_
+
+#include <functional>
+
+#include "lib/jxl/image.h"
+
+namespace jxl {
+// Type of the inspection-callback which, if enabled, will be called on various
+// intermediate data during image processing, allowing inspection access.
+//
+// Returns false if processing can be stopped at that point, true otherwise.
+// This is only advisory - it is always OK to just continue processing.
+using InspectorImage3F = std::function<bool(const char*, const Image3F&)>;
+} // namespace jxl
+
+#endif // LIB_JXL_JXL_INSPECTION_H_
diff --git a/third_party/jpeg-xl/lib/jxl/jxl_osx.syms b/third_party/jpeg-xl/lib/jxl/jxl_osx.syms
new file mode 100644
index 0000000000..96bc568025
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/jxl_osx.syms
@@ -0,0 +1 @@
+_Jxl*
diff --git a/third_party/jpeg-xl/lib/jxl/jxl_test.cc b/third_party/jpeg-xl/lib/jxl/jxl_test.cc
new file mode 100644
index 0000000000..0a676802f6
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/jxl_test.cc
@@ -0,0 +1,1537 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/jxl.h"
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <array>
+#include <future>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/enc/encode.h"
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_butteraugli_pnorm.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/fake_parallel_runner_testonly.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/jpeg/dec_jpeg_data.h"
+#include "lib/jxl/jpeg/dec_jpeg_data_writer.h"
+#include "lib/jxl/jpeg/enc_jpeg_data.h"
+#include "lib/jxl/jpeg/jpeg_data.h"
+#include "lib/jxl/modular/options.h"
+#include "lib/jxl/test_image.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+#include "tools/box/box.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+namespace {
+using extras::JXLCompressParams;
+using extras::JXLDecompressParams;
+using extras::PackedPixelFile;
+using test::ButteraugliDistance;
+using test::ComputeDistance2;
+using test::Roundtrip;
+using test::TestImage;
+using test::ThreadPoolForTests;
+
+#define JXL_TEST_NL 0 // Disabled in code
+
+TEST(JxlTest, RoundtripSinglePixel) {
+ TestImage t;
+ t.SetDimensions(1, 1).AddFrame().ZeroFill();
+ PackedPixelFile ppf_out;
+ EXPECT_EQ(Roundtrip(t.ppf(), {}, {}, nullptr, &ppf_out), 55);
+}
+
+TEST(JxlTest, RoundtripSinglePixelWithAlpha) {
+ TestImage t;
+ t.SetDimensions(1, 1).SetChannels(4).AddFrame().ZeroFill();
+ PackedPixelFile ppf_out;
+ EXPECT_EQ(Roundtrip(t.ppf(), {}, {}, nullptr, &ppf_out), 59);
+}
+
+// Changing serialized signature causes Decode to fail.
+#ifndef JXL_CRASH_ON_ERROR
+TEST(JxlTest, RoundtripMarker) {
+ TestImage t;
+ t.SetDimensions(1, 1).AddFrame().ZeroFill();
+ for (size_t i = 0; i < 2; ++i) {
+ std::vector<uint8_t> compressed;
+ EXPECT_TRUE(extras::EncodeImageJXL({}, t.ppf(), /*jpeg_bytes=*/nullptr,
+ &compressed));
+ compressed[i] ^= 0xFF;
+ PackedPixelFile ppf_out;
+ EXPECT_FALSE(extras::DecodeImageJXL(compressed.data(), compressed.size(),
+ {}, /*decodec_bytes=*/nullptr,
+ &ppf_out));
+ }
+}
+#endif
+
+TEST(JxlTest, RoundtripTinyFast) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata().SetDimensions(32, 32);
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7);
+ cparams.distance = 4.0f;
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 192, 10);
+}
+
+TEST(JxlTest, RoundtripSmallD1) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+ size_t xsize = t.ppf().info.xsize / 8;
+ size_t ysize = t.ppf().info.ysize / 8;
+ t.SetDimensions(xsize, ysize);
+
+ {
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), {}, {}, pool, &ppf_out), 766, 40);
+ EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.2));
+ }
+
+ // With a lower intensity target than the default, the bitrate should be
+ // smaller.
+ t.ppf().info.intensity_target = 100.0f;
+
+ {
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), {}, {}, pool, &ppf_out), 659, 20);
+ EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.3));
+ EXPECT_EQ(ppf_out.info.intensity_target, t.ppf().info.intensity_target);
+ }
+}
+TEST(JxlTest, RoundtripResample2) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 2);
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 3); // kFalcon
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 18772, 200);
+ EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(90));
+}
+
+TEST(JxlTest, RoundtripResample2Slow) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 2);
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 9); // kTortoise
+ cparams.distance = 10.0;
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 4088, 200);
+ EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(250));
+}
+
+TEST(JxlTest, RoundtripResample2MT) {
+ ThreadPoolForTests pool(4);
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+ // image has to be large enough to have multiple groups after downsampling
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 2);
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 3); // kFalcon
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 228283, 1000);
+ EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(340));
+}
+
+// Roundtrip the image using a parallel runner that executes single-threaded but
+// in random order.
+TEST(JxlTest, RoundtripOutOfOrderProcessing) {
+ FakeParallelRunner fake_pool(/*order_seed=*/123, /*num_threads=*/8);
+ ThreadPool pool(&JxlFakeParallelRunner, &fake_pool);
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+ // Image size is selected so that the block border needed is larger than the
+ // amount of pixels available on the next block.
+ t.SetDimensions(513, 515);
+
+ JXLCompressParams cparams;
+ // Force epf so we end up needing a lot of border.
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EPF, 3);
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 22584, 400);
+ EXPECT_LE(ButteraugliDistance(t.ppf(), ppf_out), 1.35);
+}
+
+TEST(JxlTest, RoundtripOutOfOrderProcessingBorder) {
+ FakeParallelRunner fake_pool(/*order_seed=*/47, /*num_threads=*/8);
+ ThreadPool pool(&JxlFakeParallelRunner, &fake_pool);
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+ // Image size is selected so that the block border needed is larger than the
+ // amount of pixels available on the next block.
+ t.SetDimensions(513, 515);
+
+ JXLCompressParams cparams;
+ // Force epf so we end up needing a lot of border.
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EPF, 3);
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 2);
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 10907, 200);
+ EXPECT_LE(ButteraugliDistance(t.ppf(), ppf_out), 2.9);
+}
+
+TEST(JxlTest, RoundtripResample4) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 4);
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 5824, 100);
+ EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(22));
+}
+
+TEST(JxlTest, RoundtripResample8) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 8);
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 2036, 50);
+ EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(50));
+}
+
+TEST(JxlTest, RoundtripUnalignedD2) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+ size_t xsize = t.ppf().info.xsize / 12;
+ size_t ysize = t.ppf().info.ysize / 7;
+ t.SetDimensions(xsize, ysize);
+
+ JXLCompressParams cparams;
+ cparams.distance = 2.0;
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 506, 30);
+ EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.72));
+}
+
+TEST(JxlTest, RoundtripMultiGroup) {
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata().SetDimensions(600, 1024);
+
+ auto test = [&](jxl::SpeedTier speed_tier, float target_distance,
+ size_t expected_size, float expected_distance) {
+ ThreadPoolForTests pool(4);
+ JXLCompressParams cparams;
+ int64_t effort = 10 - static_cast<int>(speed_tier);
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, effort);
+ cparams.distance = target_distance;
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), expected_size,
+ 700);
+ EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out),
+ IsSlightlyBelow(expected_distance));
+ };
+
+ auto run_kitten = std::async(std::launch::async, test, SpeedTier::kKitten,
+ 1.0f, 54895u, 11.7);
+ auto run_wombat = std::async(std::launch::async, test, SpeedTier::kWombat,
+ 2.0f, 33507u, 20.0);
+}
+
+TEST(JxlTest, RoundtripRGBToGrayscale) {
+ ThreadPoolForTests pool(4);
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+ io.ShrinkTo(600, 1024);
+
+ CompressParams cparams;
+ cparams.butteraugli_distance = 1.0f;
+ cparams.speed_tier = SpeedTier::kFalcon;
+
+ JXLDecompressParams dparams;
+ dparams.color_space = "Gra_D65_Rel_SRG";
+
+ CodecInOut io2;
+ EXPECT_FALSE(io.Main().IsGray());
+ size_t compressed_size;
+ JXL_EXPECT_OK(
+ Roundtrip(&io, cparams, dparams, &io2, _, &compressed_size, &pool));
+ EXPECT_LE(compressed_size, 65000u);
+ EXPECT_TRUE(io2.Main().IsGray());
+
+ // Convert original to grayscale here, because TransformTo refuses to
+ // convert between grayscale and RGB.
+ ColorEncoding srgb_lin = ColorEncoding::LinearSRGB(/*is_gray=*/false);
+ ASSERT_TRUE(io.frames[0].TransformTo(srgb_lin, GetJxlCms()));
+ Image3F* color = io.Main().color();
+ for (size_t y = 0; y < color->ysize(); ++y) {
+ float* row_r = color->PlaneRow(0, y);
+ float* row_g = color->PlaneRow(1, y);
+ float* row_b = color->PlaneRow(2, y);
+ for (size_t x = 0; x < color->xsize(); ++x) {
+ float luma = 0.2126 * row_r[x] + 0.7152 * row_g[x] + 0.0722 * row_b[x];
+ row_r[x] = row_g[x] = row_b[x] = luma;
+ }
+ }
+ ColorEncoding srgb_gamma = ColorEncoding::SRGB(/*is_gray=*/false);
+ ASSERT_TRUE(io.frames[0].TransformTo(srgb_gamma, GetJxlCms()));
+ io.metadata.m.color_encoding = io2.Main().c_current();
+ io.Main().OverrideProfile(io2.Main().c_current());
+ EXPECT_THAT(
+ ButteraugliDistance(io.frames, io2.frames, cparams.ba_params, GetJxlCms(),
+ /*distmap=*/nullptr, &pool),
+ IsSlightlyBelow(1.36));
+}
+
+TEST(JxlTest, RoundtripLargeFast) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7); // kSquirrel
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 445684, 5000);
+ EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(100));
+}
+
+TEST(JxlTest, RoundtripDotsForceEpf) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/cvo9xd_keong_macan_srgb8.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7); // kSquirrel
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EPF, 2);
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_DOTS, 1);
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 41472, 300);
+ EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(18));
+}
+
+// Checks for differing size/distance in two consecutive runs of distance 2,
+// which involves additional processing including adaptive reconstruction.
+// Failing this may be a sign of race conditions or invalid memory accesses.
+TEST(JxlTest, RoundtripD2Consistent) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7); // kSquirrel
+ cparams.distance = 2.0;
+
+ // Try each xsize mod kBlockDim to verify right border handling.
+ for (size_t xsize = 48; xsize > 40; --xsize) {
+ t.SetDimensions(xsize, 15);
+
+ PackedPixelFile ppf2;
+ const size_t size2 = Roundtrip(t.ppf(), cparams, {}, &pool, &ppf2);
+
+ PackedPixelFile ppf3;
+ const size_t size3 = Roundtrip(t.ppf(), cparams, {}, &pool, &ppf3);
+
+ // Exact same compressed size.
+ EXPECT_EQ(size2, size3);
+
+ // Exact same distance.
+ const float dist2 = ComputeDistance2(t.ppf(), ppf2);
+ const float dist3 = ComputeDistance2(t.ppf(), ppf3);
+ EXPECT_EQ(dist2, dist3);
+ }
+}
+
+// Same as above, but for full image, testing multiple groups.
+TEST(JxlTest, RoundtripLargeConsistent) {
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7); // kSquirrel
+ cparams.distance = 2.0;
+
+ auto roundtrip_and_compare = [&]() {
+ ThreadPoolForTests pool(8);
+ PackedPixelFile ppf2;
+ size_t size = Roundtrip(t.ppf(), cparams, {}, &pool, &ppf2);
+ double dist = ComputeDistance2(t.ppf(), ppf2);
+ return std::tuple<size_t, double>(size, dist);
+ };
+
+ // Try each xsize mod kBlockDim to verify right border handling.
+ auto future2 = std::async(std::launch::async, roundtrip_and_compare);
+ auto future3 = std::async(std::launch::async, roundtrip_and_compare);
+
+ const auto result2 = future2.get();
+ const auto result3 = future3.get();
+
+ // Exact same compressed size.
+ EXPECT_EQ(std::get<0>(result2), std::get<0>(result3));
+
+ // Exact same distance.
+ EXPECT_EQ(std::get<1>(result2), std::get<1>(result3));
+}
+
+TEST(JxlTest, RoundtripSmallNL) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+ size_t xsize = t.ppf().info.xsize / 8;
+ size_t ysize = t.ppf().info.ysize / 8;
+ t.SetDimensions(xsize, ysize);
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), {}, {}, pool, &ppf_out), 783, 25);
+ EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.1));
+}
+
+TEST(JxlTest, RoundtripNoGaborishNoAR) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EPF, 0);
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_GABORISH, 0);
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 38561, 200);
+ EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.8));
+}
+
+TEST(JxlTest, RoundtripSmallNoGaborish) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+ size_t xsize = t.ppf().info.xsize / 8;
+ size_t ysize = t.ppf().info.ysize / 8;
+ t.SetDimensions(xsize, ysize);
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_GABORISH, 0);
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 811, 20);
+ EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.1));
+}
+
+TEST(JxlTest, RoundtripSmallPatchesAlpha) {
+ ThreadPool* pool = nullptr;
+ TestImage t;
+ t.SetDimensions(256, 256).SetChannels(4);
+ t.SetColorEncoding("RGB_D65_SRG_Rel_Lin");
+ TestImage::Frame frame = t.AddFrame();
+ frame.ZeroFill();
+ // This pattern should be picked up by the patch detection heuristics.
+ for (size_t y = 0; y < t.ppf().info.ysize; ++y) {
+ for (size_t x = 0; x < t.ppf().info.xsize; ++x) {
+ if (x % 4 == 0 && (y / 32) % 4 == 0) {
+ frame.SetValue(y, x, 1, 127.0f / 255.0f);
+ }
+ frame.SetValue(y, x, 3, 1.0f);
+ }
+ }
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7); // kSquirrel
+ cparams.distance = 0.1f;
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 597, 100);
+ EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(0.012f));
+}
+
+TEST(JxlTest, RoundtripSmallPatches) {
+ ThreadPool* pool = nullptr;
+ TestImage t;
+ t.SetDimensions(256, 256);
+ t.SetColorEncoding("RGB_D65_SRG_Rel_Lin");
+ TestImage::Frame frame = t.AddFrame();
+ frame.ZeroFill();
+ // This pattern should be picked up by the patch detection heuristics.
+ for (size_t y = 0; y < t.ppf().info.ysize; ++y) {
+ for (size_t x = 0; x < t.ppf().info.xsize; ++x) {
+ if (x % 4 == 0 && (y / 32) % 4 == 0) {
+ frame.SetValue(y, x, 1, 127.0f / 255.0f);
+ }
+ }
+ }
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7); // kSquirrel
+ cparams.distance = 0.1f;
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 486, 100);
+ EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(0.012f));
+}
+
+// TODO(szabadka) Add encoder and decoder API functions that accept frame
+// buffers in arbitrary unsigned and floating point formats, and then roundtrip
+// test the lossless codepath to make sure the exact binary representations
+// are preserved.
+#if 0
+TEST(JxlTest, RoundtripImageBundleOriginalBits) {
+ // Image does not matter, only io.metadata.m and io2.metadata.m are tested.
+ Image3F image(1, 1);
+ ZeroFillImage(&image);
+ CodecInOut io;
+ io.metadata.m.color_encoding = ColorEncoding::LinearSRGB();
+ io.SetFromImage(std::move(image), ColorEncoding::LinearSRGB());
+
+ CompressParams cparams;
+
+ // Test unsigned integers from 1 to 32 bits
+ for (uint32_t bit_depth = 1; bit_depth <= 32; bit_depth++) {
+ if (bit_depth == 32) {
+ // TODO(lode): allow testing 32, however the code below ends up in
+ // enc_modular which does not support 32. We only want to test the header
+ // encoding though, so try without modular.
+ break;
+ }
+
+ io.metadata.m.SetUintSamples(bit_depth);
+ CodecInOut io2;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _));
+
+ EXPECT_EQ(bit_depth, io2.metadata.m.bit_depth.bits_per_sample);
+ EXPECT_FALSE(io2.metadata.m.bit_depth.floating_point_sample);
+ EXPECT_EQ(0u, io2.metadata.m.bit_depth.exponent_bits_per_sample);
+ EXPECT_EQ(0u, io2.metadata.m.GetAlphaBits());
+ }
+
+ // Test various existing and non-existing floating point formats
+ for (uint32_t bit_depth = 8; bit_depth <= 32; bit_depth++) {
+ if (bit_depth != 32) {
+ // TODO: test other float types once they work
+ break;
+ }
+
+ uint32_t exponent_bit_depth;
+ if (bit_depth < 10) {
+ exponent_bit_depth = 2;
+ } else if (bit_depth < 12) {
+ exponent_bit_depth = 3;
+ } else if (bit_depth < 16) {
+ exponent_bit_depth = 4;
+ } else if (bit_depth < 20) {
+ exponent_bit_depth = 5;
+ } else if (bit_depth < 24) {
+ exponent_bit_depth = 6;
+ } else if (bit_depth < 28) {
+ exponent_bit_depth = 7;
+ } else {
+ exponent_bit_depth = 8;
+ }
+
+ io.metadata.m.bit_depth.bits_per_sample = bit_depth;
+ io.metadata.m.bit_depth.floating_point_sample = true;
+ io.metadata.m.bit_depth.exponent_bits_per_sample = exponent_bit_depth;
+
+ CodecInOut io2;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2));
+
+ EXPECT_EQ(bit_depth, io2.metadata.m.bit_depth.bits_per_sample);
+ EXPECT_TRUE(io2.metadata.m.bit_depth.floating_point_sample);
+ EXPECT_EQ(exponent_bit_depth,
+ io2.metadata.m.bit_depth.exponent_bits_per_sample);
+ EXPECT_EQ(0u, io2.metadata.m.GetAlphaBits());
+ }
+}
+#endif
+
+TEST(JxlTest, RoundtripGrayscale) {
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/cvo9xd_keong_macan_grayscale.png");
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+ ASSERT_NE(io.xsize(), 0u);
+ io.ShrinkTo(128, 128);
+ EXPECT_TRUE(io.Main().IsGray());
+ EXPECT_EQ(8u, io.metadata.m.bit_depth.bits_per_sample);
+ EXPECT_FALSE(io.metadata.m.bit_depth.floating_point_sample);
+ EXPECT_EQ(0u, io.metadata.m.bit_depth.exponent_bits_per_sample);
+ EXPECT_TRUE(io.metadata.m.color_encoding.tf.IsSRGB());
+
+ PassesEncoderState enc_state;
+ AuxOut* aux_out = nullptr;
+
+ {
+ CompressParams cparams;
+ cparams.butteraugli_distance = 1.0;
+
+ PaddedBytes compressed;
+ EXPECT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
+ aux_out));
+ CodecInOut io2;
+ EXPECT_TRUE(test::DecodeFile({}, Span<const uint8_t>(compressed), &io2));
+ EXPECT_TRUE(io2.Main().IsGray());
+
+ EXPECT_LE(compressed.size(), 7000u);
+ EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, cparams.ba_params,
+ GetJxlCms(),
+ /*distmap=*/nullptr),
+ IsSlightlyBelow(1.6));
+ }
+
+ // Test with larger butteraugli distance and other settings enabled so
+ // different jxl codepaths trigger.
+ {
+ CompressParams cparams;
+ cparams.butteraugli_distance = 8.0;
+
+ PaddedBytes compressed;
+ EXPECT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
+ aux_out));
+ CodecInOut io2;
+ EXPECT_TRUE(test::DecodeFile({}, Span<const uint8_t>(compressed), &io2));
+ EXPECT_TRUE(io2.Main().IsGray());
+
+ EXPECT_LE(compressed.size(), 1300u);
+ EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, cparams.ba_params,
+ GetJxlCms(),
+ /*distmap=*/nullptr),
+ IsSlightlyBelow(6.0));
+ }
+
+ {
+ CompressParams cparams;
+ cparams.butteraugli_distance = 1.0;
+
+ PaddedBytes compressed;
+ EXPECT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
+ aux_out));
+
+ CodecInOut io2;
+ JXLDecompressParams dparams;
+ dparams.color_space = "RGB_D65_SRG_Rel_SRG";
+ EXPECT_TRUE(
+ test::DecodeFile(dparams, Span<const uint8_t>(compressed), &io2));
+ EXPECT_FALSE(io2.Main().IsGray());
+
+ EXPECT_LE(compressed.size(), 7000u);
+ EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, cparams.ba_params,
+ GetJxlCms(),
+ /*distmap=*/nullptr),
+ IsSlightlyBelow(1.6));
+ }
+}
+
+TEST(JxlTest, RoundtripAlpha) {
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/tmshre_riaphotographs_alpha.png");
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+
+ ASSERT_NE(io.xsize(), 0u);
+ ASSERT_TRUE(io.metadata.m.HasAlpha());
+ ASSERT_TRUE(io.Main().HasAlpha());
+ io.ShrinkTo(300, 300);
+
+ CompressParams cparams;
+ cparams.butteraugli_distance = 1.0;
+
+ EXPECT_EQ(8u, io.metadata.m.bit_depth.bits_per_sample);
+ EXPECT_FALSE(io.metadata.m.bit_depth.floating_point_sample);
+ EXPECT_EQ(0u, io.metadata.m.bit_depth.exponent_bits_per_sample);
+ EXPECT_TRUE(io.metadata.m.color_encoding.tf.IsSRGB());
+ PassesEncoderState enc_state;
+ AuxOut* aux_out = nullptr;
+ PaddedBytes compressed;
+ EXPECT_TRUE(
+ EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(), aux_out));
+
+ EXPECT_LE(compressed.size(), 10077u);
+
+ for (bool use_image_callback : {false, true}) {
+ for (bool unpremul_alpha : {false, true}) {
+ CodecInOut io2;
+ JXLDecompressParams dparams;
+ dparams.use_image_callback = use_image_callback;
+ dparams.unpremultiply_alpha = unpremul_alpha;
+ EXPECT_TRUE(
+ test::DecodeFile(dparams, Span<const uint8_t>(compressed), &io2));
+ EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, cparams.ba_params,
+ GetJxlCms(),
+ /*distmap=*/nullptr),
+ IsSlightlyBelow(1.15));
+ }
+ }
+}
+
+namespace {
+// Performs "PremultiplyAlpha" for each ImageBundle (preview/frames).
+bool PremultiplyAlpha(CodecInOut& io) {
+ const auto doPremultiplyAlpha = [](ImageBundle& bundle) {
+ if (!bundle.HasAlpha()) return;
+ if (!bundle.HasColor()) return;
+ auto* color = bundle.color();
+ const auto* alpha = bundle.alpha();
+ JXL_CHECK(color->ysize() == alpha->ysize());
+ JXL_CHECK(color->xsize() == alpha->xsize());
+ for (size_t y = 0; y < color->ysize(); y++) {
+ ::jxl::PremultiplyAlpha(color->PlaneRow(0, y), color->PlaneRow(1, y),
+ color->PlaneRow(2, y), alpha->Row(y),
+ color->xsize());
+ }
+ };
+ ExtraChannelInfo* eci = io.metadata.m.Find(ExtraChannel::kAlpha);
+ if (eci == nullptr || eci->alpha_associated) return false;
+ if (io.metadata.m.have_preview) {
+ doPremultiplyAlpha(io.preview_frame);
+ }
+ for (ImageBundle& ib : io.frames) {
+ doPremultiplyAlpha(ib);
+ }
+ eci->alpha_associated = true;
+ return true;
+}
+
+bool UnpremultiplyAlpha(CodecInOut& io) {
+ const auto doUnpremultiplyAlpha = [](ImageBundle& bundle) {
+ if (!bundle.HasAlpha()) return;
+ if (!bundle.HasColor()) return;
+ auto* color = bundle.color();
+ const auto* alpha = bundle.alpha();
+ JXL_CHECK(color->ysize() == alpha->ysize());
+ JXL_CHECK(color->xsize() == alpha->xsize());
+ for (size_t y = 0; y < color->ysize(); y++) {
+ ::jxl::UnpremultiplyAlpha(color->PlaneRow(0, y), color->PlaneRow(1, y),
+ color->PlaneRow(2, y), alpha->Row(y),
+ color->xsize());
+ }
+ };
+ ExtraChannelInfo* eci = io.metadata.m.Find(ExtraChannel::kAlpha);
+ if (eci == nullptr || !eci->alpha_associated) return false;
+ if (io.metadata.m.have_preview) {
+ doUnpremultiplyAlpha(io.preview_frame);
+ }
+ for (ImageBundle& ib : io.frames) {
+ doUnpremultiplyAlpha(ib);
+ }
+ eci->alpha_associated = false;
+ return true;
+}
+} // namespace
+
+TEST(JxlTest, RoundtripAlphaPremultiplied) {
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/tmshre_riaphotographs_alpha.png");
+ CodecInOut io, io_nopremul;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io_nopremul));
+
+ ASSERT_NE(io.xsize(), 0u);
+ ASSERT_TRUE(io.metadata.m.HasAlpha());
+ ASSERT_TRUE(io.Main().HasAlpha());
+ io.ShrinkTo(300, 300);
+ io_nopremul.ShrinkTo(300, 300);
+
+ CompressParams cparams;
+ cparams.butteraugli_distance = 1.0;
+
+ EXPECT_FALSE(io.Main().AlphaIsPremultiplied());
+ EXPECT_TRUE(PremultiplyAlpha(io));
+ EXPECT_TRUE(io.Main().AlphaIsPremultiplied());
+
+ EXPECT_FALSE(io_nopremul.Main().AlphaIsPremultiplied());
+
+ PassesEncoderState enc_state;
+ AuxOut* aux_out = nullptr;
+ PaddedBytes compressed;
+ EXPECT_TRUE(
+ EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(), aux_out));
+ EXPECT_LE(compressed.size(), 10000u);
+
+ for (bool use_image_callback : {false, true}) {
+ for (bool unpremul_alpha : {false, true}) {
+ for (bool use_uint8 : {false, true}) {
+ printf(
+ "Testing premultiplied alpha using %s %s requesting "
+ "%spremultiplied output.\n",
+ use_uint8 ? "uint8" : "float",
+ use_image_callback ? "image callback" : "image_buffer",
+ unpremul_alpha ? "un" : "");
+ CodecInOut io2;
+ JXLDecompressParams dparams;
+ dparams.use_image_callback = use_image_callback;
+ dparams.unpremultiply_alpha = unpremul_alpha;
+ if (use_uint8) {
+ dparams.accepted_formats = {
+ {4, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0}};
+ }
+ EXPECT_TRUE(
+ test::DecodeFile(dparams, Span<const uint8_t>(compressed), &io2));
+
+ EXPECT_EQ(unpremul_alpha, !io2.Main().AlphaIsPremultiplied());
+ if (!unpremul_alpha) {
+ EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames,
+ cparams.ba_params, GetJxlCms(),
+ /*distmap=*/nullptr),
+ IsSlightlyBelow(1.2));
+ EXPECT_TRUE(UnpremultiplyAlpha(io2));
+ EXPECT_FALSE(io2.Main().AlphaIsPremultiplied());
+ }
+ EXPECT_THAT(ButteraugliDistance(io_nopremul.frames, io2.frames,
+ cparams.ba_params, GetJxlCms(),
+ /*distmap=*/nullptr),
+ IsSlightlyBelow(1.47));
+ }
+ }
+ }
+}
+
+TEST(JxlTest, RoundtripAlphaResampling) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/tmshre_riaphotographs_alpha.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+ ASSERT_NE(t.ppf().info.xsize, 0);
+ ASSERT_TRUE(t.ppf().info.alpha_bits > 0);
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 5); // kHare
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 2);
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EXTRA_CHANNEL_RESAMPLING, 2);
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 12803, 130);
+ EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(5.2));
+}
+
+TEST(JxlTest, RoundtripAlphaResamplingOnlyAlpha) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/tmshre_riaphotographs_alpha.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+ ASSERT_NE(t.ppf().info.xsize, 0);
+ ASSERT_TRUE(t.ppf().info.alpha_bits > 0);
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 3); // kFalcon
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EXTRA_CHANNEL_RESAMPLING, 2);
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 33571, 400);
+ EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.49));
+}
+
+TEST(JxlTest, RoundtripAlphaNonMultipleOf8) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/tmshre_riaphotographs_alpha.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata().SetDimensions(12, 12);
+ ASSERT_NE(t.ppf().info.xsize, 0);
+ ASSERT_TRUE(t.ppf().info.alpha_bits > 0);
+ EXPECT_EQ(t.ppf().frames[0].color.format.data_type, JXL_TYPE_UINT8);
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), {}, {}, pool, &ppf_out), 107, 10);
+ EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(0.95));
+}
+
+TEST(JxlTest, RoundtripAlpha16) {
+ ThreadPoolForTests pool(4);
+ // The image is wider than 512 pixels to ensure multiple groups are tested.
+ size_t xsize = 1200, ysize = 160;
+ TestImage t;
+ t.SetDimensions(xsize, ysize).SetChannels(4).SetAllBitDepths(16);
+ TestImage::Frame frame = t.AddFrame();
+ // Generate 16-bit pattern that uses various colors and alpha values.
+ const float mul = 1.0f / 65535;
+ for (size_t y = 0; y < ysize; y++) {
+ for (size_t x = 0; x < xsize; x++) {
+ uint16_t r = y * 65535 / ysize;
+ uint16_t g = x * 65535 / xsize;
+ uint16_t b = (y + x) * 65535 / (xsize + ysize);
+ frame.SetValue(y, x, 0, r * mul);
+ frame.SetValue(y, x, 1, g * mul);
+ frame.SetValue(y, x, 2, b * mul);
+ frame.SetValue(y, x, 3, g * mul);
+ }
+ }
+
+ ASSERT_NE(t.ppf().info.xsize, 0);
+ ASSERT_EQ(t.ppf().info.alpha_bits, 16);
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 6); // kWombat
+ cparams.distance = 0.5;
+
+ PackedPixelFile ppf_out;
+ // TODO(szabadka) Investigate big size difference on i686
+ // This still keeps happening (2023-04-18).
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 3466, 120);
+ EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(0.65));
+}
+
+namespace {
+JXLCompressParams CompressParamsForLossless() {
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_MODULAR, 1);
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_COLOR_TRANSFORM, 1);
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR, 6); // Weighted
+ cparams.distance = 0;
+ return cparams;
+}
+} // namespace
+
+TEST(JxlTest, JXL_SLOW_TEST(RoundtripLossless8)) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/tmshre_riaphotographs_srgb8.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+
+ JXLCompressParams cparams = CompressParamsForLossless();
+
+ PackedPixelFile ppf_out;
+ EXPECT_EQ(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 222167);
+ EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0);
+}
+
+TEST(JxlTest, JXL_SLOW_TEST(RoundtripLossless8ThunderGradient)) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/tmshre_riaphotographs_srgb8.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+
+ JXLCompressParams cparams = CompressParamsForLossless();
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 2); // kThunder
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR, 5); // Gradient
+
+ PackedPixelFile ppf_out;
+ EXPECT_EQ(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 261684);
+ EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0);
+}
+
+TEST(JxlTest, JXL_SLOW_TEST(RoundtripLossless8LightningGradient)) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/tmshre_riaphotographs_srgb8.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+
+ JXLCompressParams cparams = CompressParamsForLossless();
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 1); // kLightning
+
+ PackedPixelFile ppf_out;
+ // Lax comparison because different SIMD will cause different compression.
+ EXPECT_THAT(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out),
+ IsSlightlyBelow(286848u));
+ EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0);
+}
+
+TEST(JxlTest, JXL_SLOW_TEST(RoundtripLossless8Falcon)) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/tmshre_riaphotographs_srgb8.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+
+ JXLCompressParams cparams = CompressParamsForLossless();
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 3); // kFalcon
+
+ PackedPixelFile ppf_out;
+ EXPECT_EQ(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 230766);
+ EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0);
+}
+
+TEST(JxlTest, RoundtripLossless8Alpha) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/tmshre_riaphotographs_alpha.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+ ASSERT_EQ(t.ppf().info.alpha_bits, 8);
+ EXPECT_EQ(t.ppf().frames[0].color.format.data_type, JXL_TYPE_UINT8);
+
+ JXLCompressParams cparams = CompressParamsForLossless();
+
+ JXLDecompressParams dparams;
+ dparams.accepted_formats.push_back(t.ppf().frames[0].color.format);
+
+ PackedPixelFile ppf_out;
+ EXPECT_EQ(Roundtrip(t.ppf(), cparams, dparams, pool, &ppf_out), 248817);
+ EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0);
+ EXPECT_EQ(ppf_out.info.alpha_bits, 8);
+ EXPECT_TRUE(test::SameAlpha(t.ppf(), ppf_out));
+}
+
+TEST(JxlTest, RoundtripLossless16Alpha) {
+ ThreadPool* pool = nullptr;
+ size_t xsize = 1200, ysize = 160;
+ TestImage t;
+ t.SetDimensions(xsize, ysize).SetChannels(4).SetAllBitDepths(16);
+ TestImage::Frame frame = t.AddFrame();
+ // Generate 16-bit pattern that uses various colors and alpha values.
+ const float mul = 1.0f / 65535;
+ for (size_t y = 0; y < ysize; y++) {
+ for (size_t x = 0; x < xsize; x++) {
+ uint16_t r = y * 65535 / ysize;
+ uint16_t g = x * 65535 / xsize + 37;
+ uint16_t b = (y + x) * 65535 / (xsize + ysize);
+ frame.SetValue(y, x, 0, r * mul);
+ frame.SetValue(y, x, 1, g * mul);
+ frame.SetValue(y, x, 2, b * mul);
+ frame.SetValue(y, x, 3, g * mul);
+ }
+ }
+ ASSERT_EQ(t.ppf().info.bits_per_sample, 16);
+ ASSERT_EQ(t.ppf().info.alpha_bits, 16);
+
+ JXLCompressParams cparams = CompressParamsForLossless();
+
+ JXLDecompressParams dparams;
+ dparams.accepted_formats.push_back(t.ppf().frames[0].color.format);
+
+ PackedPixelFile ppf_out;
+ // TODO(szabadka) Investigate big size difference on i686
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, dparams, pool, &ppf_out), 4849, 100);
+ EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0);
+ EXPECT_EQ(ppf_out.info.alpha_bits, 16);
+ EXPECT_TRUE(test::SameAlpha(t.ppf(), ppf_out));
+}
+
+TEST(JxlTest, RoundtripLossless16AlphaNotMisdetectedAs8Bit) {
+ ThreadPool* pool = nullptr;
+ size_t xsize = 128, ysize = 128;
+ TestImage t;
+ t.SetDimensions(xsize, ysize).SetChannels(4).SetAllBitDepths(16);
+ TestImage::Frame frame = t.AddFrame();
+ // All 16-bit values, both color and alpha, of this image are below 64.
+ // This allows testing if a code path wrongly concludes it's an 8-bit instead
+ // of 16-bit image (or even 6-bit).
+ const float mul = 1.0f / 65535;
+ for (size_t y = 0; y < ysize; y++) {
+ for (size_t x = 0; x < xsize; x++) {
+ uint16_t r = y * 64 / ysize;
+ uint16_t g = x * 64 / xsize + 37;
+ uint16_t b = (y + x) * 64 / (xsize + ysize);
+ frame.SetValue(y, x, 0, r * mul);
+ frame.SetValue(y, x, 1, g * mul);
+ frame.SetValue(y, x, 2, b * mul);
+ frame.SetValue(y, x, 3, g * mul);
+ }
+ }
+ ASSERT_EQ(t.ppf().info.bits_per_sample, 16);
+ ASSERT_EQ(t.ppf().info.alpha_bits, 16);
+
+ JXLCompressParams cparams = CompressParamsForLossless();
+
+ JXLDecompressParams dparams;
+ dparams.accepted_formats.push_back(t.ppf().frames[0].color.format);
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, dparams, pool, &ppf_out), 543, 75);
+ EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0);
+ EXPECT_EQ(ppf_out.info.bits_per_sample, 16);
+ EXPECT_EQ(ppf_out.info.alpha_bits, 16);
+ EXPECT_TRUE(test::SameAlpha(t.ppf(), ppf_out));
+}
+
+TEST(JxlTest, RoundtripDots) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/cvo9xd_keong_macan_srgb8.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+ ASSERT_NE(t.ppf().info.xsize, 0);
+ EXPECT_EQ(t.ppf().info.bits_per_sample, 8);
+ EXPECT_EQ(t.ppf().color_encoding.transfer_function,
+ JXL_TRANSFER_FUNCTION_SRGB);
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7); // kSkirrel
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_DOTS, 1);
+ cparams.distance = 0.04;
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 284295, 3000);
+ EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(0.35));
+}
+
+TEST(JxlTest, RoundtripNoise) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/cvo9xd_keong_macan_srgb8.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+ ASSERT_NE(t.ppf().info.xsize, 0);
+ EXPECT_EQ(t.ppf().info.bits_per_sample, 8);
+ EXPECT_EQ(t.ppf().color_encoding.transfer_function,
+ JXL_TRANSFER_FUNCTION_SRGB);
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7); // kSkirrel
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_NOISE, 1);
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 41385, 750);
+ EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.48));
+}
+
+TEST(JxlTest, RoundtripLossless8Gray) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/cvo9xd_keong_macan_grayscale.png");
+ TestImage t;
+ t.SetColorEncoding("Gra_D65_Rel_SRG").DecodeFromBytes(orig).ClearMetadata();
+ EXPECT_EQ(t.ppf().color_encoding.color_space, JXL_COLOR_SPACE_GRAY);
+ EXPECT_EQ(t.ppf().info.bits_per_sample, 8);
+
+ JXLCompressParams cparams = CompressParamsForLossless();
+
+ JXLDecompressParams dparams;
+ dparams.accepted_formats.push_back(t.ppf().frames[0].color.format);
+
+ PackedPixelFile ppf_out;
+ EXPECT_EQ(Roundtrip(t.ppf(), cparams, dparams, pool, &ppf_out), 92766);
+ EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0);
+ EXPECT_EQ(ppf_out.color_encoding.color_space, JXL_COLOR_SPACE_GRAY);
+ EXPECT_EQ(ppf_out.info.bits_per_sample, 8);
+}
+
+#if JPEGXL_ENABLE_GIF
+
+TEST(JxlTest, RoundtripAnimation) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/traffic_light.gif");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+ EXPECT_EQ(4, t.ppf().frames.size());
+
+ JXLDecompressParams dparams;
+ dparams.accepted_formats.push_back(t.ppf().frames[0].color.format);
+
+ PackedPixelFile ppf_out;
+ EXPECT_THAT(Roundtrip(t.ppf(), {}, dparams, pool, &ppf_out),
+ IsSlightlyBelow(2600));
+
+ t.CoalesceGIFAnimationWithAlpha();
+ ASSERT_EQ(ppf_out.frames.size(), t.ppf().frames.size());
+ EXPECT_LE(ButteraugliDistance(t.ppf(), ppf_out),
+#if JXL_HIGH_PRECISION
+ 1.55);
+#else
+ 1.75);
+#endif
+}
+
+TEST(JxlTest, RoundtripLosslessAnimation) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/traffic_light.gif");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+ EXPECT_EQ(4, t.ppf().frames.size());
+
+ JXLCompressParams cparams = CompressParamsForLossless();
+
+ JXLDecompressParams dparams;
+ dparams.accepted_formats.push_back(t.ppf().frames[0].color.format);
+
+ PackedPixelFile ppf_out;
+ EXPECT_EQ(Roundtrip(t.ppf(), cparams, dparams, pool, &ppf_out), 958);
+
+ t.CoalesceGIFAnimationWithAlpha();
+ ASSERT_EQ(ppf_out.frames.size(), t.ppf().frames.size());
+ EXPECT_LE(ButteraugliDistance(t.ppf(), ppf_out), 5e-4);
+}
+
+TEST(JxlTest, RoundtripAnimationPatches) {
+ ThreadPool* pool = nullptr;
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/animation_patches.gif");
+
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata();
+ ASSERT_EQ(2u, t.ppf().frames.size());
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_PATCHES, 1);
+
+ JXLDecompressParams dparams;
+ dparams.accepted_formats.push_back(t.ppf().frames[0].color.format);
+
+ PackedPixelFile ppf_out;
+ // 40k with no patches, 27k with patch frames encoded multiple times.
+ EXPECT_THAT(Roundtrip(t.ppf(), cparams, dparams, pool, &ppf_out),
+ IsSlightlyBelow(16710));
+ EXPECT_EQ(ppf_out.frames.size(), t.ppf().frames.size());
+ // >10 with broken patches
+ EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.05));
+}
+
+#endif // JPEGXL_ENABLE_GIF
+
+size_t RoundtripJpeg(const PaddedBytes& jpeg_in, ThreadPool* pool) {
+ std::vector<uint8_t> jpeg_bytes(jpeg_in.data(),
+ jpeg_in.data() + jpeg_in.size());
+ std::vector<uint8_t> compressed;
+ EXPECT_TRUE(extras::EncodeImageJXL({}, extras::PackedPixelFile(), &jpeg_bytes,
+ &compressed));
+
+ jxl::JXLDecompressParams dparams;
+ test::SetThreadParallelRunner(dparams, pool);
+ std::vector<uint8_t> out;
+ jxl::PackedPixelFile ppf;
+ EXPECT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), dparams,
+ nullptr, &ppf, &out));
+ EXPECT_EQ(out.size(), jpeg_in.size());
+ size_t failures = 0;
+ for (size_t i = 0; i < std::min(out.size(), jpeg_in.size()); i++) {
+ if (out[i] != jpeg_in[i]) {
+ EXPECT_EQ(out[i], jpeg_in[i])
+ << "byte mismatch " << i << " " << out[i] << " != " << jpeg_in[i];
+ if (++failures > 4) {
+ return compressed.size();
+ }
+ }
+ }
+ return compressed.size();
+}
+
+void RoundtripJpegToPixels(const PaddedBytes& jpeg_in,
+ JXLDecompressParams dparams, ThreadPool* pool,
+ PackedPixelFile* ppf_out) {
+ std::vector<uint8_t> jpeg_bytes(jpeg_in.data(),
+ jpeg_in.data() + jpeg_in.size());
+ std::vector<uint8_t> compressed;
+ EXPECT_TRUE(extras::EncodeImageJXL({}, extras::PackedPixelFile(), &jpeg_bytes,
+ &compressed));
+
+ test::SetThreadParallelRunner(dparams, pool);
+ EXPECT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), dparams,
+ nullptr, ppf_out, nullptr));
+}
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression444)) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig =
+ jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_444.jpg");
+ // JPEG size is 696,659 bytes.
+ EXPECT_NEAR(RoundtripJpeg(orig, &pool), 568940u, 10);
+}
+
+#if JPEGXL_ENABLE_JPEG
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionToPixels)) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig =
+ jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_444.jpg");
+ TestImage t;
+ t.DecodeFromBytes(orig);
+
+ PackedPixelFile ppf_out;
+ RoundtripJpegToPixels(orig, {}, &pool, &ppf_out);
+ EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(12));
+}
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionToPixels420)) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig =
+ jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_420.jpg");
+ TestImage t;
+ t.DecodeFromBytes(orig);
+
+ PackedPixelFile ppf_out;
+ RoundtripJpegToPixels(orig, {}, &pool, &ppf_out);
+ EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(11));
+}
+
+TEST(JxlTest,
+ JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionToPixels420EarlyFlush)) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig =
+ jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_420.jpg");
+ TestImage t;
+ t.DecodeFromBytes(orig);
+
+ JXLDecompressParams dparams;
+ dparams.max_downsampling = 8;
+
+ PackedPixelFile ppf_out;
+ RoundtripJpegToPixels(orig, dparams, &pool, &ppf_out);
+ EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(4410));
+}
+
+TEST(JxlTest,
+ JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionToPixels420Mul16)) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig =
+ jxl::test::ReadTestData("jxl/flower/flower_cropped.jpg");
+ TestImage t;
+ t.DecodeFromBytes(orig);
+
+ PackedPixelFile ppf_out;
+ RoundtripJpegToPixels(orig, {}, &pool, &ppf_out);
+ EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(4));
+}
+
+TEST(JxlTest,
+ JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionToPixels_asymmetric)) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig =
+ jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_asymmetric.jpg");
+ TestImage t;
+ t.DecodeFromBytes(orig);
+
+ PackedPixelFile ppf_out;
+ RoundtripJpegToPixels(orig, {}, &pool, &ppf_out);
+ EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(10));
+}
+
+#endif
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionGray)) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig =
+ jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_gray.jpg");
+ // JPEG size is 456,528 bytes.
+ EXPECT_NEAR(RoundtripJpeg(orig, &pool), 387496u, 200);
+}
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression420)) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig =
+ jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_420.jpg");
+ // JPEG size is 546,797 bytes.
+ EXPECT_NEAR(RoundtripJpeg(orig, &pool), 455560u, 10);
+}
+
+TEST(JxlTest,
+ JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression_luma_subsample)) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "jxl/flower/flower.png.im_q85_luma_subsample.jpg");
+ // JPEG size is 400,724 bytes.
+ EXPECT_NEAR(RoundtripJpeg(orig, &pool), 325354u, 10);
+}
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression444_12)) {
+ // 444 JPEG that has an interesting sampling-factor (1x2, 1x2, 1x2).
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig =
+ jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_444_1x2.jpg");
+ // JPEG size is 703,874 bytes.
+ EXPECT_NEAR(RoundtripJpeg(orig, &pool), 569679u, 10);
+}
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression422)) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig =
+ jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_422.jpg");
+ // JPEG size is 522,057 bytes.
+ EXPECT_NEAR(RoundtripJpeg(orig, &pool), 499282u, 10);
+}
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression440)) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig =
+ jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_440.jpg");
+ // JPEG size is 603,623 bytes.
+ EXPECT_NEAR(RoundtripJpeg(orig, &pool), 501151u, 10);
+}
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression_asymmetric)) {
+ // 2x vertical downsample of one chroma channel, 2x horizontal downsample of
+ // the other.
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig =
+ jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_asymmetric.jpg");
+ // JPEG size is 604,601 bytes.
+ EXPECT_NEAR(RoundtripJpeg(orig, &pool), 500602u, 10);
+}
+
+TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression420Progr)) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig =
+ jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_420_progr.jpg");
+ // JPEG size is 522,057 bytes.
+ EXPECT_NEAR(RoundtripJpeg(orig, &pool), 455499u, 10);
+}
+
+TEST(JxlTest, RoundtripProgressive) {
+ ThreadPoolForTests pool(4);
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata().SetDimensions(600, 1024);
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC, 1);
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC, 1);
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_RESPONSIVE, 1);
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 61635, 750);
+ EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.4));
+}
+
+TEST(JxlTest, RoundtripProgressiveLevel2Slow) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+ TestImage t;
+ t.DecodeFromBytes(orig).ClearMetadata().SetDimensions(600, 1024);
+
+ JXLCompressParams cparams;
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 9); // kTortoise
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC, 2);
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC, 1);
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_RESPONSIVE, 1);
+
+ PackedPixelFile ppf_out;
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 72841, 1000);
+ EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.17));
+}
+
+TEST(JxlTest, RoundtripUnsignedCustomBitdepthLossless) {
+ ThreadPool* pool = nullptr;
+ for (uint32_t num_channels = 1; num_channels < 6; ++num_channels) {
+ for (JxlEndianness endianness : {JXL_LITTLE_ENDIAN, JXL_BIG_ENDIAN}) {
+ for (uint32_t bitdepth = 3; bitdepth <= 16; ++bitdepth) {
+ if (bitdepth <= 8 && endianness == JXL_BIG_ENDIAN) continue;
+ printf("Testing %u channel unsigned %u bit %s endian lossless.\n",
+ num_channels, bitdepth,
+ endianness == JXL_LITTLE_ENDIAN ? "little" : "big");
+ TestImage t;
+ t.SetDimensions(256, 256).SetChannels(num_channels);
+ t.SetAllBitDepths(bitdepth).SetEndianness(endianness);
+ TestImage::Frame frame = t.AddFrame();
+ frame.RandomFill();
+
+ JXLCompressParams cparams = CompressParamsForLossless();
+ cparams.input_bitdepth.type = JXL_BIT_DEPTH_FROM_CODESTREAM;
+
+ JXLDecompressParams dparams;
+ dparams.accepted_formats.push_back(t.ppf().frames[0].color.format);
+ dparams.output_bitdepth.type = JXL_BIT_DEPTH_FROM_CODESTREAM;
+
+ PackedPixelFile ppf_out;
+ Roundtrip(t.ppf(), cparams, dparams, pool, &ppf_out);
+
+ ASSERT_TRUE(test::SamePixels(t.ppf(), ppf_out));
+ }
+ }
+ }
+}
+
+TEST(JxlTest, LosslessPNMRoundtrip) {
+ static const char* kChannels[] = {"", "g", "ga", "rgb", "rgba"};
+ static const char* kExtension[] = {"", ".pgm", ".pam", ".ppm", ".pam"};
+ for (size_t bit_depth = 1; bit_depth <= 16; ++bit_depth) {
+ for (size_t channels = 1; channels <= 4; ++channels) {
+ if (bit_depth == 1 && (channels == 2 || channels == 4)) continue;
+ std::string extension(kExtension[channels]);
+ std::string filename = "jxl/flower/flower_small." +
+ std::string(kChannels[channels]) + ".depth" +
+ std::to_string(bit_depth) + extension;
+ const PaddedBytes orig = jxl::test::ReadTestData(filename);
+ test::TestImage t;
+ if (channels < 3) t.SetColorEncoding("Gra_D65_Rel_SRG");
+ t.DecodeFromBytes(orig);
+
+ JXLCompressParams cparams = CompressParamsForLossless();
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 1); // kLightning
+ cparams.input_bitdepth.type = JXL_BIT_DEPTH_FROM_CODESTREAM;
+
+ JXLDecompressParams dparams;
+ dparams.accepted_formats.push_back(t.ppf().frames[0].color.format);
+ dparams.output_bitdepth.type = JXL_BIT_DEPTH_FROM_CODESTREAM;
+
+ PackedPixelFile ppf_out;
+ Roundtrip(t.ppf(), cparams, dparams, nullptr, &ppf_out);
+
+ extras::EncodedImage encoded;
+ auto encoder = extras::Encoder::FromExtension(extension);
+ ASSERT_TRUE(encoder.get());
+ ASSERT_TRUE(encoder->Encode(ppf_out, &encoded, nullptr));
+ ASSERT_EQ(encoded.bitstreams.size(), 1);
+ ASSERT_EQ(orig.size(), encoded.bitstreams[0].size());
+ EXPECT_EQ(0,
+ memcmp(orig.data(), encoded.bitstreams[0].data(), orig.size()));
+ }
+ }
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/lehmer_code.h b/third_party/jpeg-xl/lib/jxl/lehmer_code.h
new file mode 100644
index 0000000000..dd1d21c6f7
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/lehmer_code.h
@@ -0,0 +1,102 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_LEHMER_CODE_H_
+#define LIB_JXL_LEHMER_CODE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Permutation <=> factorial base representation (Lehmer code).
+
+using LehmerT = uint32_t;
+
+template <typename T>
+constexpr T ValueOfLowest1Bit(T t) {
+ return t & -t;
+}
+
+// Computes the Lehmer (factorial basis) code of permutation, an array of n
+// unique indices in [0..n), and stores it in code[0..len). N*logN time.
+// temp must have n + 1 elements but need not be initialized.
+template <typename PermutationT>
+void ComputeLehmerCode(const PermutationT* JXL_RESTRICT permutation,
+ uint32_t* JXL_RESTRICT temp, const size_t n,
+ LehmerT* JXL_RESTRICT code) {
+ for (size_t idx = 0; idx < n + 1; ++idx) temp[idx] = 0;
+
+ for (size_t idx = 0; idx < n; ++idx) {
+ const PermutationT s = permutation[idx];
+
+ // Compute sum in Fenwick tree
+ uint32_t penalty = 0;
+ uint32_t i = s + 1;
+ while (i != 0) {
+ penalty += temp[i];
+ i &= i - 1; // clear lowest bit
+ }
+ JXL_DASSERT(s >= penalty);
+ code[idx] = s - penalty;
+ i = s + 1;
+ // Add operation in Fenwick tree
+ while (i < n + 1) {
+ temp[i] += 1;
+ i += ValueOfLowest1Bit(i);
+ }
+ }
+}
+
+// Decodes the Lehmer code in code[0..n) into permutation[0..n).
+// temp must have 1 << CeilLog2(n) elements but need not be initialized.
+template <typename PermutationT>
+void DecodeLehmerCode(const LehmerT* JXL_RESTRICT code,
+ uint32_t* JXL_RESTRICT temp, size_t n,
+ PermutationT* JXL_RESTRICT permutation) {
+ JXL_DASSERT(n != 0);
+ const size_t log2n = CeilLog2Nonzero(n);
+ const size_t padded_n = 1ull << log2n;
+
+ for (size_t i = 0; i < padded_n; i++) {
+ const int32_t i1 = static_cast<int32_t>(i + 1);
+ temp[i] = static_cast<uint32_t>(ValueOfLowest1Bit(i1));
+ }
+
+ for (size_t i = 0; i < n; i++) {
+ JXL_DASSERT(code[i] + i < n);
+ uint32_t rank = code[i] + 1;
+
+ // Extract i-th unused element via implicit order-statistics tree.
+ size_t bit = padded_n;
+ size_t next = 0;
+ for (size_t i = 0; i <= log2n; i++) {
+ const size_t cand = next + bit;
+ JXL_DASSERT(cand >= 1);
+ bit >>= 1;
+ if (temp[cand - 1] < rank) {
+ next = cand;
+ rank -= temp[cand - 1];
+ }
+ }
+
+ permutation[i] = next;
+
+ // Mark as used
+ next += 1;
+ while (next <= padded_n) {
+ temp[next - 1] -= 1;
+ next += ValueOfLowest1Bit(next);
+ }
+ }
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_LEHMER_CODE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/lehmer_code_test.cc b/third_party/jpeg-xl/lib/jxl/lehmer_code_test.cc
new file mode 100644
index 0000000000..acda762545
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/lehmer_code_test.cc
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/lehmer_code.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <numeric>
+#include <vector>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+template <typename PermutationT>
+struct WorkingSet {
+ explicit WorkingSet(size_t max_n)
+ : padded_n(1ull << CeilLog2Nonzero(max_n + 1)),
+ permutation(max_n),
+ temp(padded_n),
+ lehmer(max_n),
+ decoded(max_n) {}
+
+ size_t padded_n;
+ std::vector<PermutationT> permutation;
+ std::vector<uint32_t> temp;
+ std::vector<LehmerT> lehmer;
+ std::vector<PermutationT> decoded;
+};
+
+template <typename PermutationT>
+void Roundtrip(size_t n, WorkingSet<PermutationT>* ws) {
+ JXL_ASSERT(n != 0);
+ const size_t padded_n = 1ull << CeilLog2Nonzero(n);
+
+ Rng rng(n * 65537 + 13);
+
+ // Ensure indices fit into PermutationT
+ EXPECT_LE(n, 1ULL << (sizeof(PermutationT) * 8));
+
+ std::iota(ws->permutation.begin(), ws->permutation.begin() + n, 0);
+
+ // For various random permutations:
+ for (size_t rep = 0; rep < 3; ++rep) {
+ rng.Shuffle(ws->permutation.data(), n);
+
+ // Must decode to the same permutation
+ ComputeLehmerCode(ws->permutation.data(), ws->temp.data(), n,
+ ws->lehmer.data());
+ memset(ws->temp.data(), 0, padded_n * 4);
+ DecodeLehmerCode(ws->lehmer.data(), ws->temp.data(), n, ws->decoded.data());
+
+ for (size_t i = 0; i < n; ++i) {
+ EXPECT_EQ(ws->permutation[i], ws->decoded[i]);
+ }
+ }
+}
+
+// Preallocates arrays and tests n = [begin, end).
+template <typename PermutationT>
+void RoundtripSizeRange(ThreadPool* pool, uint32_t begin, uint32_t end) {
+ ASSERT_NE(0u, begin); // n = 0 not allowed.
+ std::vector<WorkingSet<PermutationT>> working_sets;
+
+ JXL_CHECK(RunOnPool(
+ pool, begin, end,
+ [&working_sets, end](const size_t num_threads) {
+ for (size_t i = 0; i < num_threads; i++) {
+ working_sets.emplace_back(end - 1);
+ }
+ return true;
+ },
+ [&working_sets](const uint32_t n, const size_t thread) {
+ Roundtrip(n, &working_sets[thread]);
+ },
+ "lehmer test"));
+}
+
+TEST(LehmerCodeTest, TestRoundtrips) {
+ test::ThreadPoolForTests pool(8);
+
+ RoundtripSizeRange<uint16_t>(&pool, 1, 1026);
+
+ // Ensures PermutationT can fit > 16 bit values.
+ RoundtripSizeRange<uint32_t>(&pool, 65536, 65540);
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/libjxl.pc.in b/third_party/jpeg-xl/lib/jxl/libjxl.pc.in
new file mode 100644
index 0000000000..4a7af65b7c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/libjxl.pc.in
@@ -0,0 +1,13 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
+libdir=@PKGCONFIG_TARGET_LIBS@
+includedir=@PKGCONFIG_TARGET_INCLUDES@
+
+Name: libjxl
+Description: Loads and saves JPEG XL files
+Version: @JPEGXL_LIBRARY_VERSION@
+Requires.private: @JPEGXL_LIBRARY_REQUIRES@
+Libs: -L${libdir} -ljxl
+Libs.private: -lm
+Cflags: -I${includedir}
+Cflags.private: -DJXL_STATIC_DEFINE
diff --git a/third_party/jpeg-xl/lib/jxl/loop_filter.cc b/third_party/jpeg-xl/lib/jxl/loop_filter.cc
new file mode 100644
index 0000000000..5afe87617d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/loop_filter.cc
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/loop_filter.h"
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+
+LoopFilter::LoopFilter() { Bundle::Init(this); }
+Status LoopFilter::VisitFields(Visitor* JXL_RESTRICT visitor) {
+ // Must come before AllDefault.
+
+ if (visitor->AllDefault(*this, &all_default)) {
+ // Overwrite all serialized fields, but not any nonserialized_*.
+ visitor->SetDefault(this);
+ return true;
+ }
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(true, &gab));
+ if (visitor->Conditional(gab)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &gab_custom));
+ if (visitor->Conditional(gab_custom)) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->F16(1.1 * 0.104699568f, &gab_x_weight1));
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->F16(1.1 * 0.055680538f, &gab_x_weight2));
+ if (std::abs(1.0f + (gab_x_weight1 + gab_x_weight2) * 4) < 1e-8) {
+ return JXL_FAILURE(
+ "Gaborish x weights lead to near 0 unnormalized kernel");
+ }
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->F16(1.1 * 0.104699568f, &gab_y_weight1));
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->F16(1.1 * 0.055680538f, &gab_y_weight2));
+ if (std::abs(1.0f + (gab_y_weight1 + gab_y_weight2) * 4) < 1e-8) {
+ return JXL_FAILURE(
+ "Gaborish y weights lead to near 0 unnormalized kernel");
+ }
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->F16(1.1 * 0.104699568f, &gab_b_weight1));
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->F16(1.1 * 0.055680538f, &gab_b_weight2));
+ if (std::abs(1.0f + (gab_b_weight1 + gab_b_weight2) * 4) < 1e-8) {
+ return JXL_FAILURE(
+ "Gaborish b weights lead to near 0 unnormalized kernel");
+ }
+ }
+ }
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(2, 2, &epf_iters));
+ if (visitor->Conditional(epf_iters > 0)) {
+ if (visitor->Conditional(!nonserialized_is_modular)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &epf_sharp_custom));
+ if (visitor->Conditional(epf_sharp_custom)) {
+ for (size_t i = 0; i < kEpfSharpEntries; ++i) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->F16(
+ float(i) / float(kEpfSharpEntries - 1), &epf_sharp_lut[i]));
+ }
+ }
+ }
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &epf_weight_custom));
+ if (visitor->Conditional(epf_weight_custom)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->F16(40.0f, &epf_channel_scale[0]));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->F16(5.0f, &epf_channel_scale[1]));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->F16(3.5f, &epf_channel_scale[2]));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.45f, &epf_pass1_zeroflush));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.6f, &epf_pass2_zeroflush));
+ }
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &epf_sigma_custom));
+ if (visitor->Conditional(epf_sigma_custom)) {
+ if (visitor->Conditional(!nonserialized_is_modular)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.46f, &epf_quant_mul));
+ }
+ JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.9f, &epf_pass0_sigma_scale));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->F16(6.5f, &epf_pass2_sigma_scale));
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->F16(0.6666666666666666f, &epf_border_sad_mul));
+ }
+ if (visitor->Conditional(nonserialized_is_modular)) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->F16(1.0f, &epf_sigma_for_modular));
+ if (epf_sigma_for_modular < 1e-8) {
+ return JXL_FAILURE("EPF: sigma for modular is too small");
+ }
+ }
+ }
+
+ JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions));
+ // Extensions: in chronological order of being added to the format.
+ return visitor->EndExtensions();
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/loop_filter.h b/third_party/jpeg-xl/lib/jxl/loop_filter.h
new file mode 100644
index 0000000000..e4b418ba2b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/loop_filter.h
@@ -0,0 +1,76 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_LOOP_FILTER_H_
+#define LIB_JXL_LOOP_FILTER_H_
+
+// Parameters for loop filter(s), stored in each frame.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/field_encodings.h"
+
+namespace jxl {
+
+struct LoopFilter : public Fields {
+ LoopFilter();
+ JXL_FIELDS_NAME(LoopFilter)
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+ size_t Padding() const {
+ static const size_t padding_per_epf_iter[4] = {0, 2, 3, 6};
+ return padding_per_epf_iter[epf_iters] + (gab ? 1 : 0);
+ }
+
+ mutable bool all_default;
+
+ // --- Gaborish convolution
+ bool gab;
+
+ bool gab_custom;
+ float gab_x_weight1;
+ float gab_x_weight2;
+ float gab_y_weight1;
+ float gab_y_weight2;
+ float gab_b_weight1;
+ float gab_b_weight2;
+
+ // --- Edge-preserving filter
+
+ // Number of EPF stages to apply. 0 means EPF disabled. 1 applies only the
+ // first stage, 2 applies both stages and 3 applies the first stage twice and
+ // the second stage once.
+ uint32_t epf_iters;
+
+ bool epf_sharp_custom;
+ enum { kEpfSharpEntries = 8 };
+ float epf_sharp_lut[kEpfSharpEntries];
+
+ bool epf_weight_custom; // Custom weight params
+ float epf_channel_scale[3]; // Relative weight of each channel
+ float epf_pass1_zeroflush; // Minimum weight for first pass
+ float epf_pass2_zeroflush; // Minimum weight for second pass
+
+ bool epf_sigma_custom; // Custom sigma parameters
+ float epf_quant_mul; // Sigma is ~ this * quant
+ float epf_pass0_sigma_scale; // Multiplier for sigma in pass 0
+ float epf_pass2_sigma_scale; // Multiplier for sigma in the second pass
+ float epf_border_sad_mul; // (inverse) multiplier for sigma on borders
+
+ float epf_sigma_for_modular;
+
+ uint64_t extensions;
+
+ bool nonserialized_is_modular = false;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_LOOP_FILTER_H_
diff --git a/third_party/jpeg-xl/lib/jxl/luminance.cc b/third_party/jpeg-xl/lib/jxl/luminance.cc
new file mode 100644
index 0000000000..10151f4267
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/luminance.cc
@@ -0,0 +1,26 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/luminance.h"
+
+#include "lib/jxl/image_metadata.h"
+
+namespace jxl {
+
+void SetIntensityTarget(ImageMetadata* m) {
+ if (m->color_encoding.tf.IsPQ()) {
+ // Peak luminance of PQ as defined by SMPTE ST 2084:2014.
+ m->SetIntensityTarget(10000);
+ } else if (m->color_encoding.tf.IsHLG()) {
+ // Nominal display peak luminance used as a reference by
+ // Rec. ITU-R BT.2100-2.
+ m->SetIntensityTarget(1000);
+ } else {
+ // SDR
+ m->SetIntensityTarget(kDefaultIntensityTarget);
+ }
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/luminance.h b/third_party/jpeg-xl/lib/jxl/luminance.h
new file mode 100644
index 0000000000..3181576823
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/luminance.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_LUMINANCE_H_
+#define LIB_JXL_LUMINANCE_H_
+
+namespace jxl {
+
+// Chooses a default intensity target based on the transfer function of the
+// image, if known. For SDR images or images not known to be HDR, returns
+// kDefaultIntensityTarget, for images known to have PQ or HLG transfer function
+// returns a higher value.
+
+struct ImageMetadata;
+// TODO(eustas): rename
+void SetIntensityTarget(ImageMetadata* m);
+
+} // namespace jxl
+
+#endif // LIB_JXL_LUMINANCE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/matrix_ops.h b/third_party/jpeg-xl/lib/jxl/matrix_ops.h
new file mode 100644
index 0000000000..1a969bd4f0
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/matrix_ops.h
@@ -0,0 +1,84 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MATRIX_OPS_H_
+#define LIB_JXL_MATRIX_OPS_H_
+
+// 3x3 matrix operations.
+
+#include <cmath> // abs
+#include <cstddef>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Computes C = A * B, where A, B, C are 3x3 matrices.
+template <typename T>
+void Mul3x3Matrix(const T* a, const T* b, T* c) {
+ alignas(16) T temp[3]; // For transposed column
+ for (size_t x = 0; x < 3; x++) {
+ for (size_t z = 0; z < 3; z++) {
+ temp[z] = b[z * 3 + x];
+ }
+ for (size_t y = 0; y < 3; y++) {
+ double e = 0;
+ for (size_t z = 0; z < 3; z++) {
+ e += a[y * 3 + z] * temp[z];
+ }
+ c[y * 3 + x] = e;
+ }
+ }
+}
+
+// Computes C = A * B, where A is 3x3 matrix and B is vector.
+template <typename T>
+void Mul3x3Vector(const T* a, const T* b, T* c) {
+ for (size_t y = 0; y < 3; y++) {
+ double e = 0;
+ for (size_t x = 0; x < 3; x++) {
+ e += a[y * 3 + x] * b[x];
+ }
+ c[y] = e;
+ }
+}
+
+// Inverts a 3x3 matrix in place.
+template <typename T>
+Status Inv3x3Matrix(T* matrix) {
+ // Intermediate computation is done in double precision.
+ double temp[9];
+ temp[0] = static_cast<double>(matrix[4]) * matrix[8] -
+ static_cast<double>(matrix[5]) * matrix[7];
+ temp[1] = static_cast<double>(matrix[2]) * matrix[7] -
+ static_cast<double>(matrix[1]) * matrix[8];
+ temp[2] = static_cast<double>(matrix[1]) * matrix[5] -
+ static_cast<double>(matrix[2]) * matrix[4];
+ temp[3] = static_cast<double>(matrix[5]) * matrix[6] -
+ static_cast<double>(matrix[3]) * matrix[8];
+ temp[4] = static_cast<double>(matrix[0]) * matrix[8] -
+ static_cast<double>(matrix[2]) * matrix[6];
+ temp[5] = static_cast<double>(matrix[2]) * matrix[3] -
+ static_cast<double>(matrix[0]) * matrix[5];
+ temp[6] = static_cast<double>(matrix[3]) * matrix[7] -
+ static_cast<double>(matrix[4]) * matrix[6];
+ temp[7] = static_cast<double>(matrix[1]) * matrix[6] -
+ static_cast<double>(matrix[0]) * matrix[7];
+ temp[8] = static_cast<double>(matrix[0]) * matrix[4] -
+ static_cast<double>(matrix[1]) * matrix[3];
+ double det = matrix[0] * temp[0] + matrix[1] * temp[3] + matrix[2] * temp[6];
+ if (std::abs(det) < 1e-10) {
+ return JXL_FAILURE("Matrix determinant is too close to 0");
+ }
+ double idet = 1.0 / det;
+ for (size_t i = 0; i < 9; i++) {
+ matrix[i] = temp[i] * idet;
+ }
+ return true;
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_MATRIX_OPS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/memory_manager_internal.cc b/third_party/jpeg-xl/lib/jxl/memory_manager_internal.cc
new file mode 100644
index 0000000000..87727e75cd
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/memory_manager_internal.cc
@@ -0,0 +1,18 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/memory_manager_internal.h"
+
+#include <stdlib.h>
+
+namespace jxl {
+
+void* MemoryManagerDefaultAlloc(void* opaque, size_t size) {
+ return malloc(size);
+}
+
+void MemoryManagerDefaultFree(void* opaque, void* address) { free(address); }
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/memory_manager_internal.h b/third_party/jpeg-xl/lib/jxl/memory_manager_internal.h
new file mode 100644
index 0000000000..f8a5cd8d59
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/memory_manager_internal.h
@@ -0,0 +1,101 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MEMORY_MANAGER_INTERNAL_H_
+#define LIB_JXL_MEMORY_MANAGER_INTERNAL_H_
+
+// Memory allocator with support for alignment + misalignment.
+
+#include <jxl/memory_manager.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h> // memcpy
+
+#include <atomic>
+#include <memory>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Default alloc and free functions.
+void* MemoryManagerDefaultAlloc(void* opaque, size_t size);
+void MemoryManagerDefaultFree(void* opaque, void* address);
+
+// Initializes the memory manager instance with the passed one. The
+// MemoryManager passed in |memory_manager| may be NULL or contain NULL
+// functions which will be initialized with the default ones. If either alloc
+// or free are NULL, then both must be NULL, otherwise this function returns an
+// error.
+static JXL_INLINE Status MemoryManagerInit(
+ JxlMemoryManager* self, const JxlMemoryManager* memory_manager) {
+ if (memory_manager) {
+ *self = *memory_manager;
+ } else {
+ memset(self, 0, sizeof(*self));
+ }
+ if (!self->alloc != !self->free) {
+ return false;
+ }
+ if (!self->alloc) self->alloc = jxl::MemoryManagerDefaultAlloc;
+ if (!self->free) self->free = jxl::MemoryManagerDefaultFree;
+
+ return true;
+}
+
+static JXL_INLINE void* MemoryManagerAlloc(
+ const JxlMemoryManager* memory_manager, size_t size) {
+ return memory_manager->alloc(memory_manager->opaque, size);
+}
+
+static JXL_INLINE void MemoryManagerFree(const JxlMemoryManager* memory_manager,
+ void* address) {
+ return memory_manager->free(memory_manager->opaque, address);
+}
+
+// Helper class to be used as a deleter in a unique_ptr<T> call.
+class MemoryManagerDeleteHelper {
+ public:
+ explicit MemoryManagerDeleteHelper(const JxlMemoryManager* memory_manager)
+ : memory_manager_(memory_manager) {}
+
+ // Delete and free the passed pointer using the memory_manager.
+ template <typename T>
+ void operator()(T* address) const {
+ if (!address) {
+ return;
+ }
+ address->~T();
+ return memory_manager_->free(memory_manager_->opaque, address);
+ }
+
+ private:
+ const JxlMemoryManager* memory_manager_;
+};
+
+template <typename T>
+using MemoryManagerUniquePtr = std::unique_ptr<T, MemoryManagerDeleteHelper>;
+
+// Creates a new object T allocating it with the memory allocator into a
+// unique_ptr.
+template <typename T, typename... Args>
+JXL_INLINE MemoryManagerUniquePtr<T> MemoryManagerMakeUnique(
+ const JxlMemoryManager* memory_manager, Args&&... args) {
+ T* mem =
+ static_cast<T*>(memory_manager->alloc(memory_manager->opaque, sizeof(T)));
+ if (!mem) {
+ // Allocation error case.
+ return MemoryManagerUniquePtr<T>(nullptr,
+ MemoryManagerDeleteHelper(memory_manager));
+ }
+ return MemoryManagerUniquePtr<T>(new (mem) T(std::forward<Args>(args)...),
+ MemoryManagerDeleteHelper(memory_manager));
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_MEMORY_MANAGER_INTERNAL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/encoding/context_predict.h b/third_party/jpeg-xl/lib/jxl/modular/encoding/context_predict.h
new file mode 100644
index 0000000000..914cd6a4e4
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/encoding/context_predict.h
@@ -0,0 +1,626 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_CONTEXT_PREDICT_H_
+#define LIB_JXL_MODULAR_ENCODING_CONTEXT_PREDICT_H_
+
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+namespace weighted {
+constexpr static size_t kNumPredictors = 4;
+constexpr static int64_t kPredExtraBits = 3;
+constexpr static int64_t kPredictionRound = ((1 << kPredExtraBits) >> 1) - 1;
+constexpr static size_t kNumProperties = 1;
+
+struct Header : public Fields {
+ JXL_FIELDS_NAME(WeightedPredictorHeader)
+ // TODO(janwas): move to cc file, avoid including fields.h.
+ Header() { Bundle::Init(this); }
+
+ Status VisitFields(Visitor *JXL_RESTRICT visitor) override {
+ if (visitor->AllDefault(*this, &all_default)) {
+ // Overwrite all serialized fields, but not any nonserialized_*.
+ visitor->SetDefault(this);
+ return true;
+ }
+ auto visit_p = [visitor](pixel_type val, pixel_type *p) {
+ uint32_t up = *p;
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(5, val, &up));
+ *p = up;
+ return Status(true);
+ };
+ JXL_QUIET_RETURN_IF_ERROR(visit_p(16, &p1C));
+ JXL_QUIET_RETURN_IF_ERROR(visit_p(10, &p2C));
+ JXL_QUIET_RETURN_IF_ERROR(visit_p(7, &p3Ca));
+ JXL_QUIET_RETURN_IF_ERROR(visit_p(7, &p3Cb));
+ JXL_QUIET_RETURN_IF_ERROR(visit_p(7, &p3Cc));
+ JXL_QUIET_RETURN_IF_ERROR(visit_p(0, &p3Cd));
+ JXL_QUIET_RETURN_IF_ERROR(visit_p(0, &p3Ce));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(4, 0xd, &w[0]));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(4, 0xc, &w[1]));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(4, 0xc, &w[2]));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(4, 0xc, &w[3]));
+ return true;
+ }
+
+ bool all_default;
+ pixel_type p1C = 0, p2C = 0, p3Ca = 0, p3Cb = 0, p3Cc = 0, p3Cd = 0, p3Ce = 0;
+ uint32_t w[kNumPredictors] = {};
+};
+
+struct State {
+ pixel_type_w prediction[kNumPredictors] = {};
+ pixel_type_w pred = 0; // *before* removing the added bits.
+ std::vector<uint32_t> pred_errors[kNumPredictors];
+ std::vector<int32_t> error;
+ const Header header;
+
+ // Allows to approximate division by a number from 1 to 64.
+ uint32_t divlookup[64];
+
+ constexpr static pixel_type_w AddBits(pixel_type_w x) {
+ return uint64_t(x) << kPredExtraBits;
+ }
+
+ State(Header header, size_t xsize, size_t ysize) : header(header) {
+ // Extra margin to avoid out-of-bounds writes.
+ // All have space for two rows of data.
+ for (size_t i = 0; i < 4; i++) {
+ pred_errors[i].resize((xsize + 2) * 2);
+ }
+ error.resize((xsize + 2) * 2);
+ // Initialize division lookup table.
+ for (int i = 0; i < 64; i++) {
+ divlookup[i] = (1 << 24) / (i + 1);
+ }
+ }
+
+ // Approximates 4+(maxweight<<24)/(x+1), avoiding division
+ JXL_INLINE uint32_t ErrorWeight(uint64_t x, uint32_t maxweight) const {
+ int shift = static_cast<int>(FloorLog2Nonzero(x + 1)) - 5;
+ if (shift < 0) shift = 0;
+ return 4 + ((maxweight * divlookup[x >> shift]) >> shift);
+ }
+
+ // Approximates the weighted average of the input values with the given
+ // weights, avoiding division. Weights must sum to at least 16.
+ JXL_INLINE pixel_type_w
+ WeightedAverage(const pixel_type_w *JXL_RESTRICT p,
+ std::array<uint32_t, kNumPredictors> w) const {
+ uint32_t weight_sum = 0;
+ for (size_t i = 0; i < kNumPredictors; i++) {
+ weight_sum += w[i];
+ }
+ JXL_DASSERT(weight_sum > 15);
+ uint32_t log_weight = FloorLog2Nonzero(weight_sum); // at least 4.
+ weight_sum = 0;
+ for (size_t i = 0; i < kNumPredictors; i++) {
+ w[i] >>= log_weight - 4;
+ weight_sum += w[i];
+ }
+ // for rounding.
+ pixel_type_w sum = (weight_sum >> 1) - 1;
+ for (size_t i = 0; i < kNumPredictors; i++) {
+ sum += p[i] * w[i];
+ }
+ return (sum * divlookup[weight_sum - 1]) >> 24;
+ }
+
+ template <bool compute_properties>
+ JXL_INLINE pixel_type_w Predict(size_t x, size_t y, size_t xsize,
+ pixel_type_w N, pixel_type_w W,
+ pixel_type_w NE, pixel_type_w NW,
+ pixel_type_w NN, Properties *properties,
+ size_t offset) {
+ size_t cur_row = y & 1 ? 0 : (xsize + 2);
+ size_t prev_row = y & 1 ? (xsize + 2) : 0;
+ size_t pos_N = prev_row + x;
+ size_t pos_NE = x < xsize - 1 ? pos_N + 1 : pos_N;
+ size_t pos_NW = x > 0 ? pos_N - 1 : pos_N;
+ std::array<uint32_t, kNumPredictors> weights;
+ for (size_t i = 0; i < kNumPredictors; i++) {
+ // pred_errors[pos_N] also contains the error of pixel W.
+ // pred_errors[pos_NW] also contains the error of pixel WW.
+ weights[i] = pred_errors[i][pos_N] + pred_errors[i][pos_NE] +
+ pred_errors[i][pos_NW];
+ weights[i] = ErrorWeight(weights[i], header.w[i]);
+ }
+
+ N = AddBits(N);
+ W = AddBits(W);
+ NE = AddBits(NE);
+ NW = AddBits(NW);
+ NN = AddBits(NN);
+
+ pixel_type_w teW = x == 0 ? 0 : error[cur_row + x - 1];
+ pixel_type_w teN = error[pos_N];
+ pixel_type_w teNW = error[pos_NW];
+ pixel_type_w sumWN = teN + teW;
+ pixel_type_w teNE = error[pos_NE];
+
+ if (compute_properties) {
+ pixel_type_w p = teW;
+ if (std::abs(teN) > std::abs(p)) p = teN;
+ if (std::abs(teNW) > std::abs(p)) p = teNW;
+ if (std::abs(teNE) > std::abs(p)) p = teNE;
+ (*properties)[offset++] = p;
+ }
+
+ prediction[0] = W + NE - N;
+ prediction[1] = N - (((sumWN + teNE) * header.p1C) >> 5);
+ prediction[2] = W - (((sumWN + teNW) * header.p2C) >> 5);
+ prediction[3] =
+ N - ((teNW * header.p3Ca + teN * header.p3Cb + teNE * header.p3Cc +
+ (NN - N) * header.p3Cd + (NW - W) * header.p3Ce) >>
+ 5);
+
+ pred = WeightedAverage(prediction, weights);
+
+ // If all three have the same sign, skip clamping.
+ if (((teN ^ teW) | (teN ^ teNW)) > 0) {
+ return (pred + kPredictionRound) >> kPredExtraBits;
+ }
+
+ // Otherwise, clamp to min/max of neighbouring pixels (just W, NE, N).
+ pixel_type_w mx = std::max(W, std::max(NE, N));
+ pixel_type_w mn = std::min(W, std::min(NE, N));
+ pred = std::max(mn, std::min(mx, pred));
+ return (pred + kPredictionRound) >> kPredExtraBits;
+ }
+
+ JXL_INLINE void UpdateErrors(pixel_type_w val, size_t x, size_t y,
+ size_t xsize) {
+ size_t cur_row = y & 1 ? 0 : (xsize + 2);
+ size_t prev_row = y & 1 ? (xsize + 2) : 0;
+ val = AddBits(val);
+ error[cur_row + x] = pred - val;
+ for (size_t i = 0; i < kNumPredictors; i++) {
+ pixel_type_w err =
+ (std::abs(prediction[i] - val) + kPredictionRound) >> kPredExtraBits;
+ // For predicting in the next row.
+ pred_errors[i][cur_row + x] = err;
+ // Add the error on this pixel to the error on the NE pixel. This has the
+ // effect of adding the error on this pixel to the E and EE pixels.
+ pred_errors[i][prev_row + x + 1] += err;
+ }
+ }
+};
+
+// Encoder helper function to set the parameters to some presets.
+inline void PredictorMode(int i, Header *header) {
+ switch (i) {
+ case 0:
+ // ~ lossless16 predictor
+ header->w[0] = 0xd;
+ header->w[1] = 0xc;
+ header->w[2] = 0xc;
+ header->w[3] = 0xc;
+ header->p1C = 16;
+ header->p2C = 10;
+ header->p3Ca = 7;
+ header->p3Cb = 7;
+ header->p3Cc = 7;
+ header->p3Cd = 0;
+ header->p3Ce = 0;
+ break;
+ case 1:
+ // ~ default lossless8 predictor
+ header->w[0] = 0xd;
+ header->w[1] = 0xc;
+ header->w[2] = 0xc;
+ header->w[3] = 0xb;
+ header->p1C = 8;
+ header->p2C = 8;
+ header->p3Ca = 4;
+ header->p3Cb = 0;
+ header->p3Cc = 3;
+ header->p3Cd = 23;
+ header->p3Ce = 2;
+ break;
+ case 2:
+ // ~ west lossless8 predictor
+ header->w[0] = 0xd;
+ header->w[1] = 0xc;
+ header->w[2] = 0xd;
+ header->w[3] = 0xc;
+ header->p1C = 10;
+ header->p2C = 9;
+ header->p3Ca = 7;
+ header->p3Cb = 0;
+ header->p3Cc = 0;
+ header->p3Cd = 16;
+ header->p3Ce = 9;
+ break;
+ case 3:
+ // ~ north lossless8 predictor
+ header->w[0] = 0xd;
+ header->w[1] = 0xd;
+ header->w[2] = 0xc;
+ header->w[3] = 0xc;
+ header->p1C = 16;
+ header->p2C = 8;
+ header->p3Ca = 0;
+ header->p3Cb = 16;
+ header->p3Cc = 0;
+ header->p3Cd = 23;
+ header->p3Ce = 0;
+ break;
+ case 4:
+ default:
+ // something else, because why not
+ header->w[0] = 0xd;
+ header->w[1] = 0xc;
+ header->w[2] = 0xc;
+ header->w[3] = 0xc;
+ header->p1C = 10;
+ header->p2C = 10;
+ header->p3Ca = 5;
+ header->p3Cb = 5;
+ header->p3Cc = 5;
+ header->p3Cd = 12;
+ header->p3Ce = 4;
+ break;
+ }
+}
+} // namespace weighted
+
+// Stores a node and its two children at the same time. This significantly
+// reduces the number of branches needed during decoding.
+struct FlatDecisionNode {
+ // Property + splitval of the top node.
+ int32_t property0; // -1 if leaf.
+ union {
+ PropertyVal splitval0;
+ Predictor predictor;
+ };
+ uint32_t childID; // childID is ctx id if leaf.
+ // Property+splitval of the two child nodes.
+ union {
+ PropertyVal splitvals[2];
+ int32_t multiplier;
+ };
+ union {
+ int32_t properties[2];
+ int64_t predictor_offset;
+ };
+};
+using FlatTree = std::vector<FlatDecisionNode>;
+
+class MATreeLookup {
+ public:
+ explicit MATreeLookup(const FlatTree &tree) : nodes_(tree) {}
+ struct LookupResult {
+ uint32_t context;
+ Predictor predictor;
+ int64_t offset;
+ int32_t multiplier;
+ };
+ JXL_INLINE LookupResult Lookup(const Properties &properties) const {
+ uint32_t pos = 0;
+ while (true) {
+ const FlatDecisionNode &node = nodes_[pos];
+ if (node.property0 < 0) {
+ return {node.childID, node.predictor, node.predictor_offset,
+ node.multiplier};
+ }
+ bool p0 = properties[node.property0] <= node.splitval0;
+ uint32_t off0 = properties[node.properties[0]] <= node.splitvals[0];
+ uint32_t off1 =
+ 2 | (properties[node.properties[1]] <= node.splitvals[1] ? 1 : 0);
+ pos = node.childID + (p0 ? off1 : off0);
+ }
+ }
+
+ private:
+ const FlatTree &nodes_;
+};
+
+static constexpr size_t kExtraPropsPerChannel = 4;
+static constexpr size_t kNumNonrefProperties =
+ kNumStaticProperties + 13 + weighted::kNumProperties;
+
+constexpr size_t kWPProp = kNumNonrefProperties - weighted::kNumProperties;
+constexpr size_t kGradientProp = 9;
+
+// Clamps gradient to the min/max of n, w (and l, implicitly).
+static JXL_INLINE int32_t ClampedGradient(const int32_t n, const int32_t w,
+ const int32_t l) {
+ const int32_t m = std::min(n, w);
+ const int32_t M = std::max(n, w);
+ // The end result of this operation doesn't overflow or underflow if the
+ // result is between m and M, but the intermediate value may overflow, so we
+ // do the intermediate operations in uint32_t and check later if we had an
+ // overflow or underflow condition comparing m, M and l directly.
+ // grad = M + m - l = n + w - l
+ const int32_t grad =
+ static_cast<int32_t>(static_cast<uint32_t>(n) + static_cast<uint32_t>(w) -
+ static_cast<uint32_t>(l));
+ // We use two sets of ternary operators to force the evaluation of them in
+ // any case, allowing the compiler to avoid branches and use cmovl/cmovg in
+ // x86.
+ const int32_t grad_clamp_M = (l < m) ? M : grad;
+ return (l > M) ? m : grad_clamp_M;
+}
+
+inline pixel_type_w Select(pixel_type_w a, pixel_type_w b, pixel_type_w c) {
+ pixel_type_w p = a + b - c;
+ pixel_type_w pa = std::abs(p - a);
+ pixel_type_w pb = std::abs(p - b);
+ return pa < pb ? a : b;
+}
+
+inline void PrecomputeReferences(const Channel &ch, size_t y,
+ const Image &image, uint32_t i,
+ Channel *references) {
+ ZeroFillImage(&references->plane);
+ uint32_t offset = 0;
+ size_t num_extra_props = references->w;
+ intptr_t onerow = references->plane.PixelsPerRow();
+ for (int32_t j = static_cast<int32_t>(i) - 1;
+ j >= 0 && offset < num_extra_props; j--) {
+ if (image.channel[j].w != image.channel[i].w ||
+ image.channel[j].h != image.channel[i].h) {
+ continue;
+ }
+ if (image.channel[j].hshift != image.channel[i].hshift) continue;
+ if (image.channel[j].vshift != image.channel[i].vshift) continue;
+ pixel_type *JXL_RESTRICT rp = references->Row(0) + offset;
+ const pixel_type *JXL_RESTRICT rpp = image.channel[j].Row(y);
+ const pixel_type *JXL_RESTRICT rpprev = image.channel[j].Row(y ? y - 1 : 0);
+ for (size_t x = 0; x < ch.w; x++, rp += onerow) {
+ pixel_type_w v = rpp[x];
+ rp[0] = std::abs(v);
+ rp[1] = v;
+ pixel_type_w vleft = (x ? rpp[x - 1] : 0);
+ pixel_type_w vtop = (y ? rpprev[x] : vleft);
+ pixel_type_w vtopleft = (x && y ? rpprev[x - 1] : vleft);
+ pixel_type_w vpredicted = ClampedGradient(vleft, vtop, vtopleft);
+ rp[2] = std::abs(v - vpredicted);
+ rp[3] = v - vpredicted;
+ }
+
+ offset += kExtraPropsPerChannel;
+ }
+}
+
+struct PredictionResult {
+ int context = 0;
+ pixel_type_w guess = 0;
+ Predictor predictor;
+ int32_t multiplier;
+};
+
+inline void InitPropsRow(
+ Properties *p,
+ const std::array<pixel_type, kNumStaticProperties> &static_props,
+ const int y) {
+ for (size_t i = 0; i < kNumStaticProperties; i++) {
+ (*p)[i] = static_props[i];
+ }
+ (*p)[2] = y;
+ (*p)[9] = 0; // local gradient.
+}
+
+namespace detail {
+enum PredictorMode {
+ kUseTree = 1,
+ kUseWP = 2,
+ kForceComputeProperties = 4,
+ kAllPredictions = 8,
+ kNoEdgeCases = 16
+};
+
+JXL_INLINE pixel_type_w PredictOne(Predictor p, pixel_type_w left,
+ pixel_type_w top, pixel_type_w toptop,
+ pixel_type_w topleft, pixel_type_w topright,
+ pixel_type_w leftleft,
+ pixel_type_w toprightright,
+ pixel_type_w wp_pred) {
+ switch (p) {
+ case Predictor::Zero:
+ return pixel_type_w{0};
+ case Predictor::Left:
+ return left;
+ case Predictor::Top:
+ return top;
+ case Predictor::Select:
+ return Select(left, top, topleft);
+ case Predictor::Weighted:
+ return wp_pred;
+ case Predictor::Gradient:
+ return pixel_type_w{ClampedGradient(left, top, topleft)};
+ case Predictor::TopLeft:
+ return topleft;
+ case Predictor::TopRight:
+ return topright;
+ case Predictor::LeftLeft:
+ return leftleft;
+ case Predictor::Average0:
+ return (left + top) / 2;
+ case Predictor::Average1:
+ return (left + topleft) / 2;
+ case Predictor::Average2:
+ return (topleft + top) / 2;
+ case Predictor::Average3:
+ return (top + topright) / 2;
+ case Predictor::Average4:
+ return (6 * top - 2 * toptop + 7 * left + 1 * leftleft +
+ 1 * toprightright + 3 * topright + 8) /
+ 16;
+ default:
+ return pixel_type_w{0};
+ }
+}
+
+template <int mode>
+JXL_INLINE PredictionResult Predict(
+ Properties *p, size_t w, const pixel_type *JXL_RESTRICT pp,
+ const intptr_t onerow, const size_t x, const size_t y, Predictor predictor,
+ const MATreeLookup *lookup, const Channel *references,
+ weighted::State *wp_state, pixel_type_w *predictions) {
+ // We start in position 3 because of 2 static properties + y.
+ size_t offset = 3;
+ constexpr bool compute_properties =
+ mode & kUseTree || mode & kForceComputeProperties;
+ constexpr bool nec = mode & kNoEdgeCases;
+ pixel_type_w left = (nec || x ? pp[-1] : (y ? pp[-onerow] : 0));
+ pixel_type_w top = (nec || y ? pp[-onerow] : left);
+ pixel_type_w topleft = (nec || (x && y) ? pp[-1 - onerow] : left);
+ pixel_type_w topright = (nec || (x + 1 < w && y) ? pp[1 - onerow] : top);
+ pixel_type_w leftleft = (nec || x > 1 ? pp[-2] : left);
+ pixel_type_w toptop = (nec || y > 1 ? pp[-onerow - onerow] : top);
+ pixel_type_w toprightright =
+ (nec || (x + 2 < w && y) ? pp[2 - onerow] : topright);
+
+ if (compute_properties) {
+ // location
+ (*p)[offset++] = x;
+ // neighbors
+ (*p)[offset++] = std::abs(top);
+ (*p)[offset++] = std::abs(left);
+ (*p)[offset++] = top;
+ (*p)[offset++] = left;
+
+ // local gradient
+ (*p)[offset] = left - (*p)[offset + 1];
+ offset++;
+ // local gradient
+ (*p)[offset++] = left + top - topleft;
+
+ // FFV1 context properties
+ (*p)[offset++] = left - topleft;
+ (*p)[offset++] = topleft - top;
+ (*p)[offset++] = top - topright;
+ (*p)[offset++] = top - toptop;
+ (*p)[offset++] = left - leftleft;
+ }
+
+ pixel_type_w wp_pred = 0;
+ if (mode & kUseWP) {
+ wp_pred = wp_state->Predict<compute_properties>(
+ x, y, w, top, left, topright, topleft, toptop, p, offset);
+ }
+ if (!nec && compute_properties) {
+ offset += weighted::kNumProperties;
+ // Extra properties.
+ const pixel_type *JXL_RESTRICT rp = references->Row(x);
+ for (size_t i = 0; i < references->w; i++) {
+ (*p)[offset++] = rp[i];
+ }
+ }
+ PredictionResult result;
+ if (mode & kUseTree) {
+ MATreeLookup::LookupResult lr = lookup->Lookup(*p);
+ result.context = lr.context;
+ result.guess = lr.offset;
+ result.multiplier = lr.multiplier;
+ predictor = lr.predictor;
+ }
+ if (mode & kAllPredictions) {
+ for (size_t i = 0; i < kNumModularPredictors; i++) {
+ predictions[i] = PredictOne((Predictor)i, left, top, toptop, topleft,
+ topright, leftleft, toprightright, wp_pred);
+ }
+ }
+ result.guess += PredictOne(predictor, left, top, toptop, topleft, topright,
+ leftleft, toprightright, wp_pred);
+ result.predictor = predictor;
+
+ return result;
+}
+} // namespace detail
+
+inline PredictionResult PredictNoTreeNoWP(size_t w,
+ const pixel_type *JXL_RESTRICT pp,
+ const intptr_t onerow, const int x,
+ const int y, Predictor predictor) {
+ return detail::Predict</*mode=*/0>(
+ /*p=*/nullptr, w, pp, onerow, x, y, predictor, /*lookup=*/nullptr,
+ /*references=*/nullptr, /*wp_state=*/nullptr, /*predictions=*/nullptr);
+}
+
+inline PredictionResult PredictNoTreeWP(size_t w,
+ const pixel_type *JXL_RESTRICT pp,
+ const intptr_t onerow, const int x,
+ const int y, Predictor predictor,
+ weighted::State *wp_state) {
+ return detail::Predict<detail::kUseWP>(
+ /*p=*/nullptr, w, pp, onerow, x, y, predictor, /*lookup=*/nullptr,
+ /*references=*/nullptr, wp_state, /*predictions=*/nullptr);
+}
+
+inline PredictionResult PredictTreeNoWP(Properties *p, size_t w,
+ const pixel_type *JXL_RESTRICT pp,
+ const intptr_t onerow, const int x,
+ const int y,
+ const MATreeLookup &tree_lookup,
+ const Channel &references) {
+ return detail::Predict<detail::kUseTree>(
+ p, w, pp, onerow, x, y, Predictor::Zero, &tree_lookup, &references,
+ /*wp_state=*/nullptr, /*predictions=*/nullptr);
+}
+// Only use for y > 1, x > 1, x < w-2, and empty references
+JXL_INLINE PredictionResult
+PredictTreeNoWPNEC(Properties *p, size_t w, const pixel_type *JXL_RESTRICT pp,
+ const intptr_t onerow, const int x, const int y,
+ const MATreeLookup &tree_lookup, const Channel &references) {
+ return detail::Predict<detail::kUseTree | detail::kNoEdgeCases>(
+ p, w, pp, onerow, x, y, Predictor::Zero, &tree_lookup, &references,
+ /*wp_state=*/nullptr, /*predictions=*/nullptr);
+}
+
+inline PredictionResult PredictTreeWP(Properties *p, size_t w,
+ const pixel_type *JXL_RESTRICT pp,
+ const intptr_t onerow, const int x,
+ const int y,
+ const MATreeLookup &tree_lookup,
+ const Channel &references,
+ weighted::State *wp_state) {
+ return detail::Predict<detail::kUseTree | detail::kUseWP>(
+ p, w, pp, onerow, x, y, Predictor::Zero, &tree_lookup, &references,
+ wp_state, /*predictions=*/nullptr);
+}
+
+inline PredictionResult PredictLearn(Properties *p, size_t w,
+ const pixel_type *JXL_RESTRICT pp,
+ const intptr_t onerow, const int x,
+ const int y, Predictor predictor,
+ const Channel &references,
+ weighted::State *wp_state) {
+ return detail::Predict<detail::kForceComputeProperties | detail::kUseWP>(
+ p, w, pp, onerow, x, y, predictor, /*lookup=*/nullptr, &references,
+ wp_state, /*predictions=*/nullptr);
+}
+
+inline void PredictLearnAll(Properties *p, size_t w,
+ const pixel_type *JXL_RESTRICT pp,
+ const intptr_t onerow, const int x, const int y,
+ const Channel &references,
+ weighted::State *wp_state,
+ pixel_type_w *predictions) {
+ detail::Predict<detail::kForceComputeProperties | detail::kUseWP |
+ detail::kAllPredictions>(
+ p, w, pp, onerow, x, y, Predictor::Zero,
+ /*lookup=*/nullptr, &references, wp_state, predictions);
+}
+
+inline void PredictAllNoWP(size_t w, const pixel_type *JXL_RESTRICT pp,
+ const intptr_t onerow, const int x, const int y,
+ pixel_type_w *predictions) {
+ detail::Predict<detail::kAllPredictions>(
+ /*p=*/nullptr, w, pp, onerow, x, y, Predictor::Zero,
+ /*lookup=*/nullptr,
+ /*references=*/nullptr, /*wp_state=*/nullptr, predictions);
+}
+} // namespace jxl
+
+#endif // LIB_JXL_MODULAR_ENCODING_CONTEXT_PREDICT_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/encoding/dec_ma.cc b/third_party/jpeg-xl/lib/jxl/modular/encoding/dec_ma.cc
new file mode 100644
index 0000000000..66562f7dfd
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/encoding/dec_ma.cc
@@ -0,0 +1,107 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/encoding/dec_ma.h"
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/modular/encoding/ma_common.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+namespace {
+
+Status ValidateTree(
+ const Tree &tree,
+ const std::vector<std::pair<pixel_type, pixel_type>> &prop_bounds,
+ size_t root) {
+ if (tree[root].property == -1) return true;
+ size_t p = tree[root].property;
+ int val = tree[root].splitval;
+ if (prop_bounds[p].first > val) return JXL_FAILURE("Invalid tree");
+ // Splitting at max value makes no sense: left range will be exactly same
+ // as parent, right range will be invalid (min > max).
+ if (prop_bounds[p].second <= val) return JXL_FAILURE("Invalid tree");
+ auto new_bounds = prop_bounds;
+ new_bounds[p].first = val + 1;
+ JXL_RETURN_IF_ERROR(ValidateTree(tree, new_bounds, tree[root].lchild));
+ new_bounds[p] = prop_bounds[p];
+ new_bounds[p].second = val;
+ return ValidateTree(tree, new_bounds, tree[root].rchild);
+}
+
+Status DecodeTree(BitReader *br, ANSSymbolReader *reader,
+ const std::vector<uint8_t> &context_map, Tree *tree,
+ size_t tree_size_limit) {
+ size_t leaf_id = 0;
+ size_t to_decode = 1;
+ tree->clear();
+ while (to_decode > 0) {
+ JXL_RETURN_IF_ERROR(br->AllReadsWithinBounds());
+ if (tree->size() > tree_size_limit) {
+ return JXL_FAILURE("Tree is too large: %" PRIuS " nodes vs %" PRIuS
+ " max nodes",
+ tree->size(), tree_size_limit);
+ }
+ to_decode--;
+ uint32_t prop1 = reader->ReadHybridUint(kPropertyContext, br, context_map);
+ if (prop1 > 256) return JXL_FAILURE("Invalid tree property value");
+ int property = prop1 - 1;
+ if (property == -1) {
+ size_t predictor =
+ reader->ReadHybridUint(kPredictorContext, br, context_map);
+ if (predictor >= kNumModularPredictors) {
+ return JXL_FAILURE("Invalid predictor");
+ }
+ int64_t predictor_offset =
+ UnpackSigned(reader->ReadHybridUint(kOffsetContext, br, context_map));
+ uint32_t mul_log =
+ reader->ReadHybridUint(kMultiplierLogContext, br, context_map);
+ if (mul_log >= 31) {
+ return JXL_FAILURE("Invalid multiplier logarithm");
+ }
+ uint32_t mul_bits =
+ reader->ReadHybridUint(kMultiplierBitsContext, br, context_map);
+ if (mul_bits + 1 >= 1u << (31u - mul_log)) {
+ return JXL_FAILURE("Invalid multiplier");
+ }
+ uint32_t multiplier = (mul_bits + 1U) << mul_log;
+ tree->emplace_back(-1, 0, leaf_id++, 0, static_cast<Predictor>(predictor),
+ predictor_offset, multiplier);
+ continue;
+ }
+ int splitval =
+ UnpackSigned(reader->ReadHybridUint(kSplitValContext, br, context_map));
+ tree->emplace_back(property, splitval, tree->size() + to_decode + 1,
+ tree->size() + to_decode + 2, Predictor::Zero, 0, 1);
+ to_decode += 2;
+ }
+ std::vector<std::pair<pixel_type, pixel_type>> prop_bounds;
+ prop_bounds.resize(256, {std::numeric_limits<pixel_type>::min(),
+ std::numeric_limits<pixel_type>::max()});
+ return ValidateTree(*tree, prop_bounds, 0);
+}
+} // namespace
+
+Status DecodeTree(BitReader *br, Tree *tree, size_t tree_size_limit) {
+ std::vector<uint8_t> tree_context_map;
+ ANSCode tree_code;
+ JXL_RETURN_IF_ERROR(
+ DecodeHistograms(br, kNumTreeContexts, &tree_code, &tree_context_map));
+ // TODO(eustas): investigate more infinite tree cases.
+ if (tree_code.degenerate_symbols[tree_context_map[kPropertyContext]] > 0) {
+ return JXL_FAILURE("Infinite tree");
+ }
+ ANSSymbolReader reader(&tree_code, br);
+ JXL_RETURN_IF_ERROR(DecodeTree(br, &reader, tree_context_map, tree,
+ std::min(tree_size_limit, kMaxTreeSize)));
+ if (!reader.CheckANSFinalState()) {
+ return JXL_FAILURE("ANS decode final state failed");
+ }
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/modular/encoding/dec_ma.h b/third_party/jpeg-xl/lib/jxl/modular/encoding/dec_ma.h
new file mode 100644
index 0000000000..a910c4deb1
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/encoding/dec_ma.h
@@ -0,0 +1,66 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_DEC_MA_H_
+#define LIB_JXL_MODULAR_ENCODING_DEC_MA_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+// inner nodes
+struct PropertyDecisionNode {
+ PropertyVal splitval;
+ int16_t property; // -1: leaf node, lchild points to leaf node
+ uint32_t lchild;
+ uint32_t rchild;
+ Predictor predictor;
+ int64_t predictor_offset;
+ uint32_t multiplier;
+
+ PropertyDecisionNode(int p, int split_val, int lchild, int rchild,
+ Predictor predictor, int64_t predictor_offset,
+ uint32_t multiplier)
+ : splitval(split_val),
+ property(p),
+ lchild(lchild),
+ rchild(rchild),
+ predictor(predictor),
+ predictor_offset(predictor_offset),
+ multiplier(multiplier) {}
+ PropertyDecisionNode()
+ : splitval(0),
+ property(-1),
+ lchild(0),
+ rchild(0),
+ predictor(Predictor::Zero),
+ predictor_offset(0),
+ multiplier(1) {}
+ static PropertyDecisionNode Leaf(Predictor predictor, int64_t offset = 0,
+ uint32_t multiplier = 1) {
+ return PropertyDecisionNode(-1, 0, 0, 0, predictor, offset, multiplier);
+ }
+ static PropertyDecisionNode Split(int p, int split_val, int lchild,
+ int rchild = -1) {
+ if (rchild == -1) rchild = lchild + 1;
+ return PropertyDecisionNode(p, split_val, lchild, rchild, Predictor::Zero,
+ 0, 1);
+ }
+};
+
+using Tree = std::vector<PropertyDecisionNode>;
+
+Status DecodeTree(BitReader *br, Tree *tree, size_t tree_size_limit);
+
+} // namespace jxl
+
+#endif // LIB_JXL_MODULAR_ENCODING_DEC_MA_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/encoding/enc_debug_tree.cc b/third_party/jpeg-xl/lib/jxl/modular/encoding/enc_debug_tree.cc
new file mode 100644
index 0000000000..f2a1705e4b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/encoding/enc_debug_tree.cc
@@ -0,0 +1,124 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/encoding/enc_debug_tree.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "lib/jxl/base/os_macros.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/encoding/dec_ma.h"
+#include "lib/jxl/modular/options.h"
+
+#if JXL_OS_IOS
+#define JXL_ENABLE_DOT 0
+#else
+#define JXL_ENABLE_DOT 1 // iOS lacks C89 system()
+#endif
+
+namespace jxl {
+
+const char *PredictorName(Predictor p) {
+ switch (p) {
+ case Predictor::Zero:
+ return "Zero";
+ case Predictor::Left:
+ return "Left";
+ case Predictor::Top:
+ return "Top";
+ case Predictor::Average0:
+ return "Avg0";
+ case Predictor::Average1:
+ return "Avg1";
+ case Predictor::Average2:
+ return "Avg2";
+ case Predictor::Average3:
+ return "Avg3";
+ case Predictor::Average4:
+ return "Avg4";
+ case Predictor::Select:
+ return "Sel";
+ case Predictor::Gradient:
+ return "Grd";
+ case Predictor::Weighted:
+ return "Wgh";
+ case Predictor::TopLeft:
+ return "TopL";
+ case Predictor::TopRight:
+ return "TopR";
+ case Predictor::LeftLeft:
+ return "LL";
+ default:
+ return "INVALID";
+ };
+}
+
+std::string PropertyName(size_t i) {
+ static_assert(kNumNonrefProperties == 16, "Update this function");
+ switch (i) {
+ case 0:
+ return "c";
+ case 1:
+ return "g";
+ case 2:
+ return "y";
+ case 3:
+ return "x";
+ case 4:
+ return "|N|";
+ case 5:
+ return "|W|";
+ case 6:
+ return "N";
+ case 7:
+ return "W";
+ case 8:
+ return "W-WW-NW+NWW";
+ case 9:
+ return "W+N-NW";
+ case 10:
+ return "W-NW";
+ case 11:
+ return "NW-N";
+ case 12:
+ return "N-NE";
+ case 13:
+ return "N-NN";
+ case 14:
+ return "W-WW";
+ case 15:
+ return "WGH";
+ default:
+ return "ch[" + ToString(15 - (int)i) + "]";
+ }
+}
+
+void PrintTree(const Tree &tree, const std::string &path) {
+ FILE *f = fopen((path + ".dot").c_str(), "w");
+ fprintf(f, "graph{\n");
+ for (size_t cur = 0; cur < tree.size(); cur++) {
+ if (tree[cur].property < 0) {
+ fprintf(f, "n%05" PRIuS " [label=\"%s%+" PRId64 " (x%u)\"];\n", cur,
+ PredictorName(tree[cur].predictor), tree[cur].predictor_offset,
+ tree[cur].multiplier);
+ } else {
+ fprintf(f, "n%05" PRIuS " [label=\"%s>%d\"];\n", cur,
+ PropertyName(tree[cur].property).c_str(), tree[cur].splitval);
+ fprintf(f, "n%05" PRIuS " -- n%05d;\n", cur, tree[cur].lchild);
+ fprintf(f, "n%05" PRIuS " -- n%05d;\n", cur, tree[cur].rchild);
+ }
+ }
+ fprintf(f, "}\n");
+ fclose(f);
+#if JXL_ENABLE_DOT
+ JXL_ASSERT(
+ system(("dot " + path + ".dot -T svg -o " + path + ".svg").c_str()) == 0);
+#endif
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/modular/encoding/enc_debug_tree.h b/third_party/jpeg-xl/lib/jxl/modular/encoding/enc_debug_tree.h
new file mode 100644
index 0000000000..78deaab1b8
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/encoding/enc_debug_tree.h
@@ -0,0 +1,27 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_ENC_DEBUG_TREE_H_
+#define LIB_JXL_MODULAR_ENCODING_ENC_DEBUG_TREE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/jxl/modular/encoding/dec_ma.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+const char *PredictorName(Predictor p);
+std::string PropertyName(size_t i);
+
+void PrintTree(const Tree &tree, const std::string &path);
+
+} // namespace jxl
+
+#endif // LIB_JXL_MODULAR_ENCODING_ENC_DEBUG_TREE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/encoding/enc_encoding.cc b/third_party/jpeg-xl/lib/jxl/modular/encoding/enc_encoding.cc
new file mode 100644
index 0000000000..c8c183335e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/encoding/enc_encoding.cc
@@ -0,0 +1,562 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <cinttypes>
+#include <limits>
+#include <numeric>
+#include <queue>
+#include <set>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/encoding/enc_debug_tree.h"
+#include "lib/jxl/modular/encoding/enc_ma.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/encoding/ma_common.h"
+#include "lib/jxl/modular/options.h"
+#include "lib/jxl/modular/transform/transform.h"
+#include "lib/jxl/toc.h"
+
+namespace jxl {
+
+namespace {
+// Plot tree (if enabled) and predictor usage map.
+constexpr bool kWantDebug = false;
+constexpr bool kPrintTree = false;
+
+inline std::array<uint8_t, 3> PredictorColor(Predictor p) {
+ switch (p) {
+ case Predictor::Zero:
+ return {{0, 0, 0}};
+ case Predictor::Left:
+ return {{255, 0, 0}};
+ case Predictor::Top:
+ return {{0, 255, 0}};
+ case Predictor::Average0:
+ return {{0, 0, 255}};
+ case Predictor::Average4:
+ return {{192, 128, 128}};
+ case Predictor::Select:
+ return {{255, 255, 0}};
+ case Predictor::Gradient:
+ return {{255, 0, 255}};
+ case Predictor::Weighted:
+ return {{0, 255, 255}};
+ // TODO
+ default:
+ return {{255, 255, 255}};
+ };
+}
+
+} // namespace
+
+void GatherTreeData(const Image &image, pixel_type chan, size_t group_id,
+ const weighted::Header &wp_header,
+ const ModularOptions &options, TreeSamples &tree_samples,
+ size_t *total_pixels) {
+ const Channel &channel = image.channel[chan];
+
+ JXL_DEBUG_V(7, "Learning %" PRIuS "x%" PRIuS " channel %d", channel.w,
+ channel.h, chan);
+
+ std::array<pixel_type, kNumStaticProperties> static_props = {
+ {chan, (int)group_id}};
+ Properties properties(kNumNonrefProperties +
+ kExtraPropsPerChannel * options.max_properties);
+ double pixel_fraction = std::min(1.0f, options.nb_repeats);
+ // a fraction of 0 is used to disable learning entirely.
+ if (pixel_fraction > 0) {
+ pixel_fraction = std::max(pixel_fraction,
+ std::min(1.0, 1024.0 / (channel.w * channel.h)));
+ }
+ uint64_t threshold =
+ (std::numeric_limits<uint64_t>::max() >> 32) * pixel_fraction;
+ uint64_t s[2] = {static_cast<uint64_t>(0x94D049BB133111EBull),
+ static_cast<uint64_t>(0xBF58476D1CE4E5B9ull)};
+ // Xorshift128+ adapted from xorshift128+-inl.h
+ auto use_sample = [&]() {
+ auto s1 = s[0];
+ const auto s0 = s[1];
+ const auto bits = s1 + s0; // b, c
+ s[0] = s0;
+ s1 ^= s1 << 23;
+ s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
+ s[1] = s1;
+ return (bits >> 32) <= threshold;
+ };
+
+ const intptr_t onerow = channel.plane.PixelsPerRow();
+ Channel references(properties.size() - kNumNonrefProperties, channel.w);
+ weighted::State wp_state(wp_header, channel.w, channel.h);
+ tree_samples.PrepareForSamples(pixel_fraction * channel.h * channel.w + 64);
+ for (size_t y = 0; y < channel.h; y++) {
+ const pixel_type *JXL_RESTRICT p = channel.Row(y);
+ PrecomputeReferences(channel, y, image, chan, &references);
+ InitPropsRow(&properties, static_props, y);
+ // TODO(veluca): avoid computing WP if we don't use its property or
+ // predictions.
+ for (size_t x = 0; x < channel.w; x++) {
+ pixel_type_w pred[kNumModularPredictors];
+ if (tree_samples.NumPredictors() != 1) {
+ PredictLearnAll(&properties, channel.w, p + x, onerow, x, y, references,
+ &wp_state, pred);
+ } else {
+ pred[static_cast<int>(tree_samples.PredictorFromIndex(0))] =
+ PredictLearn(&properties, channel.w, p + x, onerow, x, y,
+ tree_samples.PredictorFromIndex(0), references,
+ &wp_state)
+ .guess;
+ }
+ (*total_pixels)++;
+ if (use_sample()) {
+ tree_samples.AddSample(p[x], properties, pred);
+ }
+ wp_state.UpdateErrors(p[x], x, y, channel.w);
+ }
+ }
+}
+
+Tree LearnTree(TreeSamples &&tree_samples, size_t total_pixels,
+ const ModularOptions &options,
+ const std::vector<ModularMultiplierInfo> &multiplier_info = {},
+ StaticPropRange static_prop_range = {}) {
+ for (size_t i = 0; i < kNumStaticProperties; i++) {
+ if (static_prop_range[i][1] == 0) {
+ static_prop_range[i][1] = std::numeric_limits<uint32_t>::max();
+ }
+ }
+ if (!tree_samples.HasSamples()) {
+ Tree tree;
+ tree.emplace_back();
+ tree.back().predictor = tree_samples.PredictorFromIndex(0);
+ tree.back().property = -1;
+ tree.back().predictor_offset = 0;
+ tree.back().multiplier = 1;
+ return tree;
+ }
+ float pixel_fraction = tree_samples.NumSamples() * 1.0f / total_pixels;
+ float required_cost = pixel_fraction * 0.9 + 0.1;
+ tree_samples.AllSamplesDone();
+ Tree tree;
+ ComputeBestTree(tree_samples,
+ options.splitting_heuristics_node_threshold * required_cost,
+ multiplier_info, static_prop_range,
+ options.fast_decode_multiplier, &tree);
+ return tree;
+}
+
+Status EncodeModularChannelMAANS(const Image &image, pixel_type chan,
+ const weighted::Header &wp_header,
+ const Tree &global_tree, Token **tokenpp,
+ AuxOut *aux_out, size_t group_id,
+ bool skip_encoder_fast_path) {
+ const Channel &channel = image.channel[chan];
+ Token *tokenp = *tokenpp;
+ JXL_ASSERT(channel.w != 0 && channel.h != 0);
+
+ Image3F predictor_img;
+ if (kWantDebug) predictor_img = Image3F(channel.w, channel.h);
+
+ JXL_DEBUG_V(6,
+ "Encoding %" PRIuS "x%" PRIuS
+ " channel %d, "
+ "(shift=%i,%i)",
+ channel.w, channel.h, chan, channel.hshift, channel.vshift);
+
+ std::array<pixel_type, kNumStaticProperties> static_props = {
+ {chan, (int)group_id}};
+ bool use_wp, is_wp_only;
+ bool is_gradient_only;
+ size_t num_props;
+ FlatTree tree = FilterTree(global_tree, static_props, &num_props, &use_wp,
+ &is_wp_only, &is_gradient_only);
+ Properties properties(num_props);
+ MATreeLookup tree_lookup(tree);
+ JXL_DEBUG_V(3, "Encoding using a MA tree with %" PRIuS " nodes", tree.size());
+
+ // Check if this tree is a WP-only tree with a small enough property value
+ // range.
+ // Initialized to avoid clang-tidy complaining.
+ uint16_t context_lookup[2 * kPropRangeFast] = {};
+ int8_t offsets[2 * kPropRangeFast] = {};
+ if (is_wp_only) {
+ is_wp_only = TreeToLookupTable(tree, context_lookup, offsets);
+ }
+ if (is_gradient_only) {
+ is_gradient_only = TreeToLookupTable(tree, context_lookup, offsets);
+ }
+
+ if (is_wp_only && !skip_encoder_fast_path) {
+ for (size_t c = 0; c < 3; c++) {
+ FillImage(static_cast<float>(PredictorColor(Predictor::Weighted)[c]),
+ &predictor_img.Plane(c));
+ }
+ const intptr_t onerow = channel.plane.PixelsPerRow();
+ weighted::State wp_state(wp_header, channel.w, channel.h);
+ Properties properties(1);
+ for (size_t y = 0; y < channel.h; y++) {
+ const pixel_type *JXL_RESTRICT r = channel.Row(y);
+ for (size_t x = 0; x < channel.w; x++) {
+ size_t offset = 0;
+ pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
+ pixel_type_w top = (y ? *(r + x - onerow) : left);
+ pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
+ pixel_type_w topright =
+ (x + 1 < channel.w && y ? *(r + x + 1 - onerow) : top);
+ pixel_type_w toptop = (y > 1 ? *(r + x - onerow - onerow) : top);
+ int32_t guess = wp_state.Predict</*compute_properties=*/true>(
+ x, y, channel.w, top, left, topright, topleft, toptop, &properties,
+ offset);
+ uint32_t pos =
+ kPropRangeFast + std::min(std::max(-kPropRangeFast, properties[0]),
+ kPropRangeFast - 1);
+ uint32_t ctx_id = context_lookup[pos];
+ int32_t residual = r[x] - guess - offsets[pos];
+ *tokenp++ = Token(ctx_id, PackSigned(residual));
+ wp_state.UpdateErrors(r[x], x, y, channel.w);
+ }
+ }
+ } else if (tree.size() == 1 && tree[0].predictor == Predictor::Gradient &&
+ tree[0].multiplier == 1 && tree[0].predictor_offset == 0 &&
+ !skip_encoder_fast_path) {
+ for (size_t c = 0; c < 3; c++) {
+ FillImage(static_cast<float>(PredictorColor(Predictor::Gradient)[c]),
+ &predictor_img.Plane(c));
+ }
+ const intptr_t onerow = channel.plane.PixelsPerRow();
+ for (size_t y = 0; y < channel.h; y++) {
+ const pixel_type *JXL_RESTRICT r = channel.Row(y);
+ for (size_t x = 0; x < channel.w; x++) {
+ pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
+ pixel_type_w top = (y ? *(r + x - onerow) : left);
+ pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
+ int32_t guess = ClampedGradient(top, left, topleft);
+ int32_t residual = r[x] - guess;
+ *tokenp++ = Token(tree[0].childID, PackSigned(residual));
+ }
+ }
+ } else if (is_gradient_only && !skip_encoder_fast_path) {
+ for (size_t c = 0; c < 3; c++) {
+ FillImage(static_cast<float>(PredictorColor(Predictor::Gradient)[c]),
+ &predictor_img.Plane(c));
+ }
+ const intptr_t onerow = channel.plane.PixelsPerRow();
+ for (size_t y = 0; y < channel.h; y++) {
+ const pixel_type *JXL_RESTRICT r = channel.Row(y);
+ for (size_t x = 0; x < channel.w; x++) {
+ pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
+ pixel_type_w top = (y ? *(r + x - onerow) : left);
+ pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
+ int32_t guess = ClampedGradient(top, left, topleft);
+ uint32_t pos =
+ kPropRangeFast +
+ std::min<pixel_type_w>(
+ std::max<pixel_type_w>(-kPropRangeFast, top + left - topleft),
+ kPropRangeFast - 1);
+ uint32_t ctx_id = context_lookup[pos];
+ int32_t residual = r[x] - guess - offsets[pos];
+ *tokenp++ = Token(ctx_id, PackSigned(residual));
+ }
+ }
+ } else if (tree.size() == 1 && tree[0].predictor == Predictor::Zero &&
+ tree[0].multiplier == 1 && tree[0].predictor_offset == 0 &&
+ !skip_encoder_fast_path) {
+ for (size_t c = 0; c < 3; c++) {
+ FillImage(static_cast<float>(PredictorColor(Predictor::Zero)[c]),
+ &predictor_img.Plane(c));
+ }
+ for (size_t y = 0; y < channel.h; y++) {
+ const pixel_type *JXL_RESTRICT p = channel.Row(y);
+ for (size_t x = 0; x < channel.w; x++) {
+ *tokenp++ = Token(tree[0].childID, PackSigned(p[x]));
+ }
+ }
+ } else if (tree.size() == 1 && tree[0].predictor != Predictor::Weighted &&
+ (tree[0].multiplier & (tree[0].multiplier - 1)) == 0 &&
+ tree[0].predictor_offset == 0 && !skip_encoder_fast_path) {
+ // multiplier is a power of 2.
+ for (size_t c = 0; c < 3; c++) {
+ FillImage(static_cast<float>(PredictorColor(tree[0].predictor)[c]),
+ &predictor_img.Plane(c));
+ }
+ uint32_t mul_shift = FloorLog2Nonzero((uint32_t)tree[0].multiplier);
+ const intptr_t onerow = channel.plane.PixelsPerRow();
+ for (size_t y = 0; y < channel.h; y++) {
+ const pixel_type *JXL_RESTRICT r = channel.Row(y);
+ for (size_t x = 0; x < channel.w; x++) {
+ PredictionResult pred = PredictNoTreeNoWP(channel.w, r + x, onerow, x,
+ y, tree[0].predictor);
+ pixel_type_w residual = r[x] - pred.guess;
+ JXL_DASSERT((residual >> mul_shift) * tree[0].multiplier == residual);
+ *tokenp++ = Token(tree[0].childID, PackSigned(residual >> mul_shift));
+ }
+ }
+
+ } else if (!use_wp && !skip_encoder_fast_path) {
+ const intptr_t onerow = channel.plane.PixelsPerRow();
+ Channel references(properties.size() - kNumNonrefProperties, channel.w);
+ for (size_t y = 0; y < channel.h; y++) {
+ const pixel_type *JXL_RESTRICT p = channel.Row(y);
+ PrecomputeReferences(channel, y, image, chan, &references);
+ float *pred_img_row[3];
+ if (kWantDebug) {
+ for (size_t c = 0; c < 3; c++) {
+ pred_img_row[c] = predictor_img.PlaneRow(c, y);
+ }
+ }
+ InitPropsRow(&properties, static_props, y);
+ for (size_t x = 0; x < channel.w; x++) {
+ PredictionResult res =
+ PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y,
+ tree_lookup, references);
+ if (kWantDebug) {
+ for (size_t i = 0; i < 3; i++) {
+ pred_img_row[i][x] = PredictorColor(res.predictor)[i];
+ }
+ }
+ pixel_type_w residual = p[x] - res.guess;
+ JXL_ASSERT(residual % res.multiplier == 0);
+ *tokenp++ = Token(res.context, PackSigned(residual / res.multiplier));
+ }
+ }
+ } else {
+ const intptr_t onerow = channel.plane.PixelsPerRow();
+ Channel references(properties.size() - kNumNonrefProperties, channel.w);
+ weighted::State wp_state(wp_header, channel.w, channel.h);
+ for (size_t y = 0; y < channel.h; y++) {
+ const pixel_type *JXL_RESTRICT p = channel.Row(y);
+ PrecomputeReferences(channel, y, image, chan, &references);
+ float *pred_img_row[3];
+ if (kWantDebug) {
+ for (size_t c = 0; c < 3; c++) {
+ pred_img_row[c] = predictor_img.PlaneRow(c, y);
+ }
+ }
+ InitPropsRow(&properties, static_props, y);
+ for (size_t x = 0; x < channel.w; x++) {
+ PredictionResult res =
+ PredictTreeWP(&properties, channel.w, p + x, onerow, x, y,
+ tree_lookup, references, &wp_state);
+ if (kWantDebug) {
+ for (size_t i = 0; i < 3; i++) {
+ pred_img_row[i][x] = PredictorColor(res.predictor)[i];
+ }
+ }
+ pixel_type_w residual = p[x] - res.guess;
+ JXL_ASSERT(residual % res.multiplier == 0);
+ *tokenp++ = Token(res.context, PackSigned(residual / res.multiplier));
+ wp_state.UpdateErrors(p[x], x, y, channel.w);
+ }
+ }
+ }
+ if (kWantDebug && WantDebugOutput(aux_out)) {
+ aux_out->DumpImage(
+ ("pred_" + ToString(group_id) + "_" + ToString(chan)).c_str(),
+ predictor_img);
+ }
+ *tokenpp = tokenp;
+ return true;
+}
+
+Status ModularEncode(const Image &image, const ModularOptions &options,
+ BitWriter *writer, AuxOut *aux_out, size_t layer,
+ size_t group_id, TreeSamples *tree_samples,
+ size_t *total_pixels, const Tree *tree,
+ GroupHeader *header, std::vector<Token> *tokens,
+ size_t *width) {
+ if (image.error) return JXL_FAILURE("Invalid image");
+ size_t nb_channels = image.channel.size();
+ JXL_DEBUG_V(
+ 2, "Encoding %" PRIuS "-channel, %i-bit, %" PRIuS "x%" PRIuS " image.",
+ nb_channels, image.bitdepth, image.w, image.h);
+
+ if (nb_channels < 1) {
+ return true; // is there any use for a zero-channel image?
+ }
+
+ // encode transforms
+ GroupHeader header_storage;
+ if (header == nullptr) header = &header_storage;
+ Bundle::Init(header);
+ if (options.predictor == Predictor::Weighted) {
+ weighted::PredictorMode(options.wp_mode, &header->wp_header);
+ }
+ header->transforms = image.transform;
+ // This doesn't actually work
+ if (tree != nullptr) {
+ header->use_global_tree = true;
+ }
+ if (tree_samples == nullptr && tree == nullptr) {
+ JXL_RETURN_IF_ERROR(Bundle::Write(*header, writer, layer, aux_out));
+ }
+
+ TreeSamples tree_samples_storage;
+ size_t total_pixels_storage = 0;
+ if (!total_pixels) total_pixels = &total_pixels_storage;
+ // If there's no tree, compute one (or gather data to).
+ if (tree == nullptr) {
+ bool gather_data = tree_samples != nullptr;
+ if (tree_samples == nullptr) {
+ JXL_RETURN_IF_ERROR(tree_samples_storage.SetPredictor(
+ options.predictor, options.wp_tree_mode));
+ JXL_RETURN_IF_ERROR(tree_samples_storage.SetProperties(
+ options.splitting_heuristics_properties, options.wp_tree_mode));
+ std::vector<pixel_type> pixel_samples;
+ std::vector<pixel_type> diff_samples;
+ std::vector<uint32_t> group_pixel_count;
+ std::vector<uint32_t> channel_pixel_count;
+ CollectPixelSamples(image, options, 0, group_pixel_count,
+ channel_pixel_count, pixel_samples, diff_samples);
+ std::vector<ModularMultiplierInfo> dummy_multiplier_info;
+ StaticPropRange range;
+ tree_samples_storage.PreQuantizeProperties(
+ range, dummy_multiplier_info, group_pixel_count, channel_pixel_count,
+ pixel_samples, diff_samples, options.max_property_values);
+ }
+ for (size_t i = 0; i < nb_channels; i++) {
+ if (!image.channel[i].w || !image.channel[i].h) {
+ continue; // skip empty channels
+ }
+ if (i >= image.nb_meta_channels &&
+ (image.channel[i].w > options.max_chan_size ||
+ image.channel[i].h > options.max_chan_size)) {
+ break;
+ }
+ GatherTreeData(image, i, group_id, header->wp_header, options,
+ gather_data ? *tree_samples : tree_samples_storage,
+ total_pixels);
+ }
+ if (gather_data) return true;
+ }
+
+ JXL_ASSERT((tree == nullptr) == (tokens == nullptr));
+
+ Tree tree_storage;
+ std::vector<std::vector<Token>> tokens_storage(1);
+ // Compute tree.
+ if (tree == nullptr) {
+ EntropyEncodingData code;
+ std::vector<uint8_t> context_map;
+
+ std::vector<std::vector<Token>> tree_tokens(1);
+ tree_storage =
+ LearnTree(std::move(tree_samples_storage), *total_pixels, options);
+ tree = &tree_storage;
+ tokens = &tokens_storage[0];
+
+ Tree decoded_tree;
+ TokenizeTree(*tree, &tree_tokens[0], &decoded_tree);
+ JXL_ASSERT(tree->size() == decoded_tree.size());
+ tree_storage = std::move(decoded_tree);
+
+ if (kWantDebug && kPrintTree && WantDebugOutput(aux_out)) {
+ PrintTree(*tree, aux_out->debug_prefix + "/tree_" + ToString(group_id));
+ }
+ // Write tree
+ BuildAndEncodeHistograms(HistogramParams(), kNumTreeContexts, tree_tokens,
+ &code, &context_map, writer, kLayerModularTree,
+ aux_out);
+ WriteTokens(tree_tokens[0], code, context_map, writer, kLayerModularTree,
+ aux_out);
+ }
+
+ size_t image_width = 0;
+ size_t total_tokens = 0;
+ for (size_t i = 0; i < nb_channels; i++) {
+ if (i >= image.nb_meta_channels &&
+ (image.channel[i].w > options.max_chan_size ||
+ image.channel[i].h > options.max_chan_size)) {
+ break;
+ }
+ if (image.channel[i].w > image_width) image_width = image.channel[i].w;
+ total_tokens += image.channel[i].w * image.channel[i].h;
+ }
+ if (options.zero_tokens) {
+ tokens->resize(tokens->size() + total_tokens, {0, 0});
+ } else {
+ // Do one big allocation for all the tokens we'll need,
+ // to avoid reallocs that might require copying.
+ size_t pos = tokens->size();
+ tokens->resize(pos + total_tokens);
+ Token *tokenp = tokens->data() + pos;
+ for (size_t i = 0; i < nb_channels; i++) {
+ if (!image.channel[i].w || !image.channel[i].h) {
+ continue; // skip empty channels
+ }
+ if (i >= image.nb_meta_channels &&
+ (image.channel[i].w > options.max_chan_size ||
+ image.channel[i].h > options.max_chan_size)) {
+ break;
+ }
+ JXL_RETURN_IF_ERROR(EncodeModularChannelMAANS(
+ image, i, header->wp_header, *tree, &tokenp, aux_out, group_id,
+ options.skip_encoder_fast_path));
+ }
+ // Make sure we actually wrote all tokens
+ JXL_CHECK(tokenp == tokens->data() + tokens->size());
+ }
+
+ // Write data if not using a global tree/ANS stream.
+ if (!header->use_global_tree) {
+ EntropyEncodingData code;
+ std::vector<uint8_t> context_map;
+ HistogramParams histo_params;
+ histo_params.image_widths.push_back(image_width);
+ BuildAndEncodeHistograms(histo_params, (tree->size() + 1) / 2,
+ tokens_storage, &code, &context_map, writer, layer,
+ aux_out);
+ WriteTokens(tokens_storage[0], code, context_map, writer, layer, aux_out);
+ } else {
+ *width = image_width;
+ }
+ return true;
+}
+
+Status ModularGenericCompress(Image &image, const ModularOptions &opts,
+ BitWriter *writer, AuxOut *aux_out, size_t layer,
+ size_t group_id, TreeSamples *tree_samples,
+ size_t *total_pixels, const Tree *tree,
+ GroupHeader *header, std::vector<Token> *tokens,
+ size_t *width) {
+ if (image.w == 0 || image.h == 0) return true;
+ ModularOptions options = opts; // Make a copy to modify it.
+
+ if (options.predictor == static_cast<Predictor>(-1)) {
+ options.predictor = Predictor::Gradient;
+ }
+
+ size_t bits = writer ? writer->BitsWritten() : 0;
+ JXL_RETURN_IF_ERROR(ModularEncode(image, options, writer, aux_out, layer,
+ group_id, tree_samples, total_pixels, tree,
+ header, tokens, width));
+ bits = writer ? writer->BitsWritten() - bits : 0;
+ if (writer) {
+ JXL_DEBUG_V(4,
+ "Modular-encoded a %" PRIuS "x%" PRIuS
+ " bitdepth=%i nbchans=%" PRIuS " image in %" PRIuS " bytes",
+ image.w, image.h, image.bitdepth, image.channel.size(),
+ bits / 8);
+ }
+ (void)bits;
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/modular/encoding/enc_encoding.h b/third_party/jpeg-xl/lib/jxl/modular/encoding/enc_encoding.h
new file mode 100644
index 0000000000..04df504750
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/encoding/enc_encoding.h
@@ -0,0 +1,47 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_ENC_ENCODING_H_
+#define LIB_JXL_MODULAR_ENCODING_ENC_ENCODING_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/enc_bit_writer.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/encoding/enc_ma.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/options.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+Tree LearnTree(TreeSamples &&tree_samples, size_t total_pixels,
+ const ModularOptions &options,
+ const std::vector<ModularMultiplierInfo> &multiplier_info = {},
+ StaticPropRange static_prop_range = {});
+
+// TODO(veluca): make cleaner interfaces.
+
+Status ModularGenericCompress(
+ Image &image, const ModularOptions &opts, BitWriter *writer,
+ AuxOut *aux_out = nullptr, size_t layer = 0, size_t group_id = 0,
+ // For gathering data for producing a global tree.
+ TreeSamples *tree_samples = nullptr, size_t *total_pixels = nullptr,
+ // For encoding with global tree.
+ const Tree *tree = nullptr, GroupHeader *header = nullptr,
+ std::vector<Token> *tokens = nullptr, size_t *widths = nullptr);
+} // namespace jxl
+
+#endif // LIB_JXL_MODULAR_ENCODING_ENC_ENCODING_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/encoding/enc_ma.cc b/third_party/jpeg-xl/lib/jxl/modular/encoding/enc_ma.cc
new file mode 100644
index 0000000000..d0f6b47566
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/encoding/enc_ma.cc
@@ -0,0 +1,1023 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/encoding/enc_ma.h"
+
+#include <algorithm>
+#include <limits>
+#include <numeric>
+#include <queue>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "lib/jxl/modular/encoding/ma_common.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/modular/encoding/enc_ma.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/options.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Eq;
+using hwy::HWY_NAMESPACE::IfThenElse;
+using hwy::HWY_NAMESPACE::Lt;
+
+const HWY_FULL(float) df;
+const HWY_FULL(int32_t) di;
+size_t Padded(size_t x) { return RoundUpTo(x, Lanes(df)); }
+
+float EstimateBits(const int32_t *counts, int32_t *rounded_counts,
+ size_t num_symbols) {
+ // Try to approximate the effect of rounding up nonzero probabilities.
+ int32_t total = std::accumulate(counts, counts + num_symbols, 0);
+ const auto min = Set(di, (total + ANS_TAB_SIZE - 1) >> ANS_LOG_TAB_SIZE);
+ const auto zero_i = Zero(di);
+ for (size_t i = 0; i < num_symbols; i += Lanes(df)) {
+ auto counts_v = LoadU(di, &counts[i]);
+ counts_v = IfThenElse(Eq(counts_v, zero_i), zero_i,
+ IfThenElse(Lt(counts_v, min), min, counts_v));
+ StoreU(counts_v, di, &rounded_counts[i]);
+ }
+ // Compute entropy of the "rounded" probabilities.
+ const auto zero = Zero(df);
+ const size_t total_scalar =
+ std::accumulate(rounded_counts, rounded_counts + num_symbols, 0);
+ const auto inv_total = Set(df, 1.0f / total_scalar);
+ auto bits_lanes = Zero(df);
+ auto total_v = Set(di, total_scalar);
+ for (size_t i = 0; i < num_symbols; i += Lanes(df)) {
+ const auto counts_v = ConvertTo(df, LoadU(di, &counts[i]));
+ const auto round_counts_v = LoadU(di, &rounded_counts[i]);
+ const auto probs = Mul(ConvertTo(df, round_counts_v), inv_total);
+ const auto nbps = IfThenElse(Eq(round_counts_v, total_v), BitCast(di, zero),
+ BitCast(di, FastLog2f(df, probs)));
+ bits_lanes = Sub(bits_lanes, IfThenElse(Eq(counts_v, zero), zero,
+ Mul(counts_v, BitCast(df, nbps))));
+ }
+ return GetLane(SumOfLanes(df, bits_lanes));
+}
+
+void MakeSplitNode(size_t pos, int property, int splitval, Predictor lpred,
+ int64_t loff, Predictor rpred, int64_t roff, Tree *tree) {
+ // Note that the tree splits on *strictly greater*.
+ (*tree)[pos].lchild = tree->size();
+ (*tree)[pos].rchild = tree->size() + 1;
+ (*tree)[pos].splitval = splitval;
+ (*tree)[pos].property = property;
+ tree->emplace_back();
+ tree->back().property = -1;
+ tree->back().predictor = rpred;
+ tree->back().predictor_offset = roff;
+ tree->back().multiplier = 1;
+ tree->emplace_back();
+ tree->back().property = -1;
+ tree->back().predictor = lpred;
+ tree->back().predictor_offset = loff;
+ tree->back().multiplier = 1;
+}
+
+enum class IntersectionType { kNone, kPartial, kInside };
+IntersectionType BoxIntersects(StaticPropRange needle, StaticPropRange haystack,
+ uint32_t &partial_axis, uint32_t &partial_val) {
+ bool partial = false;
+ for (size_t i = 0; i < kNumStaticProperties; i++) {
+ if (haystack[i][0] >= needle[i][1]) {
+ return IntersectionType::kNone;
+ }
+ if (haystack[i][1] <= needle[i][0]) {
+ return IntersectionType::kNone;
+ }
+ if (haystack[i][0] <= needle[i][0] && haystack[i][1] >= needle[i][1]) {
+ continue;
+ }
+ partial = true;
+ partial_axis = i;
+ if (haystack[i][0] > needle[i][0] && haystack[i][0] < needle[i][1]) {
+ partial_val = haystack[i][0] - 1;
+ } else {
+ JXL_DASSERT(haystack[i][1] > needle[i][0] &&
+ haystack[i][1] < needle[i][1]);
+ partial_val = haystack[i][1] - 1;
+ }
+ }
+ return partial ? IntersectionType::kPartial : IntersectionType::kInside;
+}
+
+void SplitTreeSamples(TreeSamples &tree_samples, size_t begin, size_t pos,
+ size_t end, size_t prop) {
+ auto cmp = [&](size_t a, size_t b) {
+ return int32_t(tree_samples.Property(prop, a)) -
+ int32_t(tree_samples.Property(prop, b));
+ };
+ Rng rng(0);
+ while (end > begin + 1) {
+ {
+ size_t pivot = rng.UniformU(begin, end);
+ tree_samples.Swap(begin, pivot);
+ }
+ size_t pivot_begin = begin;
+ size_t pivot_end = pivot_begin + 1;
+ for (size_t i = begin + 1; i < end; i++) {
+ JXL_DASSERT(i >= pivot_end);
+ JXL_DASSERT(pivot_end > pivot_begin);
+ int32_t cmp_result = cmp(i, pivot_begin);
+ if (cmp_result < 0) { // i < pivot, move pivot forward and put i before
+ // the pivot.
+ tree_samples.ThreeShuffle(pivot_begin, pivot_end, i);
+ pivot_begin++;
+ pivot_end++;
+ } else if (cmp_result == 0) {
+ tree_samples.Swap(pivot_end, i);
+ pivot_end++;
+ }
+ }
+ JXL_DASSERT(pivot_begin >= begin);
+ JXL_DASSERT(pivot_end > pivot_begin);
+ JXL_DASSERT(pivot_end <= end);
+ for (size_t i = begin; i < pivot_begin; i++) {
+ JXL_DASSERT(cmp(i, pivot_begin) < 0);
+ }
+ for (size_t i = pivot_end; i < end; i++) {
+ JXL_DASSERT(cmp(i, pivot_begin) > 0);
+ }
+ for (size_t i = pivot_begin; i < pivot_end; i++) {
+ JXL_DASSERT(cmp(i, pivot_begin) == 0);
+ }
+ // We now have that [begin, pivot_begin) is < pivot, [pivot_begin,
+ // pivot_end) is = pivot, and [pivot_end, end) is > pivot.
+ // If pos falls in the first or the last interval, we continue in that
+ // interval; otherwise, we are done.
+ if (pivot_begin > pos) {
+ end = pivot_begin;
+ } else if (pivot_end < pos) {
+ begin = pivot_end;
+ } else {
+ break;
+ }
+ }
+}
+
+void FindBestSplit(TreeSamples &tree_samples, float threshold,
+ const std::vector<ModularMultiplierInfo> &mul_info,
+ StaticPropRange initial_static_prop_range,
+ float fast_decode_multiplier, Tree *tree) {
+ struct NodeInfo {
+ size_t pos;
+ size_t begin;
+ size_t end;
+ uint64_t used_properties;
+ StaticPropRange static_prop_range;
+ };
+ std::vector<NodeInfo> nodes;
+ nodes.push_back(NodeInfo{0, 0, tree_samples.NumDistinctSamples(), 0,
+ initial_static_prop_range});
+
+ size_t num_predictors = tree_samples.NumPredictors();
+ size_t num_properties = tree_samples.NumProperties();
+
+ // TODO(veluca): consider parallelizing the search (processing multiple nodes
+ // at a time).
+ while (!nodes.empty()) {
+ size_t pos = nodes.back().pos;
+ size_t begin = nodes.back().begin;
+ size_t end = nodes.back().end;
+ uint64_t used_properties = nodes.back().used_properties;
+ StaticPropRange static_prop_range = nodes.back().static_prop_range;
+ nodes.pop_back();
+ if (begin == end) continue;
+
+ struct SplitInfo {
+ size_t prop = 0;
+ uint32_t val = 0;
+ size_t pos = 0;
+ float lcost = std::numeric_limits<float>::max();
+ float rcost = std::numeric_limits<float>::max();
+ Predictor lpred = Predictor::Zero;
+ Predictor rpred = Predictor::Zero;
+ float Cost() { return lcost + rcost; }
+ };
+
+ SplitInfo best_split_static_constant;
+ SplitInfo best_split_static;
+ SplitInfo best_split_nonstatic;
+ SplitInfo best_split_nowp;
+
+ JXL_DASSERT(begin <= end);
+ JXL_DASSERT(end <= tree_samples.NumDistinctSamples());
+
+ // Compute the maximum token in the range.
+ size_t max_symbols = 0;
+ for (size_t pred = 0; pred < num_predictors; pred++) {
+ for (size_t i = begin; i < end; i++) {
+ uint32_t tok = tree_samples.Token(pred, i);
+ max_symbols = max_symbols > tok + 1 ? max_symbols : tok + 1;
+ }
+ }
+ max_symbols = Padded(max_symbols);
+ std::vector<int32_t> rounded_counts(max_symbols);
+ std::vector<int32_t> counts(max_symbols * num_predictors);
+ std::vector<uint32_t> tot_extra_bits(num_predictors);
+ for (size_t pred = 0; pred < num_predictors; pred++) {
+ for (size_t i = begin; i < end; i++) {
+ counts[pred * max_symbols + tree_samples.Token(pred, i)] +=
+ tree_samples.Count(i);
+ tot_extra_bits[pred] +=
+ tree_samples.NBits(pred, i) * tree_samples.Count(i);
+ }
+ }
+
+ float base_bits;
+ {
+ size_t pred = tree_samples.PredictorIndex((*tree)[pos].predictor);
+ base_bits = EstimateBits(counts.data() + pred * max_symbols,
+ rounded_counts.data(), max_symbols) +
+ tot_extra_bits[pred];
+ }
+
+ SplitInfo *best = &best_split_nonstatic;
+
+ SplitInfo forced_split;
+ // The multiplier ranges cut halfway through the current ranges of static
+ // properties. We do this even if the current node is not a leaf, to
+ // minimize the number of nodes in the resulting tree.
+ for (size_t i = 0; i < mul_info.size(); i++) {
+ uint32_t axis, val;
+ IntersectionType t =
+ BoxIntersects(static_prop_range, mul_info[i].range, axis, val);
+ if (t == IntersectionType::kNone) continue;
+ if (t == IntersectionType::kInside) {
+ (*tree)[pos].multiplier = mul_info[i].multiplier;
+ break;
+ }
+ if (t == IntersectionType::kPartial) {
+ forced_split.val = tree_samples.QuantizeProperty(axis, val);
+ forced_split.prop = axis;
+ forced_split.lcost = forced_split.rcost = base_bits / 2 - threshold;
+ forced_split.lpred = forced_split.rpred = (*tree)[pos].predictor;
+ best = &forced_split;
+ best->pos = begin;
+ JXL_ASSERT(best->prop == tree_samples.PropertyFromIndex(best->prop));
+ for (size_t x = begin; x < end; x++) {
+ if (tree_samples.Property(best->prop, x) <= best->val) {
+ best->pos++;
+ }
+ }
+ break;
+ }
+ }
+
+ if (best != &forced_split) {
+ std::vector<int> prop_value_used_count;
+ std::vector<int> count_increase;
+ std::vector<size_t> extra_bits_increase;
+ // For each property, compute which of its values are used, and what
+ // tokens correspond to those usages. Then, iterate through the values,
+ // and compute the entropy of each side of the split (of the form `prop >
+ // threshold`). Finally, find the split that minimizes the cost.
+ struct CostInfo {
+ float cost = std::numeric_limits<float>::max();
+ float extra_cost = 0;
+ float Cost() const { return cost + extra_cost; }
+ Predictor pred; // will be uninitialized in some cases, but never used.
+ };
+ std::vector<CostInfo> costs_l;
+ std::vector<CostInfo> costs_r;
+
+ std::vector<int32_t> counts_above(max_symbols);
+ std::vector<int32_t> counts_below(max_symbols);
+
+ // The lower the threshold, the higher the expected noisiness of the
+ // estimate. Thus, discourage changing predictors.
+ float change_pred_penalty = 800.0f / (100.0f + threshold);
+ for (size_t prop = 0; prop < num_properties && base_bits > threshold;
+ prop++) {
+ costs_l.clear();
+ costs_r.clear();
+ size_t prop_size = tree_samples.NumPropertyValues(prop);
+ if (extra_bits_increase.size() < prop_size) {
+ count_increase.resize(prop_size * max_symbols);
+ extra_bits_increase.resize(prop_size);
+ }
+ // Clear prop_value_used_count (which cannot be cleared "on the go")
+ prop_value_used_count.clear();
+ prop_value_used_count.resize(prop_size);
+
+ size_t first_used = prop_size;
+ size_t last_used = 0;
+
+ // TODO(veluca): consider finding multiple splits along a single
+ // property at the same time, possibly with a bottom-up approach.
+ for (size_t i = begin; i < end; i++) {
+ size_t p = tree_samples.Property(prop, i);
+ prop_value_used_count[p]++;
+ last_used = std::max(last_used, p);
+ first_used = std::min(first_used, p);
+ }
+ costs_l.resize(last_used - first_used);
+ costs_r.resize(last_used - first_used);
+ // For all predictors, compute the right and left costs of each split.
+ for (size_t pred = 0; pred < num_predictors; pred++) {
+ // Compute cost and histogram increments for each property value.
+ for (size_t i = begin; i < end; i++) {
+ size_t p = tree_samples.Property(prop, i);
+ size_t cnt = tree_samples.Count(i);
+ size_t sym = tree_samples.Token(pred, i);
+ count_increase[p * max_symbols + sym] += cnt;
+ extra_bits_increase[p] += tree_samples.NBits(pred, i) * cnt;
+ }
+ memcpy(counts_above.data(), counts.data() + pred * max_symbols,
+ max_symbols * sizeof counts_above[0]);
+ memset(counts_below.data(), 0, max_symbols * sizeof counts_below[0]);
+ size_t extra_bits_below = 0;
+ // Exclude last used: this ensures neither counts_above nor
+ // counts_below is empty.
+ for (size_t i = first_used; i < last_used; i++) {
+ if (!prop_value_used_count[i]) continue;
+ extra_bits_below += extra_bits_increase[i];
+ // The increase for this property value has been used, and will not
+ // be used again: clear it. Also below.
+ extra_bits_increase[i] = 0;
+ for (size_t sym = 0; sym < max_symbols; sym++) {
+ counts_above[sym] -= count_increase[i * max_symbols + sym];
+ counts_below[sym] += count_increase[i * max_symbols + sym];
+ count_increase[i * max_symbols + sym] = 0;
+ }
+ float rcost = EstimateBits(counts_above.data(),
+ rounded_counts.data(), max_symbols) +
+ tot_extra_bits[pred] - extra_bits_below;
+ float lcost = EstimateBits(counts_below.data(),
+ rounded_counts.data(), max_symbols) +
+ extra_bits_below;
+ JXL_DASSERT(extra_bits_below <= tot_extra_bits[pred]);
+ float penalty = 0;
+ // Never discourage moving away from the Weighted predictor.
+ if (tree_samples.PredictorFromIndex(pred) !=
+ (*tree)[pos].predictor &&
+ (*tree)[pos].predictor != Predictor::Weighted) {
+ penalty = change_pred_penalty;
+ }
+ // If everything else is equal, disfavour Weighted (slower) and
+ // favour Zero (faster if it's the only predictor used in a
+ // group+channel combination)
+ if (tree_samples.PredictorFromIndex(pred) == Predictor::Weighted) {
+ penalty += 1e-8;
+ }
+ if (tree_samples.PredictorFromIndex(pred) == Predictor::Zero) {
+ penalty -= 1e-8;
+ }
+ if (rcost + penalty < costs_r[i - first_used].Cost()) {
+ costs_r[i - first_used].cost = rcost;
+ costs_r[i - first_used].extra_cost = penalty;
+ costs_r[i - first_used].pred =
+ tree_samples.PredictorFromIndex(pred);
+ }
+ if (lcost + penalty < costs_l[i - first_used].Cost()) {
+ costs_l[i - first_used].cost = lcost;
+ costs_l[i - first_used].extra_cost = penalty;
+ costs_l[i - first_used].pred =
+ tree_samples.PredictorFromIndex(pred);
+ }
+ }
+ }
+ // Iterate through the possible splits and find the one with minimum sum
+ // of costs of the two sides.
+ size_t split = begin;
+ for (size_t i = first_used; i < last_used; i++) {
+ if (!prop_value_used_count[i]) continue;
+ split += prop_value_used_count[i];
+ float rcost = costs_r[i - first_used].cost;
+ float lcost = costs_l[i - first_used].cost;
+ // WP was not used + we would use the WP property or predictor
+ bool adds_wp =
+ (tree_samples.PropertyFromIndex(prop) == kWPProp &&
+ (used_properties & (1LU << prop)) == 0) ||
+ ((costs_l[i - first_used].pred == Predictor::Weighted ||
+ costs_r[i - first_used].pred == Predictor::Weighted) &&
+ (*tree)[pos].predictor != Predictor::Weighted);
+ bool zero_entropy_side = rcost == 0 || lcost == 0;
+
+ SplitInfo &best =
+ prop < kNumStaticProperties
+ ? (zero_entropy_side ? best_split_static_constant
+ : best_split_static)
+ : (adds_wp ? best_split_nonstatic : best_split_nowp);
+ if (lcost + rcost < best.Cost()) {
+ best.prop = prop;
+ best.val = i;
+ best.pos = split;
+ best.lcost = lcost;
+ best.lpred = costs_l[i - first_used].pred;
+ best.rcost = rcost;
+ best.rpred = costs_r[i - first_used].pred;
+ }
+ }
+ // Clear extra_bits_increase and cost_increase for last_used.
+ extra_bits_increase[last_used] = 0;
+ for (size_t sym = 0; sym < max_symbols; sym++) {
+ count_increase[last_used * max_symbols + sym] = 0;
+ }
+ }
+
+ // Try to avoid introducing WP.
+ if (best_split_nowp.Cost() + threshold < base_bits &&
+ best_split_nowp.Cost() <= fast_decode_multiplier * best->Cost()) {
+ best = &best_split_nowp;
+ }
+ // Split along static props if possible and not significantly more
+ // expensive.
+ if (best_split_static.Cost() + threshold < base_bits &&
+ best_split_static.Cost() <= fast_decode_multiplier * best->Cost()) {
+ best = &best_split_static;
+ }
+ // Split along static props to create constant nodes if possible.
+ if (best_split_static_constant.Cost() + threshold < base_bits) {
+ best = &best_split_static_constant;
+ }
+ }
+
+ if (best->Cost() + threshold < base_bits) {
+ uint32_t p = tree_samples.PropertyFromIndex(best->prop);
+ pixel_type dequant =
+ tree_samples.UnquantizeProperty(best->prop, best->val);
+ // Split node and try to split children.
+ MakeSplitNode(pos, p, dequant, best->lpred, 0, best->rpred, 0, tree);
+ // "Sort" according to winning property
+ SplitTreeSamples(tree_samples, begin, best->pos, end, best->prop);
+ if (p >= kNumStaticProperties) {
+ used_properties |= 1 << best->prop;
+ }
+ auto new_sp_range = static_prop_range;
+ if (p < kNumStaticProperties) {
+ JXL_ASSERT(static_cast<uint32_t>(dequant + 1) <= new_sp_range[p][1]);
+ new_sp_range[p][1] = dequant + 1;
+ JXL_ASSERT(new_sp_range[p][0] < new_sp_range[p][1]);
+ }
+ nodes.push_back(NodeInfo{(*tree)[pos].rchild, begin, best->pos,
+ used_properties, new_sp_range});
+ new_sp_range = static_prop_range;
+ if (p < kNumStaticProperties) {
+ JXL_ASSERT(new_sp_range[p][0] <= static_cast<uint32_t>(dequant + 1));
+ new_sp_range[p][0] = dequant + 1;
+ JXL_ASSERT(new_sp_range[p][0] < new_sp_range[p][1]);
+ }
+ nodes.push_back(NodeInfo{(*tree)[pos].lchild, best->pos, end,
+ used_properties, new_sp_range});
+ }
+ }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(FindBestSplit); // Local function.
+
+void ComputeBestTree(TreeSamples &tree_samples, float threshold,
+ const std::vector<ModularMultiplierInfo> &mul_info,
+ StaticPropRange static_prop_range,
+ float fast_decode_multiplier, Tree *tree) {
+ // TODO(veluca): take into account that different contexts can have different
+ // uint configs.
+ //
+ // Initialize tree.
+ tree->emplace_back();
+ tree->back().property = -1;
+ tree->back().predictor = tree_samples.PredictorFromIndex(0);
+ tree->back().predictor_offset = 0;
+ tree->back().multiplier = 1;
+ JXL_ASSERT(tree_samples.NumProperties() < 64);
+
+ JXL_ASSERT(tree_samples.NumDistinctSamples() <=
+ std::numeric_limits<uint32_t>::max());
+ HWY_DYNAMIC_DISPATCH(FindBestSplit)
+ (tree_samples, threshold, mul_info, static_prop_range, fast_decode_multiplier,
+ tree);
+}
+
+constexpr int32_t TreeSamples::kPropertyRange;
+constexpr uint32_t TreeSamples::kDedupEntryUnused;
+
+Status TreeSamples::SetPredictor(Predictor predictor,
+ ModularOptions::TreeMode wp_tree_mode) {
+ if (wp_tree_mode == ModularOptions::TreeMode::kWPOnly) {
+ predictors = {Predictor::Weighted};
+ residuals.resize(1);
+ return true;
+ }
+ if (wp_tree_mode == ModularOptions::TreeMode::kNoWP &&
+ predictor == Predictor::Weighted) {
+ return JXL_FAILURE("Invalid predictor settings");
+ }
+ if (predictor == Predictor::Variable) {
+ for (size_t i = 0; i < kNumModularPredictors; i++) {
+ predictors.push_back(static_cast<Predictor>(i));
+ }
+ std::swap(predictors[0], predictors[static_cast<int>(Predictor::Weighted)]);
+ std::swap(predictors[1], predictors[static_cast<int>(Predictor::Gradient)]);
+ } else if (predictor == Predictor::Best) {
+ predictors = {Predictor::Weighted, Predictor::Gradient};
+ } else {
+ predictors = {predictor};
+ }
+ if (wp_tree_mode == ModularOptions::TreeMode::kNoWP) {
+ auto wp_it =
+ std::find(predictors.begin(), predictors.end(), Predictor::Weighted);
+ if (wp_it != predictors.end()) {
+ predictors.erase(wp_it);
+ }
+ }
+ residuals.resize(predictors.size());
+ return true;
+}
+
+Status TreeSamples::SetProperties(const std::vector<uint32_t> &properties,
+ ModularOptions::TreeMode wp_tree_mode) {
+ props_to_use = properties;
+ if (wp_tree_mode == ModularOptions::TreeMode::kWPOnly) {
+ props_to_use = {static_cast<uint32_t>(kWPProp)};
+ }
+ if (wp_tree_mode == ModularOptions::TreeMode::kGradientOnly) {
+ props_to_use = {static_cast<uint32_t>(kGradientProp)};
+ }
+ if (wp_tree_mode == ModularOptions::TreeMode::kNoWP) {
+ auto it = std::find(props_to_use.begin(), props_to_use.end(), kWPProp);
+ if (it != props_to_use.end()) {
+ props_to_use.erase(it);
+ }
+ }
+ if (props_to_use.empty()) {
+ return JXL_FAILURE("Invalid property set configuration");
+ }
+ props.resize(props_to_use.size());
+ return true;
+}
+
+void TreeSamples::InitTable(size_t size) {
+ JXL_DASSERT((size & (size - 1)) == 0);
+ if (dedup_table_.size() == size) return;
+ dedup_table_.resize(size, kDedupEntryUnused);
+ for (size_t i = 0; i < NumDistinctSamples(); i++) {
+ if (sample_counts[i] != std::numeric_limits<uint16_t>::max()) {
+ AddToTable(i);
+ }
+ }
+}
+
+bool TreeSamples::AddToTableAndMerge(size_t a) {
+ size_t pos1 = Hash1(a);
+ size_t pos2 = Hash2(a);
+ if (dedup_table_[pos1] != kDedupEntryUnused &&
+ IsSameSample(a, dedup_table_[pos1])) {
+ JXL_DASSERT(sample_counts[a] == 1);
+ sample_counts[dedup_table_[pos1]]++;
+ // Remove from hash table samples that are saturated.
+ if (sample_counts[dedup_table_[pos1]] ==
+ std::numeric_limits<uint16_t>::max()) {
+ dedup_table_[pos1] = kDedupEntryUnused;
+ }
+ return true;
+ }
+ if (dedup_table_[pos2] != kDedupEntryUnused &&
+ IsSameSample(a, dedup_table_[pos2])) {
+ JXL_DASSERT(sample_counts[a] == 1);
+ sample_counts[dedup_table_[pos2]]++;
+ // Remove from hash table samples that are saturated.
+ if (sample_counts[dedup_table_[pos2]] ==
+ std::numeric_limits<uint16_t>::max()) {
+ dedup_table_[pos2] = kDedupEntryUnused;
+ }
+ return true;
+ }
+ AddToTable(a);
+ return false;
+}
+
+void TreeSamples::AddToTable(size_t a) {
+ size_t pos1 = Hash1(a);
+ size_t pos2 = Hash2(a);
+ if (dedup_table_[pos1] == kDedupEntryUnused) {
+ dedup_table_[pos1] = a;
+ } else if (dedup_table_[pos2] == kDedupEntryUnused) {
+ dedup_table_[pos2] = a;
+ }
+}
+
+void TreeSamples::PrepareForSamples(size_t num_samples) {
+ for (auto &res : residuals) {
+ res.reserve(res.size() + num_samples);
+ }
+ for (auto &p : props) {
+ p.reserve(p.size() + num_samples);
+ }
+ size_t total_num_samples = num_samples + sample_counts.size();
+ size_t next_pow2 = 1LLU << CeilLog2Nonzero(total_num_samples * 3 / 2);
+ InitTable(next_pow2);
+}
+
+size_t TreeSamples::Hash1(size_t a) const {
+ constexpr uint64_t constant = 0x1e35a7bd;
+ uint64_t h = constant;
+ for (const auto &r : residuals) {
+ h = h * constant + r[a].tok;
+ h = h * constant + r[a].nbits;
+ }
+ for (const auto &p : props) {
+ h = h * constant + p[a];
+ }
+ return (h >> 16) & (dedup_table_.size() - 1);
+}
+size_t TreeSamples::Hash2(size_t a) const {
+ constexpr uint64_t constant = 0x1e35a7bd1e35a7bd;
+ uint64_t h = constant;
+ for (const auto &p : props) {
+ h = h * constant ^ p[a];
+ }
+ for (const auto &r : residuals) {
+ h = h * constant ^ r[a].tok;
+ h = h * constant ^ r[a].nbits;
+ }
+ return (h >> 16) & (dedup_table_.size() - 1);
+}
+
+bool TreeSamples::IsSameSample(size_t a, size_t b) const {
+ bool ret = true;
+ for (const auto &r : residuals) {
+ if (r[a].tok != r[b].tok) {
+ ret = false;
+ }
+ if (r[a].nbits != r[b].nbits) {
+ ret = false;
+ }
+ }
+ for (const auto &p : props) {
+ if (p[a] != p[b]) {
+ ret = false;
+ }
+ }
+ return ret;
+}
+
+void TreeSamples::AddSample(pixel_type_w pixel, const Properties &properties,
+ const pixel_type_w *predictions) {
+ for (size_t i = 0; i < predictors.size(); i++) {
+ pixel_type v = pixel - predictions[static_cast<int>(predictors[i])];
+ uint32_t tok, nbits, bits;
+ HybridUintConfig(4, 1, 2).Encode(PackSigned(v), &tok, &nbits, &bits);
+ JXL_DASSERT(tok < 256);
+ JXL_DASSERT(nbits < 256);
+ residuals[i].emplace_back(
+ ResidualToken{static_cast<uint8_t>(tok), static_cast<uint8_t>(nbits)});
+ }
+ for (size_t i = 0; i < props_to_use.size(); i++) {
+ props[i].push_back(QuantizeProperty(i, properties[props_to_use[i]]));
+ }
+ sample_counts.push_back(1);
+ num_samples++;
+ if (AddToTableAndMerge(sample_counts.size() - 1)) {
+ for (auto &r : residuals) r.pop_back();
+ for (auto &p : props) p.pop_back();
+ sample_counts.pop_back();
+ }
+}
+
+void TreeSamples::Swap(size_t a, size_t b) {
+ if (a == b) return;
+ for (auto &r : residuals) {
+ std::swap(r[a], r[b]);
+ }
+ for (auto &p : props) {
+ std::swap(p[a], p[b]);
+ }
+ std::swap(sample_counts[a], sample_counts[b]);
+}
+
+void TreeSamples::ThreeShuffle(size_t a, size_t b, size_t c) {
+ if (b == c) return Swap(a, b);
+ for (auto &r : residuals) {
+ auto tmp = r[a];
+ r[a] = r[c];
+ r[c] = r[b];
+ r[b] = tmp;
+ }
+ for (auto &p : props) {
+ auto tmp = p[a];
+ p[a] = p[c];
+ p[c] = p[b];
+ p[b] = tmp;
+ }
+ auto tmp = sample_counts[a];
+ sample_counts[a] = sample_counts[c];
+ sample_counts[c] = sample_counts[b];
+ sample_counts[b] = tmp;
+}
+
+namespace {
+std::vector<int32_t> QuantizeHistogram(const std::vector<uint32_t> &histogram,
+ size_t num_chunks) {
+ if (histogram.empty()) return {};
+ // TODO(veluca): selecting distinct quantiles is likely not the best
+ // way to go about this.
+ std::vector<int32_t> thresholds;
+ size_t sum = std::accumulate(histogram.begin(), histogram.end(), 0LU);
+ size_t cumsum = 0;
+ size_t threshold = 0;
+ for (size_t i = 0; i + 1 < histogram.size(); i++) {
+ cumsum += histogram[i];
+ if (cumsum > (threshold + 1) * sum / num_chunks) {
+ thresholds.push_back(i);
+ while (cumsum >= (threshold + 1) * sum / num_chunks) threshold++;
+ }
+ }
+ return thresholds;
+}
+
+std::vector<int32_t> QuantizeSamples(const std::vector<int32_t> &samples,
+ size_t num_chunks) {
+ if (samples.empty()) return {};
+ int min = *std::min_element(samples.begin(), samples.end());
+ constexpr int kRange = 512;
+ min = std::min(std::max(min, -kRange), kRange);
+ std::vector<uint32_t> counts(2 * kRange + 1);
+ for (int s : samples) {
+ uint32_t sample_offset = std::min(std::max(s, -kRange), kRange) - min;
+ counts[sample_offset]++;
+ }
+ std::vector<int32_t> thresholds = QuantizeHistogram(counts, num_chunks);
+ for (auto &v : thresholds) v += min;
+ return thresholds;
+}
+} // namespace
+
+void TreeSamples::PreQuantizeProperties(
+ const StaticPropRange &range,
+ const std::vector<ModularMultiplierInfo> &multiplier_info,
+ const std::vector<uint32_t> &group_pixel_count,
+ const std::vector<uint32_t> &channel_pixel_count,
+ std::vector<pixel_type> &pixel_samples,
+ std::vector<pixel_type> &diff_samples, size_t max_property_values) {
+ // If we have forced splits because of multipliers, choose channel and group
+ // thresholds accordingly.
+ std::vector<int32_t> group_multiplier_thresholds;
+ std::vector<int32_t> channel_multiplier_thresholds;
+ for (const auto &v : multiplier_info) {
+ if (v.range[0][0] != range[0][0]) {
+ channel_multiplier_thresholds.push_back(v.range[0][0] - 1);
+ }
+ if (v.range[0][1] != range[0][1]) {
+ channel_multiplier_thresholds.push_back(v.range[0][1] - 1);
+ }
+ if (v.range[1][0] != range[1][0]) {
+ group_multiplier_thresholds.push_back(v.range[1][0] - 1);
+ }
+ if (v.range[1][1] != range[1][1]) {
+ group_multiplier_thresholds.push_back(v.range[1][1] - 1);
+ }
+ }
+ std::sort(channel_multiplier_thresholds.begin(),
+ channel_multiplier_thresholds.end());
+ channel_multiplier_thresholds.resize(
+ std::unique(channel_multiplier_thresholds.begin(),
+ channel_multiplier_thresholds.end()) -
+ channel_multiplier_thresholds.begin());
+ std::sort(group_multiplier_thresholds.begin(),
+ group_multiplier_thresholds.end());
+ group_multiplier_thresholds.resize(
+ std::unique(group_multiplier_thresholds.begin(),
+ group_multiplier_thresholds.end()) -
+ group_multiplier_thresholds.begin());
+
+ compact_properties.resize(props_to_use.size());
+ auto quantize_channel = [&]() {
+ if (!channel_multiplier_thresholds.empty()) {
+ return channel_multiplier_thresholds;
+ }
+ return QuantizeHistogram(channel_pixel_count, max_property_values);
+ };
+ auto quantize_group_id = [&]() {
+ if (!group_multiplier_thresholds.empty()) {
+ return group_multiplier_thresholds;
+ }
+ return QuantizeHistogram(group_pixel_count, max_property_values);
+ };
+ auto quantize_coordinate = [&]() {
+ std::vector<int32_t> quantized;
+ quantized.reserve(max_property_values - 1);
+ for (size_t i = 0; i + 1 < max_property_values; i++) {
+ quantized.push_back((i + 1) * 256 / max_property_values - 1);
+ }
+ return quantized;
+ };
+ std::vector<int32_t> abs_pixel_thr;
+ std::vector<int32_t> pixel_thr;
+ auto quantize_pixel_property = [&]() {
+ if (pixel_thr.empty()) {
+ pixel_thr = QuantizeSamples(pixel_samples, max_property_values);
+ }
+ return pixel_thr;
+ };
+ auto quantize_abs_pixel_property = [&]() {
+ if (abs_pixel_thr.empty()) {
+ quantize_pixel_property(); // Compute the non-abs thresholds.
+ for (auto &v : pixel_samples) v = std::abs(v);
+ abs_pixel_thr = QuantizeSamples(pixel_samples, max_property_values);
+ }
+ return abs_pixel_thr;
+ };
+ std::vector<int32_t> abs_diff_thr;
+ std::vector<int32_t> diff_thr;
+ auto quantize_diff_property = [&]() {
+ if (diff_thr.empty()) {
+ diff_thr = QuantizeSamples(diff_samples, max_property_values);
+ }
+ return diff_thr;
+ };
+ auto quantize_abs_diff_property = [&]() {
+ if (abs_diff_thr.empty()) {
+ quantize_diff_property(); // Compute the non-abs thresholds.
+ for (auto &v : diff_samples) v = std::abs(v);
+ abs_diff_thr = QuantizeSamples(diff_samples, max_property_values);
+ }
+ return abs_diff_thr;
+ };
+ auto quantize_wp = [&]() {
+ if (max_property_values < 32) {
+ return std::vector<int32_t>{-127, -63, -31, -15, -7, -3, -1, 0,
+ 1, 3, 7, 15, 31, 63, 127};
+ }
+ if (max_property_values < 64) {
+ return std::vector<int32_t>{-255, -191, -127, -95, -63, -47, -31, -23,
+ -15, -11, -7, -5, -3, -1, 0, 1,
+ 3, 5, 7, 11, 15, 23, 31, 47,
+ 63, 95, 127, 191, 255};
+ }
+ return std::vector<int32_t>{
+ -255, -223, -191, -159, -127, -111, -95, -79, -63, -55, -47,
+ -39, -31, -27, -23, -19, -15, -13, -11, -9, -7, -6,
+ -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5,
+ 6, 7, 9, 11, 13, 15, 19, 23, 27, 31, 39,
+ 47, 55, 63, 79, 95, 111, 127, 159, 191, 223, 255};
+ };
+
+ property_mapping.resize(props_to_use.size());
+ for (size_t i = 0; i < props_to_use.size(); i++) {
+ if (props_to_use[i] == 0) {
+ compact_properties[i] = quantize_channel();
+ } else if (props_to_use[i] == 1) {
+ compact_properties[i] = quantize_group_id();
+ } else if (props_to_use[i] == 2 || props_to_use[i] == 3) {
+ compact_properties[i] = quantize_coordinate();
+ } else if (props_to_use[i] == 6 || props_to_use[i] == 7 ||
+ props_to_use[i] == 8 ||
+ (props_to_use[i] >= kNumNonrefProperties &&
+ (props_to_use[i] - kNumNonrefProperties) % 4 == 1)) {
+ compact_properties[i] = quantize_pixel_property();
+ } else if (props_to_use[i] == 4 || props_to_use[i] == 5 ||
+ (props_to_use[i] >= kNumNonrefProperties &&
+ (props_to_use[i] - kNumNonrefProperties) % 4 == 0)) {
+ compact_properties[i] = quantize_abs_pixel_property();
+ } else if (props_to_use[i] >= kNumNonrefProperties &&
+ (props_to_use[i] - kNumNonrefProperties) % 4 == 2) {
+ compact_properties[i] = quantize_abs_diff_property();
+ } else if (props_to_use[i] == kWPProp) {
+ compact_properties[i] = quantize_wp();
+ } else {
+ compact_properties[i] = quantize_diff_property();
+ }
+ property_mapping[i].resize(kPropertyRange * 2 + 1);
+ size_t mapped = 0;
+ for (size_t j = 0; j < property_mapping[i].size(); j++) {
+ while (mapped < compact_properties[i].size() &&
+ static_cast<int>(j) - kPropertyRange >
+ compact_properties[i][mapped]) {
+ mapped++;
+ }
+ // property_mapping[i] of a value V is `mapped` if
+ // compact_properties[i][mapped] <= j and
+ // compact_properties[i][mapped-1] > j
+ // This is because the decision node in the tree splits on (property) > j,
+ // hence everything that is not > of a threshold should be clustered
+ // together.
+ property_mapping[i][j] = mapped;
+ }
+ }
+}
+
+void CollectPixelSamples(const Image &image, const ModularOptions &options,
+ size_t group_id,
+ std::vector<uint32_t> &group_pixel_count,
+ std::vector<uint32_t> &channel_pixel_count,
+ std::vector<pixel_type> &pixel_samples,
+ std::vector<pixel_type> &diff_samples) {
+ if (options.nb_repeats == 0) return;
+ if (group_pixel_count.size() <= group_id) {
+ group_pixel_count.resize(group_id + 1);
+ }
+ if (channel_pixel_count.size() < image.channel.size()) {
+ channel_pixel_count.resize(image.channel.size());
+ }
+ Rng rng(group_id);
+ // Sample 10% of the final number of samples for property quantization.
+ float fraction = std::min(options.nb_repeats * 0.1, 0.99);
+ Rng::GeometricDistribution dist(fraction);
+ size_t total_pixels = 0;
+ std::vector<size_t> channel_ids;
+ for (size_t i = 0; i < image.channel.size(); i++) {
+ if (image.channel[i].w <= 1 || image.channel[i].h == 0) {
+ continue; // skip empty or width-1 channels.
+ }
+ if (i >= image.nb_meta_channels &&
+ (image.channel[i].w > options.max_chan_size ||
+ image.channel[i].h > options.max_chan_size)) {
+ break;
+ }
+ channel_ids.push_back(i);
+ group_pixel_count[group_id] += image.channel[i].w * image.channel[i].h;
+ channel_pixel_count[i] += image.channel[i].w * image.channel[i].h;
+ total_pixels += image.channel[i].w * image.channel[i].h;
+ }
+ if (channel_ids.empty()) return;
+ pixel_samples.reserve(pixel_samples.size() + fraction * total_pixels);
+ diff_samples.reserve(diff_samples.size() + fraction * total_pixels);
+ size_t i = 0;
+ size_t y = 0;
+ size_t x = 0;
+ auto advance = [&](size_t amount) {
+ x += amount;
+ // Detect row overflow (rare).
+ while (x >= image.channel[channel_ids[i]].w) {
+ x -= image.channel[channel_ids[i]].w;
+ y++;
+ // Detect end-of-channel (even rarer).
+ if (y == image.channel[channel_ids[i]].h) {
+ i++;
+ y = 0;
+ if (i >= channel_ids.size()) {
+ return;
+ }
+ }
+ }
+ };
+ advance(rng.Geometric(dist));
+ for (; i < channel_ids.size(); advance(rng.Geometric(dist) + 1)) {
+ const pixel_type *row = image.channel[channel_ids[i]].Row(y);
+ pixel_samples.push_back(row[x]);
+ size_t xp = x == 0 ? 1 : x - 1;
+ diff_samples.push_back((int64_t)row[x] - row[xp]);
+ }
+}
+
+// TODO(veluca): very simple encoding scheme. This should be improved.
+void TokenizeTree(const Tree &tree, std::vector<Token> *tokens,
+ Tree *decoder_tree) {
+ JXL_ASSERT(tree.size() <= kMaxTreeSize);
+ std::queue<int> q;
+ q.push(0);
+ size_t leaf_id = 0;
+ decoder_tree->clear();
+ while (!q.empty()) {
+ int cur = q.front();
+ q.pop();
+ JXL_ASSERT(tree[cur].property >= -1);
+ tokens->emplace_back(kPropertyContext, tree[cur].property + 1);
+ if (tree[cur].property == -1) {
+ tokens->emplace_back(kPredictorContext,
+ static_cast<int>(tree[cur].predictor));
+ tokens->emplace_back(kOffsetContext,
+ PackSigned(tree[cur].predictor_offset));
+ uint32_t mul_log = Num0BitsBelowLS1Bit_Nonzero(tree[cur].multiplier);
+ uint32_t mul_bits = (tree[cur].multiplier >> mul_log) - 1;
+ tokens->emplace_back(kMultiplierLogContext, mul_log);
+ tokens->emplace_back(kMultiplierBitsContext, mul_bits);
+ JXL_ASSERT(tree[cur].predictor < Predictor::Best);
+ decoder_tree->emplace_back(-1, 0, leaf_id++, 0, tree[cur].predictor,
+ tree[cur].predictor_offset,
+ tree[cur].multiplier);
+ continue;
+ }
+ decoder_tree->emplace_back(tree[cur].property, tree[cur].splitval,
+ decoder_tree->size() + q.size() + 1,
+ decoder_tree->size() + q.size() + 2,
+ Predictor::Zero, 0, 1);
+ q.push(tree[cur].lchild);
+ q.push(tree[cur].rchild);
+ tokens->emplace_back(kSplitValContext, PackSigned(tree[cur].splitval));
+ }
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/modular/encoding/enc_ma.h b/third_party/jpeg-xl/lib/jxl/modular/encoding/enc_ma.h
new file mode 100644
index 0000000000..ede37c8023
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/encoding/enc_ma.h
@@ -0,0 +1,157 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_ENC_MA_H_
+#define LIB_JXL_MODULAR_ENCODING_ENC_MA_H_
+
+#include <numeric>
+
+#include "lib/jxl/enc_ans.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/modular/encoding/dec_ma.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+// Struct to collect all the data needed to build a tree.
+struct TreeSamples {
+ bool HasSamples() const {
+ return !residuals.empty() && !residuals[0].empty();
+ }
+ size_t NumDistinctSamples() const { return sample_counts.size(); }
+ size_t NumSamples() const { return num_samples; }
+ // Set the predictor to use. Must be called before adding any samples.
+ Status SetPredictor(Predictor predictor,
+ ModularOptions::TreeMode wp_tree_mode);
+ // Set the properties to use. Must be called before adding any samples.
+ Status SetProperties(const std::vector<uint32_t> &properties,
+ ModularOptions::TreeMode wp_tree_mode);
+
+ size_t Token(size_t pred, size_t i) const { return residuals[pred][i].tok; }
+ size_t NBits(size_t pred, size_t i) const { return residuals[pred][i].nbits; }
+ size_t Count(size_t i) const { return sample_counts[i]; }
+ size_t PredictorIndex(Predictor predictor) const {
+ const auto predictor_elem =
+ std::find(predictors.begin(), predictors.end(), predictor);
+ JXL_DASSERT(predictor_elem != predictors.end());
+ return predictor_elem - predictors.begin();
+ }
+ size_t PropertyIndex(size_t property) const {
+ const auto property_elem =
+ std::find(props_to_use.begin(), props_to_use.end(), property);
+ JXL_DASSERT(property_elem != props_to_use.end());
+ return property_elem - props_to_use.begin();
+ }
+ size_t NumPropertyValues(size_t property_index) const {
+ return compact_properties[property_index].size() + 1;
+ }
+ // Returns the *quantized* property value.
+ size_t Property(size_t property_index, size_t i) const {
+ return props[property_index][i];
+ }
+ int UnquantizeProperty(size_t property_index, uint32_t quant) const {
+ JXL_ASSERT(quant < compact_properties[property_index].size());
+ return compact_properties[property_index][quant];
+ }
+
+ Predictor PredictorFromIndex(size_t index) const {
+ JXL_DASSERT(index < predictors.size());
+ return predictors[index];
+ }
+ size_t PropertyFromIndex(size_t index) const {
+ JXL_DASSERT(index < props_to_use.size());
+ return props_to_use[index];
+ }
+ size_t NumPredictors() const { return predictors.size(); }
+ size_t NumProperties() const { return props_to_use.size(); }
+
+ // Preallocate data for a given number of samples. MUST be called before
+ // adding any sample.
+ void PrepareForSamples(size_t num_samples);
+ // Add a sample.
+ void AddSample(pixel_type_w pixel, const Properties &properties,
+ const pixel_type_w *predictions);
+ // Pre-cluster property values.
+ void PreQuantizeProperties(
+ const StaticPropRange &range,
+ const std::vector<ModularMultiplierInfo> &multiplier_info,
+ const std::vector<uint32_t> &group_pixel_count,
+ const std::vector<uint32_t> &channel_pixel_count,
+ std::vector<pixel_type> &pixel_samples,
+ std::vector<pixel_type> &diff_samples, size_t max_property_values);
+
+ void AllSamplesDone() { dedup_table_ = std::vector<uint32_t>(); }
+
+ uint32_t QuantizeProperty(uint32_t prop, pixel_type v) const {
+ v = std::min(std::max(v, -kPropertyRange), kPropertyRange) + kPropertyRange;
+ return property_mapping[prop][v];
+ }
+
+ // Swaps samples in position a and b. Does nothing if a == b.
+ void Swap(size_t a, size_t b);
+
+ // Cycles samples: a -> b -> c -> a. We assume a <= b <= c, so that we can
+ // just call Swap(a, b) if b==c.
+ void ThreeShuffle(size_t a, size_t b, size_t c);
+
+ private:
+ // TODO(veluca): as the total number of properties and predictors are known
+ // before adding any samples, it might be better to interleave predictors,
+ // properties and counts in a single vector to improve locality.
+ // A first attempt at doing this actually results in much slower encoding,
+ // possibly because of the more complex addressing.
+ struct ResidualToken {
+ uint8_t tok;
+ uint8_t nbits;
+ };
+ // Residual information: token and number of extra bits, per predictor.
+ std::vector<std::vector<ResidualToken>> residuals;
+ // Number of occurrences of each sample.
+ std::vector<uint16_t> sample_counts;
+ // Property values, quantized to at most 256 distinct values.
+ std::vector<std::vector<uint8_t>> props;
+ // Decompactification info for `props`.
+ std::vector<std::vector<int32_t>> compact_properties;
+ // List of properties to use.
+ std::vector<uint32_t> props_to_use;
+ // List of predictors to use.
+ std::vector<Predictor> predictors;
+ // Mapping property value -> quantized property value.
+ static constexpr int32_t kPropertyRange = 511;
+ std::vector<std::vector<uint8_t>> property_mapping;
+ // Number of samples seen.
+ size_t num_samples = 0;
+ // Table for deduplication.
+ static constexpr uint32_t kDedupEntryUnused{static_cast<uint32_t>(-1)};
+ std::vector<uint32_t> dedup_table_;
+
+ // Functions for sample deduplication.
+ bool IsSameSample(size_t a, size_t b) const;
+ size_t Hash1(size_t a) const;
+ size_t Hash2(size_t a) const;
+ void InitTable(size_t size);
+ // Returns true if `a` was already present in the table.
+ bool AddToTableAndMerge(size_t a);
+ void AddToTable(size_t a);
+};
+
+void TokenizeTree(const Tree &tree, std::vector<Token> *tokens,
+ Tree *decoder_tree);
+
+void CollectPixelSamples(const Image &image, const ModularOptions &options,
+ size_t group_id,
+ std::vector<uint32_t> &group_pixel_count,
+ std::vector<uint32_t> &channel_pixel_count,
+ std::vector<pixel_type> &pixel_samples,
+ std::vector<pixel_type> &diff_samples);
+
+void ComputeBestTree(TreeSamples &tree_samples, float threshold,
+ const std::vector<ModularMultiplierInfo> &mul_info,
+ StaticPropRange static_prop_range,
+ float fast_decode_multiplier, Tree *tree);
+
+} // namespace jxl
+#endif // LIB_JXL_MODULAR_ENCODING_ENC_MA_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/encoding/encoding.cc b/third_party/jpeg-xl/lib/jxl/modular/encoding/encoding.cc
new file mode 100644
index 0000000000..9d2c3e5cf9
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/encoding/encoding.cc
@@ -0,0 +1,622 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/encoding/encoding.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <queue>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/scope_guard.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+// Removes all nodes that use a static property (i.e. channel or group ID) from
+// the tree and collapses each node on even levels with its two children to
+// produce a flatter tree. Also computes whether the resulting tree requires
+// using the weighted predictor.
+FlatTree FilterTree(const Tree &global_tree,
+ std::array<pixel_type, kNumStaticProperties> &static_props,
+ size_t *num_props, bool *use_wp, bool *wp_only,
+ bool *gradient_only) {
+ *num_props = 0;
+ bool has_wp = false;
+ bool has_non_wp = false;
+ *gradient_only = true;
+ const auto mark_property = [&](int32_t p) {
+ if (p == kWPProp) {
+ has_wp = true;
+ } else if (p >= kNumStaticProperties) {
+ has_non_wp = true;
+ }
+ if (p >= kNumStaticProperties && p != kGradientProp) {
+ *gradient_only = false;
+ }
+ };
+ FlatTree output;
+ std::queue<size_t> nodes;
+ nodes.push(0);
+ // Produces a trimmed and flattened tree by doing a BFS visit of the original
+ // tree, ignoring branches that are known to be false and proceeding two
+ // levels at a time to collapse nodes in a flatter tree; if an inner parent
+ // node has a leaf as a child, the leaf is duplicated and an implicit fake
+ // node is added. This allows to reduce the number of branches when traversing
+ // the resulting flat tree.
+ while (!nodes.empty()) {
+ size_t cur = nodes.front();
+ nodes.pop();
+ // Skip nodes that we can decide now, by jumping directly to their children.
+ while (global_tree[cur].property < kNumStaticProperties &&
+ global_tree[cur].property != -1) {
+ if (static_props[global_tree[cur].property] > global_tree[cur].splitval) {
+ cur = global_tree[cur].lchild;
+ } else {
+ cur = global_tree[cur].rchild;
+ }
+ }
+ FlatDecisionNode flat;
+ if (global_tree[cur].property == -1) {
+ flat.property0 = -1;
+ flat.childID = global_tree[cur].lchild;
+ flat.predictor = global_tree[cur].predictor;
+ flat.predictor_offset = global_tree[cur].predictor_offset;
+ flat.multiplier = global_tree[cur].multiplier;
+ *gradient_only &= flat.predictor == Predictor::Gradient;
+ has_wp |= flat.predictor == Predictor::Weighted;
+ has_non_wp |= flat.predictor != Predictor::Weighted;
+ output.push_back(flat);
+ continue;
+ }
+ flat.childID = output.size() + nodes.size() + 1;
+
+ flat.property0 = global_tree[cur].property;
+ *num_props = std::max<size_t>(flat.property0 + 1, *num_props);
+ flat.splitval0 = global_tree[cur].splitval;
+
+ for (size_t i = 0; i < 2; i++) {
+ size_t cur_child =
+ i == 0 ? global_tree[cur].lchild : global_tree[cur].rchild;
+ // Skip nodes that we can decide now.
+ while (global_tree[cur_child].property < kNumStaticProperties &&
+ global_tree[cur_child].property != -1) {
+ if (static_props[global_tree[cur_child].property] >
+ global_tree[cur_child].splitval) {
+ cur_child = global_tree[cur_child].lchild;
+ } else {
+ cur_child = global_tree[cur_child].rchild;
+ }
+ }
+ // We ended up in a leaf, add a dummy decision and two copies of the leaf.
+ if (global_tree[cur_child].property == -1) {
+ flat.properties[i] = 0;
+ flat.splitvals[i] = 0;
+ nodes.push(cur_child);
+ nodes.push(cur_child);
+ } else {
+ flat.properties[i] = global_tree[cur_child].property;
+ flat.splitvals[i] = global_tree[cur_child].splitval;
+ nodes.push(global_tree[cur_child].lchild);
+ nodes.push(global_tree[cur_child].rchild);
+ *num_props = std::max<size_t>(flat.properties[i] + 1, *num_props);
+ }
+ }
+
+ for (size_t j = 0; j < 2; j++) mark_property(flat.properties[j]);
+ mark_property(flat.property0);
+ output.push_back(flat);
+ }
+ if (*num_props > kNumNonrefProperties) {
+ *num_props =
+ DivCeil(*num_props - kNumNonrefProperties, kExtraPropsPerChannel) *
+ kExtraPropsPerChannel +
+ kNumNonrefProperties;
+ } else {
+ *num_props = kNumNonrefProperties;
+ }
+ *use_wp = has_wp;
+ *wp_only = has_wp && !has_non_wp;
+
+ return output;
+}
+
+Status DecodeModularChannelMAANS(BitReader *br, ANSSymbolReader *reader,
+ const std::vector<uint8_t> &context_map,
+ const Tree &global_tree,
+ const weighted::Header &wp_header,
+ pixel_type chan, size_t group_id,
+ Image *image) {
+ Channel &channel = image->channel[chan];
+
+ std::array<pixel_type, kNumStaticProperties> static_props = {
+ {chan, (int)group_id}};
+ // TODO(veluca): filter the tree according to static_props.
+
+ // zero pixel channel? could happen
+ if (channel.w == 0 || channel.h == 0) return true;
+
+ bool tree_has_wp_prop_or_pred = false;
+ bool is_wp_only = false;
+ bool is_gradient_only = false;
+ size_t num_props;
+ FlatTree tree =
+ FilterTree(global_tree, static_props, &num_props,
+ &tree_has_wp_prop_or_pred, &is_wp_only, &is_gradient_only);
+
+ // From here on, tree lookup returns a *clustered* context ID.
+ // This avoids an extra memory lookup after tree traversal.
+ for (size_t i = 0; i < tree.size(); i++) {
+ if (tree[i].property0 == -1) {
+ tree[i].childID = context_map[tree[i].childID];
+ }
+ }
+
+ JXL_DEBUG_V(3, "Decoded MA tree with %" PRIuS " nodes", tree.size());
+
+ // MAANS decode
+ const auto make_pixel = [](uint64_t v, pixel_type multiplier,
+ pixel_type_w offset) -> pixel_type {
+ JXL_DASSERT((v & 0xFFFFFFFF) == v);
+ pixel_type_w val = UnpackSigned(v);
+ // if it overflows, it overflows, and we have a problem anyway
+ return val * multiplier + offset;
+ };
+
+ if (tree.size() == 1) {
+ // special optimized case: no meta-adaptation, so no need
+ // to compute properties.
+ Predictor predictor = tree[0].predictor;
+ int64_t offset = tree[0].predictor_offset;
+ int32_t multiplier = tree[0].multiplier;
+ size_t ctx_id = tree[0].childID;
+ if (predictor == Predictor::Zero) {
+ uint32_t value;
+ if (reader->IsSingleValueAndAdvance(ctx_id, &value,
+ channel.w * channel.h)) {
+ // Special-case: histogram has a single symbol, with no extra bits, and
+ // we use ANS mode.
+ JXL_DEBUG_V(8, "Fastest track.");
+ pixel_type v = make_pixel(value, multiplier, offset);
+ for (size_t y = 0; y < channel.h; y++) {
+ pixel_type *JXL_RESTRICT r = channel.Row(y);
+ std::fill(r, r + channel.w, v);
+ }
+ } else {
+ JXL_DEBUG_V(8, "Fast track.");
+ if (multiplier == 1 && offset == 0) {
+ for (size_t y = 0; y < channel.h; y++) {
+ pixel_type *JXL_RESTRICT r = channel.Row(y);
+ for (size_t x = 0; x < channel.w; x++) {
+ uint32_t v = reader->ReadHybridUintClustered(ctx_id, br);
+ r[x] = UnpackSigned(v);
+ }
+ }
+ } else {
+ for (size_t y = 0; y < channel.h; y++) {
+ pixel_type *JXL_RESTRICT r = channel.Row(y);
+ for (size_t x = 0; x < channel.w; x++) {
+ uint32_t v = reader->ReadHybridUintClustered(ctx_id, br);
+ r[x] = make_pixel(v, multiplier, offset);
+ }
+ }
+ }
+ }
+ } else if (predictor == Predictor::Gradient && offset == 0 &&
+ multiplier == 1 && reader->HuffRleOnly()) {
+ JXL_DEBUG_V(8, "Gradient RLE (fjxl) very fast track.");
+ uint32_t run = 0;
+ uint32_t v = 0;
+ pixel_type_w sv = 0;
+ for (size_t y = 0; y < channel.h; y++) {
+ pixel_type *JXL_RESTRICT r = channel.Row(y);
+ const pixel_type *JXL_RESTRICT rtop = (y ? channel.Row(y - 1) : r - 1);
+ const pixel_type *JXL_RESTRICT rtopleft =
+ (y ? channel.Row(y - 1) - 1 : r - 1);
+ pixel_type_w guess = (y ? rtop[0] : 0);
+ if (run == 0) {
+ reader->ReadHybridUintClusteredHuffRleOnly(ctx_id, br, &v, &run);
+ sv = UnpackSigned(v);
+ } else {
+ run--;
+ }
+ r[0] = sv + guess;
+ for (size_t x = 1; x < channel.w; x++) {
+ pixel_type left = r[x - 1];
+ pixel_type top = rtop[x];
+ pixel_type topleft = rtopleft[x];
+ pixel_type_w guess = ClampedGradient(top, left, topleft);
+ if (!run) {
+ reader->ReadHybridUintClusteredHuffRleOnly(ctx_id, br, &v, &run);
+ sv = UnpackSigned(v);
+ } else {
+ run--;
+ }
+ r[x] = sv + guess;
+ }
+ }
+ } else if (predictor == Predictor::Gradient && offset == 0 &&
+ multiplier == 1) {
+ JXL_DEBUG_V(8, "Gradient very fast track.");
+ const intptr_t onerow = channel.plane.PixelsPerRow();
+ for (size_t y = 0; y < channel.h; y++) {
+ pixel_type *JXL_RESTRICT r = channel.Row(y);
+ for (size_t x = 0; x < channel.w; x++) {
+ pixel_type left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
+ pixel_type top = (y ? *(r + x - onerow) : left);
+ pixel_type topleft = (x && y ? *(r + x - 1 - onerow) : left);
+ pixel_type guess = ClampedGradient(top, left, topleft);
+ uint64_t v = reader->ReadHybridUintClustered(ctx_id, br);
+ r[x] = make_pixel(v, 1, guess);
+ }
+ }
+ } else if (predictor != Predictor::Weighted) {
+ // special optimized case: no wp
+ JXL_DEBUG_V(8, "Quite fast track.");
+ const intptr_t onerow = channel.plane.PixelsPerRow();
+ for (size_t y = 0; y < channel.h; y++) {
+ pixel_type *JXL_RESTRICT r = channel.Row(y);
+ for (size_t x = 0; x < channel.w; x++) {
+ PredictionResult pred =
+ PredictNoTreeNoWP(channel.w, r + x, onerow, x, y, predictor);
+ pixel_type_w g = pred.guess + offset;
+ uint64_t v = reader->ReadHybridUintClustered(ctx_id, br);
+ // NOTE: pred.multiplier is unset.
+ r[x] = make_pixel(v, multiplier, g);
+ }
+ }
+ } else {
+ JXL_DEBUG_V(8, "Somewhat fast track.");
+ const intptr_t onerow = channel.plane.PixelsPerRow();
+ weighted::State wp_state(wp_header, channel.w, channel.h);
+ for (size_t y = 0; y < channel.h; y++) {
+ pixel_type *JXL_RESTRICT r = channel.Row(y);
+ for (size_t x = 0; x < channel.w; x++) {
+ pixel_type_w g = PredictNoTreeWP(channel.w, r + x, onerow, x, y,
+ predictor, &wp_state)
+ .guess +
+ offset;
+ uint64_t v = reader->ReadHybridUintClustered(ctx_id, br);
+ r[x] = make_pixel(v, multiplier, g);
+ wp_state.UpdateErrors(r[x], x, y, channel.w);
+ }
+ }
+ }
+ return true;
+ }
+
+ // Check if this tree is a WP-only tree with a small enough property value
+ // range.
+ // Initialized to avoid clang-tidy complaining.
+ uint8_t context_lookup[2 * kPropRangeFast] = {};
+ int8_t multipliers[2 * kPropRangeFast] = {};
+ int8_t offsets[2 * kPropRangeFast] = {};
+ if (is_wp_only) {
+ is_wp_only = TreeToLookupTable(tree, context_lookup, offsets, multipliers);
+ }
+ if (is_gradient_only) {
+ is_gradient_only =
+ TreeToLookupTable(tree, context_lookup, offsets, multipliers);
+ }
+
+ if (is_gradient_only) {
+ JXL_DEBUG_V(8, "Gradient fast track.");
+ const intptr_t onerow = channel.plane.PixelsPerRow();
+ for (size_t y = 0; y < channel.h; y++) {
+ pixel_type *JXL_RESTRICT r = channel.Row(y);
+ for (size_t x = 0; x < channel.w; x++) {
+ pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
+ pixel_type_w top = (y ? *(r + x - onerow) : left);
+ pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
+ int32_t guess = ClampedGradient(top, left, topleft);
+ uint32_t pos =
+ kPropRangeFast +
+ std::min<pixel_type_w>(
+ std::max<pixel_type_w>(-kPropRangeFast, top + left - topleft),
+ kPropRangeFast - 1);
+ uint32_t ctx_id = context_lookup[pos];
+ uint64_t v = reader->ReadHybridUintClustered(ctx_id, br);
+ r[x] = make_pixel(v, multipliers[pos],
+ static_cast<pixel_type_w>(offsets[pos]) + guess);
+ }
+ }
+ } else if (is_wp_only) {
+ JXL_DEBUG_V(8, "WP fast track.");
+ const intptr_t onerow = channel.plane.PixelsPerRow();
+ weighted::State wp_state(wp_header, channel.w, channel.h);
+ Properties properties(1);
+ for (size_t y = 0; y < channel.h; y++) {
+ pixel_type *JXL_RESTRICT r = channel.Row(y);
+ for (size_t x = 0; x < channel.w; x++) {
+ size_t offset = 0;
+ pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0);
+ pixel_type_w top = (y ? *(r + x - onerow) : left);
+ pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left);
+ pixel_type_w topright =
+ (x + 1 < channel.w && y ? *(r + x + 1 - onerow) : top);
+ pixel_type_w toptop = (y > 1 ? *(r + x - onerow - onerow) : top);
+ int32_t guess = wp_state.Predict</*compute_properties=*/true>(
+ x, y, channel.w, top, left, topright, topleft, toptop, &properties,
+ offset);
+ uint32_t pos =
+ kPropRangeFast + std::min(std::max(-kPropRangeFast, properties[0]),
+ kPropRangeFast - 1);
+ uint32_t ctx_id = context_lookup[pos];
+ uint64_t v = reader->ReadHybridUintClustered(ctx_id, br);
+ r[x] = make_pixel(v, multipliers[pos],
+ static_cast<pixel_type_w>(offsets[pos]) + guess);
+ wp_state.UpdateErrors(r[x], x, y, channel.w);
+ }
+ }
+ } else if (!tree_has_wp_prop_or_pred) {
+ // special optimized case: the weighted predictor and its properties are not
+ // used, so no need to compute weights and properties.
+ JXL_DEBUG_V(8, "Slow track.");
+ MATreeLookup tree_lookup(tree);
+ Properties properties = Properties(num_props);
+ const intptr_t onerow = channel.plane.PixelsPerRow();
+ Channel references(properties.size() - kNumNonrefProperties, channel.w);
+ for (size_t y = 0; y < channel.h; y++) {
+ pixel_type *JXL_RESTRICT p = channel.Row(y);
+ PrecomputeReferences(channel, y, *image, chan, &references);
+ InitPropsRow(&properties, static_props, y);
+ if (y > 1 && channel.w > 8 && references.w == 0) {
+ for (size_t x = 0; x < 2; x++) {
+ PredictionResult res =
+ PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y,
+ tree_lookup, references);
+ uint64_t v = reader->ReadHybridUintClustered(res.context, br);
+ p[x] = make_pixel(v, res.multiplier, res.guess);
+ }
+ for (size_t x = 2; x < channel.w - 2; x++) {
+ PredictionResult res =
+ PredictTreeNoWPNEC(&properties, channel.w, p + x, onerow, x, y,
+ tree_lookup, references);
+ uint64_t v = reader->ReadHybridUintClustered(res.context, br);
+ p[x] = make_pixel(v, res.multiplier, res.guess);
+ }
+ for (size_t x = channel.w - 2; x < channel.w; x++) {
+ PredictionResult res =
+ PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y,
+ tree_lookup, references);
+ uint64_t v = reader->ReadHybridUintClustered(res.context, br);
+ p[x] = make_pixel(v, res.multiplier, res.guess);
+ }
+ } else {
+ for (size_t x = 0; x < channel.w; x++) {
+ PredictionResult res =
+ PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y,
+ tree_lookup, references);
+ uint64_t v = reader->ReadHybridUintClustered(res.context, br);
+ p[x] = make_pixel(v, res.multiplier, res.guess);
+ }
+ }
+ }
+ } else {
+ JXL_DEBUG_V(8, "Slowest track.");
+ MATreeLookup tree_lookup(tree);
+ Properties properties = Properties(num_props);
+ const intptr_t onerow = channel.plane.PixelsPerRow();
+ Channel references(properties.size() - kNumNonrefProperties, channel.w);
+ weighted::State wp_state(wp_header, channel.w, channel.h);
+ for (size_t y = 0; y < channel.h; y++) {
+ pixel_type *JXL_RESTRICT p = channel.Row(y);
+ InitPropsRow(&properties, static_props, y);
+ PrecomputeReferences(channel, y, *image, chan, &references);
+ for (size_t x = 0; x < channel.w; x++) {
+ PredictionResult res =
+ PredictTreeWP(&properties, channel.w, p + x, onerow, x, y,
+ tree_lookup, references, &wp_state);
+ uint64_t v = reader->ReadHybridUintClustered(res.context, br);
+ p[x] = make_pixel(v, res.multiplier, res.guess);
+ wp_state.UpdateErrors(p[x], x, y, channel.w);
+ }
+ }
+ }
+ return true;
+}
+
+GroupHeader::GroupHeader() { Bundle::Init(this); }
+
+Status ValidateChannelDimensions(const Image &image,
+ const ModularOptions &options) {
+ size_t nb_channels = image.channel.size();
+ for (bool is_dc : {true, false}) {
+ size_t group_dim = options.group_dim * (is_dc ? kBlockDim : 1);
+ size_t c = image.nb_meta_channels;
+ for (; c < nb_channels; c++) {
+ const Channel &ch = image.channel[c];
+ if (ch.w > options.group_dim || ch.h > options.group_dim) break;
+ }
+ for (; c < nb_channels; c++) {
+ const Channel &ch = image.channel[c];
+ if (ch.w == 0 || ch.h == 0) continue; // skip empty
+ bool is_dc_channel = std::min(ch.hshift, ch.vshift) >= 3;
+ if (is_dc_channel != is_dc) continue;
+ size_t tile_dim = group_dim >> std::max(ch.hshift, ch.vshift);
+ if (tile_dim == 0) {
+ return JXL_FAILURE("Inconsistent transforms");
+ }
+ }
+ }
+ return true;
+}
+
+Status ModularDecode(BitReader *br, Image &image, GroupHeader &header,
+ size_t group_id, ModularOptions *options,
+ const Tree *global_tree, const ANSCode *global_code,
+ const std::vector<uint8_t> *global_ctx_map,
+ bool allow_truncated_group) {
+ if (image.channel.empty()) return true;
+
+ // decode transforms
+ Status status = Bundle::Read(br, &header);
+ if (!allow_truncated_group) JXL_RETURN_IF_ERROR(status);
+ if (status.IsFatalError()) return status;
+ if (!br->AllReadsWithinBounds()) {
+ // Don't do/undo transforms if header is incomplete.
+ header.transforms.clear();
+ image.transform = header.transforms;
+ for (size_t c = 0; c < image.channel.size(); c++) {
+ ZeroFillImage(&image.channel[c].plane);
+ }
+ return Status(StatusCode::kNotEnoughBytes);
+ }
+
+ JXL_DEBUG_V(3, "Image data underwent %" PRIuS " transformations: ",
+ header.transforms.size());
+ image.transform = header.transforms;
+ for (Transform &transform : image.transform) {
+ JXL_RETURN_IF_ERROR(transform.MetaApply(image));
+ }
+ if (image.error) {
+ return JXL_FAILURE("Corrupt file. Aborting.");
+ }
+ JXL_RETURN_IF_ERROR(ValidateChannelDimensions(image, *options));
+
+ size_t nb_channels = image.channel.size();
+
+ size_t num_chans = 0;
+ size_t distance_multiplier = 0;
+ for (size_t i = 0; i < nb_channels; i++) {
+ Channel &channel = image.channel[i];
+ if (!channel.w || !channel.h) {
+ continue; // skip empty channels
+ }
+ if (i >= image.nb_meta_channels && (channel.w > options->max_chan_size ||
+ channel.h > options->max_chan_size)) {
+ break;
+ }
+ if (channel.w > distance_multiplier) {
+ distance_multiplier = channel.w;
+ }
+ num_chans++;
+ }
+ if (num_chans == 0) return true;
+
+ size_t next_channel = 0;
+ auto scope_guard = MakeScopeGuard([&]() {
+ // Do not do anything if truncated groups are not allowed.
+ if (!allow_truncated_group) return;
+ for (size_t c = next_channel; c < nb_channels; c++) {
+ ZeroFillImage(&image.channel[c].plane);
+ }
+ });
+
+ // Read tree.
+ Tree tree_storage;
+ std::vector<uint8_t> context_map_storage;
+ ANSCode code_storage;
+ const Tree *tree = &tree_storage;
+ const ANSCode *code = &code_storage;
+ const std::vector<uint8_t> *context_map = &context_map_storage;
+ if (!header.use_global_tree) {
+ size_t max_tree_size = 1024;
+ for (size_t i = 0; i < nb_channels; i++) {
+ Channel &channel = image.channel[i];
+ if (!channel.w || !channel.h) {
+ continue; // skip empty channels
+ }
+ if (i >= image.nb_meta_channels && (channel.w > options->max_chan_size ||
+ channel.h > options->max_chan_size)) {
+ break;
+ }
+ size_t pixels = channel.w * channel.h;
+ if (pixels / channel.w != channel.h) {
+ return JXL_FAILURE("Tree size overflow");
+ }
+ max_tree_size += pixels;
+ if (max_tree_size < pixels) return JXL_FAILURE("Tree size overflow");
+ }
+ max_tree_size = std::min(static_cast<size_t>(1 << 20), max_tree_size);
+ JXL_RETURN_IF_ERROR(DecodeTree(br, &tree_storage, max_tree_size));
+ JXL_RETURN_IF_ERROR(DecodeHistograms(br, (tree_storage.size() + 1) / 2,
+ &code_storage, &context_map_storage));
+ } else {
+ if (!global_tree || !global_code || !global_ctx_map ||
+ global_tree->empty()) {
+ return JXL_FAILURE("No global tree available but one was requested");
+ }
+ tree = global_tree;
+ code = global_code;
+ context_map = global_ctx_map;
+ }
+
+ // Read channels
+ ANSSymbolReader reader(code, br, distance_multiplier);
+ for (; next_channel < nb_channels; next_channel++) {
+ Channel &channel = image.channel[next_channel];
+ if (!channel.w || !channel.h) {
+ continue; // skip empty channels
+ }
+ if (next_channel >= image.nb_meta_channels &&
+ (channel.w > options->max_chan_size ||
+ channel.h > options->max_chan_size)) {
+ break;
+ }
+ JXL_RETURN_IF_ERROR(DecodeModularChannelMAANS(
+ br, &reader, *context_map, *tree, header.wp_header, next_channel,
+ group_id, &image));
+ // Truncated group.
+ if (!br->AllReadsWithinBounds()) {
+ if (!allow_truncated_group) return JXL_FAILURE("Truncated input");
+ return Status(StatusCode::kNotEnoughBytes);
+ }
+ }
+
+ // Make sure no zero-filling happens even if next_channel < nb_channels.
+ scope_guard.Disarm();
+
+ if (!reader.CheckANSFinalState()) {
+ return JXL_FAILURE("ANS decode final state failed");
+ }
+ return true;
+}
+
+Status ModularGenericDecompress(BitReader *br, Image &image,
+ GroupHeader *header, size_t group_id,
+ ModularOptions *options, bool undo_transforms,
+ const Tree *tree, const ANSCode *code,
+ const std::vector<uint8_t> *ctx_map,
+ bool allow_truncated_group) {
+#ifdef JXL_ENABLE_ASSERT
+ std::vector<std::pair<uint32_t, uint32_t>> req_sizes(image.channel.size());
+ for (size_t c = 0; c < req_sizes.size(); c++) {
+ req_sizes[c] = {image.channel[c].w, image.channel[c].h};
+ }
+#endif
+ GroupHeader local_header;
+ if (header == nullptr) header = &local_header;
+ size_t bit_pos = br->TotalBitsConsumed();
+ auto dec_status = ModularDecode(br, image, *header, group_id, options, tree,
+ code, ctx_map, allow_truncated_group);
+ if (!allow_truncated_group) JXL_RETURN_IF_ERROR(dec_status);
+ if (dec_status.IsFatalError()) return dec_status;
+ if (undo_transforms) image.undo_transforms(header->wp_header);
+ if (image.error) return JXL_FAILURE("Corrupt file. Aborting.");
+ JXL_DEBUG_V(4,
+ "Modular-decoded a %" PRIuS "x%" PRIuS " nbchans=%" PRIuS
+ " image from %" PRIuS " bytes",
+ image.w, image.h, image.channel.size(),
+ (br->TotalBitsConsumed() - bit_pos) / 8);
+ JXL_DEBUG_V(5, "Modular image: %s", image.DebugString().c_str());
+ (void)bit_pos;
+#ifdef JXL_ENABLE_ASSERT
+ // Check that after applying all transforms we are back to the requested image
+ // sizes, otherwise there's a programming error with the transformations.
+ if (undo_transforms) {
+ JXL_ASSERT(image.channel.size() == req_sizes.size());
+ for (size_t c = 0; c < req_sizes.size(); c++) {
+ JXL_ASSERT(req_sizes[c].first == image.channel[c].w);
+ JXL_ASSERT(req_sizes[c].second == image.channel[c].h);
+ }
+ }
+#endif
+ return dec_status;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/modular/encoding/encoding.h b/third_party/jpeg-xl/lib/jxl/modular/encoding/encoding.h
new file mode 100644
index 0000000000..89697bce87
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/encoding/encoding.h
@@ -0,0 +1,135 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_ENCODING_H_
+#define LIB_JXL_MODULAR_ENCODING_ENCODING_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/encoding/dec_ma.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/options.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+// Valid range of properties for using lookup tables instead of trees.
+constexpr int32_t kPropRangeFast = 512;
+
+struct GroupHeader : public Fields {
+ GroupHeader();
+
+ JXL_FIELDS_NAME(GroupHeader)
+
+ Status VisitFields(Visitor *JXL_RESTRICT visitor) override {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &use_global_tree));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&wp_header));
+ uint32_t num_transforms = static_cast<uint32_t>(transforms.size());
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(0), Val(1), BitsOffset(4, 2),
+ BitsOffset(8, 18), 0,
+ &num_transforms));
+ if (visitor->IsReading()) transforms.resize(num_transforms);
+ for (size_t i = 0; i < num_transforms; i++) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&transforms[i]));
+ }
+ return true;
+ }
+
+ bool use_global_tree;
+ weighted::Header wp_header;
+
+ std::vector<Transform> transforms;
+};
+
+FlatTree FilterTree(const Tree &global_tree,
+ std::array<pixel_type, kNumStaticProperties> &static_props,
+ size_t *num_props, bool *use_wp, bool *wp_only,
+ bool *gradient_only);
+
+template <typename T>
+bool TreeToLookupTable(const FlatTree &tree,
+ T context_lookup[2 * kPropRangeFast],
+ int8_t offsets[2 * kPropRangeFast],
+ int8_t multipliers[2 * kPropRangeFast] = nullptr) {
+ struct TreeRange {
+ // Begin *excluded*, end *included*. This works best with > vs <= decision
+ // nodes.
+ int begin, end;
+ size_t pos;
+ };
+ std::vector<TreeRange> ranges;
+ ranges.push_back(TreeRange{-kPropRangeFast - 1, kPropRangeFast - 1, 0});
+ while (!ranges.empty()) {
+ TreeRange cur = ranges.back();
+ ranges.pop_back();
+ if (cur.begin < -kPropRangeFast - 1 || cur.begin >= kPropRangeFast - 1 ||
+ cur.end > kPropRangeFast - 1) {
+ // Tree is outside the allowed range, exit.
+ return false;
+ }
+ auto &node = tree[cur.pos];
+ // Leaf.
+ if (node.property0 == -1) {
+ if (node.predictor_offset < std::numeric_limits<int8_t>::min() ||
+ node.predictor_offset > std::numeric_limits<int8_t>::max()) {
+ return false;
+ }
+ if (node.multiplier < std::numeric_limits<int8_t>::min() ||
+ node.multiplier > std::numeric_limits<int8_t>::max()) {
+ return false;
+ }
+ if (multipliers == nullptr && node.multiplier != 1) {
+ return false;
+ }
+ for (int i = cur.begin + 1; i < cur.end + 1; i++) {
+ context_lookup[i + kPropRangeFast] = node.childID;
+ if (multipliers) multipliers[i + kPropRangeFast] = node.multiplier;
+ offsets[i + kPropRangeFast] = node.predictor_offset;
+ }
+ continue;
+ }
+ // > side of top node.
+ if (node.properties[0] >= kNumStaticProperties) {
+ ranges.push_back(TreeRange({node.splitvals[0], cur.end, node.childID}));
+ ranges.push_back(
+ TreeRange({node.splitval0, node.splitvals[0], node.childID + 1}));
+ } else {
+ ranges.push_back(TreeRange({node.splitval0, cur.end, node.childID}));
+ }
+ // <= side
+ if (node.properties[1] >= kNumStaticProperties) {
+ ranges.push_back(
+ TreeRange({node.splitvals[1], node.splitval0, node.childID + 2}));
+ ranges.push_back(
+ TreeRange({cur.begin, node.splitvals[1], node.childID + 3}));
+ } else {
+ ranges.push_back(
+ TreeRange({cur.begin, node.splitval0, node.childID + 2}));
+ }
+ }
+ return true;
+}
+// TODO(veluca): make cleaner interfaces.
+
+Status ValidateChannelDimensions(const Image &image,
+ const ModularOptions &options);
+
+Status ModularGenericDecompress(BitReader *br, Image &image,
+ GroupHeader *header, size_t group_id,
+ ModularOptions *options,
+ bool undo_transforms = true,
+ const Tree *tree = nullptr,
+ const ANSCode *code = nullptr,
+ const std::vector<uint8_t> *ctx_map = nullptr,
+ bool allow_truncated_group = false);
+} // namespace jxl
+
+#endif // LIB_JXL_MODULAR_ENCODING_ENCODING_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/encoding/ma_common.h b/third_party/jpeg-xl/lib/jxl/modular/encoding/ma_common.h
new file mode 100644
index 0000000000..71b7847321
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/encoding/ma_common.h
@@ -0,0 +1,28 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_MA_COMMON_H_
+#define LIB_JXL_MODULAR_ENCODING_MA_COMMON_H_
+
+#include <stddef.h>
+
+namespace jxl {
+
+enum MATreeContext : size_t {
+ kSplitValContext = 0,
+ kPropertyContext = 1,
+ kPredictorContext = 2,
+ kOffsetContext = 3,
+ kMultiplierLogContext = 4,
+ kMultiplierBitsContext = 5,
+
+ kNumTreeContexts = 6,
+};
+
+static constexpr size_t kMaxTreeSize = 1 << 22;
+
+} // namespace jxl
+
+#endif // LIB_JXL_MODULAR_ENCODING_MA_COMMON_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/modular_image.cc b/third_party/jpeg-xl/lib/jxl/modular/modular_image.cc
new file mode 100644
index 0000000000..785d0c5443
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/modular_image.cc
@@ -0,0 +1,77 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/modular_image.h"
+
+#include <sstream>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+void Image::undo_transforms(const weighted::Header &wp_header,
+ jxl::ThreadPool *pool) {
+ while (!transform.empty()) {
+ Transform t = transform.back();
+ JXL_DEBUG_V(4, "Undoing transform");
+ Status result = t.Inverse(*this, wp_header, pool);
+ if (result == false) {
+ JXL_NOTIFY_ERROR("Error while undoing transform.");
+ error = true;
+ return;
+ }
+ JXL_DEBUG_V(8, "Undoing transform: done");
+ transform.pop_back();
+ }
+}
+
+Image::Image(size_t iw, size_t ih, int bitdepth, int nb_chans)
+ : w(iw), h(ih), bitdepth(bitdepth), nb_meta_channels(0), error(false) {
+ for (int i = 0; i < nb_chans; i++) channel.emplace_back(Channel(iw, ih));
+}
+
+Image::Image() : w(0), h(0), bitdepth(8), nb_meta_channels(0), error(true) {}
+
+Image &Image::operator=(Image &&other) noexcept {
+ w = other.w;
+ h = other.h;
+ bitdepth = other.bitdepth;
+ nb_meta_channels = other.nb_meta_channels;
+ error = other.error;
+ channel = std::move(other.channel);
+ transform = std::move(other.transform);
+ return *this;
+}
+
+Image Image::clone() {
+ Image c(w, h, bitdepth, 0);
+ c.nb_meta_channels = nb_meta_channels;
+ c.error = error;
+ c.transform = transform;
+ for (Channel &ch : channel) {
+ Channel a(ch.w, ch.h, ch.hshift, ch.vshift);
+ CopyImageTo(ch.plane, &a.plane);
+ c.channel.push_back(std::move(a));
+ }
+ return c;
+}
+
+std::string Image::DebugString() const {
+ std::ostringstream os;
+ os << w << "x" << h << ", depth: " << bitdepth;
+ if (!channel.empty()) {
+ os << ", channels:";
+ for (size_t i = 0; i < channel.size(); ++i) {
+ os << " " << channel[i].w << "x" << channel[i].h
+ << "(shift: " << channel[i].hshift << "," << channel[i].vshift << ")";
+ if (i < nb_meta_channels) os << "*";
+ }
+ }
+ return os.str();
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/modular/modular_image.h b/third_party/jpeg-xl/lib/jxl/modular/modular_image.h
new file mode 100644
index 0000000000..3e9b5a8a08
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/modular_image.h
@@ -0,0 +1,118 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_MODULAR_IMAGE_H_
+#define LIB_JXL_MODULAR_MODULAR_IMAGE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+
+typedef int32_t pixel_type; // can use int16_t if it's only for 8-bit images.
+ // Need some wiggle room for YCoCg / Squeeze etc
+
+typedef int64_t pixel_type_w;
+
+namespace weighted {
+struct Header;
+}
+
+class Channel {
+ public:
+ jxl::Plane<pixel_type> plane;
+ size_t w, h;
+ int hshift, vshift; // w ~= image.w >> hshift; h ~= image.h >> vshift
+ Channel(size_t iw, size_t ih, int hsh = 0, int vsh = 0)
+ : plane(iw, ih), w(iw), h(ih), hshift(hsh), vshift(vsh) {}
+
+ Channel(const Channel& other) = delete;
+ Channel& operator=(const Channel& other) = delete;
+
+ // Move assignment
+ Channel& operator=(Channel&& other) noexcept {
+ w = other.w;
+ h = other.h;
+ hshift = other.hshift;
+ vshift = other.vshift;
+ plane = std::move(other.plane);
+ return *this;
+ }
+
+ // Move constructor
+ Channel(Channel&& other) noexcept = default;
+
+ void shrink() {
+ if (plane.xsize() == w && plane.ysize() == h) return;
+ jxl::Plane<pixel_type> resizedplane(w, h);
+ plane = std::move(resizedplane);
+ }
+ void shrink(int nw, int nh) {
+ w = nw;
+ h = nh;
+ shrink();
+ }
+
+ JXL_INLINE pixel_type* Row(const size_t y) { return plane.Row(y); }
+ JXL_INLINE const pixel_type* Row(const size_t y) const {
+ return plane.Row(y);
+ }
+};
+
+class Transform;
+
+class Image {
+ public:
+ // image data, transforms can dramatically change the number of channels and
+ // their semantics
+ std::vector<Channel> channel;
+ // transforms that have been applied (and that have to be undone)
+ std::vector<Transform> transform;
+
+ // image dimensions (channels may have different dimensions due to transforms)
+ size_t w, h;
+ int bitdepth;
+ size_t nb_meta_channels; // first few channels might contain palette(s)
+ bool error; // true if a fatal error occurred, false otherwise
+
+ Image(size_t iw, size_t ih, int bitdepth, int nb_chans);
+ Image();
+
+ Image(const Image& other) = delete;
+ Image& operator=(const Image& other) = delete;
+
+ Image& operator=(Image&& other) noexcept;
+ Image(Image&& other) noexcept = default;
+
+ bool empty() const {
+ for (const auto& ch : channel) {
+ if (ch.w && ch.h) return false;
+ }
+ return true;
+ }
+
+ Image clone();
+
+ void undo_transforms(const weighted::Header& wp_header,
+ jxl::ThreadPool* pool = nullptr);
+
+ std::string DebugString() const;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_MODULAR_MODULAR_IMAGE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/options.h b/third_party/jpeg-xl/lib/jxl/modular/options.h
new file mode 100644
index 0000000000..ce6596b912
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/options.h
@@ -0,0 +1,117 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_OPTIONS_H_
+#define LIB_JXL_MODULAR_OPTIONS_H_
+
+#include <stdint.h>
+
+#include <array>
+#include <vector>
+
+namespace jxl {
+
+using PropertyVal = int32_t;
+using Properties = std::vector<PropertyVal>;
+
+enum class Predictor : uint32_t {
+ Zero = 0,
+ Left = 1,
+ Top = 2,
+ Average0 = 3,
+ Select = 4,
+ Gradient = 5,
+ Weighted = 6,
+ TopRight = 7,
+ TopLeft = 8,
+ LeftLeft = 9,
+ Average1 = 10,
+ Average2 = 11,
+ Average3 = 12,
+ Average4 = 13,
+ // The following predictors are encoder-only.
+ Best = 14, // Best of Gradient and Weighted
+ Variable =
+ 15, // Find the best decision tree for predictors/predictor per row
+};
+
+constexpr size_t kNumModularPredictors =
+ static_cast<size_t>(Predictor::Average4) + 1;
+constexpr size_t kNumModularEncoderPredictors =
+ static_cast<size_t>(Predictor::Variable) + 1;
+
+static constexpr ssize_t kNumStaticProperties = 2; // channel, group_id.
+
+using StaticPropRange =
+ std::array<std::array<uint32_t, 2>, kNumStaticProperties>;
+
+struct ModularMultiplierInfo {
+ StaticPropRange range;
+ uint32_t multiplier;
+};
+
+struct ModularOptions {
+ /// Used in both encode and decode:
+
+ // Stop encoding/decoding when reaching a (non-meta) channel that has a
+ // dimension bigger than max_chan_size.
+ size_t max_chan_size = 0xFFFFFF;
+
+ // Used during decoding for validation of transforms (sqeeezing) scheme.
+ size_t group_dim = 0x1FFFFFFF;
+
+ /// Encode options:
+ // Fraction of pixels to look at to learn a MA tree
+ // Number of iterations to do to learn a MA tree
+ // (if zero there is no MA context model)
+ float nb_repeats = .5f;
+
+ // Maximum number of (previous channel) properties to use in the MA trees
+ int max_properties = 0; // no previous channels
+
+ // Alternative heuristic tweaks.
+ // Properties default to channel, group, weighted, gradient residual, W-NW,
+ // NW-N, N-NE, N-NN
+ std::vector<uint32_t> splitting_heuristics_properties = {0, 1, 15, 9,
+ 10, 11, 12, 13};
+ float splitting_heuristics_node_threshold = 96;
+ size_t max_property_values = 32;
+
+ // Predictor to use for each channel.
+ Predictor predictor = static_cast<Predictor>(-1);
+
+ int wp_mode = 0;
+
+ float fast_decode_multiplier = 1.01f;
+
+ // Forces the encoder to produce a tree that is compatible with the WP-only
+ // decode path (or with the no-wp path, or the gradient-only path).
+ enum class TreeMode { kGradientOnly, kWPOnly, kNoWP, kDefault };
+ TreeMode wp_tree_mode = TreeMode::kDefault;
+
+ // Skip fast paths in the encoder.
+ bool skip_encoder_fast_path = false;
+
+ // Kind of tree to use.
+ // TODO(veluca): add tree kinds for JPEG recompression with CfL enabled,
+ // general AC metadata, different DC qualities, and others.
+ enum class TreeKind {
+ kTrivialTreeNoPredictor,
+ kLearn,
+ kJpegTranscodeACMeta,
+ kFalconACMeta,
+ kACMeta,
+ kWPFixedDC,
+ kGradientFixedDC,
+ };
+ TreeKind tree_kind = TreeKind::kLearn;
+
+ // Ignore the image and just pretend all tokens are zeroes
+ bool zero_tokens = false;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_MODULAR_OPTIONS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.cc
new file mode 100644
index 0000000000..bc31445bc5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.cc
@@ -0,0 +1,606 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/enc_palette.h"
+
+#include <array>
+#include <map>
+#include <set>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/enc_transform.h"
+#include "lib/jxl/modular/transform/palette.h"
+
+namespace jxl {
+
+namespace palette_internal {
+
+static constexpr bool kEncodeToHighQualityImplicitPalette = true;
+
+// Inclusive.
+static constexpr int kMinImplicitPaletteIndex = -(2 * 72 - 1);
+
+float ColorDistance(const std::vector<float> &JXL_RESTRICT a,
+ const std::vector<pixel_type> &JXL_RESTRICT b) {
+ JXL_ASSERT(a.size() == b.size());
+ float distance = 0;
+ float ave3 = 0;
+ if (a.size() >= 3) {
+ ave3 = (a[0] + b[0] + a[1] + b[1] + a[2] + b[2]) * (1.21f / 3.0f);
+ }
+ float sum_a = 0, sum_b = 0;
+ for (size_t c = 0; c < a.size(); ++c) {
+ const float difference =
+ static_cast<float>(a[c]) - static_cast<float>(b[c]);
+ float weight = c == 0 ? 3 : c == 1 ? 5 : 2;
+ if (c < 3 && (a[c] + b[c] >= ave3)) {
+ const float add_w[3] = {
+ 1.15,
+ 1.15,
+ 1.12,
+ };
+ weight += add_w[c];
+ if (c == 2 && ((a[2] + b[2]) < 1.22 * ave3)) {
+ weight -= 0.5;
+ }
+ }
+ distance += difference * difference * weight * weight;
+ const int sum_weight = c == 0 ? 3 : c == 1 ? 5 : 1;
+ sum_a += a[c] * sum_weight;
+ sum_b += b[c] * sum_weight;
+ }
+ distance *= 4;
+ float sum_difference = sum_a - sum_b;
+ distance += sum_difference * sum_difference;
+ return distance;
+}
+
+static int QuantizeColorToImplicitPaletteIndex(
+ const std::vector<pixel_type> &color, const int palette_size,
+ const int bit_depth, bool high_quality) {
+ int index = 0;
+ if (high_quality) {
+ int multiplier = 1;
+ for (size_t c = 0; c < color.size(); c++) {
+ int quantized = ((kLargeCube - 1) * color[c] + (1 << (bit_depth - 1))) /
+ ((1 << bit_depth) - 1);
+ JXL_ASSERT((quantized % kLargeCube) == quantized);
+ index += quantized * multiplier;
+ multiplier *= kLargeCube;
+ }
+ return index + palette_size + kLargeCubeOffset;
+ } else {
+ int multiplier = 1;
+ for (size_t c = 0; c < color.size(); c++) {
+ int value = color[c];
+ value -= 1 << (std::max(0, bit_depth - 3));
+ value = std::max(0, value);
+ int quantized = ((kLargeCube - 1) * value + (1 << (bit_depth - 1))) /
+ ((1 << bit_depth) - 1);
+ JXL_ASSERT((quantized % kLargeCube) == quantized);
+ if (quantized > kSmallCube - 1) {
+ quantized = kSmallCube - 1;
+ }
+ index += quantized * multiplier;
+ multiplier *= kSmallCube;
+ }
+ return index + palette_size;
+ }
+}
+
+} // namespace palette_internal
+
+int RoundInt(int value, int div) { // symmetric rounding around 0
+ if (value < 0) return -RoundInt(-value, div);
+ return (value + div / 2) / div;
+}
+
+struct PaletteIterationData {
+ static constexpr int kMaxDeltas = 128;
+ bool final_run = false;
+ std::vector<pixel_type> deltas[3];
+ std::vector<double> delta_distances;
+ std::vector<pixel_type> frequent_deltas[3];
+
+ // Populates `frequent_deltas` with items from `deltas` based on frequencies
+ // and color distances.
+ void FindFrequentColorDeltas(int num_pixels, int bitdepth) {
+ using pixel_type_3d = std::array<pixel_type, 3>;
+ std::map<pixel_type_3d, double> delta_frequency_map;
+ pixel_type bucket_size = 3 << std::max(0, bitdepth - 8);
+ // Store frequency weighted by delta distance from quantized value.
+ for (size_t i = 0; i < deltas[0].size(); ++i) {
+ pixel_type_3d delta = {
+ {RoundInt(deltas[0][i], bucket_size),
+ RoundInt(deltas[1][i], bucket_size),
+ RoundInt(deltas[2][i], bucket_size)}}; // a basic form of clustering
+ if (delta[0] == 0 && delta[1] == 0 && delta[2] == 0) continue;
+ delta_frequency_map[delta] += sqrt(sqrt(delta_distances[i]));
+ }
+
+ const float delta_distance_multiplier = 1.0f / num_pixels;
+
+ // Weigh frequencies by magnitude and normalize.
+ for (auto &delta_frequency : delta_frequency_map) {
+ std::vector<pixel_type> current_delta = {delta_frequency.first[0],
+ delta_frequency.first[1],
+ delta_frequency.first[2]};
+ float delta_distance =
+ sqrt(palette_internal::ColorDistance({0, 0, 0}, current_delta)) + 1;
+ delta_frequency.second *= delta_distance * delta_distance_multiplier;
+ }
+
+ // Sort by weighted frequency.
+ using pixel_type_3d_frequency = std::pair<pixel_type_3d, double>;
+ std::vector<pixel_type_3d_frequency> sorted_delta_frequency_map(
+ delta_frequency_map.begin(), delta_frequency_map.end());
+ std::sort(
+ sorted_delta_frequency_map.begin(), sorted_delta_frequency_map.end(),
+ [](const pixel_type_3d_frequency &a, const pixel_type_3d_frequency &b) {
+ return a.second > b.second;
+ });
+
+ // Store the top deltas.
+ for (auto &delta_frequency : sorted_delta_frequency_map) {
+ if (frequent_deltas[0].size() >= kMaxDeltas) break;
+ // Number obtained by optimizing on jyrki31 corpus:
+ if (delta_frequency.second < 17) break;
+ for (int c = 0; c < 3; ++c) {
+ frequent_deltas[c].push_back(delta_frequency.first[c] * bucket_size);
+ }
+ }
+ }
+};
+
+Status FwdPaletteIteration(Image &input, uint32_t begin_c, uint32_t end_c,
+ uint32_t &nb_colors, uint32_t &nb_deltas,
+ bool ordered, bool lossy, Predictor &predictor,
+ const weighted::Header &wp_header,
+ PaletteIterationData &palette_iteration_data) {
+ JXL_QUIET_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, end_c));
+ JXL_ASSERT(begin_c >= input.nb_meta_channels);
+ uint32_t nb = end_c - begin_c + 1;
+
+ size_t w = input.channel[begin_c].w;
+ size_t h = input.channel[begin_c].h;
+
+ if (!lossy && nb == 1) {
+ // Channel palette special case
+ if (nb_colors == 0) return false;
+ std::vector<pixel_type> lookup;
+ pixel_type minval, maxval;
+ compute_minmax(input.channel[begin_c], &minval, &maxval);
+ size_t lookup_table_size =
+ static_cast<int64_t>(maxval) - static_cast<int64_t>(minval) + 1;
+ if (lookup_table_size > palette_internal::kMaxPaletteLookupTableSize) {
+ // a lookup table would use too much memory, instead use a slower approach
+ // with std::set
+ std::set<pixel_type> chpalette;
+ pixel_type idx = 0;
+ for (size_t y = 0; y < h; y++) {
+ const pixel_type *p = input.channel[begin_c].Row(y);
+ for (size_t x = 0; x < w; x++) {
+ const bool new_color = chpalette.insert(p[x]).second;
+ if (new_color) {
+ idx++;
+ if (idx > (int)nb_colors) return false;
+ }
+ }
+ }
+ JXL_DEBUG_V(6, "Channel %i uses only %i colors.", begin_c, idx);
+ Channel pch(idx, 1);
+ pch.hshift = -1;
+ pch.vshift = -1;
+ nb_colors = idx;
+ idx = 0;
+ pixel_type *JXL_RESTRICT p_palette = pch.Row(0);
+ for (pixel_type p : chpalette) {
+ p_palette[idx++] = p;
+ }
+ for (size_t y = 0; y < h; y++) {
+ pixel_type *p = input.channel[begin_c].Row(y);
+ for (size_t x = 0; x < w; x++) {
+ for (idx = 0; p[x] != p_palette[idx] && idx < (int)nb_colors; idx++) {
+ }
+ JXL_DASSERT(idx < (int)nb_colors);
+ p[x] = idx;
+ }
+ }
+ predictor = Predictor::Zero;
+ input.nb_meta_channels++;
+ input.channel.insert(input.channel.begin(), std::move(pch));
+
+ return true;
+ }
+ lookup.resize(lookup_table_size, 0);
+ pixel_type idx = 0;
+ for (size_t y = 0; y < h; y++) {
+ const pixel_type *p = input.channel[begin_c].Row(y);
+ for (size_t x = 0; x < w; x++) {
+ if (lookup[p[x] - minval] == 0) {
+ lookup[p[x] - minval] = 1;
+ idx++;
+ if (idx > (int)nb_colors) return false;
+ }
+ }
+ }
+ JXL_DEBUG_V(6, "Channel %i uses only %i colors.", begin_c, idx);
+ Channel pch(idx, 1);
+ pch.hshift = -1;
+ pch.vshift = -1;
+ nb_colors = idx;
+ idx = 0;
+ pixel_type *JXL_RESTRICT p_palette = pch.Row(0);
+ for (size_t i = 0; i < lookup_table_size; i++) {
+ if (lookup[i]) {
+ p_palette[idx] = i + minval;
+ lookup[i] = idx;
+ idx++;
+ }
+ }
+ for (size_t y = 0; y < h; y++) {
+ pixel_type *p = input.channel[begin_c].Row(y);
+ for (size_t x = 0; x < w; x++) p[x] = lookup[p[x] - minval];
+ }
+ predictor = Predictor::Zero;
+ input.nb_meta_channels++;
+ input.channel.insert(input.channel.begin(), std::move(pch));
+ return true;
+ }
+
+ Image quantized_input;
+ if (lossy) {
+ quantized_input = Image(w, h, input.bitdepth, nb);
+ for (size_t c = 0; c < nb; c++) {
+ CopyImageTo(input.channel[begin_c + c].plane,
+ &quantized_input.channel[c].plane);
+ }
+ }
+
+ JXL_DEBUG_V(
+ 7, "Trying to represent channels %i-%i using at most a %i-color palette.",
+ begin_c, end_c, nb_colors);
+ nb_deltas = 0;
+ bool delta_used = false;
+ std::set<std::vector<pixel_type>>
+ candidate_palette; // ordered lexicographically
+ std::vector<std::vector<pixel_type>> candidate_palette_imageorder;
+ std::vector<pixel_type> color(nb);
+ std::vector<float> color_with_error(nb);
+ std::vector<const pixel_type *> p_in(nb);
+
+ if (lossy) {
+ palette_iteration_data.FindFrequentColorDeltas(w * h, input.bitdepth);
+ nb_deltas = palette_iteration_data.frequent_deltas[0].size();
+
+ // Count color frequency for colors that make a cross.
+ std::map<std::vector<pixel_type>, size_t> color_freq_map;
+ for (size_t y = 1; y + 1 < h; y++) {
+ for (uint32_t c = 0; c < nb; c++) {
+ p_in[c] = input.channel[begin_c + c].Row(y);
+ }
+ for (size_t x = 1; x + 1 < w; x++) {
+ for (uint32_t c = 0; c < nb; c++) {
+ color[c] = p_in[c][x];
+ }
+ int offsets[4][2] = {{1, 0}, {-1, 0}, {0, 1}, {0, -1}};
+ bool makes_cross = true;
+ for (int i = 0; i < 4 && makes_cross; ++i) {
+ int dx = offsets[i][0];
+ int dy = offsets[i][1];
+ for (uint32_t c = 0; c < nb && makes_cross; c++) {
+ if (input.channel[begin_c + c].Row(y + dy)[x + dx] != color[c]) {
+ makes_cross = false;
+ }
+ }
+ }
+ if (makes_cross) color_freq_map[color] += 1;
+ }
+ }
+ // Add colors satisfying frequency condition to the palette.
+ constexpr float kImageFraction = 0.01f;
+ size_t color_frequency_lower_bound = 5 + input.h * input.w * kImageFraction;
+ for (const auto &color_freq : color_freq_map) {
+ if (color_freq.second > color_frequency_lower_bound) {
+ candidate_palette.insert(color_freq.first);
+ candidate_palette_imageorder.push_back(color_freq.first);
+ }
+ }
+ }
+
+ for (size_t y = 0; y < h; y++) {
+ for (uint32_t c = 0; c < nb; c++) {
+ p_in[c] = input.channel[begin_c + c].Row(y);
+ }
+ for (size_t x = 0; x < w; x++) {
+ if (lossy && candidate_palette.size() >= nb_colors) break;
+ for (uint32_t c = 0; c < nb; c++) {
+ color[c] = p_in[c][x];
+ }
+ const bool new_color = candidate_palette.insert(color).second;
+ if (new_color) {
+ candidate_palette_imageorder.push_back(color);
+ }
+ if (candidate_palette.size() > nb_colors) {
+ return false; // too many colors
+ }
+ }
+ }
+
+ nb_colors = nb_deltas + candidate_palette.size();
+ JXL_DEBUG_V(6, "Channels %i-%i can be represented using a %i-color palette.",
+ begin_c, end_c, nb_colors);
+
+ Channel pch(nb_colors, nb);
+ pch.hshift = -1;
+ pch.vshift = -1;
+ pixel_type *JXL_RESTRICT p_palette = pch.Row(0);
+ intptr_t onerow = pch.plane.PixelsPerRow();
+ intptr_t onerow_image = input.channel[begin_c].plane.PixelsPerRow();
+ const int bit_depth = std::min(input.bitdepth, 24);
+
+ if (lossy) {
+ for (uint32_t i = 0; i < nb_deltas; i++) {
+ for (size_t c = 0; c < 3; c++) {
+ p_palette[c * onerow + i] =
+ palette_iteration_data.frequent_deltas[c][i];
+ }
+ }
+ }
+
+ int x = 0;
+ if (ordered) {
+ JXL_DEBUG_V(7, "Palette of %i colors, using lexicographic order",
+ nb_colors);
+ for (auto pcol : candidate_palette) {
+ JXL_DEBUG_V(9, " Color %i : ", x);
+ for (size_t i = 0; i < nb; i++) {
+ p_palette[nb_deltas + i * onerow + x] = pcol[i];
+ }
+ for (size_t i = 0; i < nb; i++) {
+ JXL_DEBUG_V(9, "%i ", pcol[i]);
+ }
+ x++;
+ }
+ } else {
+ JXL_DEBUG_V(7, "Palette of %i colors, using image order", nb_colors);
+ for (auto pcol : candidate_palette_imageorder) {
+ JXL_DEBUG_V(9, " Color %i : ", x);
+ for (size_t i = 0; i < nb; i++)
+ p_palette[nb_deltas + i * onerow + x] = pcol[i];
+ for (size_t i = 0; i < nb; i++) JXL_DEBUG_V(9, "%i ", pcol[i]);
+ x++;
+ }
+ }
+ std::vector<weighted::State> wp_states;
+ for (size_t c = 0; c < nb; c++) {
+ wp_states.emplace_back(wp_header, w, h);
+ }
+ std::vector<pixel_type *> p_quant(nb);
+ // Three rows of error for dithering: y to y + 2.
+ // Each row has two pixels of padding in the ends, which is
+ // beneficial for both precision and encoding speed.
+ std::vector<std::vector<float>> error_row[3];
+ if (lossy) {
+ for (int i = 0; i < 3; ++i) {
+ error_row[i].resize(nb);
+ for (size_t c = 0; c < nb; ++c) {
+ error_row[i][c].resize(w + 4);
+ }
+ }
+ }
+ for (size_t y = 0; y < h; y++) {
+ for (size_t c = 0; c < nb; c++) {
+ p_in[c] = input.channel[begin_c + c].Row(y);
+ if (lossy) p_quant[c] = quantized_input.channel[c].Row(y);
+ }
+ pixel_type *JXL_RESTRICT p = input.channel[begin_c].Row(y);
+ for (size_t x = 0; x < w; x++) {
+ int index;
+ if (!lossy) {
+ for (size_t c = 0; c < nb; c++) color[c] = p_in[c][x];
+ // Exact search.
+ for (index = 0; static_cast<uint32_t>(index) < nb_colors; index++) {
+ bool found = true;
+ for (size_t c = 0; c < nb; c++) {
+ if (color[c] != p_palette[c * onerow + index]) {
+ found = false;
+ break;
+ }
+ }
+ if (found) break;
+ }
+ if (index < static_cast<int>(nb_deltas)) {
+ delta_used = true;
+ }
+ } else {
+ int best_index = 0;
+ bool best_is_delta = false;
+ float best_distance = std::numeric_limits<float>::infinity();
+ std::vector<pixel_type> best_val(nb, 0);
+ std::vector<pixel_type> ideal_residual(nb, 0);
+ std::vector<pixel_type> quantized_val(nb);
+ std::vector<pixel_type> predictions(nb);
+ static const double kDiffusionMultiplier[] = {0.55, 0.75};
+ for (int diffusion_index = 0; diffusion_index < 2; ++diffusion_index) {
+ for (size_t c = 0; c < nb; c++) {
+ color_with_error[c] =
+ p_in[c][x] + palette_iteration_data.final_run *
+ kDiffusionMultiplier[diffusion_index] *
+ error_row[0][c][x + 2];
+ color[c] = Clamp1(lroundf(color_with_error[c]), 0l,
+ (1l << input.bitdepth) - 1);
+ }
+
+ for (size_t c = 0; c < nb; ++c) {
+ predictions[c] = PredictNoTreeWP(w, p_quant[c] + x, onerow_image, x,
+ y, predictor, &wp_states[c])
+ .guess;
+ }
+ const auto TryIndex = [&](const int index) {
+ for (size_t c = 0; c < nb; c++) {
+ quantized_val[c] = palette_internal::GetPaletteValue(
+ p_palette, index, /*c=*/c,
+ /*palette_size=*/nb_colors,
+ /*onerow=*/onerow, /*bit_depth=*/bit_depth);
+ if (index < static_cast<int>(nb_deltas)) {
+ quantized_val[c] += predictions[c];
+ }
+ }
+ const float color_distance =
+ 32.0 / (1LL << std::max(0, 2 * (bit_depth - 8))) *
+ palette_internal::ColorDistance(color_with_error,
+ quantized_val);
+ float index_penalty = 0;
+ if (index == -1) {
+ index_penalty = -124;
+ } else if (index < 0) {
+ index_penalty = -2 * index;
+ } else if (index < static_cast<int>(nb_deltas)) {
+ index_penalty = 250;
+ } else if (index < static_cast<int>(nb_colors)) {
+ index_penalty = 150;
+ } else if (index < static_cast<int>(nb_colors) +
+ palette_internal::kLargeCubeOffset) {
+ index_penalty = 70;
+ } else {
+ index_penalty = 256;
+ }
+ const float distance = color_distance + index_penalty;
+ if (distance < best_distance) {
+ best_distance = distance;
+ best_index = index;
+ best_is_delta = index < static_cast<int>(nb_deltas);
+ best_val.swap(quantized_val);
+ for (size_t c = 0; c < nb; ++c) {
+ ideal_residual[c] = color_with_error[c] - predictions[c];
+ }
+ }
+ };
+ for (index = palette_internal::kMinImplicitPaletteIndex;
+ index < static_cast<int32_t>(nb_colors); index++) {
+ TryIndex(index);
+ }
+ TryIndex(palette_internal::QuantizeColorToImplicitPaletteIndex(
+ color, nb_colors, bit_depth,
+ /*high_quality=*/false));
+ if (palette_internal::kEncodeToHighQualityImplicitPalette) {
+ TryIndex(palette_internal::QuantizeColorToImplicitPaletteIndex(
+ color, nb_colors, bit_depth,
+ /*high_quality=*/true));
+ }
+ }
+ index = best_index;
+ delta_used |= best_is_delta;
+ if (!palette_iteration_data.final_run) {
+ for (size_t c = 0; c < 3; ++c) {
+ palette_iteration_data.deltas[c].push_back(ideal_residual[c]);
+ }
+ palette_iteration_data.delta_distances.push_back(best_distance);
+ }
+
+ for (size_t c = 0; c < nb; ++c) {
+ wp_states[c].UpdateErrors(best_val[c], x, y, w);
+ p_quant[c][x] = best_val[c];
+ }
+ float len_error = 0;
+ for (size_t c = 0; c < nb; ++c) {
+ float local_error = color_with_error[c] - best_val[c];
+ len_error += local_error * local_error;
+ }
+ len_error = sqrt(len_error);
+ float modulate = 1.0;
+ int len_limit = 38 << std::max(0, bit_depth - 8);
+ if (len_error > len_limit) {
+ modulate *= len_limit / len_error;
+ }
+ for (size_t c = 0; c < nb; ++c) {
+ float total_error = (color_with_error[c] - best_val[c]);
+
+ // If the neighboring pixels have some error in the opposite
+ // direction of total_error, cancel some or all of it out before
+ // spreading among them.
+ constexpr int offsets[12][2] = {{1, 2}, {0, 3}, {0, 4}, {1, 1},
+ {1, 3}, {2, 2}, {1, 0}, {1, 4},
+ {2, 1}, {2, 3}, {2, 0}, {2, 4}};
+ float total_available = 0;
+ for (int i = 0; i < 11; ++i) {
+ const int row = offsets[i][0];
+ const int col = offsets[i][1];
+ if (std::signbit(error_row[row][c][x + col]) !=
+ std::signbit(total_error)) {
+ total_available += error_row[row][c][x + col];
+ }
+ }
+ float weight =
+ std::abs(total_error) / (std::abs(total_available) + 1e-3);
+ weight = std::min(weight, 1.0f);
+ for (int i = 0; i < 11; ++i) {
+ const int row = offsets[i][0];
+ const int col = offsets[i][1];
+ if (std::signbit(error_row[row][c][x + col]) !=
+ std::signbit(total_error)) {
+ total_error += weight * error_row[row][c][x + col];
+ error_row[row][c][x + col] *= (1 - weight);
+ }
+ }
+ total_error *= modulate;
+ const float remaining_error = (1.0f / 14.) * total_error;
+ error_row[0][c][x + 3] += 2 * remaining_error;
+ error_row[0][c][x + 4] += remaining_error;
+ error_row[1][c][x + 0] += remaining_error;
+ for (int i = 0; i < 5; ++i) {
+ error_row[1][c][x + i] += remaining_error;
+ error_row[2][c][x + i] += remaining_error;
+ }
+ }
+ }
+ if (palette_iteration_data.final_run) p[x] = index;
+ }
+ if (lossy) {
+ for (size_t c = 0; c < nb; ++c) {
+ error_row[0][c].swap(error_row[1][c]);
+ error_row[1][c].swap(error_row[2][c]);
+ std::fill(error_row[2][c].begin(), error_row[2][c].end(), 0.f);
+ }
+ }
+ }
+ if (!delta_used) {
+ predictor = Predictor::Zero;
+ }
+ if (palette_iteration_data.final_run) {
+ input.nb_meta_channels++;
+ input.channel.erase(input.channel.begin() + begin_c + 1,
+ input.channel.begin() + end_c + 1);
+ input.channel.insert(input.channel.begin(), std::move(pch));
+ }
+ nb_colors -= nb_deltas;
+ return true;
+}
+
+Status FwdPalette(Image &input, uint32_t begin_c, uint32_t end_c,
+ uint32_t &nb_colors, uint32_t &nb_deltas, bool ordered,
+ bool lossy, Predictor &predictor,
+ const weighted::Header &wp_header) {
+ PaletteIterationData palette_iteration_data;
+ uint32_t nb_colors_orig = nb_colors;
+ uint32_t nb_deltas_orig = nb_deltas;
+ // preprocessing pass in case of lossy palette
+ if (lossy && input.bitdepth >= 8) {
+ JXL_RETURN_IF_ERROR(FwdPaletteIteration(
+ input, begin_c, end_c, nb_colors_orig, nb_deltas_orig, ordered, lossy,
+ predictor, wp_header, palette_iteration_data));
+ }
+ palette_iteration_data.final_run = true;
+ return FwdPaletteIteration(input, begin_c, end_c, nb_colors, nb_deltas,
+ ordered, lossy, predictor, wp_header,
+ palette_iteration_data);
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.h b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.h
new file mode 100644
index 0000000000..0f3d66825b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_palette.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_PALETTE_H_
+#define LIB_JXL_MODULAR_TRANSFORM_ENC_PALETTE_H_
+
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+Status FwdPalette(Image &input, uint32_t begin_c, uint32_t end_c,
+ uint32_t &nb_colors, uint32_t &nb_deltas, bool ordered,
+ bool lossy, Predictor &predictor,
+ const weighted::Header &wp_header);
+
+} // namespace jxl
+
+#endif // LIB_JXL_MODULAR_TRANSFORM_ENC_PALETTE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.cc
new file mode 100644
index 0000000000..050563a3c2
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.cc
@@ -0,0 +1,73 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/enc_rct.h"
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h" // CheckEqualChannels
+
+namespace jxl {
+
+Status FwdRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) {
+ JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, begin_c + 2));
+ if (rct_type == 0) { // noop
+ return false;
+ }
+ // Permutation: 0=RGB, 1=GBR, 2=BRG, 3=RBG, 4=GRB, 5=BGR
+ int permutation = rct_type / 7;
+ // 0-5 values have the low bit corresponding to Third and the high bits
+ // corresponding to Second. 6 corresponds to YCoCg.
+ //
+ // Second: 0=nop, 1=SubtractFirst, 2=SubtractAvgFirstThird
+ //
+ // Third: 0=nop, 1=SubtractFirst
+ int custom = rct_type % 7;
+ size_t m = begin_c;
+ size_t w = input.channel[m + 0].w;
+ size_t h = input.channel[m + 0].h;
+ int second = (custom % 7) >> 1;
+ int third = (custom % 7) & 1;
+ const auto do_rct = [&](const int y, const int thread) {
+ const pixel_type* in0 = input.channel[m + (permutation % 3)].Row(y);
+ const pixel_type* in1 =
+ input.channel[m + ((permutation + 1 + permutation / 3) % 3)].Row(y);
+ const pixel_type* in2 =
+ input.channel[m + ((permutation + 2 - permutation / 3) % 3)].Row(y);
+ pixel_type* out0 = input.channel[m].Row(y);
+ pixel_type* out1 = input.channel[m + 1].Row(y);
+ pixel_type* out2 = input.channel[m + 2].Row(y);
+ if (custom == 6) {
+ for (size_t x = 0; x < w; x++) {
+ pixel_type R = in0[x];
+ pixel_type G = in1[x];
+ pixel_type B = in2[x];
+ out1[x] = R - B;
+ pixel_type tmp = B + (out1[x] >> 1);
+ out2[x] = G - tmp;
+ out0[x] = tmp + (out2[x] >> 1);
+ }
+ } else {
+ for (size_t x = 0; x < w; x++) {
+ pixel_type First = in0[x];
+ pixel_type Second = in1[x];
+ pixel_type Third = in2[x];
+ if (second == 1) {
+ Second = Second - First;
+ } else if (second == 2) {
+ Second = Second - ((First + Third) >> 1);
+ }
+ if (third) Third = Third - First;
+ out0[x] = First;
+ out1[x] = Second;
+ out2[x] = Third;
+ }
+ }
+ };
+ return RunOnPool(pool, 0, h, ThreadPool::NoInit, do_rct, "FwdRCT");
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.h b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.h
new file mode 100644
index 0000000000..cb5a193c8d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_rct.h
@@ -0,0 +1,17 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_RCT_H_
+#define LIB_JXL_MODULAR_TRANSFORM_ENC_RCT_H_
+
+#include "lib/jxl/modular/modular_image.h"
+
+namespace jxl {
+
+Status FwdRCT(Image &input, size_t begin_c, size_t rct_type, ThreadPool *pool);
+
+} // namespace jxl
+
+#endif // LIB_JXL_MODULAR_TRANSFORM_ENC_RCT_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.cc
new file mode 100644
index 0000000000..dfd90cde68
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.cc
@@ -0,0 +1,141 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/enc_squeeze.h"
+
+#include <stdlib.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/squeeze.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+void FwdHSqueeze(Image &input, int c, int rc) {
+ const Channel &chin = input.channel[c];
+
+ JXL_DEBUG_V(4, "Doing horizontal squeeze of channel %i to new channel %i", c,
+ rc);
+
+ Channel chout((chin.w + 1) / 2, chin.h, chin.hshift + 1, chin.vshift);
+ Channel chout_residual(chin.w - chout.w, chout.h, chin.hshift + 1,
+ chin.vshift);
+
+ for (size_t y = 0; y < chout.h; y++) {
+ const pixel_type *JXL_RESTRICT p_in = chin.Row(y);
+ pixel_type *JXL_RESTRICT p_out = chout.Row(y);
+ pixel_type *JXL_RESTRICT p_res = chout_residual.Row(y);
+ for (size_t x = 0; x < chout_residual.w; x++) {
+ pixel_type A = p_in[x * 2];
+ pixel_type B = p_in[x * 2 + 1];
+ pixel_type avg = (A + B + (A > B)) >> 1;
+ p_out[x] = avg;
+
+ pixel_type diff = A - B;
+
+ pixel_type next_avg = avg;
+ if (x + 1 < chout_residual.w) {
+ next_avg = (p_in[x * 2 + 2] + p_in[x * 2 + 3] +
+ (p_in[x * 2 + 2] > p_in[x * 2 + 3])) >>
+ 1; // which will be chout.value(y,x+1)
+ } else if (chin.w & 1)
+ next_avg = p_in[x * 2 + 2];
+ pixel_type left = (x > 0 ? p_in[x * 2 - 1] : avg);
+ pixel_type tendency = SmoothTendency(left, avg, next_avg);
+
+ p_res[x] = diff - tendency;
+ }
+ if (chin.w & 1) {
+ int x = chout.w - 1;
+ p_out[x] = p_in[x * 2];
+ }
+ }
+ input.channel[c] = std::move(chout);
+ input.channel.insert(input.channel.begin() + rc, std::move(chout_residual));
+}
+
+void FwdVSqueeze(Image &input, int c, int rc) {
+ const Channel &chin = input.channel[c];
+
+ JXL_DEBUG_V(4, "Doing vertical squeeze of channel %i to new channel %i", c,
+ rc);
+
+ Channel chout(chin.w, (chin.h + 1) / 2, chin.hshift, chin.vshift + 1);
+ Channel chout_residual(chin.w, chin.h - chout.h, chin.hshift,
+ chin.vshift + 1);
+ intptr_t onerow_in = chin.plane.PixelsPerRow();
+ for (size_t y = 0; y < chout_residual.h; y++) {
+ const pixel_type *JXL_RESTRICT p_in = chin.Row(y * 2);
+ pixel_type *JXL_RESTRICT p_out = chout.Row(y);
+ pixel_type *JXL_RESTRICT p_res = chout_residual.Row(y);
+ for (size_t x = 0; x < chout.w; x++) {
+ pixel_type A = p_in[x];
+ pixel_type B = p_in[x + onerow_in];
+ pixel_type avg = (A + B + (A > B)) >> 1;
+ p_out[x] = avg;
+
+ pixel_type diff = A - B;
+
+ pixel_type next_avg = avg;
+ if (y + 1 < chout_residual.h) {
+ next_avg = (p_in[x + 2 * onerow_in] + p_in[x + 3 * onerow_in] +
+ (p_in[x + 2 * onerow_in] > p_in[x + 3 * onerow_in])) >>
+ 1; // which will be chout.value(y+1,x)
+ } else if (chin.h & 1) {
+ next_avg = p_in[x + 2 * onerow_in];
+ }
+ pixel_type top =
+ (y > 0 ? p_in[static_cast<ssize_t>(x) - onerow_in] : avg);
+ pixel_type tendency = SmoothTendency(top, avg, next_avg);
+
+ p_res[x] = diff - tendency;
+ }
+ }
+ if (chin.h & 1) {
+ size_t y = chout.h - 1;
+ const pixel_type *p_in = chin.Row(y * 2);
+ pixel_type *p_out = chout.Row(y);
+ for (size_t x = 0; x < chout.w; x++) {
+ p_out[x] = p_in[x];
+ }
+ }
+ input.channel[c] = std::move(chout);
+ input.channel.insert(input.channel.begin() + rc, std::move(chout_residual));
+}
+
+Status FwdSqueeze(Image &input, std::vector<SqueezeParams> parameters,
+ ThreadPool *pool) {
+ if (parameters.empty()) {
+ DefaultSqueezeParameters(&parameters, input);
+ }
+ // if nothing to do, don't do squeeze
+ if (parameters.empty()) return false;
+ for (size_t i = 0; i < parameters.size(); i++) {
+ JXL_RETURN_IF_ERROR(
+ CheckMetaSqueezeParams(parameters[i], input.channel.size()));
+ bool horizontal = parameters[i].horizontal;
+ bool in_place = parameters[i].in_place;
+ uint32_t beginc = parameters[i].begin_c;
+ uint32_t endc = parameters[i].begin_c + parameters[i].num_c - 1;
+ uint32_t offset;
+ if (in_place) {
+ offset = endc + 1;
+ } else {
+ offset = input.channel.size();
+ }
+ for (uint32_t c = beginc; c <= endc; c++) {
+ if (horizontal) {
+ FwdHSqueeze(input, c, offset + c - beginc);
+ } else {
+ FwdVSqueeze(input, c, offset + c - beginc);
+ }
+ }
+ }
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.h b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.h
new file mode 100644
index 0000000000..39b001017b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_squeeze.h
@@ -0,0 +1,20 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_SQUEEZE_H_
+#define LIB_JXL_MODULAR_TRANSFORM_ENC_SQUEEZE_H_
+
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+Status FwdSqueeze(Image &input, std::vector<SqueezeParams> parameters,
+ ThreadPool *pool);
+
+} // namespace jxl
+
+#endif // LIB_JXL_MODULAR_TRANSFORM_ENC_SQUEEZE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.cc
new file mode 100644
index 0000000000..bdaaf9f87e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.cc
@@ -0,0 +1,46 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/enc_transform.h"
+
+#include "lib/jxl/modular/transform/enc_palette.h"
+#include "lib/jxl/modular/transform/enc_rct.h"
+#include "lib/jxl/modular/transform/enc_squeeze.h"
+
+namespace jxl {
+
+Status TransformForward(Transform &t, Image &input,
+ const weighted::Header &wp_header, ThreadPool *pool) {
+ switch (t.id) {
+ case TransformId::kRCT:
+ return FwdRCT(input, t.begin_c, t.rct_type, pool);
+ case TransformId::kSqueeze:
+ return FwdSqueeze(input, t.squeezes, pool);
+ case TransformId::kPalette:
+ return FwdPalette(input, t.begin_c, t.begin_c + t.num_c - 1, t.nb_colors,
+ t.nb_deltas, t.ordered_palette, t.lossy_palette,
+ t.predictor, wp_header);
+ default:
+ return JXL_FAILURE("Unknown transformation (ID=%u)",
+ static_cast<unsigned int>(t.id));
+ }
+}
+
+void compute_minmax(const Channel &ch, pixel_type *min, pixel_type *max) {
+ pixel_type realmin = std::numeric_limits<pixel_type>::max();
+ pixel_type realmax = std::numeric_limits<pixel_type>::min();
+ for (size_t y = 0; y < ch.h; y++) {
+ const pixel_type *JXL_RESTRICT p = ch.Row(y);
+ for (size_t x = 0; x < ch.w; x++) {
+ if (p[x] < realmin) realmin = p[x];
+ if (p[x] > realmax) realmax = p[x];
+ }
+ }
+
+ if (min) *min = realmin;
+ if (max) *max = realmax;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.h b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.h
new file mode 100644
index 0000000000..07659e1b0a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/enc_transform.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_TRANSFORM_H_
+#define LIB_JXL_MODULAR_TRANSFORM_ENC_TRANSFORM_H_
+
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+namespace jxl {
+
+Status TransformForward(Transform &t, Image &input,
+ const weighted::Header &wp_header, ThreadPool *pool);
+
+void compute_minmax(const Channel &ch, pixel_type *min, pixel_type *max);
+
+} // namespace jxl
+
+#endif // LIB_JXL_MODULAR_TRANSFORM_ENC_TRANSFORM_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/palette.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/palette.cc
new file mode 100644
index 0000000000..46129f19f0
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/palette.cc
@@ -0,0 +1,176 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/palette.h"
+
+namespace jxl {
+
+Status InvPalette(Image &input, uint32_t begin_c, uint32_t nb_colors,
+ uint32_t nb_deltas, Predictor predictor,
+ const weighted::Header &wp_header, ThreadPool *pool) {
+ if (input.nb_meta_channels < 1) {
+ return JXL_FAILURE("Error: Palette transform without palette.");
+ }
+ std::atomic<int> num_errors{0};
+ int nb = input.channel[0].h;
+ uint32_t c0 = begin_c + 1;
+ if (c0 >= input.channel.size()) {
+ return JXL_FAILURE("Channel is out of range.");
+ }
+ size_t w = input.channel[c0].w;
+ size_t h = input.channel[c0].h;
+ if (nb < 1) return JXL_FAILURE("Corrupted transforms");
+ for (int i = 1; i < nb; i++) {
+ input.channel.insert(
+ input.channel.begin() + c0 + 1,
+ Channel(w, h, input.channel[c0].hshift, input.channel[c0].vshift));
+ }
+ const Channel &palette = input.channel[0];
+ const pixel_type *JXL_RESTRICT p_palette = input.channel[0].Row(0);
+ intptr_t onerow = input.channel[0].plane.PixelsPerRow();
+ intptr_t onerow_image = input.channel[c0].plane.PixelsPerRow();
+ const int bit_depth = std::min(input.bitdepth, 24);
+
+ if (w == 0) {
+ // Nothing to do.
+ // Avoid touching "empty" channels with non-zero height.
+ } else if (nb_deltas == 0 && predictor == Predictor::Zero) {
+ if (nb == 1) {
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, h, ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /* thread */) {
+ const size_t y = task;
+ pixel_type *p = input.channel[c0].Row(y);
+ for (size_t x = 0; x < w; x++) {
+ const int index = Clamp1<int>(p[x], 0, (pixel_type)palette.w - 1);
+ p[x] = palette_internal::GetPaletteValue(
+ p_palette, index, /*c=*/0,
+ /*palette_size=*/palette.w,
+ /*onerow=*/onerow, /*bit_depth=*/bit_depth);
+ }
+ },
+ "UndoChannelPalette"));
+ } else {
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, h, ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /* thread */) {
+ const size_t y = task;
+ std::vector<pixel_type *> p_out(nb);
+ const pixel_type *p_index = input.channel[c0].Row(y);
+ for (int c = 0; c < nb; c++)
+ p_out[c] = input.channel[c0 + c].Row(y);
+ for (size_t x = 0; x < w; x++) {
+ const int index = p_index[x];
+ for (int c = 0; c < nb; c++) {
+ p_out[c][x] = palette_internal::GetPaletteValue(
+ p_palette, index, /*c=*/c,
+ /*palette_size=*/palette.w,
+ /*onerow=*/onerow, /*bit_depth=*/bit_depth);
+ }
+ }
+ },
+ "UndoPalette"));
+ }
+ } else {
+ // Parallelized per channel.
+ ImageI indices = CopyImage(input.channel[c0].plane);
+ if (predictor == Predictor::Weighted) {
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, nb, ThreadPool::NoInit,
+ [&](const uint32_t c, size_t /* thread */) {
+ Channel &channel = input.channel[c0 + c];
+ weighted::State wp_state(wp_header, channel.w, channel.h);
+ for (size_t y = 0; y < channel.h; y++) {
+ pixel_type *JXL_RESTRICT p = channel.Row(y);
+ const pixel_type *JXL_RESTRICT idx = indices.Row(y);
+ for (size_t x = 0; x < channel.w; x++) {
+ int index = idx[x];
+ pixel_type_w val = 0;
+ const pixel_type palette_entry =
+ palette_internal::GetPaletteValue(
+ p_palette, index, /*c=*/c,
+ /*palette_size=*/palette.w, /*onerow=*/onerow,
+ /*bit_depth=*/bit_depth);
+ if (index < static_cast<int32_t>(nb_deltas)) {
+ PredictionResult pred =
+ PredictNoTreeWP(channel.w, p + x, onerow_image, x, y,
+ predictor, &wp_state);
+ val = pred.guess + palette_entry;
+ } else {
+ val = palette_entry;
+ }
+ p[x] = val;
+ wp_state.UpdateErrors(p[x], x, y, channel.w);
+ }
+ }
+ },
+ "UndoDeltaPaletteWP"));
+ } else {
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, nb, ThreadPool::NoInit,
+ [&](const uint32_t c, size_t /* thread */) {
+ Channel &channel = input.channel[c0 + c];
+ for (size_t y = 0; y < channel.h; y++) {
+ pixel_type *JXL_RESTRICT p = channel.Row(y);
+ const pixel_type *JXL_RESTRICT idx = indices.Row(y);
+ for (size_t x = 0; x < channel.w; x++) {
+ int index = idx[x];
+ pixel_type_w val = 0;
+ const pixel_type palette_entry =
+ palette_internal::GetPaletteValue(
+ p_palette, index, /*c=*/c,
+ /*palette_size=*/palette.w,
+ /*onerow=*/onerow, /*bit_depth=*/bit_depth);
+ if (index < static_cast<int32_t>(nb_deltas)) {
+ PredictionResult pred = PredictNoTreeNoWP(
+ channel.w, p + x, onerow_image, x, y, predictor);
+ val = pred.guess + palette_entry;
+ } else {
+ val = palette_entry;
+ }
+ p[x] = val;
+ }
+ }
+ },
+ "UndoDeltaPaletteNoWP"));
+ }
+ }
+ if (c0 >= input.nb_meta_channels) {
+ // Palette was done on normal channels
+ input.nb_meta_channels--;
+ } else {
+ // Palette was done on metachannels
+ JXL_ASSERT(static_cast<int>(input.nb_meta_channels) >= 2 - nb);
+ input.nb_meta_channels -= 2 - nb;
+ JXL_ASSERT(begin_c + nb - 1 < input.nb_meta_channels);
+ }
+ input.channel.erase(input.channel.begin(), input.channel.begin() + 1);
+ return num_errors.load(std::memory_order_relaxed) == 0;
+}
+
+Status MetaPalette(Image &input, uint32_t begin_c, uint32_t end_c,
+ uint32_t nb_colors, uint32_t nb_deltas, bool lossy) {
+ JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, end_c));
+
+ size_t nb = end_c - begin_c + 1;
+ if (begin_c >= input.nb_meta_channels) {
+ // Palette was done on normal channels
+ input.nb_meta_channels++;
+ } else {
+ // Palette was done on metachannels
+ JXL_ASSERT(end_c < input.nb_meta_channels);
+ // we remove nb-1 metachannels and add one
+ input.nb_meta_channels += 2 - nb;
+ }
+ input.channel.erase(input.channel.begin() + begin_c + 1,
+ input.channel.begin() + end_c + 1);
+ Channel pch(nb_colors + nb_deltas, nb);
+ pch.hshift = -1;
+ pch.vshift = -1;
+ input.channel.insert(input.channel.begin(), std::move(pch));
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/palette.h b/third_party/jpeg-xl/lib/jxl/modular/transform/palette.h
new file mode 100644
index 0000000000..cc0f67960b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/palette.h
@@ -0,0 +1,129 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_PALETTE_H_
+#define LIB_JXL_MODULAR_TRANSFORM_PALETTE_H_
+
+#include <atomic>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h" // CheckEqualChannels
+
+namespace jxl {
+
+namespace palette_internal {
+
+static constexpr int kMaxPaletteLookupTableSize = 1 << 16;
+
+static constexpr int kRgbChannels = 3;
+
+// 5x5x5 color cube for the larger cube.
+static constexpr int kLargeCube = 5;
+
+// Smaller interleaved color cube to fill the holes of the larger cube.
+static constexpr int kSmallCube = 4;
+static constexpr int kSmallCubeBits = 2;
+// kSmallCube ** 3
+static constexpr int kLargeCubeOffset = kSmallCube * kSmallCube * kSmallCube;
+
+static inline pixel_type Scale(uint64_t value, uint64_t bit_depth,
+ uint64_t denom) {
+ // return (value * ((static_cast<pixel_type_w>(1) << bit_depth) - 1)) / denom;
+ // We only call this function with kSmallCube or kLargeCube - 1 as denom,
+ // allowing us to avoid a division here.
+ JXL_ASSERT(denom == 4);
+ return (value * ((static_cast<uint64_t>(1) << bit_depth) - 1)) >> 2;
+}
+
+// The purpose of this function is solely to extend the interpretation of
+// palette indices to implicit values. If index < nb_deltas, indicating that the
+// result is a delta palette entry, it is the responsibility of the caller to
+// treat it as such.
+static JXL_MAYBE_UNUSED pixel_type
+GetPaletteValue(const pixel_type *const palette, int index, const size_t c,
+ const int palette_size, const int onerow, const int bit_depth) {
+ if (index < 0) {
+ static constexpr std::array<std::array<pixel_type, 3>, 72> kDeltaPalette = {
+ {
+ {{0, 0, 0}}, {{4, 4, 4}}, {{11, 0, 0}},
+ {{0, 0, -13}}, {{0, -12, 0}}, {{-10, -10, -10}},
+ {{-18, -18, -18}}, {{-27, -27, -27}}, {{-18, -18, 0}},
+ {{0, 0, -32}}, {{-32, 0, 0}}, {{-37, -37, -37}},
+ {{0, -32, -32}}, {{24, 24, 45}}, {{50, 50, 50}},
+ {{-45, -24, -24}}, {{-24, -45, -45}}, {{0, -24, -24}},
+ {{-34, -34, 0}}, {{-24, 0, -24}}, {{-45, -45, -24}},
+ {{64, 64, 64}}, {{-32, 0, -32}}, {{0, -32, 0}},
+ {{-32, 0, 32}}, {{-24, -45, -24}}, {{45, 24, 45}},
+ {{24, -24, -45}}, {{-45, -24, 24}}, {{80, 80, 80}},
+ {{64, 0, 0}}, {{0, 0, -64}}, {{0, -64, -64}},
+ {{-24, -24, 45}}, {{96, 96, 96}}, {{64, 64, 0}},
+ {{45, -24, -24}}, {{34, -34, 0}}, {{112, 112, 112}},
+ {{24, -45, -45}}, {{45, 45, -24}}, {{0, -32, 32}},
+ {{24, -24, 45}}, {{0, 96, 96}}, {{45, -24, 24}},
+ {{24, -45, -24}}, {{-24, -45, 24}}, {{0, -64, 0}},
+ {{96, 0, 0}}, {{128, 128, 128}}, {{64, 0, 64}},
+ {{144, 144, 144}}, {{96, 96, 0}}, {{-36, -36, 36}},
+ {{45, -24, -45}}, {{45, -45, -24}}, {{0, 0, -96}},
+ {{0, 128, 128}}, {{0, 96, 0}}, {{45, 24, -45}},
+ {{-128, 0, 0}}, {{24, -45, 24}}, {{-45, 24, -45}},
+ {{64, 0, -64}}, {{64, -64, -64}}, {{96, 0, 96}},
+ {{45, -45, 24}}, {{24, 45, -45}}, {{64, 64, -64}},
+ {{128, 128, 0}}, {{0, 0, -128}}, {{-24, 45, -45}},
+ }};
+ if (c >= kRgbChannels) {
+ return 0;
+ }
+ // Do not open the brackets, otherwise INT32_MIN negation could overflow.
+ index = -(index + 1);
+ index %= 1 + 2 * (kDeltaPalette.size() - 1);
+ static constexpr int kMultiplier[] = {-1, 1};
+ pixel_type result =
+ kDeltaPalette[((index + 1) >> 1)][c] * kMultiplier[index & 1];
+ if (bit_depth > 8) {
+ result *= static_cast<pixel_type>(1) << (bit_depth - 8);
+ }
+ return result;
+ } else if (palette_size <= index && index < palette_size + kLargeCubeOffset) {
+ if (c >= kRgbChannels) return 0;
+ index -= palette_size;
+ index >>= c * kSmallCubeBits;
+ return Scale(index % kSmallCube, bit_depth, kSmallCube) +
+ (1 << (std::max(0, bit_depth - 3)));
+ } else if (palette_size + kLargeCubeOffset <= index) {
+ if (c >= kRgbChannels) return 0;
+ index -= palette_size + kLargeCubeOffset;
+ // TODO(eustas): should we take care of ambiguity created by
+ // index >= kLargeCube ** 3 ?
+ switch (c) {
+ case 0:
+ break;
+ case 1:
+ index /= kLargeCube;
+ break;
+ case 2:
+ index /= kLargeCube * kLargeCube;
+ break;
+ }
+ return Scale(index % kLargeCube, bit_depth, kLargeCube - 1);
+ }
+ return palette[c * onerow + static_cast<size_t>(index)];
+}
+
+} // namespace palette_internal
+
+Status InvPalette(Image &input, uint32_t begin_c, uint32_t nb_colors,
+ uint32_t nb_deltas, Predictor predictor,
+ const weighted::Header &wp_header, ThreadPool *pool);
+
+Status MetaPalette(Image &input, uint32_t begin_c, uint32_t end_c,
+ uint32_t nb_colors, uint32_t nb_deltas, bool lossy);
+
+} // namespace jxl
+
+#endif // LIB_JXL_MODULAR_TRANSFORM_PALETTE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/rct.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/rct.cc
new file mode 100644
index 0000000000..f3002a5ac3
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/rct.cc
@@ -0,0 +1,153 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/rct.h"
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/modular/transform/rct.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Sub;
+
+template <int transform_type>
+void InvRCTRow(const pixel_type* in0, const pixel_type* in1,
+ const pixel_type* in2, pixel_type* out0, pixel_type* out1,
+ pixel_type* out2, size_t w) {
+ static_assert(transform_type >= 0 && transform_type < 7,
+ "Invalid transform type");
+ int second = transform_type >> 1;
+ int third = transform_type & 1;
+
+ size_t x = 0;
+ const HWY_FULL(pixel_type) d;
+ const size_t N = Lanes(d);
+ for (; x + N - 1 < w; x += N) {
+ if (transform_type == 6) {
+ auto Y = Load(d, in0 + x);
+ auto Co = Load(d, in1 + x);
+ auto Cg = Load(d, in2 + x);
+ Y = Sub(Y, ShiftRight<1>(Cg));
+ auto G = Add(Cg, Y);
+ Y = Sub(Y, ShiftRight<1>(Co));
+ auto R = Add(Y, Co);
+ Store(R, d, out0 + x);
+ Store(G, d, out1 + x);
+ Store(Y, d, out2 + x);
+ } else {
+ auto First = Load(d, in0 + x);
+ auto Second = Load(d, in1 + x);
+ auto Third = Load(d, in2 + x);
+ if (third) Third = Add(Third, First);
+ if (second == 1) {
+ Second = Add(Second, First);
+ } else if (second == 2) {
+ Second = Add(Second, ShiftRight<1>(Add(First, Third)));
+ }
+ Store(First, d, out0 + x);
+ Store(Second, d, out1 + x);
+ Store(Third, d, out2 + x);
+ }
+ }
+ for (; x < w; x++) {
+ if (transform_type == 6) {
+ pixel_type Y = in0[x];
+ pixel_type Co = in1[x];
+ pixel_type Cg = in2[x];
+ pixel_type tmp = PixelAdd(Y, -(Cg >> 1));
+ pixel_type G = PixelAdd(Cg, tmp);
+ pixel_type B = PixelAdd(tmp, -(Co >> 1));
+ pixel_type R = PixelAdd(B, Co);
+ out0[x] = R;
+ out1[x] = G;
+ out2[x] = B;
+ } else {
+ pixel_type First = in0[x];
+ pixel_type Second = in1[x];
+ pixel_type Third = in2[x];
+ if (third) Third = PixelAdd(Third, First);
+ if (second == 1) {
+ Second = PixelAdd(Second, First);
+ } else if (second == 2) {
+ Second = PixelAdd(Second, (PixelAdd(First, Third) >> 1));
+ }
+ out0[x] = First;
+ out1[x] = Second;
+ out2[x] = Third;
+ }
+ }
+}
+
+Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) {
+ JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, begin_c + 2));
+ size_t m = begin_c;
+ Channel& c0 = input.channel[m + 0];
+ size_t w = c0.w;
+ size_t h = c0.h;
+ if (rct_type == 0) { // noop
+ return true;
+ }
+ // Permutation: 0=RGB, 1=GBR, 2=BRG, 3=RBG, 4=GRB, 5=BGR
+ int permutation = rct_type / 7;
+ JXL_CHECK(permutation < 6);
+ // 0-5 values have the low bit corresponding to Third and the high bits
+ // corresponding to Second. 6 corresponds to YCoCg.
+ //
+ // Second: 0=nop, 1=SubtractFirst, 2=SubtractAvgFirstThird
+ //
+ // Third: 0=nop, 1=SubtractFirst
+ int custom = rct_type % 7;
+ // Special case: permute-only. Swap channels around.
+ if (custom == 0) {
+ Channel ch0 = std::move(input.channel[m]);
+ Channel ch1 = std::move(input.channel[m + 1]);
+ Channel ch2 = std::move(input.channel[m + 2]);
+ input.channel[m + (permutation % 3)] = std::move(ch0);
+ input.channel[m + ((permutation + 1 + permutation / 3) % 3)] =
+ std::move(ch1);
+ input.channel[m + ((permutation + 2 - permutation / 3) % 3)] =
+ std::move(ch2);
+ return true;
+ }
+ constexpr decltype(&InvRCTRow<0>) inv_rct_row[] = {
+ InvRCTRow<0>, InvRCTRow<1>, InvRCTRow<2>, InvRCTRow<3>,
+ InvRCTRow<4>, InvRCTRow<5>, InvRCTRow<6>};
+ JXL_RETURN_IF_ERROR(RunOnPool(
+ pool, 0, h, ThreadPool::NoInit,
+ [&](const uint32_t task, size_t /* thread */) {
+ const size_t y = task;
+ const pixel_type* in0 = input.channel[m].Row(y);
+ const pixel_type* in1 = input.channel[m + 1].Row(y);
+ const pixel_type* in2 = input.channel[m + 2].Row(y);
+ pixel_type* out0 = input.channel[m + (permutation % 3)].Row(y);
+ pixel_type* out1 =
+ input.channel[m + ((permutation + 1 + permutation / 3) % 3)].Row(y);
+ pixel_type* out2 =
+ input.channel[m + ((permutation + 2 - permutation / 3) % 3)].Row(y);
+ inv_rct_row[custom](in0, in1, in2, out0, out1, out2, w);
+ },
+ "InvRCT"));
+ return true;
+}
+
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(InvRCT);
+Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) {
+ return HWY_DYNAMIC_DISPATCH(InvRCT)(input, begin_c, rct_type, pool);
+}
+
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/rct.h b/third_party/jpeg-xl/lib/jxl/modular/transform/rct.h
new file mode 100644
index 0000000000..aef65621d5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/rct.h
@@ -0,0 +1,20 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_RCT_H_
+#define LIB_JXL_MODULAR_TRANSFORM_RCT_H_
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h" // CheckEqualChannels
+
+namespace jxl {
+
+Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool);
+
+} // namespace jxl
+
+#endif // LIB_JXL_MODULAR_TRANSFORM_RCT_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.cc
new file mode 100644
index 0000000000..8440d9e804
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.cc
@@ -0,0 +1,478 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/squeeze.h"
+
+#include <stdlib.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/modular/transform/squeeze.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/simd_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Abs;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::And;
+using hwy::HWY_NAMESPACE::Gt;
+using hwy::HWY_NAMESPACE::IfThenElse;
+using hwy::HWY_NAMESPACE::IfThenZeroElse;
+using hwy::HWY_NAMESPACE::Lt;
+using hwy::HWY_NAMESPACE::MulEven;
+using hwy::HWY_NAMESPACE::Ne;
+using hwy::HWY_NAMESPACE::Neg;
+using hwy::HWY_NAMESPACE::OddEven;
+using hwy::HWY_NAMESPACE::RebindToUnsigned;
+using hwy::HWY_NAMESPACE::ShiftLeft;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Sub;
+using hwy::HWY_NAMESPACE::Xor;
+
+#if HWY_TARGET != HWY_SCALAR
+
+JXL_INLINE void FastUnsqueeze(const pixel_type *JXL_RESTRICT p_residual,
+ const pixel_type *JXL_RESTRICT p_avg,
+ const pixel_type *JXL_RESTRICT p_navg,
+ const pixel_type *p_pout,
+ pixel_type *JXL_RESTRICT p_out,
+ pixel_type *p_nout) {
+ const HWY_CAPPED(pixel_type, 8) d;
+ const RebindToUnsigned<decltype(d)> du;
+ const size_t N = Lanes(d);
+ auto onethird = Set(d, 0x55555556);
+ for (size_t x = 0; x < 8; x += N) {
+ auto avg = Load(d, p_avg + x);
+ auto next_avg = Load(d, p_navg + x);
+ auto top = Load(d, p_pout + x);
+ // Equivalent to SmoothTendency(top,avg,next_avg), but without branches
+ auto Ba = Sub(top, avg);
+ auto an = Sub(avg, next_avg);
+ auto nonmono = Xor(Ba, an);
+ auto absBa = Abs(Ba);
+ auto absan = Abs(an);
+ auto absBn = Abs(Sub(top, next_avg));
+ // Compute a3 = absBa / 3
+ auto a3e = BitCast(d, ShiftRight<32>(MulEven(absBa, onethird)));
+ auto a3oi = MulEven(Reverse(d, absBa), onethird);
+ auto a3o = BitCast(
+ d, Reverse(hwy::HWY_NAMESPACE::Repartition<pixel_type_w, decltype(d)>(),
+ a3oi));
+ auto a3 = OddEven(a3o, a3e);
+ a3 = Add(a3, Add(absBn, Set(d, 2)));
+ auto absdiff = ShiftRight<2>(a3);
+ auto skipdiff = Ne(Ba, Zero(d));
+ skipdiff = And(skipdiff, Ne(an, Zero(d)));
+ skipdiff = And(skipdiff, Lt(nonmono, Zero(d)));
+ auto absBa2 = Add(ShiftLeft<1>(absBa), And(absdiff, Set(d, 1)));
+ absdiff = IfThenElse(Gt(absdiff, absBa2),
+ Add(ShiftLeft<1>(absBa), Set(d, 1)), absdiff);
+ auto absan2 = ShiftLeft<1>(absan);
+ absdiff = IfThenElse(Gt(Add(absdiff, And(absdiff, Set(d, 1))), absan2),
+ absan2, absdiff);
+ auto diff1 = IfThenElse(Lt(top, next_avg), Neg(absdiff), absdiff);
+ auto tendency = IfThenZeroElse(skipdiff, diff1);
+
+ auto diff_minus_tendency = Load(d, p_residual + x);
+ auto diff = Add(diff_minus_tendency, tendency);
+ auto out =
+ Add(avg, ShiftRight<1>(
+ Add(diff, BitCast(d, ShiftRight<31>(BitCast(du, diff))))));
+ Store(out, d, p_out + x);
+ Store(Sub(out, diff), d, p_nout + x);
+ }
+}
+
+#endif
+
+Status InvHSqueeze(Image &input, uint32_t c, uint32_t rc, ThreadPool *pool) {
+ JXL_ASSERT(c < input.channel.size());
+ JXL_ASSERT(rc < input.channel.size());
+ Channel &chin = input.channel[c];
+ const Channel &chin_residual = input.channel[rc];
+ // These must be valid since we ran MetaApply already.
+ JXL_ASSERT(chin.w == DivCeil(chin.w + chin_residual.w, 2));
+ JXL_ASSERT(chin.h == chin_residual.h);
+
+ if (chin_residual.w == 0) {
+ // Short-circuit: output channel has same dimensions as input.
+ input.channel[c].hshift--;
+ return true;
+ }
+
+ // Note: chin.w >= chin_residual.w and at most 1 different.
+ Channel chout(chin.w + chin_residual.w, chin.h, chin.hshift - 1, chin.vshift);
+ JXL_DEBUG_V(4,
+ "Undoing horizontal squeeze of channel %i using residuals in "
+ "channel %i (going from width %" PRIuS " to %" PRIuS ")",
+ c, rc, chin.w, chout.w);
+
+ if (chin_residual.h == 0) {
+ // Short-circuit: channel with no pixels.
+ input.channel[c] = std::move(chout);
+ return true;
+ }
+ auto unsqueeze_row = [&](size_t y, size_t x0) {
+ const pixel_type *JXL_RESTRICT p_residual = chin_residual.Row(y);
+ const pixel_type *JXL_RESTRICT p_avg = chin.Row(y);
+ pixel_type *JXL_RESTRICT p_out = chout.Row(y);
+ for (size_t x = x0; x < chin_residual.w; x++) {
+ pixel_type_w diff_minus_tendency = p_residual[x];
+ pixel_type_w avg = p_avg[x];
+ pixel_type_w next_avg = (x + 1 < chin.w ? p_avg[x + 1] : avg);
+ pixel_type_w left = (x ? p_out[(x << 1) - 1] : avg);
+ pixel_type_w tendency = SmoothTendency(left, avg, next_avg);
+ pixel_type_w diff = diff_minus_tendency + tendency;
+ pixel_type_w A = avg + (diff / 2);
+ p_out[(x << 1)] = A;
+ pixel_type_w B = A - diff;
+ p_out[(x << 1) + 1] = B;
+ }
+ if (chout.w & 1) p_out[chout.w - 1] = p_avg[chin.w - 1];
+ };
+
+ // somewhat complicated trickery just to be able to SIMD this.
+ // Horizontal unsqueeze has horizontal data dependencies, so we do
+ // 8 rows at a time and treat it as a vertical unsqueeze of a
+ // transposed 8x8 block (or 9x8 for one input).
+ static constexpr const size_t kRowsPerThread = 8;
+ const auto unsqueeze_span = [&](const uint32_t task, size_t /* thread */) {
+ const size_t y0 = task * kRowsPerThread;
+ const size_t rows = std::min(kRowsPerThread, chin.h - y0);
+ size_t x = 0;
+
+#if HWY_TARGET != HWY_SCALAR
+ intptr_t onerow_in = chin.plane.PixelsPerRow();
+ intptr_t onerow_inr = chin_residual.plane.PixelsPerRow();
+ intptr_t onerow_out = chout.plane.PixelsPerRow();
+ const pixel_type *JXL_RESTRICT p_residual = chin_residual.Row(y0);
+ const pixel_type *JXL_RESTRICT p_avg = chin.Row(y0);
+ pixel_type *JXL_RESTRICT p_out = chout.Row(y0);
+ HWY_ALIGN pixel_type b_p_avg[9 * kRowsPerThread];
+ HWY_ALIGN pixel_type b_p_residual[8 * kRowsPerThread];
+ HWY_ALIGN pixel_type b_p_out_even[8 * kRowsPerThread];
+ HWY_ALIGN pixel_type b_p_out_odd[8 * kRowsPerThread];
+ HWY_ALIGN pixel_type b_p_out_evenT[8 * kRowsPerThread];
+ HWY_ALIGN pixel_type b_p_out_oddT[8 * kRowsPerThread];
+ const HWY_CAPPED(pixel_type, 8) d;
+ const size_t N = Lanes(d);
+ if (chin_residual.w > 16 && rows == kRowsPerThread) {
+ for (; x < chin_residual.w - 9; x += 8) {
+ Transpose8x8Block(p_residual + x, b_p_residual, onerow_inr);
+ Transpose8x8Block(p_avg + x, b_p_avg, onerow_in);
+ for (size_t y = 0; y < kRowsPerThread; y++) {
+ b_p_avg[8 * 8 + y] = p_avg[x + 8 + onerow_in * y];
+ }
+ for (size_t i = 0; i < 8; i++) {
+ FastUnsqueeze(
+ b_p_residual + 8 * i, b_p_avg + 8 * i, b_p_avg + 8 * (i + 1),
+ (x + i ? b_p_out_odd + 8 * ((x + i - 1) & 7) : b_p_avg + 8 * i),
+ b_p_out_even + 8 * i, b_p_out_odd + 8 * i);
+ }
+
+ Transpose8x8Block(b_p_out_even, b_p_out_evenT, 8);
+ Transpose8x8Block(b_p_out_odd, b_p_out_oddT, 8);
+ for (size_t y = 0; y < kRowsPerThread; y++) {
+ for (size_t i = 0; i < kRowsPerThread; i += N) {
+ auto even = Load(d, b_p_out_evenT + 8 * y + i);
+ auto odd = Load(d, b_p_out_oddT + 8 * y + i);
+ StoreInterleaved(d, even, odd,
+ p_out + ((x + i) << 1) + onerow_out * y);
+ }
+ }
+ }
+ }
+#endif
+ for (size_t y = 0; y < rows; y++) {
+ unsqueeze_row(y0 + y, x);
+ }
+ };
+ JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, DivCeil(chin.h, kRowsPerThread),
+ ThreadPool::NoInit, unsqueeze_span,
+ "InvHorizontalSqueeze"));
+ input.channel[c] = std::move(chout);
+ return true;
+}
+
+Status InvVSqueeze(Image &input, uint32_t c, uint32_t rc, ThreadPool *pool) {
+ JXL_ASSERT(c < input.channel.size());
+ JXL_ASSERT(rc < input.channel.size());
+ const Channel &chin = input.channel[c];
+ const Channel &chin_residual = input.channel[rc];
+ // These must be valid since we ran MetaApply already.
+ JXL_ASSERT(chin.h == DivCeil(chin.h + chin_residual.h, 2));
+ JXL_ASSERT(chin.w == chin_residual.w);
+
+ if (chin_residual.h == 0) {
+ // Short-circuit: output channel has same dimensions as input.
+ input.channel[c].vshift--;
+ return true;
+ }
+
+ // Note: chin.h >= chin_residual.h and at most 1 different.
+ Channel chout(chin.w, chin.h + chin_residual.h, chin.hshift, chin.vshift - 1);
+ JXL_DEBUG_V(
+ 4,
+ "Undoing vertical squeeze of channel %i using residuals in channel "
+ "%i (going from height %" PRIuS " to %" PRIuS ")",
+ c, rc, chin.h, chout.h);
+
+ if (chin_residual.w == 0) {
+ // Short-circuit: channel with no pixels.
+ input.channel[c] = std::move(chout);
+ return true;
+ }
+
+ static constexpr const int kColsPerThread = 64;
+ const auto unsqueeze_slice = [&](const uint32_t task, size_t /* thread */) {
+ const size_t x0 = task * kColsPerThread;
+ const size_t x1 = std::min((size_t)(task + 1) * kColsPerThread, chin.w);
+ const size_t w = x1 - x0;
+ // We only iterate up to std::min(chin_residual.h, chin.h) which is
+ // always chin_residual.h.
+ for (size_t y = 0; y < chin_residual.h; y++) {
+ const pixel_type *JXL_RESTRICT p_residual = chin_residual.Row(y) + x0;
+ const pixel_type *JXL_RESTRICT p_avg = chin.Row(y) + x0;
+ const pixel_type *JXL_RESTRICT p_navg =
+ chin.Row(y + 1 < chin.h ? y + 1 : y) + x0;
+ pixel_type *JXL_RESTRICT p_out = chout.Row(y << 1) + x0;
+ pixel_type *JXL_RESTRICT p_nout = chout.Row((y << 1) + 1) + x0;
+ const pixel_type *p_pout = y > 0 ? chout.Row((y << 1) - 1) + x0 : p_avg;
+ size_t x = 0;
+#if HWY_TARGET != HWY_SCALAR
+ for (; x + 7 < w; x += 8) {
+ FastUnsqueeze(p_residual + x, p_avg + x, p_navg + x, p_pout + x,
+ p_out + x, p_nout + x);
+ }
+#endif
+ for (; x < w; x++) {
+ pixel_type_w avg = p_avg[x];
+ pixel_type_w next_avg = p_navg[x];
+ pixel_type_w top = p_pout[x];
+ pixel_type_w tendency = SmoothTendency(top, avg, next_avg);
+ pixel_type_w diff_minus_tendency = p_residual[x];
+ pixel_type_w diff = diff_minus_tendency + tendency;
+ pixel_type_w out = avg + (diff / 2);
+ p_out[x] = out;
+ // If the chin_residual.h == chin.h, the output has an even number
+ // of rows so the next line is fine. Otherwise, this loop won't
+ // write to the last output row which is handled separately.
+ p_nout[x] = out - diff;
+ }
+ }
+ };
+ JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, DivCeil(chin.w, kColsPerThread),
+ ThreadPool::NoInit, unsqueeze_slice,
+ "InvVertSqueeze"));
+
+ if (chout.h & 1) {
+ size_t y = chin.h - 1;
+ const pixel_type *p_avg = chin.Row(y);
+ pixel_type *p_out = chout.Row(y << 1);
+ for (size_t x = 0; x < chin.w; x++) {
+ p_out[x] = p_avg[x];
+ }
+ }
+ input.channel[c] = std::move(chout);
+ return true;
+}
+
+Status InvSqueeze(Image &input, std::vector<SqueezeParams> parameters,
+ ThreadPool *pool) {
+ for (int i = parameters.size() - 1; i >= 0; i--) {
+ JXL_RETURN_IF_ERROR(
+ CheckMetaSqueezeParams(parameters[i], input.channel.size()));
+ bool horizontal = parameters[i].horizontal;
+ bool in_place = parameters[i].in_place;
+ uint32_t beginc = parameters[i].begin_c;
+ uint32_t endc = parameters[i].begin_c + parameters[i].num_c - 1;
+ uint32_t offset;
+ if (in_place) {
+ offset = endc + 1;
+ } else {
+ offset = input.channel.size() + beginc - endc - 1;
+ }
+ if (beginc < input.nb_meta_channels) {
+ // This is checked in MetaSqueeze.
+ JXL_ASSERT(input.nb_meta_channels > parameters[i].num_c);
+ input.nb_meta_channels -= parameters[i].num_c;
+ }
+
+ for (uint32_t c = beginc; c <= endc; c++) {
+ uint32_t rc = offset + c - beginc;
+ // MetaApply should imply that `rc` is within range, otherwise there's a
+ // programming bug.
+ JXL_ASSERT(rc < input.channel.size());
+ if ((input.channel[c].w < input.channel[rc].w) ||
+ (input.channel[c].h < input.channel[rc].h)) {
+ return JXL_FAILURE("Corrupted squeeze transform");
+ }
+ if (horizontal) {
+ JXL_RETURN_IF_ERROR(InvHSqueeze(input, c, rc, pool));
+ } else {
+ JXL_RETURN_IF_ERROR(InvVSqueeze(input, c, rc, pool));
+ }
+ }
+ input.channel.erase(input.channel.begin() + offset,
+ input.channel.begin() + offset + (endc - beginc + 1));
+ }
+ return true;
+}
+
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace jxl {
+
+HWY_EXPORT(InvSqueeze);
+Status InvSqueeze(Image &input, std::vector<SqueezeParams> parameters,
+ ThreadPool *pool) {
+ return HWY_DYNAMIC_DISPATCH(InvSqueeze)(input, parameters, pool);
+}
+
+void DefaultSqueezeParameters(std::vector<SqueezeParams> *parameters,
+ const Image &image) {
+ int nb_channels = image.channel.size() - image.nb_meta_channels;
+
+ parameters->clear();
+ size_t w = image.channel[image.nb_meta_channels].w;
+ size_t h = image.channel[image.nb_meta_channels].h;
+ JXL_DEBUG_V(
+ 7, "Default squeeze parameters for %" PRIuS "x%" PRIuS " image: ", w, h);
+
+ // do horizontal first on wide images; vertical first on tall images
+ bool wide = (w > h);
+
+ if (nb_channels > 2 && image.channel[image.nb_meta_channels + 1].w == w &&
+ image.channel[image.nb_meta_channels + 1].h == h) {
+ // assume channels 1 and 2 are chroma, and can be squeezed first for 4:2:0
+ // previews
+ JXL_DEBUG_V(7, "(4:2:0 chroma), %" PRIuS "x%" PRIuS " image", w, h);
+ SqueezeParams params;
+ // horizontal chroma squeeze
+ params.horizontal = true;
+ params.in_place = false;
+ params.begin_c = image.nb_meta_channels + 1;
+ params.num_c = 2;
+ parameters->push_back(params);
+ params.horizontal = false;
+ // vertical chroma squeeze
+ parameters->push_back(params);
+ }
+ SqueezeParams params;
+ params.begin_c = image.nb_meta_channels;
+ params.num_c = nb_channels;
+ params.in_place = true;
+
+ if (!wide) {
+ if (h > JXL_MAX_FIRST_PREVIEW_SIZE) {
+ params.horizontal = false;
+ parameters->push_back(params);
+ h = (h + 1) / 2;
+ JXL_DEBUG_V(7, "Vertical (%" PRIuS "x%" PRIuS "), ", w, h);
+ }
+ }
+ while (w > JXL_MAX_FIRST_PREVIEW_SIZE || h > JXL_MAX_FIRST_PREVIEW_SIZE) {
+ if (w > JXL_MAX_FIRST_PREVIEW_SIZE) {
+ params.horizontal = true;
+ parameters->push_back(params);
+ w = (w + 1) / 2;
+ JXL_DEBUG_V(7, "Horizontal (%" PRIuS "x%" PRIuS "), ", w, h);
+ }
+ if (h > JXL_MAX_FIRST_PREVIEW_SIZE) {
+ params.horizontal = false;
+ parameters->push_back(params);
+ h = (h + 1) / 2;
+ JXL_DEBUG_V(7, "Vertical (%" PRIuS "x%" PRIuS "), ", w, h);
+ }
+ }
+ JXL_DEBUG_V(7, "that's it");
+}
+
+Status CheckMetaSqueezeParams(const SqueezeParams &parameter,
+ int num_channels) {
+ int c1 = parameter.begin_c;
+ int c2 = parameter.begin_c + parameter.num_c - 1;
+ if (c1 < 0 || c1 >= num_channels || c2 < 0 || c2 >= num_channels || c2 < c1) {
+ return JXL_FAILURE("Invalid channel range");
+ }
+ return true;
+}
+
+Status MetaSqueeze(Image &image, std::vector<SqueezeParams> *parameters) {
+ if (parameters->empty()) {
+ DefaultSqueezeParameters(parameters, image);
+ }
+
+ for (size_t i = 0; i < parameters->size(); i++) {
+ JXL_RETURN_IF_ERROR(
+ CheckMetaSqueezeParams((*parameters)[i], image.channel.size()));
+ bool horizontal = (*parameters)[i].horizontal;
+ bool in_place = (*parameters)[i].in_place;
+ uint32_t beginc = (*parameters)[i].begin_c;
+ uint32_t endc = (*parameters)[i].begin_c + (*parameters)[i].num_c - 1;
+
+ uint32_t offset;
+ if (beginc < image.nb_meta_channels) {
+ if (endc >= image.nb_meta_channels) {
+ return JXL_FAILURE("Invalid squeeze: mix of meta and nonmeta channels");
+ }
+ if (!in_place) {
+ return JXL_FAILURE(
+ "Invalid squeeze: meta channels require in-place residuals");
+ }
+ image.nb_meta_channels += (*parameters)[i].num_c;
+ }
+ if (in_place) {
+ offset = endc + 1;
+ } else {
+ offset = image.channel.size();
+ }
+ for (uint32_t c = beginc; c <= endc; c++) {
+ if (image.channel[c].hshift > 30 || image.channel[c].vshift > 30) {
+ return JXL_FAILURE("Too many squeezes: shift > 30");
+ }
+ size_t w = image.channel[c].w;
+ size_t h = image.channel[c].h;
+ if (w == 0 || h == 0) return JXL_FAILURE("Squeezing empty channel");
+ if (horizontal) {
+ image.channel[c].w = (w + 1) / 2;
+ if (image.channel[c].hshift >= 0) image.channel[c].hshift++;
+ w = w - (w + 1) / 2;
+ } else {
+ image.channel[c].h = (h + 1) / 2;
+ if (image.channel[c].vshift >= 0) image.channel[c].vshift++;
+ h = h - (h + 1) / 2;
+ }
+ image.channel[c].shrink();
+ Channel dummy(w, h);
+ dummy.hshift = image.channel[c].hshift;
+ dummy.vshift = image.channel[c].vshift;
+
+ image.channel.insert(image.channel.begin() + offset + (c - beginc),
+ std::move(dummy));
+ JXL_DEBUG_V(8, "MetaSqueeze applied, current image: %s",
+ image.DebugString().c_str());
+ }
+ }
+ return true;
+}
+
+} // namespace jxl
+
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.h b/third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.h
new file mode 100644
index 0000000000..fb18710a6f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/squeeze.h
@@ -0,0 +1,90 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_SQUEEZE_H_
+#define LIB_JXL_MODULAR_TRANSFORM_SQUEEZE_H_
+
+// Haar-like transform: halves the resolution in one direction
+// A B -> (A+B)>>1 in one channel (average) -> same range as
+// original channel
+// A-B - tendency in a new channel ('residual' needed to make
+// the transform reversible)
+// -> theoretically range could be 2.5
+// times larger (2 times without the
+// 'tendency'), but there should be lots
+// of zeroes
+// Repeated application (alternating horizontal and vertical squeezes) results
+// in downscaling
+//
+// The default coefficient ordering is low-frequency to high-frequency, as in
+// M. Antonini, M. Barlaud, P. Mathieu and I. Daubechies, "Image coding using
+// wavelet transform", IEEE Transactions on Image Processing, vol. 1, no. 2, pp.
+// 205-220, April 1992, doi: 10.1109/83.136597.
+
+#include <stdlib.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/transform.h"
+
+#define JXL_MAX_FIRST_PREVIEW_SIZE 8
+
+namespace jxl {
+
+/*
+ int avg=(A+B)>>1;
+ int diff=(A-B);
+ int rA=(diff+(avg<<1)+(diff&1))>>1;
+ int rB=rA-diff;
+
+*/
+// |A B|C D|E F|
+// p a n p=avg(A,B), a=avg(C,D), n=avg(E,F)
+//
+// Goal: estimate C-D (avoiding ringing artifacts)
+// (ensuring that in smooth areas, a zero residual corresponds to a smooth
+// gradient)
+
+// best estimate for C: (B + 2*a)/3
+// best estimate for D: (n + 3*a)/4
+// best estimate for C-D: 4*B - 3*n - a /12
+
+// avoid ringing by 1) only doing this if B <= a <= n or B >= a >= n
+// (otherwise, this is not a smooth area and we cannot really estimate C-D)
+// 2) making sure that B <= C <= D <= n or B >= C >= D >= n
+
+inline pixel_type_w SmoothTendency(pixel_type_w B, pixel_type_w a,
+ pixel_type_w n) {
+ pixel_type_w diff = 0;
+ if (B >= a && a >= n) {
+ diff = (4 * B - 3 * n - a + 6) / 12;
+ // 2C = a<<1 + diff - diff&1 <= 2B so diff - diff&1 <= 2B - 2a
+ // 2D = a<<1 - diff - diff&1 >= 2n so diff + diff&1 <= 2a - 2n
+ if (diff - (diff & 1) > 2 * (B - a)) diff = 2 * (B - a) + 1;
+ if (diff + (diff & 1) > 2 * (a - n)) diff = 2 * (a - n);
+ } else if (B <= a && a <= n) {
+ diff = (4 * B - 3 * n - a - 6) / 12;
+ // 2C = a<<1 + diff + diff&1 >= 2B so diff + diff&1 >= 2B - 2a
+ // 2D = a<<1 - diff + diff&1 <= 2n so diff - diff&1 >= 2a - 2n
+ if (diff + (diff & 1) < 2 * (B - a)) diff = 2 * (B - a) - 1;
+ if (diff - (diff & 1) < 2 * (a - n)) diff = 2 * (a - n);
+ }
+ return diff;
+}
+
+void DefaultSqueezeParameters(std::vector<SqueezeParams> *parameters,
+ const Image &image);
+
+Status CheckMetaSqueezeParams(const SqueezeParams &parameter, int num_channels);
+
+Status MetaSqueeze(Image &image, std::vector<SqueezeParams> *parameters);
+
+Status InvSqueeze(Image &input, std::vector<SqueezeParams> parameters,
+ ThreadPool *pool);
+
+} // namespace jxl
+
+#endif // LIB_JXL_MODULAR_TRANSFORM_SQUEEZE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/transform.cc b/third_party/jpeg-xl/lib/jxl/modular/transform/transform.cc
new file mode 100644
index 0000000000..d9f2b435bf
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/transform.cc
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/modular/transform/transform.h"
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/transform/palette.h"
+#include "lib/jxl/modular/transform/rct.h"
+#include "lib/jxl/modular/transform/squeeze.h"
+
+namespace jxl {
+
+SqueezeParams::SqueezeParams() { Bundle::Init(this); }
+Transform::Transform(TransformId id) {
+ Bundle::Init(this);
+ this->id = id;
+}
+
+Status Transform::Inverse(Image &input, const weighted::Header &wp_header,
+ ThreadPool *pool) {
+ JXL_DEBUG_V(6, "Input channels (%" PRIuS ", %" PRIuS " meta): ",
+ input.channel.size(), input.nb_meta_channels);
+ switch (id) {
+ case TransformId::kRCT:
+ return InvRCT(input, begin_c, rct_type, pool);
+ case TransformId::kSqueeze:
+ return InvSqueeze(input, squeezes, pool);
+ case TransformId::kPalette:
+ return InvPalette(input, begin_c, nb_colors, nb_deltas, predictor,
+ wp_header, pool);
+ default:
+ return JXL_FAILURE("Unknown transformation (ID=%u)",
+ static_cast<unsigned int>(id));
+ }
+}
+
+Status Transform::MetaApply(Image &input) {
+ JXL_DEBUG_V(6, "MetaApply input: %s", input.DebugString().c_str());
+ switch (id) {
+ case TransformId::kRCT:
+ JXL_DEBUG_V(2, "Transform: kRCT, rct_type=%" PRIu32, rct_type);
+ return CheckEqualChannels(input, begin_c, begin_c + 2);
+ case TransformId::kSqueeze:
+ JXL_DEBUG_V(2, "Transform: kSqueeze:");
+#if JXL_DEBUG_V_LEVEL >= 2
+ {
+ auto squeezes_copy = squeezes;
+ if (squeezes_copy.empty()) {
+ DefaultSqueezeParameters(&squeezes_copy, input);
+ }
+ for (const auto &params : squeezes_copy) {
+ JXL_DEBUG_V(
+ 2,
+ " squeeze params: horizontal=%d, in_place=%d, begin_c=%" PRIu32
+ ", num_c=%" PRIu32,
+ params.horizontal, params.in_place, params.begin_c, params.num_c);
+ }
+ }
+#endif
+ return MetaSqueeze(input, &squeezes);
+ case TransformId::kPalette:
+ JXL_DEBUG_V(2,
+ "Transform: kPalette, begin_c=%" PRIu32 ", num_c=%" PRIu32
+ ", nb_colors=%" PRIu32 ", nb_deltas=%" PRIu32,
+ begin_c, num_c, nb_colors, nb_deltas);
+ return MetaPalette(input, begin_c, begin_c + num_c - 1, nb_colors,
+ nb_deltas, lossy_palette);
+ default:
+ return JXL_FAILURE("Unknown transformation (ID=%u)",
+ static_cast<unsigned int>(id));
+ }
+}
+
+Status CheckEqualChannels(const Image &image, uint32_t c1, uint32_t c2) {
+ if (c1 > image.channel.size() || c2 >= image.channel.size() || c2 < c1) {
+ return JXL_FAILURE("Invalid channel range: %u..%u (there are only %" PRIuS
+ " channels)",
+ c1, c2, image.channel.size());
+ }
+ if (c1 < image.nb_meta_channels && c2 >= image.nb_meta_channels) {
+ return JXL_FAILURE("Invalid: transforming mix of meta and nonmeta");
+ }
+ const auto &ch1 = image.channel[c1];
+ for (size_t c = c1 + 1; c <= c2; c++) {
+ const auto &ch2 = image.channel[c];
+ if (ch1.w != ch2.w || ch1.h != ch2.h || ch1.hshift != ch2.hshift ||
+ ch1.vshift != ch2.vshift) {
+ return false;
+ }
+ }
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/modular/transform/transform.h b/third_party/jpeg-xl/lib/jxl/modular/transform/transform.h
new file mode 100644
index 0000000000..d5d3259f7a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular/transform/transform.h
@@ -0,0 +1,148 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_TRANSFORM_TRANSFORM_H_
+#define LIB_JXL_MODULAR_TRANSFORM_TRANSFORM_H_
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/encoding/context_predict.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+enum class TransformId : uint32_t {
+ // G, R-G, B-G and variants (including YCoCg).
+ kRCT = 0,
+
+ // Color palette. Parameters are: [begin_c] [end_c] [nb_colors]
+ kPalette = 1,
+
+ // Squeezing (Haar-style)
+ kSqueeze = 2,
+
+ // Invalid for now.
+ kInvalid = 3,
+};
+
+struct SqueezeParams : public Fields {
+ JXL_FIELDS_NAME(SqueezeParams)
+ bool horizontal;
+ bool in_place;
+ uint32_t begin_c;
+ uint32_t num_c;
+ SqueezeParams();
+ Status VisitFields(Visitor *JXL_RESTRICT visitor) override {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &horizontal));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &in_place));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Bits(3), BitsOffset(6, 8),
+ BitsOffset(10, 72),
+ BitsOffset(13, 1096), 0, &begin_c));
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Val(1), Val(2), Val(3), BitsOffset(4, 4), 2, &num_c));
+ return true;
+ }
+};
+
+class Transform : public Fields {
+ public:
+ TransformId id;
+ // for Palette and RCT.
+ uint32_t begin_c;
+ // for RCT. 42 possible values starting from 0.
+ uint32_t rct_type;
+ // Only for Palette and NearLossless.
+ uint32_t num_c;
+ // Only for Palette.
+ uint32_t nb_colors;
+ uint32_t nb_deltas;
+ // for Squeeze. Default squeeze if empty.
+ std::vector<SqueezeParams> squeezes;
+ // for NearLossless, not serialized.
+ int max_delta_error;
+ // Serialized for Palette.
+ Predictor predictor;
+ // for Palette, not serialized.
+ bool ordered_palette = true;
+ bool lossy_palette = false;
+
+ explicit Transform(TransformId id);
+ // default constructor for bundles.
+ Transform() : Transform(TransformId::kInvalid) {}
+
+ Status VisitFields(Visitor *JXL_RESTRICT visitor) override {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+ Val((uint32_t)TransformId::kRCT), Val((uint32_t)TransformId::kPalette),
+ Val((uint32_t)TransformId::kSqueeze),
+ Val((uint32_t)TransformId::kInvalid), (uint32_t)TransformId::kRCT,
+ reinterpret_cast<uint32_t *>(&id)));
+ if (id == TransformId::kInvalid) {
+ return JXL_FAILURE("Invalid transform ID");
+ }
+ if (visitor->Conditional(id == TransformId::kRCT ||
+ id == TransformId::kPalette)) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Bits(3), BitsOffset(6, 8), BitsOffset(10, 72),
+ BitsOffset(13, 1096), 0, &begin_c));
+ }
+ if (visitor->Conditional(id == TransformId::kRCT)) {
+ // 0-41, default YCoCg.
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(6), Bits(2), BitsOffset(4, 2),
+ BitsOffset(6, 10), 6, &rct_type));
+ if (rct_type >= 42) {
+ return JXL_FAILURE("Invalid transform RCT type");
+ }
+ }
+ if (visitor->Conditional(id == TransformId::kPalette)) {
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Val(1), Val(3), Val(4), BitsOffset(13, 1), 3, &num_c));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+ BitsOffset(8, 0), BitsOffset(10, 256), BitsOffset(12, 1280),
+ BitsOffset(16, 5376), 256, &nb_colors));
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Val(0), BitsOffset(8, 1), BitsOffset(10, 257),
+ BitsOffset(16, 1281), 0, &nb_deltas));
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->Bits(4, (uint32_t)Predictor::Zero,
+ reinterpret_cast<uint32_t *>(&predictor)));
+ if (predictor >= Predictor::Best) {
+ return JXL_FAILURE("Invalid predictor");
+ }
+ }
+
+ if (visitor->Conditional(id == TransformId::kSqueeze)) {
+ uint32_t num_squeezes = static_cast<uint32_t>(squeezes.size());
+ JXL_QUIET_RETURN_IF_ERROR(
+ visitor->U32(Val(0), BitsOffset(4, 1), BitsOffset(6, 9),
+ BitsOffset(8, 41), 0, &num_squeezes));
+ if (visitor->IsReading()) squeezes.resize(num_squeezes);
+ for (size_t i = 0; i < num_squeezes; i++) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&squeezes[i]));
+ }
+ }
+ return true;
+ }
+
+ JXL_FIELDS_NAME(Transform)
+
+ Status Inverse(Image &input, const weighted::Header &wp_header,
+ ThreadPool *pool = nullptr);
+ Status MetaApply(Image &input);
+};
+
+Status CheckEqualChannels(const Image &image, uint32_t c1, uint32_t c2);
+
+static inline pixel_type PixelAdd(pixel_type a, pixel_type b) {
+ return static_cast<pixel_type>(static_cast<uint32_t>(a) +
+ static_cast<uint32_t>(b));
+}
+
+} // namespace jxl
+
+#endif // LIB_JXL_MODULAR_TRANSFORM_TRANSFORM_H_
diff --git a/third_party/jpeg-xl/lib/jxl/modular_test.cc b/third_party/jpeg-xl/lib/jxl/modular_test.cc
new file mode 100644
index 0000000000..293f59ff87
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/modular_test.cc
@@ -0,0 +1,541 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <array>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/codec.h"
+#include "lib/extras/dec/jxl.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_butteraugli_pnorm.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/enc_toc.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/modular/encoding/enc_encoding.h"
+#include "lib/jxl/modular/encoding/encoding.h"
+#include "lib/jxl/modular/encoding/ma_common.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+using test::Roundtrip;
+
+void TestLosslessGroups(size_t group_size_shift) {
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+ CompressParams cparams;
+ cparams.SetLossless();
+ cparams.modular_group_size_shift = group_size_shift;
+
+ CodecInOut io_out;
+
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+ io.ShrinkTo(io.xsize() / 4, io.ysize() / 4);
+
+ size_t compressed_size;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io_out, _, &compressed_size));
+ EXPECT_LE(compressed_size, 280000u);
+ JXL_EXPECT_OK(SamePixels(*io.Main().color(), *io_out.Main().color(), _));
+}
+
+TEST(ModularTest, RoundtripLosslessGroups128) { TestLosslessGroups(0); }
+
+TEST(ModularTest, JXL_TSAN_SLOW_TEST(RoundtripLosslessGroups512)) {
+ TestLosslessGroups(2);
+}
+
+TEST(ModularTest, JXL_TSAN_SLOW_TEST(RoundtripLosslessGroups1024)) {
+ TestLosslessGroups(3);
+}
+
+TEST(ModularTest, RoundtripLosslessCustomWP_PermuteRCT) {
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ CompressParams cparams;
+ cparams.SetLossless();
+ // 9 = permute to GBR, to test the special case of permutation-only
+ cparams.colorspace = 9;
+ // slowest speed so different WP modes are tried
+ cparams.speed_tier = SpeedTier::kTortoise;
+ cparams.options.predictor = {Predictor::Weighted};
+
+ CodecInOut io_out;
+
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+ io.ShrinkTo(100, 100);
+
+ size_t compressed_size;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io_out, _, &compressed_size));
+ EXPECT_LE(compressed_size, 10150u);
+ JXL_EXPECT_OK(SamePixels(*io.Main().color(), *io_out.Main().color(), _));
+}
+
+TEST(ModularTest, RoundtripLossyDeltaPalette) {
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ CompressParams cparams;
+ cparams.modular_mode = true;
+ cparams.color_transform = jxl::ColorTransform::kNone;
+ cparams.lossy_palette = true;
+ cparams.palette_colors = 0;
+
+ CodecInOut io_out;
+
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+ io.ShrinkTo(300, 100);
+
+ size_t compressed_size;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io_out, _, &compressed_size));
+ EXPECT_LE(compressed_size, 6800u);
+ cparams.ba_params.intensity_target = 80.0f;
+ EXPECT_THAT(ButteraugliDistance(io.frames, io_out.frames, cparams.ba_params,
+ GetJxlCms(),
+ /*distmap=*/nullptr),
+ IsSlightlyBelow(1.5));
+}
+TEST(ModularTest, RoundtripLossyDeltaPaletteWP) {
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ CompressParams cparams;
+ cparams.SetLossless();
+ cparams.lossy_palette = true;
+ cparams.palette_colors = 0;
+ cparams.options.predictor = jxl::Predictor::Weighted;
+
+ CodecInOut io_out;
+
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+ io.ShrinkTo(300, 100);
+
+ size_t compressed_size;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io_out, _, &compressed_size));
+ EXPECT_LE(compressed_size, 7000u);
+ cparams.ba_params.intensity_target = 80.0f;
+ EXPECT_THAT(ButteraugliDistance(io.frames, io_out.frames, cparams.ba_params,
+ GetJxlCms(),
+ /*distmap=*/nullptr),
+ IsSlightlyBelow(10.1));
+}
+
+TEST(ModularTest, RoundtripLossy) {
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ CompressParams cparams;
+ cparams.modular_mode = true;
+ cparams.butteraugli_distance = 2.f;
+
+ CodecInOut io_out;
+
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+
+ size_t compressed_size;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io_out, _, &compressed_size));
+ EXPECT_LE(compressed_size, 30000u);
+ cparams.ba_params.intensity_target = 80.0f;
+ EXPECT_THAT(ButteraugliDistance(io.frames, io_out.frames, cparams.ba_params,
+ GetJxlCms(),
+ /*distmap=*/nullptr),
+ IsSlightlyBelow(2.3));
+}
+
+TEST(ModularTest, RoundtripLossy16) {
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/raw.pixls/DJI-FC6310-16bit_709_v4_krita.png");
+ CompressParams cparams;
+ cparams.modular_mode = true;
+ cparams.butteraugli_distance = 2.f;
+
+ CodecInOut io_out;
+
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+ JXL_CHECK(!io.metadata.m.have_preview);
+ JXL_CHECK(io.frames.size() == 1);
+ JXL_CHECK(io.frames[0].TransformTo(ColorEncoding::SRGB(), GetJxlCms()));
+ io.metadata.m.color_encoding = ColorEncoding::SRGB();
+
+ size_t compressed_size;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io_out, _, &compressed_size));
+ EXPECT_LE(compressed_size, 300u);
+ cparams.ba_params.intensity_target = 80.0f;
+ EXPECT_THAT(ButteraugliDistance(io.frames, io_out.frames, cparams.ba_params,
+ GetJxlCms(),
+ /*distmap=*/nullptr),
+ IsSlightlyBelow(1.6));
+}
+
+TEST(ModularTest, RoundtripExtraProperties) {
+ constexpr size_t kSize = 250;
+ Image image(kSize, kSize, /*bitdepth=*/8, 3);
+ ModularOptions options;
+ options.max_properties = 4;
+ options.predictor = Predictor::Zero;
+ Rng rng(0);
+ for (size_t y = 0; y < kSize; y++) {
+ for (size_t x = 0; x < kSize; x++) {
+ image.channel[0].plane.Row(y)[x] = image.channel[2].plane.Row(y)[x] =
+ rng.UniformU(0, 9);
+ }
+ }
+ ZeroFillImage(&image.channel[1].plane);
+ BitWriter writer;
+ ASSERT_TRUE(ModularGenericCompress(image, options, &writer));
+ writer.ZeroPadToByte();
+ Image decoded(kSize, kSize, /*bitdepth=*/8, image.channel.size());
+ for (size_t i = 0; i < image.channel.size(); i++) {
+ const Channel& ch = image.channel[i];
+ decoded.channel[i] = Channel(ch.w, ch.h, ch.hshift, ch.vshift);
+ }
+ Status status = true;
+ {
+ BitReader reader(writer.GetSpan());
+ BitReaderScopedCloser closer(&reader, &status);
+ ASSERT_TRUE(ModularGenericDecompress(&reader, decoded, /*header=*/nullptr,
+ /*group_id=*/0, &options));
+ }
+ ASSERT_TRUE(status);
+ ASSERT_EQ(image.channel.size(), decoded.channel.size());
+ for (size_t c = 0; c < image.channel.size(); c++) {
+ for (size_t y = 0; y < image.channel[c].plane.ysize(); y++) {
+ for (size_t x = 0; x < image.channel[c].plane.xsize(); x++) {
+ EXPECT_EQ(image.channel[c].plane.Row(y)[x],
+ decoded.channel[c].plane.Row(y)[x])
+ << "c = " << c << ", x = " << x << ", y = " << y;
+ }
+ }
+ }
+}
+
+TEST(ModularTest, RoundtripLosslessCustomSqueeze) {
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/tmshre_riaphotographs_srgb8.png");
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+
+ CompressParams cparams;
+ cparams.modular_mode = true;
+ cparams.color_transform = jxl::ColorTransform::kNone;
+ cparams.butteraugli_distance = 0.f;
+ cparams.options.predictor = {Predictor::Zero};
+ cparams.speed_tier = SpeedTier::kThunder;
+ cparams.responsive = 1;
+ // Custom squeeze params, atm just for testing
+ SqueezeParams p;
+ p.horizontal = true;
+ p.in_place = false;
+ p.begin_c = 0;
+ p.num_c = 3;
+ cparams.squeezes.push_back(p);
+ p.begin_c = 1;
+ p.in_place = true;
+ p.horizontal = false;
+ cparams.squeezes.push_back(p);
+
+ CodecInOut io2;
+ size_t compressed_size;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _, &compressed_size));
+ EXPECT_LE(compressed_size, 265000u);
+ JXL_EXPECT_OK(SamePixels(*io.Main().color(), *io2.Main().color(), _));
+}
+
+struct RoundtripLosslessConfig {
+ int bitdepth;
+ int responsive;
+};
+class ModularTestParam
+ : public ::testing::TestWithParam<RoundtripLosslessConfig> {};
+
+std::vector<RoundtripLosslessConfig> GenerateLosslessTests() {
+ std::vector<RoundtripLosslessConfig> all;
+ for (int responsive = 0; responsive <= 1; responsive++) {
+ for (int bitdepth = 1; bitdepth < 32; bitdepth++) {
+ if (responsive && bitdepth > 30) continue;
+ all.push_back({bitdepth, responsive});
+ }
+ }
+ return all;
+}
+std::string LosslessTestDescription(
+ const testing::TestParamInfo<ModularTestParam::ParamType>& info) {
+ std::stringstream name;
+ name << info.param.bitdepth << "bit";
+ if (info.param.responsive) name << "Squeeze";
+ return name.str();
+}
+
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(RoundtripLossless, ModularTestParam,
+ testing::ValuesIn(GenerateLosslessTests()),
+ LosslessTestDescription);
+
+TEST_P(ModularTestParam, RoundtripLossless) {
+ RoundtripLosslessConfig config = GetParam();
+ int bitdepth = config.bitdepth;
+ int responsive = config.responsive;
+
+ ThreadPool* pool = nullptr;
+ Rng generator(123);
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ CodecInOut io1;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io1, pool));
+
+ // vary the dimensions a bit, in case of bugs related to
+ // even vs odd width or height.
+ size_t xsize = 423 + bitdepth;
+ size_t ysize = 467 + bitdepth;
+
+ CodecInOut io;
+ io.SetSize(xsize, ysize);
+ io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false);
+ io.metadata.m.SetUintSamples(bitdepth);
+
+ double factor = ((1lu << bitdepth) - 1lu);
+ double ifactor = 1.0 / factor;
+ Image3F noise_added(xsize, ysize);
+
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t y = 0; y < ysize; y++) {
+ const float* in = io1.Main().color()->PlaneRow(c, y);
+ float* out = noise_added.PlaneRow(c, y);
+ for (size_t x = 0; x < xsize; x++) {
+ // make the least significant bits random
+ float f = in[x] + generator.UniformF(0.0f, 1.f / 255.f);
+ if (f > 1.f) f = 1.f;
+ // quantize to the bitdepth we're testing
+ unsigned int u = f * factor + 0.5;
+ out[x] = u * ifactor;
+ }
+ }
+ }
+ io.SetFromImage(std::move(noise_added), jxl::ColorEncoding::SRGB(false));
+
+ CompressParams cparams;
+ cparams.modular_mode = true;
+ cparams.color_transform = jxl::ColorTransform::kNone;
+ cparams.butteraugli_distance = 0.f;
+ cparams.options.predictor = {Predictor::Zero};
+ cparams.speed_tier = SpeedTier::kThunder;
+ cparams.responsive = responsive;
+ CodecInOut io2;
+ size_t compressed_size;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _, &compressed_size));
+ EXPECT_LE(compressed_size, bitdepth * xsize * ysize / 3);
+ EXPECT_LE(0, ComputeDistance2(io.Main(), io2.Main(), GetJxlCms()));
+ size_t different = 0;
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t y = 0; y < ysize; y++) {
+ const float* in = io.Main().color()->PlaneRow(c, y);
+ const float* out = io2.Main().color()->PlaneRow(c, y);
+ for (size_t x = 0; x < xsize; x++) {
+ uint32_t uin = in[x] * factor + 0.5;
+ uint32_t uout = out[x] * factor + 0.5;
+ // check that the integer values are identical
+ if (uin != uout) different++;
+ }
+ }
+ }
+ EXPECT_EQ(different, 0);
+}
+
+TEST(ModularTest, RoundtripLosslessCustomFloat) {
+ CodecInOut io;
+ size_t xsize = 100, ysize = 300;
+ io.SetSize(xsize, ysize);
+ io.metadata.m.bit_depth.bits_per_sample = 18;
+ io.metadata.m.bit_depth.exponent_bits_per_sample = 6;
+ io.metadata.m.bit_depth.floating_point_sample = true;
+ io.metadata.m.modular_16_bit_buffer_sufficient = false;
+ ColorEncoding color_encoding;
+ color_encoding.tf.SetTransferFunction(TransferFunction::kLinear);
+ color_encoding.SetColorSpace(ColorSpace::kRGB);
+ Image3F testimage(xsize, ysize);
+ float factor = 1.f / (1 << 14);
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t y = 0; y < ysize; y++) {
+ float* const JXL_RESTRICT row = testimage.PlaneRow(c, y);
+ for (size_t x = 0; x < xsize; x++) {
+ row[x] = factor * (x ^ y);
+ }
+ }
+ }
+ io.SetFromImage(std::move(testimage), color_encoding);
+ io.metadata.m.color_encoding = color_encoding;
+ io.metadata.m.SetIntensityTarget(255);
+
+ CompressParams cparams;
+ cparams.modular_mode = true;
+ cparams.color_transform = jxl::ColorTransform::kNone;
+ cparams.butteraugli_distance = 0.f;
+ cparams.options.predictor = {Predictor::Zero};
+ cparams.speed_tier = SpeedTier::kThunder;
+ cparams.decoding_speed_tier = 2;
+
+ CodecInOut io2;
+ size_t compressed_size;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _, &compressed_size));
+ EXPECT_LE(compressed_size, 23000u);
+ JXL_EXPECT_OK(SamePixels(*io.Main().color(), *io2.Main().color(), _));
+}
+
+void WriteHeaders(BitWriter* writer, size_t xsize, size_t ysize) {
+ BitWriter::Allotment allotment(writer, 16);
+ writer->Write(8, 0xFF);
+ writer->Write(8, kCodestreamMarker);
+ allotment.ReclaimAndCharge(writer, 0, nullptr);
+ CodecMetadata metadata;
+ EXPECT_TRUE(metadata.size.Set(xsize, ysize));
+ EXPECT_TRUE(WriteSizeHeader(metadata.size, writer, 0, nullptr));
+ metadata.m.color_encoding = ColorEncoding::LinearSRGB(/*is_gray=*/true);
+ metadata.m.xyb_encoded = false;
+ metadata.m.SetUintSamples(31);
+ EXPECT_TRUE(WriteImageMetadata(metadata.m, writer, 0, nullptr));
+ metadata.transform_data.nonserialized_xyb_encoded = metadata.m.xyb_encoded;
+ EXPECT_TRUE(Bundle::Write(metadata.transform_data, writer, 0, nullptr));
+ writer->ZeroPadToByte();
+ FrameHeader frame_header(&metadata);
+ frame_header.encoding = FrameEncoding::kModular;
+ frame_header.loop_filter.gab = false;
+ frame_header.loop_filter.epf_iters = 0;
+ EXPECT_TRUE(WriteFrameHeader(frame_header, writer, nullptr));
+}
+
+// Tree with single node, zero predictor, offset is 1 and multiplier is 1,
+// entropy code is prefix tree with alphabet size 256 and all bits lengths 8.
+void WriteHistograms(BitWriter* writer) {
+ writer->Write(1, 1); // default DC quant
+ writer->Write(1, 1); // has_tree
+ // tree histograms
+ writer->Write(1, 0); // LZ77 disabled
+ writer->Write(3, 1); // simple context map
+ writer->Write(1, 1); // prefix code
+ writer->Write(7, 0x63); // UnintConfig(3, 2, 1)
+ writer->Write(12, 0xfef); // alphabet_size = 256
+ writer->Write(32, 0x10003); // all bit lengths 8
+ // tree tokens
+ writer->Write(8, 0); // tree leaf
+ writer->Write(8, 0); // zero predictor
+ writer->Write(8, 64); // offset = UnpackSigned(ReverseBits(64)) = 1
+ writer->Write(16, 0); // multiplier = 1
+ // histograms
+ writer->Write(1, 0); // LZ77 disabled
+ writer->Write(1, 1); // prefix code
+ writer->Write(7, 0x63); // UnintConfig(3, 2, 1)
+ writer->Write(12, 0xfef); // alphabet_size = 256
+ writer->Write(32, 0x10003); // all bit lengths 8
+}
+
+TEST(ModularTest, PredictorIntegerOverflow) {
+ const size_t xsize = 1;
+ const size_t ysize = 1;
+ BitWriter writer;
+ WriteHeaders(&writer, xsize, ysize);
+ std::vector<BitWriter> group_codes(1);
+ {
+ BitWriter* bw = &group_codes[0];
+ BitWriter::Allotment allotment(bw, 1 << 20);
+ WriteHistograms(bw);
+ GroupHeader header;
+ header.use_global_tree = true;
+ EXPECT_TRUE(Bundle::Write(header, bw, 0, nullptr));
+ // After UnpackSigned this becomes (1 << 31) - 1, the largest pixel_type,
+ // and after adding the offset we get -(1 << 31).
+ bw->Write(8, 119);
+ bw->Write(28, 0xfffffff);
+ bw->ZeroPadToByte();
+ allotment.ReclaimAndCharge(bw, 0, nullptr);
+ }
+ EXPECT_TRUE(WriteGroupOffsets(group_codes, nullptr, &writer, nullptr));
+ writer.AppendByteAligned(group_codes);
+
+ PaddedBytes compressed = std::move(writer).TakeBytes();
+ extras::PackedPixelFile ppf;
+ extras::JXLDecompressParams params;
+ params.accepted_formats.push_back({1, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0});
+ EXPECT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), params,
+ nullptr, &ppf));
+ ASSERT_EQ(1, ppf.frames.size());
+ const auto& img = ppf.frames[0].color;
+ const auto pixels = reinterpret_cast<const float*>(img.pixels());
+ EXPECT_EQ(-1.0f, pixels[0]);
+}
+
+TEST(ModularTest, UnsqueezeIntegerOverflow) {
+ // Image width is 9 so we can test both the SIMD and non-vector code paths.
+ const size_t xsize = 9;
+ const size_t ysize = 2;
+ BitWriter writer;
+ WriteHeaders(&writer, xsize, ysize);
+ std::vector<BitWriter> group_codes(1);
+ {
+ BitWriter* bw = &group_codes[0];
+ BitWriter::Allotment allotment(bw, 1 << 20);
+ WriteHistograms(bw);
+ GroupHeader header;
+ header.use_global_tree = true;
+ header.transforms.emplace_back();
+ header.transforms[0].id = TransformId::kSqueeze;
+ SqueezeParams params;
+ params.horizontal = false;
+ params.in_place = true;
+ params.begin_c = 0;
+ params.num_c = 1;
+ header.transforms[0].squeezes.emplace_back(params);
+ EXPECT_TRUE(Bundle::Write(header, bw, 0, nullptr));
+ for (size_t i = 0; i < xsize * ysize; ++i) {
+ // After UnpackSigned and adding offset, this becomes (1 << 31) - 1, both
+ // in the image and in the residual channels, and unsqueeze makes them
+ // ~(3 << 30) and (1 << 30) (in pixel_type_w) and the first wraps around
+ // to about -(1 << 30).
+ bw->Write(8, 119);
+ bw->Write(28, 0xffffffe);
+ }
+ bw->ZeroPadToByte();
+ allotment.ReclaimAndCharge(bw, 0, nullptr);
+ }
+ EXPECT_TRUE(WriteGroupOffsets(group_codes, nullptr, &writer, nullptr));
+ writer.AppendByteAligned(group_codes);
+
+ PaddedBytes compressed = std::move(writer).TakeBytes();
+ extras::PackedPixelFile ppf;
+ extras::JXLDecompressParams params;
+ params.accepted_formats.push_back({1, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0});
+ EXPECT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), params,
+ nullptr, &ppf));
+ ASSERT_EQ(1, ppf.frames.size());
+ const auto& img = ppf.frames[0].color;
+ const auto pixels = reinterpret_cast<const float*>(img.pixels());
+ for (size_t x = 0; x < xsize; ++x) {
+ EXPECT_NEAR(-0.5f, pixels[x], 1e-10);
+ EXPECT_NEAR(0.5f, pixels[xsize + x], 1e-10);
+ }
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/noise.h b/third_party/jpeg-xl/lib/jxl/noise.h
new file mode 100644
index 0000000000..d897ea3abe
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/noise.h
@@ -0,0 +1,60 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_NOISE_H_
+#define LIB_JXL_NOISE_H_
+
+// Noise parameters shared by encoder/decoder.
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <cmath>
+#include <utility>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+const float kNoisePrecision = 1 << 10;
+
+struct NoiseParams {
+ // LUT index is an intensity of pixel / mean intensity of patch
+ static constexpr size_t kNumNoisePoints = 8;
+ float lut[kNumNoisePoints];
+
+ void Clear() {
+ for (float& i : lut) i = 0.f;
+ }
+ bool HasAny() const {
+ for (float i : lut) {
+ if (std::abs(i) > 1e-3f) return true;
+ }
+ return false;
+ }
+};
+
+static inline std::pair<int, float> IndexAndFrac(float x) {
+ constexpr size_t kScaleNumerator = NoiseParams::kNumNoisePoints - 2;
+ // TODO: instead of 1, this should be a proper Y range.
+ constexpr float kScale = kScaleNumerator / 1;
+ float scaled_x = std::max(0.f, x * kScale);
+ float floor_x;
+ float frac_x = std::modf(scaled_x, &floor_x);
+ if (JXL_UNLIKELY(scaled_x >= kScaleNumerator + 1)) {
+ floor_x = kScaleNumerator;
+ frac_x = 1.f;
+ }
+ return std::make_pair(static_cast<int>(floor_x), frac_x);
+}
+
+struct NoiseLevel {
+ float noise_level;
+ float intensity;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_NOISE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/opsin_image_test.cc b/third_party/jpeg-xl/lib/jxl/opsin_image_test.cc
new file mode 100644
index 0000000000..07fd824f14
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/opsin_image_test.cc
@@ -0,0 +1,123 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include <hwy/tests/test_util-inl.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/matrix_ops.h"
+#include "lib/jxl/opsin_params.h"
+
+namespace jxl {
+namespace {
+
+// Convert a single linear sRGB color to xyb, using the exact image conversion
+// procedure that jpeg xl uses.
+void LinearSrgbToOpsin(float rgb_r, float rgb_g, float rgb_b,
+ float* JXL_RESTRICT xyb_x, float* JXL_RESTRICT xyb_y,
+ float* JXL_RESTRICT xyb_b) {
+ Image3F linear(1, 1);
+ linear.PlaneRow(0, 0)[0] = rgb_r;
+ linear.PlaneRow(1, 0)[0] = rgb_g;
+ linear.PlaneRow(2, 0)[0] = rgb_b;
+
+ ImageMetadata metadata;
+ metadata.SetFloat32Samples();
+ metadata.color_encoding = ColorEncoding::LinearSRGB();
+ ImageBundle ib(&metadata);
+ ib.SetFromImage(std::move(linear), metadata.color_encoding);
+ Image3F opsin(1, 1);
+ (void)ToXYB(ib, /*pool=*/nullptr, &opsin, GetJxlCms());
+
+ *xyb_x = opsin.PlaneRow(0, 0)[0];
+ *xyb_y = opsin.PlaneRow(1, 0)[0];
+ *xyb_b = opsin.PlaneRow(2, 0)[0];
+}
+
+// Convert a single XYB color to linear sRGB, using the exact image conversion
+// procedure that jpeg xl uses.
+void OpsinToLinearSrgb(float xyb_x, float xyb_y, float xyb_b,
+ float* JXL_RESTRICT rgb_r, float* JXL_RESTRICT rgb_g,
+ float* JXL_RESTRICT rgb_b) {
+ Image3F opsin(1, 1);
+ opsin.PlaneRow(0, 0)[0] = xyb_x;
+ opsin.PlaneRow(1, 0)[0] = xyb_y;
+ opsin.PlaneRow(2, 0)[0] = xyb_b;
+ Image3F linear(1, 1);
+ OpsinParams opsin_params;
+ opsin_params.Init(/*intensity_target=*/255.0f);
+ OpsinToLinear(opsin, Rect(opsin), nullptr, &linear, opsin_params);
+ *rgb_r = linear.PlaneRow(0, 0)[0];
+ *rgb_g = linear.PlaneRow(1, 0)[0];
+ *rgb_b = linear.PlaneRow(2, 0)[0];
+}
+
+void OpsinRoundtripTestRGB(float r, float g, float b) {
+ float xyb_x, xyb_y, xyb_b;
+ LinearSrgbToOpsin(r, g, b, &xyb_x, &xyb_y, &xyb_b);
+ float r2, g2, b2;
+ OpsinToLinearSrgb(xyb_x, xyb_y, xyb_b, &r2, &g2, &b2);
+ EXPECT_NEAR(r, r2, 1e-3);
+ EXPECT_NEAR(g, g2, 1e-3);
+ EXPECT_NEAR(b, b2, 1e-3);
+}
+
+TEST(OpsinImageTest, VerifyOpsinAbsorbanceInverseMatrix) {
+ float matrix[9]; // writable copy
+ for (int i = 0; i < 9; i++) {
+ matrix[i] = GetOpsinAbsorbanceInverseMatrix()[i];
+ }
+ EXPECT_TRUE(Inv3x3Matrix(matrix));
+ for (int i = 0; i < 9; i++) {
+ EXPECT_NEAR(matrix[i], kOpsinAbsorbanceMatrix[i], 1e-6);
+ }
+}
+
+TEST(OpsinImageTest, OpsinRoundtrip) {
+ OpsinRoundtripTestRGB(0, 0, 0);
+ OpsinRoundtripTestRGB(1. / 255, 1. / 255, 1. / 255);
+ OpsinRoundtripTestRGB(128. / 255, 128. / 255, 128. / 255);
+ OpsinRoundtripTestRGB(1, 1, 1);
+
+ OpsinRoundtripTestRGB(0, 0, 1. / 255);
+ OpsinRoundtripTestRGB(0, 0, 128. / 255);
+ OpsinRoundtripTestRGB(0, 0, 1);
+
+ OpsinRoundtripTestRGB(0, 1. / 255, 0);
+ OpsinRoundtripTestRGB(0, 128. / 255, 0);
+ OpsinRoundtripTestRGB(0, 1, 0);
+
+ OpsinRoundtripTestRGB(1. / 255, 0, 0);
+ OpsinRoundtripTestRGB(128. / 255, 0, 0);
+ OpsinRoundtripTestRGB(1, 0, 0);
+}
+
+TEST(OpsinImageTest, VerifyZero) {
+ // Test that black color (zero energy) is 0,0,0 in xyb.
+ float x, y, b;
+ LinearSrgbToOpsin(0, 0, 0, &x, &y, &b);
+ EXPECT_NEAR(0, x, 1e-9);
+ EXPECT_NEAR(0, y, 1e-7);
+ EXPECT_NEAR(0, b, 1e-7);
+}
+
+TEST(OpsinImageTest, VerifyGray) {
+ // Test that grayscale colors have a fixed y/b ratio and x==0.
+ for (size_t i = 1; i < 255; i++) {
+ float x, y, b;
+ LinearSrgbToOpsin(i / 255., i / 255., i / 255., &x, &y, &b);
+ EXPECT_NEAR(0, x, 1e-6);
+ EXPECT_NEAR(kYToBRatio, b / y, 3e-5);
+ }
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/opsin_inverse_test.cc b/third_party/jpeg-xl/lib/jxl/opsin_inverse_test.cc
new file mode 100644
index 0000000000..088253c2ce
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/opsin_inverse_test.cc
@@ -0,0 +1,57 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/color_management.h"
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(OpsinInverseTest, LinearInverseInverts) {
+ Image3F linear(128, 128);
+ RandomFillImage(&linear, 0.0f, 1.0f);
+
+ CodecInOut io;
+ io.metadata.m.SetFloat32Samples();
+ io.metadata.m.color_encoding = ColorEncoding::LinearSRGB();
+ io.SetFromImage(CopyImage(linear), io.metadata.m.color_encoding);
+ ThreadPool* null_pool = nullptr;
+ Image3F opsin(io.xsize(), io.ysize());
+ (void)ToXYB(io.Main(), null_pool, &opsin, GetJxlCms());
+
+ OpsinParams opsin_params;
+ opsin_params.Init(/*intensity_target=*/255.0f);
+ OpsinToLinearInplace(&opsin, /*pool=*/nullptr, opsin_params);
+
+ JXL_ASSERT_OK(VerifyRelativeError(linear, opsin, 3E-3, 2E-4, _));
+}
+
+TEST(OpsinInverseTest, YcbCrInverts) {
+ Image3F rgb(128, 128);
+ RandomFillImage(&rgb, 0.0f, 1.0f);
+
+ ThreadPool* null_pool = nullptr;
+ Image3F ycbcr(rgb.xsize(), rgb.ysize());
+ EXPECT_TRUE(RgbToYcbcr(rgb.Plane(0), rgb.Plane(1), rgb.Plane(2),
+ &ycbcr.Plane(1), &ycbcr.Plane(0), &ycbcr.Plane(2),
+ null_pool));
+
+ Image3F rgb2(rgb.xsize(), rgb.ysize());
+ YcbcrToRgb(ycbcr, &rgb2, Rect(rgb));
+
+ JXL_ASSERT_OK(VerifyRelativeError(rgb, rgb2, 4E-5, 4E-7, _));
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/opsin_params.cc b/third_party/jpeg-xl/lib/jxl/opsin_params.cc
new file mode 100644
index 0000000000..ec3db4ee76
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/opsin_params.cc
@@ -0,0 +1,44 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/opsin_params.h"
+
+#include <stdlib.h>
+
+#include "lib/jxl/matrix_ops.h"
+
+namespace jxl {
+
+#define INVERSE_OPSIN_FROM_SPEC 1
+
+const float* GetOpsinAbsorbanceInverseMatrix() {
+#if INVERSE_OPSIN_FROM_SPEC
+ return DefaultInverseOpsinAbsorbanceMatrix();
+#else // INVERSE_OPSIN_FROM_SPEC
+ // Compute the inverse opsin matrix from the forward matrix. Less precise
+ // than taking the values from the specification, but must be used if the
+ // forward transform is changed and the spec will require updating.
+ static const float* const kInverse = [] {
+ static float inverse[9];
+ for (int i = 0; i < 9; i++) {
+ inverse[i] = kOpsinAbsorbanceMatrix[i];
+ }
+ Inv3x3Matrix(inverse);
+ return inverse;
+ }();
+ return kInverse;
+#endif // INVERSE_OPSIN_FROM_SPEC
+}
+
+void InitSIMDInverseMatrix(const float* JXL_RESTRICT inverse,
+ float* JXL_RESTRICT simd_inverse,
+ float intensity_target) {
+ for (size_t i = 0; i < 9; ++i) {
+ simd_inverse[4 * i] = simd_inverse[4 * i + 1] = simd_inverse[4 * i + 2] =
+ simd_inverse[4 * i + 3] = inverse[i] * (255.0f / intensity_target);
+ }
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/opsin_params.h b/third_party/jpeg-xl/lib/jxl/opsin_params.h
new file mode 100644
index 0000000000..3a7da97d8a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/opsin_params.h
@@ -0,0 +1,86 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_OPSIN_PARAMS_H_
+#define LIB_JXL_OPSIN_PARAMS_H_
+
+// Constants that define the XYB color space.
+
+#include <stdlib.h>
+
+#include <cmath>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+// Parameters for opsin absorbance.
+static const float kM02 = 0.078f;
+static const float kM00 = 0.30f;
+static const float kM01 = 1.0f - kM02 - kM00;
+
+static const float kM12 = 0.078f;
+static const float kM10 = 0.23f;
+static const float kM11 = 1.0f - kM12 - kM10;
+
+static const float kM20 = 0.24342268924547819f;
+static const float kM21 = 0.20476744424496821f;
+static const float kM22 = 1.0f - kM20 - kM21;
+
+static const float kBScale = 1.0f;
+static const float kYToBRatio = 1.0f; // works better with 0.50017729543783418
+static const float kBToYRatio = 1.0f / kYToBRatio;
+
+static const float kB0 = 0.0037930732552754493f;
+static const float kB1 = kB0;
+static const float kB2 = kB0;
+
+// Opsin absorbance matrix is now frozen.
+static const float kOpsinAbsorbanceMatrix[9] = {
+ kM00, kM01, kM02, kM10, kM11, kM12, kM20, kM21, kM22,
+};
+
+// Must be the inverse matrix of kOpsinAbsorbanceMatrix and match the spec.
+static inline const float* DefaultInverseOpsinAbsorbanceMatrix() {
+ static float kDefaultInverseOpsinAbsorbanceMatrix[9] = {
+ 11.031566901960783f, -9.866943921568629f, -0.16462299647058826f,
+ -3.254147380392157f, 4.418770392156863f, -0.16462299647058826f,
+ -3.6588512862745097f, 2.7129230470588235f, 1.9459282392156863f};
+ return kDefaultInverseOpsinAbsorbanceMatrix;
+}
+
+// Returns 3x3 row-major matrix inverse of kOpsinAbsorbanceMatrix.
+// opsin_image_test verifies this is actually the inverse.
+const float* GetOpsinAbsorbanceInverseMatrix();
+
+void InitSIMDInverseMatrix(const float* JXL_RESTRICT inverse,
+ float* JXL_RESTRICT simd_inverse,
+ float intensity_target);
+
+static const float kOpsinAbsorbanceBias[3] = {
+ kB0,
+ kB1,
+ kB2,
+};
+
+static const float kNegOpsinAbsorbanceBiasRGB[4] = {
+ -kOpsinAbsorbanceBias[0], -kOpsinAbsorbanceBias[1],
+ -kOpsinAbsorbanceBias[2], 1.0f};
+
+static const float kScaledXYBOffset[3] = {
+ 0.015386134f,
+ 0.0f,
+ 0.27770459f,
+};
+
+static const float kScaledXYBScale[3] = {
+ 22.995788804f,
+ 1.183000077f,
+ 1.502141333f,
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_OPSIN_PARAMS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/padded_bytes_test.cc b/third_party/jpeg-xl/lib/jxl/padded_bytes_test.cc
new file mode 100644
index 0000000000..9ca7a22423
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/padded_bytes_test.cc
@@ -0,0 +1,126 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/base/padded_bytes.h"
+
+#include <numeric> // iota
+#include <vector>
+
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+TEST(PaddedBytesTest, TestNonEmptyFirstByteZero) {
+ PaddedBytes pb(1);
+ EXPECT_EQ(0, pb[0]);
+ // Even after resizing..
+ pb.resize(20);
+ EXPECT_EQ(0, pb[0]);
+ // And reserving.
+ pb.reserve(200);
+ EXPECT_EQ(0, pb[0]);
+}
+
+TEST(PaddedBytesTest, TestEmptyFirstByteZero) {
+ PaddedBytes pb(0);
+ // After resizing - new zero is written despite there being nothing to copy.
+ pb.resize(20);
+ EXPECT_EQ(0, pb[0]);
+}
+
+TEST(PaddedBytesTest, TestFillWithoutReserve) {
+ PaddedBytes pb;
+ for (size_t i = 0; i < 170u; ++i) {
+ pb.push_back(i);
+ }
+ EXPECT_EQ(170u, pb.size());
+ EXPECT_GE(pb.capacity(), 170u);
+}
+
+TEST(PaddedBytesTest, TestFillWithExactReserve) {
+ PaddedBytes pb;
+ pb.reserve(170);
+ for (size_t i = 0; i < 170u; ++i) {
+ pb.push_back(i);
+ }
+ EXPECT_EQ(170u, pb.size());
+ EXPECT_EQ(pb.capacity(), 170u);
+}
+
+TEST(PaddedBytesTest, TestFillWithMoreReserve) {
+ PaddedBytes pb;
+ pb.reserve(171);
+ for (size_t i = 0; i < 170u; ++i) {
+ pb.push_back(i);
+ }
+ EXPECT_EQ(170u, pb.size());
+ EXPECT_GT(pb.capacity(), 170u);
+}
+
+// Can assign() a subset of the valid data.
+TEST(PaddedBytesTest, TestAssignFromWithin) {
+ PaddedBytes pb;
+ pb.reserve(256);
+ for (size_t i = 0; i < 256; ++i) {
+ pb.push_back(i);
+ }
+ pb.assign(pb.data() + 64, pb.data() + 192);
+ EXPECT_EQ(128u, pb.size());
+ for (size_t i = 0; i < 128; ++i) {
+ EXPECT_EQ(i + 64, pb[i]);
+ }
+}
+
+// Can assign() a range with both valid and previously-allocated data.
+TEST(PaddedBytesTest, TestAssignReclaim) {
+ PaddedBytes pb;
+ pb.reserve(256);
+ for (size_t i = 0; i < 256; ++i) {
+ pb.push_back(i);
+ }
+
+ const uint8_t* mem = pb.data();
+ pb.resize(200);
+ // Just shrank without reallocating
+ EXPECT_EQ(mem, pb.data());
+ EXPECT_EQ(256u, pb.capacity());
+
+ // Reclaim part of initial allocation
+ pb.assign(pb.data() + 100, pb.data() + 240);
+ EXPECT_EQ(140u, pb.size());
+
+ for (size_t i = 0; i < 140; ++i) {
+ EXPECT_EQ(i + 100, pb[i]);
+ }
+}
+
+// Can assign() smaller and larger ranges outside the current allocation.
+TEST(PaddedBytesTest, TestAssignOutside) {
+ PaddedBytes pb;
+ pb.resize(400);
+ std::iota(pb.begin(), pb.end(), 1);
+
+ std::vector<uint8_t> small(64);
+ std::iota(small.begin(), small.end(), 500);
+
+ pb.assign(small.data(), small.data() + small.size());
+ EXPECT_EQ(64u, pb.size());
+ for (size_t i = 0; i < 64; ++i) {
+ EXPECT_EQ((i + 500) & 0xFF, pb[i]);
+ }
+
+ std::vector<uint8_t> large(1000);
+ std::iota(large.begin(), large.end(), 600);
+
+ pb.assign(large.data(), large.data() + large.size());
+ EXPECT_EQ(1000u, pb.size());
+ for (size_t i = 0; i < 1000; ++i) {
+ EXPECT_EQ((i + 600) & 0xFF, pb[i]);
+ }
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/passes_state.cc b/third_party/jpeg-xl/lib/jxl/passes_state.cc
new file mode 100644
index 0000000000..2f287ec9b6
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/passes_state.cc
@@ -0,0 +1,70 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/passes_state.h"
+
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/common.h"
+
+namespace jxl {
+
+Status InitializePassesSharedState(const FrameHeader& frame_header,
+ PassesSharedState* JXL_RESTRICT shared,
+ bool encoder) {
+ JXL_ASSERT(frame_header.nonserialized_metadata != nullptr);
+ shared->frame_header = frame_header;
+ shared->metadata = frame_header.nonserialized_metadata;
+ shared->frame_dim = frame_header.ToFrameDimensions();
+ shared->image_features.patches.SetPassesSharedState(shared);
+
+ const FrameDimensions& frame_dim = shared->frame_dim;
+
+ shared->ac_strategy =
+ AcStrategyImage(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+ shared->raw_quant_field =
+ ImageI(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+ shared->epf_sharpness =
+ ImageB(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+ shared->cmap = ColorCorrelationMap(frame_dim.xsize, frame_dim.ysize);
+
+ // In the decoder, we allocate coeff orders afterwards, when we know how many
+ // we will actually need.
+ shared->coeff_order_size = kCoeffOrderMaxSize;
+ if (encoder &&
+ shared->coeff_orders.size() <
+ frame_header.passes.num_passes * kCoeffOrderMaxSize &&
+ frame_header.encoding == FrameEncoding::kVarDCT) {
+ shared->coeff_orders.resize(frame_header.passes.num_passes *
+ kCoeffOrderMaxSize);
+ }
+
+ shared->quant_dc = ImageB(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+
+ bool use_dc_frame = !!(frame_header.flags & FrameHeader::kUseDcFrame);
+ if (!encoder && use_dc_frame) {
+ if (frame_header.dc_level == 4) {
+ return JXL_FAILURE("Invalid DC level for kUseDcFrame: %u",
+ frame_header.dc_level);
+ }
+ shared->dc_storage = Image3F();
+ shared->dc = &shared->dc_frames[frame_header.dc_level];
+ if (shared->dc->xsize() == 0) {
+ return JXL_FAILURE(
+ "kUseDcFrame specified for dc_level %u, but no frame was decoded "
+ "with level %u",
+ frame_header.dc_level, frame_header.dc_level + 1);
+ }
+ ZeroFillImage(&shared->quant_dc);
+ } else {
+ shared->dc_storage =
+ Image3F(frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+ shared->dc = &shared->dc_storage;
+ }
+
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/passes_state.h b/third_party/jpeg-xl/lib/jxl/passes_state.h
new file mode 100644
index 0000000000..8d648a8feb
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/passes_state.h
@@ -0,0 +1,133 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_PASSES_STATE_H_
+#define LIB_JXL_PASSES_STATE_H_
+
+#include "lib/jxl/ac_context.h"
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/frame_header.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/noise.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/quantizer.h"
+#include "lib/jxl/splines.h"
+
+// Structures that hold the (en/de)coder state for a JPEG XL kVarDCT
+// (en/de)coder.
+
+namespace jxl {
+
+struct ImageFeatures {
+ NoiseParams noise_params;
+ PatchDictionary patches;
+ Splines splines;
+};
+
+// State common to both encoder and decoder.
+// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
+struct PassesSharedState {
+ PassesSharedState() : frame_header(nullptr) {}
+
+ // Headers and metadata.
+ const CodecMetadata* metadata;
+ FrameHeader frame_header;
+
+ FrameDimensions frame_dim;
+
+ // Control fields and parameters.
+ AcStrategyImage ac_strategy;
+
+ // Dequant matrices + quantizer.
+ DequantMatrices matrices;
+ Quantizer quantizer{&matrices};
+ ImageI raw_quant_field;
+
+ // Per-block side information for EPF detail preservation.
+ ImageB epf_sharpness;
+
+ ColorCorrelationMap cmap;
+
+ ImageFeatures image_features;
+
+ // Memory area for storing coefficient orders.
+ // `coeff_order_size` is the size used by *one* set of coefficient orders (at
+ // most kMaxCoeffOrderSize). A set of coefficient orders is present for each
+ // pass.
+ size_t coeff_order_size = 0;
+ std::vector<coeff_order_t> coeff_orders;
+
+ // Decoder-side DC and quantized DC.
+ ImageB quant_dc;
+ Image3F dc_storage;
+ const Image3F* JXL_RESTRICT dc = &dc_storage;
+
+ BlockCtxMap block_ctx_map;
+
+ Image3F dc_frames[4];
+
+ struct {
+ ImageBundle frame;
+ // ImageBundle doesn't yet have a simple way to state it is in XYB.
+ bool ib_is_in_xyb = false;
+ } reference_frames[4] = {};
+
+ // Number of pre-clustered set of histograms (with the same ctx map), per
+ // pass. Encoded as num_histograms_ - 1.
+ size_t num_histograms = 0;
+
+ bool IsGrayscale() const { return metadata->m.color_encoding.IsGray(); }
+
+ Rect GroupRect(size_t group_index) const {
+ const size_t gx = group_index % frame_dim.xsize_groups;
+ const size_t gy = group_index / frame_dim.xsize_groups;
+ const Rect rect(gx * frame_dim.group_dim, gy * frame_dim.group_dim,
+ frame_dim.group_dim, frame_dim.group_dim, frame_dim.xsize,
+ frame_dim.ysize);
+ return rect;
+ }
+
+ Rect PaddedGroupRect(size_t group_index) const {
+ const size_t gx = group_index % frame_dim.xsize_groups;
+ const size_t gy = group_index / frame_dim.xsize_groups;
+ const Rect rect(gx * frame_dim.group_dim, gy * frame_dim.group_dim,
+ frame_dim.group_dim, frame_dim.group_dim,
+ frame_dim.xsize_padded, frame_dim.ysize_padded);
+ return rect;
+ }
+
+ Rect BlockGroupRect(size_t group_index) const {
+ const size_t gx = group_index % frame_dim.xsize_groups;
+ const size_t gy = group_index / frame_dim.xsize_groups;
+ const Rect rect(gx * (frame_dim.group_dim >> 3),
+ gy * (frame_dim.group_dim >> 3), frame_dim.group_dim >> 3,
+ frame_dim.group_dim >> 3, frame_dim.xsize_blocks,
+ frame_dim.ysize_blocks);
+ return rect;
+ }
+
+ Rect DCGroupRect(size_t group_index) const {
+ const size_t gx = group_index % frame_dim.xsize_dc_groups;
+ const size_t gy = group_index / frame_dim.xsize_dc_groups;
+ const Rect rect(gx * frame_dim.group_dim, gy * frame_dim.group_dim,
+ frame_dim.group_dim, frame_dim.group_dim,
+ frame_dim.xsize_blocks, frame_dim.ysize_blocks);
+ return rect;
+ }
+};
+
+// Initialized the state information that is shared between encoder and decoder.
+Status InitializePassesSharedState(const FrameHeader& frame_header,
+ PassesSharedState* JXL_RESTRICT shared,
+ bool encoder = false);
+
+} // namespace jxl
+
+#endif // LIB_JXL_PASSES_STATE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/passes_test.cc b/third_party/jpeg-xl/lib/jxl/passes_test.cc
new file mode 100644
index 0000000000..97d776f941
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/passes_test.cc
@@ -0,0 +1,402 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+
+#include <future>
+#include <string>
+#include <utility>
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+
+using test::Roundtrip;
+using test::ThreadPoolForTests;
+
+namespace {
+
+TEST(PassesTest, RoundtripSmallPasses) {
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+ io.ShrinkTo(io.xsize() / 8, io.ysize() / 8);
+
+ CompressParams cparams;
+ cparams.butteraugli_distance = 1.0;
+ cparams.progressive_mode = true;
+
+ CodecInOut io2;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _));
+ EXPECT_THAT(
+ ButteraugliDistance(io.frames, io2.frames, cparams.ba_params, GetJxlCms(),
+ /*distmap=*/nullptr),
+ IsSlightlyBelow(1.1));
+}
+
+TEST(PassesTest, RoundtripUnalignedPasses) {
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+ io.ShrinkTo(io.xsize() / 12, io.ysize() / 7);
+
+ CompressParams cparams;
+ cparams.butteraugli_distance = 2.0;
+ cparams.progressive_mode = true;
+
+ CodecInOut io2;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _));
+ EXPECT_THAT(
+ ButteraugliDistance(io.frames, io2.frames, cparams.ba_params, GetJxlCms(),
+ /*distmap=*/nullptr),
+ IsSlightlyBelow(1.72));
+}
+
+TEST(PassesTest, RoundtripMultiGroupPasses) {
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+ CodecInOut io;
+ {
+ ThreadPoolForTests pool(4);
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+ }
+ io.ShrinkTo(600, 1024); // partial X, full Y group
+
+ auto test = [&](float target_distance, float threshold) {
+ ThreadPoolForTests pool(4);
+ CompressParams cparams;
+ cparams.butteraugli_distance = target_distance;
+ cparams.progressive_mode = true;
+ CodecInOut io2;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _,
+ /* compressed_size */ nullptr, &pool));
+ EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, cparams.ba_params,
+ GetJxlCms(),
+ /*distmap=*/nullptr, &pool),
+ IsSlightlyBelow(target_distance + threshold));
+ };
+
+ auto run1 = std::async(std::launch::async, test, 1.0f, 0.5f);
+ auto run2 = std::async(std::launch::async, test, 2.0f, 0.5f);
+}
+
+TEST(PassesTest, RoundtripLargeFastPasses) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+
+ CompressParams cparams;
+ cparams.speed_tier = SpeedTier::kSquirrel;
+ cparams.progressive_mode = true;
+
+ CodecInOut io2;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _,
+ /* comrpessed_size */ nullptr, &pool));
+}
+
+// Checks for differing size/distance in two consecutive runs of distance 2,
+// which involves additional processing including adaptive reconstruction.
+// Failing this may be a sign of race conditions or invalid memory accesses.
+TEST(PassesTest, RoundtripProgressiveConsistent) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+
+ CompressParams cparams;
+ cparams.speed_tier = SpeedTier::kSquirrel;
+ cparams.progressive_mode = true;
+ cparams.butteraugli_distance = 2.0;
+
+ // Try each xsize mod kBlockDim to verify right border handling.
+ for (size_t xsize = 48; xsize > 40; --xsize) {
+ io.ShrinkTo(xsize, 15);
+
+ CodecInOut io2;
+ size_t size2;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _, &size2, &pool));
+
+ CodecInOut io3;
+ size_t size3;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io3, _, &size3, &pool));
+
+ // Exact same compressed size.
+ EXPECT_EQ(size2, size3);
+
+ // Exact same distance.
+ const float dist2 = ButteraugliDistance(io.frames, io2.frames,
+ cparams.ba_params, GetJxlCms(),
+ /*distmap=*/nullptr, &pool);
+ const float dist3 = ButteraugliDistance(io.frames, io3.frames,
+ cparams.ba_params, GetJxlCms(),
+ /*distmap=*/nullptr, &pool);
+ EXPECT_EQ(dist2, dist3);
+ }
+}
+
+TEST(PassesTest, AllDownsampleFeasible) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+
+ PaddedBytes compressed;
+ AuxOut aux;
+
+ CompressParams cparams;
+ cparams.speed_tier = SpeedTier::kSquirrel;
+ cparams.progressive_mode = true;
+ cparams.butteraugli_distance = 1.0;
+ PassesEncoderState enc_state;
+ ASSERT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
+ &aux, &pool));
+
+ EXPECT_LE(compressed.size(), 240000u);
+ float target_butteraugli[9] = {};
+ target_butteraugli[1] = 2.5f;
+ target_butteraugli[2] = 16.0f;
+ target_butteraugli[4] = 20.0f;
+ target_butteraugli[8] = 80.0f;
+
+ // The default progressive encoding scheme should make all these downsampling
+ // factors achievable.
+ // TODO(veluca): re-enable downsampling 16.
+ std::vector<size_t> downsamplings = {1, 2, 4, 8}; //, 16};
+
+ auto check = [&](const uint32_t task, size_t /* thread */) -> void {
+ const size_t downsampling = downsamplings[task];
+ extras::JXLDecompressParams dparams;
+ dparams.max_downsampling = downsampling;
+ CodecInOut output;
+ ASSERT_TRUE(
+ test::DecodeFile(dparams, Span<const uint8_t>(compressed), &output));
+ EXPECT_EQ(output.xsize(), io.xsize()) << "downsampling = " << downsampling;
+ EXPECT_EQ(output.ysize(), io.ysize()) << "downsampling = " << downsampling;
+ EXPECT_LE(ButteraugliDistance(io.frames, output.frames, cparams.ba_params,
+ GetJxlCms(),
+ /*distmap=*/nullptr, nullptr),
+ target_butteraugli[downsampling])
+ << "downsampling: " << downsampling;
+ };
+ EXPECT_TRUE(RunOnPool(&pool, 0, downsamplings.size(), ThreadPool::NoInit,
+ check, "TestDownsampling"));
+}
+
+TEST(PassesTest, AllDownsampleFeasibleQProgressive) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+
+ PaddedBytes compressed;
+ AuxOut aux;
+
+ CompressParams cparams;
+ cparams.speed_tier = SpeedTier::kSquirrel;
+ cparams.qprogressive_mode = true;
+ cparams.butteraugli_distance = 1.0;
+ PassesEncoderState enc_state;
+ ASSERT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
+ &aux, &pool));
+
+ EXPECT_LE(compressed.size(), 220000u);
+
+ float target_butteraugli[9] = {};
+ target_butteraugli[1] = 3.0f;
+ target_butteraugli[2] = 6.0f;
+ target_butteraugli[4] = 10.0f;
+ target_butteraugli[8] = 80.0f;
+
+ // The default progressive encoding scheme should make all these downsampling
+ // factors achievable.
+ std::vector<size_t> downsamplings = {1, 2, 4, 8};
+
+ auto check = [&](const uint32_t task, size_t /* thread */) -> void {
+ const size_t downsampling = downsamplings[task];
+ extras::JXLDecompressParams dparams;
+ dparams.max_downsampling = downsampling;
+ CodecInOut output;
+ ASSERT_TRUE(
+ test::DecodeFile(dparams, Span<const uint8_t>(compressed), &output));
+ EXPECT_EQ(output.xsize(), io.xsize()) << "downsampling = " << downsampling;
+ EXPECT_EQ(output.ysize(), io.ysize()) << "downsampling = " << downsampling;
+ EXPECT_LE(ButteraugliDistance(io.frames, output.frames, cparams.ba_params,
+ GetJxlCms(),
+ /*distmap=*/nullptr),
+ target_butteraugli[downsampling])
+ << "downsampling: " << downsampling;
+ };
+ EXPECT_TRUE(RunOnPool(&pool, 0, downsamplings.size(), ThreadPool::NoInit,
+ check, "TestQProgressive"));
+}
+
+TEST(PassesTest, ProgressiveDownsample2DegradesCorrectlyGrayscale) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/cvo9xd_keong_macan_grayscale.png");
+ CodecInOut io_orig;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io_orig, &pool));
+ Rect rect(0, 0, io_orig.xsize(), 128);
+ // need 2 DC groups for the DC frame to actually be progressive.
+ Image3F large(4242, rect.ysize());
+ ZeroFillImage(&large);
+ CopyImageTo(rect, *io_orig.Main().color(), rect, &large);
+ CodecInOut io;
+ io.metadata = io_orig.metadata;
+ io.SetFromImage(std::move(large), io_orig.Main().c_current());
+
+ PaddedBytes compressed;
+ AuxOut aux;
+
+ CompressParams cparams;
+ cparams.speed_tier = SpeedTier::kSquirrel;
+ cparams.progressive_dc = 1;
+ cparams.responsive = true;
+ cparams.qprogressive_mode = true;
+ cparams.butteraugli_distance = 1.0;
+ PassesEncoderState enc_state;
+ ASSERT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
+ &aux, &pool));
+
+ EXPECT_LE(compressed.size(), 10000u);
+
+ extras::JXLDecompressParams dparams;
+ dparams.max_downsampling = 1;
+ CodecInOut output;
+ ASSERT_TRUE(
+ test::DecodeFile(dparams, Span<const uint8_t>(compressed), &output));
+
+ dparams.max_downsampling = 2;
+ CodecInOut output_d2;
+ ASSERT_TRUE(
+ test::DecodeFile(dparams, Span<const uint8_t>(compressed), &output_d2));
+
+ // 0 if reading all the passes, ~15 if skipping the 8x pass.
+ float butteraugli_distance_down2_full = ButteraugliDistance(
+ output.frames, output_d2.frames, cparams.ba_params, GetJxlCms(),
+ /*distmap=*/nullptr);
+
+ EXPECT_LE(butteraugli_distance_down2_full, 3.2f);
+ EXPECT_GE(butteraugli_distance_down2_full, 1.0f);
+}
+
+TEST(PassesTest, ProgressiveDownsample2DegradesCorrectly) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+ CodecInOut io_orig;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io_orig, &pool));
+ Rect rect(0, 0, io_orig.xsize(), 128);
+ // need 2 DC groups for the DC frame to actually be progressive.
+ Image3F large(4242, rect.ysize());
+ ZeroFillImage(&large);
+ CopyImageTo(rect, *io_orig.Main().color(), rect, &large);
+ CodecInOut io;
+ io.SetFromImage(std::move(large), io_orig.Main().c_current());
+
+ PaddedBytes compressed;
+ AuxOut aux;
+
+ CompressParams cparams;
+ cparams.speed_tier = SpeedTier::kSquirrel;
+ cparams.progressive_dc = 1;
+ cparams.responsive = true;
+ cparams.qprogressive_mode = true;
+ cparams.butteraugli_distance = 1.0;
+ PassesEncoderState enc_state;
+ ASSERT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
+ &aux, &pool));
+
+ EXPECT_LE(compressed.size(), 220000u);
+
+ extras::JXLDecompressParams dparams;
+ dparams.max_downsampling = 1;
+ CodecInOut output;
+ ASSERT_TRUE(
+ test::DecodeFile(dparams, Span<const uint8_t>(compressed), &output));
+
+ dparams.max_downsampling = 2;
+ CodecInOut output_d2;
+ ASSERT_TRUE(
+ test::DecodeFile(dparams, Span<const uint8_t>(compressed), &output_d2));
+
+ // 0 if reading all the passes, ~15 if skipping the 8x pass.
+ float butteraugli_distance_down2_full = ButteraugliDistance(
+ output.frames, output_d2.frames, cparams.ba_params, GetJxlCms(),
+ /*distmap=*/nullptr);
+
+ EXPECT_LE(butteraugli_distance_down2_full, 3.0f);
+ EXPECT_GE(butteraugli_distance_down2_full, 1.0f);
+}
+
+TEST(PassesTest, NonProgressiveDCImage) {
+ ThreadPoolForTests pool(8);
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+
+ PaddedBytes compressed;
+ AuxOut aux;
+
+ CompressParams cparams;
+ cparams.speed_tier = SpeedTier::kSquirrel;
+ cparams.progressive_mode = false;
+ cparams.butteraugli_distance = 2.0;
+ PassesEncoderState enc_state;
+ ASSERT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
+ &aux, &pool));
+
+ // Even in non-progressive mode, it should be possible to return a DC-only
+ // image.
+ extras::JXLDecompressParams dparams;
+ dparams.max_downsampling = 100;
+ CodecInOut output;
+ ASSERT_TRUE(test::DecodeFile(dparams, Span<const uint8_t>(compressed),
+ &output, &pool));
+ EXPECT_EQ(output.xsize(), io.xsize());
+ EXPECT_EQ(output.ysize(), io.ysize());
+}
+
+TEST(PassesTest, RoundtripSmallNoGaborishPasses) {
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+ io.ShrinkTo(io.xsize() / 8, io.ysize() / 8);
+
+ CompressParams cparams;
+ cparams.gaborish = Override::kOff;
+ cparams.butteraugli_distance = 1.0;
+ cparams.progressive_mode = true;
+
+ CodecInOut io2;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _));
+ EXPECT_THAT(
+ ButteraugliDistance(io.frames, io2.frames, cparams.ba_params, GetJxlCms(),
+ /*distmap=*/nullptr),
+ IsSlightlyBelow(1.2));
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/patch_dictionary_internal.h b/third_party/jpeg-xl/lib/jxl/patch_dictionary_internal.h
new file mode 100644
index 0000000000..e4172f6db6
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/patch_dictionary_internal.h
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_PATCH_DICTIONARY_INTERNAL_H_
+#define LIB_JXL_PATCH_DICTIONARY_INTERNAL_H_
+
+#include "lib/jxl/dec_patch_dictionary.h"
+#include "lib/jxl/passes_state.h" // for PassesSharedState
+
+namespace jxl {
+
+// Context numbers as specified in Section C.4.5, Listing C.2:
+enum Contexts {
+ kNumRefPatchContext = 0,
+ kReferenceFrameContext = 1,
+ kPatchSizeContext = 2,
+ kPatchReferencePositionContext = 3,
+ kPatchPositionContext = 4,
+ kPatchBlendModeContext = 5,
+ kPatchOffsetContext = 6,
+ kPatchCountContext = 7,
+ kPatchAlphaChannelContext = 8,
+ kPatchClampContext = 9,
+ kNumPatchDictionaryContexts
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_PATCH_DICTIONARY_INTERNAL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/patch_dictionary_test.cc b/third_party/jpeg-xl/lib/jxl/patch_dictionary_test.cc
new file mode 100644
index 0000000000..5cc6c13a9e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/patch_dictionary_test.cc
@@ -0,0 +1,58 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+using ::jxl::test::Roundtrip;
+
+TEST(PatchDictionaryTest, GrayscaleModular) {
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/grayscale_patches.png");
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+
+ CompressParams cparams;
+ cparams.SetLossless();
+ cparams.patches = jxl::Override::kOn;
+
+ CodecInOut io2;
+ // Without patches: ~25k
+ size_t compressed_size;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _, &compressed_size));
+ EXPECT_LE(compressed_size, 8000u);
+ JXL_ASSERT_OK(VerifyRelativeError(*io.Main().color(), *io2.Main().color(),
+ 1e-7f, 0, _));
+}
+
+TEST(PatchDictionaryTest, GrayscaleVarDCT) {
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/grayscale_patches.png");
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+
+ CompressParams cparams;
+ cparams.patches = jxl::Override::kOn;
+
+ CodecInOut io2;
+ // Without patches: ~47k
+ size_t compressed_size;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _, &compressed_size));
+ EXPECT_LE(compressed_size, 14000u);
+ // Without patches: ~1.2
+ EXPECT_LE(
+ ButteraugliDistance(io.frames, io2.frames, cparams.ba_params, GetJxlCms(),
+ /*distmap=*/nullptr),
+ 1.1);
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/preview_test.cc b/third_party/jpeg-xl/lib/jxl/preview_test.cc
new file mode 100644
index 0000000000..9d4603ca70
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/preview_test.cc
@@ -0,0 +1,68 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+
+#include <string>
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/override.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/headers.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+using test::Roundtrip;
+
+TEST(PreviewTest, RoundtripGivenPreview) {
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ CodecInOut io;
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io));
+ io.ShrinkTo(io.xsize() / 8, io.ysize() / 8);
+ // Same as main image
+ io.preview_frame = io.Main().Copy();
+ const size_t preview_xsize = 15;
+ const size_t preview_ysize = 27;
+ io.preview_frame.ShrinkTo(preview_xsize, preview_ysize);
+ io.metadata.m.have_preview = true;
+ ASSERT_TRUE(io.metadata.m.preview_size.Set(io.preview_frame.xsize(),
+ io.preview_frame.ysize()));
+
+ CompressParams cparams;
+ cparams.butteraugli_distance = 2.0;
+ cparams.speed_tier = SpeedTier::kSquirrel;
+
+ CodecInOut io2;
+ JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _));
+ EXPECT_EQ(preview_xsize, io2.metadata.m.preview_size.xsize());
+ EXPECT_EQ(preview_ysize, io2.metadata.m.preview_size.ysize());
+ EXPECT_EQ(preview_xsize, io2.preview_frame.xsize());
+ EXPECT_EQ(preview_ysize, io2.preview_frame.ysize());
+
+ EXPECT_LE(ButteraugliDistance(io.preview_frame, io2.preview_frame,
+ cparams.ba_params, GetJxlCms(),
+ /*distmap=*/nullptr),
+ 2.5);
+ EXPECT_LE(
+ ButteraugliDistance(io.Main(), io2.Main(), cparams.ba_params, GetJxlCms(),
+ /*distmap=*/nullptr),
+ 2.5);
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/quant_weights.cc b/third_party/jpeg-xl/lib/jxl/quant_weights.cc
new file mode 100644
index 0000000000..5e3f3424aa
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/quant_weights.cc
@@ -0,0 +1,1239 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+#include "lib/jxl/quant_weights.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <utility>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/dec_modular.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/quant_weights.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/fast_math-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Lt;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Sqrt;
+
+// kQuantWeights[N * N * c + N * y + x] is the relative weight of the (x, y)
+// coefficient in component c. Higher weights correspond to finer quantization
+// intervals and more bits spent in encoding.
+
+static constexpr const float kAlmostZero = 1e-8f;
+
+void GetQuantWeightsDCT2(const QuantEncoding::DCT2Weights& dct2weights,
+ float* weights) {
+ for (size_t c = 0; c < 3; c++) {
+ size_t start = c * 64;
+ weights[start] = 0xBAD;
+ weights[start + 1] = weights[start + 8] = dct2weights[c][0];
+ weights[start + 9] = dct2weights[c][1];
+ for (size_t y = 0; y < 2; y++) {
+ for (size_t x = 0; x < 2; x++) {
+ weights[start + y * 8 + x + 2] = dct2weights[c][2];
+ weights[start + (y + 2) * 8 + x] = dct2weights[c][2];
+ }
+ }
+ for (size_t y = 0; y < 2; y++) {
+ for (size_t x = 0; x < 2; x++) {
+ weights[start + (y + 2) * 8 + x + 2] = dct2weights[c][3];
+ }
+ }
+ for (size_t y = 0; y < 4; y++) {
+ for (size_t x = 0; x < 4; x++) {
+ weights[start + y * 8 + x + 4] = dct2weights[c][4];
+ weights[start + (y + 4) * 8 + x] = dct2weights[c][4];
+ }
+ }
+ for (size_t y = 0; y < 4; y++) {
+ for (size_t x = 0; x < 4; x++) {
+ weights[start + (y + 4) * 8 + x + 4] = dct2weights[c][5];
+ }
+ }
+ }
+}
+
+void GetQuantWeightsIdentity(const QuantEncoding::IdWeights& idweights,
+ float* weights) {
+ for (size_t c = 0; c < 3; c++) {
+ for (int i = 0; i < 64; i++) {
+ weights[64 * c + i] = idweights[c][0];
+ }
+ weights[64 * c + 1] = idweights[c][1];
+ weights[64 * c + 8] = idweights[c][1];
+ weights[64 * c + 9] = idweights[c][2];
+ }
+}
+
+float Interpolate(float pos, float max, const float* array, size_t len) {
+ float scaled_pos = pos * (len - 1) / max;
+ size_t idx = scaled_pos;
+ JXL_DASSERT(idx + 1 < len);
+ float a = array[idx];
+ float b = array[idx + 1];
+ return a * FastPowf(b / a, scaled_pos - idx);
+}
+
+float Mult(float v) {
+ if (v > 0.0f) return 1.0f + v;
+ return 1.0f / (1.0f - v);
+}
+
+using DF4 = HWY_CAPPED(float, 4);
+
+hwy::HWY_NAMESPACE::Vec<DF4> InterpolateVec(
+ hwy::HWY_NAMESPACE::Vec<DF4> scaled_pos, const float* array) {
+ HWY_CAPPED(int32_t, 4) di;
+
+ auto idx = ConvertTo(di, scaled_pos);
+
+ auto frac = Sub(scaled_pos, ConvertTo(DF4(), idx));
+
+ // TODO(veluca): in theory, this could be done with 8 TableLookupBytes, but
+ // it's probably slower.
+ auto a = GatherIndex(DF4(), array, idx);
+ auto b = GatherIndex(DF4(), array + 1, idx);
+
+ return Mul(a, FastPowf(DF4(), Div(b, a), frac));
+}
+
+// Computes quant weights for a COLS*ROWS-sized transform, using num_bands
+// eccentricity bands and num_ebands eccentricity bands. If print_mode is 1,
+// prints the resulting matrix; if print_mode is 2, prints the matrix in a
+// format suitable for a 3d plot with gnuplot.
+Status GetQuantWeights(
+ size_t ROWS, size_t COLS,
+ const DctQuantWeightParams::DistanceBandsArray& distance_bands,
+ size_t num_bands, float* out) {
+ for (size_t c = 0; c < 3; c++) {
+ float bands[DctQuantWeightParams::kMaxDistanceBands] = {
+ distance_bands[c][0]};
+ if (bands[0] < kAlmostZero) return JXL_FAILURE("Invalid distance bands");
+ for (size_t i = 1; i < num_bands; i++) {
+ bands[i] = bands[i - 1] * Mult(distance_bands[c][i]);
+ if (bands[i] < kAlmostZero) return JXL_FAILURE("Invalid distance bands");
+ }
+ float scale = (num_bands - 1) / (kSqrt2 + 1e-6f);
+ float rcpcol = scale / (COLS - 1);
+ float rcprow = scale / (ROWS - 1);
+ JXL_ASSERT(COLS >= Lanes(DF4()));
+ HWY_ALIGN float l0123[4] = {0, 1, 2, 3};
+ for (uint32_t y = 0; y < ROWS; y++) {
+ float dy = y * rcprow;
+ float dy2 = dy * dy;
+ for (uint32_t x = 0; x < COLS; x += Lanes(DF4())) {
+ auto dx =
+ Mul(Add(Set(DF4(), x), Load(DF4(), l0123)), Set(DF4(), rcpcol));
+ auto scaled_distance = Sqrt(MulAdd(dx, dx, Set(DF4(), dy2)));
+ auto weight = num_bands == 1 ? Set(DF4(), bands[0])
+ : InterpolateVec(scaled_distance, bands);
+ StoreU(weight, DF4(), out + c * COLS * ROWS + y * COLS + x);
+ }
+ }
+ }
+ return true;
+}
+
+// TODO(veluca): SIMD-fy. With 256x256, this is actually slow.
+Status ComputeQuantTable(const QuantEncoding& encoding,
+ float* JXL_RESTRICT table,
+ float* JXL_RESTRICT inv_table, size_t table_num,
+ DequantMatrices::QuantTable kind, size_t* pos) {
+ constexpr size_t N = kBlockDim;
+ size_t wrows = 8 * DequantMatrices::required_size_x[kind],
+ wcols = 8 * DequantMatrices::required_size_y[kind];
+ size_t num = wrows * wcols;
+
+ std::vector<float> weights(3 * num);
+
+ switch (encoding.mode) {
+ case QuantEncoding::kQuantModeLibrary: {
+ // Library and copy quant encoding should get replaced by the actual
+ // parameters by the caller.
+ JXL_ASSERT(false);
+ break;
+ }
+ case QuantEncoding::kQuantModeID: {
+ JXL_ASSERT(num == kDCTBlockSize);
+ GetQuantWeightsIdentity(encoding.idweights, weights.data());
+ break;
+ }
+ case QuantEncoding::kQuantModeDCT2: {
+ JXL_ASSERT(num == kDCTBlockSize);
+ GetQuantWeightsDCT2(encoding.dct2weights, weights.data());
+ break;
+ }
+ case QuantEncoding::kQuantModeDCT4: {
+ JXL_ASSERT(num == kDCTBlockSize);
+ float weights4x4[3 * 4 * 4];
+ // Always use 4x4 GetQuantWeights for DCT4 quantization tables.
+ JXL_RETURN_IF_ERROR(
+ GetQuantWeights(4, 4, encoding.dct_params.distance_bands,
+ encoding.dct_params.num_distance_bands, weights4x4));
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t y = 0; y < kBlockDim; y++) {
+ for (size_t x = 0; x < kBlockDim; x++) {
+ weights[c * num + y * kBlockDim + x] =
+ weights4x4[c * 16 + (y / 2) * 4 + (x / 2)];
+ }
+ }
+ weights[c * num + 1] /= encoding.dct4multipliers[c][0];
+ weights[c * num + N] /= encoding.dct4multipliers[c][0];
+ weights[c * num + N + 1] /= encoding.dct4multipliers[c][1];
+ }
+ break;
+ }
+ case QuantEncoding::kQuantModeDCT4X8: {
+ JXL_ASSERT(num == kDCTBlockSize);
+ float weights4x8[3 * 4 * 8];
+ // Always use 4x8 GetQuantWeights for DCT4X8 quantization tables.
+ JXL_RETURN_IF_ERROR(
+ GetQuantWeights(4, 8, encoding.dct_params.distance_bands,
+ encoding.dct_params.num_distance_bands, weights4x8));
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t y = 0; y < kBlockDim; y++) {
+ for (size_t x = 0; x < kBlockDim; x++) {
+ weights[c * num + y * kBlockDim + x] =
+ weights4x8[c * 32 + (y / 2) * 8 + x];
+ }
+ }
+ weights[c * num + N] /= encoding.dct4x8multipliers[c];
+ }
+ break;
+ }
+ case QuantEncoding::kQuantModeDCT: {
+ JXL_RETURN_IF_ERROR(GetQuantWeights(
+ wrows, wcols, encoding.dct_params.distance_bands,
+ encoding.dct_params.num_distance_bands, weights.data()));
+ break;
+ }
+ case QuantEncoding::kQuantModeRAW: {
+ if (!encoding.qraw.qtable || encoding.qraw.qtable->size() != 3 * num) {
+ return JXL_FAILURE("Invalid table encoding");
+ }
+ for (size_t i = 0; i < 3 * num; i++) {
+ weights[i] =
+ 1.f / (encoding.qraw.qtable_den * (*encoding.qraw.qtable)[i]);
+ }
+ break;
+ }
+ case QuantEncoding::kQuantModeAFV: {
+ constexpr float kFreqs[] = {
+ 0xBAD,
+ 0xBAD,
+ 0.8517778890324296,
+ 5.37778436506804,
+ 0xBAD,
+ 0xBAD,
+ 4.734747904497923,
+ 5.449245381693219,
+ 1.6598270267479331,
+ 4,
+ 7.275749096817861,
+ 10.423227632456525,
+ 2.662932286148962,
+ 7.630657783650829,
+ 8.962388608184032,
+ 12.97166202570235,
+ };
+
+ float weights4x8[3 * 4 * 8];
+ JXL_RETURN_IF_ERROR((
+ GetQuantWeights(4, 8, encoding.dct_params.distance_bands,
+ encoding.dct_params.num_distance_bands, weights4x8)));
+ float weights4x4[3 * 4 * 4];
+ JXL_RETURN_IF_ERROR((GetQuantWeights(
+ 4, 4, encoding.dct_params_afv_4x4.distance_bands,
+ encoding.dct_params_afv_4x4.num_distance_bands, weights4x4)));
+
+ constexpr float lo = 0.8517778890324296;
+ constexpr float hi = 12.97166202570235f - lo + 1e-6f;
+ for (size_t c = 0; c < 3; c++) {
+ float bands[4];
+ bands[0] = encoding.afv_weights[c][5];
+ if (bands[0] < kAlmostZero) return JXL_FAILURE("Invalid AFV bands");
+ for (size_t i = 1; i < 4; i++) {
+ bands[i] = bands[i - 1] * Mult(encoding.afv_weights[c][i + 5]);
+ if (bands[i] < kAlmostZero) return JXL_FAILURE("Invalid AFV bands");
+ }
+ size_t start = c * 64;
+ auto set_weight = [&start, &weights](size_t x, size_t y, float val) {
+ weights[start + y * 8 + x] = val;
+ };
+ weights[start] = 1; // Not used, but causes MSAN error otherwise.
+ // Weights for (0, 1) and (1, 0).
+ set_weight(0, 1, encoding.afv_weights[c][0]);
+ set_weight(1, 0, encoding.afv_weights[c][1]);
+ // AFV special weights for 3-pixel corner.
+ set_weight(0, 2, encoding.afv_weights[c][2]);
+ set_weight(2, 0, encoding.afv_weights[c][3]);
+ set_weight(2, 2, encoding.afv_weights[c][4]);
+
+ // All other AFV weights.
+ for (size_t y = 0; y < 4; y++) {
+ for (size_t x = 0; x < 4; x++) {
+ if (x < 2 && y < 2) continue;
+ float val = Interpolate(kFreqs[y * 4 + x] - lo, hi, bands, 4);
+ set_weight(2 * x, 2 * y, val);
+ }
+ }
+
+ // Put 4x8 weights in odd rows, except (1, 0).
+ for (size_t y = 0; y < kBlockDim / 2; y++) {
+ for (size_t x = 0; x < kBlockDim; x++) {
+ if (x == 0 && y == 0) continue;
+ weights[c * num + (2 * y + 1) * kBlockDim + x] =
+ weights4x8[c * 32 + y * 8 + x];
+ }
+ }
+ // Put 4x4 weights in even rows / odd columns, except (0, 1).
+ for (size_t y = 0; y < kBlockDim / 2; y++) {
+ for (size_t x = 0; x < kBlockDim / 2; x++) {
+ if (x == 0 && y == 0) continue;
+ weights[c * num + (2 * y) * kBlockDim + 2 * x + 1] =
+ weights4x4[c * 16 + y * 4 + x];
+ }
+ }
+ }
+ break;
+ }
+ }
+ size_t prev_pos = *pos;
+ HWY_CAPPED(float, 64) d;
+ for (size_t i = 0; i < num * 3; i += Lanes(d)) {
+ auto inv_val = LoadU(d, weights.data() + i);
+ if (JXL_UNLIKELY(!AllFalse(d, Ge(inv_val, Set(d, 1.0f / kAlmostZero))) ||
+ !AllFalse(d, Lt(inv_val, Set(d, kAlmostZero))))) {
+ return JXL_FAILURE("Invalid quantization table");
+ }
+ auto val = Div(Set(d, 1.0f), inv_val);
+ StoreU(val, d, table + *pos + i);
+ StoreU(inv_val, d, inv_table + *pos + i);
+ }
+ (*pos) += 3 * num;
+
+ // Ensure that the lowest frequencies have a 0 inverse table.
+ // This does not affect en/decoding, but allows AC strategy selection to be
+ // slightly simpler.
+ size_t xs = DequantMatrices::required_size_x[kind];
+ size_t ys = DequantMatrices::required_size_y[kind];
+ CoefficientLayout(&ys, &xs);
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t y = 0; y < ys; y++) {
+ for (size_t x = 0; x < xs; x++) {
+ inv_table[prev_pos + c * ys * xs * kDCTBlockSize + y * kBlockDim * xs +
+ x] = 0;
+ }
+ }
+ }
+ return true;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace jxl {
+namespace {
+
+HWY_EXPORT(ComputeQuantTable);
+
+static constexpr const float kAlmostZero = 1e-8f;
+
+Status DecodeDctParams(BitReader* br, DctQuantWeightParams* params) {
+ params->num_distance_bands =
+ br->ReadFixedBits<DctQuantWeightParams::kLog2MaxDistanceBands>() + 1;
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t i = 0; i < params->num_distance_bands; i++) {
+ JXL_RETURN_IF_ERROR(F16Coder::Read(br, &params->distance_bands[c][i]));
+ }
+ if (params->distance_bands[c][0] < kAlmostZero) {
+ return JXL_FAILURE("Distance band seed is too small");
+ }
+ params->distance_bands[c][0] *= 64.0f;
+ }
+ return true;
+}
+
+Status Decode(BitReader* br, QuantEncoding* encoding, size_t required_size_x,
+ size_t required_size_y, size_t idx,
+ ModularFrameDecoder* modular_frame_decoder) {
+ size_t required_size = required_size_x * required_size_y;
+ required_size_x *= kBlockDim;
+ required_size_y *= kBlockDim;
+ int mode = br->ReadFixedBits<kLog2NumQuantModes>();
+ switch (mode) {
+ case QuantEncoding::kQuantModeLibrary: {
+ encoding->predefined = br->ReadFixedBits<kCeilLog2NumPredefinedTables>();
+ if (encoding->predefined >= kNumPredefinedTables) {
+ return JXL_FAILURE("Invalid predefined table");
+ }
+ break;
+ }
+ case QuantEncoding::kQuantModeID: {
+ if (required_size != 1) return JXL_FAILURE("Invalid mode");
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t i = 0; i < 3; i++) {
+ JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->idweights[c][i]));
+ if (std::abs(encoding->idweights[c][i]) < kAlmostZero) {
+ return JXL_FAILURE("ID Quantizer is too small");
+ }
+ encoding->idweights[c][i] *= 64;
+ }
+ }
+ break;
+ }
+ case QuantEncoding::kQuantModeDCT2: {
+ if (required_size != 1) return JXL_FAILURE("Invalid mode");
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t i = 0; i < 6; i++) {
+ JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->dct2weights[c][i]));
+ if (std::abs(encoding->dct2weights[c][i]) < kAlmostZero) {
+ return JXL_FAILURE("Quantizer is too small");
+ }
+ encoding->dct2weights[c][i] *= 64;
+ }
+ }
+ break;
+ }
+ case QuantEncoding::kQuantModeDCT4X8: {
+ if (required_size != 1) return JXL_FAILURE("Invalid mode");
+ for (size_t c = 0; c < 3; c++) {
+ JXL_RETURN_IF_ERROR(
+ F16Coder::Read(br, &encoding->dct4x8multipliers[c]));
+ if (std::abs(encoding->dct4x8multipliers[c]) < kAlmostZero) {
+ return JXL_FAILURE("DCT4X8 multiplier is too small");
+ }
+ }
+ JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params));
+ break;
+ }
+ case QuantEncoding::kQuantModeDCT4: {
+ if (required_size != 1) return JXL_FAILURE("Invalid mode");
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t i = 0; i < 2; i++) {
+ JXL_RETURN_IF_ERROR(
+ F16Coder::Read(br, &encoding->dct4multipliers[c][i]));
+ if (std::abs(encoding->dct4multipliers[c][i]) < kAlmostZero) {
+ return JXL_FAILURE("DCT4 multiplier is too small");
+ }
+ }
+ }
+ JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params));
+ break;
+ }
+ case QuantEncoding::kQuantModeAFV: {
+ if (required_size != 1) return JXL_FAILURE("Invalid mode");
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t i = 0; i < 9; i++) {
+ JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->afv_weights[c][i]));
+ }
+ for (size_t i = 0; i < 6; i++) {
+ encoding->afv_weights[c][i] *= 64;
+ }
+ }
+ JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params));
+ JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params_afv_4x4));
+ break;
+ }
+ case QuantEncoding::kQuantModeDCT: {
+ JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params));
+ break;
+ }
+ case QuantEncoding::kQuantModeRAW: {
+ // Set mode early, to avoid mem-leak.
+ encoding->mode = QuantEncoding::kQuantModeRAW;
+ JXL_RETURN_IF_ERROR(ModularFrameDecoder::DecodeQuantTable(
+ required_size_x, required_size_y, br, encoding, idx,
+ modular_frame_decoder));
+ break;
+ }
+ default:
+ return JXL_FAILURE("Invalid quantization table encoding");
+ }
+ encoding->mode = QuantEncoding::Mode(mode);
+ return true;
+}
+
+} // namespace
+
+// These definitions are needed before C++17.
+constexpr size_t DequantMatrices::required_size_[];
+constexpr size_t DequantMatrices::required_size_x[];
+constexpr size_t DequantMatrices::required_size_y[];
+constexpr DequantMatrices::QuantTable DequantMatrices::kQuantTable[];
+
+Status DequantMatrices::Decode(BitReader* br,
+ ModularFrameDecoder* modular_frame_decoder) {
+ size_t all_default = br->ReadBits(1);
+ size_t num_tables = all_default ? 0 : static_cast<size_t>(kNum);
+ encodings_.clear();
+ encodings_.resize(kNum, QuantEncoding::Library(0));
+ for (size_t i = 0; i < num_tables; i++) {
+ JXL_RETURN_IF_ERROR(
+ jxl::Decode(br, &encodings_[i], required_size_x[i % kNum],
+ required_size_y[i % kNum], i, modular_frame_decoder));
+ }
+ computed_mask_ = 0;
+ return true;
+}
+
+Status DequantMatrices::DecodeDC(BitReader* br) {
+ bool all_default = br->ReadBits(1);
+ if (!br->AllReadsWithinBounds()) return JXL_FAILURE("EOS during DecodeDC");
+ if (!all_default) {
+ for (size_t c = 0; c < 3; c++) {
+ JXL_RETURN_IF_ERROR(F16Coder::Read(br, &dc_quant_[c]));
+ dc_quant_[c] *= 1.0f / 128.0f;
+ // Negative values and nearly zero are invalid values.
+ if (dc_quant_[c] < kAlmostZero) {
+ return JXL_FAILURE("Invalid dc_quant: coefficient is too small.");
+ }
+ inv_dc_quant_[c] = 1.0f / dc_quant_[c];
+ }
+ }
+ return true;
+}
+
+constexpr float V(float v) { return static_cast<float>(v); }
+
+namespace {
+struct DequantMatricesLibraryDef {
+ // DCT8
+ static constexpr QuantEncodingInternal DCT() {
+ return QuantEncodingInternal::DCT(DctQuantWeightParams({{{{
+ V(3150.0),
+ V(0.0),
+ V(-0.4),
+ V(-0.4),
+ V(-0.4),
+ V(-2.0),
+ }},
+ {{
+ V(560.0),
+ V(0.0),
+ V(-0.3),
+ V(-0.3),
+ V(-0.3),
+ V(-0.3),
+ }},
+ {{
+ V(512.0),
+ V(-2.0),
+ V(-1.0),
+ V(0.0),
+ V(-1.0),
+ V(-2.0),
+ }}}},
+ 6));
+ }
+
+ // Identity
+ static constexpr QuantEncodingInternal IDENTITY() {
+ return QuantEncodingInternal::Identity({{{{
+ V(280.0),
+ V(3160.0),
+ V(3160.0),
+ }},
+ {{
+ V(60.0),
+ V(864.0),
+ V(864.0),
+ }},
+ {{
+ V(18.0),
+ V(200.0),
+ V(200.0),
+ }}}});
+ }
+
+ // DCT2
+ static constexpr QuantEncodingInternal DCT2X2() {
+ return QuantEncodingInternal::DCT2({{{{
+ V(3840.0),
+ V(2560.0),
+ V(1280.0),
+ V(640.0),
+ V(480.0),
+ V(300.0),
+ }},
+ {{
+ V(960.0),
+ V(640.0),
+ V(320.0),
+ V(180.0),
+ V(140.0),
+ V(120.0),
+ }},
+ {{
+ V(640.0),
+ V(320.0),
+ V(128.0),
+ V(64.0),
+ V(32.0),
+ V(16.0),
+ }}}});
+ }
+
+ // DCT4 (quant_kind 3)
+ static constexpr QuantEncodingInternal DCT4X4() {
+ return QuantEncodingInternal::DCT4(DctQuantWeightParams({{{{
+ V(2200.0),
+ V(0.0),
+ V(0.0),
+ V(0.0),
+ }},
+ {{
+ V(392.0),
+ V(0.0),
+ V(0.0),
+ V(0.0),
+ }},
+ {{
+ V(112.0),
+ V(-0.25),
+ V(-0.25),
+ V(-0.5),
+ }}}},
+ 4),
+ /* kMul */
+ {{{{
+ V(1.0),
+ V(1.0),
+ }},
+ {{
+ V(1.0),
+ V(1.0),
+ }},
+ {{
+ V(1.0),
+ V(1.0),
+ }}}});
+ }
+
+ // DCT16
+ static constexpr QuantEncodingInternal DCT16X16() {
+ return QuantEncodingInternal::DCT(
+ DctQuantWeightParams({{{{
+ V(8996.8725711814115328),
+ V(-1.3000777393353804),
+ V(-0.49424529824571225),
+ V(-0.439093774457103443),
+ V(-0.6350101832695744),
+ V(-0.90177264050827612),
+ V(-1.6162099239887414),
+ }},
+ {{
+ V(3191.48366296844234752),
+ V(-0.67424582104194355),
+ V(-0.80745813428471001),
+ V(-0.44925837484843441),
+ V(-0.35865440981033403),
+ V(-0.31322389111877305),
+ V(-0.37615025315725483),
+ }},
+ {{
+ V(1157.50408145487200256),
+ V(-2.0531423165804414),
+ V(-1.4),
+ V(-0.50687130033378396),
+ V(-0.42708730624733904),
+ V(-1.4856834539296244),
+ V(-4.9209142884401604),
+ }}}},
+ 7));
+ }
+
+ // DCT32
+ static constexpr QuantEncodingInternal DCT32X32() {
+ return QuantEncodingInternal::DCT(
+ DctQuantWeightParams({{{{
+ V(15718.40830982518931456),
+ V(-1.025),
+ V(-0.98),
+ V(-0.9012),
+ V(-0.4),
+ V(-0.48819395464),
+ V(-0.421064),
+ V(-0.27),
+ }},
+ {{
+ V(7305.7636810695983104),
+ V(-0.8041958212306401),
+ V(-0.7633036457487539),
+ V(-0.55660379990111464),
+ V(-0.49785304658857626),
+ V(-0.43699592683512467),
+ V(-0.40180866526242109),
+ V(-0.27321683125358037),
+ }},
+ {{
+ V(3803.53173721215041536),
+ V(-3.060733579805728),
+ V(-2.0413270132490346),
+ V(-2.0235650159727417),
+ V(-0.5495389509954993),
+ V(-0.4),
+ V(-0.4),
+ V(-0.3),
+ }}}},
+ 8));
+ }
+
+ // DCT16X8
+ static constexpr QuantEncodingInternal DCT8X16() {
+ return QuantEncodingInternal::DCT(
+ DctQuantWeightParams({{{{
+ V(7240.7734393502),
+ V(-0.7),
+ V(-0.7),
+ V(-0.2),
+ V(-0.2),
+ V(-0.2),
+ V(-0.5),
+ }},
+ {{
+ V(1448.15468787004),
+ V(-0.5),
+ V(-0.5),
+ V(-0.5),
+ V(-0.2),
+ V(-0.2),
+ V(-0.2),
+ }},
+ {{
+ V(506.854140754517),
+ V(-1.4),
+ V(-0.2),
+ V(-0.5),
+ V(-0.5),
+ V(-1.5),
+ V(-3.6),
+ }}}},
+ 7));
+ }
+
+ // DCT32X8
+ static constexpr QuantEncodingInternal DCT8X32() {
+ return QuantEncodingInternal::DCT(
+ DctQuantWeightParams({{{{
+ V(16283.2494710648897),
+ V(-1.7812845336559429),
+ V(-1.6309059012653515),
+ V(-1.0382179034313539),
+ V(-0.85),
+ V(-0.7),
+ V(-0.9),
+ V(-1.2360638576849587),
+ }},
+ {{
+ V(5089.15750884921511936),
+ V(-0.320049391452786891),
+ V(-0.35362849922161446),
+ V(-0.30340000000000003),
+ V(-0.61),
+ V(-0.5),
+ V(-0.5),
+ V(-0.6),
+ }},
+ {{
+ V(3397.77603275308720128),
+ V(-0.321327362693153371),
+ V(-0.34507619223117997),
+ V(-0.70340000000000003),
+ V(-0.9),
+ V(-1.0),
+ V(-1.0),
+ V(-1.1754605576265209),
+ }}}},
+ 8));
+ }
+
+ // DCT32X16
+ static constexpr QuantEncodingInternal DCT16X32() {
+ return QuantEncodingInternal::DCT(
+ DctQuantWeightParams({{{{
+ V(13844.97076442300573),
+ V(-0.97113799999999995),
+ V(-0.658),
+ V(-0.42026),
+ V(-0.22712),
+ V(-0.2206),
+ V(-0.226),
+ V(-0.6),
+ }},
+ {{
+ V(4798.964084220744293),
+ V(-0.61125308982767057),
+ V(-0.83770786552491361),
+ V(-0.79014862079498627),
+ V(-0.2692727459704829),
+ V(-0.38272769465388551),
+ V(-0.22924222653091453),
+ V(-0.20719098826199578),
+ }},
+ {{
+ V(1807.236946760964614),
+ V(-1.2),
+ V(-1.2),
+ V(-0.7),
+ V(-0.7),
+ V(-0.7),
+ V(-0.4),
+ V(-0.5),
+ }}}},
+ 8));
+ }
+
+ // DCT4X8 and 8x4
+ static constexpr QuantEncodingInternal DCT4X8() {
+ return QuantEncodingInternal::DCT4X8(
+ DctQuantWeightParams({{
+ {{
+ V(2198.050556016380522),
+ V(-0.96269623020744692),
+ V(-0.76194253026666783),
+ V(-0.6551140670773547),
+ }},
+ {{
+ V(764.3655248643528689),
+ V(-0.92630200888366945),
+ V(-0.9675229603596517),
+ V(-0.27845290869168118),
+ }},
+ {{
+ V(527.107573587542228),
+ V(-1.4594385811273854),
+ V(-1.450082094097871593),
+ V(-1.5843722511996204),
+ }},
+ }},
+ 4),
+ /* kMuls */
+ {{
+ V(1.0),
+ V(1.0),
+ V(1.0),
+ }});
+ }
+ // AFV
+ static QuantEncodingInternal AFV0() {
+ return QuantEncodingInternal::AFV(DCT4X8().dct_params, DCT4X4().dct_params,
+ {{{{
+ // 4x4/4x8 DC tendency.
+ V(3072.0),
+ V(3072.0),
+ // AFV corner.
+ V(256.0),
+ V(256.0),
+ V(256.0),
+ // AFV high freqs.
+ V(414.0),
+ V(0.0),
+ V(0.0),
+ V(0.0),
+ }},
+ {{
+ // 4x4/4x8 DC tendency.
+ V(1024.0),
+ V(1024.0),
+ // AFV corner.
+ V(50),
+ V(50),
+ V(50),
+ // AFV high freqs.
+ V(58.0),
+ V(0.0),
+ V(0.0),
+ V(0.0),
+ }},
+ {{
+ // 4x4/4x8 DC tendency.
+ V(384.0),
+ V(384.0),
+ // AFV corner.
+ V(12.0),
+ V(12.0),
+ V(12.0),
+ // AFV high freqs.
+ V(22.0),
+ V(-0.25),
+ V(-0.25),
+ V(-0.25),
+ }}}});
+ }
+
+ // DCT64
+ static QuantEncodingInternal DCT64X64() {
+ return QuantEncodingInternal::DCT(
+ DctQuantWeightParams({{{{
+ V(0.9 * 26629.073922049845),
+ V(-1.025),
+ V(-0.78),
+ V(-0.65012),
+ V(-0.19041574084286472),
+ V(-0.20819395464),
+ V(-0.421064),
+ V(-0.32733845535848671),
+ }},
+ {{
+ V(0.9 * 9311.3238710010046),
+ V(-0.3041958212306401),
+ V(-0.3633036457487539),
+ V(-0.35660379990111464),
+ V(-0.3443074455424403),
+ V(-0.33699592683512467),
+ V(-0.30180866526242109),
+ V(-0.27321683125358037),
+ }},
+ {{
+ V(0.9 * 4992.2486445538634),
+ V(-1.2),
+ V(-1.2),
+ V(-0.8),
+ V(-0.7),
+ V(-0.7),
+ V(-0.4),
+ V(-0.5),
+ }}}},
+ 8));
+ }
+
+ // DCT64X32
+ static QuantEncodingInternal DCT32X64() {
+ return QuantEncodingInternal::DCT(
+ DctQuantWeightParams({{{{
+ V(0.65 * 23629.073922049845),
+ V(-1.025),
+ V(-0.78),
+ V(-0.65012),
+ V(-0.19041574084286472),
+ V(-0.20819395464),
+ V(-0.421064),
+ V(-0.32733845535848671),
+ }},
+ {{
+ V(0.65 * 8611.3238710010046),
+ V(-0.3041958212306401),
+ V(-0.3633036457487539),
+ V(-0.35660379990111464),
+ V(-0.3443074455424403),
+ V(-0.33699592683512467),
+ V(-0.30180866526242109),
+ V(-0.27321683125358037),
+ }},
+ {{
+ V(0.65 * 4492.2486445538634),
+ V(-1.2),
+ V(-1.2),
+ V(-0.8),
+ V(-0.7),
+ V(-0.7),
+ V(-0.4),
+ V(-0.5),
+ }}}},
+ 8));
+ }
+ // DCT128X128
+ static QuantEncodingInternal DCT128X128() {
+ return QuantEncodingInternal::DCT(
+ DctQuantWeightParams({{{{
+ V(1.8 * 26629.073922049845),
+ V(-1.025),
+ V(-0.78),
+ V(-0.65012),
+ V(-0.19041574084286472),
+ V(-0.20819395464),
+ V(-0.421064),
+ V(-0.32733845535848671),
+ }},
+ {{
+ V(1.8 * 9311.3238710010046),
+ V(-0.3041958212306401),
+ V(-0.3633036457487539),
+ V(-0.35660379990111464),
+ V(-0.3443074455424403),
+ V(-0.33699592683512467),
+ V(-0.30180866526242109),
+ V(-0.27321683125358037),
+ }},
+ {{
+ V(1.8 * 4992.2486445538634),
+ V(-1.2),
+ V(-1.2),
+ V(-0.8),
+ V(-0.7),
+ V(-0.7),
+ V(-0.4),
+ V(-0.5),
+ }}}},
+ 8));
+ }
+
+ // DCT128X64
+ static QuantEncodingInternal DCT64X128() {
+ return QuantEncodingInternal::DCT(
+ DctQuantWeightParams({{{{
+ V(1.3 * 23629.073922049845),
+ V(-1.025),
+ V(-0.78),
+ V(-0.65012),
+ V(-0.19041574084286472),
+ V(-0.20819395464),
+ V(-0.421064),
+ V(-0.32733845535848671),
+ }},
+ {{
+ V(1.3 * 8611.3238710010046),
+ V(-0.3041958212306401),
+ V(-0.3633036457487539),
+ V(-0.35660379990111464),
+ V(-0.3443074455424403),
+ V(-0.33699592683512467),
+ V(-0.30180866526242109),
+ V(-0.27321683125358037),
+ }},
+ {{
+ V(1.3 * 4492.2486445538634),
+ V(-1.2),
+ V(-1.2),
+ V(-0.8),
+ V(-0.7),
+ V(-0.7),
+ V(-0.4),
+ V(-0.5),
+ }}}},
+ 8));
+ }
+ // DCT256X256
+ static QuantEncodingInternal DCT256X256() {
+ return QuantEncodingInternal::DCT(
+ DctQuantWeightParams({{{{
+ V(3.6 * 26629.073922049845),
+ V(-1.025),
+ V(-0.78),
+ V(-0.65012),
+ V(-0.19041574084286472),
+ V(-0.20819395464),
+ V(-0.421064),
+ V(-0.32733845535848671),
+ }},
+ {{
+ V(3.6 * 9311.3238710010046),
+ V(-0.3041958212306401),
+ V(-0.3633036457487539),
+ V(-0.35660379990111464),
+ V(-0.3443074455424403),
+ V(-0.33699592683512467),
+ V(-0.30180866526242109),
+ V(-0.27321683125358037),
+ }},
+ {{
+ V(3.6 * 4992.2486445538634),
+ V(-1.2),
+ V(-1.2),
+ V(-0.8),
+ V(-0.7),
+ V(-0.7),
+ V(-0.4),
+ V(-0.5),
+ }}}},
+ 8));
+ }
+
+ // DCT256X128
+ static QuantEncodingInternal DCT128X256() {
+ return QuantEncodingInternal::DCT(
+ DctQuantWeightParams({{{{
+ V(2.6 * 23629.073922049845),
+ V(-1.025),
+ V(-0.78),
+ V(-0.65012),
+ V(-0.19041574084286472),
+ V(-0.20819395464),
+ V(-0.421064),
+ V(-0.32733845535848671),
+ }},
+ {{
+ V(2.6 * 8611.3238710010046),
+ V(-0.3041958212306401),
+ V(-0.3633036457487539),
+ V(-0.35660379990111464),
+ V(-0.3443074455424403),
+ V(-0.33699592683512467),
+ V(-0.30180866526242109),
+ V(-0.27321683125358037),
+ }},
+ {{
+ V(2.6 * 4492.2486445538634),
+ V(-1.2),
+ V(-1.2),
+ V(-0.8),
+ V(-0.7),
+ V(-0.7),
+ V(-0.4),
+ V(-0.5),
+ }}}},
+ 8));
+ }
+};
+} // namespace
+
+DequantMatrices::DequantLibraryInternal DequantMatrices::LibraryInit() {
+ static_assert(kNum == 17,
+ "Update this function when adding new quantization kinds.");
+ static_assert(kNumPredefinedTables == 1,
+ "Update this function when adding new quantization matrices to "
+ "the library.");
+
+ // The library and the indices need to be kept in sync manually.
+ static_assert(0 == DCT, "Update the DequantLibrary array below.");
+ static_assert(1 == IDENTITY, "Update the DequantLibrary array below.");
+ static_assert(2 == DCT2X2, "Update the DequantLibrary array below.");
+ static_assert(3 == DCT4X4, "Update the DequantLibrary array below.");
+ static_assert(4 == DCT16X16, "Update the DequantLibrary array below.");
+ static_assert(5 == DCT32X32, "Update the DequantLibrary array below.");
+ static_assert(6 == DCT8X16, "Update the DequantLibrary array below.");
+ static_assert(7 == DCT8X32, "Update the DequantLibrary array below.");
+ static_assert(8 == DCT16X32, "Update the DequantLibrary array below.");
+ static_assert(9 == DCT4X8, "Update the DequantLibrary array below.");
+ static_assert(10 == AFV0, "Update the DequantLibrary array below.");
+ static_assert(11 == DCT64X64, "Update the DequantLibrary array below.");
+ static_assert(12 == DCT32X64, "Update the DequantLibrary array below.");
+ static_assert(13 == DCT128X128, "Update the DequantLibrary array below.");
+ static_assert(14 == DCT64X128, "Update the DequantLibrary array below.");
+ static_assert(15 == DCT256X256, "Update the DequantLibrary array below.");
+ static_assert(16 == DCT128X256, "Update the DequantLibrary array below.");
+ return DequantMatrices::DequantLibraryInternal{{
+ DequantMatricesLibraryDef::DCT(),
+ DequantMatricesLibraryDef::IDENTITY(),
+ DequantMatricesLibraryDef::DCT2X2(),
+ DequantMatricesLibraryDef::DCT4X4(),
+ DequantMatricesLibraryDef::DCT16X16(),
+ DequantMatricesLibraryDef::DCT32X32(),
+ DequantMatricesLibraryDef::DCT8X16(),
+ DequantMatricesLibraryDef::DCT8X32(),
+ DequantMatricesLibraryDef::DCT16X32(),
+ DequantMatricesLibraryDef::DCT4X8(),
+ DequantMatricesLibraryDef::AFV0(),
+ DequantMatricesLibraryDef::DCT64X64(),
+ DequantMatricesLibraryDef::DCT32X64(),
+ // Same default for large transforms (128+) as for 64x* transforms.
+ DequantMatricesLibraryDef::DCT128X128(),
+ DequantMatricesLibraryDef::DCT64X128(),
+ DequantMatricesLibraryDef::DCT256X256(),
+ DequantMatricesLibraryDef::DCT128X256(),
+ }};
+}
+
+const QuantEncoding* DequantMatrices::Library() {
+ static const DequantMatrices::DequantLibraryInternal kDequantLibrary =
+ DequantMatrices::LibraryInit();
+ // Downcast the result to a const QuantEncoding* from QuantEncodingInternal*
+ // since the subclass (QuantEncoding) doesn't add any new members and users
+ // will need to upcast to QuantEncodingInternal to access the members of that
+ // class. This allows to have kDequantLibrary as a constexpr value while still
+ // allowing to create QuantEncoding::RAW() instances that use std::vector in
+ // C++11.
+ return reinterpret_cast<const QuantEncoding*>(kDequantLibrary.data());
+}
+
+DequantMatrices::DequantMatrices() {
+ encodings_.resize(size_t(QuantTable::kNum), QuantEncoding::Library(0));
+ size_t pos = 0;
+ size_t offsets[kNum * 3];
+ for (size_t i = 0; i < size_t(QuantTable::kNum); i++) {
+ size_t num = required_size_[i] * kDCTBlockSize;
+ for (size_t c = 0; c < 3; c++) {
+ offsets[3 * i + c] = pos + c * num;
+ }
+ pos += 3 * num;
+ }
+ for (size_t i = 0; i < AcStrategy::kNumValidStrategies; i++) {
+ for (size_t c = 0; c < 3; c++) {
+ table_offsets_[i * 3 + c] = offsets[kQuantTable[i] * 3 + c];
+ }
+ }
+}
+
+Status DequantMatrices::EnsureComputed(uint32_t acs_mask) {
+ const QuantEncoding* library = Library();
+
+ if (!table_storage_) {
+ table_storage_ = hwy::AllocateAligned<float>(2 * kTotalTableSize);
+ table_ = table_storage_.get();
+ inv_table_ = table_storage_.get() + kTotalTableSize;
+ }
+
+ size_t offsets[kNum * 3 + 1];
+ size_t pos = 0;
+ for (size_t i = 0; i < kNum; i++) {
+ size_t num = required_size_[i] * kDCTBlockSize;
+ for (size_t c = 0; c < 3; c++) {
+ offsets[3 * i + c] = pos + c * num;
+ }
+ pos += 3 * num;
+ }
+ offsets[kNum * 3] = pos;
+ JXL_ASSERT(pos == kTotalTableSize);
+
+ uint32_t kind_mask = 0;
+ for (size_t i = 0; i < AcStrategy::kNumValidStrategies; i++) {
+ if (acs_mask & (1u << i)) {
+ kind_mask |= 1u << kQuantTable[i];
+ }
+ }
+ uint32_t computed_kind_mask = 0;
+ for (size_t i = 0; i < AcStrategy::kNumValidStrategies; i++) {
+ if (computed_mask_ & (1u << i)) {
+ computed_kind_mask |= 1u << kQuantTable[i];
+ }
+ }
+ for (size_t table = 0; table < kNum; table++) {
+ if ((1 << table) & computed_kind_mask) continue;
+ if ((1 << table) & ~kind_mask) continue;
+ size_t pos = offsets[table * 3];
+ if (encodings_[table].mode == QuantEncoding::kQuantModeLibrary) {
+ JXL_CHECK(HWY_DYNAMIC_DISPATCH(ComputeQuantTable)(
+ library[table], table_storage_.get(),
+ table_storage_.get() + kTotalTableSize, table, QuantTable(table),
+ &pos));
+ } else {
+ JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(ComputeQuantTable)(
+ encodings_[table], table_storage_.get(),
+ table_storage_.get() + kTotalTableSize, table, QuantTable(table),
+ &pos));
+ }
+ JXL_ASSERT(pos == offsets[table * 3 + 3]);
+ }
+ computed_mask_ |= acs_mask;
+
+ return true;
+}
+
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/quant_weights.h b/third_party/jpeg-xl/lib/jxl/quant_weights.h
new file mode 100644
index 0000000000..d76fc1d1e6
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/quant_weights.h
@@ -0,0 +1,448 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_QUANT_WEIGHTS_H_
+#define LIB_JXL_QUANT_WEIGHTS_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#include <array>
+#include <hwy/aligned_allocator.h>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/cache_aligned.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+template <typename T, size_t N>
+constexpr T ArraySum(T (&a)[N], size_t i = N - 1) {
+ static_assert(N > 0, "Trying to compute the sum of an empty array");
+ return i == 0 ? a[0] : a[i] + ArraySum(a, i - 1);
+}
+
+static constexpr size_t kMaxQuantTableSize = AcStrategy::kMaxCoeffArea;
+static constexpr size_t kNumPredefinedTables = 1;
+static constexpr size_t kCeilLog2NumPredefinedTables = 0;
+static constexpr size_t kLog2NumQuantModes = 3;
+
+struct DctQuantWeightParams {
+ static constexpr size_t kLog2MaxDistanceBands = 4;
+ static constexpr size_t kMaxDistanceBands = 1 + (1 << kLog2MaxDistanceBands);
+ typedef std::array<std::array<float, kMaxDistanceBands>, 3>
+ DistanceBandsArray;
+
+ size_t num_distance_bands = 0;
+ DistanceBandsArray distance_bands = {};
+
+ constexpr DctQuantWeightParams() : num_distance_bands(0) {}
+
+ constexpr DctQuantWeightParams(const DistanceBandsArray& dist_bands,
+ size_t num_dist_bands)
+ : num_distance_bands(num_dist_bands), distance_bands(dist_bands) {}
+
+ template <size_t num_dist_bands>
+ explicit DctQuantWeightParams(const float dist_bands[3][num_dist_bands]) {
+ num_distance_bands = num_dist_bands;
+ for (size_t c = 0; c < 3; c++) {
+ memcpy(distance_bands[c].data(), dist_bands[c],
+ sizeof(float) * num_dist_bands);
+ }
+ }
+};
+
+// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
+struct QuantEncodingInternal {
+ enum Mode {
+ kQuantModeLibrary,
+ kQuantModeID,
+ kQuantModeDCT2,
+ kQuantModeDCT4,
+ kQuantModeDCT4X8,
+ kQuantModeAFV,
+ kQuantModeDCT,
+ kQuantModeRAW,
+ };
+
+ template <Mode mode>
+ struct Tag {};
+
+ typedef std::array<std::array<float, 3>, 3> IdWeights;
+ typedef std::array<std::array<float, 6>, 3> DCT2Weights;
+ typedef std::array<std::array<float, 2>, 3> DCT4Multipliers;
+ typedef std::array<std::array<float, 9>, 3> AFVWeights;
+ typedef std::array<float, 3> DCT4x8Multipliers;
+
+ static constexpr QuantEncodingInternal Library(uint8_t predefined) {
+ return ((predefined < kNumPredefinedTables) ||
+ JXL_ABORT("Assert predefined < kNumPredefinedTables")),
+ QuantEncodingInternal(Tag<kQuantModeLibrary>(), predefined);
+ }
+ constexpr QuantEncodingInternal(Tag<kQuantModeLibrary> /* tag */,
+ uint8_t predefined)
+ : mode(kQuantModeLibrary), predefined(predefined) {}
+
+ // Identity
+ // xybweights is an array of {xweights, yweights, bweights}.
+ static constexpr QuantEncodingInternal Identity(const IdWeights& xybweights) {
+ return QuantEncodingInternal(Tag<kQuantModeID>(), xybweights);
+ }
+ constexpr QuantEncodingInternal(Tag<kQuantModeID> /* tag */,
+ const IdWeights& xybweights)
+ : mode(kQuantModeID), idweights(xybweights) {}
+
+ // DCT2
+ static constexpr QuantEncodingInternal DCT2(const DCT2Weights& xybweights) {
+ return QuantEncodingInternal(Tag<kQuantModeDCT2>(), xybweights);
+ }
+ constexpr QuantEncodingInternal(Tag<kQuantModeDCT2> /* tag */,
+ const DCT2Weights& xybweights)
+ : mode(kQuantModeDCT2), dct2weights(xybweights) {}
+
+ // DCT4
+ static constexpr QuantEncodingInternal DCT4(
+ const DctQuantWeightParams& params, const DCT4Multipliers& xybmul) {
+ return QuantEncodingInternal(Tag<kQuantModeDCT4>(), params, xybmul);
+ }
+ constexpr QuantEncodingInternal(Tag<kQuantModeDCT4> /* tag */,
+ const DctQuantWeightParams& params,
+ const DCT4Multipliers& xybmul)
+ : mode(kQuantModeDCT4), dct_params(params), dct4multipliers(xybmul) {}
+
+ // DCT4x8
+ static constexpr QuantEncodingInternal DCT4X8(
+ const DctQuantWeightParams& params, const DCT4x8Multipliers& xybmul) {
+ return QuantEncodingInternal(Tag<kQuantModeDCT4X8>(), params, xybmul);
+ }
+ constexpr QuantEncodingInternal(Tag<kQuantModeDCT4X8> /* tag */,
+ const DctQuantWeightParams& params,
+ const DCT4x8Multipliers& xybmul)
+ : mode(kQuantModeDCT4X8), dct_params(params), dct4x8multipliers(xybmul) {}
+
+ // DCT
+ static constexpr QuantEncodingInternal DCT(
+ const DctQuantWeightParams& params) {
+ return QuantEncodingInternal(Tag<kQuantModeDCT>(), params);
+ }
+ constexpr QuantEncodingInternal(Tag<kQuantModeDCT> /* tag */,
+ const DctQuantWeightParams& params)
+ : mode(kQuantModeDCT), dct_params(params) {}
+
+ // AFV
+ static constexpr QuantEncodingInternal AFV(
+ const DctQuantWeightParams& params4x8,
+ const DctQuantWeightParams& params4x4, const AFVWeights& weights) {
+ return QuantEncodingInternal(Tag<kQuantModeAFV>(), params4x8, params4x4,
+ weights);
+ }
+ constexpr QuantEncodingInternal(Tag<kQuantModeAFV> /* tag */,
+ const DctQuantWeightParams& params4x8,
+ const DctQuantWeightParams& params4x4,
+ const AFVWeights& weights)
+ : mode(kQuantModeAFV),
+ dct_params(params4x8),
+ afv_weights(weights),
+ dct_params_afv_4x4(params4x4) {}
+
+ // This constructor is not constexpr so it can't be used in any of the
+ // constexpr cases above.
+ explicit QuantEncodingInternal(Mode mode) : mode(mode) {}
+
+ Mode mode;
+
+ // Weights for DCT4+ tables.
+ DctQuantWeightParams dct_params;
+
+ union {
+ // Weights for identity.
+ IdWeights idweights;
+
+ // Weights for DCT2.
+ DCT2Weights dct2weights;
+
+ // Extra multipliers for coefficients 01/10 and 11 for DCT4 and AFV.
+ DCT4Multipliers dct4multipliers;
+
+ // Weights for AFV. {0, 1} are used directly for coefficients (0, 1) and (1,
+ // 0); {2, 3, 4} are used directly corner DC, (1,0) - (0,1) and (0, 1) +
+ // (1, 0) - (0, 0) inside the AFV block. Values from 5 to 8 are interpolated
+ // as in GetQuantWeights for DC and are used for other coefficients.
+ AFVWeights afv_weights = {};
+
+ // Extra multipliers for coefficients 01 or 10 for DCT4X8 and DCT8X4.
+ DCT4x8Multipliers dct4x8multipliers;
+
+ // Only used in kQuantModeRAW mode.
+ struct {
+ // explicit quantization table (like in JPEG)
+ std::vector<int>* qtable = nullptr;
+ float qtable_den = 1.f / (8 * 255);
+ } qraw;
+ };
+
+ // Weights for 4x4 sub-block in AFV.
+ DctQuantWeightParams dct_params_afv_4x4;
+
+ union {
+ // Which predefined table to use. Only used if mode is kQuantModeLibrary.
+ uint8_t predefined = 0;
+
+ // Which other quant table to copy; must copy from a table that comes before
+ // the current one. Only used if mode is kQuantModeCopy.
+ uint8_t source;
+ };
+};
+
+class QuantEncoding final : public QuantEncodingInternal {
+ public:
+ QuantEncoding(const QuantEncoding& other)
+ : QuantEncodingInternal(
+ static_cast<const QuantEncodingInternal&>(other)) {
+ if (mode == kQuantModeRAW && qraw.qtable) {
+ // Need to make a copy of the passed *qtable.
+ qraw.qtable = new std::vector<int>(*other.qraw.qtable);
+ }
+ }
+ QuantEncoding(QuantEncoding&& other) noexcept
+ : QuantEncodingInternal(
+ static_cast<const QuantEncodingInternal&>(other)) {
+ // Steal the qtable from the other object if any.
+ if (mode == kQuantModeRAW) {
+ other.qraw.qtable = nullptr;
+ }
+ }
+ QuantEncoding& operator=(const QuantEncoding& other) {
+ if (mode == kQuantModeRAW && qraw.qtable) {
+ delete qraw.qtable;
+ }
+ *static_cast<QuantEncodingInternal*>(this) =
+ QuantEncodingInternal(static_cast<const QuantEncodingInternal&>(other));
+ if (mode == kQuantModeRAW && qraw.qtable) {
+ // Need to make a copy of the passed *qtable.
+ qraw.qtable = new std::vector<int>(*other.qraw.qtable);
+ }
+ return *this;
+ }
+
+ ~QuantEncoding() {
+ if (mode == kQuantModeRAW && qraw.qtable) {
+ delete qraw.qtable;
+ }
+ }
+
+ // Wrappers of the QuantEncodingInternal:: static functions that return a
+ // QuantEncoding instead. This is using the explicit and private cast from
+ // QuantEncodingInternal to QuantEncoding, which would be inlined anyway.
+ // In general, you should use this wrappers. The only reason to directly
+ // create a QuantEncodingInternal instance is if you need a constexpr version
+ // of this class. Note that RAW() is not supported in that case since it uses
+ // a std::vector.
+ static QuantEncoding Library(uint8_t predefined_arg) {
+ return QuantEncoding(QuantEncodingInternal::Library(predefined_arg));
+ }
+ static QuantEncoding Identity(const IdWeights& xybweights) {
+ return QuantEncoding(QuantEncodingInternal::Identity(xybweights));
+ }
+ static QuantEncoding DCT2(const DCT2Weights& xybweights) {
+ return QuantEncoding(QuantEncodingInternal::DCT2(xybweights));
+ }
+ static QuantEncoding DCT4(const DctQuantWeightParams& params,
+ const DCT4Multipliers& xybmul) {
+ return QuantEncoding(QuantEncodingInternal::DCT4(params, xybmul));
+ }
+ static QuantEncoding DCT4X8(const DctQuantWeightParams& params,
+ const DCT4x8Multipliers& xybmul) {
+ return QuantEncoding(QuantEncodingInternal::DCT4X8(params, xybmul));
+ }
+ static QuantEncoding DCT(const DctQuantWeightParams& params) {
+ return QuantEncoding(QuantEncodingInternal::DCT(params));
+ }
+ static QuantEncoding AFV(const DctQuantWeightParams& params4x8,
+ const DctQuantWeightParams& params4x4,
+ const AFVWeights& weights) {
+ return QuantEncoding(
+ QuantEncodingInternal::AFV(params4x8, params4x4, weights));
+ }
+
+ // RAW, note that this one is not a constexpr one.
+ static QuantEncoding RAW(const std::vector<int>& qtable, int shift = 0) {
+ QuantEncoding encoding(kQuantModeRAW);
+ encoding.qraw.qtable = new std::vector<int>();
+ *encoding.qraw.qtable = qtable;
+ encoding.qraw.qtable_den = (1 << shift) * (1.f / (8 * 255));
+ return encoding;
+ }
+
+ private:
+ explicit QuantEncoding(const QuantEncodingInternal& other)
+ : QuantEncodingInternal(other) {}
+
+ explicit QuantEncoding(QuantEncodingInternal::Mode mode_arg)
+ : QuantEncodingInternal(mode_arg) {}
+};
+
+// A constexpr QuantEncodingInternal instance is often downcasted to the
+// QuantEncoding subclass even if the instance wasn't an instance of the
+// subclass. This is safe because user will upcast to QuantEncodingInternal to
+// access any of its members.
+static_assert(sizeof(QuantEncoding) == sizeof(QuantEncodingInternal),
+ "Don't add any members to QuantEncoding");
+
+// Let's try to keep these 2**N for possible future simplicity.
+const float kInvDCQuant[3] = {
+ 4096.0f,
+ 512.0f,
+ 256.0f,
+};
+
+const float kDCQuant[3] = {
+ 1.0f / kInvDCQuant[0],
+ 1.0f / kInvDCQuant[1],
+ 1.0f / kInvDCQuant[2],
+};
+
+class ModularFrameEncoder;
+class ModularFrameDecoder;
+
+class DequantMatrices {
+ public:
+ enum QuantTable : size_t {
+ DCT = 0,
+ IDENTITY,
+ DCT2X2,
+ DCT4X4,
+ DCT16X16,
+ DCT32X32,
+ // DCT16X8
+ DCT8X16,
+ // DCT32X8
+ DCT8X32,
+ // DCT32X16
+ DCT16X32,
+ DCT4X8,
+ // DCT8X4
+ AFV0,
+ // AFV1
+ // AFV2
+ // AFV3
+ DCT64X64,
+ // DCT64X32,
+ DCT32X64,
+ DCT128X128,
+ // DCT128X64,
+ DCT64X128,
+ DCT256X256,
+ // DCT256X128,
+ DCT128X256,
+ kNum
+ };
+
+ static constexpr QuantTable kQuantTable[] = {
+ QuantTable::DCT, QuantTable::IDENTITY, QuantTable::DCT2X2,
+ QuantTable::DCT4X4, QuantTable::DCT16X16, QuantTable::DCT32X32,
+ QuantTable::DCT8X16, QuantTable::DCT8X16, QuantTable::DCT8X32,
+ QuantTable::DCT8X32, QuantTable::DCT16X32, QuantTable::DCT16X32,
+ QuantTable::DCT4X8, QuantTable::DCT4X8, QuantTable::AFV0,
+ QuantTable::AFV0, QuantTable::AFV0, QuantTable::AFV0,
+ QuantTable::DCT64X64, QuantTable::DCT32X64, QuantTable::DCT32X64,
+ QuantTable::DCT128X128, QuantTable::DCT64X128, QuantTable::DCT64X128,
+ QuantTable::DCT256X256, QuantTable::DCT128X256, QuantTable::DCT128X256,
+ };
+ static_assert(AcStrategy::kNumValidStrategies ==
+ sizeof(kQuantTable) / sizeof *kQuantTable,
+ "Update this array when adding or removing AC strategies.");
+
+ DequantMatrices();
+
+ static const QuantEncoding* Library();
+
+ typedef std::array<QuantEncodingInternal, kNumPredefinedTables * kNum>
+ DequantLibraryInternal;
+ // Return the array of library kNumPredefinedTables QuantEncoding entries as
+ // a constexpr array. Use Library() to obtain a pointer to the copy in the
+ // .cc file.
+ static DequantLibraryInternal LibraryInit();
+
+ // Returns aligned memory.
+ JXL_INLINE const float* Matrix(size_t quant_kind, size_t c) const {
+ JXL_DASSERT(quant_kind < AcStrategy::kNumValidStrategies);
+ JXL_DASSERT((1 << quant_kind) & computed_mask_);
+ return &table_[table_offsets_[quant_kind * 3 + c]];
+ }
+
+ JXL_INLINE const float* InvMatrix(size_t quant_kind, size_t c) const {
+ JXL_DASSERT(quant_kind < AcStrategy::kNumValidStrategies);
+ JXL_DASSERT((1 << quant_kind) & computed_mask_);
+ return &inv_table_[table_offsets_[quant_kind * 3 + c]];
+ }
+
+ // DC quants are used in modular mode for XYB multipliers.
+ JXL_INLINE float DCQuant(size_t c) const { return dc_quant_[c]; }
+ JXL_INLINE const float* DCQuants() const { return dc_quant_; }
+
+ JXL_INLINE float InvDCQuant(size_t c) const { return inv_dc_quant_[c]; }
+
+ // For encoder.
+ void SetEncodings(const std::vector<QuantEncoding>& encodings) {
+ encodings_ = encodings;
+ computed_mask_ = 0;
+ }
+
+ // For encoder.
+ void SetDCQuant(const float dc[3]) {
+ for (size_t c = 0; c < 3; c++) {
+ dc_quant_[c] = 1.0f / dc[c];
+ inv_dc_quant_[c] = dc[c];
+ }
+ }
+
+ Status Decode(BitReader* br,
+ ModularFrameDecoder* modular_frame_decoder = nullptr);
+ Status DecodeDC(BitReader* br);
+
+ const std::vector<QuantEncoding>& encodings() const { return encodings_; }
+
+ static constexpr size_t required_size_x[] = {1, 1, 1, 1, 2, 4, 1, 1, 2,
+ 1, 1, 8, 4, 16, 8, 32, 16};
+ static_assert(kNum == sizeof(required_size_x) / sizeof(*required_size_x),
+ "Update this array when adding or removing quant tables.");
+
+ static constexpr size_t required_size_y[] = {1, 1, 1, 1, 2, 4, 2, 4, 4,
+ 1, 1, 8, 8, 16, 16, 32, 32};
+ static_assert(kNum == sizeof(required_size_y) / sizeof(*required_size_y),
+ "Update this array when adding or removing quant tables.");
+
+ Status EnsureComputed(uint32_t acs_mask);
+
+ private:
+ static constexpr size_t required_size_[] = {
+ 1, 1, 1, 1, 4, 16, 2, 4, 8, 1, 1, 64, 32, 256, 128, 1024, 512};
+ static_assert(kNum == sizeof(required_size_) / sizeof(*required_size_),
+ "Update this array when adding or removing quant tables.");
+ static constexpr size_t kTotalTableSize =
+ ArraySum(required_size_) * kDCTBlockSize * 3;
+
+ uint32_t computed_mask_ = 0;
+ // kTotalTableSize entries followed by kTotalTableSize for inv_table
+ hwy::AlignedFreeUniquePtr<float[]> table_storage_;
+ const float* table_;
+ const float* inv_table_;
+ float dc_quant_[3] = {kDCQuant[0], kDCQuant[1], kDCQuant[2]};
+ float inv_dc_quant_[3] = {kInvDCQuant[0], kInvDCQuant[1], kInvDCQuant[2]};
+ size_t table_offsets_[AcStrategy::kNumValidStrategies * 3];
+ std::vector<QuantEncoding> encodings_;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_QUANT_WEIGHTS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/quant_weights_test.cc b/third_party/jpeg-xl/lib/jxl/quant_weights_test.cc
new file mode 100644
index 0000000000..f0497948a7
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/quant_weights_test.cc
@@ -0,0 +1,240 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+#include "lib/jxl/quant_weights.h"
+
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <hwy/base.h> // HWY_ALIGN_MAX
+#include <hwy/tests/test_util-inl.h>
+#include <numeric>
+
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/dct_for_test.h"
+#include "lib/jxl/dec_transforms_testonly.h"
+#include "lib/jxl/enc_modular.h"
+#include "lib/jxl/enc_quant_weights.h"
+#include "lib/jxl/enc_transforms.h"
+
+namespace jxl {
+namespace {
+
+template <typename T>
+void CheckSimilar(T a, T b) {
+ EXPECT_EQ(a, b);
+}
+// minimum exponent = -15.
+template <>
+void CheckSimilar(float a, float b) {
+ float m = std::max(std::abs(a), std::abs(b));
+ // 10 bits of precision are used in the format. Relative error should be
+ // below 2^-10.
+ EXPECT_LE(std::abs(a - b), m / 1024.0f) << "a: " << a << " b: " << b;
+}
+
+TEST(QuantWeightsTest, DC) {
+ DequantMatrices mat;
+ float dc_quant[3] = {1e+5, 1e+3, 1e+1};
+ DequantMatricesSetCustomDC(&mat, dc_quant);
+ for (size_t c = 0; c < 3; c++) {
+ CheckSimilar(mat.InvDCQuant(c), dc_quant[c]);
+ }
+}
+
+void RoundtripMatrices(const std::vector<QuantEncoding>& encodings) {
+ ASSERT_TRUE(encodings.size() == DequantMatrices::kNum);
+ DequantMatrices mat;
+ CodecMetadata metadata;
+ FrameHeader frame_header(&metadata);
+ ModularFrameEncoder encoder(frame_header, CompressParams{});
+ DequantMatricesSetCustom(&mat, encodings, &encoder);
+ const std::vector<QuantEncoding>& encodings_dec = mat.encodings();
+ for (size_t i = 0; i < encodings.size(); i++) {
+ const QuantEncoding& e = encodings[i];
+ const QuantEncoding& d = encodings_dec[i];
+ // Check values roundtripped correctly.
+ EXPECT_EQ(e.mode, d.mode);
+ EXPECT_EQ(e.predefined, d.predefined);
+ EXPECT_EQ(e.source, d.source);
+
+ EXPECT_EQ(static_cast<uint64_t>(e.dct_params.num_distance_bands),
+ static_cast<uint64_t>(d.dct_params.num_distance_bands));
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t j = 0; j < DctQuantWeightParams::kMaxDistanceBands; j++) {
+ CheckSimilar(e.dct_params.distance_bands[c][j],
+ d.dct_params.distance_bands[c][j]);
+ }
+ }
+
+ if (e.mode == QuantEncoding::kQuantModeRAW) {
+ EXPECT_FALSE(!e.qraw.qtable);
+ EXPECT_FALSE(!d.qraw.qtable);
+ EXPECT_EQ(e.qraw.qtable->size(), d.qraw.qtable->size());
+ for (size_t j = 0; j < e.qraw.qtable->size(); j++) {
+ EXPECT_EQ((*e.qraw.qtable)[j], (*d.qraw.qtable)[j]);
+ }
+ EXPECT_NEAR(e.qraw.qtable_den, d.qraw.qtable_den, 1e-7f);
+ } else {
+ // modes different than kQuantModeRAW use one of the other fields used
+ // here, which all happen to be arrays of floats.
+ for (size_t c = 0; c < 3; c++) {
+ for (size_t j = 0; j < 3; j++) {
+ CheckSimilar(e.idweights[c][j], d.idweights[c][j]);
+ }
+ for (size_t j = 0; j < 6; j++) {
+ CheckSimilar(e.dct2weights[c][j], d.dct2weights[c][j]);
+ }
+ for (size_t j = 0; j < 2; j++) {
+ CheckSimilar(e.dct4multipliers[c][j], d.dct4multipliers[c][j]);
+ }
+ CheckSimilar(e.dct4x8multipliers[c], d.dct4x8multipliers[c]);
+ for (size_t j = 0; j < 9; j++) {
+ CheckSimilar(e.afv_weights[c][j], d.afv_weights[c][j]);
+ }
+ for (size_t j = 0; j < DctQuantWeightParams::kMaxDistanceBands; j++) {
+ CheckSimilar(e.dct_params_afv_4x4.distance_bands[c][j],
+ d.dct_params_afv_4x4.distance_bands[c][j]);
+ }
+ }
+ }
+ }
+}
+
+TEST(QuantWeightsTest, AllDefault) {
+ std::vector<QuantEncoding> encodings(DequantMatrices::kNum,
+ QuantEncoding::Library(0));
+ RoundtripMatrices(encodings);
+}
+
+void TestSingleQuantMatrix(DequantMatrices::QuantTable kind) {
+ std::vector<QuantEncoding> encodings(DequantMatrices::kNum,
+ QuantEncoding::Library(0));
+ encodings[kind] = DequantMatrices::Library()[kind];
+ RoundtripMatrices(encodings);
+}
+
+// Ensure we can reasonably represent default quant tables.
+TEST(QuantWeightsTest, DCT) { TestSingleQuantMatrix(DequantMatrices::DCT); }
+TEST(QuantWeightsTest, IDENTITY) {
+ TestSingleQuantMatrix(DequantMatrices::IDENTITY);
+}
+TEST(QuantWeightsTest, DCT2X2) {
+ TestSingleQuantMatrix(DequantMatrices::DCT2X2);
+}
+TEST(QuantWeightsTest, DCT4X4) {
+ TestSingleQuantMatrix(DequantMatrices::DCT4X4);
+}
+TEST(QuantWeightsTest, DCT16X16) {
+ TestSingleQuantMatrix(DequantMatrices::DCT16X16);
+}
+TEST(QuantWeightsTest, DCT32X32) {
+ TestSingleQuantMatrix(DequantMatrices::DCT32X32);
+}
+TEST(QuantWeightsTest, DCT8X16) {
+ TestSingleQuantMatrix(DequantMatrices::DCT8X16);
+}
+TEST(QuantWeightsTest, DCT8X32) {
+ TestSingleQuantMatrix(DequantMatrices::DCT8X32);
+}
+TEST(QuantWeightsTest, DCT16X32) {
+ TestSingleQuantMatrix(DequantMatrices::DCT16X32);
+}
+TEST(QuantWeightsTest, DCT4X8) {
+ TestSingleQuantMatrix(DequantMatrices::DCT4X8);
+}
+TEST(QuantWeightsTest, AFV0) { TestSingleQuantMatrix(DequantMatrices::AFV0); }
+TEST(QuantWeightsTest, RAW) {
+ std::vector<QuantEncoding> encodings(DequantMatrices::kNum,
+ QuantEncoding::Library(0));
+ std::vector<int> matrix(3 * 32 * 32);
+ Rng rng(0);
+ for (size_t i = 0; i < matrix.size(); i++) matrix[i] = rng.UniformI(1, 256);
+ encodings[DequantMatrices::kQuantTable[AcStrategy::DCT32X32]] =
+ QuantEncoding::RAW(matrix, 2);
+ RoundtripMatrices(encodings);
+}
+
+class QuantWeightsTargetTest : public hwy::TestWithParamTarget {};
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(QuantWeightsTargetTest);
+
+TEST_P(QuantWeightsTargetTest, DCTUniform) {
+ constexpr float kUniformQuant = 4;
+ float weights[3][2] = {{1.0f / kUniformQuant, 0},
+ {1.0f / kUniformQuant, 0},
+ {1.0f / kUniformQuant, 0}};
+ DctQuantWeightParams dct_params(weights);
+ std::vector<QuantEncoding> encodings(DequantMatrices::kNum,
+ QuantEncoding::DCT(dct_params));
+ DequantMatrices dequant_matrices;
+ CodecMetadata metadata;
+ FrameHeader frame_header(&metadata);
+ ModularFrameEncoder encoder(frame_header, CompressParams{});
+ DequantMatricesSetCustom(&dequant_matrices, encodings, &encoder);
+ JXL_CHECK(dequant_matrices.EnsureComputed(~0u));
+
+ const float dc_quant[3] = {1.0f / kUniformQuant, 1.0f / kUniformQuant,
+ 1.0f / kUniformQuant};
+ DequantMatricesSetCustomDC(&dequant_matrices, dc_quant);
+
+ HWY_ALIGN_MAX float scratch_space[16 * 16 * 2];
+
+ // DCT8
+ {
+ HWY_ALIGN_MAX float pixels[64];
+ std::iota(std::begin(pixels), std::end(pixels), 0);
+ HWY_ALIGN_MAX float coeffs[64];
+ const AcStrategy::Type dct = AcStrategy::DCT;
+ TransformFromPixels(dct, pixels, 8, coeffs, scratch_space);
+ HWY_ALIGN_MAX double slow_coeffs[64];
+ for (size_t i = 0; i < 64; i++) slow_coeffs[i] = pixels[i];
+ DCTSlow<8>(slow_coeffs);
+
+ for (size_t i = 0; i < 64; i++) {
+ // DCTSlow doesn't multiply/divide by 1/N, so we do it manually.
+ slow_coeffs[i] = roundf(slow_coeffs[i] / kUniformQuant) * kUniformQuant;
+ coeffs[i] = roundf(coeffs[i] / dequant_matrices.Matrix(dct, 0)[i]) *
+ dequant_matrices.Matrix(dct, 0)[i];
+ }
+ IDCTSlow<8>(slow_coeffs);
+ TransformToPixels(dct, coeffs, pixels, 8, scratch_space);
+ for (size_t i = 0; i < 64; i++) {
+ EXPECT_NEAR(pixels[i], slow_coeffs[i], 1e-4);
+ }
+ }
+
+ // DCT16
+ {
+ HWY_ALIGN_MAX float pixels[64 * 4];
+ std::iota(std::begin(pixels), std::end(pixels), 0);
+ HWY_ALIGN_MAX float coeffs[64 * 4];
+ const AcStrategy::Type dct = AcStrategy::DCT16X16;
+ TransformFromPixels(dct, pixels, 16, coeffs, scratch_space);
+ HWY_ALIGN_MAX double slow_coeffs[64 * 4];
+ for (size_t i = 0; i < 64 * 4; i++) slow_coeffs[i] = pixels[i];
+ DCTSlow<16>(slow_coeffs);
+
+ for (size_t i = 0; i < 64 * 4; i++) {
+ slow_coeffs[i] = roundf(slow_coeffs[i] / kUniformQuant) * kUniformQuant;
+ coeffs[i] = roundf(coeffs[i] / dequant_matrices.Matrix(dct, 0)[i]) *
+ dequant_matrices.Matrix(dct, 0)[i];
+ }
+
+ IDCTSlow<16>(slow_coeffs);
+ TransformToPixels(dct, coeffs, pixels, 16, scratch_space);
+ for (size_t i = 0; i < 64 * 4; i++) {
+ EXPECT_NEAR(pixels[i], slow_coeffs[i], 1e-4);
+ }
+ }
+
+ // Check that all matrices have the same DC quantization, i.e. that they all
+ // have the same scaling.
+ for (size_t i = 0; i < AcStrategy::kNumValidStrategies; i++) {
+ EXPECT_NEAR(dequant_matrices.Matrix(i, 0)[0], kUniformQuant, 1e-6);
+ }
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/quantizer-inl.h b/third_party/jpeg-xl/lib/jxl/quantizer-inl.h
new file mode 100644
index 0000000000..64d273c552
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/quantizer-inl.h
@@ -0,0 +1,74 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JXL_QUANTIZER_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_QUANTIZER_INL_H_
+#undef LIB_JXL_QUANTIZER_INL_H_
+#else
+#define LIB_JXL_QUANTIZER_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::And;
+using hwy::HWY_NAMESPACE::AndNot;
+using hwy::HWY_NAMESPACE::ApproximateReciprocal;
+using hwy::HWY_NAMESPACE::Gt;
+using hwy::HWY_NAMESPACE::IfThenElse;
+using hwy::HWY_NAMESPACE::IfThenElseZero;
+using hwy::HWY_NAMESPACE::Lt;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::Vec;
+using hwy::HWY_NAMESPACE::Xor;
+
+template <class DI>
+HWY_INLINE HWY_MAYBE_UNUSED Vec<Rebind<float, DI>> AdjustQuantBias(
+ DI di, const size_t c, const Vec<DI> quant_i,
+ const float* HWY_RESTRICT biases) {
+ const Rebind<float, DI> df;
+
+ const auto quant = ConvertTo(df, quant_i);
+
+ // Compare |quant|, keep sign bit for negating result.
+ const auto kSign = BitCast(df, Set(di, INT32_MIN));
+ const auto sign = And(quant, kSign); // TODO(janwas): = abs ^ orig
+ const auto abs_quant = AndNot(kSign, quant);
+
+ // If |x| is 1, kZeroBias creates a different bias for each channel.
+ // We're implementing the following:
+ // if (quant == 0) return 0;
+ // if (quant == 1) return biases[c];
+ // if (quant == -1) return -biases[c];
+ // return quant - biases[3] / quant;
+
+ // Integer comparison is not helpful because Clang incurs bypass penalties
+ // from unnecessarily mixing integer and float.
+ const auto is_01 = Lt(abs_quant, Set(df, 1.125f));
+ const auto not_0 = Gt(abs_quant, Zero(df));
+
+ // Bitwise logic is faster than quant * biases[c].
+ const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign));
+
+ // About 2E-5 worse than ReciprocalNR or division.
+ const auto bias =
+ NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant);
+
+ return IfThenElse(is_01, one_bias, bias);
+}
+
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif // LIB_JXL_QUANTIZER_INL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/quantizer.cc b/third_party/jpeg-xl/lib/jxl/quantizer.cc
new file mode 100644
index 0000000000..153cf19b21
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/quantizer.cc
@@ -0,0 +1,156 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/quantizer.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include <algorithm>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/field_encodings.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/quant_weights.h"
+
+namespace jxl {
+
+static const int32_t kDefaultQuant = 64;
+
+constexpr int32_t Quantizer::kQuantMax;
+
+Quantizer::Quantizer(const DequantMatrices* dequant)
+ : Quantizer(dequant, kDefaultQuant, kGlobalScaleDenom / kDefaultQuant) {}
+
+Quantizer::Quantizer(const DequantMatrices* dequant, int quant_dc,
+ int global_scale)
+ : global_scale_(global_scale), quant_dc_(quant_dc), dequant_(dequant) {
+ JXL_ASSERT(dequant_ != nullptr);
+ RecomputeFromGlobalScale();
+ inv_quant_dc_ = inv_global_scale_ / quant_dc_;
+
+ memcpy(zero_bias_, kZeroBiasDefault, sizeof(kZeroBiasDefault));
+}
+
+void Quantizer::ComputeGlobalScaleAndQuant(float quant_dc, float quant_median,
+ float quant_median_absd) {
+ // Target value for the median value in the quant field.
+ const float kQuantFieldTarget = 5;
+ // We reduce the median of the quant field by the median absolute deviation:
+ // higher resolution on highly varying quant fields.
+ float scale = kGlobalScaleDenom * (quant_median - quant_median_absd) /
+ kQuantFieldTarget;
+ // Ensure that new_global_scale is positive and no more than 1<<15.
+ if (scale < 1) scale = 1;
+ if (scale > (1 << 15)) scale = 1 << 15;
+ int new_global_scale = static_cast<int>(scale);
+ // Ensure that quant_dc_ will always be at least
+ // 0.625 * kGlobalScaleDenom/kGlobalScaleNumerator = 10.
+ const int scaled_quant_dc =
+ static_cast<int>(quant_dc * kGlobalScaleNumerator * 1.6);
+ if (new_global_scale > scaled_quant_dc) {
+ new_global_scale = scaled_quant_dc;
+ if (new_global_scale <= 0) new_global_scale = 1;
+ }
+ global_scale_ = new_global_scale;
+ // Code below uses inv_global_scale_.
+ RecomputeFromGlobalScale();
+
+ float fval = quant_dc * inv_global_scale_ + 0.5f;
+ fval = std::min<float>(1 << 16, fval);
+ const int new_quant_dc = static_cast<int>(fval);
+ quant_dc_ = new_quant_dc;
+
+ // quant_dc_ was updated, recompute values.
+ RecomputeFromGlobalScale();
+}
+
+void Quantizer::SetQuantFieldRect(const ImageF& qf, const Rect& rect,
+ ImageI* JXL_RESTRICT raw_quant_field) const {
+ for (size_t y = 0; y < rect.ysize(); ++y) {
+ const float* JXL_RESTRICT row_qf = rect.ConstRow(qf, y);
+ int32_t* JXL_RESTRICT row_qi = rect.Row(raw_quant_field, y);
+ for (size_t x = 0; x < rect.xsize(); ++x) {
+ int val = ClampVal(row_qf[x] * inv_global_scale_ + 0.5f);
+ row_qi[x] = val;
+ }
+ }
+}
+
+void Quantizer::SetQuantField(const float quant_dc, const ImageF& qf,
+ ImageI* JXL_RESTRICT raw_quant_field) {
+ std::vector<float> data(qf.xsize() * qf.ysize());
+ for (size_t y = 0; y < qf.ysize(); ++y) {
+ const float* JXL_RESTRICT row_qf = qf.Row(y);
+ for (size_t x = 0; x < qf.xsize(); ++x) {
+ float quant = row_qf[x];
+ data[qf.xsize() * y + x] = quant;
+ }
+ }
+ std::nth_element(data.begin(), data.begin() + data.size() / 2, data.end());
+ const float quant_median = data[data.size() / 2];
+ std::vector<float> deviations(data.size());
+ for (size_t i = 0; i < data.size(); i++) {
+ deviations[i] = fabsf(data[i] - quant_median);
+ }
+ std::nth_element(deviations.begin(),
+ deviations.begin() + deviations.size() / 2,
+ deviations.end());
+ const float quant_median_absd = deviations[deviations.size() / 2];
+ ComputeGlobalScaleAndQuant(quant_dc, quant_median, quant_median_absd);
+ if (raw_quant_field) {
+ JXL_CHECK(SameSize(*raw_quant_field, qf));
+ SetQuantFieldRect(qf, Rect(qf), raw_quant_field);
+ }
+}
+
+void Quantizer::SetQuant(float quant_dc, float quant_ac,
+ ImageI* JXL_RESTRICT raw_quant_field) {
+ ComputeGlobalScaleAndQuant(quant_dc, quant_ac, 0);
+ int32_t val = ClampVal(quant_ac * inv_global_scale_ + 0.5f);
+ FillImage(val, raw_quant_field);
+}
+
+Status QuantizerParams::VisitFields(Visitor* JXL_RESTRICT visitor) {
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(
+ BitsOffset(11, 1), BitsOffset(11, 2049), BitsOffset(12, 4097),
+ BitsOffset(16, 8193), 1, &global_scale));
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(16), BitsOffset(5, 1),
+ BitsOffset(8, 1), BitsOffset(16, 1), 1,
+ &quant_dc));
+ return true;
+}
+
+QuantizerParams Quantizer::GetParams() const {
+ QuantizerParams params;
+ params.global_scale = global_scale_;
+ params.quant_dc = quant_dc_;
+ return params;
+}
+
+Status Quantizer::Decode(BitReader* reader) {
+ QuantizerParams params;
+ JXL_RETURN_IF_ERROR(Bundle::Read(reader, &params));
+ global_scale_ = static_cast<int>(params.global_scale);
+ quant_dc_ = static_cast<int>(params.quant_dc);
+ RecomputeFromGlobalScale();
+ return true;
+}
+
+void Quantizer::DumpQuantizationMap(const ImageI& raw_quant_field) const {
+ printf("Global scale: %d (%.7f)\nDC quant: %d\n", global_scale_,
+ global_scale_ * 1.0 / kGlobalScaleDenom, quant_dc_);
+ printf("AC quantization Map:\n");
+ for (size_t y = 0; y < raw_quant_field.ysize(); ++y) {
+ for (size_t x = 0; x < raw_quant_field.xsize(); ++x) {
+ printf(" %3d", raw_quant_field.Row(y)[x]);
+ }
+ printf("\n");
+ }
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/quantizer.h b/third_party/jpeg-xl/lib/jxl/quantizer.h
new file mode 100644
index 0000000000..d78ba7b3fc
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/quantizer.h
@@ -0,0 +1,182 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_QUANTIZER_H_
+#define LIB_JXL_QUANTIZER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <algorithm>
+#include <cmath>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/profiler.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_util.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/fields.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/quant_weights.h"
+
+// Quantizes DC and AC coefficients, with separate quantization tables according
+// to the quant_kind (which is currently computed from the AC strategy and the
+// block index inside that strategy).
+
+namespace jxl {
+
+static constexpr int kGlobalScaleDenom = 1 << 16;
+static constexpr int kGlobalScaleNumerator = 4096;
+
+// zero-biases for quantizing channels X, Y, B
+static constexpr float kZeroBiasDefault[3] = {0.5f, 0.5f, 0.5f};
+
+// Returns adjusted version of a quantized integer, such that its value is
+// closer to the expected value of the original.
+// The residuals of AC coefficients that we quantize are not uniformly
+// distributed. Numerical experiments show that they have a distribution with
+// the "shape" of 1/(1+x^2) [up to some coefficients]. This means that the
+// expected value of a coefficient that gets quantized to x will not be x
+// itself, but (at least with reasonable approximation):
+// - 0 if x is 0
+// - x * biases[c] if x is 1 or -1
+// - x - biases[3]/x otherwise
+// This follows from computing the distribution of the quantization bias, which
+// can be approximated fairly well by <constant>/x when |x| is at least two.
+static constexpr float kBiasNumerator = 0.145f;
+
+static constexpr float kDefaultQuantBias[4] = {
+ 1.0f - 0.05465007330715401f,
+ 1.0f - 0.07005449891748593f,
+ 1.0f - 0.049935103337343655f,
+ 0.145f,
+};
+
+struct QuantizerParams;
+
+class Quantizer {
+ public:
+ explicit Quantizer(const DequantMatrices* dequant);
+ Quantizer(const DequantMatrices* dequant, int quant_dc, int global_scale);
+
+ static constexpr int32_t kQuantMax = 256;
+
+ static JXL_INLINE int32_t ClampVal(float val) {
+ return static_cast<int32_t>(
+ std::max(1.0f, std::min<float>(val, kQuantMax)));
+ }
+
+ float ScaleGlobalScale(const float scale) {
+ int new_global_scale = static_cast<int>(global_scale_ * scale + 0.5f);
+ float scale_out = new_global_scale * 1.0f / global_scale_;
+ global_scale_ = new_global_scale;
+ RecomputeFromGlobalScale();
+ return scale_out;
+ }
+
+ // Recomputes other derived fields after global_scale_ has changed.
+ void RecomputeFromGlobalScale() {
+ global_scale_float_ = global_scale_ * (1.0 / kGlobalScaleDenom);
+ inv_global_scale_ = 1.0 * kGlobalScaleDenom / global_scale_;
+ inv_quant_dc_ = inv_global_scale_ / quant_dc_;
+ for (size_t c = 0; c < 3; c++) {
+ mul_dc_[c] = GetDcStep(c);
+ inv_mul_dc_[c] = GetInvDcStep(c);
+ }
+ }
+
+ // Returns scaling factor such that Scale() * (RawDC() or RawQuantField())
+ // pixels yields the same float values returned by GetQuantField.
+ JXL_INLINE float Scale() const { return global_scale_float_; }
+
+ // Reciprocal of Scale().
+ JXL_INLINE float InvGlobalScale() const { return inv_global_scale_; }
+
+ void SetQuantFieldRect(const ImageF& qf, const Rect& rect,
+ ImageI* JXL_RESTRICT raw_quant_field) const;
+
+ void SetQuantField(float quant_dc, const ImageF& qf,
+ ImageI* JXL_RESTRICT raw_quant_field);
+
+ void SetQuant(float quant_dc, float quant_ac,
+ ImageI* JXL_RESTRICT raw_quant_field);
+
+ // Returns the DC quantization base value, which is currently global (not
+ // adaptive). The actual scale factor used to dequantize pixels in channel c
+ // is: inv_quant_dc() * dequant_->DCQuant(c).
+ float inv_quant_dc() const { return inv_quant_dc_; }
+
+ // Dequantize by multiplying with this times dequant_matrix.
+ float inv_quant_ac(int32_t quant) const { return inv_global_scale_ / quant; }
+
+ QuantizerParams GetParams() const;
+
+ Status Decode(BitReader* reader);
+
+ void DumpQuantizationMap(const ImageI& raw_quant_field) const;
+
+ JXL_INLINE const float* DequantMatrix(size_t quant_kind, size_t c) const {
+ return dequant_->Matrix(quant_kind, c);
+ }
+
+ JXL_INLINE const float* InvDequantMatrix(size_t quant_kind, size_t c) const {
+ return dequant_->InvMatrix(quant_kind, c);
+ }
+
+ // Calculates DC quantization step.
+ JXL_INLINE float GetDcStep(size_t c) const {
+ return inv_quant_dc_ * dequant_->DCQuant(c);
+ }
+ JXL_INLINE float GetInvDcStep(size_t c) const {
+ return dequant_->InvDCQuant(c) * (global_scale_float_ * quant_dc_);
+ }
+
+ JXL_INLINE const float* MulDC() const { return mul_dc_; }
+ JXL_INLINE const float* InvMulDC() const { return inv_mul_dc_; }
+
+ JXL_INLINE void ClearDCMul() {
+ std::fill(mul_dc_, mul_dc_ + 4, 1.f);
+ std::fill(inv_mul_dc_, inv_mul_dc_ + 4, 1.f);
+ }
+
+ void ComputeGlobalScaleAndQuant(float quant_dc, float quant_median,
+ float quant_median_absd);
+
+ private:
+ float mul_dc_[4];
+ float inv_mul_dc_[4];
+
+ // These are serialized:
+ int global_scale_;
+ int quant_dc_;
+
+ // These are derived from global_scale_:
+ float inv_global_scale_;
+ float global_scale_float_; // reciprocal of inv_global_scale_
+ float inv_quant_dc_;
+
+ float zero_bias_[3];
+ const DequantMatrices* dequant_;
+};
+
+struct QuantizerParams : public Fields {
+ QuantizerParams() { Bundle::Init(this); }
+ JXL_FIELDS_NAME(QuantizerParams)
+
+ Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+ uint32_t global_scale;
+ uint32_t quant_dc;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_QUANTIZER_H_
diff --git a/third_party/jpeg-xl/lib/jxl/quantizer_test.cc b/third_party/jpeg-xl/lib/jxl/quantizer_test.cc
new file mode 100644
index 0000000000..f9cf2c838e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/quantizer_test.cc
@@ -0,0 +1,81 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/quantizer.h"
+
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/enc_fields.h"
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+void TestEquivalence(int qxsize, int qysize, const Quantizer& quantizer1,
+ const Quantizer& quantizer2) {
+ ASSERT_NEAR(quantizer1.inv_quant_dc(), quantizer2.inv_quant_dc(), 1e-7);
+}
+
+TEST(QuantizerTest, QuantizerParams) {
+ for (uint32_t i = 1; i < 10000; ++i) {
+ QuantizerParams p;
+ p.global_scale = i;
+ size_t extension_bits = 0, total_bits = 0;
+ EXPECT_TRUE(Bundle::CanEncode(p, &extension_bits, &total_bits));
+ EXPECT_EQ(0u, extension_bits);
+ EXPECT_GE(total_bits, 4u);
+ }
+}
+
+TEST(QuantizerTest, BitStreamRoundtripSameQuant) {
+ const int qxsize = 8;
+ const int qysize = 8;
+ DequantMatrices dequant;
+ Quantizer quantizer1(&dequant);
+ ImageI raw_quant_field(qxsize, qysize);
+ quantizer1.SetQuant(0.17f, 0.17f, &raw_quant_field);
+ BitWriter writer;
+ QuantizerParams params = quantizer1.GetParams();
+ EXPECT_TRUE(WriteQuantizerParams(params, &writer, 0, nullptr));
+ writer.ZeroPadToByte();
+ const size_t bits_written = writer.BitsWritten();
+ Quantizer quantizer2(&dequant);
+ BitReader reader(writer.GetSpan());
+ EXPECT_TRUE(quantizer2.Decode(&reader));
+ EXPECT_TRUE(reader.JumpToByteBoundary());
+ EXPECT_EQ(reader.TotalBitsConsumed(), bits_written);
+ EXPECT_TRUE(reader.Close());
+ TestEquivalence(qxsize, qysize, quantizer1, quantizer2);
+}
+
+TEST(QuantizerTest, BitStreamRoundtripRandomQuant) {
+ const int qxsize = 8;
+ const int qysize = 8;
+ DequantMatrices dequant;
+ Quantizer quantizer1(&dequant);
+ ImageI raw_quant_field(qxsize, qysize);
+ quantizer1.SetQuant(0.17f, 0.17f, &raw_quant_field);
+ float quant_dc = 0.17f;
+ ImageF qf(qxsize, qysize);
+ RandomFillImage(&qf, 0.0f, 1.0f);
+ quantizer1.SetQuantField(quant_dc, qf, &raw_quant_field);
+ BitWriter writer;
+ QuantizerParams params = quantizer1.GetParams();
+ EXPECT_TRUE(WriteQuantizerParams(params, &writer, 0, nullptr));
+ writer.ZeroPadToByte();
+ const size_t bits_written = writer.BitsWritten();
+ Quantizer quantizer2(&dequant);
+ BitReader reader(writer.GetSpan());
+ EXPECT_TRUE(quantizer2.Decode(&reader));
+ EXPECT_TRUE(reader.JumpToByteBoundary());
+ EXPECT_EQ(reader.TotalBitsConsumed(), bits_written);
+ EXPECT_TRUE(reader.Close());
+ TestEquivalence(qxsize, qysize, quantizer1, quantizer2);
+}
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/rational_polynomial-inl.h b/third_party/jpeg-xl/lib/jxl/rational_polynomial-inl.h
new file mode 100644
index 0000000000..176e24092c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/rational_polynomial-inl.h
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Fast SIMD evaluation of rational polynomials for approximating functions.
+
+#if defined(LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_
+#undef LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_
+#else
+#define LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Div;
+using hwy::HWY_NAMESPACE::MulAdd;
+
+// Primary template: default to actual division.
+template <typename T, class V>
+struct FastDivision {
+ HWY_INLINE V operator()(const V n, const V d) const { return n / d; }
+};
+// Partial specialization for float vectors.
+template <class V>
+struct FastDivision<float, V> {
+ // One Newton-Raphson iteration.
+ static HWY_INLINE V ReciprocalNR(const V x) {
+ const auto rcp = ApproximateReciprocal(x);
+ const auto sum = Add(rcp, rcp);
+ const auto x_rcp = Mul(x, rcp);
+ return NegMulAdd(x_rcp, rcp, sum);
+ }
+
+ V operator()(const V n, const V d) const {
+#if 1 // Faster on SKX
+ return Div(n, d);
+#else
+ return n * ReciprocalNR(d);
+#endif
+ }
+};
+
+// Approximates smooth functions via rational polynomials (i.e. dividing two
+// polynomials). Evaluates polynomials via Horner's scheme, which is faster than
+// Clenshaw recurrence for Chebyshev polynomials. LoadDup128 allows us to
+// specify constants (replicated 4x) independently of the lane count.
+template <size_t NP, size_t NQ, class D, class V, typename T>
+HWY_INLINE HWY_MAYBE_UNUSED V EvalRationalPolynomial(const D d, const V x,
+ const T (&p)[NP],
+ const T (&q)[NQ]) {
+ constexpr size_t kDegP = NP / 4 - 1;
+ constexpr size_t kDegQ = NQ / 4 - 1;
+ auto yp = LoadDup128(d, &p[kDegP * 4]);
+ auto yq = LoadDup128(d, &q[kDegQ * 4]);
+ // We use pointer arithmetic to refer to &p[(kDegP - n) * 4] to avoid a
+ // compiler warning that the index is out of bounds since we are already
+ // checking that it is not out of bounds with (kDegP >= n) and the access
+ // will be optimized away. Similarly with q and kDegQ.
+ HWY_FENCE;
+ if (kDegP >= 1) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 1) * 4)));
+ if (kDegQ >= 1) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 1) * 4)));
+ HWY_FENCE;
+ if (kDegP >= 2) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 2) * 4)));
+ if (kDegQ >= 2) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 2) * 4)));
+ HWY_FENCE;
+ if (kDegP >= 3) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 3) * 4)));
+ if (kDegQ >= 3) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 3) * 4)));
+ HWY_FENCE;
+ if (kDegP >= 4) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 4) * 4)));
+ if (kDegQ >= 4) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 4) * 4)));
+ HWY_FENCE;
+ if (kDegP >= 5) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 5) * 4)));
+ if (kDegQ >= 5) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 5) * 4)));
+ HWY_FENCE;
+ if (kDegP >= 6) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 6) * 4)));
+ if (kDegQ >= 6) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 6) * 4)));
+ HWY_FENCE;
+ if (kDegP >= 7) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 7) * 4)));
+ if (kDegQ >= 7) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 7) * 4)));
+
+ return FastDivision<T, V>()(yp, yq);
+}
+
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+#endif // LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/rational_polynomial_test.cc b/third_party/jpeg-xl/lib/jxl/rational_polynomial_test.cc
new file mode 100644
index 0000000000..13fc044a55
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/rational_polynomial_test.cc
@@ -0,0 +1,238 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#include <cmath>
+#include <string>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/rational_polynomial_test.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+#include <hwy/tests/test_util-inl.h>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/rational_polynomial-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+using T = float; // required by EvalLog2
+using D = HWY_FULL(T);
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::GetLane;
+using hwy::HWY_NAMESPACE::ShiftLeft;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Sub;
+
+// Generic: only computes polynomial
+struct EvalPoly {
+ template <size_t NP, size_t NQ>
+ T operator()(T x, const T (&p)[NP], const T (&q)[NQ]) const {
+ const HWY_FULL(T) d;
+ const auto vx = Set(d, x);
+ const auto approx = EvalRationalPolynomial(d, vx, p, q);
+ return GetLane(approx);
+ }
+};
+
+// Range reduction for log2
+struct EvalLog2 {
+ template <size_t NP, size_t NQ>
+ T operator()(T x, const T (&p)[NP], const T (&q)[NQ]) const {
+ const HWY_FULL(T) d;
+ auto vx = Set(d, x);
+
+ const HWY_FULL(int32_t) di;
+ const auto x_bits = BitCast(di, vx);
+ // Cannot handle negative numbers / NaN.
+ JXL_DASSERT(AllTrue(di, Eq(Abs(x_bits), x_bits)));
+
+ // Range reduction to [-1/3, 1/3] - 3 integer, 2 float ops
+ const auto exp_bits = Sub(x_bits, Set(di, 0x3f2aaaab)); // = 2/3
+ // Shifted exponent = log2; also used to clear mantissa.
+ const auto exp_shifted = ShiftRight<23>(exp_bits);
+ const auto mantissa = BitCast(d, Sub(x_bits, ShiftLeft<23>(exp_shifted)));
+ const auto exp_val = ConvertTo(d, exp_shifted);
+ vx = Sub(mantissa, Set(d, 1.0f));
+
+ const auto approx = Add(EvalRationalPolynomial(d, vx, p, q), exp_val);
+ return GetLane(approx);
+ }
+};
+
+// Functions to approximate:
+
+T LinearToSrgb8Direct(T val) {
+ if (val < 0.0) return 0.0;
+ if (val >= 255.0) return 255.0;
+ if (val <= 10.0 / 12.92) return val * 12.92;
+ return 255.0 * (std::pow(val / 255.0, 1.0 / 2.4) * 1.055 - 0.055);
+}
+
+T SimpleGamma(T v) {
+ static const T kGamma = 0.387494322593;
+ static const T limit = 43.01745241042018;
+ T bright = v - limit;
+ if (bright >= 0) {
+ static const T mul = 0.0383723643799;
+ v -= bright * mul;
+ }
+ static const T limit2 = 94.68634353321337;
+ T bright2 = v - limit2;
+ if (bright2 >= 0) {
+ static const T mul = 0.22885405968;
+ v -= bright2 * mul;
+ }
+ static const T offset = 0.156775786057;
+ static const T scale = 8.898059160493739;
+ T retval = scale * (offset + pow(v, kGamma));
+ return retval;
+}
+
+// Runs CaratheodoryFejer and verifies the polynomial using a lot of samples to
+// return the biggest error.
+template <size_t NP, size_t NQ, class Eval>
+T RunApproximation(T x0, T x1, const T (&p)[NP], const T (&q)[NQ],
+ const Eval& eval, T func_to_approx(T)) {
+ float maxerr = 0;
+ T lastPrint = 0;
+ // NOLINTNEXTLINE(clang-analyzer-security.FloatLoopCounter)
+ for (T x = x0; x <= x1; x += (x1 - x0) / 10000.0) {
+ const T f = func_to_approx(x);
+ const T g = eval(x, p, q);
+ maxerr = std::max(fabsf(g - f), maxerr);
+ if (x == x0 || x - lastPrint > (x1 - x0) / 20.0) {
+ printf("x: %11.6f, f: %11.6f, g: %11.6f, e: %11.6f\n", x, f, g,
+ fabs(g - f));
+ lastPrint = x;
+ }
+ }
+ return maxerr;
+}
+
+void TestSimpleGamma() {
+ const T p[4 * (6 + 1)] = {
+ HWY_REP4(-5.0646949363741811E-05), HWY_REP4(6.7369380528439771E-05),
+ HWY_REP4(8.9376652530412794E-05), HWY_REP4(2.1153513301520462E-06),
+ HWY_REP4(-6.9130322970386449E-08), HWY_REP4(3.9424752749293728E-10),
+ HWY_REP4(1.2360288207619576E-13)};
+
+ const T q[4 * (6 + 1)] = {
+ HWY_REP4(-6.6389733798591366E-06), HWY_REP4(1.3299859726565908E-05),
+ HWY_REP4(3.8538748358398873E-06), HWY_REP4(-2.8707687262928236E-08),
+ HWY_REP4(-6.6897385800005434E-10), HWY_REP4(6.1428748869186003E-12),
+ HWY_REP4(-2.5475738169252870E-15)};
+
+ const T err = RunApproximation(0.77, 274.579999999999984, p, q, EvalPoly(),
+ SimpleGamma);
+ EXPECT_LT(err, 0.05);
+}
+
+void TestLinearToSrgb8Direct() {
+ const T p[4 * (5 + 1)] = {
+ HWY_REP4(-9.5357499040105154E-05), HWY_REP4(4.6761186249798248E-04),
+ HWY_REP4(2.5708174333943594E-04), HWY_REP4(1.5250087770436082E-05),
+ HWY_REP4(1.1946768008931187E-07), HWY_REP4(5.9916446295972850E-11)};
+
+ const T q[4 * (4 + 1)] = {
+ HWY_REP4(1.8932479758079768E-05), HWY_REP4(2.7312342474687321E-05),
+ HWY_REP4(4.3901204783327006E-06), HWY_REP4(1.0417787306920273E-07),
+ HWY_REP4(3.0084206762140419E-10)};
+
+ const T err =
+ RunApproximation(0.77, 255, p, q, EvalPoly(), LinearToSrgb8Direct);
+ EXPECT_LT(err, 0.05);
+}
+
+void TestExp() {
+ const T p[4 * (2 + 1)] = {HWY_REP4(9.6266879665530902E-01),
+ HWY_REP4(4.8961265681586763E-01),
+ HWY_REP4(8.2619259189548433E-02)};
+ const T q[4 * (2 + 1)] = {HWY_REP4(9.6259895571622622E-01),
+ HWY_REP4(-4.7272457588933831E-01),
+ HWY_REP4(7.4802088567547664E-02)};
+ const T err =
+ RunApproximation(-1, 1, p, q, EvalPoly(), [](T x) { return T(exp(x)); });
+ EXPECT_LT(err, 1E-4);
+}
+
+void TestNegExp() {
+ // 4,3 is the min required for monotonicity; max error in 0,10: 751 ppm
+ // no benefit for k>50.
+ const T p[4 * (4 + 1)] = {
+ HWY_REP4(5.9580258551150123E-02), HWY_REP4(-2.5073728806886408E-02),
+ HWY_REP4(4.1561830213689248E-03), HWY_REP4(-3.1815408488900372E-04),
+ HWY_REP4(9.3866690094906802E-06)};
+ const T q[4 * (3 + 1)] = {
+ HWY_REP4(5.9579108238812878E-02), HWY_REP4(3.4542074345478582E-02),
+ HWY_REP4(8.7263562483501714E-03), HWY_REP4(1.4095109143061216E-03)};
+
+ const T err =
+ RunApproximation(0, 10, p, q, EvalPoly(), [](T x) { return T(exp(-x)); });
+ EXPECT_LT(err, sizeof(T) == 8 ? 2E-5 : 3E-5);
+}
+
+void TestSin() {
+ const T p[4 * (6 + 1)] = {
+ HWY_REP4(1.5518122109203780E-05), HWY_REP4(2.3388958643675966E+00),
+ HWY_REP4(-8.6705520940849157E-01), HWY_REP4(-1.9702294764873535E-01),
+ HWY_REP4(1.2193404314472320E-01), HWY_REP4(-1.7373966109788839E-02),
+ HWY_REP4(7.8829435883034796E-04)};
+ const T q[4 * (5 + 1)] = {
+ HWY_REP4(2.3394371422557279E+00), HWY_REP4(-8.7028221081288615E-01),
+ HWY_REP4(2.0052872219658430E-01), HWY_REP4(-3.2460335995264836E-02),
+ HWY_REP4(3.1546157932479282E-03), HWY_REP4(-1.6692542019380155E-04)};
+
+ const T err = RunApproximation(0, Pi<T>(1) * 2, p, q, EvalPoly(),
+ [](T x) { return T(sin(x)); });
+ EXPECT_LT(err, sizeof(T) == 8 ? 5E-4 : 7E-4);
+}
+
+void TestLog() {
+ HWY_ALIGN const T p[4 * (2 + 1)] = {HWY_REP4(-1.8503833400518310E-06),
+ HWY_REP4(1.4287160470083755E+00),
+ HWY_REP4(7.4245873327820566E-01)};
+ HWY_ALIGN const T q[4 * (2 + 1)] = {HWY_REP4(9.9032814277590719E-01),
+ HWY_REP4(1.0096718572241148E+00),
+ HWY_REP4(1.7409343003366853E-01)};
+ const T err = RunApproximation(1E-6, 1000, p, q, EvalLog2(), std::log2);
+ printf("%E\n", err);
+}
+
+HWY_NOINLINE void TestRationalPolynomial() {
+ TestSimpleGamma();
+ TestLinearToSrgb8Direct();
+ TestExp();
+ TestNegExp();
+ TestSin();
+ TestLog();
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+class RationalPolynomialTest : public hwy::TestWithParamTarget {};
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(RationalPolynomialTest);
+
+HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestSimpleGamma);
+HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestLinearToSrgb8Direct);
+HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestExp);
+HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestNegExp);
+HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestSin);
+HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestLog);
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/low_memory_render_pipeline.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/low_memory_render_pipeline.cc
new file mode 100644
index 0000000000..db60a458db
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/low_memory_render_pipeline.cc
@@ -0,0 +1,865 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/low_memory_render_pipeline.h"
+
+#include <algorithm>
+#include <queue>
+#include <tuple>
+
+#include "lib/jxl/base/arch_macros.h"
+#include "lib/jxl/image_ops.h"
+
+namespace jxl {
+std::pair<size_t, size_t>
+LowMemoryRenderPipeline::ColorDimensionsToChannelDimensions(
+ std::pair<size_t, size_t> in, size_t c, size_t stage) const {
+ std::pair<size_t, size_t> ret;
+ std::pair<size_t, size_t> shift = channel_shifts_[stage][c];
+ ret.first =
+ ((in.first << base_color_shift_) + (1 << shift.first) - 1) >> shift.first;
+ ret.second = ((in.second << base_color_shift_) + (1 << shift.second) - 1) >>
+ shift.second;
+ return ret;
+}
+
+std::pair<size_t, size_t> LowMemoryRenderPipeline::BorderToStore(
+ size_t c) const {
+ auto ret = ColorDimensionsToChannelDimensions(group_border_, c, 0);
+ ret.first += padding_[0][c].first;
+ ret.second += padding_[0][c].second;
+ return ret;
+}
+
+void LowMemoryRenderPipeline::SaveBorders(size_t group_id, size_t c,
+ const ImageF& in) {
+ size_t gy = group_id / frame_dimensions_.xsize_groups;
+ size_t gx = group_id % frame_dimensions_.xsize_groups;
+ size_t hshift = channel_shifts_[0][c].first;
+ size_t vshift = channel_shifts_[0][c].second;
+ size_t x0 = gx * GroupInputXSize(c);
+ size_t x1 = std::min((gx + 1) * GroupInputXSize(c),
+ DivCeil(frame_dimensions_.xsize_upsampled, 1 << hshift));
+ size_t y0 = gy * GroupInputYSize(c);
+ size_t y1 = std::min((gy + 1) * GroupInputYSize(c),
+ DivCeil(frame_dimensions_.ysize_upsampled, 1 << vshift));
+
+ auto borders = BorderToStore(c);
+ size_t borderx_write = borders.first;
+ size_t bordery_write = borders.second;
+
+ if (gy > 0) {
+ Rect from(group_data_x_border_, group_data_y_border_, x1 - x0,
+ bordery_write);
+ Rect to(x0, (gy * 2 - 1) * bordery_write, x1 - x0, bordery_write);
+ CopyImageTo(from, in, to, &borders_horizontal_[c]);
+ }
+ if (gy + 1 < frame_dimensions_.ysize_groups) {
+ Rect from(group_data_x_border_,
+ group_data_y_border_ + y1 - y0 - bordery_write, x1 - x0,
+ bordery_write);
+ Rect to(x0, (gy * 2) * bordery_write, x1 - x0, bordery_write);
+ CopyImageTo(from, in, to, &borders_horizontal_[c]);
+ }
+ if (gx > 0) {
+ Rect from(group_data_x_border_, group_data_y_border_, borderx_write,
+ y1 - y0);
+ Rect to((gx * 2 - 1) * borderx_write, y0, borderx_write, y1 - y0);
+ CopyImageTo(from, in, to, &borders_vertical_[c]);
+ }
+ if (gx + 1 < frame_dimensions_.xsize_groups) {
+ Rect from(group_data_x_border_ + x1 - x0 - borderx_write,
+ group_data_y_border_, borderx_write, y1 - y0);
+ Rect to((gx * 2) * borderx_write, y0, borderx_write, y1 - y0);
+ CopyImageTo(from, in, to, &borders_vertical_[c]);
+ }
+}
+
+void LowMemoryRenderPipeline::LoadBorders(size_t group_id, size_t c,
+ const Rect& r, ImageF* out) {
+ size_t gy = group_id / frame_dimensions_.xsize_groups;
+ size_t gx = group_id % frame_dimensions_.xsize_groups;
+ size_t hshift = channel_shifts_[0][c].first;
+ size_t vshift = channel_shifts_[0][c].second;
+ // Coordinates of the group in the image.
+ size_t x0 = gx * GroupInputXSize(c);
+ size_t x1 = std::min((gx + 1) * GroupInputXSize(c),
+ DivCeil(frame_dimensions_.xsize_upsampled, 1 << hshift));
+ size_t y0 = gy * GroupInputYSize(c);
+ size_t y1 = std::min((gy + 1) * GroupInputYSize(c),
+ DivCeil(frame_dimensions_.ysize_upsampled, 1 << vshift));
+
+ size_t paddingx = padding_[0][c].first;
+ size_t paddingy = padding_[0][c].second;
+
+ auto borders = BorderToStore(c);
+ size_t borderx_write = borders.first;
+ size_t bordery_write = borders.second;
+
+ // Limits of the area to copy from, in image coordinates.
+ JXL_DASSERT(r.x0() == 0 || (r.x0() << base_color_shift_) >= paddingx);
+ size_t x0src = DivCeil(r.x0() << base_color_shift_, 1 << hshift);
+ if (x0src != 0) {
+ x0src -= paddingx;
+ }
+ // r may be such that r.x1 (namely x0() + xsize()) is within paddingx of the
+ // right side of the image, so we use min() here.
+ size_t x1src =
+ DivCeil((r.x0() + r.xsize()) << base_color_shift_, 1 << hshift);
+ x1src = std::min(x1src + paddingx,
+ DivCeil(frame_dimensions_.xsize_upsampled, 1 << hshift));
+
+ // Similar computation for y.
+ JXL_DASSERT(r.y0() == 0 || (r.y0() << base_color_shift_) >= paddingy);
+ size_t y0src = DivCeil(r.y0() << base_color_shift_, 1 << vshift);
+ if (y0src != 0) {
+ y0src -= paddingy;
+ }
+ size_t y1src =
+ DivCeil((r.y0() + r.ysize()) << base_color_shift_, 1 << vshift);
+ y1src = std::min(y1src + paddingy,
+ DivCeil(frame_dimensions_.ysize_upsampled, 1 << vshift));
+
+ // Copy other groups' borders from the border storage.
+ if (y0src < y0) {
+ JXL_DASSERT(gy > 0);
+ CopyImageTo(
+ Rect(x0src, (gy * 2 - 2) * bordery_write, x1src - x0src, bordery_write),
+ borders_horizontal_[c],
+ Rect(group_data_x_border_ + x0src - x0,
+ group_data_y_border_ - bordery_write, x1src - x0src,
+ bordery_write),
+ out);
+ }
+ if (y1src > y1) {
+ // When copying the bottom border we must not be on the bottom groups.
+ JXL_DASSERT(gy + 1 < frame_dimensions_.ysize_groups);
+ CopyImageTo(
+ Rect(x0src, (gy * 2 + 1) * bordery_write, x1src - x0src, bordery_write),
+ borders_horizontal_[c],
+ Rect(group_data_x_border_ + x0src - x0, group_data_y_border_ + y1 - y0,
+ x1src - x0src, bordery_write),
+ out);
+ }
+ if (x0src < x0) {
+ JXL_DASSERT(gx > 0);
+ CopyImageTo(
+ Rect((gx * 2 - 2) * borderx_write, y0src, borderx_write, y1src - y0src),
+ borders_vertical_[c],
+ Rect(group_data_x_border_ - borderx_write,
+ group_data_y_border_ + y0src - y0, borderx_write, y1src - y0src),
+ out);
+ }
+ if (x1src > x1) {
+ // When copying the right border we must not be on the rightmost groups.
+ JXL_DASSERT(gx + 1 < frame_dimensions_.xsize_groups);
+ CopyImageTo(
+ Rect((gx * 2 + 1) * borderx_write, y0src, borderx_write, y1src - y0src),
+ borders_vertical_[c],
+ Rect(group_data_x_border_ + x1 - x0, group_data_y_border_ + y0src - y0,
+ borderx_write, y1src - y0src),
+ out);
+ }
+}
+
+size_t LowMemoryRenderPipeline::GroupInputXSize(size_t c) const {
+ return (frame_dimensions_.group_dim << base_color_shift_) >>
+ channel_shifts_[0][c].first;
+}
+
+size_t LowMemoryRenderPipeline::GroupInputYSize(size_t c) const {
+ return (frame_dimensions_.group_dim << base_color_shift_) >>
+ channel_shifts_[0][c].second;
+}
+
+void LowMemoryRenderPipeline::EnsureBordersStorage() {
+ const auto& shifts = channel_shifts_[0];
+ if (borders_horizontal_.size() < shifts.size()) {
+ borders_horizontal_.resize(shifts.size());
+ borders_vertical_.resize(shifts.size());
+ }
+ for (size_t c = 0; c < shifts.size(); c++) {
+ auto borders = BorderToStore(c);
+ size_t borderx = borders.first;
+ size_t bordery = borders.second;
+ JXL_DASSERT(frame_dimensions_.xsize_groups > 0);
+ size_t num_xborders = (frame_dimensions_.xsize_groups - 1) * 2;
+ JXL_DASSERT(frame_dimensions_.ysize_groups > 0);
+ size_t num_yborders = (frame_dimensions_.ysize_groups - 1) * 2;
+ size_t downsampled_xsize =
+ DivCeil(frame_dimensions_.xsize_upsampled_padded, 1 << shifts[c].first);
+ size_t downsampled_ysize = DivCeil(frame_dimensions_.ysize_upsampled_padded,
+ 1 << shifts[c].second);
+ Rect horizontal = Rect(0, 0, downsampled_xsize, bordery * num_yborders);
+ if (!SameSize(horizontal, borders_horizontal_[c])) {
+ borders_horizontal_[c] = ImageF(horizontal.xsize(), horizontal.ysize());
+ }
+ Rect vertical = Rect(0, 0, borderx * num_xborders, downsampled_ysize);
+ if (!SameSize(vertical, borders_vertical_[c])) {
+ borders_vertical_[c] = ImageF(vertical.xsize(), vertical.ysize());
+ }
+ }
+}
+
+void LowMemoryRenderPipeline::Init() {
+ group_border_ = {0, 0};
+ base_color_shift_ = CeilLog2Nonzero(frame_dimensions_.xsize_upsampled_padded /
+ frame_dimensions_.xsize_padded);
+
+ const auto& shifts = channel_shifts_[0];
+
+ // Ensure that each channel has enough many border pixels.
+ for (size_t c = 0; c < shifts.size(); c++) {
+ group_border_.first =
+ std::max(group_border_.first,
+ DivCeil(padding_[0][c].first << channel_shifts_[0][c].first,
+ 1 << base_color_shift_));
+ group_border_.second =
+ std::max(group_border_.second,
+ DivCeil(padding_[0][c].second << channel_shifts_[0][c].second,
+ 1 << base_color_shift_));
+ }
+
+ // Ensure that all channels have an integer number of border pixels in the
+ // input.
+ for (size_t c = 0; c < shifts.size(); c++) {
+ if (channel_shifts_[0][c].first >= base_color_shift_) {
+ group_border_.first =
+ RoundUpTo(group_border_.first,
+ 1 << (channel_shifts_[0][c].first - base_color_shift_));
+ }
+ if (channel_shifts_[0][c].second >= base_color_shift_) {
+ group_border_.second =
+ RoundUpTo(group_border_.second,
+ 1 << (channel_shifts_[0][c].second - base_color_shift_));
+ }
+ }
+ // Ensure that the X border on color channels is a multiple of kBlockDim or
+ // the vector size (required for EPF stages). Vectors on ARM NEON are never
+ // wider than 4 floats, so rounding to multiples of 4 is enough.
+#if JXL_ARCH_ARM
+ constexpr size_t kGroupXAlign = 4;
+#else
+ constexpr size_t kGroupXAlign = 16;
+#endif
+ group_border_.first = RoundUpTo(group_border_.first, kGroupXAlign);
+ // Allocate borders in group images that are just enough for storing the
+ // borders to be copied in, plus any rounding to ensure alignment.
+ std::pair<size_t, size_t> max_border = {0, 0};
+ for (size_t c = 0; c < shifts.size(); c++) {
+ max_border.first = std::max(BorderToStore(c).first, max_border.first);
+ max_border.second = std::max(BorderToStore(c).second, max_border.second);
+ }
+ group_data_x_border_ = RoundUpTo(max_border.first, kGroupXAlign);
+ group_data_y_border_ = max_border.second;
+
+ EnsureBordersStorage();
+ group_border_assigner_.Init(frame_dimensions_);
+
+ for (first_trailing_stage_ = stages_.size(); first_trailing_stage_ > 0;
+ first_trailing_stage_--) {
+ bool has_inout_c = false;
+ for (size_t c = 0; c < shifts.size(); c++) {
+ if (stages_[first_trailing_stage_ - 1]->GetChannelMode(c) ==
+ RenderPipelineChannelMode::kInOut) {
+ has_inout_c = true;
+ }
+ }
+ if (has_inout_c) {
+ break;
+ }
+ }
+
+ first_image_dim_stage_ = stages_.size();
+ for (size_t i = 0; i < stages_.size(); i++) {
+ std::vector<std::pair<size_t, size_t>> input_sizes(shifts.size());
+ for (size_t c = 0; c < shifts.size(); c++) {
+ input_sizes[c] =
+ std::make_pair(DivCeil(frame_dimensions_.xsize_upsampled,
+ 1 << channel_shifts_[i][c].first),
+ DivCeil(frame_dimensions_.ysize_upsampled,
+ 1 << channel_shifts_[i][c].second));
+ }
+ stages_[i]->SetInputSizes(input_sizes);
+ if (stages_[i]->SwitchToImageDimensions()) {
+ // We don't allow kInOut after switching to image dimensions.
+ JXL_ASSERT(i >= first_trailing_stage_);
+ first_image_dim_stage_ = i + 1;
+ stages_[i]->GetImageDimensions(&full_image_xsize_, &full_image_ysize_,
+ &frame_origin_);
+ break;
+ }
+ }
+ for (size_t i = first_image_dim_stage_; i < stages_.size(); i++) {
+ if (stages_[i]->SwitchToImageDimensions()) {
+ JXL_ABORT("Cannot switch to image dimensions multiple times");
+ }
+ std::vector<std::pair<size_t, size_t>> input_sizes(shifts.size());
+ for (size_t c = 0; c < shifts.size(); c++) {
+ input_sizes[c] = {full_image_xsize_, full_image_ysize_};
+ }
+ stages_[i]->SetInputSizes(input_sizes);
+ }
+
+ anyc_.resize(stages_.size());
+ for (size_t i = 0; i < stages_.size(); i++) {
+ for (size_t c = 0; c < shifts.size(); c++) {
+ if (stages_[i]->GetChannelMode(c) !=
+ RenderPipelineChannelMode::kIgnored) {
+ anyc_[i] = c;
+ }
+ }
+ }
+
+ stage_input_for_channel_ = std::vector<std::vector<int32_t>>(
+ stages_.size(), std::vector<int32_t>(shifts.size()));
+ for (size_t c = 0; c < shifts.size(); c++) {
+ int input = -1;
+ for (size_t i = 0; i < stages_.size(); i++) {
+ stage_input_for_channel_[i][c] = input;
+ if (stages_[i]->GetChannelMode(c) == RenderPipelineChannelMode::kInOut) {
+ input = i;
+ }
+ }
+ }
+
+ image_rect_.resize(stages_.size());
+ for (size_t i = 0; i < stages_.size(); i++) {
+ size_t x1 = DivCeil(frame_dimensions_.xsize_upsampled,
+ 1 << channel_shifts_[i][anyc_[i]].first);
+ size_t y1 = DivCeil(frame_dimensions_.ysize_upsampled,
+ 1 << channel_shifts_[i][anyc_[i]].second);
+ image_rect_[i] = Rect(0, 0, x1, y1);
+ }
+
+ virtual_ypadding_for_output_.resize(stages_.size());
+ xpadding_for_output_.resize(stages_.size());
+ for (size_t c = 0; c < shifts.size(); c++) {
+ int ypad = 0;
+ int xpad = 0;
+ for (size_t i = stages_.size(); i-- > 0;) {
+ if (stages_[i]->GetChannelMode(c) !=
+ RenderPipelineChannelMode::kIgnored) {
+ virtual_ypadding_for_output_[i] =
+ std::max(ypad, virtual_ypadding_for_output_[i]);
+ xpadding_for_output_[i] = std::max(xpad, xpadding_for_output_[i]);
+ }
+ if (stages_[i]->GetChannelMode(c) == RenderPipelineChannelMode::kInOut) {
+ ypad = (DivCeil(ypad, 1 << channel_shifts_[i][c].second) +
+ stages_[i]->settings_.border_y)
+ << channel_shifts_[i][c].second;
+ xpad = DivCeil(xpad, 1 << stages_[i]->settings_.shift_x) +
+ stages_[i]->settings_.border_x;
+ }
+ }
+ }
+}
+
+void LowMemoryRenderPipeline::PrepareForThreadsInternal(size_t num,
+ bool use_group_ids) {
+ const auto& shifts = channel_shifts_[0];
+
+ use_group_ids_ = use_group_ids;
+ size_t num_buffers = use_group_ids_ ? frame_dimensions_.num_groups : num;
+ for (size_t t = group_data_.size(); t < num_buffers; t++) {
+ group_data_.emplace_back();
+ group_data_[t].resize(shifts.size());
+ for (size_t c = 0; c < shifts.size(); c++) {
+ group_data_[t][c] = ImageF(GroupInputXSize(c) + group_data_x_border_ * 2,
+ GroupInputYSize(c) + group_data_y_border_ * 2);
+ }
+ }
+ // TODO(veluca): avoid reallocating buffers if not needed.
+ stage_data_.resize(num);
+ size_t upsampling = 1u << base_color_shift_;
+ size_t group_dim = frame_dimensions_.group_dim * upsampling;
+ size_t padding =
+ 2 * group_data_x_border_ * upsampling + // maximum size of a rect
+ 2 * kRenderPipelineXOffset; // extra padding for processing
+ size_t stage_buffer_xsize = group_dim + padding;
+ for (size_t t = 0; t < num; t++) {
+ stage_data_[t].resize(shifts.size());
+ for (size_t c = 0; c < shifts.size(); c++) {
+ stage_data_[t][c].resize(stages_.size());
+ size_t next_y_border = 0;
+ for (size_t i = stages_.size(); i-- > 0;) {
+ if (stages_[i]->GetChannelMode(c) ==
+ RenderPipelineChannelMode::kInOut) {
+ size_t stage_buffer_ysize =
+ 2 * next_y_border + (1 << stages_[i]->settings_.shift_y);
+ stage_buffer_ysize = 1 << CeilLog2Nonzero(stage_buffer_ysize);
+ next_y_border = stages_[i]->settings_.border_y;
+ stage_data_[t][c][i] = ImageF(stage_buffer_xsize, stage_buffer_ysize);
+ }
+ }
+ }
+ }
+ if (first_image_dim_stage_ != stages_.size()) {
+ RectT<ssize_t> image_rect(0, 0, frame_dimensions_.xsize_upsampled,
+ frame_dimensions_.ysize_upsampled);
+ RectT<ssize_t> full_image_rect(0, 0, full_image_xsize_, full_image_ysize_);
+ image_rect = image_rect.Translate(frame_origin_.x0, frame_origin_.y0);
+ image_rect = image_rect.Intersection(full_image_rect);
+ if (image_rect.xsize() == 0 || image_rect.ysize() == 0) {
+ image_rect = RectT<ssize_t>(0, 0, 0, 0);
+ }
+ size_t left_padding = image_rect.x0();
+ size_t middle_padding = group_dim;
+ size_t right_padding = full_image_xsize_ - image_rect.x1();
+ size_t out_of_frame_xsize =
+ padding +
+ std::max(left_padding, std::max(middle_padding, right_padding));
+ out_of_frame_data_.resize(num);
+ for (size_t t = 0; t < num; t++) {
+ out_of_frame_data_[t] = ImageF(out_of_frame_xsize, shifts.size());
+ }
+ }
+}
+
+std::vector<std::pair<ImageF*, Rect>> LowMemoryRenderPipeline::PrepareBuffers(
+ size_t group_id, size_t thread_id) {
+ std::vector<std::pair<ImageF*, Rect>> ret(channel_shifts_[0].size());
+ const size_t gx = group_id % frame_dimensions_.xsize_groups;
+ const size_t gy = group_id / frame_dimensions_.xsize_groups;
+ for (size_t c = 0; c < channel_shifts_[0].size(); c++) {
+ ret[c].first = &group_data_[use_group_ids_ ? group_id : thread_id][c];
+ ret[c].second = Rect(group_data_x_border_, group_data_y_border_,
+ GroupInputXSize(c), GroupInputYSize(c),
+ DivCeil(frame_dimensions_.xsize_upsampled,
+ 1 << channel_shifts_[0][c].first) -
+ gx * GroupInputXSize(c) + group_data_x_border_,
+ DivCeil(frame_dimensions_.ysize_upsampled,
+ 1 << channel_shifts_[0][c].second) -
+ gy * GroupInputYSize(c) + group_data_y_border_);
+ }
+ return ret;
+}
+
+namespace {
+
+JXL_INLINE int GetMirroredY(int y, ssize_t group_y0, ssize_t image_ysize) {
+ if (group_y0 == 0 && (y < 0 || y + group_y0 >= image_ysize)) {
+ return Mirror(y, image_ysize);
+ }
+ if (y + group_y0 >= image_ysize) {
+ // Here we know that the one mirroring step is sufficient.
+ return 2 * image_ysize - (y + group_y0) - 1 - group_y0;
+ }
+ return y;
+}
+
+JXL_INLINE void ApplyXMirroring(float* row, ssize_t borderx, ssize_t group_x0,
+ ssize_t group_xsize, ssize_t image_xsize) {
+ if (image_xsize <= borderx) {
+ if (group_x0 == 0) {
+ for (ssize_t ix = 0; ix < borderx; ix++) {
+ row[kRenderPipelineXOffset - ix - 1] =
+ row[kRenderPipelineXOffset + Mirror(-ix - 1, image_xsize)];
+ }
+ }
+ if (group_xsize + borderx + group_x0 >= image_xsize) {
+ for (ssize_t ix = 0; ix < borderx; ix++) {
+ row[kRenderPipelineXOffset + image_xsize + ix - group_x0] =
+ row[kRenderPipelineXOffset + Mirror(image_xsize + ix, image_xsize) -
+ group_x0];
+ }
+ }
+ } else {
+ // Here we know that the one mirroring step is sufficient.
+ if (group_x0 == 0) {
+ for (ssize_t ix = 0; ix < borderx; ix++) {
+ row[kRenderPipelineXOffset - ix - 1] = row[kRenderPipelineXOffset + ix];
+ }
+ }
+ if (group_xsize + borderx + group_x0 >= image_xsize) {
+ for (ssize_t ix = 0; ix < borderx; ix++) {
+ row[kRenderPipelineXOffset + image_xsize - group_x0 + ix] =
+ row[kRenderPipelineXOffset + image_xsize - group_x0 - ix - 1];
+ }
+ }
+ }
+}
+
+// Information about where the *output* of each stage is stored.
+class Rows {
+ public:
+ Rows(const std::vector<std::unique_ptr<RenderPipelineStage>>& stages,
+ const Rect data_max_color_channel_rect, int group_data_x_border,
+ int group_data_y_border,
+ const std::vector<std::pair<size_t, size_t>>& group_data_shift,
+ size_t base_color_shift, std::vector<std::vector<ImageF>>& thread_data,
+ std::vector<ImageF>& input_data) {
+ size_t num_stages = stages.size();
+ size_t num_channels = input_data.size();
+
+ JXL_ASSERT(thread_data.size() == num_channels);
+ JXL_ASSERT(group_data_shift.size() == num_channels);
+
+#if JXL_ENABLE_ASSERT
+ for (const auto& td : thread_data) {
+ JXL_ASSERT(td.size() == num_stages);
+ }
+#endif
+
+ rows_.resize(num_stages + 1, std::vector<RowInfo>(num_channels));
+
+ for (size_t i = 0; i < num_stages; i++) {
+ for (size_t c = 0; c < input_data.size(); c++) {
+ if (stages[i]->GetChannelMode(c) == RenderPipelineChannelMode::kInOut) {
+ rows_[i + 1][c].ymod_minus_1 = thread_data[c][i].ysize() - 1;
+ rows_[i + 1][c].base_ptr = thread_data[c][i].Row(0);
+ rows_[i + 1][c].stride = thread_data[c][i].PixelsPerRow();
+ }
+ }
+ }
+
+ for (size_t c = 0; c < input_data.size(); c++) {
+ auto channel_group_data_rect =
+ data_max_color_channel_rect.As<ssize_t>()
+ .Translate(-group_data_x_border, -group_data_y_border)
+ .ShiftLeft(base_color_shift)
+ .CeilShiftRight(group_data_shift[c])
+ .Translate(group_data_x_border - ssize_t(kRenderPipelineXOffset),
+ group_data_y_border);
+ rows_[0][c].base_ptr = channel_group_data_rect.Row(&input_data[c], 0);
+ rows_[0][c].stride = input_data[c].PixelsPerRow();
+ rows_[0][c].ymod_minus_1 = -1;
+ }
+ }
+
+ // Stage -1 refers to the input data; all other values must be nonnegative and
+ // refer to the data for the output of that stage.
+ JXL_INLINE float* GetBuffer(int stage, int y, size_t c) const {
+ JXL_DASSERT(stage >= -1);
+ const RowInfo& info = rows_[stage + 1][c];
+ return info.base_ptr + ssize_t(info.stride) * (y & info.ymod_minus_1);
+ }
+
+ private:
+ struct RowInfo {
+ // Pointer to beginning of the first row.
+ float* base_ptr;
+ // Modulo value for the y axis minus 1 (ymod is guaranteed to be a power of
+ // 2, which allows efficient mod computation by masking).
+ int ymod_minus_1;
+ // Number of floats per row.
+ size_t stride;
+ };
+ std::vector<std::vector<RowInfo>> rows_;
+};
+
+} // namespace
+
+void LowMemoryRenderPipeline::RenderRect(size_t thread_id,
+ std::vector<ImageF>& input_data,
+ Rect data_max_color_channel_rect,
+ Rect image_max_color_channel_rect) {
+ // For each stage, the rect corresponding to the image area currently being
+ // processed, in the coordinates of that stage (i.e. with the scaling factor
+ // that that stage has).
+ std::vector<Rect> group_rect;
+ group_rect.resize(stages_.size());
+ Rect image_area_rect =
+ image_max_color_channel_rect.ShiftLeft(base_color_shift_)
+ .Crop(frame_dimensions_.xsize_upsampled,
+ frame_dimensions_.ysize_upsampled);
+ for (size_t i = 0; i < stages_.size(); i++) {
+ group_rect[i] =
+ image_area_rect.CeilShiftRight(channel_shifts_[i][anyc_[i]]);
+ }
+
+ ssize_t frame_x0 =
+ first_image_dim_stage_ == stages_.size() ? 0 : frame_origin_.x0;
+ ssize_t frame_y0 =
+ first_image_dim_stage_ == stages_.size() ? 0 : frame_origin_.y0;
+ size_t full_image_xsize = first_image_dim_stage_ == stages_.size()
+ ? frame_dimensions_.xsize_upsampled
+ : full_image_xsize_;
+ size_t full_image_ysize = first_image_dim_stage_ == stages_.size()
+ ? frame_dimensions_.ysize_upsampled
+ : full_image_ysize_;
+
+ // Compute actual x-axis bounds for the current image area in the context of
+ // the full image this frame is part of. As the left boundary may be negative,
+ // we also create the x_pixels_skip value, defined as follows:
+ // - both x_pixels_skip and full_image_x0 are >= 0, and at least one is 0;
+ // - full_image_x0 - x_pixels_skip is the position of the current frame area
+ // in the full image.
+ ssize_t full_image_x0 = frame_x0 + image_area_rect.x0();
+ ssize_t x_pixels_skip = 0;
+ if (full_image_x0 < 0) {
+ x_pixels_skip = -full_image_x0;
+ full_image_x0 = 0;
+ }
+ ssize_t full_image_x1 = frame_x0 + image_area_rect.x1();
+ full_image_x1 = std::min<ssize_t>(full_image_x1, full_image_xsize);
+
+ // If the current image area is entirely outside of the visible image, there
+ // is no point in proceeding. Note: this uses the assumption that if there is
+ // a stage with observable effects (i.e. a kInput stage), it only appears
+ // after the stage that switches to image dimensions.
+ if (full_image_x1 <= full_image_x0) return;
+
+ // Data structures to hold information about input/output rows and their
+ // buffers.
+ Rows rows(stages_, data_max_color_channel_rect, group_data_x_border_,
+ group_data_y_border_, channel_shifts_[0], base_color_shift_,
+ stage_data_[thread_id], input_data);
+
+ std::vector<RenderPipelineStage::RowInfo> input_rows(first_trailing_stage_ +
+ 1);
+ for (size_t i = 0; i < first_trailing_stage_; i++) {
+ input_rows[i].resize(input_data.size());
+ }
+ input_rows[first_trailing_stage_].resize(input_data.size(),
+ std::vector<float*>(1));
+
+ // Maximum possible shift is 3.
+ RenderPipelineStage::RowInfo output_rows(input_data.size(),
+ std::vector<float*>(8));
+
+ // Fills in input_rows and output_rows for a given y value (relative to the
+ // start of the group, measured in actual pixels at the appropriate vertical
+ // scaling factor) and a given stage, applying mirroring if necessary. This
+ // function is somewhat inefficient for trailing kInOut or kInput stages,
+ // where just filling the input row once ought to be sufficient.
+ auto prepare_io_rows = [&](int y, size_t i) {
+ ssize_t bordery = stages_[i]->settings_.border_y;
+ size_t shifty = stages_[i]->settings_.shift_y;
+ auto make_row = [&](size_t c, ssize_t iy) {
+ size_t mirrored_y = GetMirroredY(y + iy - bordery, group_rect[i].y0(),
+ image_rect_[i].ysize());
+ input_rows[i][c][iy] =
+ rows.GetBuffer(stage_input_for_channel_[i][c], mirrored_y, c);
+ ApplyXMirroring(input_rows[i][c][iy], stages_[i]->settings_.border_x,
+ group_rect[i].x0(), group_rect[i].xsize(),
+ image_rect_[i].xsize());
+ };
+ for (size_t c = 0; c < input_data.size(); c++) {
+ RenderPipelineChannelMode mode = stages_[i]->GetChannelMode(c);
+ if (mode == RenderPipelineChannelMode::kIgnored) {
+ continue;
+ }
+ // If we already have rows from a previous iteration, we can just shift
+ // the rows by 1 and insert the new one.
+ if (input_rows[i][c].size() == 2 * size_t(bordery) + 1) {
+ for (ssize_t iy = 0; iy < 2 * bordery; iy++) {
+ input_rows[i][c][iy] = input_rows[i][c][iy + 1];
+ }
+ make_row(c, bordery * 2);
+ } else {
+ input_rows[i][c].resize(2 * bordery + 1);
+ for (ssize_t iy = 0; iy < 2 * bordery + 1; iy++) {
+ make_row(c, iy);
+ }
+ }
+
+ // If necessary, get the output buffers.
+ if (mode == RenderPipelineChannelMode::kInOut) {
+ for (size_t iy = 0; iy < (1u << shifty); iy++) {
+ output_rows[c][iy] = rows.GetBuffer(i, y * (1 << shifty) + iy, c);
+ }
+ }
+ }
+ };
+
+ // We pretend that every stage has a vertical shift of 0, i.e. it is as tall
+ // as the final image.
+ // We call each such row a "virtual" row, because it may or may not correspond
+ // to an actual row of the current processing stage; actual processing happens
+ // when vy % (1<<vshift) == 0.
+
+ int num_extra_rows = *std::max_element(virtual_ypadding_for_output_.begin(),
+ virtual_ypadding_for_output_.end());
+
+ for (int vy = -num_extra_rows;
+ vy < int(image_area_rect.ysize()) + num_extra_rows; vy++) {
+ for (size_t i = 0; i < first_trailing_stage_; i++) {
+ int stage_vy = vy - num_extra_rows + virtual_ypadding_for_output_[i];
+
+ if (stage_vy % (1 << channel_shifts_[i][anyc_[i]].second) != 0) {
+ continue;
+ }
+
+ if (stage_vy < -virtual_ypadding_for_output_[i]) {
+ continue;
+ }
+
+ int y = stage_vy >> channel_shifts_[i][anyc_[i]].second;
+
+ ssize_t image_y = ssize_t(group_rect[i].y0()) + y;
+ // Do not produce rows in out-of-bounds areas.
+ if (image_y < 0 || image_y >= ssize_t(image_rect_[i].ysize())) {
+ continue;
+ }
+
+ // Get the input/output rows and potentially apply mirroring to the input.
+ prepare_io_rows(y, i);
+
+ // Produce output rows.
+ stages_[i]->ProcessRow(input_rows[i], output_rows,
+ xpadding_for_output_[i], group_rect[i].xsize(),
+ group_rect[i].x0(), image_y, thread_id);
+ }
+
+ // Process trailing stages, i.e. the final set of non-kInOut stages; they
+ // all have the same input buffer and no need to use any mirroring.
+
+ int y = vy - num_extra_rows;
+
+ for (size_t c = 0; c < input_data.size(); c++) {
+ // Skip pixels that are not part of the actual final image area.
+ input_rows[first_trailing_stage_][c][0] =
+ rows.GetBuffer(stage_input_for_channel_[first_trailing_stage_][c], y,
+ c) +
+ x_pixels_skip;
+ }
+
+ // Check that we are not outside of the bounds for the current rendering
+ // rect. Not doing so might result in overwriting some rows that have been
+ // written (or will be written) by other threads.
+ if (y < 0 || y >= ssize_t(image_area_rect.ysize())) {
+ continue;
+ }
+
+ // Avoid running pipeline stages on pixels that are outside the full image
+ // area. As trailing stages have no borders, this is a free optimization
+ // (and may be necessary for correctness, as some stages assume coordinates
+ // are within bounds).
+ ssize_t full_image_y = frame_y0 + image_area_rect.y0() + y;
+ if (full_image_y < 0 || full_image_y >= ssize_t(full_image_ysize)) {
+ continue;
+ }
+
+ for (size_t i = first_trailing_stage_; i < stages_.size(); i++) {
+ // Before the first_image_dim_stage_, coordinates are relative to the
+ // current frame.
+ size_t x0 =
+ i < first_image_dim_stage_ ? full_image_x0 - frame_x0 : full_image_x0;
+ size_t y =
+ i < first_image_dim_stage_ ? full_image_y - frame_y0 : full_image_y;
+ stages_[i]->ProcessRow(input_rows[first_trailing_stage_], output_rows,
+ /*xextra=*/0, full_image_x1 - full_image_x0, x0, y,
+ thread_id);
+ }
+ }
+}
+
+void LowMemoryRenderPipeline::RenderPadding(size_t thread_id, Rect rect) {
+ if (rect.xsize() == 0) return;
+ size_t numc = channel_shifts_[0].size();
+ RenderPipelineStage::RowInfo input_rows(numc, std::vector<float*>(1));
+ RenderPipelineStage::RowInfo output_rows;
+
+ for (size_t c = 0; c < numc; c++) {
+ input_rows[c][0] = out_of_frame_data_[thread_id].Row(c);
+ }
+
+ for (size_t y = 0; y < rect.ysize(); y++) {
+ stages_[first_image_dim_stage_ - 1]->ProcessPaddingRow(
+ input_rows, rect.xsize(), rect.x0(), rect.y0() + y);
+ for (size_t i = first_image_dim_stage_; i < stages_.size(); i++) {
+ stages_[i]->ProcessRow(input_rows, output_rows,
+ /*xextra=*/0, rect.xsize(), rect.x0(),
+ rect.y0() + y, thread_id);
+ }
+ }
+}
+
+void LowMemoryRenderPipeline::ProcessBuffers(size_t group_id,
+ size_t thread_id) {
+ std::vector<ImageF>& input_data =
+ group_data_[use_group_ids_ ? group_id : thread_id];
+
+ // Copy the group borders to the border storage.
+ for (size_t c = 0; c < input_data.size(); c++) {
+ SaveBorders(group_id, c, input_data[c]);
+ }
+
+ size_t gy = group_id / frame_dimensions_.xsize_groups;
+ size_t gx = group_id % frame_dimensions_.xsize_groups;
+
+ if (first_image_dim_stage_ != stages_.size()) {
+ size_t group_dim = frame_dimensions_.group_dim << base_color_shift_;
+ RectT<ssize_t> group_rect(gx * group_dim, gy * group_dim, group_dim,
+ group_dim);
+ RectT<ssize_t> image_rect(0, 0, frame_dimensions_.xsize_upsampled,
+ frame_dimensions_.ysize_upsampled);
+ RectT<ssize_t> full_image_rect(0, 0, full_image_xsize_, full_image_ysize_);
+ group_rect = group_rect.Translate(frame_origin_.x0, frame_origin_.y0);
+ image_rect = image_rect.Translate(frame_origin_.x0, frame_origin_.y0);
+ image_rect = image_rect.Intersection(full_image_rect);
+ group_rect = group_rect.Intersection(image_rect);
+ size_t x0 = group_rect.x0();
+ size_t y0 = group_rect.y0();
+ size_t x1 = group_rect.x1();
+ size_t y1 = group_rect.y1();
+ JXL_DEBUG_V(6,
+ "Rendering padding for full image rect %s "
+ "outside group rect %s",
+ Description(full_image_rect).c_str(),
+ Description(group_rect).c_str());
+
+ if (group_id == 0 && (image_rect.xsize() == 0 || image_rect.ysize() == 0)) {
+ // If this frame does not intersect with the full image, we have to
+ // initialize the whole image area with RenderPadding.
+ RenderPadding(thread_id,
+ Rect(0, 0, full_image_xsize_, full_image_ysize_));
+ }
+
+ // Render padding for groups that intersect with the full image. The case
+ // where no groups intersect was handled above.
+ if (group_rect.xsize() > 0 && group_rect.ysize() > 0) {
+ if (gx == 0 && gy == 0) {
+ RenderPadding(thread_id, Rect(0, 0, x0, y0));
+ }
+ if (gy == 0) {
+ RenderPadding(thread_id, Rect(x0, 0, x1 - x0, y0));
+ }
+ if (gx == 0) {
+ RenderPadding(thread_id, Rect(0, y0, x0, y1 - y0));
+ }
+ if (gx == 0 && gy + 1 == frame_dimensions_.ysize_groups) {
+ RenderPadding(thread_id, Rect(0, y1, x0, full_image_ysize_ - y1));
+ }
+ if (gy + 1 == frame_dimensions_.ysize_groups) {
+ RenderPadding(thread_id, Rect(x0, y1, x1 - x0, full_image_ysize_ - y1));
+ }
+ if (gy == 0 && gx + 1 == frame_dimensions_.xsize_groups) {
+ RenderPadding(thread_id, Rect(x1, 0, full_image_xsize_ - x1, y0));
+ }
+ if (gx + 1 == frame_dimensions_.xsize_groups) {
+ RenderPadding(thread_id, Rect(x1, y0, full_image_xsize_ - x1, y1 - y0));
+ }
+ if (gy + 1 == frame_dimensions_.ysize_groups &&
+ gx + 1 == frame_dimensions_.xsize_groups) {
+ RenderPadding(thread_id, Rect(x1, y1, full_image_xsize_ - x1,
+ full_image_ysize_ - y1));
+ }
+ }
+ }
+
+ Rect ready_rects[GroupBorderAssigner::kMaxToFinalize];
+ size_t num_ready_rects = 0;
+ group_border_assigner_.GroupDone(group_id, group_border_.first,
+ group_border_.second, ready_rects,
+ &num_ready_rects);
+ for (size_t i = 0; i < num_ready_rects; i++) {
+ const Rect& image_max_color_channel_rect = ready_rects[i];
+ for (size_t c = 0; c < input_data.size(); c++) {
+ LoadBorders(group_id, c, image_max_color_channel_rect, &input_data[c]);
+ }
+ Rect data_max_color_channel_rect(
+ group_data_x_border_ + image_max_color_channel_rect.x0() -
+ gx * frame_dimensions_.group_dim,
+ group_data_y_border_ + image_max_color_channel_rect.y0() -
+ gy * frame_dimensions_.group_dim,
+ image_max_color_channel_rect.xsize(),
+ image_max_color_channel_rect.ysize());
+ RenderRect(thread_id, input_data, data_max_color_channel_rect,
+ image_max_color_channel_rect);
+ }
+}
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/low_memory_render_pipeline.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/low_memory_render_pipeline.h
new file mode 100644
index 0000000000..b386f7c078
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/low_memory_render_pipeline.h
@@ -0,0 +1,111 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_LOW_MEMORY_RENDER_PIPELINE_H_
+#define LIB_JXL_RENDER_PIPELINE_LOW_MEMORY_RENDER_PIPELINE_H_
+
+#include <stdint.h>
+
+#include "lib/jxl/dec_group_border.h"
+#include "lib/jxl/render_pipeline/render_pipeline.h"
+
+namespace jxl {
+
+// A multithreaded, low-memory rendering pipeline that only allocates a minimal
+// amount of buffers.
+class LowMemoryRenderPipeline final : public RenderPipeline {
+ private:
+ std::vector<std::pair<ImageF*, Rect>> PrepareBuffers(
+ size_t group_id, size_t thread_id) override;
+
+ void PrepareForThreadsInternal(size_t num, bool use_group_ids) override;
+
+ void ProcessBuffers(size_t group_id, size_t thread_id) override;
+
+ void ClearDone(size_t i) override { group_border_assigner_.ClearDone(i); }
+
+ void Init() override;
+
+ void EnsureBordersStorage();
+ size_t GroupInputXSize(size_t c) const;
+ size_t GroupInputYSize(size_t c) const;
+ void RenderRect(size_t thread_id, std::vector<ImageF>& input_data,
+ Rect data_max_color_channel_rect,
+ Rect image_max_color_channel_rect);
+ void RenderPadding(size_t thread_id, Rect rect);
+
+ void SaveBorders(size_t group_id, size_t c, const ImageF& in);
+ void LoadBorders(size_t group_id, size_t c, const Rect& r, ImageF* out);
+
+ std::pair<size_t, size_t> ColorDimensionsToChannelDimensions(
+ std::pair<size_t, size_t> in, size_t c, size_t stage) const;
+
+ std::pair<size_t, size_t> BorderToStore(size_t c) const;
+
+ bool use_group_ids_;
+
+ // Storage for borders between groups. Borders of adjacent groups are stacked
+ // together, e.g. bottom border of current group is followed by top border
+ // of next group.
+ std::vector<ImageF> borders_horizontal_;
+ std::vector<ImageF> borders_vertical_;
+
+ // Manages the status of borders.
+ GroupBorderAssigner group_border_assigner_;
+
+ // Size (in color-channel-pixels) of the border around each group that might
+ // be assigned to that group.
+ std::pair<size_t, size_t> group_border_;
+ // base_color_shift_ defines the size of groups in terms of final image
+ // pixels.
+ size_t base_color_shift_;
+
+ // Buffer for decoded pixel data for a group, indexed by [thread][channel] or
+ // [group][channel] depending on `use_group_ids_`.
+ std::vector<std::vector<ImageF>> group_data_;
+
+ // Borders for storing group data.
+ size_t group_data_x_border_;
+ size_t group_data_y_border_;
+
+ // Buffers for intermediate rows for the various stages, indexed by
+ // [thread][channel][stage].
+ std::vector<std::vector<std::vector<ImageF>>> stage_data_;
+
+ // Buffers for out-of-frame data, indexed by [thread]; every row is a
+ // different channel.
+ std::vector<ImageF> out_of_frame_data_;
+
+ // For each stage, a non-kIgnored channel.
+ std::vector<int32_t> anyc_;
+
+ // Size of the image at each stage.
+ std::vector<Rect> image_rect_;
+
+ // For each stage, for each channel, keep track of the kInOut stage that
+ // produced the input to that stage (which corresponds to the buffer index
+ // containing the data). -1 if data comes from the original input.
+ std::vector<std::vector<int32_t>> stage_input_for_channel_;
+
+ // Number of (virtual) extra rows that must be processed at each stage
+ // to produce sufficient output for future stages.
+ std::vector<int> virtual_ypadding_for_output_;
+
+ // Same thing for columns, except these are real columns and not virtual ones.
+ std::vector<int> xpadding_for_output_;
+
+ // First stage that doesn't have any kInOut channel.
+ size_t first_trailing_stage_;
+
+ // Origin and size of the frame after switching to image dimensions.
+ FrameOrigin frame_origin_;
+ size_t full_image_xsize_;
+ size_t full_image_ysize_;
+ size_t first_image_dim_stage_;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_RENDER_PIPELINE_LOW_MEMORY_RENDER_PIPELINE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/render_pipeline.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/render_pipeline.cc
new file mode 100644
index 0000000000..68b6ef613f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/render_pipeline.cc
@@ -0,0 +1,132 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/render_pipeline.h"
+
+#include <algorithm>
+
+#include "lib/jxl/render_pipeline/low_memory_render_pipeline.h"
+#include "lib/jxl/render_pipeline/simple_render_pipeline.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+
+void RenderPipeline::Builder::AddStage(
+ std::unique_ptr<RenderPipelineStage> stage) {
+ stages_.push_back(std::move(stage));
+}
+
+std::unique_ptr<RenderPipeline> RenderPipeline::Builder::Finalize(
+ FrameDimensions frame_dimensions) && {
+#if JXL_ENABLE_ASSERT
+ // Check that the last stage is not an kInOut stage for any channel, and that
+ // there is at least one stage.
+ JXL_ASSERT(!stages_.empty());
+ for (size_t c = 0; c < num_c_; c++) {
+ JXL_ASSERT(stages_.back()->GetChannelMode(c) !=
+ RenderPipelineChannelMode::kInOut);
+ }
+#endif
+
+ std::unique_ptr<RenderPipeline> res;
+ if (use_simple_implementation_) {
+ res = jxl::make_unique<SimpleRenderPipeline>();
+ } else {
+ res = jxl::make_unique<LowMemoryRenderPipeline>();
+ }
+
+ res->padding_.resize(stages_.size());
+ for (size_t i = stages_.size(); i-- > 0;) {
+ const auto& stage = stages_[i];
+ res->padding_[i].resize(num_c_);
+ if (i + 1 == stages_.size()) {
+ continue;
+ }
+ for (size_t c = 0; c < num_c_; c++) {
+ if (stage->GetChannelMode(c) == RenderPipelineChannelMode::kInOut) {
+ res->padding_[i][c].first = DivCeil(res->padding_[i + 1][c].first,
+ 1 << stage->settings_.shift_x) +
+ stage->settings_.border_x;
+ res->padding_[i][c].second = DivCeil(res->padding_[i + 1][c].second,
+ 1 << stage->settings_.shift_y) +
+ stage->settings_.border_y;
+ } else {
+ res->padding_[i][c] = res->padding_[i + 1][c];
+ }
+ }
+ }
+
+ res->frame_dimensions_ = frame_dimensions;
+ res->group_completed_passes_.resize(frame_dimensions.num_groups);
+ res->channel_shifts_.resize(stages_.size());
+ res->channel_shifts_[0].resize(num_c_);
+ for (size_t i = 1; i < stages_.size(); i++) {
+ auto& stage = stages_[i - 1];
+ for (size_t c = 0; c < num_c_; c++) {
+ if (stage->GetChannelMode(c) == RenderPipelineChannelMode::kInOut) {
+ res->channel_shifts_[0][c].first += stage->settings_.shift_x;
+ res->channel_shifts_[0][c].second += stage->settings_.shift_y;
+ }
+ }
+ }
+ for (size_t i = 1; i < stages_.size(); i++) {
+ auto& stage = stages_[i - 1];
+ res->channel_shifts_[i].resize(num_c_);
+ for (size_t c = 0; c < num_c_; c++) {
+ if (stage->GetChannelMode(c) == RenderPipelineChannelMode::kInOut) {
+ res->channel_shifts_[i][c].first =
+ res->channel_shifts_[i - 1][c].first - stage->settings_.shift_x;
+ res->channel_shifts_[i][c].second =
+ res->channel_shifts_[i - 1][c].second - stage->settings_.shift_y;
+ } else {
+ res->channel_shifts_[i][c].first = res->channel_shifts_[i - 1][c].first;
+ res->channel_shifts_[i][c].second =
+ res->channel_shifts_[i - 1][c].second;
+ }
+ }
+ }
+ res->stages_ = std::move(stages_);
+ res->Init();
+ return res;
+}
+
+RenderPipelineInput RenderPipeline::GetInputBuffers(size_t group_id,
+ size_t thread_id) {
+ RenderPipelineInput ret;
+ JXL_DASSERT(group_id < group_completed_passes_.size());
+ ret.group_id_ = group_id;
+ ret.thread_id_ = thread_id;
+ ret.pipeline_ = this;
+ ret.buffers_ = PrepareBuffers(group_id, thread_id);
+ return ret;
+}
+
+void RenderPipeline::InputReady(
+ size_t group_id, size_t thread_id,
+ const std::vector<std::pair<ImageF*, Rect>>& buffers) {
+ JXL_DASSERT(group_id < group_completed_passes_.size());
+ group_completed_passes_[group_id]++;
+ for (size_t i = 0; i < buffers.size(); ++i) {
+ (void)i;
+ JXL_CHECK_PLANE_INITIALIZED(*buffers[i].first, buffers[i].second, i);
+ }
+
+ ProcessBuffers(group_id, thread_id);
+}
+
+Status RenderPipeline::PrepareForThreads(size_t num, bool use_group_ids) {
+ for (const auto& stage : stages_) {
+ JXL_RETURN_IF_ERROR(stage->PrepareForThreads(num));
+ }
+ PrepareForThreadsInternal(num, use_group_ids);
+ return true;
+}
+
+void RenderPipelineInput::Done() {
+ JXL_ASSERT(pipeline_);
+ pipeline_->InputReady(group_id_, thread_id_, buffers_);
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/render_pipeline.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/render_pipeline.h
new file mode 100644
index 0000000000..bf3ad4975e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/render_pipeline.h
@@ -0,0 +1,139 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_RENDER_PIPELINE_H_
+#define LIB_JXL_RENDER_PIPELINE_RENDER_PIPELINE_H_
+
+#include <stdint.h>
+
+#include "lib/jxl/image.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Interface to provide input to the rendering pipeline. When this object is
+// destroyed, all the data in the provided ImageF's Rects must have been
+// initialized.
+class RenderPipelineInput {
+ public:
+ RenderPipelineInput(const RenderPipelineInput&) = delete;
+ RenderPipelineInput(RenderPipelineInput&& other) noexcept {
+ *this = std::move(other);
+ }
+ RenderPipelineInput& operator=(RenderPipelineInput&& other) noexcept {
+ pipeline_ = other.pipeline_;
+ group_id_ = other.group_id_;
+ thread_id_ = other.thread_id_;
+ buffers_ = std::move(other.buffers_);
+ other.pipeline_ = nullptr;
+ return *this;
+ }
+
+ RenderPipelineInput() = default;
+ void Done();
+
+ const std::pair<ImageF*, Rect>& GetBuffer(size_t c) const {
+ JXL_ASSERT(c < buffers_.size());
+ return buffers_[c];
+ }
+
+ private:
+ RenderPipeline* pipeline_ = nullptr;
+ size_t group_id_;
+ size_t thread_id_;
+ std::vector<std::pair<ImageF*, Rect>> buffers_;
+ friend class RenderPipeline;
+};
+
+class RenderPipeline {
+ public:
+ class Builder {
+ public:
+ explicit Builder(size_t num_c) : num_c_(num_c) { JXL_ASSERT(num_c > 0); }
+
+ // Adds a stage to the pipeline. Must be called at least once; the last
+ // added stage cannot have kInOut channels.
+ void AddStage(std::unique_ptr<RenderPipelineStage> stage);
+
+ // Enables using the simple (i.e. non-memory-efficient) implementation of
+ // the pipeline.
+ void UseSimpleImplementation() { use_simple_implementation_ = true; }
+
+ // Finalizes setup of the pipeline. Shifts for all channels should be 0 at
+ // this point.
+ std::unique_ptr<RenderPipeline> Finalize(
+ FrameDimensions frame_dimensions) &&;
+
+ private:
+ std::vector<std::unique_ptr<RenderPipelineStage>> stages_;
+ size_t num_c_;
+ bool use_simple_implementation_ = false;
+ };
+
+ friend class Builder;
+
+ virtual ~RenderPipeline() = default;
+
+ Status IsInitialized() const {
+ for (const auto& stage : stages_) {
+ JXL_RETURN_IF_ERROR(stage->IsInitialized());
+ }
+ return true;
+ }
+
+ // Allocates storage to run with `num` threads. If `use_group_ids` is true,
+ // storage is allocated for each group, not each thread. The behaviour is
+ // undefined if calling this function multiple times with a different value
+ // for `use_group_ids`.
+ Status PrepareForThreads(size_t num, bool use_group_ids);
+
+ // Retrieves a buffer where input data should be stored by the callee. When
+ // input has been provided for all buffers, the pipeline will complete its
+ // processing. This method may be called multiple times concurrently from
+ // different threads, provided that a different `thread_id` is given.
+ RenderPipelineInput GetInputBuffers(size_t group_id, size_t thread_id);
+
+ size_t PassesWithAllInput() const {
+ return *std::min_element(group_completed_passes_.begin(),
+ group_completed_passes_.end());
+ }
+
+ virtual void ClearDone(size_t i) {}
+
+ protected:
+ std::vector<std::unique_ptr<RenderPipelineStage>> stages_;
+ // Shifts for every channel at the input of each stage.
+ std::vector<std::vector<std::pair<size_t, size_t>>> channel_shifts_;
+
+ // Amount of (cumulative) padding required by each stage and channel, in
+ // either direction.
+ std::vector<std::vector<std::pair<size_t, size_t>>> padding_;
+
+ FrameDimensions frame_dimensions_;
+
+ std::vector<uint8_t> group_completed_passes_;
+
+ friend class RenderPipelineInput;
+
+ private:
+ void InputReady(size_t group_id, size_t thread_id,
+ const std::vector<std::pair<ImageF*, Rect>>& buffers);
+
+ virtual std::vector<std::pair<ImageF*, Rect>> PrepareBuffers(
+ size_t group_id, size_t thread_id) = 0;
+
+ virtual void ProcessBuffers(size_t group_id, size_t thread_id) = 0;
+
+ // Note that this method may be called multiple times with different (or
+ // equal) `num`.
+ virtual void PrepareForThreadsInternal(size_t num, bool use_group_ids) = 0;
+
+ // Called once frame dimensions and stages are known.
+ virtual void Init() {}
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_RENDER_PIPELINE_RENDER_PIPELINE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/render_pipeline_stage.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/render_pipeline_stage.h
new file mode 100644
index 0000000000..d1a0074161
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/render_pipeline_stage.h
@@ -0,0 +1,171 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_RENDER_PIPELINE_STAGE_H_
+#define LIB_JXL_RENDER_PIPELINE_RENDER_PIPELINE_STAGE_H_
+
+#include <stdint.h>
+
+#include "lib/jxl/base/arch_macros.h"
+#include "lib/jxl/frame_header.h"
+
+namespace jxl {
+
+// The first pixel in the input to RenderPipelineStage will be located at
+// this position. Pixels before this position may be accessed as padding.
+// This should be at least the RoundUpTo(maximum padding / 2, maximum vector
+// size) times 2: this is realized when using Gaborish + EPF + upsampling +
+// chroma subsampling.
+#if JXL_ARCH_ARM
+constexpr size_t kRenderPipelineXOffset = 16;
+#else
+constexpr size_t kRenderPipelineXOffset = 32;
+#endif
+
+enum class RenderPipelineChannelMode {
+ // This channel is not modified by this stage.
+ kIgnored = 0,
+ // This channel is modified in-place.
+ kInPlace = 1,
+ // This channel is modified and written to a new buffer.
+ kInOut = 2,
+ // This channel is only read. These are the only stages that are assumed to
+ // have observable effects, i.e. calls to ProcessRow for other stages may be
+ // omitted if it can be shown they can't affect any kInput stage ProcessRow
+ // call that happens inside image boundaries.
+ kInput = 3,
+};
+
+class RenderPipeline;
+
+class RenderPipelineStage {
+ protected:
+ using Row = float*;
+ using ChannelRows = std::vector<Row>;
+
+ public:
+ using RowInfo = std::vector<ChannelRows>;
+ struct Settings {
+ // Amount of padding required in the various directions by all channels
+ // that have kInOut mode.
+ size_t border_x = 0;
+ size_t border_y = 0;
+
+ // Log2 of the number of columns/rows of output that this stage will produce
+ // for every input row for kInOut channels.
+ size_t shift_x = 0;
+ size_t shift_y = 0;
+
+ static Settings ShiftX(size_t shift, size_t border) {
+ Settings settings;
+ settings.border_x = border;
+ settings.shift_x = shift;
+ return settings;
+ }
+
+ static Settings ShiftY(size_t shift, size_t border) {
+ Settings settings;
+ settings.border_y = border;
+ settings.shift_y = shift;
+ return settings;
+ }
+
+ static Settings Symmetric(size_t shift, size_t border) {
+ Settings settings;
+ settings.border_x = settings.border_y = border;
+ settings.shift_x = settings.shift_y = shift;
+ return settings;
+ }
+
+ static Settings SymmetricBorderOnly(size_t border) {
+ return Symmetric(0, border);
+ }
+ };
+
+ virtual ~RenderPipelineStage() = default;
+
+ // Processes one row of input, producing the appropriate number of rows of
+ // output. Input/output rows can be obtained by calls to
+ // `GetInputRow`/`GetOutputRow`. `xsize+2*xextra` represents the total number
+ // of pixels to be processed in the input row, where the first pixel is at
+ // position `kRenderPipelineXOffset-xextra`. All pixels in the
+ // `[kRenderPipelineXOffset-xextra-border_x,
+ // kRenderPipelineXOffset+xsize+xextra+border_x)` range are initialized and
+ // accessible. `xpos` and `ypos` represent the position of the first
+ // (non-extra, i.e. in position kRenderPipelineXOffset) pixel in the center
+ // row of the input in the full image. `xpos` is a multiple of
+ // `GroupBorderAssigner::kPaddingXRound`. If `settings_.temp_buffer_size` is
+ // nonzero, `temp` will point to an HWY-aligned buffer of at least that number
+ // of floats; concurrent calls will have different buffers.
+ virtual void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const = 0;
+
+ // How each channel will be processed. Channels are numbered starting from
+ // color channels (always 3) and followed by all other channels.
+ virtual RenderPipelineChannelMode GetChannelMode(size_t c) const = 0;
+
+ protected:
+ explicit RenderPipelineStage(Settings settings) : settings_(settings) {}
+
+ virtual Status IsInitialized() const { return true; }
+
+ // Informs the stage about the total size of each channel. Few stages will
+ // actually need to use this information.
+ virtual void SetInputSizes(
+ const std::vector<std::pair<size_t, size_t>>& input_sizes) {}
+
+ virtual Status PrepareForThreads(size_t num_threads) { return true; }
+
+ // Returns a pointer to the input row of channel `c` with offset `y`.
+ // `y` must be in [-settings_.border_y, settings_.border_y]. `c` must be such
+ // that `GetChannelMode(c) != kIgnored`. The returned pointer points to the
+ // offset-ed row (i.e. kRenderPipelineXOffset has been applied).
+ float* GetInputRow(const RowInfo& input_rows, size_t c, int offset) const {
+ JXL_DASSERT(GetChannelMode(c) != RenderPipelineChannelMode::kIgnored);
+ JXL_DASSERT(-offset <= static_cast<int>(settings_.border_y));
+ JXL_DASSERT(offset <= static_cast<int>(settings_.border_y));
+ return input_rows[c][settings_.border_y + offset] + kRenderPipelineXOffset;
+ }
+ // Similar to `GetInputRow`, but can only be used if `GetChannelMode(c) ==
+ // kInOut`. Offset must be less than `1<<settings_.shift_y`.. The returned
+ // pointer points to the offset-ed row (i.e. kRenderPipelineXOffset has been
+ // applied).
+ float* GetOutputRow(const RowInfo& output_rows, size_t c,
+ size_t offset) const {
+ JXL_DASSERT(GetChannelMode(c) == RenderPipelineChannelMode::kInOut);
+ JXL_DASSERT(offset <= 1ul << settings_.shift_y);
+ return output_rows[c][offset] + kRenderPipelineXOffset;
+ }
+
+ // Indicates whether, from this stage on, the pipeline will operate on an
+ // image- rather than frame-sized buffer. Only one stage in the pipeline
+ // should return true, and it should implement ProcessPaddingRow below too.
+ // It is assumed that, if there is a SwitchToImageDimensions() == true stage,
+ // all kInput stages appear after it.
+ virtual bool SwitchToImageDimensions() const { return false; }
+
+ // If SwitchToImageDimensions returns true, then this should set xsize and
+ // ysize to the image size, and frame_origin to the location of the frame
+ // within the image. Otherwise, this is not called at all.
+ virtual void GetImageDimensions(size_t* xsize, size_t* ysize,
+ FrameOrigin* frame_origin) const {}
+
+ // Produces the appropriate output data outside of the frame dimensions. xpos
+ // and ypos are now relative to the full image.
+ virtual void ProcessPaddingRow(const RowInfo& output_rows, size_t xsize,
+ size_t xpos, size_t ypos) const {}
+
+ virtual const char* GetName() const = 0;
+
+ Settings settings_;
+ friend class RenderPipeline;
+ friend class SimpleRenderPipeline;
+ friend class LowMemoryRenderPipeline;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_RENDER_PIPELINE_RENDER_PIPELINE_STAGE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/render_pipeline_test.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/render_pipeline_test.cc
new file mode 100644
index 0000000000..f638807be9
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/render_pipeline_test.cc
@@ -0,0 +1,562 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/render_pipeline.h"
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/dec_frame.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/fake_parallel_runner_testonly.h"
+#include "lib/jxl/icc_codec.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/jpeg/enc_jpeg_data.h"
+#include "lib/jxl/render_pipeline/test_render_pipeline_stages.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+Status DecodeFile(const Span<const uint8_t> file, bool use_slow_pipeline,
+ CodecInOut* io, ThreadPool* pool) {
+ Status ret = true;
+ {
+ BitReader reader(file);
+ BitReaderScopedCloser reader_closer(&reader, &ret);
+ JXL_RETURN_IF_ERROR(reader.ReadFixedBits<16>() == 0x0AFF);
+ JXL_RETURN_IF_ERROR(ReadSizeHeader(&reader, &io->metadata.size));
+ JXL_RETURN_IF_ERROR(ReadImageMetadata(&reader, &io->metadata.m));
+ io->metadata.transform_data.nonserialized_xyb_encoded =
+ io->metadata.m.xyb_encoded;
+ JXL_RETURN_IF_ERROR(Bundle::Read(&reader, &io->metadata.transform_data));
+ if (io->metadata.m.color_encoding.WantICC()) {
+ PaddedBytes icc;
+ JXL_RETURN_IF_ERROR(ReadICC(&reader, &icc));
+ JXL_RETURN_IF_ERROR(io->metadata.m.color_encoding.SetICC(std::move(icc)));
+ }
+ PassesDecoderState dec_state;
+ JXL_RETURN_IF_ERROR(
+ dec_state.output_encoding_info.SetFromMetadata(io->metadata));
+ JXL_RETURN_IF_ERROR(reader.JumpToByteBoundary());
+ io->frames.clear();
+ do {
+ io->frames.emplace_back(&io->metadata.m);
+ // Skip frames that are not displayed.
+ do {
+ size_t frame_start = reader.TotalBitsConsumed() / kBitsPerByte;
+ size_t size_left = file.size() - frame_start;
+ JXL_RETURN_IF_ERROR(
+ DecodeFrame(&dec_state, pool, file.data() + frame_start, size_left,
+ &io->frames.back(), io->metadata, use_slow_pipeline));
+ reader.SkipBits(io->frames.back().decoded_bytes() * kBitsPerByte);
+ } while (dec_state.shared->frame_header.frame_type !=
+ FrameType::kRegularFrame &&
+ dec_state.shared->frame_header.frame_type !=
+ FrameType::kSkipProgressive);
+ } while (!dec_state.shared->frame_header.is_last);
+
+ if (io->frames.empty()) return JXL_FAILURE("Not enough data.");
+
+ if (reader.TotalBitsConsumed() != file.size() * kBitsPerByte) {
+ return JXL_FAILURE("Reader position not at EOF.");
+ }
+ if (!reader.AllReadsWithinBounds()) {
+ return JXL_FAILURE("Reader out of bounds read.");
+ }
+ io->CheckMetadata();
+ // reader is closed here.
+ }
+ return ret;
+}
+
+TEST(RenderPipelineTest, Build) {
+ RenderPipeline::Builder builder(/*num_c=*/1);
+ builder.AddStage(jxl::make_unique<UpsampleXSlowStage>());
+ builder.AddStage(jxl::make_unique<UpsampleYSlowStage>());
+ builder.AddStage(jxl::make_unique<Check0FinalStage>());
+ builder.UseSimpleImplementation();
+ FrameDimensions frame_dimensions;
+ frame_dimensions.Set(/*xsize=*/1024, /*ysize=*/1024, /*group_size_shift=*/0,
+ /*max_hshift=*/0, /*max_vshift=*/0,
+ /*modular_mode=*/false, /*upsampling=*/1);
+ std::move(builder).Finalize(frame_dimensions);
+}
+
+TEST(RenderPipelineTest, CallAllGroups) {
+ RenderPipeline::Builder builder(/*num_c=*/1);
+ builder.AddStage(jxl::make_unique<UpsampleXSlowStage>());
+ builder.AddStage(jxl::make_unique<UpsampleYSlowStage>());
+ builder.AddStage(jxl::make_unique<Check0FinalStage>());
+ builder.UseSimpleImplementation();
+ FrameDimensions frame_dimensions;
+ frame_dimensions.Set(/*xsize=*/1024, /*ysize=*/1024, /*group_size_shift=*/0,
+ /*max_hshift=*/0, /*max_vshift=*/0,
+ /*modular_mode=*/false, /*upsampling=*/1);
+ auto pipeline = std::move(builder).Finalize(frame_dimensions);
+ ASSERT_TRUE(pipeline->PrepareForThreads(1, /*use_group_ids=*/false));
+
+ for (size_t i = 0; i < frame_dimensions.num_groups; i++) {
+ auto input_buffers = pipeline->GetInputBuffers(i, 0);
+ FillPlane(0.0f, input_buffers.GetBuffer(0).first,
+ input_buffers.GetBuffer(0).second);
+ input_buffers.Done();
+ }
+
+ EXPECT_EQ(pipeline->PassesWithAllInput(), 1);
+}
+
+TEST(RenderPipelineTest, BuildFast) {
+ RenderPipeline::Builder builder(/*num_c=*/1);
+ builder.AddStage(jxl::make_unique<UpsampleXSlowStage>());
+ builder.AddStage(jxl::make_unique<UpsampleYSlowStage>());
+ builder.AddStage(jxl::make_unique<Check0FinalStage>());
+ FrameDimensions frame_dimensions;
+ frame_dimensions.Set(/*xsize=*/1024, /*ysize=*/1024, /*group_size_shift=*/0,
+ /*max_hshift=*/0, /*max_vshift=*/0,
+ /*modular_mode=*/false, /*upsampling=*/1);
+ std::move(builder).Finalize(frame_dimensions);
+}
+
+TEST(RenderPipelineTest, CallAllGroupsFast) {
+ RenderPipeline::Builder builder(/*num_c=*/1);
+ builder.AddStage(jxl::make_unique<UpsampleXSlowStage>());
+ builder.AddStage(jxl::make_unique<UpsampleYSlowStage>());
+ builder.AddStage(jxl::make_unique<Check0FinalStage>());
+ builder.UseSimpleImplementation();
+ FrameDimensions frame_dimensions;
+ frame_dimensions.Set(/*xsize=*/1024, /*ysize=*/1024, /*group_size_shift=*/0,
+ /*max_hshift=*/0, /*max_vshift=*/0,
+ /*modular_mode=*/false, /*upsampling=*/1);
+ auto pipeline = std::move(builder).Finalize(frame_dimensions);
+ ASSERT_TRUE(pipeline->PrepareForThreads(1, /*use_group_ids=*/false));
+
+ for (size_t i = 0; i < frame_dimensions.num_groups; i++) {
+ auto input_buffers = pipeline->GetInputBuffers(i, 0);
+ FillPlane(0.0f, input_buffers.GetBuffer(0).first,
+ input_buffers.GetBuffer(0).second);
+ input_buffers.Done();
+ }
+
+ EXPECT_EQ(pipeline->PassesWithAllInput(), 1);
+}
+
+struct RenderPipelineTestInputSettings {
+ // Input image.
+ std::string input_path;
+ size_t xsize, ysize;
+ bool jpeg_transcode = false;
+ // Encoding settings.
+ CompressParams cparams;
+ // Short name for the encoder settings.
+ std::string cparams_descr;
+
+ bool add_spot_color = false;
+
+ Splines splines;
+};
+
+class RenderPipelineTestParam
+ : public ::testing::TestWithParam<RenderPipelineTestInputSettings> {};
+
+TEST_P(RenderPipelineTestParam, PipelineTest) {
+ RenderPipelineTestInputSettings config = GetParam();
+
+ // Use a parallel runner that randomly shuffles tasks to detect possible
+ // border handling bugs.
+ FakeParallelRunner fake_pool(/*order_seed=*/123, /*num_threads=*/8);
+ ThreadPool pool(&JxlFakeParallelRunner, &fake_pool);
+ const PaddedBytes orig = jxl::test::ReadTestData(config.input_path);
+
+ CodecInOut io;
+ if (config.jpeg_transcode) {
+ ASSERT_TRUE(jpeg::DecodeImageJPG(Span<const uint8_t>(orig), &io));
+ } else {
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+ }
+ io.ShrinkTo(config.xsize, config.ysize);
+
+ if (config.add_spot_color) {
+ jxl::ImageF spot(config.xsize, config.ysize);
+ jxl::ZeroFillImage(&spot);
+
+ for (size_t y = 0; y < config.ysize; y++) {
+ float* JXL_RESTRICT row = spot.Row(y);
+ for (size_t x = 0; x < config.xsize; x++) {
+ row[x] = ((x ^ y) & 255) * (1.f / 255.f);
+ }
+ }
+ ExtraChannelInfo info;
+ info.bit_depth.bits_per_sample = 8;
+ info.dim_shift = 0;
+ info.type = jxl::ExtraChannel::kSpotColor;
+ info.spot_color[0] = 0.5f;
+ info.spot_color[1] = 0.2f;
+ info.spot_color[2] = 1.f;
+ info.spot_color[3] = 0.5f;
+
+ io.metadata.m.extra_channel_info.push_back(info);
+ std::vector<jxl::ImageF> ec;
+ ec.push_back(std::move(spot));
+ io.frames[0].SetExtraChannels(std::move(ec));
+ }
+
+ PaddedBytes compressed;
+
+ PassesEncoderState enc_state;
+ enc_state.shared.image_features.splines = config.splines;
+ ASSERT_TRUE(EncodeFile(config.cparams, &io, &enc_state, &compressed,
+ GetJxlCms(), /*aux_out=*/nullptr, &pool));
+
+
+ CodecInOut io_default;
+ ASSERT_TRUE(DecodeFile(Span<const uint8_t>(compressed),
+ /*use_slow_pipeline=*/false, &io_default, &pool));
+ CodecInOut io_slow_pipeline;
+ ASSERT_TRUE(DecodeFile(Span<const uint8_t>(compressed),
+ /*use_slow_pipeline=*/true, &io_slow_pipeline, &pool));
+
+ ASSERT_EQ(io_default.frames.size(), io_slow_pipeline.frames.size());
+ for (size_t i = 0; i < io_default.frames.size(); i++) {
+#if JXL_HIGH_PRECISION
+ constexpr float kMaxError = 1e-5;
+#else
+ constexpr float kMaxError = 1e-4;
+#endif
+ Image3F def = std::move(*io_default.frames[i].color());
+ Image3F pip = std::move(*io_slow_pipeline.frames[i].color());
+ JXL_ASSERT_OK(VerifyRelativeError(pip, def, kMaxError, kMaxError, _));
+ for (size_t ec = 0; ec < io_default.frames[i].extra_channels().size();
+ ec++) {
+ JXL_ASSERT_OK(VerifyRelativeError(
+ io_slow_pipeline.frames[i].extra_channels()[ec],
+ io_default.frames[i].extra_channels()[ec], kMaxError, kMaxError, _));
+ }
+ }
+}
+
+Splines CreateTestSplines() {
+ const ColorCorrelationMap cmap;
+ std::vector<Spline::Point> control_points{{9, 54}, {118, 159}, {97, 3},
+ {10, 40}, {150, 25}, {120, 300}};
+ const Spline spline{
+ control_points,
+ /*color_dct=*/
+ {{0.03125f, 0.00625f, 0.003125f}, {1.f, 0.321875f}, {1.f, 0.24375f}},
+ /*sigma_dct=*/{0.3125f, 0.f, 0.f, 0.0625f}};
+ std::vector<Spline> spline_data = {spline};
+ std::vector<QuantizedSpline> quantized_splines;
+ std::vector<Spline::Point> starting_points;
+ for (const Spline& spline : spline_data) {
+ quantized_splines.emplace_back(spline, /*quantization_adjustment=*/0,
+ cmap.YtoXRatio(0), cmap.YtoBRatio(0));
+ starting_points.push_back(spline.control_points.front());
+ }
+ return Splines(/*quantization_adjustment=*/0, std::move(quantized_splines),
+ std::move(starting_points));
+}
+
+std::vector<RenderPipelineTestInputSettings> GeneratePipelineTests() {
+ std::vector<RenderPipelineTestInputSettings> all_tests;
+
+ std::pair<size_t, size_t> sizes[] = {
+ {3, 8}, {128, 128}, {256, 256}, {258, 258}, {533, 401}, {777, 777},
+ };
+
+ for (auto size : sizes) {
+ RenderPipelineTestInputSettings settings;
+ settings.input_path = "jxl/flower/flower.png";
+ settings.xsize = size.first;
+ settings.ysize = size.second;
+
+ // Base settings.
+ settings.cparams.butteraugli_distance = 1.0;
+ settings.cparams.patches = Override::kOff;
+ settings.cparams.dots = Override::kOff;
+ settings.cparams.gaborish = Override::kOff;
+ settings.cparams.epf = 0;
+ settings.cparams.color_transform = ColorTransform::kXYB;
+
+ {
+ auto s = settings;
+ s.cparams_descr = "NoGabNoEpfNoPatches";
+ all_tests.push_back(s);
+ }
+
+ {
+ auto s = settings;
+ s.cparams.color_transform = ColorTransform::kNone;
+ s.cparams_descr = "NoGabNoEpfNoPatchesNoXYB";
+ all_tests.push_back(s);
+ }
+
+ {
+ auto s = settings;
+ s.cparams.gaborish = Override::kOn;
+ s.cparams_descr = "GabNoEpfNoPatches";
+ all_tests.push_back(s);
+ }
+
+ {
+ auto s = settings;
+ s.cparams.epf = 1;
+ s.cparams_descr = "NoGabEpf1NoPatches";
+ all_tests.push_back(s);
+ }
+
+ {
+ auto s = settings;
+ s.cparams.epf = 2;
+ s.cparams_descr = "NoGabEpf2NoPatches";
+ all_tests.push_back(s);
+ }
+
+ {
+ auto s = settings;
+ s.cparams.epf = 3;
+ s.cparams_descr = "NoGabEpf3NoPatches";
+ all_tests.push_back(s);
+ }
+
+ {
+ auto s = settings;
+ s.cparams.gaborish = Override::kOn;
+ s.cparams.epf = 3;
+ s.cparams_descr = "GabEpf3NoPatches";
+ all_tests.push_back(s);
+ }
+
+ {
+ auto s = settings;
+ s.cparams_descr = "Splines";
+ s.splines = CreateTestSplines();
+ all_tests.push_back(s);
+ }
+
+ for (size_t ups : {2, 4, 8}) {
+ {
+ auto s = settings;
+ s.cparams.resampling = ups;
+ s.cparams_descr = "Ups" + std::to_string(ups);
+ all_tests.push_back(s);
+ }
+ {
+ auto s = settings;
+ s.cparams.resampling = ups;
+ s.cparams.epf = 1;
+ s.cparams_descr = "Ups" + std::to_string(ups) + "EPF1";
+ all_tests.push_back(s);
+ }
+ {
+ auto s = settings;
+ s.cparams.resampling = ups;
+ s.cparams.gaborish = Override::kOn;
+ s.cparams.epf = 1;
+ s.cparams_descr = "Ups" + std::to_string(ups) + "GabEPF1";
+ all_tests.push_back(s);
+ }
+ }
+
+ {
+ auto s = settings;
+ s.cparams_descr = "Noise";
+ s.cparams.photon_noise_iso = 3200;
+ all_tests.push_back(s);
+ }
+
+ {
+ auto s = settings;
+ s.cparams_descr = "NoiseUps";
+ s.cparams.photon_noise_iso = 3200;
+ s.cparams.resampling = 2;
+ all_tests.push_back(s);
+ }
+
+ {
+ auto s = settings;
+ s.cparams_descr = "ModularLossless";
+ s.cparams.modular_mode = true;
+ s.cparams.butteraugli_distance = 0;
+ all_tests.push_back(s);
+ }
+
+ {
+ auto s = settings;
+ s.cparams_descr = "ProgressiveDC";
+ s.cparams.progressive_dc = 1;
+ all_tests.push_back(s);
+ }
+
+ {
+ auto s = settings;
+ s.cparams_descr = "ModularLossy";
+ s.cparams.modular_mode = true;
+ s.cparams.butteraugli_distance = 1.f;
+ all_tests.push_back(s);
+ }
+
+ {
+ auto s = settings;
+ s.input_path = "jxl/flower/flower_alpha.png";
+ s.cparams_descr = "AlphaVarDCT";
+ all_tests.push_back(s);
+ }
+
+ {
+ auto s = settings;
+ s.input_path = "jxl/flower/flower_alpha.png";
+ s.cparams_descr = "AlphaVarDCTUpsamplingEPF";
+ s.cparams.epf = 1;
+ s.cparams.ec_resampling = 2;
+ all_tests.push_back(s);
+ }
+
+ {
+ auto s = settings;
+ s.cparams.modular_mode = true;
+ s.cparams.butteraugli_distance = 0;
+ s.input_path = "jxl/flower/flower_alpha.png";
+ s.cparams_descr = "AlphaLossless";
+ all_tests.push_back(s);
+ }
+
+ {
+ auto s = settings;
+ s.input_path = "jxl/flower/flower_alpha.png";
+ s.cparams_descr = "AlphaDownsample";
+ s.cparams.ec_resampling = 2;
+ all_tests.push_back(s);
+ }
+
+ {
+ auto s = settings;
+ s.cparams_descr = "SpotColor";
+ s.add_spot_color = true;
+ all_tests.push_back(s);
+ }
+ }
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+ for (const char* input : {"jxl/flower/flower.png.im_q85_444.jpg",
+ "jxl/flower/flower.png.im_q85_420.jpg",
+ "jxl/flower/flower.png.im_q85_422.jpg",
+ "jxl/flower/flower.png.im_q85_440.jpg"}) {
+ RenderPipelineTestInputSettings settings;
+ settings.input_path = input;
+ settings.jpeg_transcode = true;
+ settings.xsize = 2268;
+ settings.ysize = 1512;
+ settings.cparams_descr = "Default";
+ all_tests.push_back(settings);
+ }
+
+#endif
+
+ {
+ RenderPipelineTestInputSettings settings;
+ settings.input_path = "jxl/grayscale_patches.png";
+ settings.xsize = 1011;
+ settings.ysize = 277;
+ settings.cparams_descr = "Patches";
+ all_tests.push_back(settings);
+ }
+
+ {
+ RenderPipelineTestInputSettings settings;
+ settings.input_path = "jxl/grayscale_patches.png";
+ settings.xsize = 1011;
+ settings.ysize = 277;
+ settings.cparams.photon_noise_iso = 1000;
+ settings.cparams_descr = "PatchesAndNoise";
+ all_tests.push_back(settings);
+ }
+
+ {
+ RenderPipelineTestInputSettings settings;
+ settings.input_path = "jxl/grayscale_patches.png";
+ settings.xsize = 1011;
+ settings.ysize = 277;
+ settings.cparams.resampling = 2;
+ settings.cparams_descr = "PatchesAndUps2";
+ all_tests.push_back(settings);
+ }
+
+ return all_tests;
+}
+
+std::ostream& operator<<(std::ostream& os,
+ const RenderPipelineTestInputSettings& c) {
+ std::string filename;
+ size_t pos = c.input_path.find_last_of('/');
+ if (pos == std::string::npos) {
+ filename = c.input_path;
+ } else {
+ filename = c.input_path.substr(pos + 1);
+ }
+ std::replace_if(
+ filename.begin(), filename.end(), [](char c) { return !isalnum(c); },
+ '_');
+ os << filename << "_" << (c.jpeg_transcode ? "JPEG_" : "") << c.xsize << "x"
+ << c.ysize << "_" << c.cparams_descr;
+ return os;
+}
+
+std::string PipelineTestDescription(
+ const testing::TestParamInfo<RenderPipelineTestParam::ParamType>& info) {
+ std::stringstream name;
+ name << info.param;
+ return name.str();
+}
+
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(RenderPipelineTest, RenderPipelineTestParam,
+ testing::ValuesIn(GeneratePipelineTests()),
+ PipelineTestDescription);
+
+TEST(RenderPipelineDecodingTest, Animation) {
+ FakeParallelRunner fake_pool(/*order_seed=*/123, /*num_threads=*/8);
+ ThreadPool pool(&JxlFakeParallelRunner, &fake_pool);
+
+ PaddedBytes compressed =
+ jxl::test::ReadTestData("jxl/blending/cropped_traffic_light.jxl");
+
+ CodecInOut io_default;
+ ASSERT_TRUE(DecodeFile(Span<const uint8_t>(compressed),
+ /*use_slow_pipeline=*/false, &io_default, &pool));
+ CodecInOut io_slow_pipeline;
+ ASSERT_TRUE(DecodeFile(Span<const uint8_t>(compressed),
+ /*use_slow_pipeline=*/true, &io_slow_pipeline, &pool));
+
+ ASSERT_EQ(io_default.frames.size(), io_slow_pipeline.frames.size());
+ for (size_t i = 0; i < io_default.frames.size(); i++) {
+#if JXL_HIGH_PRECISION
+ constexpr float kMaxError = 1e-5;
+#else
+ constexpr float kMaxError = 1e-4;
+#endif
+
+ Image3F fast_pipeline = std::move(*io_default.frames[i].color());
+ Image3F slow_pipeline = std::move(*io_slow_pipeline.frames[i].color());
+ JXL_ASSERT_OK(VerifyRelativeError(slow_pipeline, fast_pipeline, kMaxError,
+ kMaxError, _))
+ for (size_t ec = 0; ec < io_default.frames[i].extra_channels().size();
+ ec++) {
+ JXL_ASSERT_OK(VerifyRelativeError(
+ io_slow_pipeline.frames[i].extra_channels()[ec],
+ io_default.frames[i].extra_channels()[ec], kMaxError, kMaxError, _));
+ }
+ }
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/simple_render_pipeline.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/simple_render_pipeline.cc
new file mode 100644
index 0000000000..4495288860
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/simple_render_pipeline.cc
@@ -0,0 +1,266 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/simple_render_pipeline.h"
+
+#include <hwy/base.h>
+
+#include "lib/jxl/image_ops.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+
+void SimpleRenderPipeline::PrepareForThreadsInternal(size_t num,
+ bool use_group_ids) {
+ if (!channel_data_.empty()) {
+ return;
+ }
+ auto ch_size = [](size_t frame_size, size_t shift) {
+ return DivCeil(frame_size, 1 << shift) + kRenderPipelineXOffset * 2;
+ };
+ for (size_t c = 0; c < channel_shifts_[0].size(); c++) {
+ channel_data_.push_back(ImageF(
+ ch_size(frame_dimensions_.xsize_upsampled, channel_shifts_[0][c].first),
+ ch_size(frame_dimensions_.ysize_upsampled,
+ channel_shifts_[0][c].second)));
+ msan::PoisonImage(channel_data_.back());
+ }
+}
+
+Rect SimpleRenderPipeline::MakeChannelRect(size_t group_id, size_t channel) {
+ size_t base_color_shift =
+ CeilLog2Nonzero(frame_dimensions_.xsize_upsampled_padded /
+ frame_dimensions_.xsize_padded);
+
+ const size_t gx = group_id % frame_dimensions_.xsize_groups;
+ const size_t gy = group_id / frame_dimensions_.xsize_groups;
+ size_t xgroupdim = (frame_dimensions_.group_dim << base_color_shift) >>
+ channel_shifts_[0][channel].first;
+ size_t ygroupdim = (frame_dimensions_.group_dim << base_color_shift) >>
+ channel_shifts_[0][channel].second;
+ return Rect(
+ kRenderPipelineXOffset + gx * xgroupdim,
+ kRenderPipelineXOffset + gy * ygroupdim, xgroupdim, ygroupdim,
+ kRenderPipelineXOffset + DivCeil(frame_dimensions_.xsize_upsampled,
+ 1 << channel_shifts_[0][channel].first),
+ kRenderPipelineXOffset +
+ DivCeil(frame_dimensions_.ysize_upsampled,
+ 1 << channel_shifts_[0][channel].second));
+}
+
+std::vector<std::pair<ImageF*, Rect>> SimpleRenderPipeline::PrepareBuffers(
+ size_t group_id, size_t thread_id) {
+ std::vector<std::pair<ImageF*, Rect>> ret;
+ for (size_t c = 0; c < channel_data_.size(); c++) {
+ ret.emplace_back(&channel_data_[c], MakeChannelRect(group_id, c));
+ }
+ return ret;
+}
+
+void SimpleRenderPipeline::ProcessBuffers(size_t group_id, size_t thread_id) {
+ for (size_t c = 0; c < channel_data_.size(); c++) {
+ Rect r = MakeChannelRect(group_id, c);
+ (void)r;
+ JXL_CHECK_PLANE_INITIALIZED(channel_data_[c], r, c);
+ }
+
+ if (PassesWithAllInput() <= processed_passes_) return;
+ processed_passes_++;
+
+ for (size_t stage_id = 0; stage_id < stages_.size(); stage_id++) {
+ const auto& stage = stages_[stage_id];
+ // Prepare buffers for kInOut channels.
+ std::vector<ImageF> new_channels(channel_data_.size());
+ std::vector<ImageF*> output_channels(channel_data_.size());
+
+ std::vector<std::pair<size_t, size_t>> input_sizes(channel_data_.size());
+ for (size_t c = 0; c < channel_data_.size(); c++) {
+ input_sizes[c] =
+ std::make_pair(channel_data_[c].xsize() - kRenderPipelineXOffset * 2,
+ channel_data_[c].ysize() - kRenderPipelineXOffset * 2);
+ }
+
+ for (size_t c = 0; c < channel_data_.size(); c++) {
+ if (stage->GetChannelMode(c) != RenderPipelineChannelMode::kInOut) {
+ continue;
+ }
+ // Ensure that the newly allocated channels are large enough to avoid
+ // problems with padding.
+ new_channels[c] =
+ ImageF(frame_dimensions_.xsize_upsampled_padded +
+ kRenderPipelineXOffset * 2 + hwy::kMaxVectorSize * 8,
+ frame_dimensions_.ysize_upsampled_padded +
+ kRenderPipelineXOffset * 2);
+ new_channels[c].ShrinkTo(
+ (input_sizes[c].first << stage->settings_.shift_x) +
+ kRenderPipelineXOffset * 2,
+ (input_sizes[c].second << stage->settings_.shift_y) +
+ kRenderPipelineXOffset * 2);
+ output_channels[c] = &new_channels[c];
+ }
+
+ auto get_row = [&](size_t c, int64_t y) {
+ return channel_data_[c].Row(kRenderPipelineXOffset + y) +
+ kRenderPipelineXOffset;
+ };
+
+ // Add mirrored pixes to all kInOut channels.
+ for (size_t c = 0; c < channel_data_.size(); c++) {
+ if (stage->GetChannelMode(c) != RenderPipelineChannelMode::kInOut) {
+ continue;
+ }
+ // Horizontal mirroring.
+ for (size_t y = 0; y < input_sizes[c].second; y++) {
+ float* row = get_row(c, y);
+ for (size_t ix = 0; ix < stage->settings_.border_x; ix++) {
+ *(row - ix - 1) = row[Mirror(-ssize_t(ix) - 1, input_sizes[c].first)];
+ }
+ for (size_t ix = 0; ix < stage->settings_.border_x; ix++) {
+ *(row + ix + input_sizes[c].first) =
+ row[Mirror(ix + input_sizes[c].first, input_sizes[c].first)];
+ }
+ }
+ // Vertical mirroring.
+ for (int y = 0; y < static_cast<int>(stage->settings_.border_y); y++) {
+ memcpy(get_row(c, -y - 1) - stage->settings_.border_x,
+ get_row(c, Mirror(-ssize_t(y) - 1, input_sizes[c].second)) -
+ stage->settings_.border_x,
+ sizeof(float) *
+ (input_sizes[c].first + 2 * stage->settings_.border_x));
+ }
+ for (int y = 0; y < static_cast<int>(stage->settings_.border_y); y++) {
+ memcpy(
+ get_row(c, input_sizes[c].second + y) - stage->settings_.border_x,
+ get_row(c,
+ Mirror(input_sizes[c].second + y, input_sizes[c].second)) -
+ stage->settings_.border_x,
+ sizeof(float) *
+ (input_sizes[c].first + 2 * stage->settings_.border_x));
+ }
+ }
+
+ size_t ysize = 0;
+ size_t xsize = 0;
+ for (size_t c = 0; c < channel_data_.size(); c++) {
+ if (stage->GetChannelMode(c) == RenderPipelineChannelMode::kIgnored) {
+ continue;
+ }
+ ysize = std::max(input_sizes[c].second, ysize);
+ xsize = std::max(input_sizes[c].first, xsize);
+ }
+
+ JXL_ASSERT(ysize != 0);
+ JXL_ASSERT(xsize != 0);
+
+ RenderPipelineStage::RowInfo input_rows(channel_data_.size());
+ RenderPipelineStage::RowInfo output_rows(channel_data_.size());
+
+ // Run the pipeline.
+ {
+ stage->SetInputSizes(input_sizes);
+ int border_y = stage->settings_.border_y;
+ for (size_t y = 0; y < ysize; y++) {
+ // Prepare input rows.
+ for (size_t c = 0; c < channel_data_.size(); c++) {
+ if (stage->GetChannelMode(c) == RenderPipelineChannelMode::kIgnored) {
+ continue;
+ }
+ input_rows[c].resize(2 * border_y + 1);
+ for (int iy = -border_y; iy <= border_y; iy++) {
+ input_rows[c][iy + border_y] =
+ channel_data_[c].Row(y + kRenderPipelineXOffset + iy);
+ }
+ }
+ // Prepare output rows.
+ for (size_t c = 0; c < channel_data_.size(); c++) {
+ if (!output_channels[c]) continue;
+ output_rows[c].resize(1 << stage->settings_.shift_y);
+ for (size_t iy = 0; iy < output_rows[c].size(); iy++) {
+ output_rows[c][iy] = output_channels[c]->Row(
+ (y << stage->settings_.shift_y) + iy + kRenderPipelineXOffset);
+ }
+ }
+ stage->ProcessRow(input_rows, output_rows, /*xextra=*/0, xsize,
+ /*xpos=*/0, y, thread_id);
+ }
+ }
+
+ // Move new channels to current channels.
+ for (size_t c = 0; c < channel_data_.size(); c++) {
+ if (stage->GetChannelMode(c) != RenderPipelineChannelMode::kInOut) {
+ continue;
+ }
+ channel_data_[c] = std::move(new_channels[c]);
+ }
+ for (size_t c = 0; c < channel_data_.size(); c++) {
+ size_t next_stage = std::min(stage_id + 1, channel_shifts_.size() - 1);
+ size_t xsize = DivCeil(frame_dimensions_.xsize_upsampled,
+ 1 << channel_shifts_[next_stage][c].first);
+ size_t ysize = DivCeil(frame_dimensions_.ysize_upsampled,
+ 1 << channel_shifts_[next_stage][c].second);
+ channel_data_[c].ShrinkTo(xsize + 2 * kRenderPipelineXOffset,
+ ysize + 2 * kRenderPipelineXOffset);
+ JXL_CHECK_PLANE_INITIALIZED(
+ channel_data_[c],
+ Rect(kRenderPipelineXOffset, kRenderPipelineXOffset, xsize, ysize),
+ c);
+ }
+
+ if (stage->SwitchToImageDimensions()) {
+ size_t image_xsize, image_ysize;
+ FrameOrigin frame_origin;
+ stage->GetImageDimensions(&image_xsize, &image_ysize, &frame_origin);
+ frame_dimensions_.Set(image_xsize, image_ysize, 0, 0, 0, false, 1);
+ std::vector<ImageF> old_channels = std::move(channel_data_);
+ channel_data_.clear();
+ channel_data_.reserve(old_channels.size());
+ for (size_t c = 0; c < old_channels.size(); c++) {
+ channel_data_.emplace_back(2 * kRenderPipelineXOffset + image_xsize,
+ 2 * kRenderPipelineXOffset + image_ysize);
+ }
+ for (size_t y = 0; y < image_ysize; ++y) {
+ for (size_t c = 0; c < channel_data_.size(); c++) {
+ output_rows[c].resize(1);
+ output_rows[c][0] = channel_data_[c].Row(kRenderPipelineXOffset + y);
+ }
+ // TODO(sboukortt): consider doing this only on the parts of the
+ // background that won't be occluded.
+ stage->ProcessPaddingRow(output_rows, image_xsize, 0, y);
+ }
+ ssize_t x0 = frame_origin.x0;
+ ssize_t y0 = frame_origin.y0;
+ size_t x0_fg = 0;
+ size_t y0_fg = 0;
+ if (x0 < 0) {
+ xsize += x0;
+ x0_fg -= x0;
+ x0 = 0;
+ }
+ if (x0 + xsize > image_xsize) {
+ xsize = image_xsize - x0;
+ }
+ if (y0 < 0) {
+ ysize += y0;
+ y0_fg -= x0;
+ y0 = 0;
+ }
+ if (y0 + ysize > image_ysize) {
+ ysize = image_ysize - y0;
+ }
+ const Rect rect_fg_relative_to_image =
+ Rect(x0, y0, xsize, ysize)
+ .Translate(kRenderPipelineXOffset, kRenderPipelineXOffset);
+ const Rect rect_fg =
+ Rect(x0_fg, y0_fg, xsize, ysize)
+ .Translate(kRenderPipelineXOffset, kRenderPipelineXOffset);
+ for (size_t c = 0; c < channel_data_.size(); c++) {
+ CopyImageTo(rect_fg, old_channels[c], rect_fg_relative_to_image,
+ &channel_data_[c]);
+ }
+ }
+ }
+}
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/simple_render_pipeline.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/simple_render_pipeline.h
new file mode 100644
index 0000000000..10f4505912
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/simple_render_pipeline.h
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_SIMPLE_RENDER_PIPELINE_H_
+#define LIB_JXL_RENDER_PIPELINE_SIMPLE_RENDER_PIPELINE_H_
+
+#include <stdint.h>
+
+#include "lib/jxl/render_pipeline/render_pipeline.h"
+
+namespace jxl {
+
+// A RenderPipeline that is "obviously correct"; it may use potentially large
+// amounts of memory and be slow. It is intended to be used mostly for testing
+// purposes.
+class SimpleRenderPipeline : public RenderPipeline {
+ std::vector<std::pair<ImageF*, Rect>> PrepareBuffers(
+ size_t group_id, size_t thread_id) override;
+
+ void ProcessBuffers(size_t group_id, size_t thread_id) override;
+
+ void PrepareForThreadsInternal(size_t num, bool use_group_ids) override;
+
+ // Full frame buffers. Both X and Y dimensions are padded by
+ // kRenderPipelineXOffset.
+ std::vector<ImageF> channel_data_;
+ size_t processed_passes_ = 0;
+
+ private:
+ Rect MakeChannelRect(size_t group_id, size_t channel);
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_RENDER_PIPELINE_SIMPLE_RENDER_PIPELINE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_blending.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_blending.cc
new file mode 100644
index 0000000000..b6668c5625
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_blending.cc
@@ -0,0 +1,247 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_blending.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_blending.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/blending.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+class BlendingStage : public RenderPipelineStage {
+ public:
+ explicit BlendingStage(const PassesDecoderState* dec_state,
+ const ColorEncoding& frame_color_encoding)
+ : RenderPipelineStage(RenderPipelineStage::Settings()),
+ state_(*dec_state->shared) {
+ image_xsize_ = state_.frame_header.nonserialized_metadata->xsize();
+ image_ysize_ = state_.frame_header.nonserialized_metadata->ysize();
+ extra_channel_info_ =
+ &state_.frame_header.nonserialized_metadata->m.extra_channel_info;
+ info_ = state_.frame_header.blending_info;
+ const std::vector<BlendingInfo>& ec_info =
+ state_.frame_header.extra_channel_blending_info;
+ const ImageBundle& bg = state_.reference_frames[info_.source].frame;
+ bg_ = &bg;
+ if (bg.xsize() == 0 || bg.ysize() == 0) {
+ zeroes_.resize(image_xsize_, 0.f);
+ } else if (state_.reference_frames[info_.source].ib_is_in_xyb) {
+ initialized_ = JXL_FAILURE(
+ "Trying to blend XYB reference frame %i and non-XYB frame",
+ info_.source);
+ return;
+ } else if (std::any_of(ec_info.begin(), ec_info.end(),
+ [this](const BlendingInfo& info) {
+ const ImageBundle& bg =
+ state_.reference_frames[info.source].frame;
+ return bg.xsize() == 0 || bg.ysize() == 0;
+ })) {
+ zeroes_.resize(image_xsize_, 0.f);
+ }
+
+ auto verify_bg_size = [&](const ImageBundle& bg) -> Status {
+ if (bg.xsize() != 0 && bg.ysize() != 0 &&
+ (bg.xsize() < image_xsize_ || bg.ysize() < image_ysize_ ||
+ bg.origin.x0 != 0 || bg.origin.y0 != 0)) {
+ return JXL_FAILURE("Trying to use a %" PRIuS "x%" PRIuS
+ " crop as a background",
+ bg.xsize(), bg.ysize());
+ }
+ return true;
+ };
+
+ Status ok = verify_bg_size(bg);
+ for (const auto& info : ec_info) {
+ const ImageBundle& bg = state_.reference_frames[info.source].frame;
+ if (!!ok) ok = verify_bg_size(bg);
+ }
+ if (!ok) {
+ initialized_ = ok;
+ return;
+ }
+
+ if (state_.metadata->m.xyb_encoded) {
+ if (!dec_state->output_encoding_info.color_encoding_is_original) {
+ initialized_ = JXL_FAILURE("Blending in unsupported color space");
+ return;
+ }
+ }
+
+ blending_info_.resize(ec_info.size() + 1);
+ auto make_blending = [&](const BlendingInfo& info, PatchBlending* pb) {
+ pb->alpha_channel = info.alpha_channel;
+ pb->clamp = info.clamp;
+ switch (info.mode) {
+ case BlendMode::kReplace: {
+ pb->mode = PatchBlendMode::kReplace;
+ break;
+ }
+ case BlendMode::kAdd: {
+ pb->mode = PatchBlendMode::kAdd;
+ break;
+ }
+ case BlendMode::kMul: {
+ pb->mode = PatchBlendMode::kMul;
+ break;
+ }
+ case BlendMode::kBlend: {
+ pb->mode = PatchBlendMode::kBlendAbove;
+ break;
+ }
+ case BlendMode::kAlphaWeightedAdd: {
+ pb->mode = PatchBlendMode::kAlphaWeightedAddAbove;
+ break;
+ }
+ default: {
+ JXL_ABORT("Invalid blend mode"); // should have failed to decode
+ }
+ }
+ };
+ make_blending(info_, &blending_info_[0]);
+ for (size_t i = 0; i < ec_info.size(); i++) {
+ make_blending(ec_info[i], &blending_info_[1 + i]);
+ }
+ }
+
+ Status IsInitialized() const override { return initialized_; }
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ PROFILER_ZONE("Blend");
+ JXL_ASSERT(initialized_);
+ const FrameOrigin& frame_origin = state_.frame_header.frame_origin;
+ ssize_t bg_xpos = frame_origin.x0 + static_cast<ssize_t>(xpos);
+ ssize_t bg_ypos = frame_origin.y0 + static_cast<ssize_t>(ypos);
+ int offset = 0;
+ if (bg_xpos + static_cast<ssize_t>(xsize) <= 0 ||
+ frame_origin.x0 >= static_cast<ssize_t>(image_xsize_) || bg_ypos < 0 ||
+ bg_ypos >= static_cast<ssize_t>(image_ysize_)) {
+ return;
+ }
+ if (bg_xpos < 0) {
+ offset -= bg_xpos;
+ xsize += bg_xpos;
+ bg_xpos = 0;
+ }
+ if (bg_xpos + xsize > image_xsize_) {
+ xsize =
+ std::max<ssize_t>(0, static_cast<ssize_t>(image_xsize_) - bg_xpos);
+ }
+ std::vector<const float*> bg_row_ptrs_(input_rows.size());
+ std::vector<float*> fg_row_ptrs_(input_rows.size());
+ size_t num_c = std::min(input_rows.size(), extra_channel_info_->size() + 3);
+ for (size_t c = 0; c < num_c; ++c) {
+ fg_row_ptrs_[c] = GetInputRow(input_rows, c, 0) + offset;
+ if (c < 3) {
+ bg_row_ptrs_[c] = bg_->xsize() != 0 && bg_->ysize() != 0
+ ? bg_->color().ConstPlaneRow(c, bg_ypos) + bg_xpos
+ : zeroes_.data();
+ } else {
+ const ImageBundle& ec_bg =
+ state_
+ .reference_frames[state_.frame_header
+ .extra_channel_blending_info[c - 3]
+ .source]
+ .frame;
+ bg_row_ptrs_[c] =
+ ec_bg.xsize() != 0 && ec_bg.ysize() != 0
+ ? ec_bg.extra_channels()[c - 3].ConstRow(bg_ypos) + bg_xpos
+ : zeroes_.data();
+ }
+ }
+ PerformBlending(bg_row_ptrs_.data(), fg_row_ptrs_.data(),
+ fg_row_ptrs_.data(), 0, xsize, blending_info_[0],
+ blending_info_.data() + 1, *extra_channel_info_);
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return RenderPipelineChannelMode::kInPlace;
+ }
+
+ bool SwitchToImageDimensions() const override { return true; }
+
+ void GetImageDimensions(size_t* xsize, size_t* ysize,
+ FrameOrigin* frame_origin) const override {
+ *xsize = image_xsize_;
+ *ysize = image_ysize_;
+ *frame_origin = state_.frame_header.frame_origin;
+ }
+
+ void ProcessPaddingRow(const RowInfo& output_rows, size_t xsize, size_t xpos,
+ size_t ypos) const override {
+ if (bg_->xsize() == 0 || bg_->ysize() == 0) {
+ for (size_t c = 0; c < 3; ++c) {
+ memset(GetInputRow(output_rows, c, 0), 0, xsize * sizeof(float));
+ }
+ } else {
+ for (size_t c = 0; c < 3; ++c) {
+ memcpy(GetInputRow(output_rows, c, 0),
+ bg_->color().ConstPlaneRow(c, ypos) + xpos,
+ xsize * sizeof(float));
+ }
+ }
+ for (size_t ec = 0; ec < extra_channel_info_->size(); ++ec) {
+ const ImageBundle& ec_bg =
+ state_
+ .reference_frames
+ [state_.frame_header.extra_channel_blending_info[ec].source]
+ .frame;
+ if (ec_bg.xsize() == 0 || ec_bg.ysize() == 0) {
+ memset(GetInputRow(output_rows, 3 + ec, 0), 0, xsize * sizeof(float));
+ } else {
+ memcpy(GetInputRow(output_rows, 3 + ec, 0),
+ ec_bg.extra_channels()[ec].ConstRow(ypos) + xpos,
+ xsize * sizeof(float));
+ }
+ }
+ }
+
+ const char* GetName() const override { return "Blending"; }
+
+ private:
+ const PassesSharedState& state_;
+ BlendingInfo info_;
+ const ImageBundle* bg_;
+ Status initialized_ = true;
+ size_t image_xsize_;
+ size_t image_ysize_;
+ std::vector<PatchBlending> blending_info_;
+ const std::vector<ExtraChannelInfo>* extra_channel_info_;
+ std::vector<float> zeroes_;
+};
+
+std::unique_ptr<RenderPipelineStage> GetBlendingStage(
+ const PassesDecoderState* dec_state,
+ const ColorEncoding& frame_color_encoding) {
+ return jxl::make_unique<BlendingStage>(dec_state, frame_color_encoding);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetBlendingStage);
+
+std::unique_ptr<RenderPipelineStage> GetBlendingStage(
+ const PassesDecoderState* dec_state,
+ const ColorEncoding& frame_color_encoding) {
+ return HWY_DYNAMIC_DISPATCH(GetBlendingStage)(dec_state,
+ frame_color_encoding);
+}
+
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_blending.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_blending.h
new file mode 100644
index 0000000000..c8db7490cd
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_blending.h
@@ -0,0 +1,24 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_BLENDING_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_BLENDING_H_
+
+#include <utility>
+
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+#include "lib/jxl/splines.h"
+
+namespace jxl {
+
+// Applies blending if applicable.
+std::unique_ptr<RenderPipelineStage> GetBlendingStage(
+ const PassesDecoderState* dec_state,
+ const ColorEncoding& frame_color_encoding);
+
+} // namespace jxl
+
+#endif // LIB_JXL_RENDER_PIPELINE_STAGE_BLENDING_H_
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_chroma_upsampling.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_chroma_upsampling.cc
new file mode 100644
index 0000000000..9b73ee91f1
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_chroma_upsampling.cc
@@ -0,0 +1,129 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_chroma_upsampling.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_chroma_upsampling.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/simd_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+
+class HorizontalChromaUpsamplingStage : public RenderPipelineStage {
+ public:
+ explicit HorizontalChromaUpsamplingStage(size_t channel)
+ : RenderPipelineStage(RenderPipelineStage::Settings::ShiftX(
+ /*shift=*/1, /*border=*/1)),
+ c_(channel) {}
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ PROFILER_ZONE("HorizontalChromaUpsampling");
+ HWY_FULL(float) df;
+ xextra = RoundUpTo(xextra, Lanes(df));
+ auto threefour = Set(df, 0.75f);
+ auto onefour = Set(df, 0.25f);
+ const float* row_in = GetInputRow(input_rows, c_, 0);
+ float* row_out = GetOutputRow(output_rows, c_, 0);
+ for (ssize_t x = -xextra; x < static_cast<ssize_t>(xsize + xextra);
+ x += Lanes(df)) {
+ auto current = Mul(LoadU(df, row_in + x), threefour);
+ auto prev = LoadU(df, row_in + x - 1);
+ auto next = LoadU(df, row_in + x + 1);
+ auto left = MulAdd(onefour, prev, current);
+ auto right = MulAdd(onefour, next, current);
+ StoreInterleaved(df, left, right, row_out + x * 2);
+ }
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return c == c_ ? RenderPipelineChannelMode::kInOut
+ : RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "HChromaUps"; }
+
+ private:
+ size_t c_;
+};
+
+class VerticalChromaUpsamplingStage : public RenderPipelineStage {
+ public:
+ explicit VerticalChromaUpsamplingStage(size_t channel)
+ : RenderPipelineStage(RenderPipelineStage::Settings::ShiftY(
+ /*shift=*/1, /*border=*/1)),
+ c_(channel) {}
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ PROFILER_ZONE("VerticalChromaUpsampling");
+ HWY_FULL(float) df;
+ xextra = RoundUpTo(xextra, Lanes(df));
+ auto threefour = Set(df, 0.75f);
+ auto onefour = Set(df, 0.25f);
+ const float* row_top = GetInputRow(input_rows, c_, -1);
+ const float* row_mid = GetInputRow(input_rows, c_, 0);
+ const float* row_bot = GetInputRow(input_rows, c_, 1);
+ float* row_out0 = GetOutputRow(output_rows, c_, 0);
+ float* row_out1 = GetOutputRow(output_rows, c_, 1);
+ for (ssize_t x = -xextra; x < static_cast<ssize_t>(xsize + xextra);
+ x += Lanes(df)) {
+ auto it = LoadU(df, row_top + x);
+ auto im = LoadU(df, row_mid + x);
+ auto ib = LoadU(df, row_bot + x);
+ auto im_scaled = Mul(im, threefour);
+ Store(MulAdd(it, onefour, im_scaled), df, row_out0 + x);
+ Store(MulAdd(ib, onefour, im_scaled), df, row_out1 + x);
+ }
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return c == c_ ? RenderPipelineChannelMode::kInOut
+ : RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "VChromaUps"; }
+
+ private:
+ size_t c_;
+};
+
+std::unique_ptr<RenderPipelineStage> GetChromaUpsamplingStage(size_t channel,
+ bool horizontal) {
+ if (horizontal) {
+ return jxl::make_unique<HorizontalChromaUpsamplingStage>(channel);
+ } else {
+ return jxl::make_unique<VerticalChromaUpsamplingStage>(channel);
+ }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetChromaUpsamplingStage);
+
+std::unique_ptr<RenderPipelineStage> GetChromaUpsamplingStage(size_t channel,
+ bool horizontal) {
+ return HWY_DYNAMIC_DISPATCH(GetChromaUpsamplingStage)(channel, horizontal);
+}
+
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_chroma_upsampling.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_chroma_upsampling.h
new file mode 100644
index 0000000000..b8bfc15f5f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_chroma_upsampling.h
@@ -0,0 +1,27 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_CHROMA_UPSAMPLING_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_CHROMA_UPSAMPLING_H_
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Applies simple upsampling, either horizontal or vertical, to the given
+// channel.
+std::unique_ptr<RenderPipelineStage> GetChromaUpsamplingStage(size_t channel,
+ bool horizontal);
+} // namespace jxl
+
+#endif // LIB_JXL_RENDER_PIPELINE_STAGE_CHROMA_UPSAMPLING_H_
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_epf.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_epf.cc
new file mode 100644
index 0000000000..d59c497843
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_epf.cc
@@ -0,0 +1,524 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_epf.h"
+
+#include "lib/jxl/epf.h"
+#include "lib/jxl/sanitizers.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_epf.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+// TODO(veluca): In principle, vectors could be not capped, if we want to deal
+// with having two different sigma values in a single vector.
+using DF = HWY_CAPPED(float, 8);
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::AbsDiff;
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Div;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::Vec;
+using hwy::HWY_NAMESPACE::VFromD;
+using hwy::HWY_NAMESPACE::ZeroIfNegative;
+
+JXL_INLINE Vec<DF> Weight(Vec<DF> sad, Vec<DF> inv_sigma, Vec<DF> thres) {
+ auto v = MulAdd(sad, inv_sigma, Set(DF(), 1.0f));
+ return ZeroIfNegative(v);
+}
+
+// 5x5 plus-shaped kernel with 5 SADs per pixel (3x3 plus-shaped). So this makes
+// this filter a 7x7 filter.
+class EPF0Stage : public RenderPipelineStage {
+ public:
+ EPF0Stage(const LoopFilter& lf, const ImageF& sigma)
+ : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric(
+ /*shift=*/0, /*border=*/3)),
+ lf_(lf),
+ sigma_(&sigma) {}
+
+ template <bool aligned>
+ JXL_INLINE void AddPixel(int row, float* JXL_RESTRICT rows[3][7], ssize_t x,
+ Vec<DF> sad, Vec<DF> inv_sigma,
+ Vec<DF>* JXL_RESTRICT X, Vec<DF>* JXL_RESTRICT Y,
+ Vec<DF>* JXL_RESTRICT B,
+ Vec<DF>* JXL_RESTRICT w) const {
+ auto cx = aligned ? Load(DF(), rows[0][3 + row] + x)
+ : LoadU(DF(), rows[0][3 + row] + x);
+ auto cy = aligned ? Load(DF(), rows[1][3 + row] + x)
+ : LoadU(DF(), rows[1][3 + row] + x);
+ auto cb = aligned ? Load(DF(), rows[2][3 + row] + x)
+ : LoadU(DF(), rows[2][3 + row] + x);
+
+ auto weight = Weight(sad, inv_sigma, Set(DF(), lf_.epf_pass1_zeroflush));
+ *w = Add(*w, weight);
+ *X = MulAdd(weight, cx, *X);
+ *Y = MulAdd(weight, cy, *Y);
+ *B = MulAdd(weight, cb, *B);
+ }
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ DF df;
+
+ using V = decltype(Zero(df));
+ V t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, tA, tB;
+ V* sads[12] = {&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7, &t8, &t9, &tA, &tB};
+
+ xextra = RoundUpTo(xextra, Lanes(df));
+ const float* JXL_RESTRICT row_sigma =
+ sigma_->Row(ypos / kBlockDim + kSigmaPadding);
+
+ float sm = lf_.epf_pass0_sigma_scale * 1.65;
+ float bsm = sm * lf_.epf_border_sad_mul;
+
+ HWY_ALIGN float sad_mul_center[kBlockDim] = {bsm, sm, sm, sm,
+ sm, sm, sm, bsm};
+ HWY_ALIGN float sad_mul_border[kBlockDim] = {bsm, bsm, bsm, bsm,
+ bsm, bsm, bsm, bsm};
+ float* JXL_RESTRICT rows[3][7];
+ for (size_t c = 0; c < 3; c++) {
+ for (int i = 0; i < 7; i++) {
+ rows[c][i] = GetInputRow(input_rows, c, i - 3);
+ }
+ }
+
+ const float* sad_mul =
+ (ypos % kBlockDim == 0 || ypos % kBlockDim == kBlockDim - 1)
+ ? sad_mul_border
+ : sad_mul_center;
+
+ for (ssize_t x = -xextra; x < static_cast<ssize_t>(xsize + xextra);
+ x += Lanes(df)) {
+ size_t bx = (x + xpos + kSigmaPadding * kBlockDim) / kBlockDim;
+ size_t ix = (x + xpos) % kBlockDim;
+
+ if (row_sigma[bx] < kMinSigma) {
+ for (size_t c = 0; c < 3; c++) {
+ auto px = Load(df, rows[c][3 + 0] + x);
+ StoreU(px, df, GetOutputRow(output_rows, c, 0) + x);
+ }
+ continue;
+ }
+
+ const auto sm = Load(df, sad_mul + ix);
+ const auto inv_sigma = Mul(Set(df, row_sigma[bx]), sm);
+
+ for (size_t i = 0; i < 12; i++) *sads[i] = Zero(df);
+ constexpr std::array<int, 2> sads_off[12] = {
+ {{-2, 0}}, {{-1, -1}}, {{-1, 0}}, {{-1, 1}}, {{0, -2}}, {{0, -1}},
+ {{0, 1}}, {{0, 2}}, {{1, -1}}, {{1, 0}}, {{1, 1}}, {{2, 0}},
+ };
+
+ // compute sads
+ // TODO(veluca): consider unrolling and optimizing this.
+ for (size_t c = 0; c < 3; c++) {
+ auto scale = Set(df, lf_.epf_channel_scale[c]);
+ for (size_t i = 0; i < 12; i++) {
+ auto sad = Zero(df);
+ constexpr std::array<int, 2> plus_off[] = {
+ {{0, 0}}, {{-1, 0}}, {{0, -1}}, {{1, 0}}, {{0, 1}}};
+ for (size_t j = 0; j < 5; j++) {
+ const auto r11 =
+ LoadU(df, rows[c][3 + plus_off[j][0]] + x + plus_off[j][1]);
+ const auto c11 =
+ LoadU(df, rows[c][3 + sads_off[i][0] + plus_off[j][0]] + x +
+ sads_off[i][1] + plus_off[j][1]);
+ sad = Add(sad, AbsDiff(r11, c11));
+ }
+ *sads[i] = MulAdd(sad, scale, *sads[i]);
+ }
+ }
+ const auto x_cc = Load(df, rows[0][3 + 0] + x);
+ const auto y_cc = Load(df, rows[1][3 + 0] + x);
+ const auto b_cc = Load(df, rows[2][3 + 0] + x);
+
+ auto w = Set(df, 1);
+ auto X = x_cc;
+ auto Y = y_cc;
+ auto B = b_cc;
+
+ for (size_t i = 0; i < 12; i++) {
+ AddPixel</*aligned=*/false>(/*row=*/sads_off[i][0], rows,
+ x + sads_off[i][1], *sads[i], inv_sigma, &X,
+ &Y, &B, &w);
+ }
+#if JXL_HIGH_PRECISION
+ auto inv_w = Div(Set(df, 1.0f), w);
+#else
+ auto inv_w = ApproximateReciprocal(w);
+#endif
+ StoreU(Mul(X, inv_w), df, GetOutputRow(output_rows, 0, 0) + x);
+ StoreU(Mul(Y, inv_w), df, GetOutputRow(output_rows, 1, 0) + x);
+ StoreU(Mul(B, inv_w), df, GetOutputRow(output_rows, 2, 0) + x);
+ }
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return c < 3 ? RenderPipelineChannelMode::kInOut
+ : RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "EPF0"; }
+
+ private:
+ LoopFilter lf_;
+ const ImageF* sigma_;
+};
+
+// 3x3 plus-shaped kernel with 5 SADs per pixel (also 3x3 plus-shaped). So this
+// makes this filter a 5x5 filter.
+class EPF1Stage : public RenderPipelineStage {
+ public:
+ EPF1Stage(const LoopFilter& lf, const ImageF& sigma)
+ : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric(
+ /*shift=*/0, /*border=*/2)),
+ lf_(lf),
+ sigma_(&sigma) {}
+
+ template <bool aligned>
+ JXL_INLINE void AddPixel(int row, float* JXL_RESTRICT rows[3][5], ssize_t x,
+ Vec<DF> sad, Vec<DF> inv_sigma,
+ Vec<DF>* JXL_RESTRICT X, Vec<DF>* JXL_RESTRICT Y,
+ Vec<DF>* JXL_RESTRICT B,
+ Vec<DF>* JXL_RESTRICT w) const {
+ auto cx = aligned ? Load(DF(), rows[0][2 + row] + x)
+ : LoadU(DF(), rows[0][2 + row] + x);
+ auto cy = aligned ? Load(DF(), rows[1][2 + row] + x)
+ : LoadU(DF(), rows[1][2 + row] + x);
+ auto cb = aligned ? Load(DF(), rows[2][2 + row] + x)
+ : LoadU(DF(), rows[2][2 + row] + x);
+
+ auto weight = Weight(sad, inv_sigma, Set(DF(), lf_.epf_pass1_zeroflush));
+ *w = Add(*w, weight);
+ *X = MulAdd(weight, cx, *X);
+ *Y = MulAdd(weight, cy, *Y);
+ *B = MulAdd(weight, cb, *B);
+ }
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ DF df;
+ xextra = RoundUpTo(xextra, Lanes(df));
+ const float* JXL_RESTRICT row_sigma =
+ sigma_->Row(ypos / kBlockDim + kSigmaPadding);
+
+ float sm = 1.65f;
+ float bsm = sm * lf_.epf_border_sad_mul;
+
+ HWY_ALIGN float sad_mul_center[kBlockDim] = {bsm, sm, sm, sm,
+ sm, sm, sm, bsm};
+ HWY_ALIGN float sad_mul_border[kBlockDim] = {bsm, bsm, bsm, bsm,
+ bsm, bsm, bsm, bsm};
+
+ float* JXL_RESTRICT rows[3][5];
+ for (size_t c = 0; c < 3; c++) {
+ for (int i = 0; i < 5; i++) {
+ rows[c][i] = GetInputRow(input_rows, c, i - 2);
+ }
+ }
+
+ const float* sad_mul =
+ (ypos % kBlockDim == 0 || ypos % kBlockDim == kBlockDim - 1)
+ ? sad_mul_border
+ : sad_mul_center;
+
+ for (ssize_t x = -xextra; x < static_cast<ssize_t>(xsize + xextra);
+ x += Lanes(df)) {
+ size_t bx = (x + xpos + kSigmaPadding * kBlockDim) / kBlockDim;
+ size_t ix = (x + xpos) % kBlockDim;
+
+ if (row_sigma[bx] < kMinSigma) {
+ for (size_t c = 0; c < 3; c++) {
+ auto px = Load(df, rows[c][2 + 0] + x);
+ Store(px, df, GetOutputRow(output_rows, c, 0) + x);
+ }
+ continue;
+ }
+
+ const auto sm = Load(df, sad_mul + ix);
+ const auto inv_sigma = Mul(Set(df, row_sigma[bx]), sm);
+ auto sad0 = Zero(df);
+ auto sad1 = Zero(df);
+ auto sad2 = Zero(df);
+ auto sad3 = Zero(df);
+
+ // compute sads
+ for (size_t c = 0; c < 3; c++) {
+ // center px = 22, px above = 21
+ auto t = Undefined(df);
+
+ const auto p20 = Load(df, rows[c][2 + -2] + x);
+ const auto p21 = Load(df, rows[c][2 + -1] + x);
+ auto sad0c = AbsDiff(p20, p21); // SAD 2, 1
+
+ const auto p11 = LoadU(df, rows[c][2 + -1] + x - 1);
+ auto sad1c = AbsDiff(p11, p21); // SAD 1, 2
+
+ const auto p31 = LoadU(df, rows[c][2 + -1] + x + 1);
+ auto sad2c = AbsDiff(p31, p21); // SAD 3, 2
+
+ const auto p02 = LoadU(df, rows[c][2 + 0] + x - 2);
+ const auto p12 = LoadU(df, rows[c][2 + 0] + x - 1);
+ sad1c = Add(sad1c, AbsDiff(p02, p12)); // SAD 1, 2
+ sad0c = Add(sad0c, AbsDiff(p11, p12)); // SAD 2, 1
+
+ const auto p22 = LoadU(df, rows[c][2 + 0] + x);
+ t = AbsDiff(p12, p22);
+ sad1c = Add(sad1c, t); // SAD 1, 2
+ sad2c = Add(sad2c, t); // SAD 3, 2
+ t = AbsDiff(p22, p21);
+ auto sad3c = t; // SAD 2, 3
+ sad0c = Add(sad0c, t); // SAD 2, 1
+
+ const auto p32 = LoadU(df, rows[c][2 + 0] + x + 1);
+ sad0c = Add(sad0c, AbsDiff(p31, p32)); // SAD 2, 1
+ t = AbsDiff(p22, p32);
+ sad1c = Add(sad1c, t); // SAD 1, 2
+ sad2c = Add(sad2c, t); // SAD 3, 2
+
+ const auto p42 = LoadU(df, rows[c][2 + 0] + x + 2);
+ sad2c = Add(sad2c, AbsDiff(p42, p32)); // SAD 3, 2
+
+ const auto p13 = LoadU(df, rows[c][2 + 1] + x - 1);
+ sad3c = Add(sad3c, AbsDiff(p13, p12)); // SAD 2, 3
+
+ const auto p23 = Load(df, rows[c][2 + 1] + x);
+ t = AbsDiff(p22, p23);
+ sad0c = Add(sad0c, t); // SAD 2, 1
+ sad3c = Add(sad3c, t); // SAD 2, 3
+ sad1c = Add(sad1c, AbsDiff(p13, p23)); // SAD 1, 2
+
+ const auto p33 = LoadU(df, rows[c][2 + 1] + x + 1);
+ sad2c = Add(sad2c, AbsDiff(p33, p23)); // SAD 3, 2
+ sad3c = Add(sad3c, AbsDiff(p33, p32)); // SAD 2, 3
+
+ const auto p24 = Load(df, rows[c][2 + 2] + x);
+ sad3c = Add(sad3c, AbsDiff(p24, p23)); // SAD 2, 3
+
+ auto scale = Set(df, lf_.epf_channel_scale[c]);
+ sad0 = MulAdd(sad0c, scale, sad0);
+ sad1 = MulAdd(sad1c, scale, sad1);
+ sad2 = MulAdd(sad2c, scale, sad2);
+ sad3 = MulAdd(sad3c, scale, sad3);
+ }
+ const auto x_cc = Load(df, rows[0][2 + 0] + x);
+ const auto y_cc = Load(df, rows[1][2 + 0] + x);
+ const auto b_cc = Load(df, rows[2][2 + 0] + x);
+
+ auto w = Set(df, 1);
+ auto X = x_cc;
+ auto Y = y_cc;
+ auto B = b_cc;
+
+ // Top row
+ AddPixel</*aligned=*/true>(/*row=*/-1, rows, x, sad0, inv_sigma, &X, &Y,
+ &B, &w);
+ // Center
+ AddPixel</*aligned=*/false>(/*row=*/0, rows, x - 1, sad1, inv_sigma, &X,
+ &Y, &B, &w);
+ AddPixel</*aligned=*/false>(/*row=*/0, rows, x + 1, sad2, inv_sigma, &X,
+ &Y, &B, &w);
+ // Bottom
+ AddPixel</*aligned=*/true>(/*row=*/1, rows, x, sad3, inv_sigma, &X, &Y,
+ &B, &w);
+#if JXL_HIGH_PRECISION
+ auto inv_w = Div(Set(df, 1.0f), w);
+#else
+ auto inv_w = ApproximateReciprocal(w);
+#endif
+ Store(Mul(X, inv_w), df, GetOutputRow(output_rows, 0, 0) + x);
+ Store(Mul(Y, inv_w), df, GetOutputRow(output_rows, 1, 0) + x);
+ Store(Mul(B, inv_w), df, GetOutputRow(output_rows, 2, 0) + x);
+ }
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return c < 3 ? RenderPipelineChannelMode::kInOut
+ : RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "EPF1"; }
+
+ private:
+ LoopFilter lf_;
+ const ImageF* sigma_;
+};
+
+// 3x3 plus-shaped kernel with 1 SAD per pixel. So this makes this filter a 3x3
+// filter.
+class EPF2Stage : public RenderPipelineStage {
+ public:
+ EPF2Stage(const LoopFilter& lf, const ImageF& sigma)
+ : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric(
+ /*shift=*/0, /*border=*/1)),
+ lf_(lf),
+ sigma_(&sigma) {}
+
+ template <bool aligned>
+ JXL_INLINE void AddPixel(int row, float* JXL_RESTRICT rows[3][3], ssize_t x,
+ Vec<DF> rx, Vec<DF> ry, Vec<DF> rb,
+ Vec<DF> inv_sigma, Vec<DF>* JXL_RESTRICT X,
+ Vec<DF>* JXL_RESTRICT Y, Vec<DF>* JXL_RESTRICT B,
+ Vec<DF>* JXL_RESTRICT w) const {
+ auto cx = aligned ? Load(DF(), rows[0][1 + row] + x)
+ : LoadU(DF(), rows[0][1 + row] + x);
+ auto cy = aligned ? Load(DF(), rows[1][1 + row] + x)
+ : LoadU(DF(), rows[1][1 + row] + x);
+ auto cb = aligned ? Load(DF(), rows[2][1 + row] + x)
+ : LoadU(DF(), rows[2][1 + row] + x);
+
+ auto sad = Mul(AbsDiff(cx, rx), Set(DF(), lf_.epf_channel_scale[0]));
+ sad = MulAdd(AbsDiff(cy, ry), Set(DF(), lf_.epf_channel_scale[1]), sad);
+ sad = MulAdd(AbsDiff(cb, rb), Set(DF(), lf_.epf_channel_scale[2]), sad);
+
+ auto weight = Weight(sad, inv_sigma, Set(DF(), lf_.epf_pass2_zeroflush));
+
+ *w = Add(*w, weight);
+ *X = MulAdd(weight, cx, *X);
+ *Y = MulAdd(weight, cy, *Y);
+ *B = MulAdd(weight, cb, *B);
+ }
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ DF df;
+ xextra = RoundUpTo(xextra, Lanes(df));
+ const float* JXL_RESTRICT row_sigma =
+ sigma_->Row(ypos / kBlockDim + kSigmaPadding);
+
+ float sm = lf_.epf_pass2_sigma_scale * 1.65;
+ float bsm = sm * lf_.epf_border_sad_mul;
+
+ HWY_ALIGN float sad_mul_center[kBlockDim] = {bsm, sm, sm, sm,
+ sm, sm, sm, bsm};
+ HWY_ALIGN float sad_mul_border[kBlockDim] = {bsm, bsm, bsm, bsm,
+ bsm, bsm, bsm, bsm};
+
+ float* JXL_RESTRICT rows[3][3];
+ for (size_t c = 0; c < 3; c++) {
+ for (int i = 0; i < 3; i++) {
+ rows[c][i] = GetInputRow(input_rows, c, i - 1);
+ }
+ }
+
+ const float* sad_mul =
+ (ypos % kBlockDim == 0 || ypos % kBlockDim == kBlockDim - 1)
+ ? sad_mul_border
+ : sad_mul_center;
+
+ for (ssize_t x = -xextra; x < static_cast<ssize_t>(xsize + xextra);
+ x += Lanes(df)) {
+ size_t bx = (x + xpos + kSigmaPadding * kBlockDim) / kBlockDim;
+ size_t ix = (x + xpos) % kBlockDim;
+
+ if (row_sigma[bx] < kMinSigma) {
+ for (size_t c = 0; c < 3; c++) {
+ auto px = Load(df, rows[c][1 + 0] + x);
+ Store(px, df, GetOutputRow(output_rows, c, 0) + x);
+ }
+ continue;
+ }
+
+ const auto sm = Load(df, sad_mul + ix);
+ const auto inv_sigma = Mul(Set(df, row_sigma[bx]), sm);
+
+ const auto x_cc = Load(df, rows[0][1 + 0] + x);
+ const auto y_cc = Load(df, rows[1][1 + 0] + x);
+ const auto b_cc = Load(df, rows[2][1 + 0] + x);
+
+ auto w = Set(df, 1);
+ auto X = x_cc;
+ auto Y = y_cc;
+ auto B = b_cc;
+
+ // Top row
+ AddPixel</*aligned=*/true>(/*row=*/-1, rows, x, x_cc, y_cc, b_cc,
+ inv_sigma, &X, &Y, &B, &w);
+ // Center
+ AddPixel</*aligned=*/false>(/*row=*/0, rows, x - 1, x_cc, y_cc, b_cc,
+ inv_sigma, &X, &Y, &B, &w);
+ AddPixel</*aligned=*/false>(/*row=*/0, rows, x + 1, x_cc, y_cc, b_cc,
+ inv_sigma, &X, &Y, &B, &w);
+ // Bottom
+ AddPixel</*aligned=*/true>(/*row=*/1, rows, x, x_cc, y_cc, b_cc,
+ inv_sigma, &X, &Y, &B, &w);
+#if JXL_HIGH_PRECISION
+ auto inv_w = Div(Set(df, 1.0f), w);
+#else
+ auto inv_w = ApproximateReciprocal(w);
+#endif
+ Store(Mul(X, inv_w), df, GetOutputRow(output_rows, 0, 0) + x);
+ Store(Mul(Y, inv_w), df, GetOutputRow(output_rows, 1, 0) + x);
+ Store(Mul(B, inv_w), df, GetOutputRow(output_rows, 2, 0) + x);
+ }
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return c < 3 ? RenderPipelineChannelMode::kInOut
+ : RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "EPF2"; }
+
+ private:
+ LoopFilter lf_;
+ const ImageF* sigma_;
+};
+
+std::unique_ptr<RenderPipelineStage> GetEPFStage0(const LoopFilter& lf,
+ const ImageF& sigma) {
+ return jxl::make_unique<EPF0Stage>(lf, sigma);
+}
+
+std::unique_ptr<RenderPipelineStage> GetEPFStage1(const LoopFilter& lf,
+ const ImageF& sigma) {
+ return jxl::make_unique<EPF1Stage>(lf, sigma);
+}
+
+std::unique_ptr<RenderPipelineStage> GetEPFStage2(const LoopFilter& lf,
+ const ImageF& sigma) {
+ return jxl::make_unique<EPF2Stage>(lf, sigma);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetEPFStage0);
+HWY_EXPORT(GetEPFStage1);
+HWY_EXPORT(GetEPFStage2);
+
+std::unique_ptr<RenderPipelineStage> GetEPFStage(const LoopFilter& lf,
+ const ImageF& sigma,
+ size_t epf_stage) {
+ JXL_ASSERT(lf.epf_iters != 0);
+ switch (epf_stage) {
+ case 0:
+ return HWY_DYNAMIC_DISPATCH(GetEPFStage0)(lf, sigma);
+ case 1:
+ return HWY_DYNAMIC_DISPATCH(GetEPFStage1)(lf, sigma);
+ case 2:
+ return HWY_DYNAMIC_DISPATCH(GetEPFStage2)(lf, sigma);
+ default:
+ JXL_ABORT("Invalid EPF stage");
+ }
+}
+
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_epf.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_epf.h
new file mode 100644
index 0000000000..c9d0d0c785
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_epf.h
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_EPF_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_EPF_H_
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/image.h"
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Applies the `epf_stage`-th EPF step with the given settings and `sigma`.
+// `sigma` will be accessed with an offset of (kSigmaPadding, kSigmaPadding),
+// and should have (kSigmaBorder, kSigmaBorder) mirrored sigma values available
+// around the main image. See also filters.(h|cc)
+std::unique_ptr<RenderPipelineStage> GetEPFStage(const LoopFilter& lf,
+ const ImageF& sigma,
+ size_t epf_stage);
+} // namespace jxl
+
+#endif // LIB_JXL_RENDER_PIPELINE_STAGE_EPF_H_
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_from_linear.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_from_linear.cc
new file mode 100644
index 0000000000..c7b22c663b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_from_linear.cc
@@ -0,0 +1,191 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_from_linear.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_from_linear.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/dec_tone_mapping-inl.h"
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::IfThenZeroElse;
+
+template <typename Op>
+struct PerChannelOp {
+ explicit PerChannelOp(Op op) : op(op) {}
+ template <typename D, typename T>
+ void Transform(D d, T* r, T* g, T* b) const {
+ *r = op.Transform(d, *r);
+ *g = op.Transform(d, *g);
+ *b = op.Transform(d, *b);
+ }
+
+ Op op;
+};
+template <typename Op>
+PerChannelOp<Op> MakePerChannelOp(Op&& op) {
+ return PerChannelOp<Op>(std::forward<Op>(op));
+}
+
+struct OpLinear {
+ template <typename D, typename T>
+ T Transform(D d, const T& linear) const {
+ return linear;
+ }
+};
+
+struct OpRgb {
+ template <typename D, typename T>
+ T Transform(D d, const T& linear) const {
+#if JXL_HIGH_PRECISION
+ return TF_SRGB().EncodedFromDisplay(d, linear);
+#else
+ return FastLinearToSRGB(d, linear);
+#endif
+ }
+};
+
+struct OpPq {
+ template <typename D, typename T>
+ T Transform(D d, const T& linear) const {
+ return TF_PQ().EncodedFromDisplay(d, linear);
+ }
+};
+
+struct OpHlg {
+ explicit OpHlg(const float luminances[3], const float intensity_target)
+ : hlg_ootf_(HlgOOTF::ToSceneLight(/*display_luminance=*/intensity_target,
+ luminances)) {}
+
+ template <typename D, typename T>
+ void Transform(D d, T* r, T* g, T* b) const {
+ hlg_ootf_.Apply(r, g, b);
+ *r = TF_HLG().EncodedFromDisplay(d, *r);
+ *g = TF_HLG().EncodedFromDisplay(d, *g);
+ *b = TF_HLG().EncodedFromDisplay(d, *b);
+ }
+ HlgOOTF hlg_ootf_;
+};
+
+struct Op709 {
+ template <typename D, typename T>
+ T Transform(D d, const T& linear) const {
+ return TF_709().EncodedFromDisplay(d, linear);
+ }
+};
+
+struct OpGamma {
+ const float inverse_gamma;
+ template <typename D, typename T>
+ T Transform(D d, const T& linear) const {
+ return IfThenZeroElse(Le(linear, Set(d, 1e-5f)),
+ FastPowf(d, linear, Set(d, inverse_gamma)));
+ }
+};
+
+template <typename Op>
+class FromLinearStage : public RenderPipelineStage {
+ public:
+ explicit FromLinearStage(Op op)
+ : RenderPipelineStage(RenderPipelineStage::Settings()),
+ op_(std::move(op)) {}
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ PROFILER_ZONE("FromLinear");
+ const HWY_FULL(float) d;
+ const size_t xsize_v = RoundUpTo(xsize, Lanes(d));
+ float* JXL_RESTRICT row0 = GetInputRow(input_rows, 0, 0);
+ float* JXL_RESTRICT row1 = GetInputRow(input_rows, 1, 0);
+ float* JXL_RESTRICT row2 = GetInputRow(input_rows, 2, 0);
+ // All calculations are lane-wise, still some might require
+ // value-dependent behaviour (e.g. NearestInt). Temporary unpoison last
+ // vector tail.
+ msan::UnpoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize));
+ msan::UnpoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize));
+ msan::UnpoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize));
+ for (ssize_t x = -xextra; x < (ssize_t)(xsize + xextra); x += Lanes(d)) {
+ auto r = LoadU(d, row0 + x);
+ auto g = LoadU(d, row1 + x);
+ auto b = LoadU(d, row2 + x);
+ op_.Transform(d, &r, &g, &b);
+ StoreU(r, d, row0 + x);
+ StoreU(g, d, row1 + x);
+ StoreU(b, d, row2 + x);
+ }
+ msan::PoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize));
+ msan::PoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize));
+ msan::PoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize));
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return c < 3 ? RenderPipelineChannelMode::kInPlace
+ : RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "FromLinear"; }
+
+ private:
+ Op op_;
+};
+
+template <typename Op>
+std::unique_ptr<FromLinearStage<Op>> MakeFromLinearStage(Op&& op) {
+ return jxl::make_unique<FromLinearStage<Op>>(std::forward<Op>(op));
+}
+
+std::unique_ptr<RenderPipelineStage> GetFromLinearStage(
+ const OutputEncodingInfo& output_encoding_info) {
+ if (output_encoding_info.color_encoding.tf.IsLinear()) {
+ return MakeFromLinearStage(MakePerChannelOp(OpLinear()));
+ } else if (output_encoding_info.color_encoding.tf.IsSRGB()) {
+ return MakeFromLinearStage(MakePerChannelOp(OpRgb()));
+ } else if (output_encoding_info.color_encoding.tf.IsPQ()) {
+ return MakeFromLinearStage(MakePerChannelOp(OpPq()));
+ } else if (output_encoding_info.color_encoding.tf.IsHLG()) {
+ return MakeFromLinearStage(
+ OpHlg(output_encoding_info.luminances,
+ output_encoding_info.desired_intensity_target));
+ } else if (output_encoding_info.color_encoding.tf.Is709()) {
+ return MakeFromLinearStage(MakePerChannelOp(Op709()));
+ } else if (output_encoding_info.color_encoding.tf.IsGamma() ||
+ output_encoding_info.color_encoding.tf.IsDCI()) {
+ return MakeFromLinearStage(
+ MakePerChannelOp(OpGamma{output_encoding_info.inverse_gamma}));
+ } else {
+ // This is a programming error.
+ JXL_ABORT("Invalid target encoding");
+ }
+}
+
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetFromLinearStage);
+
+std::unique_ptr<RenderPipelineStage> GetFromLinearStage(
+ const OutputEncodingInfo& output_encoding_info) {
+ return HWY_DYNAMIC_DISPATCH(GetFromLinearStage)(output_encoding_info);
+}
+
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_from_linear.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_from_linear.h
new file mode 100644
index 0000000000..548ab50b8c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_from_linear.h
@@ -0,0 +1,20 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_FROM_LINEAR_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_FROM_LINEAR_H_
+
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Converts the color channels from linear to the specified output encoding.
+std::unique_ptr<RenderPipelineStage> GetFromLinearStage(
+ const OutputEncodingInfo& output_encoding_info);
+
+} // namespace jxl
+
+#endif // LIB_JXL_RENDER_PIPELINE_STAGE_FROM_LINEAR_H_
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_gaborish.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_gaborish.cc
new file mode 100644
index 0000000000..fc90acb476
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_gaborish.cc
@@ -0,0 +1,122 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_gaborish.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_gaborish.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+
+class GaborishStage : public RenderPipelineStage {
+ public:
+ explicit GaborishStage(const LoopFilter& lf)
+ : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric(
+ /*shift=*/0, /*border=*/1)) {
+ weights_[0] = 1;
+ weights_[1] = lf.gab_x_weight1;
+ weights_[2] = lf.gab_x_weight2;
+ weights_[3] = 1;
+ weights_[4] = lf.gab_y_weight1;
+ weights_[5] = lf.gab_y_weight2;
+ weights_[6] = 1;
+ weights_[7] = lf.gab_b_weight1;
+ weights_[8] = lf.gab_b_weight2;
+ // Normalize
+ for (size_t c = 0; c < 3; c++) {
+ const float div =
+ weights_[3 * c] + 4 * (weights_[3 * c + 1] + weights_[3 * c + 2]);
+ const float mul = 1.0f / div;
+ weights_[3 * c] *= mul;
+ weights_[3 * c + 1] *= mul;
+ weights_[3 * c + 2] *= mul;
+ }
+ }
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ PROFILER_ZONE("Gaborish");
+
+ const HWY_FULL(float) d;
+ for (size_t c = 0; c < 3; c++) {
+ float* JXL_RESTRICT row_t = GetInputRow(input_rows, c, -1);
+ float* JXL_RESTRICT row_m = GetInputRow(input_rows, c, 0);
+ float* JXL_RESTRICT row_b = GetInputRow(input_rows, c, 1);
+ float* JXL_RESTRICT row_out = GetOutputRow(output_rows, c, 0);
+ const auto w0 = Set(d, weights_[3 * c + 0]);
+ const auto w1 = Set(d, weights_[3 * c + 1]);
+ const auto w2 = Set(d, weights_[3 * c + 2]);
+// Group data need only be aligned to a block; for >=512 bit vectors, this may
+// result in unaligned loads.
+#if HWY_CAP_GE512
+#define LoadMaybeU LoadU
+#else
+#define LoadMaybeU Load
+#endif
+ // Since GetInputRow(input_rows, c, {-1, 0, 1}) is aligned, rounding
+ // xextra up to Lanes(d) doesn't access anything problematic.
+ for (ssize_t x = -RoundUpTo(xextra, Lanes(d));
+ x < (ssize_t)(xsize + xextra); x += Lanes(d)) {
+ const auto t = LoadMaybeU(d, row_t + x);
+ const auto tl = LoadU(d, row_t + x - 1);
+ const auto tr = LoadU(d, row_t + x + 1);
+ const auto m = LoadMaybeU(d, row_m + x);
+ const auto l = LoadU(d, row_m + x - 1);
+ const auto r = LoadU(d, row_m + x + 1);
+ const auto b = LoadMaybeU(d, row_b + x);
+ const auto bl = LoadU(d, row_b + x - 1);
+ const auto br = LoadU(d, row_b + x + 1);
+ const auto sum0 = m;
+ const auto sum1 = Add(Add(l, r), Add(t, b));
+ const auto sum2 = Add(Add(tl, tr), Add(bl, br));
+ auto pixels = MulAdd(sum2, w2, MulAdd(sum1, w1, Mul(sum0, w0)));
+ Store(pixels, d, row_out + x);
+ }
+ }
+ }
+#undef LoadMaybeU
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return c < 3 ? RenderPipelineChannelMode::kInOut
+ : RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "Gab"; }
+
+ private:
+ float weights_[9];
+};
+
+std::unique_ptr<RenderPipelineStage> GetGaborishStage(const LoopFilter& lf) {
+ return jxl::make_unique<GaborishStage>(lf);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetGaborishStage);
+
+std::unique_ptr<RenderPipelineStage> GetGaborishStage(const LoopFilter& lf) {
+ JXL_ASSERT(lf.gab == 1);
+ return HWY_DYNAMIC_DISPATCH(GetGaborishStage)(lf);
+}
+
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_gaborish.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_gaborish.h
new file mode 100644
index 0000000000..761800f668
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_gaborish.h
@@ -0,0 +1,25 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_GABORISH_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_GABORISH_H_
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/loop_filter.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Applies decoder-side Gaborish with the given settings. `lf.gab` must be 1.
+std::unique_ptr<RenderPipelineStage> GetGaborishStage(const LoopFilter& lf);
+} // namespace jxl
+
+#endif // LIB_JXL_RENDER_PIPELINE_STAGE_GABORISH_H_
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_noise.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_noise.cc
new file mode 100644
index 0000000000..187095cf61
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_noise.cc
@@ -0,0 +1,311 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_noise.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_noise.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Vec;
+using hwy::HWY_NAMESPACE::ZeroIfNegative;
+
+using D = HWY_CAPPED(float, kBlockDim);
+using DI = hwy::HWY_NAMESPACE::Rebind<int32_t, D>;
+using DI8 = hwy::HWY_NAMESPACE::Repartition<uint8_t, D>;
+
+// [0, max_value]
+template <class D, class V>
+static HWY_INLINE V Clamp0ToMax(D d, const V x, const V max_value) {
+ const auto clamped = Min(x, max_value);
+ return ZeroIfNegative(clamped);
+}
+
+// x is in [0+delta, 1+delta], delta ~= 0.06
+template <class StrengthEval>
+typename StrengthEval::V NoiseStrength(const StrengthEval& eval,
+ const typename StrengthEval::V x) {
+ return Clamp0ToMax(D(), eval(x), Set(D(), 1.0f));
+}
+
+// TODO(veluca): SIMD-fy.
+class StrengthEvalLut {
+ public:
+ using V = Vec<D>;
+
+ explicit StrengthEvalLut(const NoiseParams& noise_params)
+#if HWY_TARGET == HWY_SCALAR
+ : noise_params_(noise_params)
+#endif
+ {
+#if HWY_TARGET != HWY_SCALAR
+ uint32_t lut[8];
+ memcpy(lut, noise_params.lut, sizeof(lut));
+ for (size_t i = 0; i < 8; i++) {
+ low16_lut[2 * i] = (lut[i] >> 0) & 0xFF;
+ low16_lut[2 * i + 1] = (lut[i] >> 8) & 0xFF;
+ high16_lut[2 * i] = (lut[i] >> 16) & 0xFF;
+ high16_lut[2 * i + 1] = (lut[i] >> 24) & 0xFF;
+ }
+#endif
+ }
+
+ V operator()(const V vx) const {
+ constexpr size_t kScale = NoiseParams::kNumNoisePoints - 2;
+ auto scaled_vx = Max(Zero(D()), Mul(vx, Set(D(), kScale)));
+ auto floor_x = Floor(scaled_vx);
+ auto frac_x = Sub(scaled_vx, floor_x);
+ floor_x = IfThenElse(Ge(scaled_vx, Set(D(), kScale + 1)), Set(D(), kScale),
+ floor_x);
+ frac_x =
+ IfThenElse(Ge(scaled_vx, Set(D(), kScale + 1)), Set(D(), 1), frac_x);
+ auto floor_x_int = ConvertTo(DI(), floor_x);
+#if HWY_TARGET == HWY_SCALAR
+ auto low = Set(D(), noise_params_.lut[floor_x_int.raw]);
+ auto hi = Set(D(), noise_params_.lut[floor_x_int.raw + 1]);
+#else
+ // Set each lane's bytes to {0, 0, 2x+1, 2x}.
+ auto floorx_indices_low =
+ Add(Mul(floor_x_int, Set(DI(), 0x0202)), Set(DI(), 0x0100));
+ // Set each lane's bytes to {2x+1, 2x, 0, 0}.
+ auto floorx_indices_hi =
+ Add(Mul(floor_x_int, Set(DI(), 0x02020000)), Set(DI(), 0x01000000));
+ // load LUT
+ auto low16 = BitCast(DI(), LoadDup128(DI8(), low16_lut));
+ auto lowm = Set(DI(), 0xFFFF);
+ auto hi16 = BitCast(DI(), LoadDup128(DI8(), high16_lut));
+ auto him = Set(DI(), 0xFFFF0000);
+ // low = noise_params.lut[floor_x]
+ auto low =
+ BitCast(D(), Or(And(TableLookupBytes(low16, floorx_indices_low), lowm),
+ And(TableLookupBytes(hi16, floorx_indices_hi), him)));
+ // hi = noise_params.lut[floor_x+1]
+ floorx_indices_low = Add(floorx_indices_low, Set(DI(), 0x0202));
+ floorx_indices_hi = Add(floorx_indices_hi, Set(DI(), 0x02020000));
+ auto hi =
+ BitCast(D(), Or(And(TableLookupBytes(low16, floorx_indices_low), lowm),
+ And(TableLookupBytes(hi16, floorx_indices_hi), him)));
+#endif
+ return MulAdd(Sub(hi, low), frac_x, low);
+ }
+
+ private:
+#if HWY_TARGET != HWY_SCALAR
+ // noise_params.lut transformed into two 16-bit lookup tables.
+ HWY_ALIGN uint8_t high16_lut[16];
+ HWY_ALIGN uint8_t low16_lut[16];
+#else
+ const NoiseParams& noise_params_;
+#endif
+};
+
+template <class D>
+void AddNoiseToRGB(const D d, const Vec<D> rnd_noise_r,
+ const Vec<D> rnd_noise_g, const Vec<D> rnd_noise_cor,
+ const Vec<D> noise_strength_g, const Vec<D> noise_strength_r,
+ float ytox, float ytob, float* JXL_RESTRICT out_x,
+ float* JXL_RESTRICT out_y, float* JXL_RESTRICT out_b) {
+ const auto kRGCorr = Set(d, 0.9921875f); // 127/128
+ const auto kRGNCorr = Set(d, 0.0078125f); // 1/128
+
+ const auto red_noise =
+ Mul(noise_strength_r,
+ MulAdd(kRGNCorr, rnd_noise_r, Mul(kRGCorr, rnd_noise_cor)));
+ const auto green_noise =
+ Mul(noise_strength_g,
+ MulAdd(kRGNCorr, rnd_noise_g, Mul(kRGCorr, rnd_noise_cor)));
+
+ auto vx = LoadU(d, out_x);
+ auto vy = LoadU(d, out_y);
+ auto vb = LoadU(d, out_b);
+
+ const auto rg_noise = Add(red_noise, green_noise);
+ vx = Add(MulAdd(Set(d, ytox), rg_noise, Sub(red_noise, green_noise)), vx);
+ vy = Add(vy, rg_noise);
+ vb = MulAdd(Set(d, ytob), rg_noise, vb);
+
+ StoreU(vx, d, out_x);
+ StoreU(vy, d, out_y);
+ StoreU(vb, d, out_b);
+}
+
+class AddNoiseStage : public RenderPipelineStage {
+ public:
+ AddNoiseStage(const NoiseParams& noise_params,
+ const ColorCorrelationMap& cmap, size_t first_c)
+ : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric(
+ /*shift=*/0, /*border=*/0)),
+ noise_params_(noise_params),
+ cmap_(cmap),
+ first_c_(first_c) {}
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ PROFILER_ZONE("Noise apply");
+
+ if (!noise_params_.HasAny()) return;
+ const StrengthEvalLut noise_model(noise_params_);
+ D d;
+ const auto half = Set(d, 0.5f);
+
+ // With the prior subtract-random Laplacian approximation, rnd_* ranges were
+ // about [-1.5, 1.6]; Laplacian3 about doubles this to [-3.6, 3.6], so the
+ // normalizer is half of what it was before (0.5).
+ const auto norm_const = Set(d, 0.22f);
+
+ float ytox = cmap_.YtoXRatio(0);
+ float ytob = cmap_.YtoBRatio(0);
+
+ const size_t xsize_v = RoundUpTo(xsize, Lanes(d));
+
+ float* JXL_RESTRICT row_x = GetInputRow(input_rows, 0, 0);
+ float* JXL_RESTRICT row_y = GetInputRow(input_rows, 1, 0);
+ float* JXL_RESTRICT row_b = GetInputRow(input_rows, 2, 0);
+ const float* JXL_RESTRICT row_rnd_r =
+ GetInputRow(input_rows, first_c_ + 0, 0);
+ const float* JXL_RESTRICT row_rnd_g =
+ GetInputRow(input_rows, first_c_ + 1, 0);
+ const float* JXL_RESTRICT row_rnd_c =
+ GetInputRow(input_rows, first_c_ + 2, 0);
+ // Needed by the calls to Floor() in StrengthEvalLut. Only arithmetic and
+ // shuffles are otherwise done on the data, so this is safe.
+ msan::UnpoisonMemory(row_x + xsize, (xsize_v - xsize) * sizeof(float));
+ msan::UnpoisonMemory(row_y + xsize, (xsize_v - xsize) * sizeof(float));
+ for (size_t x = 0; x < xsize_v; x += Lanes(d)) {
+ const auto vx = LoadU(d, row_x + x);
+ const auto vy = LoadU(d, row_y + x);
+ const auto in_g = Sub(vy, vx);
+ const auto in_r = Add(vy, vx);
+ const auto noise_strength_g = NoiseStrength(noise_model, Mul(in_g, half));
+ const auto noise_strength_r = NoiseStrength(noise_model, Mul(in_r, half));
+ const auto addit_rnd_noise_red = Mul(LoadU(d, row_rnd_r + x), norm_const);
+ const auto addit_rnd_noise_green =
+ Mul(LoadU(d, row_rnd_g + x), norm_const);
+ const auto addit_rnd_noise_correlated =
+ Mul(LoadU(d, row_rnd_c + x), norm_const);
+ AddNoiseToRGB(D(), addit_rnd_noise_red, addit_rnd_noise_green,
+ addit_rnd_noise_correlated, noise_strength_g,
+ noise_strength_r, ytox, ytob, row_x + x, row_y + x,
+ row_b + x);
+ }
+ msan::PoisonMemory(row_x + xsize, (xsize_v - xsize) * sizeof(float));
+ msan::PoisonMemory(row_y + xsize, (xsize_v - xsize) * sizeof(float));
+ msan::PoisonMemory(row_b + xsize, (xsize_v - xsize) * sizeof(float));
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return c >= first_c_ ? RenderPipelineChannelMode::kInput
+ : c < 3 ? RenderPipelineChannelMode::kInPlace
+ : RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "AddNoise"; }
+
+ private:
+ const NoiseParams& noise_params_;
+ const ColorCorrelationMap& cmap_;
+ size_t first_c_;
+};
+
+std::unique_ptr<RenderPipelineStage> GetAddNoiseStage(
+ const NoiseParams& noise_params, const ColorCorrelationMap& cmap,
+ size_t noise_c_start) {
+ return jxl::make_unique<AddNoiseStage>(noise_params, cmap, noise_c_start);
+}
+
+class ConvolveNoiseStage : public RenderPipelineStage {
+ public:
+ explicit ConvolveNoiseStage(size_t first_c)
+ : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric(
+ /*shift=*/0, /*border=*/2)),
+ first_c_(first_c) {}
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ PROFILER_ZONE("Noise convolve");
+
+ const HWY_FULL(float) d;
+ for (size_t c = first_c_; c < first_c_ + 3; c++) {
+ float* JXL_RESTRICT rows[5];
+ for (size_t i = 0; i < 5; i++) {
+ rows[i] = GetInputRow(input_rows, c, i - 2);
+ }
+ float* JXL_RESTRICT row_out = GetOutputRow(output_rows, c, 0);
+ for (ssize_t x = -RoundUpTo(xextra, Lanes(d));
+ x < (ssize_t)(xsize + xextra); x += Lanes(d)) {
+ const auto p00 = LoadU(d, rows[2] + x);
+ auto others = Zero(d);
+ // TODO(eustas): sum loaded values to reduce the calculation chain
+ for (ssize_t i = -2; i <= 2; i++) {
+ others = Add(others, LoadU(d, rows[0] + x + i));
+ others = Add(others, LoadU(d, rows[1] + x + i));
+ others = Add(others, LoadU(d, rows[3] + x + i));
+ others = Add(others, LoadU(d, rows[4] + x + i));
+ }
+ others = Add(others, LoadU(d, rows[2] + x - 2));
+ others = Add(others, LoadU(d, rows[2] + x - 1));
+ others = Add(others, LoadU(d, rows[2] + x + 1));
+ others = Add(others, LoadU(d, rows[2] + x + 2));
+ // 4 * (1 - box kernel)
+ auto pixels = MulAdd(others, Set(d, 0.16), Mul(p00, Set(d, -3.84)));
+ StoreU(pixels, d, row_out + x);
+ }
+ }
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return c >= first_c_ ? RenderPipelineChannelMode::kInOut
+ : RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "ConvNoise"; }
+
+ private:
+ size_t first_c_;
+};
+
+std::unique_ptr<RenderPipelineStage> GetConvolveNoiseStage(
+ size_t noise_c_start) {
+ return jxl::make_unique<ConvolveNoiseStage>(noise_c_start);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetAddNoiseStage);
+HWY_EXPORT(GetConvolveNoiseStage);
+
+std::unique_ptr<RenderPipelineStage> GetAddNoiseStage(
+ const NoiseParams& noise_params, const ColorCorrelationMap& cmap,
+ size_t noise_c_start) {
+ return HWY_DYNAMIC_DISPATCH(GetAddNoiseStage)(noise_params, cmap,
+ noise_c_start);
+}
+
+std::unique_ptr<RenderPipelineStage> GetConvolveNoiseStage(
+ size_t noise_c_start) {
+ return HWY_DYNAMIC_DISPATCH(GetConvolveNoiseStage)(noise_c_start);
+}
+
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_noise.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_noise.h
new file mode 100644
index 0000000000..bd7797f991
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_noise.h
@@ -0,0 +1,32 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_NOISE_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_NOISE_H_
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/dec_noise.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Adds noise to color channels.
+std::unique_ptr<RenderPipelineStage> GetAddNoiseStage(
+ const NoiseParams& noise_params, const ColorCorrelationMap& cmap,
+ size_t noise_c_start);
+
+// Applies a 5x5 subtract-box-filter convolution to the noise input channels.
+std::unique_ptr<RenderPipelineStage> GetConvolveNoiseStage(
+ size_t noise_c_start);
+
+} // namespace jxl
+
+#endif // LIB_JXL_RENDER_PIPELINE_STAGE_NOISE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_patches.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_patches.cc
new file mode 100644
index 0000000000..527be03839
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_patches.cc
@@ -0,0 +1,48 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_patches.h"
+
+namespace jxl {
+namespace {
+class PatchDictionaryStage : public RenderPipelineStage {
+ public:
+ PatchDictionaryStage(const PatchDictionary* patches, size_t num_channels)
+ : RenderPipelineStage(RenderPipelineStage::Settings()),
+ patches_(*patches),
+ num_channels_(num_channels) {}
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ PROFILER_ZONE("RenderPatches");
+ JXL_ASSERT(xpos == 0 || xpos >= xextra);
+ size_t x0 = xpos ? xpos - xextra : 0;
+ std::vector<float*> row_ptrs(num_channels_);
+ for (size_t i = 0; i < num_channels_; i++) {
+ row_ptrs[i] = GetInputRow(input_rows, i, 0) + x0 - xpos;
+ }
+ patches_.AddOneRow(row_ptrs.data(), ypos, x0, xsize + xextra + xpos - x0);
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return c < num_channels_ ? RenderPipelineChannelMode::kInPlace
+ : RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "Patches"; }
+
+ private:
+ const PatchDictionary& patches_;
+ const size_t num_channels_;
+};
+} // namespace
+
+std::unique_ptr<RenderPipelineStage> GetPatchesStage(
+ const PatchDictionary* patches, size_t num_channels) {
+ return jxl::make_unique<PatchDictionaryStage>(patches, num_channels);
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_patches.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_patches.h
new file mode 100644
index 0000000000..b35abdc2eb
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_patches.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_PATCHES_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_PATCHES_H_
+
+#include <utility>
+
+#include "lib/jxl/patch_dictionary_internal.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Draws patches if applicable.
+std::unique_ptr<RenderPipelineStage> GetPatchesStage(
+ const PatchDictionary* patches, size_t num_channels);
+
+} // namespace jxl
+
+#endif // LIB_JXL_RENDER_PIPELINE_STAGE_PATCHES_H_
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_splines.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_splines.cc
new file mode 100644
index 0000000000..d97d97e5f2
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_splines.cc
@@ -0,0 +1,63 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_splines.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_splines.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+class SplineStage : public RenderPipelineStage {
+ public:
+ explicit SplineStage(const Splines* splines)
+ : RenderPipelineStage(RenderPipelineStage::Settings()),
+ splines_(*splines) {}
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ PROFILER_ZONE("RenderSplines");
+ float* row_x = GetInputRow(input_rows, 0, 0);
+ float* row_y = GetInputRow(input_rows, 1, 0);
+ float* row_b = GetInputRow(input_rows, 2, 0);
+ splines_.AddToRow(row_x, row_y, row_b, Rect(xpos, ypos, xsize, 1));
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return c < 3 ? RenderPipelineChannelMode::kInPlace
+ : RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "Splines"; }
+
+ private:
+ const Splines& splines_;
+};
+
+std::unique_ptr<RenderPipelineStage> GetSplineStage(const Splines* splines) {
+ return jxl::make_unique<SplineStage>(splines);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetSplineStage);
+
+std::unique_ptr<RenderPipelineStage> GetSplineStage(const Splines* splines) {
+ return HWY_DYNAMIC_DISPATCH(GetSplineStage)(splines);
+}
+
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_splines.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_splines.h
new file mode 100644
index 0000000000..363af393ec
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_splines.h
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_SPLINES_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_SPLINES_H_
+
+#include <utility>
+
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+#include "lib/jxl/splines.h"
+
+namespace jxl {
+
+// Draws splines if applicable.
+std::unique_ptr<RenderPipelineStage> GetSplineStage(const Splines* splines);
+
+} // namespace jxl
+
+#endif // LIB_JXL_RENDER_PIPELINE_STAGE_SPLINES_H_
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_spot.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_spot.cc
new file mode 100644
index 0000000000..d4f6152994
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_spot.cc
@@ -0,0 +1,52 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_spot.h"
+
+namespace jxl {
+class SpotColorStage : public RenderPipelineStage {
+ public:
+ explicit SpotColorStage(size_t spot_c, const float* spot_color)
+ : RenderPipelineStage(RenderPipelineStage::Settings()),
+ spot_c_(spot_c),
+ spot_color_(spot_color) {
+ JXL_ASSERT(spot_c_ >= 3);
+ }
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ // TODO(veluca): add SIMD.
+ PROFILER_ZONE("RenderSpotColors");
+ float scale = spot_color_[3];
+ for (size_t c = 0; c < 3; c++) {
+ float* JXL_RESTRICT p = GetInputRow(input_rows, c, 0);
+ const float* JXL_RESTRICT s = GetInputRow(input_rows, spot_c_, 0);
+ for (ssize_t x = -xextra; x < ssize_t(xsize + xextra); x++) {
+ float mix = scale * s[x];
+ p[x] = mix * spot_color_[c] + (1.0f - mix) * p[x];
+ }
+ }
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return c < 3 ? RenderPipelineChannelMode::kInPlace
+ : c == spot_c_ ? RenderPipelineChannelMode::kInput
+ : RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "Spot"; }
+
+ private:
+ size_t spot_c_;
+ const float* spot_color_;
+};
+
+std::unique_ptr<RenderPipelineStage> GetSpotColorStage(
+ size_t spot_c, const float* spot_color) {
+ return jxl::make_unique<SpotColorStage>(spot_c, spot_color);
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_spot.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_spot.h
new file mode 100644
index 0000000000..3e79c75823
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_spot.h
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_SPOT_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_SPOT_H_
+
+#include <utility>
+
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Render the spot color channels.
+std::unique_ptr<RenderPipelineStage> GetSpotColorStage(size_t spot_c,
+ const float* spot_color);
+
+} // namespace jxl
+
+#endif // LIB_JXL_RENDER_PIPELINE_STAGE_SPOT_H_
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_to_linear.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_to_linear.cc
new file mode 100644
index 0000000000..9f5b2b73dc
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_to_linear.cc
@@ -0,0 +1,202 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_to_linear.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_to_linear.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/dec_tone_mapping-inl.h"
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::IfThenZeroElse;
+
+template <typename Op>
+struct PerChannelOp {
+ explicit PerChannelOp(Op op) : op(op) {}
+ template <typename D, typename T>
+ void Transform(D d, T* r, T* g, T* b) const {
+ *r = op.Transform(d, *r);
+ *g = op.Transform(d, *g);
+ *b = op.Transform(d, *b);
+ }
+
+ Op op;
+};
+template <typename Op>
+PerChannelOp<Op> MakePerChannelOp(Op&& op) {
+ return PerChannelOp<Op>(std::forward<Op>(op));
+}
+
+struct OpLinear {
+ template <typename D, typename T>
+ T Transform(D d, const T& encoded) const {
+ return encoded;
+ }
+};
+
+struct OpRgb {
+ template <typename D, typename T>
+ T Transform(D d, const T& encoded) const {
+ return TF_SRGB().DisplayFromEncoded(encoded);
+ }
+};
+
+struct OpPq {
+ template <typename D, typename T>
+ T Transform(D d, const T& encoded) const {
+ return TF_PQ().DisplayFromEncoded(d, encoded);
+ }
+};
+
+struct OpHlg {
+ explicit OpHlg(const float luminances[3], const float intensity_target)
+ : hlg_ootf_(HlgOOTF::FromSceneLight(
+ /*display_luminance=*/intensity_target, luminances)) {}
+
+ template <typename D, typename T>
+ void Transform(D d, T* r, T* g, T* b) const {
+ for (T* val : {r, g, b}) {
+ HWY_ALIGN float vals[MaxLanes(d)];
+ Store(*val, d, vals);
+ for (size_t i = 0; i < Lanes(d); ++i) {
+ vals[i] = TF_HLG().DisplayFromEncoded(vals[i]);
+ }
+ *val = Load(d, vals);
+ }
+ hlg_ootf_.Apply(r, g, b);
+ }
+ HlgOOTF hlg_ootf_;
+};
+
+struct Op709 {
+ template <typename D, typename T>
+ T Transform(D d, const T& encoded) const {
+ return TF_709().DisplayFromEncoded(d, encoded);
+ }
+};
+
+struct OpGamma {
+ const float gamma;
+ template <typename D, typename T>
+ T Transform(D d, const T& encoded) const {
+ return IfThenZeroElse(Le(encoded, Set(d, 1e-5f)),
+ FastPowf(d, encoded, Set(d, gamma)));
+ }
+};
+
+struct OpInvalid {
+ template <typename D, typename T>
+ void Transform(D d, T* r, T* g, T* b) const {}
+};
+
+template <typename Op>
+class ToLinearStage : public RenderPipelineStage {
+ public:
+ explicit ToLinearStage(Op op)
+ : RenderPipelineStage(RenderPipelineStage::Settings()),
+ op_(std::move(op)) {}
+
+ explicit ToLinearStage()
+ : RenderPipelineStage(RenderPipelineStage::Settings()), valid_(false) {}
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ PROFILER_ZONE("ToLinear");
+
+ const HWY_FULL(float) d;
+ const size_t xsize_v = RoundUpTo(xsize, Lanes(d));
+ float* JXL_RESTRICT row0 = GetInputRow(input_rows, 0, 0);
+ float* JXL_RESTRICT row1 = GetInputRow(input_rows, 1, 0);
+ float* JXL_RESTRICT row2 = GetInputRow(input_rows, 2, 0);
+ // All calculations are lane-wise, still some might require
+ // value-dependent behaviour (e.g. NearestInt). Temporary unpoison last
+ // vector tail.
+ msan::UnpoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize));
+ msan::UnpoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize));
+ msan::UnpoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize));
+ for (ssize_t x = -xextra; x < (ssize_t)(xsize + xextra); x += Lanes(d)) {
+ auto r = LoadU(d, row0 + x);
+ auto g = LoadU(d, row1 + x);
+ auto b = LoadU(d, row2 + x);
+ op_.Transform(d, &r, &g, &b);
+ StoreU(r, d, row0 + x);
+ StoreU(g, d, row1 + x);
+ StoreU(b, d, row2 + x);
+ }
+ msan::PoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize));
+ msan::PoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize));
+ msan::PoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize));
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return c < 3 ? RenderPipelineChannelMode::kInPlace
+ : RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "ToLinear"; }
+
+ private:
+ Status IsInitialized() const override { return valid_; }
+
+ Op op_;
+ bool valid_ = true;
+};
+
+template <typename Op>
+std::unique_ptr<ToLinearStage<Op>> MakeToLinearStage(Op&& op) {
+ return jxl::make_unique<ToLinearStage<Op>>(std::forward<Op>(op));
+}
+
+std::unique_ptr<RenderPipelineStage> GetToLinearStage(
+ const OutputEncodingInfo& output_encoding_info) {
+ if (output_encoding_info.color_encoding.tf.IsLinear()) {
+ return MakeToLinearStage(MakePerChannelOp(OpLinear()));
+ } else if (output_encoding_info.color_encoding.tf.IsSRGB()) {
+ return MakeToLinearStage(MakePerChannelOp(OpRgb()));
+ } else if (output_encoding_info.color_encoding.tf.IsPQ()) {
+ return MakeToLinearStage(MakePerChannelOp(OpPq()));
+ } else if (output_encoding_info.color_encoding.tf.IsHLG()) {
+ return MakeToLinearStage(OpHlg(output_encoding_info.luminances,
+ output_encoding_info.orig_intensity_target));
+ } else if (output_encoding_info.color_encoding.tf.Is709()) {
+ return MakeToLinearStage(MakePerChannelOp(Op709()));
+ } else if (output_encoding_info.color_encoding.tf.IsGamma() ||
+ output_encoding_info.color_encoding.tf.IsDCI()) {
+ return MakeToLinearStage(
+ MakePerChannelOp(OpGamma{1.f / output_encoding_info.inverse_gamma}));
+ } else {
+ return jxl::make_unique<ToLinearStage<OpInvalid>>();
+ }
+}
+
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetToLinearStage);
+
+std::unique_ptr<RenderPipelineStage> GetToLinearStage(
+ const OutputEncodingInfo& output_encoding_info) {
+ return HWY_DYNAMIC_DISPATCH(GetToLinearStage)(output_encoding_info);
+}
+
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_to_linear.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_to_linear.h
new file mode 100644
index 0000000000..ccee7b09f0
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_to_linear.h
@@ -0,0 +1,21 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_TO_LINEAR_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_TO_LINEAR_H_
+
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Converts the color channels from `output_encoding_info.color_encoding` to
+// linear.
+std::unique_ptr<RenderPipelineStage> GetToLinearStage(
+ const OutputEncodingInfo& output_encoding_info);
+
+} // namespace jxl
+
+#endif // LIB_JXL_RENDER_PIPELINE_STAGE_TO_LINEAR_H_
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_tone_mapping.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_tone_mapping.cc
new file mode 100644
index 0000000000..7609534a5b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_tone_mapping.cc
@@ -0,0 +1,151 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_tone_mapping.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_tone_mapping.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/dec_tone_mapping-inl.h"
+#include "lib/jxl/dec_xyb-inl.h"
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+class ToneMappingStage : public RenderPipelineStage {
+ public:
+ explicit ToneMappingStage(OutputEncodingInfo output_encoding_info)
+ : RenderPipelineStage(RenderPipelineStage::Settings()),
+ output_encoding_info_(std::move(output_encoding_info)) {
+ if (output_encoding_info_.desired_intensity_target ==
+ output_encoding_info_.orig_intensity_target) {
+ // No tone mapping requested.
+ return;
+ }
+ if (output_encoding_info_.orig_color_encoding.tf.IsPQ() &&
+ output_encoding_info_.desired_intensity_target <
+ output_encoding_info_.orig_intensity_target) {
+ tone_mapper_ = jxl::make_unique<ToneMapper>(
+ /*source_range=*/std::pair<float, float>(
+ 0, output_encoding_info_.orig_intensity_target),
+ /*target_range=*/
+ std::pair<float, float>(
+ 0, output_encoding_info_.desired_intensity_target),
+ output_encoding_info_.luminances);
+ } else if (output_encoding_info_.orig_color_encoding.tf.IsHLG() &&
+ !output_encoding_info_.color_encoding.tf.IsHLG()) {
+ hlg_ootf_ = jxl::make_unique<HlgOOTF>(
+ /*source_luminance=*/output_encoding_info_.orig_intensity_target,
+ /*target_luminance=*/output_encoding_info_.desired_intensity_target,
+ output_encoding_info_.luminances);
+ }
+
+ if (output_encoding_info_.color_encoding.tf.IsPQ() &&
+ (tone_mapper_ || hlg_ootf_)) {
+ to_intensity_target_ =
+ 10000.f / output_encoding_info_.orig_intensity_target;
+ from_desired_intensity_target_ =
+ output_encoding_info_.desired_intensity_target / 10000.f;
+ }
+ }
+
+ bool IsNeeded() const { return tone_mapper_ || hlg_ootf_; }
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ PROFILER_ZONE("ToneMapping");
+
+ if (!(tone_mapper_ || hlg_ootf_)) return;
+
+ const HWY_FULL(float) d;
+ const size_t xsize_v = RoundUpTo(xsize, Lanes(d));
+ float* JXL_RESTRICT row0 = GetInputRow(input_rows, 0, 0);
+ float* JXL_RESTRICT row1 = GetInputRow(input_rows, 1, 0);
+ float* JXL_RESTRICT row2 = GetInputRow(input_rows, 2, 0);
+ // All calculations are lane-wise, still some might require
+ // value-dependent behaviour (e.g. NearestInt). Temporary unpoison last
+ // vector tail.
+ msan::UnpoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize));
+ msan::UnpoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize));
+ msan::UnpoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize));
+ for (ssize_t x = -xextra; x < (ssize_t)(xsize + xextra); x += Lanes(d)) {
+ auto r = LoadU(d, row0 + x);
+ auto g = LoadU(d, row1 + x);
+ auto b = LoadU(d, row2 + x);
+ if (tone_mapper_ || hlg_ootf_) {
+ r = Mul(r, Set(d, to_intensity_target_));
+ g = Mul(g, Set(d, to_intensity_target_));
+ b = Mul(b, Set(d, to_intensity_target_));
+ if (tone_mapper_) {
+ tone_mapper_->ToneMap(&r, &g, &b);
+ } else {
+ JXL_ASSERT(hlg_ootf_);
+ hlg_ootf_->Apply(&r, &g, &b);
+ }
+ if (tone_mapper_ || hlg_ootf_->WarrantsGamutMapping()) {
+ GamutMap(&r, &g, &b, output_encoding_info_.luminances);
+ }
+ r = Mul(r, Set(d, from_desired_intensity_target_));
+ g = Mul(g, Set(d, from_desired_intensity_target_));
+ b = Mul(b, Set(d, from_desired_intensity_target_));
+ }
+ StoreU(r, d, row0 + x);
+ StoreU(g, d, row1 + x);
+ StoreU(b, d, row2 + x);
+ }
+ msan::PoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize));
+ msan::PoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize));
+ msan::PoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize));
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return c < 3 ? RenderPipelineChannelMode::kInPlace
+ : RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "ToneMapping"; }
+
+ private:
+ using ToneMapper = Rec2408ToneMapper<HWY_FULL(float)>;
+ OutputEncodingInfo output_encoding_info_;
+ std::unique_ptr<ToneMapper> tone_mapper_;
+ std::unique_ptr<HlgOOTF> hlg_ootf_;
+ // When the target colorspace is PQ, 1 represents 10000 nits instead of
+ // orig_intensity_target. This temporarily changes this if the tone mappers
+ // require it.
+ float to_intensity_target_ = 1.f;
+ float from_desired_intensity_target_ = 1.f;
+};
+
+std::unique_ptr<RenderPipelineStage> GetToneMappingStage(
+ const OutputEncodingInfo& output_encoding_info) {
+ auto stage = jxl::make_unique<ToneMappingStage>(output_encoding_info);
+ if (!stage->IsNeeded()) return nullptr;
+ return stage;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetToneMappingStage);
+
+std::unique_ptr<RenderPipelineStage> GetToneMappingStage(
+ const OutputEncodingInfo& output_encoding_info) {
+ return HWY_DYNAMIC_DISPATCH(GetToneMappingStage)(output_encoding_info);
+}
+
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_tone_mapping.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_tone_mapping.h
new file mode 100644
index 0000000000..99824f8511
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_tone_mapping.h
@@ -0,0 +1,37 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_TONE_MAPPING_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_TONE_MAPPING_H_
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Tone maps the image if appropriate. It must be in linear space and
+// `output_encoding_info.luminances` must contain the luminance for the
+// primaries of that space. It must also be encoded such that (1, 1, 1)
+// represents `output_encoding_info.orig_intensity_target` nits, unless
+// `output_encoding_info.color_encoding.tf.IsPQ()`, in which case (1, 1, 1) must
+// represent 10000 nits. This corresponds to what XYBStage outputs. After this
+// stage, (1, 1, 1) will represent
+// `output_encoding_info.desired_intensity_target` nits, except in the PQ
+// special case in which it remains 10000.
+//
+// If no tone mapping is necessary, this will return nullptr.
+std::unique_ptr<RenderPipelineStage> GetToneMappingStage(
+ const OutputEncodingInfo& output_encoding_info);
+
+} // namespace jxl
+
+#endif // LIB_JXL_RENDER_PIPELINE_STAGE_TONE_MAPPING_H_
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_upsampling.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_upsampling.cc
new file mode 100644
index 0000000000..a75e259865
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_upsampling.cc
@@ -0,0 +1,187 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_upsampling.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_upsampling.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/sanitizers.h"
+#include "lib/jxl/simd_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Clamp;
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::Min;
+using hwy::HWY_NAMESPACE::MulAdd;
+
+class UpsamplingStage : public RenderPipelineStage {
+ public:
+ explicit UpsamplingStage(const CustomTransformData& ups_factors, size_t c,
+ size_t shift)
+ : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric(
+ /*shift=*/shift, /*border=*/2)),
+ c_(c) {
+ const float* weights = shift == 1 ? ups_factors.upsampling2_weights
+ : shift == 2 ? ups_factors.upsampling4_weights
+ : ups_factors.upsampling8_weights;
+ size_t N = 1 << (shift - 1);
+ for (size_t i = 0; i < 5 * N; i++) {
+ for (size_t j = 0; j < 5 * N; j++) {
+ size_t y = std::min(i, j);
+ size_t x = std::max(i, j);
+ kernel_[j / 5][i / 5][j % 5][i % 5] =
+ weights[5 * N * y - y * (y - 1) / 2 + x - y];
+ }
+ }
+ }
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ PROFILER_ZONE("Upsampling");
+ static HWY_FULL(float) df;
+ size_t shift = settings_.shift_x;
+ size_t N = 1 << shift;
+ const size_t xsize_v = RoundUpTo(xsize, Lanes(df));
+ for (ssize_t iy = -2; iy <= 2; iy++) {
+ msan::UnpoisonMemory(GetInputRow(input_rows, c_, iy) + xsize + 2,
+ sizeof(float) * (xsize_v - xsize));
+ }
+ JXL_ASSERT(xextra == 0);
+ ssize_t x0 = 0;
+ ssize_t x1 = xsize;
+ if (N == 2) {
+ ProcessRowImpl<2>(input_rows, output_rows, x0, x1);
+ }
+ if (N == 4) {
+ ProcessRowImpl<4>(input_rows, output_rows, x0, x1);
+ }
+ if (N == 8) {
+ ProcessRowImpl<8>(input_rows, output_rows, x0, x1);
+ }
+ for (size_t oy = 0; oy < N; oy++) {
+ float* dst_row = GetOutputRow(output_rows, c_, oy);
+ msan::PoisonMemory(dst_row + xsize * N,
+ sizeof(float) * (xsize_v - xsize) * N);
+ }
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return c == c_ ? RenderPipelineChannelMode::kInOut
+ : RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "Upsample"; }
+
+ private:
+ template <size_t N>
+ JXL_INLINE float Kernel(size_t x, size_t y, ssize_t ix, ssize_t iy) const {
+ ix += 2;
+ iy += 2;
+ if (N == 2) {
+ return kernel_[0][0][y % 2 ? 4 - iy : iy][x % 2 ? 4 - ix : ix];
+ }
+ if (N == 4) {
+ return kernel_[y % 4 < 2 ? y % 2 : 1 - y % 2]
+ [x % 4 < 2 ? x % 2 : 1 - x % 2][y % 4 < 2 ? iy : 4 - iy]
+ [x % 4 < 2 ? ix : 4 - ix];
+ }
+ if (N == 8) {
+ return kernel_[y % 8 < 4 ? y % 4 : 3 - y % 4]
+ [x % 8 < 4 ? x % 4 : 3 - x % 4][y % 8 < 4 ? iy : 4 - iy]
+ [x % 8 < 4 ? ix : 4 - ix];
+ }
+ JXL_ABORT("Invalid upsample");
+ }
+
+ template <ssize_t N>
+ void ProcessRowImpl(const RowInfo& input_rows, const RowInfo& output_rows,
+ ssize_t x0, ssize_t x1) const {
+ static HWY_FULL(float) df;
+ using V = hwy::HWY_NAMESPACE::Vec<HWY_FULL(float)>;
+ V ups0, ups1, ups2, ups3, ups4, ups5, ups6, ups7;
+ (void)ups2, (void)ups3, (void)ups4, (void)ups5, (void)ups6, (void)ups7;
+ V* ups[N];
+ if (N >= 2) {
+ ups[0] = &ups0;
+ ups[1] = &ups1;
+ }
+ if (N >= 4) {
+ ups[2] = &ups2;
+ ups[3] = &ups3;
+ }
+ if (N == 8) {
+ ups[4] = &ups4;
+ ups[5] = &ups5;
+ ups[6] = &ups6;
+ ups[7] = &ups7;
+ }
+ for (size_t oy = 0; oy < N; oy++) {
+ float* dst_row = GetOutputRow(output_rows, c_, oy);
+ for (ssize_t x = x0; x < x1; x += Lanes(df)) {
+ for (size_t ox = 0; ox < N; ox++) {
+ auto result = Zero(df);
+ auto min = LoadU(df, GetInputRow(input_rows, c_, 0) + x);
+ auto max = min;
+ for (ssize_t iy = -2; iy <= 2; iy++) {
+ for (ssize_t ix = -2; ix <= 2; ix++) {
+ auto v = LoadU(df, GetInputRow(input_rows, c_, iy) + x + ix);
+ result = MulAdd(Set(df, Kernel<N>(ox, oy, ix, iy)), v, result);
+ min = Min(v, min);
+ max = Max(v, max);
+ }
+ }
+ // Avoid overshooting.
+ *ups[ox] = Clamp(result, min, max);
+ }
+ if (N == 2) {
+ StoreInterleaved(df, ups0, ups1, dst_row + x * N);
+ }
+ if (N == 4) {
+ StoreInterleaved(df, ups0, ups1, ups2, ups3, dst_row + x * N);
+ }
+ if (N == 8) {
+ StoreInterleaved(df, ups0, ups1, ups2, ups3, ups4, ups5, ups6, ups7,
+ dst_row + x * N);
+ }
+ }
+ }
+ }
+
+ size_t c_;
+ float kernel_[4][4][5][5];
+};
+
+std::unique_ptr<RenderPipelineStage> GetUpsamplingStage(
+ const CustomTransformData& ups_factors, size_t c, size_t shift) {
+ return jxl::make_unique<UpsamplingStage>(ups_factors, c, shift);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetUpsamplingStage);
+
+std::unique_ptr<RenderPipelineStage> GetUpsamplingStage(
+ const CustomTransformData& ups_factors, size_t c, size_t shift) {
+ JXL_ASSERT(shift != 0);
+ JXL_ASSERT(shift <= 3);
+ return HWY_DYNAMIC_DISPATCH(GetUpsamplingStage)(ups_factors, c, shift);
+}
+
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_upsampling.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_upsampling.h
new file mode 100644
index 0000000000..7d5defd23c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_upsampling.h
@@ -0,0 +1,26 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_UPSAMPLING_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_UPSAMPLING_H_
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/image_metadata.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Upsamples the given channel by the given factor.
+std::unique_ptr<RenderPipelineStage> GetUpsamplingStage(
+ const CustomTransformData& ups_factors, size_t c, size_t shift);
+} // namespace jxl
+
+#endif // LIB_JXL_RENDER_PIPELINE_STAGE_UPSAMPLING_H_
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_write.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_write.cc
new file mode 100644
index 0000000000..902fc33b7e
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_write.cc
@@ -0,0 +1,601 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_write.h"
+
+#include "lib/jxl/alpha.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/sanitizers.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_write.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Clamp;
+using hwy::HWY_NAMESPACE::Div;
+using hwy::HWY_NAMESPACE::Max;
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::NearestInt;
+using hwy::HWY_NAMESPACE::Or;
+using hwy::HWY_NAMESPACE::Rebind;
+using hwy::HWY_NAMESPACE::ShiftLeftSame;
+using hwy::HWY_NAMESPACE::ShiftRightSame;
+
+class WriteToOutputStage : public RenderPipelineStage {
+ public:
+ WriteToOutputStage(const ImageOutput& main_output, size_t width,
+ size_t height, bool has_alpha, bool unpremul_alpha,
+ size_t alpha_c, Orientation undo_orientation,
+ const std::vector<ImageOutput>& extra_output)
+ : RenderPipelineStage(RenderPipelineStage::Settings()),
+ width_(width),
+ height_(height),
+ main_(main_output),
+ num_color_(main_.num_channels_ < 3 ? 1 : 3),
+ want_alpha_(main_.num_channels_ == 2 || main_.num_channels_ == 4),
+ has_alpha_(has_alpha),
+ unpremul_alpha_(unpremul_alpha),
+ alpha_c_(alpha_c),
+ flip_x_(ShouldFlipX(undo_orientation)),
+ flip_y_(ShouldFlipY(undo_orientation)),
+ transpose_(ShouldTranspose(undo_orientation)),
+ opaque_alpha_(kMaxPixelsPerCall, 1.0f) {
+ for (size_t ec = 0; ec < extra_output.size(); ++ec) {
+ if (extra_output[ec].callback.IsPresent() || extra_output[ec].buffer) {
+ Output extra(extra_output[ec]);
+ extra.channel_index_ = 3 + ec;
+ extra_channels_.push_back(extra);
+ }
+ }
+ }
+
+ WriteToOutputStage(const WriteToOutputStage&) = delete;
+ WriteToOutputStage& operator=(const WriteToOutputStage&) = delete;
+ WriteToOutputStage(WriteToOutputStage&&) = delete;
+ WriteToOutputStage& operator=(WriteToOutputStage&&) = delete;
+
+ ~WriteToOutputStage() override {
+ if (main_.run_opaque_) {
+ main_.pixel_callback_.destroy(main_.run_opaque_);
+ }
+ for (auto& extra : extra_channels_) {
+ if (extra.run_opaque_) {
+ extra.pixel_callback_.destroy(extra.run_opaque_);
+ }
+ }
+ }
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ JXL_DASSERT(xextra == 0);
+ JXL_DASSERT(main_.run_opaque_ || main_.buffer_);
+ if (ypos >= height_) return;
+ if (xpos >= width_) return;
+ if (flip_y_) {
+ ypos = height_ - 1u - ypos;
+ }
+ size_t limit = std::min(xsize, width_ - xpos);
+ for (size_t x0 = 0; x0 < limit; x0 += kMaxPixelsPerCall) {
+ size_t xstart = xpos + x0;
+ size_t len = std::min<size_t>(kMaxPixelsPerCall, limit - x0);
+
+ const float* line_buffers[4];
+ for (size_t c = 0; c < num_color_; c++) {
+ line_buffers[c] = GetInputRow(input_rows, c, 0) + x0;
+ }
+ if (has_alpha_) {
+ line_buffers[num_color_] = GetInputRow(input_rows, alpha_c_, 0) + x0;
+ } else {
+ // opaque_alpha_ is a way to set all values to 1.0f.
+ line_buffers[num_color_] = opaque_alpha_.data();
+ }
+ if (has_alpha_ && want_alpha_ && unpremul_alpha_) {
+ UnpremulAlpha(thread_id, len, line_buffers);
+ }
+ OutputBuffers(main_, thread_id, ypos, xstart, len, line_buffers);
+ for (const auto& extra : extra_channels_) {
+ line_buffers[0] = GetInputRow(input_rows, extra.channel_index_, 0) + x0;
+ OutputBuffers(extra, thread_id, ypos, xstart, len, line_buffers);
+ }
+ }
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ if (c < num_color_ || (has_alpha_ && c == alpha_c_)) {
+ return RenderPipelineChannelMode::kInput;
+ }
+ for (const auto& extra : extra_channels_) {
+ if (c == extra.channel_index_) {
+ return RenderPipelineChannelMode::kInput;
+ }
+ }
+ return RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "WritePixelCB"; }
+
+ private:
+ struct Output {
+ Output(const ImageOutput& image_out)
+ : pixel_callback_(image_out.callback),
+ buffer_(image_out.buffer),
+ buffer_size_(image_out.buffer_size),
+ stride_(image_out.stride),
+ num_channels_(image_out.format.num_channels),
+ swap_endianness_(SwapEndianness(image_out.format.endianness)),
+ data_type_(image_out.format.data_type),
+ bits_per_sample_(image_out.bits_per_sample) {}
+
+ Status PrepareForThreads(size_t num_threads) {
+ if (pixel_callback_.IsPresent()) {
+ run_opaque_ =
+ pixel_callback_.Init(num_threads, /*num_pixels=*/kMaxPixelsPerCall);
+ JXL_RETURN_IF_ERROR(run_opaque_ != nullptr);
+ } else {
+ JXL_RETURN_IF_ERROR(buffer_ != nullptr);
+ }
+ return true;
+ }
+
+ PixelCallback pixel_callback_;
+ void* run_opaque_ = nullptr;
+ void* buffer_ = nullptr;
+ size_t buffer_size_;
+ size_t stride_;
+ size_t num_channels_;
+ bool swap_endianness_;
+ JxlDataType data_type_;
+ size_t bits_per_sample_;
+ size_t channel_index_; // used for extra_channels
+ };
+
+ Status PrepareForThreads(size_t num_threads) override {
+ JXL_RETURN_IF_ERROR(main_.PrepareForThreads(num_threads));
+ for (auto& extra : extra_channels_) {
+ JXL_RETURN_IF_ERROR(extra.PrepareForThreads(num_threads));
+ }
+ temp_out_.resize(num_threads);
+ for (CacheAlignedUniquePtr& temp : temp_out_) {
+ temp = AllocateArray(sizeof(float) * kMaxPixelsPerCall *
+ main_.num_channels_);
+ }
+ if ((has_alpha_ && want_alpha_ && unpremul_alpha_) || flip_x_) {
+ temp_in_.resize(num_threads * main_.num_channels_);
+ for (CacheAlignedUniquePtr& temp : temp_in_) {
+ temp = AllocateArray(sizeof(float) * kMaxPixelsPerCall);
+ }
+ }
+ return true;
+ }
+ static bool ShouldFlipX(Orientation undo_orientation) {
+ return (undo_orientation == Orientation::kFlipHorizontal ||
+ undo_orientation == Orientation::kRotate180 ||
+ undo_orientation == Orientation::kRotate270 ||
+ undo_orientation == Orientation::kAntiTranspose);
+ }
+ static bool ShouldFlipY(Orientation undo_orientation) {
+ return (undo_orientation == Orientation::kFlipVertical ||
+ undo_orientation == Orientation::kRotate180 ||
+ undo_orientation == Orientation::kRotate90 ||
+ undo_orientation == Orientation::kAntiTranspose);
+ }
+ static bool ShouldTranspose(Orientation undo_orientation) {
+ return (undo_orientation == Orientation::kTranspose ||
+ undo_orientation == Orientation::kRotate90 ||
+ undo_orientation == Orientation::kRotate270 ||
+ undo_orientation == Orientation::kAntiTranspose);
+ }
+
+ void UnpremulAlpha(size_t thread_id, size_t len,
+ const float** line_buffers) const {
+ const HWY_FULL(float) d;
+ auto one = Set(d, 1.0f);
+ float* temp_in[4];
+ for (size_t c = 0; c < main_.num_channels_; ++c) {
+ size_t tix = thread_id * main_.num_channels_ + c;
+ temp_in[c] = reinterpret_cast<float*>(temp_in_[tix].get());
+ memcpy(temp_in[c], line_buffers[c], sizeof(float) * len);
+ }
+ auto small_alpha = Set(d, kSmallAlpha);
+ for (size_t ix = 0; ix < len; ix += Lanes(d)) {
+ auto alpha = LoadU(d, temp_in[num_color_] + ix);
+ auto mul = Div(one, Max(small_alpha, alpha));
+ for (size_t c = 0; c < num_color_; ++c) {
+ auto val = LoadU(d, temp_in[c] + ix);
+ StoreU(Mul(val, mul), d, temp_in[c] + ix);
+ }
+ }
+ for (size_t c = 0; c < main_.num_channels_; ++c) {
+ line_buffers[c] = temp_in[c];
+ }
+ }
+
+ void OutputBuffers(const Output& out, size_t thread_id, size_t ypos,
+ size_t xstart, size_t len, const float* input[4]) const {
+ if (flip_x_) {
+ FlipX(out, thread_id, len, &xstart, input);
+ }
+ if (out.data_type_ == JXL_TYPE_UINT8) {
+ uint8_t* JXL_RESTRICT temp =
+ reinterpret_cast<uint8_t*>(temp_out_[thread_id].get());
+ StoreUnsignedRow(out, input, len, temp);
+ WriteToOutput(out, thread_id, ypos, xstart, len, temp);
+ } else if (out.data_type_ == JXL_TYPE_UINT16 ||
+ out.data_type_ == JXL_TYPE_FLOAT16) {
+ uint16_t* JXL_RESTRICT temp =
+ reinterpret_cast<uint16_t*>(temp_out_[thread_id].get());
+ if (out.data_type_ == JXL_TYPE_UINT16) {
+ StoreUnsignedRow(out, input, len, temp);
+ } else {
+ StoreFloat16Row(out, input, len, temp);
+ }
+ if (out.swap_endianness_) {
+ const HWY_FULL(uint16_t) du;
+ size_t output_len = len * out.num_channels_;
+ for (size_t j = 0; j < output_len; j += Lanes(du)) {
+ auto v = LoadU(du, temp + j);
+ auto vswap = Or(ShiftRightSame(v, 8), ShiftLeftSame(v, 8));
+ StoreU(vswap, du, temp + j);
+ }
+ }
+ WriteToOutput(out, thread_id, ypos, xstart, len, temp);
+ } else if (out.data_type_ == JXL_TYPE_FLOAT) {
+ float* JXL_RESTRICT temp =
+ reinterpret_cast<float*>(temp_out_[thread_id].get());
+ StoreFloatRow(out, input, len, temp);
+ if (out.swap_endianness_) {
+ size_t output_len = len * out.num_channels_;
+ for (size_t j = 0; j < output_len; ++j) {
+ temp[j] = BSwapFloat(temp[j]);
+ }
+ }
+ WriteToOutput(out, thread_id, ypos, xstart, len, temp);
+ }
+ }
+
+ void FlipX(const Output& out, size_t thread_id, size_t len, size_t* xstart,
+ const float** line_buffers) const {
+ float* temp_in[4];
+ for (size_t c = 0; c < out.num_channels_; ++c) {
+ size_t tix = thread_id * main_.num_channels_ + c;
+ temp_in[c] = reinterpret_cast<float*>(temp_in_[tix].get());
+ if (temp_in[c] != line_buffers[c]) {
+ memcpy(temp_in[c], line_buffers[c], sizeof(float) * len);
+ }
+ }
+ size_t last = (len - 1u);
+ size_t num = (len / 2);
+ for (size_t i = 0; i < num; ++i) {
+ for (size_t c = 0; c < out.num_channels_; ++c) {
+ std::swap(temp_in[c][i], temp_in[c][last - i]);
+ }
+ }
+ for (size_t c = 0; c < out.num_channels_; ++c) {
+ line_buffers[c] = temp_in[c];
+ }
+ *xstart = width_ - *xstart - len;
+ }
+
+ template <typename T>
+ void StoreUnsignedRow(const Output& out, const float* input[4], size_t len,
+ T* output) const {
+ const HWY_FULL(float) d;
+ auto zero = Zero(d);
+ auto one = Set(d, 1.0f);
+ auto mul = Set(d, (1u << (out.bits_per_sample_)) - 1);
+ const Rebind<T, decltype(d)> du;
+ const size_t padding = RoundUpTo(len, Lanes(d)) - len;
+ for (size_t c = 0; c < out.num_channels_; ++c) {
+ msan::UnpoisonMemory(input[c] + len, sizeof(input[c][0]) * padding);
+ }
+ if (out.num_channels_ == 1) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul);
+ StoreU(DemoteTo(du, NearestInt(v0)), du, &output[i]);
+ }
+ } else if (out.num_channels_ == 2) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul);
+ auto v1 = Mul(Clamp(zero, LoadU(d, &input[1][i]), one), mul);
+ StoreInterleaved2(DemoteTo(du, NearestInt(v0)),
+ DemoteTo(du, NearestInt(v1)), du, &output[2 * i]);
+ }
+ } else if (out.num_channels_ == 3) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul);
+ auto v1 = Mul(Clamp(zero, LoadU(d, &input[1][i]), one), mul);
+ auto v2 = Mul(Clamp(zero, LoadU(d, &input[2][i]), one), mul);
+ StoreInterleaved3(DemoteTo(du, NearestInt(v0)),
+ DemoteTo(du, NearestInt(v1)),
+ DemoteTo(du, NearestInt(v2)), du, &output[3 * i]);
+ }
+ } else if (out.num_channels_ == 4) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul);
+ auto v1 = Mul(Clamp(zero, LoadU(d, &input[1][i]), one), mul);
+ auto v2 = Mul(Clamp(zero, LoadU(d, &input[2][i]), one), mul);
+ auto v3 = Mul(Clamp(zero, LoadU(d, &input[3][i]), one), mul);
+ StoreInterleaved4(DemoteTo(du, NearestInt(v0)),
+ DemoteTo(du, NearestInt(v1)),
+ DemoteTo(du, NearestInt(v2)),
+ DemoteTo(du, NearestInt(v3)), du, &output[4 * i]);
+ }
+ }
+ msan::PoisonMemory(output + out.num_channels_ * len,
+ sizeof(output[0]) * out.num_channels_ * padding);
+ }
+
+ void StoreFloat16Row(const Output& out, const float* input[4], size_t len,
+ uint16_t* output) const {
+ const HWY_FULL(float) d;
+ const Rebind<uint16_t, decltype(d)> du;
+ const Rebind<hwy::float16_t, decltype(d)> df16;
+ const size_t padding = RoundUpTo(len, Lanes(d)) - len;
+ for (size_t c = 0; c < out.num_channels_; ++c) {
+ msan::UnpoisonMemory(input[c] + len, sizeof(input[c][0]) * padding);
+ }
+ if (out.num_channels_ == 1) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ auto v0 = LoadU(d, &input[0][i]);
+ StoreU(BitCast(du, DemoteTo(df16, v0)), du, &output[i]);
+ }
+ } else if (out.num_channels_ == 2) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ auto v0 = LoadU(d, &input[0][i]);
+ auto v1 = LoadU(d, &input[1][i]);
+ StoreInterleaved2(BitCast(du, DemoteTo(df16, v0)),
+ BitCast(du, DemoteTo(df16, v1)), du, &output[2 * i]);
+ }
+ } else if (out.num_channels_ == 3) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ auto v0 = LoadU(d, &input[0][i]);
+ auto v1 = LoadU(d, &input[1][i]);
+ auto v2 = LoadU(d, &input[2][i]);
+ StoreInterleaved3(BitCast(du, DemoteTo(df16, v0)),
+ BitCast(du, DemoteTo(df16, v1)),
+ BitCast(du, DemoteTo(df16, v2)), du, &output[3 * i]);
+ }
+ } else if (out.num_channels_ == 4) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ auto v0 = LoadU(d, &input[0][i]);
+ auto v1 = LoadU(d, &input[1][i]);
+ auto v2 = LoadU(d, &input[2][i]);
+ auto v3 = LoadU(d, &input[3][i]);
+ StoreInterleaved4(BitCast(du, DemoteTo(df16, v0)),
+ BitCast(du, DemoteTo(df16, v1)),
+ BitCast(du, DemoteTo(df16, v2)),
+ BitCast(du, DemoteTo(df16, v3)), du, &output[4 * i]);
+ }
+ }
+ msan::PoisonMemory(output + out.num_channels_ * len,
+ sizeof(output[0]) * out.num_channels_ * padding);
+ }
+
+ void StoreFloatRow(const Output& out, const float* input[4], size_t len,
+ float* output) const {
+ const HWY_FULL(float) d;
+ if (out.num_channels_ == 1) {
+ memcpy(output, input[0], len * sizeof(output[0]));
+ } else if (out.num_channels_ == 2) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ StoreInterleaved2(LoadU(d, &input[0][i]), LoadU(d, &input[1][i]), d,
+ &output[2 * i]);
+ }
+ } else if (out.num_channels_ == 3) {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ StoreInterleaved3(LoadU(d, &input[0][i]), LoadU(d, &input[1][i]),
+ LoadU(d, &input[2][i]), d, &output[3 * i]);
+ }
+ } else {
+ for (size_t i = 0; i < len; i += Lanes(d)) {
+ StoreInterleaved4(LoadU(d, &input[0][i]), LoadU(d, &input[1][i]),
+ LoadU(d, &input[2][i]), LoadU(d, &input[3][i]), d,
+ &output[4 * i]);
+ }
+ }
+ }
+
+ template <typename T>
+ void WriteToOutput(const Output& out, size_t thread_id, size_t ypos,
+ size_t xstart, size_t len, T* output) const {
+ if (transpose_) {
+ // TODO(szabadka) Buffer 8x8 chunks and transpose with SIMD.
+ if (out.run_opaque_) {
+ for (size_t i = 0, j = 0; i < len; ++i, j += out.num_channels_) {
+ out.pixel_callback_.run(out.run_opaque_, thread_id, ypos, xstart + i,
+ 1, output + j);
+ }
+ } else {
+ const size_t pixel_stride = out.num_channels_ * sizeof(T);
+ const size_t offset = xstart * out.stride_ + ypos * pixel_stride;
+ for (size_t i = 0, j = 0; i < len; ++i, j += out.num_channels_) {
+ const size_t ix = offset + i * out.stride_;
+ JXL_DASSERT(ix + pixel_stride <= out.buffer_size_);
+ memcpy(reinterpret_cast<uint8_t*>(out.buffer_) + ix, output + j,
+ pixel_stride);
+ }
+ }
+ } else {
+ if (out.run_opaque_) {
+ out.pixel_callback_.run(out.run_opaque_, thread_id, xstart, ypos, len,
+ output);
+ } else {
+ const size_t pixel_stride = out.num_channels_ * sizeof(T);
+ const size_t offset = ypos * out.stride_ + xstart * pixel_stride;
+ JXL_DASSERT(offset + len * pixel_stride <= out.buffer_size_);
+ memcpy(reinterpret_cast<uint8_t*>(out.buffer_) + offset, output,
+ len * pixel_stride);
+ }
+ }
+ }
+
+ static constexpr size_t kMaxPixelsPerCall = 1024;
+ size_t width_;
+ size_t height_;
+ Output main_; // color + alpha
+ size_t num_color_;
+ bool want_alpha_;
+ bool has_alpha_;
+ bool unpremul_alpha_;
+ size_t alpha_c_;
+ bool flip_x_;
+ bool flip_y_;
+ bool transpose_;
+ std::vector<Output> extra_channels_;
+ std::vector<float> opaque_alpha_;
+ std::vector<CacheAlignedUniquePtr> temp_in_;
+ std::vector<CacheAlignedUniquePtr> temp_out_;
+};
+
+constexpr size_t WriteToOutputStage::kMaxPixelsPerCall;
+
+std::unique_ptr<RenderPipelineStage> GetWriteToOutputStage(
+ const ImageOutput& main_output, size_t width, size_t height, bool has_alpha,
+ bool unpremul_alpha, size_t alpha_c, Orientation undo_orientation,
+ std::vector<ImageOutput>& extra_output) {
+ return jxl::make_unique<WriteToOutputStage>(
+ main_output, width, height, has_alpha, unpremul_alpha, alpha_c,
+ undo_orientation, extra_output);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace jxl {
+
+HWY_EXPORT(GetWriteToOutputStage);
+
+namespace {
+class WriteToImageBundleStage : public RenderPipelineStage {
+ public:
+ explicit WriteToImageBundleStage(ImageBundle* image_bundle,
+ ColorEncoding color_encoding)
+ : RenderPipelineStage(RenderPipelineStage::Settings()),
+ image_bundle_(image_bundle),
+ color_encoding_(std::move(color_encoding)) {}
+
+ void SetInputSizes(
+ const std::vector<std::pair<size_t, size_t>>& input_sizes) override {
+#if JXL_ENABLE_ASSERT
+ JXL_ASSERT(input_sizes.size() >= 3);
+ for (size_t c = 1; c < input_sizes.size(); c++) {
+ JXL_ASSERT(input_sizes[c].first == input_sizes[0].first);
+ JXL_ASSERT(input_sizes[c].second == input_sizes[0].second);
+ }
+#endif
+ // TODO(eustas): what should we do in the case of "want only ECs"?
+ image_bundle_->SetFromImage(
+ Image3F(input_sizes[0].first, input_sizes[0].second), color_encoding_);
+ // TODO(veluca): consider not reallocating ECs if not needed.
+ image_bundle_->extra_channels().clear();
+ for (size_t c = 3; c < input_sizes.size(); c++) {
+ image_bundle_->extra_channels().emplace_back(input_sizes[c].first,
+ input_sizes[c].second);
+ }
+ }
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ for (size_t c = 0; c < 3; c++) {
+ memcpy(image_bundle_->color()->PlaneRow(c, ypos) + xpos - xextra,
+ GetInputRow(input_rows, c, 0) - xextra,
+ sizeof(float) * (xsize + 2 * xextra));
+ }
+ for (size_t ec = 0; ec < image_bundle_->extra_channels().size(); ec++) {
+ JXL_ASSERT(image_bundle_->extra_channels()[ec].xsize() >=
+ xpos + xsize + xextra);
+ memcpy(image_bundle_->extra_channels()[ec].Row(ypos) + xpos - xextra,
+ GetInputRow(input_rows, 3 + ec, 0) - xextra,
+ sizeof(float) * (xsize + 2 * xextra));
+ }
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return RenderPipelineChannelMode::kInput;
+ }
+
+ const char* GetName() const override { return "WriteIB"; }
+
+ private:
+ ImageBundle* image_bundle_;
+ ColorEncoding color_encoding_;
+};
+
+class WriteToImage3FStage : public RenderPipelineStage {
+ public:
+ explicit WriteToImage3FStage(Image3F* image)
+ : RenderPipelineStage(RenderPipelineStage::Settings()), image_(image) {}
+
+ void SetInputSizes(
+ const std::vector<std::pair<size_t, size_t>>& input_sizes) override {
+#if JXL_ENABLE_ASSERT
+ JXL_ASSERT(input_sizes.size() >= 3);
+ for (size_t c = 1; c < 3; ++c) {
+ JXL_ASSERT(input_sizes[c].first == input_sizes[0].first);
+ JXL_ASSERT(input_sizes[c].second == input_sizes[0].second);
+ }
+#endif
+ *image_ = Image3F(input_sizes[0].first, input_sizes[0].second);
+ }
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ for (size_t c = 0; c < 3; c++) {
+ memcpy(image_->PlaneRow(c, ypos) + xpos - xextra,
+ GetInputRow(input_rows, c, 0) - xextra,
+ sizeof(float) * (xsize + 2 * xextra));
+ }
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return c < 3 ? RenderPipelineChannelMode::kInput
+ : RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "WriteI3F"; }
+
+ private:
+ Image3F* image_;
+};
+
+} // namespace
+
+std::unique_ptr<RenderPipelineStage> GetWriteToImageBundleStage(
+ ImageBundle* image_bundle, ColorEncoding color_encoding) {
+ return jxl::make_unique<WriteToImageBundleStage>(image_bundle,
+ std::move(color_encoding));
+}
+
+std::unique_ptr<RenderPipelineStage> GetWriteToImage3FStage(Image3F* image) {
+ return jxl::make_unique<WriteToImage3FStage>(image);
+}
+
+std::unique_ptr<RenderPipelineStage> GetWriteToOutputStage(
+ const ImageOutput& main_output, size_t width, size_t height, bool has_alpha,
+ bool unpremul_alpha, size_t alpha_c, Orientation undo_orientation,
+ std::vector<ImageOutput>& extra_output) {
+ return HWY_DYNAMIC_DISPATCH(GetWriteToOutputStage)(
+ main_output, width, height, has_alpha, unpremul_alpha, alpha_c,
+ undo_orientation, extra_output);
+}
+
+} // namespace jxl
+
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_write.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_write.h
new file mode 100644
index 0000000000..c5f844ebe8
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_write.h
@@ -0,0 +1,31 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_WRITE_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_WRITE_H_
+
+#include <functional>
+
+#include "lib/jxl/dec_cache.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+std::unique_ptr<RenderPipelineStage> GetWriteToImageBundleStage(
+ ImageBundle* image_bundle, ColorEncoding color_encoding);
+
+// Gets a stage to write color channels to an Image3F.
+std::unique_ptr<RenderPipelineStage> GetWriteToImage3FStage(Image3F* image);
+
+// Gets a stage to write to a pixel callback or image buffer.
+std::unique_ptr<RenderPipelineStage> GetWriteToOutputStage(
+ const ImageOutput& main_output, size_t width, size_t height, bool has_alpha,
+ bool unpremul_alpha, size_t alpha_c, Orientation undo_orientation,
+ std::vector<ImageOutput>& extra_output);
+
+} // namespace jxl
+
+#endif // LIB_JXL_RENDER_PIPELINE_STAGE_WRITE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_xyb.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_xyb.cc
new file mode 100644
index 0000000000..15cfc75b18
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_xyb.cc
@@ -0,0 +1,176 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_xyb.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_xyb.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/dec_xyb-inl.h"
+#include "lib/jxl/opsin_params.h"
+#include "lib/jxl/sanitizers.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+class XYBStage : public RenderPipelineStage {
+ public:
+ explicit XYBStage(const OutputEncodingInfo& output_encoding_info)
+ : RenderPipelineStage(RenderPipelineStage::Settings()),
+ opsin_params_(output_encoding_info.opsin_params),
+ output_is_xyb_(output_encoding_info.color_encoding.GetColorSpace() ==
+ ColorSpace::kXYB) {}
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ PROFILER_ZONE("UndoXYB");
+
+ const HWY_FULL(float) d;
+ JXL_ASSERT(xextra == 0);
+ const size_t xsize_v = RoundUpTo(xsize, Lanes(d));
+ float* JXL_RESTRICT row0 = GetInputRow(input_rows, 0, 0);
+ float* JXL_RESTRICT row1 = GetInputRow(input_rows, 1, 0);
+ float* JXL_RESTRICT row2 = GetInputRow(input_rows, 2, 0);
+ // All calculations are lane-wise, still some might require
+ // value-dependent behaviour (e.g. NearestInt). Temporary unpoison last
+ // vector tail.
+ msan::UnpoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize));
+ msan::UnpoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize));
+ msan::UnpoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize));
+ // TODO(eustas): when using frame origin, addresses might be unaligned;
+ // making them aligned will void performance penalty.
+ if (output_is_xyb_) {
+ const auto scale_x = Set(d, kScaledXYBScale[0]);
+ const auto scale_y = Set(d, kScaledXYBScale[1]);
+ const auto scale_bmy = Set(d, kScaledXYBScale[2]);
+ const auto offset_x = Set(d, kScaledXYBOffset[0]);
+ const auto offset_y = Set(d, kScaledXYBOffset[1]);
+ const auto offset_bmy = Set(d, kScaledXYBOffset[2]);
+ for (ssize_t x = -xextra; x < (ssize_t)(xsize + xextra); x += Lanes(d)) {
+ const auto in_x = LoadU(d, row0 + x);
+ const auto in_y = LoadU(d, row1 + x);
+ const auto in_b = LoadU(d, row2 + x);
+ auto out_x = Mul(Add(in_x, offset_x), scale_x);
+ auto out_y = Mul(Add(in_y, offset_y), scale_y);
+ auto out_b = Mul(Add(Sub(in_b, in_y), offset_bmy), scale_bmy);
+ StoreU(out_x, d, row0 + x);
+ StoreU(out_y, d, row1 + x);
+ StoreU(out_b, d, row2 + x);
+ }
+ } else {
+ for (ssize_t x = -xextra; x < (ssize_t)(xsize + xextra); x += Lanes(d)) {
+ const auto in_opsin_x = LoadU(d, row0 + x);
+ const auto in_opsin_y = LoadU(d, row1 + x);
+ const auto in_opsin_b = LoadU(d, row2 + x);
+ auto r = Undefined(d);
+ auto g = Undefined(d);
+ auto b = Undefined(d);
+ XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, opsin_params_, &r, &g,
+ &b);
+ StoreU(r, d, row0 + x);
+ StoreU(g, d, row1 + x);
+ StoreU(b, d, row2 + x);
+ }
+ }
+ msan::PoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize));
+ msan::PoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize));
+ msan::PoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize));
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return c < 3 ? RenderPipelineChannelMode::kInPlace
+ : RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "XYB"; }
+
+ private:
+ const OpsinParams opsin_params_;
+ const bool output_is_xyb_;
+};
+
+std::unique_ptr<RenderPipelineStage> GetXYBStage(
+ const OutputEncodingInfo& output_encoding_info) {
+ return jxl::make_unique<XYBStage>(output_encoding_info);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetXYBStage);
+
+std::unique_ptr<RenderPipelineStage> GetXYBStage(
+ const OutputEncodingInfo& output_encoding_info) {
+ return HWY_DYNAMIC_DISPATCH(GetXYBStage)(output_encoding_info);
+}
+
+namespace {
+class FastXYBStage : public RenderPipelineStage {
+ public:
+ FastXYBStage(uint8_t* rgb, size_t stride, size_t width, size_t height,
+ bool rgba, bool has_alpha, size_t alpha_c)
+ : RenderPipelineStage(RenderPipelineStage::Settings()),
+ rgb_(rgb),
+ stride_(stride),
+ width_(width),
+ height_(height),
+ rgba_(rgba),
+ has_alpha_(has_alpha),
+ alpha_c_(alpha_c) {}
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ if (ypos >= height_) return;
+ JXL_ASSERT(xextra == 0);
+ const float* xyba[4] = {
+ GetInputRow(input_rows, 0, 0), GetInputRow(input_rows, 1, 0),
+ GetInputRow(input_rows, 2, 0),
+ has_alpha_ ? GetInputRow(input_rows, alpha_c_, 0) : nullptr};
+ uint8_t* out_buf = rgb_ + stride_ * ypos + (rgba_ ? 4 : 3) * xpos;
+ FastXYBTosRGB8(xyba, out_buf, rgba_,
+ xsize + xpos <= width_ ? xsize : width_ - xpos);
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return c < 3 || (has_alpha_ && c == alpha_c_)
+ ? RenderPipelineChannelMode::kInput
+ : RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "FastXYB"; }
+
+ private:
+ uint8_t* rgb_;
+ size_t stride_;
+ size_t width_;
+ size_t height_;
+ bool rgba_;
+ bool has_alpha_;
+ size_t alpha_c_;
+ std::vector<float> opaque_alpha_;
+};
+
+} // namespace
+
+std::unique_ptr<RenderPipelineStage> GetFastXYBTosRGB8Stage(
+ uint8_t* rgb, size_t stride, size_t width, size_t height, bool rgba,
+ bool has_alpha, size_t alpha_c) {
+ JXL_ASSERT(HasFastXYBTosRGB8());
+ return make_unique<FastXYBStage>(rgb, stride, width, height, rgba, has_alpha,
+ alpha_c);
+}
+
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_xyb.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_xyb.h
new file mode 100644
index 0000000000..7b06345c36
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_xyb.h
@@ -0,0 +1,26 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_XYB_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_XYB_H_
+#include <stdint.h>
+
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Converts the color channels from XYB to linear with appropriate primaries.
+std::unique_ptr<RenderPipelineStage> GetXYBStage(
+ const OutputEncodingInfo& output_encoding_info);
+
+// Gets a stage to convert with fixed point arithmetic from XYB to sRGB8 and
+// write to a uint8 buffer.
+std::unique_ptr<RenderPipelineStage> GetFastXYBTosRGB8Stage(
+ uint8_t* rgb, size_t stride, size_t width, size_t height, bool rgba,
+ bool has_alpha, size_t alpha_c);
+} // namespace jxl
+
+#endif // LIB_JXL_RENDER_PIPELINE_STAGE_XYB_H_
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_ycbcr.cc b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_ycbcr.cc
new file mode 100644
index 0000000000..5cba4a7d41
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_ycbcr.cc
@@ -0,0 +1,85 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/render_pipeline/stage_ycbcr.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_ycbcr.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::MulAdd;
+
+class kYCbCrStage : public RenderPipelineStage {
+ public:
+ kYCbCrStage() : RenderPipelineStage(RenderPipelineStage::Settings()) {}
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ PROFILER_ZONE("UndoYCbCr");
+
+ const HWY_FULL(float) df;
+
+ // Full-range BT.601 as defined by JFIF Clause 7:
+ // https://www.itu.int/rec/T-REC-T.871-201105-I/en
+ const auto c128 = Set(df, 128.0f / 255);
+ const auto crcr = Set(df, 1.402f);
+ const auto cgcb = Set(df, -0.114f * 1.772f / 0.587f);
+ const auto cgcr = Set(df, -0.299f * 1.402f / 0.587f);
+ const auto cbcb = Set(df, 1.772f);
+
+ float* JXL_RESTRICT row0 = GetInputRow(input_rows, 0, 0);
+ float* JXL_RESTRICT row1 = GetInputRow(input_rows, 1, 0);
+ float* JXL_RESTRICT row2 = GetInputRow(input_rows, 2, 0);
+ // TODO(eustas): when using frame origin, addresses might be unaligned;
+ // making them aligned will void performance penalty.
+ for (size_t x = 0; x < xsize; x += Lanes(df)) {
+ const auto y_vec = Add(LoadU(df, row1 + x), c128);
+ const auto cb_vec = LoadU(df, row0 + x);
+ const auto cr_vec = LoadU(df, row2 + x);
+ const auto r_vec = MulAdd(crcr, cr_vec, y_vec);
+ const auto g_vec = MulAdd(cgcr, cr_vec, MulAdd(cgcb, cb_vec, y_vec));
+ const auto b_vec = MulAdd(cbcb, cb_vec, y_vec);
+ StoreU(r_vec, df, row0 + x);
+ StoreU(g_vec, df, row1 + x);
+ StoreU(b_vec, df, row2 + x);
+ }
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return c < 3 ? RenderPipelineChannelMode::kInPlace
+ : RenderPipelineChannelMode::kIgnored;
+ }
+
+ const char* GetName() const override { return "YCbCr"; }
+};
+
+std::unique_ptr<RenderPipelineStage> GetYCbCrStage() {
+ return jxl::make_unique<kYCbCrStage>();
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(GetYCbCrStage);
+
+std::unique_ptr<RenderPipelineStage> GetYCbCrStage() {
+ return HWY_DYNAMIC_DISPATCH(GetYCbCrStage)();
+}
+
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_ycbcr.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_ycbcr.h
new file mode 100644
index 0000000000..9320c9723f
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/stage_ycbcr.h
@@ -0,0 +1,25 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_YCBCR_H_
+#define LIB_JXL_RENDER_PIPELINE_STAGE_YCBCR_H_
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/dec_xyb.h"
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+// Converts the color channels from YCbCr to RGB.
+std::unique_ptr<RenderPipelineStage> GetYCbCrStage();
+} // namespace jxl
+
+#endif // LIB_JXL_RENDER_PIPELINE_STAGE_YCBCR_H_
diff --git a/third_party/jpeg-xl/lib/jxl/render_pipeline/test_render_pipeline_stages.h b/third_party/jpeg-xl/lib/jxl/render_pipeline/test_render_pipeline_stages.h
new file mode 100644
index 0000000000..789a52f8b2
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/render_pipeline/test_render_pipeline_stages.h
@@ -0,0 +1,101 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/render_pipeline/render_pipeline_stage.h"
+
+namespace jxl {
+
+class UpsampleXSlowStage : public RenderPipelineStage {
+ public:
+ UpsampleXSlowStage()
+ : RenderPipelineStage(RenderPipelineStage::Settings::ShiftX(1, 1)) {}
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ for (size_t c = 0; c < input_rows.size(); c++) {
+ const float* row = GetInputRow(input_rows, c, 0);
+ float* row_out = GetOutputRow(output_rows, c, 0);
+ for (int64_t x = -xextra; x < (int64_t)(xsize + xextra); x++) {
+ float xp = *(row + x - 1);
+ float xc = *(row + x);
+ float xn = *(row + x + 1);
+ float xout0 = xp * 0.25f + xc * 0.75f;
+ float xout1 = xc * 0.75f + xn * 0.25f;
+ *(row_out + 2 * x + 0) = xout0;
+ *(row_out + 2 * x + 1) = xout1;
+ }
+ }
+ }
+
+ const char* GetName() const override { return "TEST::UpsampleXSlowStage"; }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return RenderPipelineChannelMode::kInOut;
+ }
+};
+
+class UpsampleYSlowStage : public RenderPipelineStage {
+ public:
+ UpsampleYSlowStage()
+ : RenderPipelineStage(RenderPipelineStage::Settings::ShiftY(1, 1)) {}
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ for (size_t c = 0; c < input_rows.size(); c++) {
+ const float* rowp = GetInputRow(input_rows, c, -1);
+ const float* rowc = GetInputRow(input_rows, c, 0);
+ const float* rown = GetInputRow(input_rows, c, 1);
+ float* row_out0 = GetOutputRow(output_rows, c, 0);
+ float* row_out1 = GetOutputRow(output_rows, c, 1);
+ for (int64_t x = -xextra; x < (int64_t)(xsize + xextra); x++) {
+ float xp = *(rowp + x);
+ float xc = *(rowc + x);
+ float xn = *(rown + x);
+ float yout0 = xp * 0.25f + xc * 0.75f;
+ float yout1 = xc * 0.75f + xn * 0.25f;
+ *(row_out0 + x) = yout0;
+ *(row_out1 + x) = yout1;
+ }
+ }
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return RenderPipelineChannelMode::kInOut;
+ }
+
+ const char* GetName() const override { return "TEST::UpsampleYSlowStage"; }
+};
+
+class Check0FinalStage : public RenderPipelineStage {
+ public:
+ Check0FinalStage() : RenderPipelineStage(RenderPipelineStage::Settings()) {}
+
+ void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows,
+ size_t xextra, size_t xsize, size_t xpos, size_t ypos,
+ size_t thread_id) const final {
+ for (size_t c = 0; c < input_rows.size(); c++) {
+ for (size_t x = 0; x < xsize; x++) {
+ JXL_CHECK(fabsf(GetInputRow(input_rows, c, 0)[x]) < 1e-8);
+ }
+ }
+ }
+
+ RenderPipelineChannelMode GetChannelMode(size_t c) const final {
+ return RenderPipelineChannelMode::kInput;
+ }
+ const char* GetName() const override { return "TEST::Check0FinalStage"; }
+};
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/roundtrip_test.cc b/third_party/jpeg-xl/lib/jxl/roundtrip_test.cc
new file mode 100644
index 0000000000..f1529b500c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/roundtrip_test.cc
@@ -0,0 +1,839 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/codestream_header.h>
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+#include <jxl/encode.h>
+#include <jxl/encode_cxx.h>
+#include <jxl/types.h>
+
+#include <cmath> // std::abs
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <vector>
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/dec_external_image.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_comparator.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/encode_internal.h"
+#include "lib/jxl/icc_codec.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace {
+
+// Converts a test image to a CodecInOut.
+// icc_profile can be empty to automatically deduce profile from the pixel
+// format, or filled in to force this ICC profile
+jxl::CodecInOut ConvertTestImage(const std::vector<uint8_t>& buf,
+ const size_t xsize, const size_t ysize,
+ const JxlPixelFormat& pixel_format,
+ const jxl::PaddedBytes& icc_profile) {
+ jxl::CodecInOut io;
+ io.SetSize(xsize, ysize);
+
+ bool is_gray = pixel_format.num_channels < 3;
+ bool has_alpha =
+ pixel_format.num_channels == 2 || pixel_format.num_channels == 4;
+
+ io.metadata.m.color_encoding.SetColorSpace(is_gray ? jxl::ColorSpace::kGray
+ : jxl::ColorSpace::kRGB);
+ if (has_alpha) {
+ // Note: alpha > 16 not yet supported by the C++ codec
+ switch (pixel_format.data_type) {
+ case JXL_TYPE_UINT8:
+ io.metadata.m.SetAlphaBits(8);
+ break;
+ case JXL_TYPE_UINT16:
+ case JXL_TYPE_FLOAT:
+ case JXL_TYPE_FLOAT16:
+ io.metadata.m.SetAlphaBits(16);
+ break;
+ default:
+ ADD_FAILURE() << "Roundtrip tests for data type "
+ << pixel_format.data_type << " not yet implemented.";
+ }
+ }
+ size_t bitdepth = 0;
+ switch (pixel_format.data_type) {
+ case JXL_TYPE_FLOAT:
+ bitdepth = 32;
+ io.metadata.m.SetFloat32Samples();
+ break;
+ case JXL_TYPE_FLOAT16:
+ bitdepth = 16;
+ io.metadata.m.SetFloat16Samples();
+ break;
+ case JXL_TYPE_UINT8:
+ bitdepth = 8;
+ io.metadata.m.SetUintSamples(8);
+ break;
+ case JXL_TYPE_UINT16:
+ bitdepth = 16;
+ io.metadata.m.SetUintSamples(16);
+ break;
+ default:
+ ADD_FAILURE() << "Roundtrip tests for data type "
+ << pixel_format.data_type << " not yet implemented.";
+ }
+ jxl::ColorEncoding color_encoding;
+ if (!icc_profile.empty()) {
+ jxl::PaddedBytes icc_profile_copy(icc_profile);
+ EXPECT_TRUE(color_encoding.SetICC(std::move(icc_profile_copy)));
+ } else if (pixel_format.data_type == JXL_TYPE_FLOAT) {
+ color_encoding = jxl::ColorEncoding::LinearSRGB(is_gray);
+ } else {
+ color_encoding = jxl::ColorEncoding::SRGB(is_gray);
+ }
+ EXPECT_TRUE(
+ ConvertFromExternal(jxl::Span<const uint8_t>(buf.data(), buf.size()),
+ xsize, ysize, color_encoding,
+ /*bits_per_sample=*/bitdepth, pixel_format,
+ /*pool=*/nullptr, &io.Main()));
+ return io;
+}
+
+template <typename T>
+T ConvertTestPixel(float val);
+
+template <>
+float ConvertTestPixel<float>(const float val) {
+ return val;
+}
+
+template <>
+uint16_t ConvertTestPixel<uint16_t>(const float val) {
+ return (uint16_t)(val * UINT16_MAX);
+}
+
+template <>
+uint8_t ConvertTestPixel<uint8_t>(const float val) {
+ return (uint8_t)(val * UINT8_MAX);
+}
+
+// Returns a test image.
+template <typename T>
+std::vector<uint8_t> GetTestImage(const size_t xsize, const size_t ysize,
+ const JxlPixelFormat& pixel_format) {
+ std::vector<T> pixels(xsize * ysize * pixel_format.num_channels);
+ for (size_t y = 0; y < ysize; y++) {
+ for (size_t x = 0; x < xsize; x++) {
+ for (size_t chan = 0; chan < pixel_format.num_channels; chan++) {
+ float val;
+ switch (chan % 4) {
+ case 0:
+ val = static_cast<float>(y) / static_cast<float>(ysize);
+ break;
+ case 1:
+ val = static_cast<float>(x) / static_cast<float>(xsize);
+ break;
+ case 2:
+ val = static_cast<float>(x + y) / static_cast<float>(xsize + ysize);
+ break;
+ case 3:
+ val = static_cast<float>(x * y) / static_cast<float>(xsize * ysize);
+ break;
+ }
+ pixels[(y * xsize + x) * pixel_format.num_channels + chan] =
+ ConvertTestPixel<T>(val);
+ }
+ }
+ }
+ std::vector<uint8_t> bytes(pixels.size() * sizeof(T));
+ memcpy(bytes.data(), pixels.data(), sizeof(T) * pixels.size());
+ return bytes;
+}
+
+void EncodeWithEncoder(JxlEncoder* enc, std::vector<uint8_t>* compressed) {
+ compressed->resize(64);
+ uint8_t* next_out = compressed->data();
+ size_t avail_out = compressed->size() - (next_out - compressed->data());
+ JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+ while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ process_result = JxlEncoderProcessOutput(enc, &next_out, &avail_out);
+ if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ size_t offset = next_out - compressed->data();
+ compressed->resize(compressed->size() * 2);
+ next_out = compressed->data() + offset;
+ avail_out = compressed->size() - offset;
+ }
+ }
+ compressed->resize(next_out - compressed->data());
+ EXPECT_EQ(JXL_ENC_SUCCESS, process_result);
+}
+
+// Generates some pixels using some dimensions and pixel_format,
+// compresses them, and verifies that the decoded version is similar to the
+// original pixels.
+// TODO(firsching): change this to be a parameterized test, like in
+// decode_test.cc
+template <typename T>
+void VerifyRoundtripCompression(
+ const size_t xsize, const size_t ysize,
+ const JxlPixelFormat& input_pixel_format,
+ const JxlPixelFormat& output_pixel_format, const bool lossless,
+ const bool use_container, const uint32_t resampling = 1,
+ const bool already_downsampled = false,
+ const std::vector<std::pair<JxlExtraChannelType, std::string>>&
+ extra_channels = {}) {
+ size_t orig_xsize = xsize;
+ size_t orig_ysize = ysize;
+ if (already_downsampled) {
+ orig_xsize = jxl::DivCeil(xsize, resampling);
+ orig_ysize = jxl::DivCeil(ysize, resampling);
+ }
+
+ JxlPixelFormat extra_channel_pixel_format = input_pixel_format;
+ extra_channel_pixel_format.num_channels = 1;
+ const std::vector<uint8_t> extra_channel_bytes =
+ GetTestImage<T>(xsize, ysize, extra_channel_pixel_format);
+ const std::vector<uint8_t> original_bytes =
+ GetTestImage<T>(orig_xsize, orig_ysize, input_pixel_format);
+ jxl::CodecInOut original_io = ConvertTestImage(
+ original_bytes, orig_xsize, orig_ysize, input_pixel_format, {});
+
+ JxlEncoder* enc = JxlEncoderCreate(nullptr);
+ EXPECT_NE(nullptr, enc);
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc, 10));
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseContainer(enc, use_container));
+ JxlBasicInfo basic_info;
+ jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &input_pixel_format);
+ basic_info.xsize = xsize;
+ basic_info.ysize = ysize;
+ basic_info.uses_original_profile = lossless;
+ uint32_t num_channels = input_pixel_format.num_channels;
+ size_t has_interleaved_alpha = num_channels == 2 || num_channels == 4;
+ JxlPixelFormat output_pixel_format_with_extra_channel_alpha =
+ output_pixel_format;
+
+ // In the case where we have an alpha channel, but it is provided as an extra
+ // channel and not interleaved, we do two things here:
+ // 1. modify the original_io to have the correct alpha channel
+ // 2. change the output_format_with_extra_alpha to have an alpha channel
+ bool alpha_in_extra_channels_vector = false;
+ for (const auto& extra_channel : extra_channels) {
+ if (extra_channel.first == JXL_CHANNEL_ALPHA) {
+ alpha_in_extra_channels_vector = true;
+ }
+ }
+ if (alpha_in_extra_channels_vector && !has_interleaved_alpha) {
+ jxl::ImageF alpha_channel(xsize, ysize);
+ EXPECT_TRUE(jxl::ConvertFromExternal(
+ jxl::Span<const uint8_t>(extra_channel_bytes.data(),
+ extra_channel_bytes.size()),
+ xsize, ysize, basic_info.bits_per_sample, extra_channel_pixel_format, 0,
+ /*pool=*/nullptr, &alpha_channel));
+
+ original_io.metadata.m.SetAlphaBits(basic_info.bits_per_sample);
+ original_io.Main().SetAlpha(std::move(alpha_channel));
+ output_pixel_format_with_extra_channel_alpha.num_channels++;
+ }
+ // Those are the num_extra_channels including a potential alpha channel.
+ basic_info.num_extra_channels = extra_channels.size() + has_interleaved_alpha;
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc, &basic_info));
+ EXPECT_EQ(enc->metadata.m.num_extra_channels,
+ extra_channels.size() + has_interleaved_alpha);
+ JxlColorEncoding color_encoding;
+ if (input_pixel_format.data_type == JXL_TYPE_FLOAT) {
+ JxlColorEncodingSetToLinearSRGB(
+ &color_encoding,
+ /*is_gray=*/input_pixel_format.num_channels < 3);
+ } else {
+ JxlColorEncodingSetToSRGB(&color_encoding,
+ /*is_gray=*/input_pixel_format.num_channels < 3);
+ }
+
+ std::vector<JxlExtraChannelInfo> channel_infos;
+ for (const auto& extra_channel : extra_channels) {
+ auto channel_type = extra_channel.first;
+ JxlExtraChannelInfo channel_info;
+ JxlEncoderInitExtraChannelInfo(channel_type, &channel_info);
+ channel_info.bits_per_sample = (lossless ? basic_info.bits_per_sample : 8);
+ channel_info.exponent_bits_per_sample =
+ (lossless ? basic_info.exponent_bits_per_sample : 0);
+ channel_infos.push_back(channel_info);
+ }
+ for (size_t index = 0; index < channel_infos.size(); index++) {
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderSetExtraChannelInfo(enc, index + has_interleaved_alpha,
+ &channel_infos[index]));
+ std::string name = extra_channels[index].second;
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderSetExtraChannelName(enc, index + has_interleaved_alpha,
+ name.c_str(), name.length()));
+ }
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetColorEncoding(enc, &color_encoding));
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc, nullptr);
+ JxlEncoderSetFrameLossless(frame_settings, lossless);
+ if (resampling > 1) {
+ EXPECT_EQ(
+ JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_RESAMPLING, resampling));
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderFrameSettingsSetOption(
+ frame_settings, JXL_ENC_FRAME_SETTING_ALREADY_DOWNSAMPLED,
+ already_downsampled));
+ }
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderAddImageFrame(frame_settings, &input_pixel_format,
+ (void*)original_bytes.data(),
+ original_bytes.size()));
+ EXPECT_EQ(frame_settings->enc->input_queue.back()
+ .frame->frame.extra_channels()
+ .size(),
+ has_interleaved_alpha + extra_channels.size());
+ EXPECT_EQ(frame_settings->enc->input_queue.empty(), false);
+ for (size_t index = 0; index < channel_infos.size(); index++) {
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderSetExtraChannelBuffer(
+ frame_settings, &extra_channel_pixel_format,
+ (void*)extra_channel_bytes.data(), extra_channel_bytes.size(),
+ index + has_interleaved_alpha));
+ }
+ JxlEncoderCloseInput(enc);
+ EXPECT_EQ(frame_settings->enc->input_queue.back()
+ .frame->frame.extra_channels()
+ .size(),
+ has_interleaved_alpha + extra_channels.size());
+ std::vector<uint8_t> compressed;
+ EncodeWithEncoder(enc, &compressed);
+ JxlEncoderDestroy(enc);
+
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+ EXPECT_NE(nullptr, dec);
+
+ const uint8_t* next_in = compressed.data();
+ size_t avail_in = compressed.size();
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO |
+ JXL_DEC_COLOR_ENCODING |
+ JXL_DEC_FULL_IMAGE));
+
+ JxlDecoderSetInput(dec, next_in, avail_in);
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ size_t buffer_size;
+ EXPECT_EQ(
+ JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(
+ dec, &output_pixel_format_with_extra_channel_alpha, &buffer_size));
+ if (&input_pixel_format == &output_pixel_format_with_extra_channel_alpha &&
+ !already_downsampled) {
+ EXPECT_EQ(buffer_size, original_bytes.size());
+ }
+
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(xsize, info.xsize);
+ EXPECT_EQ(ysize, info.ysize);
+ EXPECT_EQ(extra_channels.size() + has_interleaved_alpha,
+ info.num_extra_channels);
+
+ EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+
+ size_t icc_profile_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetICCProfileSize(
+ dec, &output_pixel_format_with_extra_channel_alpha,
+ JXL_COLOR_PROFILE_TARGET_DATA, &icc_profile_size));
+ jxl::PaddedBytes icc_profile(icc_profile_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetColorAsICCProfile(
+ dec, &output_pixel_format, JXL_COLOR_PROFILE_TARGET_DATA,
+ icc_profile.data(), icc_profile.size()));
+
+ std::vector<uint8_t> decoded_bytes(buffer_size);
+
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetImageOutBuffer(
+ dec, &output_pixel_format_with_extra_channel_alpha,
+ decoded_bytes.data(), decoded_bytes.size()));
+ std::vector<std::vector<uint8_t>> extra_channel_decoded_bytes(
+ info.num_extra_channels - has_interleaved_alpha);
+
+ for (size_t index = has_interleaved_alpha; index < info.num_extra_channels;
+ index++) {
+ JxlExtraChannelInfo channel_info;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetExtraChannelInfo(dec, index, &channel_info));
+ EXPECT_EQ(channel_info.type,
+ extra_channels[index - has_interleaved_alpha].first);
+ std::string input_name =
+ extra_channels[index - has_interleaved_alpha].second;
+ const size_t name_length = channel_info.name_length;
+ EXPECT_EQ(input_name.size(), name_length);
+ std::vector<char> output_name(name_length + 1);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetExtraChannelName(dec, index, output_name.data(),
+ output_name.size()));
+ EXPECT_EQ(0,
+ memcmp(input_name.data(), output_name.data(), input_name.size()));
+ size_t extra_buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderExtraChannelBufferSize(dec, &output_pixel_format,
+ &extra_buffer_size, index));
+ std::vector<uint8_t> extra_decoded_bytes(extra_buffer_size);
+ extra_channel_decoded_bytes[index - has_interleaved_alpha] =
+ std::move(extra_decoded_bytes);
+ EXPECT_EQ(
+ JXL_DEC_SUCCESS,
+ JxlDecoderSetExtraChannelBuffer(
+ dec, &output_pixel_format,
+ extra_channel_decoded_bytes[index - has_interleaved_alpha].data(),
+ extra_channel_decoded_bytes[index - has_interleaved_alpha].size(),
+ index));
+ }
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+ // Check if there are no further errors after getting the full image, e.g.
+ // check that the final codestream box is actually marked as last.
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec));
+
+ JxlDecoderDestroy(dec);
+
+ jxl::CodecInOut decoded_io = ConvertTestImage(
+ decoded_bytes, xsize, ysize, output_pixel_format_with_extra_channel_alpha,
+ icc_profile);
+
+ if (already_downsampled) {
+ jxl::Image3F* color = decoded_io.Main().color();
+ jxl::DownsampleImage(color, resampling);
+ if (decoded_io.Main().HasAlpha()) {
+ jxl::ImageF* alpha = decoded_io.Main().alpha();
+ jxl::DownsampleImage(alpha, resampling);
+ }
+ decoded_io.SetSize(color->xsize(), color->ysize());
+ }
+
+ if (lossless && !already_downsampled) {
+ JXL_EXPECT_OK(jxl::SamePixels(*original_io.Main().color(),
+ *decoded_io.Main().color(), _));
+ } else {
+ jxl::ButteraugliParams ba;
+ float butteraugli_score = ButteraugliDistance(
+ original_io.frames, decoded_io.frames, ba, jxl::GetJxlCms(),
+ /*distmap=*/nullptr, nullptr);
+ EXPECT_LE(butteraugli_score, 2.0f);
+ }
+ JxlPixelFormat extra_channel_output_pixel_format = output_pixel_format;
+ extra_channel_output_pixel_format.num_channels = 1;
+ for (auto& extra_channel : extra_channel_decoded_bytes) {
+ EXPECT_EQ(extra_channel.size(), extra_channel_bytes.size());
+ if (lossless) {
+ EXPECT_EQ(jxl::test::ComparePixels(extra_channel.data(),
+ extra_channel_bytes.data(), xsize,
+ ysize, extra_channel_pixel_format,
+ extra_channel_output_pixel_format),
+ 0u);
+ EXPECT_EQ(extra_channel, extra_channel_bytes);
+ }
+ }
+}
+
+} // namespace
+
+TEST(RoundtripTest, FloatFrameRoundtripTest) {
+ std::vector<std::vector<std::pair<JxlExtraChannelType, std::string>>>
+ extra_channels_cases = {{},
+ {{JXL_CHANNEL_ALPHA, "my extra alpha channel"}},
+ {{JXL_CHANNEL_CFA, "my cfa channel"}},
+ {{JXL_CHANNEL_DEPTH, "depth"},
+ {JXL_CHANNEL_SELECTION_MASK, "mask"},
+ {JXL_CHANNEL_BLACK, "black"},
+ {JXL_CHANNEL_CFA, "my cfa channel"},
+ {JXL_CHANNEL_OPTIONAL, "optional channel"}},
+ {{JXL_CHANNEL_DEPTH, "very deep"}}};
+ for (int use_container = 0; use_container < 2; use_container++) {
+ for (int lossless = 0; lossless < 2; lossless++) {
+ for (uint32_t num_channels = 1; num_channels < 5; num_channels++) {
+ for (auto& extra_channels : extra_channels_cases) {
+ uint32_t has_alpha = static_cast<uint32_t>(num_channels % 2 == 0);
+ uint32_t total_extra_channels = has_alpha + extra_channels.size();
+ // There's no support (yet) for lossless extra float
+ // channels, so we don't test it.
+ if (total_extra_channels == 0 || !lossless) {
+ JxlPixelFormat pixel_format = JxlPixelFormat{
+ num_channels, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0};
+ VerifyRoundtripCompression<float>(
+ 63, 129, pixel_format, pixel_format, (bool)lossless,
+ (bool)use_container, 1, false, extra_channels);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(RoundtripTest, Uint16FrameRoundtripTest) {
+ std::vector<std::vector<std::pair<JxlExtraChannelType, std::string>>>
+ extra_channels_cases = {{},
+ {{JXL_CHANNEL_ALPHA, "my extra alpha channel"}},
+ {{JXL_CHANNEL_CFA, "my cfa channel"}},
+ {{JXL_CHANNEL_CFA, "my cfa channel"},
+ {JXL_CHANNEL_BLACK, "k_channel"}},
+ {{JXL_CHANNEL_DEPTH, "very deep"}}};
+ for (int use_container = 0; use_container < 2; use_container++) {
+ for (int lossless = 0; lossless < 2; lossless++) {
+ for (uint32_t num_channels = 1; num_channels < 5; num_channels++) {
+ for (auto& extra_channels : extra_channels_cases) {
+ JxlPixelFormat pixel_format = JxlPixelFormat{
+ num_channels, JXL_TYPE_UINT16, JXL_NATIVE_ENDIAN, 0};
+ VerifyRoundtripCompression<uint16_t>(
+ 63, 129, pixel_format, pixel_format, (bool)lossless,
+ (bool)use_container, 1, false, extra_channels);
+ }
+ }
+ }
+ }
+}
+
+TEST(RoundtripTest, Uint8FrameRoundtripTest) {
+ std::vector<std::vector<std::pair<JxlExtraChannelType, std::string>>>
+ extra_channels_cases = {{},
+ {{JXL_CHANNEL_THERMAL, "temperature"}},
+ {{JXL_CHANNEL_ALPHA, "my extra alpha channel"}},
+ {{JXL_CHANNEL_CFA, "my cfa channel"}},
+ {{JXL_CHANNEL_CFA, "my cfa channel"},
+ {JXL_CHANNEL_BLACK, "k_channel"}},
+ {{JXL_CHANNEL_DEPTH, "very deep"}}};
+ for (int use_container = 0; use_container < 2; use_container++) {
+ for (int lossless = 0; lossless < 2; lossless++) {
+ for (uint32_t num_channels = 1; num_channels < 5; num_channels++) {
+ for (auto& extra_channels : extra_channels_cases) {
+ JxlPixelFormat pixel_format = JxlPixelFormat{
+ num_channels, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0};
+ VerifyRoundtripCompression<uint8_t>(
+ 63, 129, pixel_format, pixel_format, (bool)lossless,
+ (bool)use_container, 1, false, extra_channels);
+ }
+ }
+ }
+ }
+}
+
+TEST(RoundtripTest, TestNonlinearSrgbAsXybEncoded) {
+ for (int use_container = 0; use_container < 2; use_container++) {
+ for (uint32_t num_channels = 1; num_channels < 5; num_channels++) {
+ JxlPixelFormat pixel_format_in =
+ JxlPixelFormat{num_channels, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0};
+ JxlPixelFormat pixel_format_out =
+ JxlPixelFormat{num_channels, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0};
+ VerifyRoundtripCompression<uint8_t>(
+ 63, 129, pixel_format_in, pixel_format_out,
+ /*lossless=*/false, (bool)use_container, {});
+ }
+ }
+}
+
+TEST(RoundtripTest, Resampling) {
+ JxlPixelFormat pixel_format =
+ JxlPixelFormat{3, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0};
+ VerifyRoundtripCompression<uint8_t>(63, 129, pixel_format, pixel_format,
+ /*lossless=*/false,
+ /*use_container=*/false, 2,
+ /*already_downsampled=*/false);
+
+ // TODO(lode): also make this work for odd sizes. This requires a fix in
+ // enc_frame.cc to not set custom_size_or_origin to true due to even/odd
+ // mismatch.
+ VerifyRoundtripCompression<uint8_t>(64, 128, pixel_format, pixel_format,
+ /*lossless=*/true,
+ /*use_container=*/false, 2,
+ /*already_downsampled=*/true);
+}
+
+TEST(RoundtripTest, ExtraBoxesTest) {
+ JxlPixelFormat pixel_format =
+ JxlPixelFormat{4, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0};
+ const size_t xsize = 61;
+ const size_t ysize = 71;
+
+ const std::vector<uint8_t> original_bytes =
+ GetTestImage<float>(xsize, ysize, pixel_format);
+ jxl::CodecInOut original_io =
+ ConvertTestImage(original_bytes, xsize, ysize, pixel_format, {});
+
+ JxlEncoder* enc = JxlEncoderCreate(nullptr);
+ EXPECT_NE(nullptr, enc);
+
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseContainer(enc, true));
+ JxlBasicInfo basic_info;
+ jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format);
+ basic_info.xsize = xsize;
+ basic_info.ysize = ysize;
+ basic_info.uses_original_profile = false;
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc, 10));
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc, &basic_info));
+ JxlColorEncoding color_encoding;
+ if (pixel_format.data_type == JXL_TYPE_FLOAT) {
+ JxlColorEncodingSetToLinearSRGB(&color_encoding,
+ /*is_gray=*/pixel_format.num_channels < 3);
+ } else {
+ JxlColorEncodingSetToSRGB(&color_encoding,
+ /*is_gray=*/pixel_format.num_channels < 3);
+ }
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetColorEncoding(enc, &color_encoding));
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc, nullptr);
+ JxlEncoderSetFrameLossless(frame_settings, false);
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+ (void*)original_bytes.data(),
+ original_bytes.size()));
+ JxlEncoderCloseInput(enc);
+
+ std::vector<uint8_t> compressed;
+ EncodeWithEncoder(enc, &compressed);
+ JxlEncoderDestroy(enc);
+
+ std::vector<uint8_t> extra_data(1023);
+ jxl::AppendBoxHeader(jxl::MakeBoxType("crud"), extra_data.size(), false,
+ &compressed);
+ compressed.insert(compressed.end(), extra_data.begin(), extra_data.end());
+
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+ EXPECT_NE(nullptr, dec);
+
+ const uint8_t* next_in = compressed.data();
+ size_t avail_in = compressed.size();
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO |
+ JXL_DEC_COLOR_ENCODING |
+ JXL_DEC_FULL_IMAGE));
+
+ JxlDecoderSetInput(dec, next_in, avail_in);
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &pixel_format, &buffer_size));
+ EXPECT_EQ(buffer_size, original_bytes.size());
+
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(xsize, info.xsize);
+ EXPECT_EQ(ysize, info.ysize);
+
+ EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+
+ size_t icc_profile_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetICCProfileSize(dec, &pixel_format,
+ JXL_COLOR_PROFILE_TARGET_DATA,
+ &icc_profile_size));
+ jxl::PaddedBytes icc_profile(icc_profile_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetColorAsICCProfile(
+ dec, &pixel_format, JXL_COLOR_PROFILE_TARGET_DATA,
+ icc_profile.data(), icc_profile.size()));
+
+ std::vector<uint8_t> decoded_bytes(buffer_size);
+
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(dec, &pixel_format,
+ decoded_bytes.data(),
+ decoded_bytes.size()));
+
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+
+ JxlDecoderDestroy(dec);
+
+ jxl::CodecInOut decoded_io =
+ ConvertTestImage(decoded_bytes, xsize, ysize, pixel_format, icc_profile);
+
+ jxl::ButteraugliParams ba;
+ float butteraugli_score = ButteraugliDistance(
+ original_io.frames, decoded_io.frames, ba, jxl::GetJxlCms(),
+ /*distmap=*/nullptr, nullptr);
+ EXPECT_LE(butteraugli_score, 2.0f);
+}
+
+static const unsigned char kEncodedTestProfile[] = {
+ 0x1f, 0x8b, 0x1, 0x13, 0x10, 0x0, 0x0, 0x0, 0x20, 0x4c, 0xcc, 0x3,
+ 0xe7, 0xa0, 0xa5, 0xa2, 0x90, 0xa4, 0x27, 0xe8, 0x79, 0x1d, 0xe3, 0x26,
+ 0x57, 0x54, 0xef, 0x0, 0xe8, 0x97, 0x2, 0xce, 0xa1, 0xd7, 0x85, 0x16,
+ 0xb4, 0x29, 0x94, 0x58, 0xf2, 0x56, 0xc0, 0x76, 0xea, 0x23, 0xec, 0x7c,
+ 0x73, 0x51, 0x41, 0x40, 0x23, 0x21, 0x95, 0x4, 0x75, 0x12, 0xc9, 0xcc,
+ 0x16, 0xbd, 0xb6, 0x99, 0xad, 0xf8, 0x75, 0x35, 0xb6, 0x42, 0xae, 0xae,
+ 0xae, 0x86, 0x56, 0xf8, 0xcc, 0x16, 0x30, 0xb3, 0x45, 0xad, 0xd, 0x40,
+ 0xd6, 0xd1, 0xd6, 0x99, 0x40, 0xbe, 0xe2, 0xdc, 0x31, 0x7, 0xa6, 0xb9,
+ 0x27, 0x92, 0x38, 0x0, 0x3, 0x5e, 0x2c, 0xbe, 0xe6, 0xfb, 0x19, 0xbf,
+ 0xf3, 0x6d, 0xbc, 0x4d, 0x64, 0xe5, 0xba, 0x76, 0xde, 0x31, 0x65, 0x66,
+ 0x14, 0xa6, 0x3a, 0xc5, 0x8f, 0xb1, 0xb4, 0xba, 0x1f, 0xb1, 0xb8, 0xd4,
+ 0x75, 0xba, 0x18, 0x86, 0x95, 0x3c, 0x26, 0xf6, 0x25, 0x62, 0x53, 0xfd,
+ 0x9c, 0x94, 0x76, 0xf6, 0x95, 0x2c, 0xb1, 0xfd, 0xdc, 0xc0, 0xe4, 0x3f,
+ 0xb3, 0xff, 0x67, 0xde, 0xd5, 0x94, 0xcc, 0xb0, 0x83, 0x2f, 0x28, 0x93,
+ 0x92, 0x3, 0xa1, 0x41, 0x64, 0x60, 0x62, 0x70, 0x80, 0x87, 0xaf, 0xe7,
+ 0x60, 0x4a, 0x20, 0x23, 0xb3, 0x11, 0x7, 0x38, 0x38, 0xd4, 0xa, 0x66,
+ 0xb5, 0x93, 0x41, 0x90, 0x19, 0x17, 0x18, 0x60, 0xa5, 0xb, 0x7a, 0x24,
+ 0xaa, 0x20, 0x81, 0xac, 0xa9, 0xa1, 0x70, 0xa6, 0x12, 0x8a, 0x4a, 0xa3,
+ 0xa0, 0xf9, 0x9a, 0x97, 0xe7, 0xa8, 0xac, 0x8, 0xa8, 0xc4, 0x2a, 0x86,
+ 0xa7, 0x69, 0x1e, 0x67, 0xe6, 0xbe, 0xa4, 0xd3, 0xff, 0x91, 0x61, 0xf6,
+ 0x8a, 0xe6, 0xb5, 0xb3, 0x61, 0x9f, 0x19, 0x17, 0x98, 0x27, 0x6b, 0xe9,
+ 0x8, 0x98, 0xe1, 0x21, 0x4a, 0x9, 0xb5, 0xd7, 0xca, 0xfa, 0x94, 0xd0,
+ 0x69, 0x1a, 0xeb, 0x52, 0x1, 0x4e, 0xf5, 0xf6, 0xdf, 0x7f, 0xe7, 0x29,
+ 0x70, 0xee, 0x4, 0xda, 0x2f, 0xa4, 0xff, 0xfe, 0xbb, 0x6f, 0xa8, 0xff,
+ 0xfe, 0xdb, 0xaf, 0x8, 0xf6, 0x72, 0xa1, 0x40, 0x5d, 0xf0, 0x2d, 0x8,
+ 0x82, 0x5b, 0x87, 0xbd, 0x10, 0x8, 0xe9, 0x7, 0xee, 0x4b, 0x80, 0xda,
+ 0x4a, 0x4, 0xc5, 0x5e, 0xa0, 0xb7, 0x1e, 0x60, 0xb0, 0x59, 0x76, 0x60,
+ 0xb, 0x2e, 0x19, 0x8a, 0x2e, 0x1c, 0xe6, 0x6, 0x20, 0xb8, 0x64, 0x18,
+ 0x2a, 0xcf, 0x51, 0x94, 0xd4, 0xee, 0xc3, 0xfe, 0x39, 0x74, 0xd4, 0x2b,
+ 0x48, 0xc9, 0x83, 0x4c, 0x9b, 0xd0, 0x4c, 0x35, 0x10, 0xe3, 0x9, 0xf7,
+ 0x72, 0xf0, 0x7a, 0xe, 0xbf, 0x7d, 0x36, 0x2e, 0x19, 0x7e, 0x3f, 0xc,
+ 0xf7, 0x93, 0xe7, 0xf4, 0x1d, 0x32, 0xc6, 0xb0, 0x89, 0xad, 0xe0, 0x28,
+ 0xc1, 0xa7, 0x59, 0xe3, 0x0,
+};
+
+TEST(RoundtripTest, TestICCProfile) {
+ // JxlEncoderSetICCProfile parses the ICC profile, so a valid profile is
+ // needed. The profile should be passed correctly through the roundtrip.
+ jxl::BitReader reader(jxl::Span<const uint8_t>(kEncodedTestProfile,
+ sizeof(kEncodedTestProfile)));
+ jxl::PaddedBytes icc;
+ ASSERT_TRUE(ReadICC(&reader, &icc));
+ ASSERT_TRUE(reader.Close());
+
+ JxlPixelFormat format =
+ JxlPixelFormat{3, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0};
+
+ size_t xsize = 25;
+ size_t ysize = 37;
+ const std::vector<uint8_t> original_bytes =
+ GetTestImage<uint8_t>(xsize, ysize, format);
+
+ JxlEncoder* enc = JxlEncoderCreate(nullptr);
+ EXPECT_NE(nullptr, enc);
+
+ JxlBasicInfo basic_info;
+ jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &format);
+ basic_info.xsize = xsize;
+ basic_info.ysize = ysize;
+ basic_info.uses_original_profile = JXL_TRUE;
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc, &basic_info));
+
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderSetICCProfile(enc, icc.data(), icc.size()));
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc, nullptr);
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderAddImageFrame(frame_settings, &format,
+ (void*)original_bytes.data(),
+ original_bytes.size()));
+ JxlEncoderCloseInput(enc);
+
+ std::vector<uint8_t> compressed;
+ EncodeWithEncoder(enc, &compressed);
+ JxlEncoderDestroy(enc);
+
+ JxlDecoder* dec = JxlDecoderCreate(nullptr);
+ EXPECT_NE(nullptr, dec);
+
+ const uint8_t* next_in = compressed.data();
+ size_t avail_in = compressed.size();
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO |
+ JXL_DEC_COLOR_ENCODING |
+ JXL_DEC_FULL_IMAGE));
+
+ JxlDecoderSetInput(dec, next_in, avail_in);
+ EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec));
+ size_t buffer_size;
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderImageOutBufferSize(dec, &format, &buffer_size));
+ EXPECT_EQ(buffer_size, original_bytes.size());
+
+ JxlBasicInfo info;
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info));
+ EXPECT_EQ(xsize, info.xsize);
+ EXPECT_EQ(ysize, info.ysize);
+
+ EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec));
+
+ size_t dec_icc_size;
+ EXPECT_EQ(
+ JXL_DEC_SUCCESS,
+ JxlDecoderGetICCProfileSize(
+ dec, &format, JXL_COLOR_PROFILE_TARGET_ORIGINAL, &dec_icc_size));
+ EXPECT_EQ(icc.size(), dec_icc_size);
+ jxl::PaddedBytes dec_icc(dec_icc_size);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderGetColorAsICCProfile(dec, &format,
+ JXL_COLOR_PROFILE_TARGET_ORIGINAL,
+ dec_icc.data(), dec_icc.size()));
+
+ std::vector<uint8_t> decoded_bytes(buffer_size);
+
+ EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec));
+
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetImageOutBuffer(dec, &format, decoded_bytes.data(),
+ decoded_bytes.size()));
+
+ EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec));
+
+ EXPECT_EQ(icc, dec_icc);
+
+ JxlDecoderDestroy(dec);
+}
+
+#if JPEGXL_ENABLE_JPEG // Loading .jpg files requires libjpeg support.
+TEST(RoundtripTest, JXL_TRANSCODE_JPEG_TEST(TestJPEGReconstruction)) {
+ const std::string jpeg_path = "jxl/flower/flower.png.im_q85_420.jpg";
+ const jxl::PaddedBytes orig = jxl::test::ReadTestData(jpeg_path);
+ jxl::CodecInOut orig_io;
+ ASSERT_TRUE(
+ SetFromBytes(jxl::Span<const uint8_t>(orig), &orig_io, /*pool=*/nullptr));
+
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr);
+ JxlEncoderFrameSettings* frame_settings =
+ JxlEncoderFrameSettingsCreate(enc.get(), NULL);
+
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseContainer(enc.get(), JXL_TRUE));
+ EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderStoreJPEGMetadata(enc.get(), JXL_TRUE));
+ EXPECT_EQ(JXL_ENC_SUCCESS,
+ JxlEncoderAddJPEGFrame(frame_settings, orig.data(), orig.size()));
+ JxlEncoderCloseInput(enc.get());
+
+ std::vector<uint8_t> compressed;
+ EncodeWithEncoder(enc.get(), &compressed);
+
+ JxlDecoderPtr dec = JxlDecoderMake(nullptr);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSubscribeEvents(
+ dec.get(), JXL_DEC_JPEG_RECONSTRUCTION | JXL_DEC_FULL_IMAGE));
+ JxlDecoderSetInput(dec.get(), compressed.data(), compressed.size());
+ EXPECT_EQ(JXL_DEC_JPEG_RECONSTRUCTION, JxlDecoderProcessInput(dec.get()));
+ std::vector<uint8_t> reconstructed_buffer(128);
+ EXPECT_EQ(JXL_DEC_SUCCESS,
+ JxlDecoderSetJPEGBuffer(dec.get(), reconstructed_buffer.data(),
+ reconstructed_buffer.size()));
+ size_t used = 0;
+ JxlDecoderStatus dec_process_result = JXL_DEC_JPEG_NEED_MORE_OUTPUT;
+ while (dec_process_result == JXL_DEC_JPEG_NEED_MORE_OUTPUT) {
+ used = reconstructed_buffer.size() - JxlDecoderReleaseJPEGBuffer(dec.get());
+ reconstructed_buffer.resize(reconstructed_buffer.size() * 2);
+ EXPECT_EQ(
+ JXL_DEC_SUCCESS,
+ JxlDecoderSetJPEGBuffer(dec.get(), reconstructed_buffer.data() + used,
+ reconstructed_buffer.size() - used));
+ dec_process_result = JxlDecoderProcessInput(dec.get());
+ }
+ ASSERT_EQ(JXL_DEC_FULL_IMAGE, dec_process_result);
+ used = reconstructed_buffer.size() - JxlDecoderReleaseJPEGBuffer(dec.get());
+ ASSERT_EQ(used, orig.size());
+ EXPECT_EQ(0, memcmp(reconstructed_buffer.data(), orig.data(), used));
+}
+#endif // JPEGXL_ENABLE_JPEG
diff --git a/third_party/jpeg-xl/lib/jxl/sanitizers.h b/third_party/jpeg-xl/lib/jxl/sanitizers.h
new file mode 100644
index 0000000000..ce0bd8dc63
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/sanitizers.h
@@ -0,0 +1,242 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_SANITIZERS_H_
+#define LIB_JXL_SANITIZERS_H_
+
+#include <inttypes.h>
+#include <stddef.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/sanitizer_definitions.h"
+#include "lib/jxl/image.h"
+
+#if JXL_MEMORY_SANITIZER
+#include <stdio.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "lib/jxl/base/status.h"
+#include "sanitizer/msan_interface.h"
+#endif
+
+namespace jxl {
+namespace msan {
+
+#if JXL_MEMORY_SANITIZER
+
+// Chosen so that kSanitizerSentinel is four copies of kSanitizerSentinelByte.
+constexpr uint8_t kSanitizerSentinelByte = 0x48;
+constexpr float kSanitizerSentinel = 205089.125f;
+
+static JXL_INLINE JXL_MAYBE_UNUSED void PoisonMemory(const volatile void* m,
+ size_t size) {
+ __msan_poison(m, size);
+}
+
+static JXL_INLINE JXL_MAYBE_UNUSED void UnpoisonMemory(const volatile void* m,
+ size_t size) {
+ __msan_unpoison(m, size);
+}
+
+static JXL_INLINE JXL_MAYBE_UNUSED void UnpoisonCStr(const char* c) {
+ do {
+ UnpoisonMemory(c, 1);
+ } while (*c++);
+}
+
+static JXL_INLINE JXL_MAYBE_UNUSED void MemoryIsInitialized(
+ const volatile void* m, size_t size) {
+ __msan_check_mem_is_initialized(m, size);
+}
+
+// Mark all the bytes of an image (including padding) as poisoned bytes.
+static JXL_INLINE JXL_MAYBE_UNUSED void PoisonImage(const PlaneBase& im) {
+ PoisonMemory(im.bytes(), im.bytes_per_row() * im.ysize());
+}
+
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED void PoisonImage(const Image3<T>& im) {
+ PoisonImage(im.Plane(0));
+ PoisonImage(im.Plane(1));
+ PoisonImage(im.Plane(2));
+}
+
+// Print the uninitialized regions of an image.
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED void PrintImageUninitialized(
+ const Plane<T>& im) {
+ fprintf(stderr,
+ "Uninitialized regions for image of size %" PRIu64 "x%" PRIu64 ":\n",
+ static_cast<uint64_t>(im.xsize()), static_cast<uint64_t>(im.ysize()));
+
+ // A segment of uninitialized pixels in a row, in the format [first, second).
+ typedef std::pair<size_t, size_t> PixelSegment;
+
+ // Helper class to merge and print a list of rows of PixelSegment that may be
+ // the same over big ranges of rows. This compacts the output to ranges of
+ // rows like "[y0, y1): [x0, x1) [x2, x3)".
+ class RowsMerger {
+ public:
+ // Add a new row the list of rows. If the row is the same as the previous
+ // one it will be merged showing a range of rows [y0, y1), but if the new
+ // row is different the current range of rows (if any) will be printed and a
+ // new one will be started.
+ void AddRow(size_t y, std::vector<PixelSegment>&& new_row) {
+ if (start_y_ != -1 && new_row != segments_) {
+ PrintRow(y);
+ }
+ if (new_row.empty()) {
+ // Skip ranges with no uninitialized pixels.
+ start_y_ = -1;
+ segments_.clear();
+ return;
+ }
+ if (start_y_ == -1) {
+ start_y_ = y;
+ segments_ = std::move(new_row);
+ }
+ }
+
+ // Print the contents of the range of rows [start_y_, end_y) if any.
+ void PrintRow(size_t end_y) {
+ if (start_y_ == -1) return;
+ if (segments_.empty()) {
+ start_y_ = -1;
+ return;
+ }
+ if (end_y - start_y_ > 1) {
+ fprintf(stderr, " y=[%" PRId64 ", %" PRIu64 "):",
+ static_cast<int64_t>(start_y_), static_cast<uint64_t>(end_y));
+ } else {
+ fprintf(stderr, " y=[%" PRId64 "]:", static_cast<int64_t>(start_y_));
+ }
+ for (const auto& seg : segments_) {
+ if (seg.first + 1 == seg.second) {
+ fprintf(stderr, " [%" PRId64 "]", static_cast<int64_t>(seg.first));
+ } else {
+ fprintf(stderr, " [%" PRId64 ", %" PRIu64 ")",
+ static_cast<int64_t>(seg.first),
+ static_cast<uint64_t>(seg.second));
+ }
+ }
+ fprintf(stderr, "\n");
+ start_y_ = -1;
+ }
+
+ private:
+ std::vector<PixelSegment> segments_;
+ // Row number of the first row in the range of rows that have |segments| as
+ // the undefined segments.
+ ssize_t start_y_ = -1;
+ } rows_merger;
+
+ class SegmentsMerger {
+ public:
+ void AddValue(size_t x) {
+ if (row.empty() || row.back().second != x) {
+ row.emplace_back(x, x + 1);
+ } else {
+ row.back().second = x + 1;
+ }
+ }
+
+ std::vector<PixelSegment> row;
+ };
+
+ for (size_t y = 0; y < im.ysize(); y++) {
+ auto* row = im.Row(y);
+ SegmentsMerger seg_merger;
+ size_t x = 0;
+ while (x < im.xsize()) {
+ intptr_t ret =
+ __msan_test_shadow(row + x, (im.xsize() - x) * sizeof(row[0]));
+ if (ret < 0) break;
+ size_t next_x = x + ret / sizeof(row[0]);
+ seg_merger.AddValue(next_x);
+ x = next_x + 1;
+ }
+ rows_merger.AddRow(y, std::move(seg_merger.row));
+ }
+ rows_merger.PrintRow(im.ysize());
+}
+
+// Check that all the pixels in the provided rect of the image are initialized
+// (not poisoned). If any of the values is poisoned it will abort.
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED void CheckImageInitialized(
+ const Plane<T>& im, const Rect& r, size_t c, const char* message) {
+ JXL_ASSERT(r.x0() <= im.xsize());
+ JXL_ASSERT(r.x0() + r.xsize() <= im.xsize());
+ JXL_ASSERT(r.y0() <= im.ysize());
+ JXL_ASSERT(r.y0() + r.ysize() <= im.ysize());
+ for (size_t y = r.y0(); y < r.y0() + r.ysize(); y++) {
+ const auto* row = im.Row(y);
+ intptr_t ret = __msan_test_shadow(row + r.x0(), sizeof(*row) * r.xsize());
+ if (ret != -1) {
+ JXL_DEBUG(
+ 1,
+ "Checking an image of %" PRIu64 " x %" PRIu64 ", rect x0=%" PRIu64
+ ", y0=%" PRIu64
+ ", "
+ "xsize=%" PRIu64 ", ysize=%" PRIu64,
+ static_cast<uint64_t>(im.xsize()), static_cast<uint64_t>(im.ysize()),
+ static_cast<uint64_t>(r.x0()), static_cast<uint64_t>(r.y0()),
+ static_cast<uint64_t>(r.xsize()), static_cast<uint64_t>(r.ysize()));
+ size_t x = ret / sizeof(*row);
+ JXL_DEBUG(1,
+ "CheckImageInitialized failed at x=%" PRIu64 ", y=%" PRIu64
+ ", c=%" PRIu64 ": %s",
+ static_cast<uint64_t>(r.x0() + x), static_cast<uint64_t>(y),
+ static_cast<uint64_t>(c), message ? message : "");
+ PrintImageUninitialized(im);
+ }
+ // This will report an error if memory is not initialized.
+ __msan_check_mem_is_initialized(row + r.x0(), sizeof(*row) * r.xsize());
+ }
+}
+
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED void CheckImageInitialized(
+ const Image3<T>& im, const Rect& r, const char* message) {
+ for (size_t c = 0; c < 3; c++) {
+ std::string str_message(message);
+ str_message += " c=" + std::to_string(c);
+ CheckImageInitialized(im.Plane(c), r, c, str_message.c_str());
+ }
+}
+
+#define JXL_CHECK_IMAGE_INITIALIZED(im, r) \
+ ::jxl::msan::CheckImageInitialized(im, r, "im=" #im ", r=" #r);
+
+#define JXL_CHECK_PLANE_INITIALIZED(im, r, c) \
+ ::jxl::msan::CheckImageInitialized(im, r, c, "im=" #im ", r=" #r ", c=" #c);
+
+#else // JXL_MEMORY_SANITIZER
+
+// In non-msan mode these functions don't use volatile since it is not needed
+// for the empty functions.
+
+static JXL_INLINE JXL_MAYBE_UNUSED void PoisonMemory(const void*, size_t) {}
+static JXL_INLINE JXL_MAYBE_UNUSED void UnpoisonMemory(const void*, size_t) {}
+static JXL_INLINE JXL_MAYBE_UNUSED void UnpoisonCStr(const char*) {}
+static JXL_INLINE JXL_MAYBE_UNUSED void MemoryIsInitialized(const void*,
+ size_t) {}
+
+static JXL_INLINE JXL_MAYBE_UNUSED void PoisonImage(const PlaneBase& im) {}
+template <typename T>
+static JXL_INLINE JXL_MAYBE_UNUSED void PoisonImage(const Plane<T>& im) {}
+
+#define JXL_CHECK_IMAGE_INITIALIZED(im, r)
+#define JXL_CHECK_PLANE_INITIALIZED(im, r, c)
+
+#endif
+
+} // namespace msan
+} // namespace jxl
+
+#endif // LIB_JXL_SANITIZERS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/simd_util-inl.h b/third_party/jpeg-xl/lib/jxl/simd_util-inl.h
new file mode 100644
index 0000000000..77b207ffe8
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/simd_util-inl.h
@@ -0,0 +1,349 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Misc utilities for SIMD operations
+
+#if defined(LIB_JXL_SIMD_UTIL_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_SIMD_UTIL_INL_H_
+#undef LIB_JXL_SIMD_UTIL_INL_H_
+#else
+#define LIB_JXL_SIMD_UTIL_INL_H_
+#endif
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+#if HWY_CAP_GE512
+using hwy::HWY_NAMESPACE::Half;
+using hwy::HWY_NAMESPACE::Vec;
+template <size_t i, class DF, class V>
+HWY_INLINE Vec<Half<Half<DF>>> Quarter(const DF df, V v) {
+ using HF = Half<DF>;
+ using HHF = Half<HF>;
+ auto half = i >= 2 ? UpperHalf(HF(), v) : LowerHalf(HF(), v);
+ return i & 1 ? UpperHalf(HHF(), half) : LowerHalf(HHF(), half);
+}
+
+template <class DF, class V>
+HWY_INLINE Vec<DF> Concat4(const DF df, V v0, V v1, V v2, V v3) {
+ using HF = Half<DF>;
+ return Combine(DF(), Combine(HF(), v3, v2), Combine(HF(), v1, v0));
+}
+
+#endif
+
+// Stores v0[0], v1[0], v0[1], v1[1], ... to mem, in this order. Mem must be
+// aligned.
+template <class DF, class V, typename T>
+void StoreInterleaved(const DF df, V v0, V v1, T* mem) {
+ static_assert(sizeof(T) == 4, "only use StoreInterleaved for 4-byte types");
+#if HWY_TARGET == HWY_SCALAR
+ Store(v0, df, mem);
+ Store(v1, df, mem + 1);
+#elif !HWY_CAP_GE256
+ Store(InterleaveLower(df, v0, v1), df, mem);
+ Store(InterleaveUpper(df, v0, v1), df, mem + Lanes(df));
+#else
+ if (!HWY_CAP_GE512 || Lanes(df) == 8) {
+ auto t0 = InterleaveLower(df, v0, v1);
+ auto t1 = InterleaveUpper(df, v0, v1);
+ Store(ConcatLowerLower(df, t1, t0), df, mem);
+ Store(ConcatUpperUpper(df, t1, t0), df, mem + Lanes(df));
+ } else {
+#if HWY_CAP_GE512
+ auto t0 = InterleaveLower(df, v0, v1);
+ auto t1 = InterleaveUpper(df, v0, v1);
+ Store(Concat4(df, Quarter<0>(df, t0), Quarter<0>(df, t1),
+ Quarter<1>(df, t0), Quarter<1>(df, t1)),
+ df, mem);
+ Store(Concat4(df, Quarter<2>(df, t0), Quarter<2>(df, t1),
+ Quarter<3>(df, t0), Quarter<3>(df, t1)),
+ df, mem + Lanes(df));
+#endif
+ }
+#endif
+}
+
+// Stores v0[0], v1[0], v2[0], v3[0], v0[1] ... to mem, in this order. Mem must
+// be aligned.
+template <class DF, class V, typename T>
+void StoreInterleaved(const DF df, V v0, V v1, V v2, V v3, T* mem) {
+ static_assert(sizeof(T) == 4, "only use StoreInterleaved for 4-byte types");
+#if HWY_TARGET == HWY_SCALAR
+ Store(v0, df, mem);
+ Store(v1, df, mem + 1);
+ Store(v2, df, mem + 2);
+ Store(v3, df, mem + 3);
+#elif !HWY_CAP_GE256
+ auto t0 = InterleaveLower(df, v0, v2);
+ auto t1 = InterleaveLower(df, v1, v3);
+ auto t2 = InterleaveUpper(df, v0, v2);
+ auto t3 = InterleaveUpper(df, v1, v3);
+ Store(InterleaveLower(df, t0, t1), df, mem);
+ Store(InterleaveUpper(df, t0, t1), df, mem + Lanes(df));
+ Store(InterleaveLower(df, t2, t3), df, mem + 2 * Lanes(df));
+ Store(InterleaveUpper(df, t2, t3), df, mem + 3 * Lanes(df));
+#elif !HWY_CAP_GE512
+ auto t0 = InterleaveLower(df, v0, v2);
+ auto t1 = InterleaveLower(df, v1, v3);
+ auto t2 = InterleaveUpper(df, v0, v2);
+ auto t3 = InterleaveUpper(df, v1, v3);
+
+ auto m0 = InterleaveLower(df, t0, t1);
+ auto m1 = InterleaveUpper(df, t0, t1);
+ auto m2 = InterleaveLower(df, t2, t3);
+ auto m3 = InterleaveUpper(df, t2, t3);
+
+ Store(ConcatLowerLower(df, m1, m0), df, mem);
+ Store(ConcatLowerLower(df, m3, m2), df, mem + Lanes(df));
+ Store(ConcatUpperUpper(df, m1, m0), df, mem + 2 * Lanes(df));
+ Store(ConcatUpperUpper(df, m3, m2), df, mem + 3 * Lanes(df));
+#else
+ auto t0 = InterleaveLower(df, v0, v2);
+ auto t1 = InterleaveLower(df, v1, v3);
+ auto t2 = InterleaveUpper(df, v0, v2);
+ auto t3 = InterleaveUpper(df, v1, v3);
+
+ auto m0 = InterleaveLower(df, t0, t1);
+ auto m1 = InterleaveUpper(df, t0, t1);
+ auto m2 = InterleaveLower(df, t2, t3);
+ auto m3 = InterleaveUpper(df, t2, t3);
+
+ Store(Concat4(df, Quarter<0>(df, m0), Quarter<0>(df, m1), Quarter<0>(df, m2),
+ Quarter<0>(df, m3)),
+ df, mem);
+ Store(Concat4(df, Quarter<1>(df, m0), Quarter<1>(df, m1), Quarter<1>(df, m2),
+ Quarter<1>(df, m3)),
+ df, mem + Lanes(df));
+ Store(Concat4(df, Quarter<2>(df, m0), Quarter<2>(df, m1), Quarter<2>(df, m2),
+ Quarter<2>(df, m3)),
+ df, mem + 2 * Lanes(df));
+ Store(Concat4(df, Quarter<3>(df, m0), Quarter<3>(df, m1), Quarter<3>(df, m2),
+ Quarter<3>(df, m3)),
+ df, mem + 3 * Lanes(df));
+#endif
+}
+
+// Stores v0[0], v1[0], v2[0], v3[0], v4[0], v5[0], v6[0], v7[0], v0[1] ... to
+// mem, in this order. Mem must be aligned.
+template <class DF, class V>
+void StoreInterleaved(const DF df, V v0, V v1, V v2, V v3, V v4, V v5, V v6,
+ V v7, float* mem) {
+#if HWY_TARGET == HWY_SCALAR
+ Store(v0, df, mem);
+ Store(v1, df, mem + 1);
+ Store(v2, df, mem + 2);
+ Store(v3, df, mem + 3);
+ Store(v4, df, mem + 4);
+ Store(v5, df, mem + 5);
+ Store(v6, df, mem + 6);
+ Store(v7, df, mem + 7);
+#elif !HWY_CAP_GE256
+ auto t0 = InterleaveLower(df, v0, v4);
+ auto t1 = InterleaveLower(df, v1, v5);
+ auto t2 = InterleaveLower(df, v2, v6);
+ auto t3 = InterleaveLower(df, v3, v7);
+ auto t4 = InterleaveUpper(df, v0, v4);
+ auto t5 = InterleaveUpper(df, v1, v5);
+ auto t6 = InterleaveUpper(df, v2, v6);
+ auto t7 = InterleaveUpper(df, v3, v7);
+
+ auto w0 = InterleaveLower(df, t0, t2);
+ auto w1 = InterleaveLower(df, t1, t3);
+ auto w2 = InterleaveUpper(df, t0, t2);
+ auto w3 = InterleaveUpper(df, t1, t3);
+ auto w4 = InterleaveLower(df, t4, t6);
+ auto w5 = InterleaveLower(df, t5, t7);
+ auto w6 = InterleaveUpper(df, t4, t6);
+ auto w7 = InterleaveUpper(df, t5, t7);
+
+ Store(InterleaveLower(df, w0, w1), df, mem);
+ Store(InterleaveUpper(df, w0, w1), df, mem + Lanes(df));
+ Store(InterleaveLower(df, w2, w3), df, mem + 2 * Lanes(df));
+ Store(InterleaveUpper(df, w2, w3), df, mem + 3 * Lanes(df));
+ Store(InterleaveLower(df, w4, w5), df, mem + 4 * Lanes(df));
+ Store(InterleaveUpper(df, w4, w5), df, mem + 5 * Lanes(df));
+ Store(InterleaveLower(df, w6, w7), df, mem + 6 * Lanes(df));
+ Store(InterleaveUpper(df, w6, w7), df, mem + 7 * Lanes(df));
+#elif !HWY_CAP_GE512
+ auto t0 = InterleaveLower(df, v0, v4);
+ auto t1 = InterleaveLower(df, v1, v5);
+ auto t2 = InterleaveLower(df, v2, v6);
+ auto t3 = InterleaveLower(df, v3, v7);
+ auto t4 = InterleaveUpper(df, v0, v4);
+ auto t5 = InterleaveUpper(df, v1, v5);
+ auto t6 = InterleaveUpper(df, v2, v6);
+ auto t7 = InterleaveUpper(df, v3, v7);
+
+ auto w0 = InterleaveLower(df, t0, t2);
+ auto w1 = InterleaveLower(df, t1, t3);
+ auto w2 = InterleaveUpper(df, t0, t2);
+ auto w3 = InterleaveUpper(df, t1, t3);
+ auto w4 = InterleaveLower(df, t4, t6);
+ auto w5 = InterleaveLower(df, t5, t7);
+ auto w6 = InterleaveUpper(df, t4, t6);
+ auto w7 = InterleaveUpper(df, t5, t7);
+
+ auto m0 = InterleaveLower(df, w0, w1);
+ auto m1 = InterleaveUpper(df, w0, w1);
+ auto m2 = InterleaveLower(df, w2, w3);
+ auto m3 = InterleaveUpper(df, w2, w3);
+ auto m4 = InterleaveLower(df, w4, w5);
+ auto m5 = InterleaveUpper(df, w4, w5);
+ auto m6 = InterleaveLower(df, w6, w7);
+ auto m7 = InterleaveUpper(df, w6, w7);
+
+ Store(ConcatLowerLower(df, m1, m0), df, mem);
+ Store(ConcatLowerLower(df, m3, m2), df, mem + Lanes(df));
+ Store(ConcatLowerLower(df, m5, m4), df, mem + 2 * Lanes(df));
+ Store(ConcatLowerLower(df, m7, m6), df, mem + 3 * Lanes(df));
+ Store(ConcatUpperUpper(df, m1, m0), df, mem + 4 * Lanes(df));
+ Store(ConcatUpperUpper(df, m3, m2), df, mem + 5 * Lanes(df));
+ Store(ConcatUpperUpper(df, m5, m4), df, mem + 6 * Lanes(df));
+ Store(ConcatUpperUpper(df, m7, m6), df, mem + 7 * Lanes(df));
+#else
+ auto t0 = InterleaveLower(df, v0, v4);
+ auto t1 = InterleaveLower(df, v1, v5);
+ auto t2 = InterleaveLower(df, v2, v6);
+ auto t3 = InterleaveLower(df, v3, v7);
+ auto t4 = InterleaveUpper(df, v0, v4);
+ auto t5 = InterleaveUpper(df, v1, v5);
+ auto t6 = InterleaveUpper(df, v2, v6);
+ auto t7 = InterleaveUpper(df, v3, v7);
+
+ auto w0 = InterleaveLower(df, t0, t2);
+ auto w1 = InterleaveLower(df, t1, t3);
+ auto w2 = InterleaveUpper(df, t0, t2);
+ auto w3 = InterleaveUpper(df, t1, t3);
+ auto w4 = InterleaveLower(df, t4, t6);
+ auto w5 = InterleaveLower(df, t5, t7);
+ auto w6 = InterleaveUpper(df, t4, t6);
+ auto w7 = InterleaveUpper(df, t5, t7);
+
+ auto m0 = InterleaveLower(df, w0, w1);
+ auto m1 = InterleaveUpper(df, w0, w1);
+ auto m2 = InterleaveLower(df, w2, w3);
+ auto m3 = InterleaveUpper(df, w2, w3);
+ auto m4 = InterleaveLower(df, w4, w5);
+ auto m5 = InterleaveUpper(df, w4, w5);
+ auto m6 = InterleaveLower(df, w6, w7);
+ auto m7 = InterleaveUpper(df, w6, w7);
+
+ Store(Concat4(df, Quarter<0>(df, m0), Quarter<0>(df, m1), Quarter<0>(df, m2),
+ Quarter<0>(df, m3)),
+ df, mem);
+ Store(Concat4(df, Quarter<0>(df, m4), Quarter<0>(df, m5), Quarter<0>(df, m6),
+ Quarter<0>(df, m7)),
+ df, mem + Lanes(df));
+ Store(Concat4(df, Quarter<1>(df, m0), Quarter<1>(df, m1), Quarter<1>(df, m2),
+ Quarter<1>(df, m3)),
+ df, mem + 2 * Lanes(df));
+ Store(Concat4(df, Quarter<1>(df, m4), Quarter<1>(df, m5), Quarter<1>(df, m6),
+ Quarter<1>(df, m7)),
+ df, mem + 3 * Lanes(df));
+ Store(Concat4(df, Quarter<2>(df, m0), Quarter<2>(df, m1), Quarter<2>(df, m2),
+ Quarter<2>(df, m3)),
+ df, mem + 4 * Lanes(df));
+ Store(Concat4(df, Quarter<2>(df, m4), Quarter<2>(df, m5), Quarter<2>(df, m6),
+ Quarter<2>(df, m7)),
+ df, mem + 5 * Lanes(df));
+ Store(Concat4(df, Quarter<3>(df, m0), Quarter<3>(df, m1), Quarter<3>(df, m2),
+ Quarter<3>(df, m3)),
+ df, mem + 6 * Lanes(df));
+ Store(Concat4(df, Quarter<3>(df, m4), Quarter<3>(df, m5), Quarter<3>(df, m6),
+ Quarter<3>(df, m7)),
+ df, mem + 7 * Lanes(df));
+#endif
+}
+
+#if HWY_CAP_GE256
+JXL_INLINE void Transpose8x8Block(const int32_t* JXL_RESTRICT from,
+ int32_t* JXL_RESTRICT to, size_t fromstride) {
+ const HWY_CAPPED(int32_t, 8) d;
+ auto i0 = Load(d, from);
+ auto i1 = Load(d, from + 1 * fromstride);
+ auto i2 = Load(d, from + 2 * fromstride);
+ auto i3 = Load(d, from + 3 * fromstride);
+ auto i4 = Load(d, from + 4 * fromstride);
+ auto i5 = Load(d, from + 5 * fromstride);
+ auto i6 = Load(d, from + 6 * fromstride);
+ auto i7 = Load(d, from + 7 * fromstride);
+
+ const auto q0 = InterleaveLower(d, i0, i2);
+ const auto q1 = InterleaveLower(d, i1, i3);
+ const auto q2 = InterleaveUpper(d, i0, i2);
+ const auto q3 = InterleaveUpper(d, i1, i3);
+ const auto q4 = InterleaveLower(d, i4, i6);
+ const auto q5 = InterleaveLower(d, i5, i7);
+ const auto q6 = InterleaveUpper(d, i4, i6);
+ const auto q7 = InterleaveUpper(d, i5, i7);
+
+ const auto r0 = InterleaveLower(d, q0, q1);
+ const auto r1 = InterleaveUpper(d, q0, q1);
+ const auto r2 = InterleaveLower(d, q2, q3);
+ const auto r3 = InterleaveUpper(d, q2, q3);
+ const auto r4 = InterleaveLower(d, q4, q5);
+ const auto r5 = InterleaveUpper(d, q4, q5);
+ const auto r6 = InterleaveLower(d, q6, q7);
+ const auto r7 = InterleaveUpper(d, q6, q7);
+
+ i0 = ConcatLowerLower(d, r4, r0);
+ i1 = ConcatLowerLower(d, r5, r1);
+ i2 = ConcatLowerLower(d, r6, r2);
+ i3 = ConcatLowerLower(d, r7, r3);
+ i4 = ConcatUpperUpper(d, r4, r0);
+ i5 = ConcatUpperUpper(d, r5, r1);
+ i6 = ConcatUpperUpper(d, r6, r2);
+ i7 = ConcatUpperUpper(d, r7, r3);
+
+ Store(i0, d, to);
+ Store(i1, d, to + 1 * 8);
+ Store(i2, d, to + 2 * 8);
+ Store(i3, d, to + 3 * 8);
+ Store(i4, d, to + 4 * 8);
+ Store(i5, d, to + 5 * 8);
+ Store(i6, d, to + 6 * 8);
+ Store(i7, d, to + 7 * 8);
+}
+#elif HWY_TARGET != HWY_SCALAR
+JXL_INLINE void Transpose8x8Block(const int32_t* JXL_RESTRICT from,
+ int32_t* JXL_RESTRICT to, size_t fromstride) {
+ const HWY_CAPPED(int32_t, 4) d;
+ for (size_t n = 0; n < 8; n += 4) {
+ for (size_t m = 0; m < 8; m += 4) {
+ auto p0 = Load(d, from + n * fromstride + m);
+ auto p1 = Load(d, from + (n + 1) * fromstride + m);
+ auto p2 = Load(d, from + (n + 2) * fromstride + m);
+ auto p3 = Load(d, from + (n + 3) * fromstride + m);
+ const auto q0 = InterleaveLower(d, p0, p2);
+ const auto q1 = InterleaveLower(d, p1, p3);
+ const auto q2 = InterleaveUpper(d, p0, p2);
+ const auto q3 = InterleaveUpper(d, p1, p3);
+
+ const auto r0 = InterleaveLower(d, q0, q1);
+ const auto r1 = InterleaveUpper(d, q0, q1);
+ const auto r2 = InterleaveLower(d, q2, q3);
+ const auto r3 = InterleaveUpper(d, q2, q3);
+ Store(r0, d, to + m * 8 + n);
+ Store(r1, d, to + (1 + m) * 8 + n);
+ Store(r2, d, to + (2 + m) * 8 + n);
+ Store(r3, d, to + (3 + m) * 8 + n);
+ }
+ }
+}
+
+#endif
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif // LIB_JXL_SIMD_UTIL_INL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/simd_util_test.cc b/third_party/jpeg-xl/lib/jxl/simd_util_test.cc
new file mode 100644
index 0000000000..b81f5d1279
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/simd_util_test.cc
@@ -0,0 +1,84 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/simd_util_test.cc"
+#include <hwy/foreach_target.h>
+
+#include "lib/jxl/simd_util-inl.h"
+
+// Test utils
+#include <hwy/highway.h>
+#include <hwy/tests/test_util-inl.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+HWY_NOINLINE void TestInterleave2() {
+ HWY_FULL(float) d;
+ auto vec1 = Iota(d, 0 * 128.0);
+ auto vec2 = Iota(d, 1 * 128.0);
+ HWY_ALIGN float mem[MaxLanes(d) * 2];
+ StoreInterleaved(d, vec1, vec2, mem);
+ for (size_t i = 0; i < Lanes(d); i++) {
+ for (size_t j = 0; j < 2; j++) {
+ EXPECT_EQ(mem[2 * i + j], j * 128 + i) << "i: " << i << " j: " << j;
+ }
+ }
+}
+HWY_NOINLINE void TestInterleave4() {
+ HWY_FULL(float) d;
+ auto vec1 = Iota(d, 0 * 128.0);
+ auto vec2 = Iota(d, 1 * 128.0);
+ auto vec3 = Iota(d, 2 * 128.0);
+ auto vec4 = Iota(d, 3 * 128.0);
+ HWY_ALIGN float mem[MaxLanes(d) * 4];
+ StoreInterleaved(d, vec1, vec2, vec3, vec4, mem);
+ for (size_t i = 0; i < Lanes(d); i++) {
+ for (size_t j = 0; j < 4; j++) {
+ EXPECT_EQ(mem[4 * i + j], j * 128 + i) << "i: " << i << " j: " << j;
+ }
+ }
+}
+HWY_NOINLINE void TestInterleave8() {
+ HWY_FULL(float) d;
+ auto vec1 = Iota(d, 0 * 128.0);
+ auto vec2 = Iota(d, 1 * 128.0);
+ auto vec3 = Iota(d, 2 * 128.0);
+ auto vec4 = Iota(d, 3 * 128.0);
+ auto vec5 = Iota(d, 4 * 128.0);
+ auto vec6 = Iota(d, 5 * 128.0);
+ auto vec7 = Iota(d, 6 * 128.0);
+ auto vec8 = Iota(d, 7 * 128.0);
+ HWY_ALIGN float mem[MaxLanes(d) * 8];
+ StoreInterleaved(d, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, mem);
+ for (size_t i = 0; i < Lanes(d); i++) {
+ for (size_t j = 0; j < 8; j++) {
+ EXPECT_EQ(mem[8 * i + j], j * 128 + i) << "i: " << i << " j: " << j;
+ }
+ }
+}
+
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+class SimdUtilTargetTest : public hwy::TestWithParamTarget {};
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(SimdUtilTargetTest);
+
+HWY_EXPORT_AND_TEST_P(SimdUtilTargetTest, TestInterleave2);
+HWY_EXPORT_AND_TEST_P(SimdUtilTargetTest, TestInterleave4);
+HWY_EXPORT_AND_TEST_P(SimdUtilTargetTest, TestInterleave8);
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/speed_tier_test.cc b/third_party/jpeg-xl/lib/jxl/speed_tier_test.cc
new file mode 100644
index 0000000000..b3f30c3e4c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/speed_tier_test.cc
@@ -0,0 +1,108 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <string>
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_file.h"
+#include "lib/jxl/enc_params.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+struct SpeedTierTestParams {
+ explicit SpeedTierTestParams(const SpeedTier speed_tier,
+ const bool shrink8 = false)
+ : speed_tier(speed_tier), shrink8(shrink8) {}
+ SpeedTier speed_tier;
+ bool shrink8;
+};
+
+std::ostream& operator<<(std::ostream& os, SpeedTierTestParams params) {
+ auto previous_flags = os.flags();
+ os << std::boolalpha;
+ os << "SpeedTierTestParams{" << static_cast<size_t>(params.speed_tier)
+ << ", /*shrink8=*/" << params.shrink8 << "}";
+ os.flags(previous_flags);
+ return os;
+}
+
+class SpeedTierTest : public testing::TestWithParam<SpeedTierTestParams> {};
+
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(
+ SpeedTierTestInstantiation, SpeedTierTest,
+ testing::Values(SpeedTierTestParams{SpeedTier::kCheetah,
+ /*shrink8=*/true},
+ SpeedTierTestParams{SpeedTier::kCheetah,
+ /*shrink8=*/false},
+ SpeedTierTestParams{SpeedTier::kThunder,
+ /*shrink8=*/true},
+ SpeedTierTestParams{SpeedTier::kThunder,
+ /*shrink8=*/false},
+ SpeedTierTestParams{SpeedTier::kLightning,
+ /*shrink8=*/true},
+ SpeedTierTestParams{SpeedTier::kLightning,
+ /*shrink8=*/false},
+ SpeedTierTestParams{SpeedTier::kFalcon,
+ /*shrink8=*/true},
+ SpeedTierTestParams{SpeedTier::kFalcon,
+ /*shrink8=*/false},
+ SpeedTierTestParams{SpeedTier::kHare,
+ /*shrink8=*/true},
+ SpeedTierTestParams{SpeedTier::kHare,
+ /*shrink8=*/false},
+ SpeedTierTestParams{SpeedTier::kWombat,
+ /*shrink8=*/true},
+ SpeedTierTestParams{SpeedTier::kWombat,
+ /*shrink8=*/false},
+ SpeedTierTestParams{SpeedTier::kSquirrel,
+ /*shrink8=*/true},
+ SpeedTierTestParams{SpeedTier::kSquirrel,
+ /*shrink8=*/false},
+ SpeedTierTestParams{SpeedTier::kKitten,
+ /*shrink8=*/true},
+ SpeedTierTestParams{SpeedTier::kKitten,
+ /*shrink8=*/false},
+ // Only downscaled image for Tortoise mode.
+ SpeedTierTestParams{SpeedTier::kTortoise,
+ /*shrink8=*/true}));
+
+TEST_P(SpeedTierTest, Roundtrip) {
+ const PaddedBytes orig = jxl::test::ReadTestData(
+ "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
+ CodecInOut io;
+ test::ThreadPoolForTests pool(8);
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io, &pool));
+
+ const SpeedTierTestParams& params = GetParam();
+
+ if (params.shrink8) {
+ io.ShrinkTo(io.xsize() / 8, io.ysize() / 8);
+ }
+
+ CompressParams cparams;
+ cparams.speed_tier = params.speed_tier;
+
+ CodecInOut io2;
+ JXL_EXPECT_OK(test::Roundtrip(&io, cparams, {}, &io2, _));
+
+ // Can be 2.2 in non-hare mode.
+ EXPECT_LE(
+ ButteraugliDistance(io.frames, io2.frames, cparams.ba_params, GetJxlCms(),
+ /*distmap=*/nullptr, /*pool=*/nullptr),
+ 2.8);
+}
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/splines.cc b/third_party/jpeg-xl/lib/jxl/splines.cc
new file mode 100644
index 0000000000..04d1df8e49
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/splines.cc
@@ -0,0 +1,694 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/splines.h"
+
+#include <algorithm>
+#include <cmath>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/opsin_params.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/splines.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/fast_math-inl.h"
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Mul;
+using hwy::HWY_NAMESPACE::MulAdd;
+using hwy::HWY_NAMESPACE::MulSub;
+using hwy::HWY_NAMESPACE::Sqrt;
+using hwy::HWY_NAMESPACE::Sub;
+
+// Given a set of DCT coefficients, this returns the result of performing cosine
+// interpolation on the original samples.
+float ContinuousIDCT(const float dct[32], const float t) {
+ // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of
+ // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces
+ // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform
+ // treatment of all the coefficients.
+ constexpr float kMultipliers[32] = {
+ kPi / 32 * 0, kPi / 32 * 1, kPi / 32 * 2, kPi / 32 * 3, kPi / 32 * 4,
+ kPi / 32 * 5, kPi / 32 * 6, kPi / 32 * 7, kPi / 32 * 8, kPi / 32 * 9,
+ kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14,
+ kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19,
+ kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24,
+ kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29,
+ kPi / 32 * 30, kPi / 32 * 31,
+ };
+ HWY_CAPPED(float, 32) df;
+ auto result = Zero(df);
+ const auto tandhalf = Set(df, t + 0.5f);
+ for (int i = 0; i < 32; i += Lanes(df)) {
+ auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf);
+ auto cos = FastCosf(df, cos_arg);
+ auto local_res = Mul(LoadU(df, dct + i), cos);
+ result = MulAdd(Set(df, kSqrt2), local_res, result);
+ }
+ return GetLane(SumOfLanes(df, result));
+}
+
+template <typename DF>
+void DrawSegment(DF df, const SplineSegment& segment, const bool add,
+ const size_t y, const size_t x, float* JXL_RESTRICT rows[3]) {
+ Rebind<int32_t, DF> di;
+ const auto inv_sigma = Set(df, segment.inv_sigma);
+ const auto half = Set(df, 0.5f);
+ const auto one_over_2s2 = Set(df, 0.353553391f);
+ const auto sigma_over_4_times_intensity =
+ Set(df, segment.sigma_over_4_times_intensity);
+ const auto dx = Sub(ConvertTo(df, Iota(di, x)), Set(df, segment.center_x));
+ const auto dy = Set(df, y - segment.center_y);
+ const auto sqd = MulAdd(dx, dx, Mul(dy, dy));
+ const auto distance = Sqrt(sqd);
+ const auto one_dimensional_factor =
+ Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)),
+ FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma)));
+ auto local_intensity =
+ Mul(sigma_over_4_times_intensity,
+ Mul(one_dimensional_factor, one_dimensional_factor));
+ for (size_t c = 0; c < 3; ++c) {
+ const auto cm = Set(df, add ? segment.color[c] : -segment.color[c]);
+ const auto in = LoadU(df, rows[c] + x);
+ StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x);
+ }
+}
+
+void DrawSegment(const SplineSegment& segment, const bool add, const size_t y,
+ const ssize_t x0, ssize_t x1, float* JXL_RESTRICT rows[3]) {
+ ssize_t x =
+ std::max<ssize_t>(x0, segment.center_x - segment.maximum_distance + 0.5f);
+ // one-past-the-end
+ x1 =
+ std::min<ssize_t>(x1, segment.center_x + segment.maximum_distance + 1.5f);
+ HWY_FULL(float) df;
+ for (; x + static_cast<ssize_t>(Lanes(df)) <= x1; x += Lanes(df)) {
+ DrawSegment(df, segment, add, y, x, rows);
+ }
+ for (; x < x1; ++x) {
+ DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, rows);
+ }
+}
+
+void ComputeSegments(const Spline::Point& center, const float intensity,
+ const float color[3], const float sigma,
+ std::vector<SplineSegment>& segments,
+ std::vector<std::pair<size_t, size_t>>& segments_by_y) {
+ // Sanity check sigma, inverse sigma and intensity
+ if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) &&
+ std::isfinite(intensity))) {
+ return;
+ }
+#if JXL_HIGH_PRECISION
+ constexpr float kDistanceExp = 5;
+#else
+ // About 30% faster.
+ constexpr float kDistanceExp = 3;
+#endif
+ // We cap from below colors to at least 0.01.
+ float max_color = 0.01f;
+ for (size_t c = 0; c < 3; c++) {
+ max_color = std::max(max_color, std::abs(color[c] * intensity));
+ }
+ // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops
+ // below 10^-kDistanceExp.
+ const float maximum_distance =
+ std::sqrt(-2 * sigma * sigma *
+ (std::log(0.1) * kDistanceExp - std::log(max_color)));
+ SplineSegment segment;
+ segment.center_y = center.y;
+ segment.center_x = center.x;
+ memcpy(segment.color, color, sizeof(segment.color));
+ segment.inv_sigma = 1.0f / sigma;
+ segment.sigma_over_4_times_intensity = .25f * sigma * intensity;
+ segment.maximum_distance = maximum_distance;
+ ssize_t y0 = center.y - maximum_distance + .5f;
+ ssize_t y1 = center.y + maximum_distance + 1.5f; // one-past-the-end
+ for (ssize_t y = std::max<ssize_t>(y0, 0); y < y1; y++) {
+ segments_by_y.emplace_back(y, segments.size());
+ }
+ segments.push_back(segment);
+}
+
+void DrawSegments(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y,
+ float* JXL_RESTRICT row_b, const Rect& image_rect,
+ const bool add, const SplineSegment* segments,
+ const size_t* segment_indices,
+ const size_t* segment_y_start) {
+ JXL_ASSERT(image_rect.ysize() == 1);
+ float* JXL_RESTRICT rows[3] = {row_x - image_rect.x0(),
+ row_y - image_rect.x0(),
+ row_b - image_rect.x0()};
+ size_t y = image_rect.y0();
+ for (size_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) {
+ DrawSegment(segments[segment_indices[i]], add, y, image_rect.x0(),
+ image_rect.x0() + image_rect.xsize(), rows);
+ }
+}
+
+void SegmentsFromPoints(
+ const Spline& spline,
+ const std::vector<std::pair<Spline::Point, float>>& points_to_draw,
+ const float arc_length, std::vector<SplineSegment>& segments,
+ std::vector<std::pair<size_t, size_t>>& segments_by_y) {
+ const float inv_arc_length = 1.0f / arc_length;
+ int k = 0;
+ for (const auto& point_to_draw : points_to_draw) {
+ const Spline::Point& point = point_to_draw.first;
+ const float multiplier = point_to_draw.second;
+ const float progress_along_arc =
+ std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length);
+ ++k;
+ float color[3];
+ for (size_t c = 0; c < 3; ++c) {
+ color[c] =
+ ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc);
+ }
+ const float sigma =
+ ContinuousIDCT(spline.sigma_dct, (32 - 1) * progress_along_arc);
+ ComputeSegments(point, multiplier, color, sigma, segments, segments_by_y);
+ }
+}
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+HWY_EXPORT(SegmentsFromPoints);
+HWY_EXPORT(DrawSegments);
+
+namespace {
+
+// It is not in spec, but reasonable limit to avoid overflows.
+template <typename T>
+Status ValidateSplinePointPos(const T& x, const T& y) {
+ constexpr T kSplinePosLimit = 1u << 23;
+ if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) ||
+ (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) {
+ return JXL_FAILURE("Spline coordinates out of bounds");
+ }
+ return true;
+}
+
+// Maximum number of spline control points per frame is
+// std::min(kMaxNumControlPoints, xsize * ysize / 2)
+constexpr size_t kMaxNumControlPoints = 1u << 20u;
+constexpr size_t kMaxNumControlPointsPerPixelRatio = 2;
+
+float AdjustedQuant(const int32_t adjustment) {
+ return (adjustment >= 0) ? (1.f + .125f * adjustment)
+ : 1.f / (1.f - .125f * adjustment);
+}
+
+float InvAdjustedQuant(const int32_t adjustment) {
+ return (adjustment >= 0) ? 1.f / (1.f + .125f * adjustment)
+ : (1.f - .125f * adjustment);
+}
+
+// X, Y, B, sigma.
+static constexpr float kChannelWeight[] = {0.0042f, 0.075f, 0.07f, .3333f};
+
+Status DecodeAllStartingPoints(std::vector<Spline::Point>* const points,
+ BitReader* const br, ANSSymbolReader* reader,
+ const std::vector<uint8_t>& context_map,
+ const size_t num_splines) {
+ points->clear();
+ points->reserve(num_splines);
+ int64_t last_x = 0;
+ int64_t last_y = 0;
+ for (size_t i = 0; i < num_splines; i++) {
+ int64_t x =
+ reader->ReadHybridUint(kStartingPositionContext, br, context_map);
+ int64_t y =
+ reader->ReadHybridUint(kStartingPositionContext, br, context_map);
+ if (i != 0) {
+ x = UnpackSigned(x) + last_x;
+ y = UnpackSigned(y) + last_y;
+ }
+ JXL_RETURN_IF_ERROR(ValidateSplinePointPos(x, y));
+ points->emplace_back(static_cast<float>(x), static_cast<float>(y));
+ last_x = x;
+ last_y = y;
+ }
+ return true;
+}
+
+struct Vector {
+ float x, y;
+ Vector operator-() const { return {-x, -y}; }
+ Vector operator+(const Vector& other) const {
+ return {x + other.x, y + other.y};
+ }
+ float SquaredNorm() const { return x * x + y * y; }
+};
+Vector operator*(const float k, const Vector& vec) {
+ return {k * vec.x, k * vec.y};
+}
+
+Spline::Point operator+(const Spline::Point& p, const Vector& vec) {
+ return {p.x + vec.x, p.y + vec.y};
+}
+Vector operator-(const Spline::Point& a, const Spline::Point& b) {
+ return {a.x - b.x, a.y - b.y};
+}
+
+// TODO(eustas): avoid making a copy of "points".
+void DrawCentripetalCatmullRomSpline(std::vector<Spline::Point> points,
+ std::vector<Spline::Point>& result) {
+ if (points.empty()) return;
+ if (points.size() == 1) {
+ result.push_back(points[0]);
+ return;
+ }
+ // Number of points to compute between each control point.
+ static constexpr int kNumPoints = 16;
+ result.reserve((points.size() - 1) * kNumPoints + 1);
+ points.insert(points.begin(), points[0] + (points[0] - points[1]));
+ points.push_back(points[points.size() - 1] +
+ (points[points.size() - 1] - points[points.size() - 2]));
+ // points has at least 4 elements at this point.
+ for (size_t start = 0; start < points.size() - 3; ++start) {
+ // 4 of them are used, and we draw from p[1] to p[2].
+ const Spline::Point* const p = &points[start];
+ result.push_back(p[1]);
+ float d[3];
+ float t[4];
+ t[0] = 0;
+ for (int k = 0; k < 3; ++k) {
+ // TODO(eustas): for each segment delta is calculated 3 times...
+ // TODO(eustas): restrict d[k] with reasonable limit and spec it.
+ d[k] = std::sqrt(hypotf(p[k + 1].x - p[k].x, p[k + 1].y - p[k].y));
+ t[k + 1] = t[k] + d[k];
+ }
+ for (int i = 1; i < kNumPoints; ++i) {
+ const float tt = d[0] + (static_cast<float>(i) / kNumPoints) * d[1];
+ Spline::Point a[3];
+ for (int k = 0; k < 3; ++k) {
+ // TODO(eustas): reciprocal multiplication would be faster.
+ a[k] = p[k] + ((tt - t[k]) / d[k]) * (p[k + 1] - p[k]);
+ }
+ Spline::Point b[2];
+ for (int k = 0; k < 2; ++k) {
+ b[k] = a[k] + ((tt - t[k]) / (d[k] + d[k + 1])) * (a[k + 1] - a[k]);
+ }
+ result.push_back(b[0] + ((tt - t[1]) / d[1]) * (b[1] - b[0]));
+ }
+ }
+ result.push_back(points[points.size() - 2]);
+}
+
+// Move along the line segments defined by `points`, `kDesiredRenderingDistance`
+// pixels at a time, and call `functor` with each point and the actual distance
+// to the previous point (which will always be kDesiredRenderingDistance except
+// possibly for the very last point).
+// TODO(eustas): this method always adds the last point, but never the first
+// (unless those are one); I believe both ends matter.
+template <typename Points, typename Functor>
+void ForEachEquallySpacedPoint(const Points& points, const Functor& functor) {
+ JXL_ASSERT(!points.empty());
+ Spline::Point current = points.front();
+ functor(current, kDesiredRenderingDistance);
+ auto next = points.begin();
+ while (next != points.end()) {
+ const Spline::Point* previous = &current;
+ float arclength_from_previous = 0.f;
+ for (;;) {
+ if (next == points.end()) {
+ functor(*previous, arclength_from_previous);
+ return;
+ }
+ const float arclength_to_next =
+ std::sqrt((*next - *previous).SquaredNorm());
+ if (arclength_from_previous + arclength_to_next >=
+ kDesiredRenderingDistance) {
+ current =
+ *previous + ((kDesiredRenderingDistance - arclength_from_previous) /
+ arclength_to_next) *
+ (*next - *previous);
+ functor(current, kDesiredRenderingDistance);
+ break;
+ }
+ arclength_from_previous += arclength_to_next;
+ previous = &*next;
+ ++next;
+ }
+ }
+}
+
+} // namespace
+
+QuantizedSpline::QuantizedSpline(const Spline& original,
+ const int32_t quantization_adjustment,
+ const float y_to_x, const float y_to_b) {
+ JXL_ASSERT(!original.control_points.empty());
+ control_points_.reserve(original.control_points.size() - 1);
+ const Spline::Point& starting_point = original.control_points.front();
+ int previous_x = static_cast<int>(roundf(starting_point.x)),
+ previous_y = static_cast<int>(roundf(starting_point.y));
+ int previous_delta_x = 0, previous_delta_y = 0;
+ for (auto it = original.control_points.begin() + 1;
+ it != original.control_points.end(); ++it) {
+ const int new_x = static_cast<int>(roundf(it->x));
+ const int new_y = static_cast<int>(roundf(it->y));
+ const int new_delta_x = new_x - previous_x;
+ const int new_delta_y = new_y - previous_y;
+ control_points_.emplace_back(new_delta_x - previous_delta_x,
+ new_delta_y - previous_delta_y);
+ previous_delta_x = new_delta_x;
+ previous_delta_y = new_delta_y;
+ previous_x = new_x;
+ previous_y = new_y;
+ }
+
+ const auto to_int = [](float v) -> int {
+ return static_cast<int>(roundf(v));
+ };
+
+ const auto quant = AdjustedQuant(quantization_adjustment);
+ const auto inv_quant = InvAdjustedQuant(quantization_adjustment);
+ for (int c : {1, 0, 2}) {
+ float factor = (c == 0) ? y_to_x : (c == 1) ? 0 : y_to_b;
+ for (int i = 0; i < 32; ++i) {
+ const float dct_factor = (i == 0) ? kSqrt2 : 1.0f;
+ const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f;
+ auto restored_y =
+ color_dct_[1][i] * inv_dct_factor * kChannelWeight[1] * inv_quant;
+ auto decorellated = original.color_dct[c][i] - factor * restored_y;
+ color_dct_[c][i] =
+ to_int(decorellated * dct_factor * quant / kChannelWeight[c]);
+ }
+ }
+ for (int i = 0; i < 32; ++i) {
+ const float dct_factor = (i == 0) ? kSqrt2 : 1.0f;
+ sigma_dct_[i] =
+ to_int(original.sigma_dct[i] * dct_factor * quant / kChannelWeight[3]);
+ }
+}
+
+Status QuantizedSpline::Dequantize(const Spline::Point& starting_point,
+ const int32_t quantization_adjustment,
+ const float y_to_x, const float y_to_b,
+ const uint64_t image_size,
+ uint64_t* total_estimated_area_reached,
+ Spline& result) const {
+ result.control_points.clear();
+ result.control_points.reserve(control_points_.size() + 1);
+ float px = roundf(starting_point.x);
+ float py = roundf(starting_point.y);
+ JXL_RETURN_IF_ERROR(ValidateSplinePointPos(px, py));
+ int current_x = static_cast<int>(px);
+ int current_y = static_cast<int>(py);
+ result.control_points.push_back(Spline::Point{static_cast<float>(current_x),
+ static_cast<float>(current_y)});
+ int current_delta_x = 0, current_delta_y = 0;
+ size_t manhattan_distance = 0;
+ for (const auto& point : control_points_) {
+ current_delta_x += point.first;
+ current_delta_y += point.second;
+ manhattan_distance += abs(current_delta_x) + abs(current_delta_y);
+ JXL_RETURN_IF_ERROR(
+ ValidateSplinePointPos(current_delta_x, current_delta_y));
+ current_x += current_delta_x;
+ current_y += current_delta_y;
+ JXL_RETURN_IF_ERROR(ValidateSplinePointPos(current_x, current_y));
+ result.control_points.push_back(Spline::Point{
+ static_cast<float>(current_x), static_cast<float>(current_y)});
+ }
+
+ const auto inv_quant = InvAdjustedQuant(quantization_adjustment);
+ for (int c = 0; c < 3; ++c) {
+ for (int i = 0; i < 32; ++i) {
+ const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f;
+ result.color_dct[c][i] =
+ color_dct_[c][i] * inv_dct_factor * kChannelWeight[c] * inv_quant;
+ }
+ }
+ for (int i = 0; i < 32; ++i) {
+ result.color_dct[0][i] += y_to_x * result.color_dct[1][i];
+ result.color_dct[2][i] += y_to_b * result.color_dct[1][i];
+ }
+ uint64_t width_estimate = 0;
+
+ uint64_t color[3] = {};
+ for (int c = 0; c < 3; ++c) {
+ for (int i = 0; i < 32; ++i) {
+ color[c] +=
+ static_cast<uint64_t>(ceil(inv_quant * std::abs(color_dct_[c][i])));
+ }
+ }
+ color[0] += static_cast<uint64_t>(ceil(abs(y_to_x))) * color[1];
+ color[2] += static_cast<uint64_t>(ceil(abs(y_to_b))) * color[1];
+ // This is not taking kChannelWeight into account, but up to constant factors
+ // it gives an indication of the influence of the color values on the area
+ // that will need to be rendered.
+ uint64_t logcolor = std::max(
+ uint64_t(1),
+ static_cast<uint64_t>(CeilLog2Nonzero(
+ uint64_t(1) + std::max(color[1], std::max(color[0], color[2])))));
+
+ for (int i = 0; i < 32; ++i) {
+ const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f;
+ result.sigma_dct[i] =
+ sigma_dct_[i] * inv_dct_factor * kChannelWeight[3] * inv_quant;
+ // If we include the factor kChannelWeight[3]=.3333f here, we get a
+ // realistic area estimate. We leave it out to simplify the calculations,
+ // and understand that this way we underestimate the area by a factor of
+ // 1/(0.3333*0.3333). This is taken into account in the limits below.
+ uint64_t weight = std::max(
+ uint64_t(1),
+ static_cast<uint64_t>(ceil(inv_quant * std::abs(sigma_dct_[i]))));
+ width_estimate += weight * weight * logcolor;
+ }
+ *total_estimated_area_reached += (width_estimate * manhattan_distance);
+ if (*total_estimated_area_reached >
+ std::min((1024 * image_size + (uint64_t(1) << 32)),
+ (uint64_t(1) << 42))) {
+ return JXL_FAILURE("Too large total_estimated_area_reached: %" PRIu64,
+ *total_estimated_area_reached);
+ }
+
+ return true;
+}
+
+Status QuantizedSpline::Decode(const std::vector<uint8_t>& context_map,
+ ANSSymbolReader* const decoder,
+ BitReader* const br,
+ const size_t max_control_points,
+ size_t* total_num_control_points) {
+ const size_t num_control_points =
+ decoder->ReadHybridUint(kNumControlPointsContext, br, context_map);
+ *total_num_control_points += num_control_points;
+ if (*total_num_control_points > max_control_points) {
+ return JXL_FAILURE("Too many control points: %" PRIuS,
+ *total_num_control_points);
+ }
+ control_points_.resize(num_control_points);
+ // Maximal image dimension.
+ constexpr int64_t kDeltaLimit = 1u << 30;
+ for (std::pair<int64_t, int64_t>& control_point : control_points_) {
+ control_point.first = UnpackSigned(
+ decoder->ReadHybridUint(kControlPointsContext, br, context_map));
+ control_point.second = UnpackSigned(
+ decoder->ReadHybridUint(kControlPointsContext, br, context_map));
+ // Check delta-deltas are not outrageous; it is not in spec, but there is
+ // no reason to allow larger values.
+ if ((control_point.first >= kDeltaLimit) ||
+ (control_point.first <= -kDeltaLimit) ||
+ (control_point.second >= kDeltaLimit) ||
+ (control_point.second <= -kDeltaLimit)) {
+ return JXL_FAILURE("Spline delta-delta is out of bounds");
+ }
+ }
+
+ const auto decode_dct = [decoder, br, &context_map](int dct[32]) -> Status {
+ for (int i = 0; i < 32; ++i) {
+ dct[i] =
+ UnpackSigned(decoder->ReadHybridUint(kDCTContext, br, context_map));
+ }
+ return true;
+ };
+ for (int c = 0; c < 3; ++c) {
+ JXL_RETURN_IF_ERROR(decode_dct(color_dct_[c]));
+ }
+ JXL_RETURN_IF_ERROR(decode_dct(sigma_dct_));
+ return true;
+}
+
+void Splines::Clear() {
+ quantization_adjustment_ = 0;
+ splines_.clear();
+ starting_points_.clear();
+ segments_.clear();
+ segment_indices_.clear();
+ segment_y_start_.clear();
+}
+
+Status Splines::Decode(jxl::BitReader* br, const size_t num_pixels) {
+ std::vector<uint8_t> context_map;
+ ANSCode code;
+ JXL_RETURN_IF_ERROR(
+ DecodeHistograms(br, kNumSplineContexts, &code, &context_map));
+ ANSSymbolReader decoder(&code, br);
+ const size_t num_splines =
+ 1 + decoder.ReadHybridUint(kNumSplinesContext, br, context_map);
+ size_t max_control_points = std::min(
+ kMaxNumControlPoints, num_pixels / kMaxNumControlPointsPerPixelRatio);
+ if (num_splines > max_control_points) {
+ return JXL_FAILURE("Too many splines: %" PRIuS, num_splines);
+ }
+ JXL_RETURN_IF_ERROR(DecodeAllStartingPoints(&starting_points_, br, &decoder,
+ context_map, num_splines));
+
+ quantization_adjustment_ = UnpackSigned(
+ decoder.ReadHybridUint(kQuantizationAdjustmentContext, br, context_map));
+
+ splines_.clear();
+ splines_.reserve(num_splines);
+ size_t num_control_points = num_splines;
+ for (size_t i = 0; i < num_splines; ++i) {
+ QuantizedSpline spline;
+ JXL_RETURN_IF_ERROR(spline.Decode(context_map, &decoder, br,
+ max_control_points, &num_control_points));
+ splines_.push_back(std::move(spline));
+ }
+
+ JXL_RETURN_IF_ERROR(decoder.CheckANSFinalState());
+
+ if (!HasAny()) {
+ return JXL_FAILURE("Decoded splines but got none");
+ }
+
+ return true;
+}
+
+void Splines::AddTo(Image3F* const opsin, const Rect& opsin_rect,
+ const Rect& image_rect) const {
+ return Apply</*add=*/true>(opsin, opsin_rect, image_rect);
+}
+void Splines::AddToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y,
+ float* JXL_RESTRICT row_b, const Rect& image_row) const {
+ return ApplyToRow</*add=*/true>(row_x, row_y, row_b, image_row);
+}
+
+void Splines::SubtractFrom(Image3F* const opsin) const {
+ return Apply</*add=*/false>(opsin, Rect(*opsin), Rect(*opsin));
+}
+
+Status Splines::InitializeDrawCache(const size_t image_xsize,
+ const size_t image_ysize,
+ const ColorCorrelationMap& cmap) {
+ // TODO(veluca): avoid storing segments that are entirely outside image
+ // boundaries.
+ segments_.clear();
+ segment_indices_.clear();
+ segment_y_start_.clear();
+ std::vector<std::pair<size_t, size_t>> segments_by_y;
+ std::vector<Spline::Point> intermediate_points;
+ uint64_t total_estimated_area_reached = 0;
+ std::vector<Spline> splines;
+ for (size_t i = 0; i < splines_.size(); ++i) {
+ Spline spline;
+ JXL_RETURN_IF_ERROR(splines_[i].Dequantize(
+ starting_points_[i], quantization_adjustment_, cmap.YtoXRatio(0),
+ cmap.YtoBRatio(0), image_xsize * image_ysize,
+ &total_estimated_area_reached, spline));
+ if (std::adjacent_find(spline.control_points.begin(),
+ spline.control_points.end()) !=
+ spline.control_points.end()) {
+ // Otherwise division by zero might occur. Once control points coincide,
+ // the direction of curve is undefined...
+ return JXL_FAILURE(
+ "identical successive control points in spline %" PRIuS, i);
+ }
+ splines.push_back(spline);
+ }
+ // TODO(firsching) Change this into a JXL_FAILURE for level 5 codestreams.
+ if (total_estimated_area_reached >
+ std::min((8 * image_xsize * image_ysize + (uint64_t(1) << 25)),
+ (uint64_t(1) << 30))) {
+ JXL_WARNING(
+ "Large total_estimated_area_reached, expect slower decoding: %" PRIu64,
+ total_estimated_area_reached);
+ }
+
+ for (Spline& spline : splines) {
+ std::vector<std::pair<Spline::Point, float>> points_to_draw;
+ auto add_point = [&](const Spline::Point& point, const float multiplier) {
+ points_to_draw.emplace_back(point, multiplier);
+ };
+ intermediate_points.clear();
+ DrawCentripetalCatmullRomSpline(spline.control_points, intermediate_points);
+ ForEachEquallySpacedPoint(intermediate_points, add_point);
+ const float arc_length =
+ (points_to_draw.size() - 2) * kDesiredRenderingDistance +
+ points_to_draw.back().second;
+ if (arc_length <= 0.f) {
+ // This spline wouldn't have any effect.
+ continue;
+ }
+ HWY_DYNAMIC_DISPATCH(SegmentsFromPoints)
+ (spline, points_to_draw, arc_length, segments_, segments_by_y);
+ }
+
+ // TODO(eustas): consider linear sorting here.
+ std::sort(segments_by_y.begin(), segments_by_y.end());
+ segment_indices_.resize(segments_by_y.size());
+ segment_y_start_.resize(image_ysize + 1);
+ for (size_t i = 0; i < segments_by_y.size(); i++) {
+ segment_indices_[i] = segments_by_y[i].second;
+ size_t y = segments_by_y[i].first;
+ if (y < image_ysize) {
+ segment_y_start_[y + 1]++;
+ }
+ }
+ for (size_t y = 0; y < image_ysize; y++) {
+ segment_y_start_[y + 1] += segment_y_start_[y];
+ }
+ return true;
+}
+
+template <bool add>
+void Splines::ApplyToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y,
+ float* JXL_RESTRICT row_b,
+ const Rect& image_row) const {
+ if (segments_.empty()) return;
+ JXL_ASSERT(image_row.ysize() == 1);
+ for (size_t iy = 0; iy < image_row.ysize(); iy++) {
+ HWY_DYNAMIC_DISPATCH(DrawSegments)
+ (row_x, row_y, row_b, image_row.Line(iy), add, segments_.data(),
+ segment_indices_.data(), segment_y_start_.data());
+ }
+}
+
+template <bool add>
+void Splines::Apply(Image3F* const opsin, const Rect& opsin_rect,
+ const Rect& image_rect) const {
+ if (segments_.empty()) return;
+ for (size_t iy = 0; iy < image_rect.ysize(); iy++) {
+ const size_t y0 = opsin_rect.Line(iy).y0();
+ const size_t x0 = opsin_rect.x0();
+ ApplyToRow<add>(opsin->PlaneRow(0, y0) + x0, opsin->PlaneRow(1, y0) + x0,
+ opsin->PlaneRow(2, y0) + x0, image_rect.Line(iy));
+ }
+}
+
+} // namespace jxl
+#endif // HWY_ONCE
diff --git a/third_party/jpeg-xl/lib/jxl/splines.h b/third_party/jpeg-xl/lib/jxl/splines.h
new file mode 100644
index 0000000000..c8dad3417c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/splines.h
@@ -0,0 +1,148 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_SPLINES_H_
+#define LIB_JXL_SPLINES_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/chroma_from_luma.h"
+#include "lib/jxl/dec_ans.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/entropy_coder.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+static constexpr float kDesiredRenderingDistance = 1.f;
+
+enum SplineEntropyContexts : size_t {
+ kQuantizationAdjustmentContext = 0,
+ kStartingPositionContext,
+ kNumSplinesContext,
+ kNumControlPointsContext,
+ kControlPointsContext,
+ kDCTContext,
+ kNumSplineContexts
+};
+
+struct Spline {
+ struct Point {
+ Point() : x(0.0f), y(0.0f) {}
+ Point(float x, float y) : x(x), y(y) {}
+ float x, y;
+ bool operator==(const Point& other) const {
+ return std::fabs(x - other.x) < 1e-3f && std::fabs(y - other.y) < 1e-3f;
+ }
+ };
+ std::vector<Point> control_points;
+ // X, Y, B.
+ float color_dct[3][32];
+ // Splines are draws by normalized Gaussian splatting. This controls the
+ // Gaussian's parameter along the spline.
+ float sigma_dct[32];
+};
+
+class QuantizedSplineEncoder;
+
+class QuantizedSpline {
+ public:
+ QuantizedSpline() = default;
+ explicit QuantizedSpline(const Spline& original,
+ int32_t quantization_adjustment, float y_to_x,
+ float y_to_b);
+
+ Status Dequantize(const Spline::Point& starting_point,
+ int32_t quantization_adjustment, float y_to_x, float y_to_b,
+ uint64_t image_size, uint64_t* total_estimated_area_reached,
+ Spline& result) const;
+
+ Status Decode(const std::vector<uint8_t>& context_map,
+ ANSSymbolReader* decoder, BitReader* br,
+ size_t max_control_points, size_t* total_num_control_points);
+
+ private:
+ friend class QuantizedSplineEncoder;
+
+ std::vector<std::pair<int64_t, int64_t>>
+ control_points_; // Double delta-encoded.
+ int color_dct_[3][32] = {};
+ int sigma_dct_[32] = {};
+};
+
+// A single "drawable unit" of a spline, i.e. a line of the region in which we
+// render each Gaussian. The structure doesn't actually depend on the exact
+// row, which allows reuse for different y values (which are tracked
+// separately).
+struct SplineSegment {
+ float center_x, center_y;
+ float maximum_distance;
+ float inv_sigma;
+ float sigma_over_4_times_intensity;
+ float color[3];
+};
+
+class Splines {
+ public:
+ Splines() = default;
+ explicit Splines(const int32_t quantization_adjustment,
+ std::vector<QuantizedSpline> splines,
+ std::vector<Spline::Point> starting_points)
+ : quantization_adjustment_(quantization_adjustment),
+ splines_(std::move(splines)),
+ starting_points_(std::move(starting_points)) {}
+
+ bool HasAny() const { return !splines_.empty(); }
+
+ void Clear();
+
+ Status Decode(BitReader* br, size_t num_pixels);
+
+ void AddTo(Image3F* opsin, const Rect& opsin_rect,
+ const Rect& image_rect) const;
+ void AddToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y,
+ float* JXL_RESTRICT row_b, const Rect& image_row) const;
+ void SubtractFrom(Image3F* opsin) const;
+
+ const std::vector<QuantizedSpline>& QuantizedSplines() const {
+ return splines_;
+ }
+ const std::vector<Spline::Point>& StartingPoints() const {
+ return starting_points_;
+ }
+
+ int32_t GetQuantizationAdjustment() const { return quantization_adjustment_; }
+
+ Status InitializeDrawCache(size_t image_xsize, size_t image_ysize,
+ const ColorCorrelationMap& cmap);
+
+ private:
+ template <bool>
+ void ApplyToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y,
+ float* JXL_RESTRICT row_b, const Rect& image_row) const;
+ template <bool>
+ void Apply(Image3F* opsin, const Rect& opsin_rect,
+ const Rect& image_rect) const;
+
+ // If positive, quantization weights are multiplied by 1 + this/8, which
+ // increases precision. If negative, they are divided by 1 - this/8. If 0,
+ // they are unchanged.
+ int32_t quantization_adjustment_ = 0;
+ std::vector<QuantizedSpline> splines_;
+ std::vector<Spline::Point> starting_points_;
+ std::vector<SplineSegment> segments_;
+ std::vector<size_t> segment_indices_;
+ std::vector<size_t> segment_y_start_;
+};
+
+} // namespace jxl
+
+#endif // LIB_JXL_SPLINES_H_
diff --git a/third_party/jpeg-xl/lib/jxl/splines_gbench.cc b/third_party/jpeg-xl/lib/jxl/splines_gbench.cc
new file mode 100644
index 0000000000..78ff6d41c0
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/splines_gbench.cc
@@ -0,0 +1,52 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "benchmark/benchmark.h"
+#include "lib/jxl/splines.h"
+
+namespace jxl {
+namespace {
+
+constexpr int kQuantizationAdjustment = 0;
+const ColorCorrelationMap* const cmap = new ColorCorrelationMap;
+const float kYToX = cmap->YtoXRatio(0);
+const float kYToB = cmap->YtoBRatio(0);
+
+void BM_Splines(benchmark::State& state) {
+ const size_t n = state.range();
+
+ std::vector<Spline> spline_data = {
+ {/*control_points=*/{
+ {9, 54}, {118, 159}, {97, 3}, {10, 40}, {150, 25}, {120, 300}},
+ /*color_dct=*/
+ {{0.03125f, 0.00625f, 0.003125f}, {1.f, 0.321875f}, {1.f, 0.24375f}},
+ /*sigma_dct=*/{0.3125f, 0.f, 0.f, 0.0625f}}};
+ std::vector<QuantizedSpline> quantized_splines;
+ std::vector<Spline::Point> starting_points;
+ for (const Spline& spline : spline_data) {
+ quantized_splines.emplace_back(spline, kQuantizationAdjustment, kYToX,
+ kYToB);
+ starting_points.push_back(spline.control_points.front());
+ }
+ Splines splines(kQuantizationAdjustment, std::move(quantized_splines),
+ std::move(starting_points));
+
+ Image3F drawing_area(320, 320);
+ ZeroFillImage(&drawing_area);
+ for (auto _ : state) {
+ for (size_t i = 0; i < n; ++i) {
+ JXL_CHECK(splines.InitializeDrawCache(drawing_area.xsize(),
+ drawing_area.ysize(), *cmap));
+ splines.AddTo(&drawing_area, Rect(drawing_area), Rect(drawing_area));
+ }
+ }
+
+ state.SetItemsProcessed(n * state.iterations());
+}
+
+BENCHMARK(BM_Splines)->Range(1, 1 << 10);
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/splines_test.cc b/third_party/jpeg-xl/lib/jxl/splines_test.cc
new file mode 100644
index 0000000000..8d6bc7ed1c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/splines_test.cc
@@ -0,0 +1,348 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/splines.h"
+
+#include "lib/extras/codec.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_splines.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+
+std::ostream& operator<<(std::ostream& os, const Spline::Point& p) {
+ return os << "(" << p.x << ", " << p.y << ")";
+}
+
+std::ostream& operator<<(std::ostream& os, const Spline& spline) {
+ return os << "(spline with " << spline.control_points.size()
+ << " control points)";
+}
+
+namespace {
+
+using ::testing::AllOf;
+using ::testing::Field;
+using ::testing::FloatNear;
+using ::testing::Pointwise;
+
+constexpr int kQuantizationAdjustment = 0;
+const ColorCorrelationMap* const cmap = new ColorCorrelationMap;
+const float kYToX = cmap->YtoXRatio(0);
+const float kYToB = cmap->YtoBRatio(0);
+
+constexpr float kTolerance = 0.003125;
+
+std::vector<Spline> DequantizeSplines(const Splines& splines) {
+ const auto& quantized_splines = splines.QuantizedSplines();
+ const auto& starting_points = splines.StartingPoints();
+ JXL_CHECK(quantized_splines.size() == starting_points.size());
+
+ std::vector<Spline> dequantized;
+ uint64_t total = 0;
+ for (size_t i = 0; i < quantized_splines.size(); ++i) {
+ dequantized.emplace_back();
+ JXL_CHECK(quantized_splines[i].Dequantize(
+ starting_points[i], kQuantizationAdjustment, kYToX, kYToB, 2u << 30u,
+ &total, dequantized.back()));
+ }
+ return dequantized;
+}
+
+MATCHER(ControlPointIs, "") {
+ const Spline::Point& actual = std::get<0>(arg);
+ const Spline::Point& expected = std::get<1>(arg);
+ return testing::ExplainMatchResult(
+ AllOf(Field(&Spline::Point::x, FloatNear(expected.x, kTolerance)),
+ Field(&Spline::Point::y, FloatNear(expected.y, kTolerance))),
+ actual, result_listener);
+}
+
+MATCHER(ControlPointsMatch, "") {
+ const Spline& actual = std::get<0>(arg);
+ const Spline& expected = std::get<1>(arg);
+ return testing::ExplainMatchResult(
+ Field(&Spline::control_points,
+ Pointwise(ControlPointIs(), expected.control_points)),
+ actual, result_listener);
+}
+
+MATCHER(SplinesMatch, "") {
+ const Spline& actual = std::get<0>(arg);
+ const Spline& expected = std::get<1>(arg);
+ if (!testing::ExplainMatchResult(ControlPointsMatch(), arg,
+ result_listener)) {
+ return false;
+ }
+ for (int i = 0; i < 3; ++i) {
+ size_t color_dct_size =
+ sizeof(expected.color_dct[i]) / sizeof(expected.color_dct[i][0]);
+ for (size_t j = 0; j < color_dct_size; j++) {
+ testing::StringMatchResultListener color_dct_listener;
+ if (!testing::ExplainMatchResult(
+ FloatNear(expected.color_dct[i][j], kTolerance),
+ actual.color_dct[i][j], &color_dct_listener)) {
+ *result_listener << ", where color_dct[" << i << "][" << j
+ << "] don't match, " << color_dct_listener.str();
+ return false;
+ }
+ }
+ }
+ size_t sigma_dct_size =
+ sizeof(expected.sigma_dct) / sizeof(expected.sigma_dct[0]);
+ for (size_t i = 0; i < sigma_dct_size; i++) {
+ testing::StringMatchResultListener sigma_listener;
+ if (!testing::ExplainMatchResult(
+ FloatNear(expected.sigma_dct[i], kTolerance), actual.sigma_dct[i],
+ &sigma_listener)) {
+ *result_listener << ", where sigma_dct[" << i << "] don't match, "
+ << sigma_listener.str();
+ return false;
+ }
+ }
+ return true;
+}
+
+} // namespace
+
+TEST(SplinesTest, Serialization) {
+ std::vector<Spline> spline_data = {
+ {/*control_points=*/{
+ {109, 54}, {218, 159}, {80, 3}, {110, 274}, {94, 185}, {17, 277}},
+ /*color_dct=*/
+ {{36.3, 39.7, 23.2, 67.5, 4.4, 71.5, 62.3, 32.3, 92.2, 10.1, 10.8,
+ 9.2, 6.1, 10.5, 79.1, 7, 24.6, 90.8, 5.5, 84, 43.8, 49,
+ 33.5, 78.9, 54.5, 77.9, 62.1, 51.4, 36.4, 14.3, 83.7, 35.4},
+ {9.4, 53.4, 9.5, 74.9, 72.7, 26.7, 7.9, 0.9, 84.9, 23.2, 26.5,
+ 31.1, 91, 11.7, 74.1, 39.3, 23.7, 82.5, 4.8, 2.7, 61.2, 96.4,
+ 13.7, 66.7, 62.9, 82.4, 5.9, 98.7, 21.5, 7.9, 51.7, 63.1},
+ {48, 39.3, 6.9, 26.3, 33.3, 6.2, 1.7, 98.9, 59.9, 59.6, 95,
+ 61.3, 82.7, 53, 6.1, 30.4, 34.7, 96.9, 93.4, 17, 38.8, 80.8,
+ 63, 18.6, 43.6, 32.3, 61, 20.2, 24.3, 28.3, 69.1, 62.4}},
+ /*sigma_dct=*/{32.7, 21.5, 44.4, 1.8, 45.8, 90.6, 29.3, 59.2,
+ 23.7, 85.2, 84.8, 27.2, 42.1, 84.1, 50.6, 17.6,
+ 93.7, 4.9, 2.6, 69.8, 94.9, 52, 24.3, 18.8,
+ 12.1, 95.7, 28.5, 81.4, 89.9, 31.4, 74.8, 52}},
+ {/*control_points=*/{{172, 309},
+ {196, 277},
+ {42, 238},
+ {114, 350},
+ {307, 290},
+ {316, 269},
+ {124, 66},
+ {233, 267}},
+ /*color_dct=*/
+ {{15, 28.9, 22, 6.6, 41.8, 83, 8.6, 56.8, 68.9, 9.7, 5.4,
+ 19.8, 70.8, 90, 52.5, 65.2, 7.8, 23.5, 26.4, 72.2, 64.7, 87.1,
+ 1.3, 67.5, 46, 68.4, 65.4, 35.5, 29.1, 13, 41.6, 23.9},
+ {47.7, 79.4, 62.7, 29.1, 96.8, 18.5, 17.6, 15.2, 80.5, 56, 96.2,
+ 59.9, 26.7, 96.1, 92.3, 42.1, 35.8, 54, 23.2, 55, 76, 35.8,
+ 58.4, 88.7, 2.4, 78.1, 95.6, 27.5, 6.6, 78.5, 24.1, 69.8},
+ {43.8, 96.5, 0.9, 95.1, 49.1, 71.2, 25.1, 33.6, 75.2, 95, 82.1,
+ 19.7, 10.5, 44.9, 50, 93.3, 83.5, 99.5, 64.6, 54, 3.5, 99.7,
+ 45.3, 82.1, 22.4, 37.9, 60, 32.2, 12.6, 4.6, 65.5, 96.4}},
+ /*sigma_dct=*/{72.5, 2.6, 41.7, 2.2, 39.7, 79.1, 69.6, 19.9,
+ 92.3, 71.5, 41.9, 62.1, 30, 49.4, 70.3, 45.3,
+ 62.5, 47.2, 46.7, 41.2, 90.8, 46.8, 91.2, 55,
+ 8.1, 69.6, 25.4, 84.7, 61.7, 27.6, 3.7, 46.9}},
+ {/*control_points=*/{{100, 186},
+ {257, 97},
+ {170, 49},
+ {25, 169},
+ {309, 104},
+ {232, 237},
+ {385, 101},
+ {122, 168},
+ {26, 300},
+ {390, 88}},
+ /*color_dct=*/
+ {{16.9, 64.8, 4.2, 10.6, 23.5, 17, 79.3, 5.7, 60.4, 16.6, 94.9,
+ 63.7, 87.6, 10.5, 3.8, 61.1, 22.9, 81.9, 80.4, 40.5, 45.9, 25.4,
+ 39.8, 30, 50.2, 90.4, 27.9, 93.7, 65.1, 48.2, 22.3, 43.9},
+ {24.9, 66, 3.5, 90.2, 97.1, 15.8, 35.6, 0.6, 68, 39.6, 24.4,
+ 85.9, 57.7, 77.6, 47.5, 67.9, 4.3, 5.4, 91.2, 58.5, 0.1, 52.2,
+ 3.5, 47.8, 63.2, 43.5, 85.8, 35.8, 50.2, 35.9, 19.2, 48.2},
+ {82.8, 44.9, 76.4, 39.5, 94.1, 14.3, 89.8, 10, 10.5, 74.5, 56.3,
+ 65.8, 7.8, 23.3, 52.8, 99.3, 56.8, 46, 76.7, 13.5, 67, 22.4,
+ 29.9, 43.3, 70.3, 26, 74.3, 53.9, 62, 19.1, 49.3, 46.7}},
+ /*sigma_dct=*/{83.5, 1.7, 25.1, 18.7, 46.5, 75.3, 28, 62.3,
+ 50.3, 23.3, 85.6, 96, 45.8, 33.1, 33.4, 52.9,
+ 26.3, 58.5, 19.6, 70, 92.6, 22.5, 57, 21.6,
+ 76.8, 87.5, 22.9, 66.3, 35.7, 35.6, 56.8, 67.2}},
+ };
+
+ std::vector<QuantizedSpline> quantized_splines;
+ std::vector<Spline::Point> starting_points;
+ for (const Spline& spline : spline_data) {
+ quantized_splines.emplace_back(spline, kQuantizationAdjustment, kYToX,
+ kYToB);
+ starting_points.push_back(spline.control_points.front());
+ }
+
+ Splines splines(kQuantizationAdjustment, std::move(quantized_splines),
+ std::move(starting_points));
+ const std::vector<Spline> quantized_spline_data = DequantizeSplines(splines);
+ EXPECT_THAT(quantized_spline_data,
+ Pointwise(ControlPointsMatch(), spline_data));
+
+ BitWriter writer;
+ EncodeSplines(splines, &writer, kLayerSplines, HistogramParams(), nullptr);
+ writer.ZeroPadToByte();
+ const size_t bits_written = writer.BitsWritten();
+
+ printf("Wrote %" PRIuS " bits of splines.\n", bits_written);
+
+ BitReader reader(writer.GetSpan());
+ Splines decoded_splines;
+ ASSERT_TRUE(decoded_splines.Decode(&reader, /*num_pixels=*/1000));
+ ASSERT_TRUE(reader.JumpToByteBoundary());
+ EXPECT_EQ(reader.TotalBitsConsumed(), bits_written);
+ ASSERT_TRUE(reader.Close());
+
+ const std::vector<Spline> decoded_spline_data =
+ DequantizeSplines(decoded_splines);
+ EXPECT_THAT(decoded_spline_data,
+ Pointwise(SplinesMatch(), quantized_spline_data));
+}
+
+#ifdef JXL_CRASH_ON_ERROR
+TEST(SplinesTest, DISABLED_TooManySplinesTest) {
+#else
+TEST(SplinesTest, TooManySplinesTest) {
+#endif
+ // This is more than the limit for 1000 pixels.
+ const size_t kNumSplines = 300;
+
+ std::vector<QuantizedSpline> quantized_splines;
+ std::vector<Spline::Point> starting_points;
+ for (size_t i = 0; i < kNumSplines; i++) {
+ Spline spline = {
+ /*control_points=*/{{1.f + i, 2}, {10.f + i, 25}, {30.f + i, 300}},
+ /*color_dct=*/
+ {{1.f, 0.2f, 0.1f}, {35.7f, 10.3f}, {35.7f, 7.8f}},
+ /*sigma_dct=*/{10.f, 0.f, 0.f, 2.f}};
+ quantized_splines.emplace_back(spline, kQuantizationAdjustment, kYToX,
+ kYToB);
+ starting_points.push_back(spline.control_points.front());
+ }
+
+ Splines splines(kQuantizationAdjustment, std::move(quantized_splines),
+ std::move(starting_points));
+ BitWriter writer;
+ EncodeSplines(splines, &writer, kLayerSplines,
+ HistogramParams(SpeedTier::kFalcon, 1), nullptr);
+ writer.ZeroPadToByte();
+ // Re-read splines.
+ BitReader reader(writer.GetSpan());
+ Splines decoded_splines;
+ EXPECT_FALSE(decoded_splines.Decode(&reader, /*num_pixels=*/1000));
+ EXPECT_TRUE(reader.Close());
+}
+
+#ifdef JXL_CRASH_ON_ERROR
+TEST(SplinesTest, DISABLED_DuplicatePoints) {
+#else
+TEST(SplinesTest, DuplicatePoints) {
+#endif
+ std::vector<Spline::Point> control_points{
+ {9, 54}, {118, 159}, {97, 3}, // Repeated.
+ {97, 3}, {10, 40}, {150, 25}, {120, 300}};
+ Spline spline{control_points,
+ /*color_dct=*/
+ {{1.f, 0.2f, 0.1f}, {35.7f, 10.3f}, {35.7f, 7.8f}},
+ /*sigma_dct=*/{10.f, 0.f, 0.f, 2.f}};
+ std::vector<Spline> spline_data{spline};
+ std::vector<QuantizedSpline> quantized_splines;
+ std::vector<Spline::Point> starting_points;
+ for (const Spline& spline : spline_data) {
+ quantized_splines.emplace_back(spline, kQuantizationAdjustment, kYToX,
+ kYToB);
+ starting_points.push_back(spline.control_points.front());
+ }
+ Splines splines(kQuantizationAdjustment, std::move(quantized_splines),
+ std::move(starting_points));
+
+ Image3F image(320, 320);
+ ZeroFillImage(&image);
+ EXPECT_FALSE(
+ splines.InitializeDrawCache(image.xsize(), image.ysize(), *cmap));
+}
+
+TEST(SplinesTest, Drawing) {
+ CodecInOut io_expected;
+ const PaddedBytes orig = jxl::test::ReadTestData("jxl/splines.pfm");
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(orig), &io_expected,
+ /*pool=*/nullptr));
+
+ std::vector<Spline::Point> control_points{{9, 54}, {118, 159}, {97, 3},
+ {10, 40}, {150, 25}, {120, 300}};
+ // Use values that survive quant/decorellation roundtrip.
+ const Spline spline{
+ control_points,
+ /*color_dct=*/
+ {{0.4989345073699951171875000f, 0.4997999966144561767578125f},
+ {0.4772970676422119140625000f, 0.f, 0.5250000357627868652343750f},
+ {-0.0176776945590972900390625f, 0.4900000095367431640625000f,
+ 0.5250000357627868652343750f}},
+ /*sigma_dct=*/
+ {0.9427147507667541503906250f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f,
+ 0.6665999889373779296875000f}};
+ std::vector<Spline> spline_data = {spline};
+ std::vector<QuantizedSpline> quantized_splines;
+ std::vector<Spline::Point> starting_points;
+ for (const Spline& spline : spline_data) {
+ quantized_splines.emplace_back(spline, kQuantizationAdjustment, kYToX,
+ kYToB);
+ starting_points.push_back(spline.control_points.front());
+ }
+ Splines splines(kQuantizationAdjustment, std::move(quantized_splines),
+ std::move(starting_points));
+
+ Image3F image(320, 320);
+ ZeroFillImage(&image);
+ ASSERT_TRUE(splines.InitializeDrawCache(image.xsize(), image.ysize(), *cmap));
+ splines.AddTo(&image, Rect(image), Rect(image));
+
+ CodecInOut io_actual;
+ io_actual.SetFromImage(CopyImage(image), ColorEncoding::SRGB());
+ ASSERT_TRUE(io_actual.frames[0].TransformTo(io_expected.Main().c_current(),
+ GetJxlCms()));
+
+ JXL_ASSERT_OK(VerifyRelativeError(
+ *io_expected.Main().color(), *io_actual.Main().color(), 1e-2f, 1e-1f, _));
+}
+
+TEST(SplinesTest, ClearedEveryFrame) {
+ CodecInOut io_expected;
+ const PaddedBytes bytes_expected =
+ jxl::test::ReadTestData("jxl/spline_on_first_frame.png");
+ ASSERT_TRUE(SetFromBytes(Span<const uint8_t>(bytes_expected), &io_expected,
+ /*pool=*/nullptr));
+ CodecInOut io_actual;
+ const PaddedBytes bytes_actual =
+ jxl::test::ReadTestData("jxl/spline_on_first_frame.jxl");
+ ASSERT_TRUE(
+ test::DecodeFile({}, Span<const uint8_t>(bytes_actual), &io_actual));
+
+ ASSERT_TRUE(
+ io_actual.frames[0].TransformTo(ColorEncoding::SRGB(), GetJxlCms()));
+ for (size_t c = 0; c < 3; ++c) {
+ for (size_t y = 0; y < io_actual.ysize(); ++y) {
+ float* const JXL_RESTRICT row = io_actual.Main().color()->PlaneRow(c, y);
+ for (size_t x = 0; x < io_actual.xsize(); ++x) {
+ row[x] = Clamp1(row[x], 0.f, 1.f);
+ }
+ }
+ }
+ JXL_ASSERT_OK(VerifyRelativeError(
+ *io_expected.Main().color(), *io_actual.Main().color(), 1e-2f, 1e-1f, _));
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/test_image.cc b/third_party/jpeg-xl/lib/jxl/test_image.cc
new file mode 100644
index 0000000000..af1d1293ef
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/test_image.cc
@@ -0,0 +1,453 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/test_image.h"
+
+#include <jxl/encode.h>
+
+#include <algorithm>
+#include <cstring>
+#include <utility>
+
+#include "lib/extras/dec/color_description.h"
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/dec/decode.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+
+namespace jxl {
+namespace test {
+
+namespace {
+
+void StoreValue(float val, size_t bits_per_sample, JxlPixelFormat format,
+ uint8_t** out) {
+ const float mul = (1u << bits_per_sample) - 1;
+ if (format.data_type == JXL_TYPE_UINT8) {
+ **out = val * mul;
+ } else if (format.data_type == JXL_TYPE_UINT16) {
+ uint16_t uval = val * mul;
+ if (SwapEndianness(format.endianness)) {
+ uval = JXL_BSWAP16(uval);
+ }
+ memcpy(*out, &uval, 2);
+ } else if (format.data_type == JXL_TYPE_FLOAT) {
+ // TODO(szabadka) Add support for custom bits / exponent bits floats.
+ if (SwapEndianness(format.endianness)) {
+ val = BSwapFloat(val);
+ }
+ memcpy(*out, &val, 4);
+ } else {
+ // TODO(szabadka) Add support for FLOAT16.
+ }
+ *out += extras::PackedImage::BitsPerChannel(format.data_type) / 8;
+}
+
+void FillPackedImage(size_t bits_per_sample, uint16_t seed,
+ extras::PackedImage* image) {
+ const size_t xsize = image->xsize;
+ const size_t ysize = image->ysize;
+ const JxlPixelFormat format = image->format;
+
+ // Cause more significant image difference for successive seeds.
+ Rng generator(seed);
+
+ // Returns random integer in interval [0, max_value)
+ auto rngu = [&generator](size_t max_value) -> size_t {
+ return generator.UniformU(0, max_value);
+ };
+
+ // Returns random float in interval [0.0, max_value)
+ auto rngf = [&generator](float max_value) {
+ return generator.UniformF(0.0f, max_value);
+ };
+
+ // Dark background gradient color
+ float r0 = rngf(0.5f);
+ float g0 = rngf(0.5f);
+ float b0 = rngf(0.5f);
+ float a0 = rngf(0.5f);
+ float r1 = rngf(0.5f);
+ float g1 = rngf(0.5f);
+ float b1 = rngf(0.5f);
+ float a1 = rngf(0.5f);
+
+ // Circle with different color
+ size_t circle_x = rngu(xsize);
+ size_t circle_y = rngu(ysize);
+ size_t circle_r = rngu(std::min(xsize, ysize));
+
+ // Rectangle with random noise
+ size_t rect_x0 = rngu(xsize);
+ size_t rect_y0 = rngu(ysize);
+ size_t rect_x1 = rngu(xsize);
+ size_t rect_y1 = rngu(ysize);
+ if (rect_x1 < rect_x0) std::swap(rect_x0, rect_y1);
+ if (rect_y1 < rect_y0) std::swap(rect_y0, rect_y1);
+
+ // Create pixel content to test, actual content does not matter as long as it
+ // can be compared after roundtrip.
+ uint8_t* out = reinterpret_cast<uint8_t*>(image->pixels());
+ const float imul16 = 1.0f / 65536.0f;
+ for (size_t y = 0; y < ysize; y++) {
+ for (size_t x = 0; x < xsize; x++) {
+ float r = r0 * (ysize - y - 1) / ysize + r1 * y / ysize;
+ float g = g0 * (ysize - y - 1) / ysize + g1 * y / ysize;
+ float b = b0 * (ysize - y - 1) / ysize + b1 * y / ysize;
+ float a = a0 * (ysize - y - 1) / ysize + a1 * y / ysize;
+ // put some shape in there for visual debugging
+ if ((x - circle_x) * (x - circle_x) + (y - circle_y) * (y - circle_y) <
+ circle_r * circle_r) {
+ r = std::min(1.0f, ((65535 - x * y) ^ seed) * imul16);
+ g = std::min(1.0f, ((x << 8) + y + seed) * imul16);
+ b = std::min(1.0f, ((y << 8) + x * seed) * imul16);
+ a = std::min(1.0f, (32768 + x * 256 - y) * imul16);
+ } else if (x > rect_x0 && x < rect_x1 && y > rect_y0 && y < rect_y1) {
+ r = rngf(1.0f);
+ g = rngf(1.0f);
+ b = rngf(1.0f);
+ a = rngf(1.0f);
+ }
+ if (format.num_channels == 1) {
+ StoreValue(g, bits_per_sample, format, &out);
+ } else if (format.num_channels == 2) {
+ StoreValue(g, bits_per_sample, format, &out);
+ StoreValue(a, bits_per_sample, format, &out);
+ } else if (format.num_channels == 3) {
+ StoreValue(r, bits_per_sample, format, &out);
+ StoreValue(g, bits_per_sample, format, &out);
+ StoreValue(b, bits_per_sample, format, &out);
+ } else if (format.num_channels == 4) {
+ StoreValue(r, bits_per_sample, format, &out);
+ StoreValue(g, bits_per_sample, format, &out);
+ StoreValue(b, bits_per_sample, format, &out);
+ StoreValue(a, bits_per_sample, format, &out);
+ }
+ }
+ }
+}
+
+} // namespace
+
+std::vector<uint8_t> GetSomeTestImage(size_t xsize, size_t ysize,
+ size_t num_channels, uint16_t seed) {
+ // Cause more significant image difference for successive seeds.
+ Rng generator(seed);
+
+ // Returns random integer in interval [0, max_value)
+ auto rng = [&generator](size_t max_value) -> size_t {
+ return generator.UniformU(0, max_value);
+ };
+
+ // Dark background gradient color
+ uint16_t r0 = rng(32768);
+ uint16_t g0 = rng(32768);
+ uint16_t b0 = rng(32768);
+ uint16_t a0 = rng(32768);
+ uint16_t r1 = rng(32768);
+ uint16_t g1 = rng(32768);
+ uint16_t b1 = rng(32768);
+ uint16_t a1 = rng(32768);
+
+ // Circle with different color
+ size_t circle_x = rng(xsize);
+ size_t circle_y = rng(ysize);
+ size_t circle_r = rng(std::min(xsize, ysize));
+
+ // Rectangle with random noise
+ size_t rect_x0 = rng(xsize);
+ size_t rect_y0 = rng(ysize);
+ size_t rect_x1 = rng(xsize);
+ size_t rect_y1 = rng(ysize);
+ if (rect_x1 < rect_x0) std::swap(rect_x0, rect_y1);
+ if (rect_y1 < rect_y0) std::swap(rect_y0, rect_y1);
+
+ size_t num_pixels = xsize * ysize;
+ // 16 bits per channel, big endian, 4 channels
+ std::vector<uint8_t> pixels(num_pixels * num_channels * 2);
+ // Create pixel content to test, actual content does not matter as long as it
+ // can be compared after roundtrip.
+ for (size_t y = 0; y < ysize; y++) {
+ for (size_t x = 0; x < xsize; x++) {
+ uint16_t r = r0 * (ysize - y - 1) / ysize + r1 * y / ysize;
+ uint16_t g = g0 * (ysize - y - 1) / ysize + g1 * y / ysize;
+ uint16_t b = b0 * (ysize - y - 1) / ysize + b1 * y / ysize;
+ uint16_t a = a0 * (ysize - y - 1) / ysize + a1 * y / ysize;
+ // put some shape in there for visual debugging
+ if ((x - circle_x) * (x - circle_x) + (y - circle_y) * (y - circle_y) <
+ circle_r * circle_r) {
+ r = (65535 - x * y) ^ seed;
+ g = (x << 8) + y + seed;
+ b = (y << 8) + x * seed;
+ a = 32768 + x * 256 - y;
+ } else if (x > rect_x0 && x < rect_x1 && y > rect_y0 && y < rect_y1) {
+ r = rng(65536);
+ g = rng(65536);
+ b = rng(65536);
+ a = rng(65536);
+ }
+ size_t i = (y * xsize + x) * 2 * num_channels;
+ pixels[i + 0] = (r >> 8);
+ pixels[i + 1] = (r & 255);
+ if (num_channels >= 2) {
+ // This may store what is called 'g' in the alpha channel of a 2-channel
+ // image, but that's ok since the content is arbitrary
+ pixels[i + 2] = (g >> 8);
+ pixels[i + 3] = (g & 255);
+ }
+ if (num_channels >= 3) {
+ pixels[i + 4] = (b >> 8);
+ pixels[i + 5] = (b & 255);
+ }
+ if (num_channels >= 4) {
+ pixels[i + 6] = (a >> 8);
+ pixels[i + 7] = (a & 255);
+ }
+ }
+ }
+ return pixels;
+}
+
+TestImage::TestImage() {
+ SetChannels(3);
+ SetAllBitDepths(8);
+ SetColorEncoding("RGB_D65_SRG_Rel_SRG");
+}
+
+TestImage& TestImage::DecodeFromBytes(const PaddedBytes& bytes) {
+ ColorEncoding c_enc;
+ JXL_CHECK(
+ ConvertExternalToInternalColorEncoding(ppf_.color_encoding, &c_enc));
+ extras::ColorHints color_hints;
+ color_hints.Add("color_space", Description(c_enc));
+ JXL_CHECK(
+ extras::DecodeBytes(Span<const uint8_t>(bytes), color_hints, &ppf_));
+ return *this;
+}
+
+TestImage& TestImage::ClearMetadata() {
+ ppf_.metadata = extras::PackedMetadata();
+ return *this;
+}
+
+TestImage& TestImage::SetDimensions(size_t xsize, size_t ysize) {
+ if (xsize <= ppf_.info.xsize && ysize <= ppf_.info.ysize) {
+ for (auto& frame : ppf_.frames) {
+ CropLayerInfo(xsize, ysize, &frame.frame_info.layer_info);
+ CropImage(xsize, ysize, &frame.color);
+ for (auto& ec : frame.extra_channels) {
+ CropImage(xsize, ysize, &ec);
+ }
+ }
+ } else {
+ JXL_CHECK(ppf_.info.xsize == 0 && ppf_.info.ysize == 0);
+ }
+ ppf_.info.xsize = xsize;
+ ppf_.info.ysize = ysize;
+ return *this;
+}
+
+TestImage& TestImage::SetChannels(size_t num_channels) {
+ JXL_CHECK(ppf_.frames.empty());
+ JXL_CHECK(!ppf_.preview_frame);
+ ppf_.info.num_color_channels = num_channels < 3 ? 1 : 3;
+ ppf_.info.num_extra_channels = num_channels - ppf_.info.num_color_channels;
+ if (ppf_.info.num_extra_channels > 0 && ppf_.info.alpha_bits == 0) {
+ ppf_.info.alpha_bits = ppf_.info.bits_per_sample;
+ ppf_.info.alpha_exponent_bits = ppf_.info.exponent_bits_per_sample;
+ }
+ ppf_.extra_channels_info.clear();
+ for (size_t i = 1; i < ppf_.info.num_extra_channels; ++i) {
+ extras::PackedExtraChannel ec;
+ ec.index = i;
+ JxlEncoderInitExtraChannelInfo(JXL_CHANNEL_ALPHA, &ec.ec_info);
+ if (ec.ec_info.bits_per_sample == 0) {
+ ec.ec_info.bits_per_sample = ppf_.info.bits_per_sample;
+ ec.ec_info.exponent_bits_per_sample = ppf_.info.exponent_bits_per_sample;
+ }
+ ppf_.extra_channels_info.emplace_back(std::move(ec));
+ }
+ format_.num_channels = std::min(static_cast<size_t>(4), num_channels);
+ if (ppf_.info.num_color_channels == 1 &&
+ ppf_.color_encoding.color_space != JXL_COLOR_SPACE_GRAY) {
+ SetColorEncoding("Gra_D65_Rel_SRG");
+ }
+ return *this;
+}
+
+// Sets the same bit depth on color, alpha and all extra channels.
+TestImage& TestImage::SetAllBitDepths(uint32_t bits_per_sample,
+ uint32_t exponent_bits_per_sample) {
+ ppf_.info.bits_per_sample = bits_per_sample;
+ ppf_.info.exponent_bits_per_sample = exponent_bits_per_sample;
+ if (ppf_.info.num_extra_channels > 0) {
+ ppf_.info.alpha_bits = bits_per_sample;
+ ppf_.info.alpha_exponent_bits = exponent_bits_per_sample;
+ }
+ for (size_t i = 0; i < ppf_.extra_channels_info.size(); ++i) {
+ extras::PackedExtraChannel& ec = ppf_.extra_channels_info[i];
+ ec.ec_info.bits_per_sample = bits_per_sample;
+ ec.ec_info.exponent_bits_per_sample = exponent_bits_per_sample;
+ }
+ format_.data_type = DefaultDataType(ppf_.info);
+ return *this;
+}
+
+TestImage& TestImage::SetDataType(JxlDataType data_type) {
+ format_.data_type = data_type;
+ return *this;
+}
+
+TestImage& TestImage::SetEndianness(JxlEndianness endianness) {
+ format_.endianness = endianness;
+ return *this;
+}
+
+TestImage& TestImage::SetColorEncoding(const std::string& description) {
+ JXL_CHECK(ParseDescription(description, &ppf_.color_encoding));
+ ColorEncoding c_enc;
+ JXL_CHECK(
+ ConvertExternalToInternalColorEncoding(ppf_.color_encoding, &c_enc));
+ JXL_CHECK(c_enc.CreateICC());
+ PaddedBytes icc = c_enc.ICC();
+ ppf_.icc.assign(icc.begin(), icc.end());
+ return *this;
+}
+
+TestImage& TestImage::CoalesceGIFAnimationWithAlpha() {
+ extras::PackedFrame canvas = ppf_.frames[0].Copy();
+ JXL_CHECK(canvas.color.format.num_channels == 3);
+ JXL_CHECK(canvas.color.format.data_type == JXL_TYPE_UINT8);
+ JXL_CHECK(canvas.extra_channels.size() == 1);
+ for (size_t i = 1; i < ppf_.frames.size(); i++) {
+ const extras::PackedFrame& frame = ppf_.frames[i];
+ JXL_CHECK(frame.extra_channels.size() == 1);
+ const JxlLayerInfo& layer_info = frame.frame_info.layer_info;
+ extras::PackedFrame rendered = canvas.Copy();
+ uint8_t* pixels_rendered =
+ reinterpret_cast<uint8_t*>(rendered.color.pixels());
+ const uint8_t* pixels_frame =
+ reinterpret_cast<const uint8_t*>(frame.color.pixels());
+ uint8_t* alpha_rendered =
+ reinterpret_cast<uint8_t*>(rendered.extra_channels[0].pixels());
+ const uint8_t* alpha_frame =
+ reinterpret_cast<const uint8_t*>(frame.extra_channels[0].pixels());
+ for (size_t y = 0; y < frame.color.ysize; y++) {
+ for (size_t x = 0; x < frame.color.xsize; x++) {
+ size_t idx_frame = y * frame.color.xsize + x;
+ size_t idx_rendered = ((layer_info.crop_y0 + y) * rendered.color.xsize +
+ (layer_info.crop_x0 + x));
+ if (alpha_frame[idx_frame] != 0) {
+ memcpy(&pixels_rendered[idx_rendered * 3],
+ &pixels_frame[idx_frame * 3], 3);
+ alpha_rendered[idx_rendered] = alpha_frame[idx_frame];
+ }
+ }
+ }
+ if (layer_info.save_as_reference != 0) {
+ canvas = rendered.Copy();
+ }
+ ppf_.frames[i] = std::move(rendered);
+ }
+ return *this;
+}
+
+TestImage::Frame::Frame(TestImage* parent, bool is_preview, size_t index)
+ : parent_(parent), is_preview_(is_preview), index_(index) {}
+
+void TestImage::Frame::ZeroFill() {
+ memset(frame().color.pixels(), 0, frame().color.pixels_size);
+ for (auto& ec : frame().extra_channels) {
+ memset(ec.pixels(), 0, ec.pixels_size);
+ }
+}
+
+void TestImage::Frame::RandomFill(uint16_t seed) {
+ FillPackedImage(ppf().info.bits_per_sample, seed, &frame().color);
+ for (size_t i = 0; i < ppf().extra_channels_info.size(); ++i) {
+ FillPackedImage(ppf().extra_channels_info[i].ec_info.bits_per_sample,
+ seed + 1 + i, &frame().extra_channels[i]);
+ }
+}
+
+void TestImage::Frame::SetValue(size_t y, size_t x, size_t c, float val) {
+ const extras::PackedImage& color = frame().color;
+ JxlPixelFormat format = color.format;
+ JXL_CHECK(y < ppf().info.ysize);
+ JXL_CHECK(x < ppf().info.xsize);
+ JXL_CHECK(c < format.num_channels);
+ size_t pwidth = extras::PackedImage::BitsPerChannel(format.data_type) / 8;
+ size_t idx = ((y * color.xsize + x) * format.num_channels + c) * pwidth;
+ uint8_t* pixels = reinterpret_cast<uint8_t*>(frame().color.pixels());
+ uint8_t* p = pixels + idx;
+ StoreValue(val, ppf().info.bits_per_sample, frame().color.format, &p);
+}
+
+TestImage::Frame TestImage::AddFrame() {
+ size_t index = ppf_.frames.size();
+ extras::PackedFrame frame(ppf_.info.xsize, ppf_.info.ysize, format_);
+ for (size_t i = 0; i < ppf_.extra_channels_info.size(); ++i) {
+ JxlPixelFormat ec_format = {1, format_.data_type, format_.endianness, 0};
+ extras::PackedImage image(ppf_.info.xsize, ppf_.info.ysize, ec_format);
+ frame.extra_channels.emplace_back(std::move(image));
+ }
+ ppf_.frames.emplace_back(std::move(frame));
+ return Frame(this, false, index);
+}
+
+TestImage::Frame TestImage::AddPreview(size_t xsize, size_t ysize) {
+ extras::PackedFrame frame(xsize, ysize, format_);
+ for (size_t i = 0; i < ppf_.extra_channels_info.size(); ++i) {
+ JxlPixelFormat ec_format = {1, format_.data_type, format_.endianness, 0};
+ extras::PackedImage image(xsize, ysize, ec_format);
+ frame.extra_channels.emplace_back(std::move(image));
+ }
+ ppf_.preview_frame = make_unique<extras::PackedFrame>(std::move(frame));
+ return Frame(this, true, 0);
+}
+
+void TestImage::CropLayerInfo(size_t xsize, size_t ysize, JxlLayerInfo* info) {
+ if (info->crop_x0 < static_cast<ssize_t>(xsize)) {
+ info->xsize = std::min<size_t>(info->xsize, xsize - info->crop_x0);
+ } else {
+ info->xsize = 0;
+ }
+ if (info->crop_y0 < static_cast<ssize_t>(ysize)) {
+ info->ysize = std::min<size_t>(info->ysize, ysize - info->crop_y0);
+ } else {
+ info->ysize = 0;
+ }
+}
+
+void TestImage::CropImage(size_t xsize, size_t ysize,
+ extras::PackedImage* image) {
+ size_t new_stride = (image->stride / image->xsize) * xsize;
+ uint8_t* buf = reinterpret_cast<uint8_t*>(image->pixels());
+ for (size_t y = 0; y < ysize; ++y) {
+ memmove(&buf[y * new_stride], &buf[y * image->stride], new_stride);
+ }
+ image->xsize = xsize;
+ image->ysize = ysize;
+ image->stride = new_stride;
+ image->pixels_size = ysize * new_stride;
+}
+
+JxlDataType TestImage::DefaultDataType(const JxlBasicInfo& info) {
+ if (info.bits_per_sample == 16 && info.exponent_bits_per_sample == 5) {
+ return JXL_TYPE_FLOAT16;
+ } else if (info.exponent_bits_per_sample > 0 || info.bits_per_sample > 16) {
+ return JXL_TYPE_FLOAT;
+ } else if (info.bits_per_sample > 8) {
+ return JXL_TYPE_UINT16;
+ } else {
+ return JXL_TYPE_UINT8;
+ }
+}
+
+} // namespace test
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/test_image.h b/third_party/jpeg-xl/lib/jxl/test_image.h
new file mode 100644
index 0000000000..0106a4b341
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/test_image.h
@@ -0,0 +1,94 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_TEST_IMAGE_H_
+#define LIB_JXL_TEST_IMAGE_H_
+
+#include <jxl/codestream_header.h>
+#include <jxl/types.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/padded_bytes.h"
+
+namespace jxl {
+namespace test {
+
+// Returns a test image with some autogenerated pixel content, using 16 bits per
+// channel, big endian order, 1 to 4 channels
+// The seed parameter allows to create images with different pixel content.
+std::vector<uint8_t> GetSomeTestImage(size_t xsize, size_t ysize,
+ size_t num_channels, uint16_t seed);
+
+class TestImage {
+ public:
+ TestImage();
+
+ extras::PackedPixelFile& ppf() { return ppf_; }
+
+ TestImage& DecodeFromBytes(const PaddedBytes& bytes);
+
+ TestImage& ClearMetadata();
+
+ TestImage& SetDimensions(size_t xsize, size_t ysize);
+
+ TestImage& SetChannels(size_t num_channels);
+
+ // Sets the same bit depth on color, alpha and all extra channels.
+ TestImage& SetAllBitDepths(uint32_t bits_per_sample,
+ uint32_t exponent_bits_per_sample = 0);
+
+ TestImage& SetDataType(JxlDataType data_type);
+
+ TestImage& SetEndianness(JxlEndianness endianness);
+
+ TestImage& SetColorEncoding(const std::string& description);
+
+ TestImage& CoalesceGIFAnimationWithAlpha();
+
+ class Frame {
+ public:
+ Frame(TestImage* parent, bool is_preview, size_t index);
+
+ void ZeroFill();
+ void RandomFill(uint16_t seed = 177);
+
+ void SetValue(size_t y, size_t x, size_t c, float val);
+
+ private:
+ extras::PackedPixelFile& ppf() const { return parent_->ppf(); }
+
+ extras::PackedFrame& frame() {
+ return is_preview_ ? *ppf().preview_frame : ppf().frames[index_];
+ }
+
+ TestImage* parent_;
+ bool is_preview_;
+ size_t index_;
+ };
+
+ Frame AddFrame();
+
+ Frame AddPreview(size_t xsize, size_t ysize);
+
+ private:
+ extras::PackedPixelFile ppf_;
+ JxlPixelFormat format_ = {3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0};
+
+ static void CropLayerInfo(size_t xsize, size_t ysize, JxlLayerInfo* info);
+
+ static void CropImage(size_t xsize, size_t ysize, extras::PackedImage* image);
+
+ static JxlDataType DefaultDataType(const JxlBasicInfo& info);
+};
+
+} // namespace test
+} // namespace jxl
+
+#endif // LIB_JXL_TEST_IMAGE_H_
diff --git a/third_party/jpeg-xl/lib/jxl/test_utils.cc b/third_party/jpeg-xl/lib/jxl/test_utils.cc
new file mode 100644
index 0000000000..223641a6a5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/test_utils.cc
@@ -0,0 +1,673 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/test_utils.h"
+
+#include <memory>
+#include <string>
+
+#include "lib/extras/packed_image_convert.h"
+#include "lib/jxl/base/file_io.h"
+#include "lib/jxl/base/printf_macros.h"
+#include "lib/jxl/enc_butteraugli_comparator.h"
+#include "lib/jxl/enc_butteraugli_pnorm.h"
+#include "lib/jxl/enc_cache.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_external_image.h"
+#include "lib/jxl/enc_file.h"
+
+#if !defined(TEST_DATA_PATH)
+#include "tools/cpp/runfiles/runfiles.h"
+#endif
+
+namespace jxl {
+namespace test {
+
+#if defined(TEST_DATA_PATH)
+std::string GetTestDataPath(const std::string& filename) {
+ return std::string(TEST_DATA_PATH "/") + filename;
+}
+#else
+using bazel::tools::cpp::runfiles::Runfiles;
+const std::unique_ptr<Runfiles> kRunfiles(Runfiles::Create(""));
+std::string GetTestDataPath(const std::string& filename) {
+ std::string root(JPEGXL_ROOT_PACKAGE "/testdata/");
+ return kRunfiles->Rlocation(root + filename);
+}
+#endif
+
+PaddedBytes ReadTestData(const std::string& filename) {
+ std::string full_path = GetTestDataPath(filename);
+ PaddedBytes data;
+ fprintf(stderr, "ReadTestData %s\n", full_path.c_str());
+ JXL_CHECK(jxl::ReadFile(full_path, &data));
+ printf("Test data %s is %d bytes long.\n", filename.c_str(),
+ static_cast<int>(data.size()));
+ return data;
+}
+
+Status DecodeFile(extras::JXLDecompressParams dparams,
+ const Span<const uint8_t> file, CodecInOut* JXL_RESTRICT io,
+ ThreadPool* pool) {
+ SetThreadParallelRunner(dparams, pool);
+ extras::PackedPixelFile ppf;
+ JXL_RETURN_IF_ERROR(DecodeImageJXL(file.data(), file.size(), dparams,
+ /*decoded_bytes=*/nullptr, &ppf));
+ JXL_RETURN_IF_ERROR(ConvertPackedPixelFileToCodecInOut(ppf, pool, io));
+ return true;
+}
+
+void JxlBasicInfoSetFromPixelFormat(JxlBasicInfo* basic_info,
+ const JxlPixelFormat* pixel_format) {
+ JxlEncoderInitBasicInfo(basic_info);
+ switch (pixel_format->data_type) {
+ case JXL_TYPE_FLOAT:
+ basic_info->bits_per_sample = 32;
+ basic_info->exponent_bits_per_sample = 8;
+ break;
+ case JXL_TYPE_FLOAT16:
+ basic_info->bits_per_sample = 16;
+ basic_info->exponent_bits_per_sample = 5;
+ break;
+ case JXL_TYPE_UINT8:
+ basic_info->bits_per_sample = 8;
+ basic_info->exponent_bits_per_sample = 0;
+ break;
+ case JXL_TYPE_UINT16:
+ basic_info->bits_per_sample = 16;
+ basic_info->exponent_bits_per_sample = 0;
+ break;
+ default:
+ JXL_ABORT("Unhandled JxlDataType");
+ }
+ if (pixel_format->num_channels < 3) {
+ basic_info->num_color_channels = 1;
+ } else {
+ basic_info->num_color_channels = 3;
+ }
+ if (pixel_format->num_channels == 2 || pixel_format->num_channels == 4) {
+ basic_info->alpha_exponent_bits = basic_info->exponent_bits_per_sample;
+ basic_info->alpha_bits = basic_info->bits_per_sample;
+ basic_info->num_extra_channels = 1;
+ } else {
+ basic_info->alpha_exponent_bits = 0;
+ basic_info->alpha_bits = 0;
+ }
+}
+
+ColorEncoding ColorEncodingFromDescriptor(const ColorEncodingDescriptor& desc) {
+ ColorEncoding c;
+ c.SetColorSpace(desc.color_space);
+ if (desc.color_space != ColorSpace::kXYB) {
+ c.white_point = desc.white_point;
+ c.primaries = desc.primaries;
+ c.tf.SetTransferFunction(desc.tf);
+ }
+ c.rendering_intent = desc.rendering_intent;
+ JXL_CHECK(c.CreateICC());
+ return c;
+}
+
+namespace {
+void CheckSameEncodings(const std::vector<ColorEncoding>& a,
+ const std::vector<ColorEncoding>& b,
+ const std::string& check_name,
+ std::stringstream& failures) {
+ JXL_CHECK(a.size() == b.size());
+ for (size_t i = 0; i < a.size(); ++i) {
+ if ((a[i].ICC() == b[i].ICC()) ||
+ ((a[i].primaries == b[i].primaries) && a[i].tf.IsSame(b[i].tf))) {
+ continue;
+ }
+ failures << "CheckSameEncodings " << check_name << ": " << i
+ << "-th encoding mismatch\n";
+ }
+}
+} // namespace
+
+bool Roundtrip(const CodecInOut* io, const CompressParams& cparams,
+ extras::JXLDecompressParams dparams,
+ CodecInOut* JXL_RESTRICT io2, std::stringstream& failures,
+ size_t* compressed_size, ThreadPool* pool, AuxOut* aux_out) {
+ if (compressed_size) {
+ *compressed_size = static_cast<size_t>(-1);
+ }
+ PaddedBytes compressed;
+
+ std::vector<ColorEncoding> original_metadata_encodings;
+ std::vector<ColorEncoding> original_current_encodings;
+ std::vector<ColorEncoding> metadata_encodings_1;
+ std::vector<ColorEncoding> metadata_encodings_2;
+ std::vector<ColorEncoding> current_encodings_2;
+ original_metadata_encodings.reserve(io->frames.size());
+ original_current_encodings.reserve(io->frames.size());
+ metadata_encodings_1.reserve(io->frames.size());
+ metadata_encodings_2.reserve(io->frames.size());
+ current_encodings_2.reserve(io->frames.size());
+
+ for (const ImageBundle& ib : io->frames) {
+ // Remember original encoding, will be returned by decoder.
+ original_metadata_encodings.push_back(ib.metadata()->color_encoding);
+ // c_current should not change during encoding.
+ original_current_encodings.push_back(ib.c_current());
+ }
+
+ std::unique_ptr<PassesEncoderState> enc_state =
+ jxl::make_unique<PassesEncoderState>();
+ JXL_CHECK(EncodeFile(cparams, io, enc_state.get(), &compressed, GetJxlCms(),
+ aux_out, pool));
+
+ for (const ImageBundle& ib1 : io->frames) {
+ metadata_encodings_1.push_back(ib1.metadata()->color_encoding);
+ }
+
+ // Should still be in the same color space after encoding.
+ CheckSameEncodings(metadata_encodings_1, original_metadata_encodings,
+ "original vs after encoding", failures);
+
+ JXL_CHECK(DecodeFile(dparams, Span<const uint8_t>(compressed), io2, pool));
+ JXL_CHECK(io2->frames.size() == io->frames.size());
+
+ for (const ImageBundle& ib2 : io2->frames) {
+ metadata_encodings_2.push_back(ib2.metadata()->color_encoding);
+ current_encodings_2.push_back(ib2.c_current());
+ }
+
+ // We always produce the original color encoding if a color transform hook is
+ // set.
+ CheckSameEncodings(current_encodings_2, original_current_encodings,
+ "current: original vs decoded", failures);
+
+ // Decoder returns the originals passed to the encoder.
+ CheckSameEncodings(metadata_encodings_2, original_metadata_encodings,
+ "metadata: original vs decoded", failures);
+
+ if (compressed_size) {
+ *compressed_size = compressed.size();
+ }
+
+ return failures.str().empty();
+}
+
+size_t Roundtrip(const extras::PackedPixelFile& ppf_in,
+ extras::JXLCompressParams cparams,
+ extras::JXLDecompressParams dparams, ThreadPool* pool,
+ extras::PackedPixelFile* ppf_out) {
+ SetThreadParallelRunner(cparams, pool);
+ SetThreadParallelRunner(dparams, pool);
+ std::vector<uint8_t> compressed;
+ JXL_CHECK(extras::EncodeImageJXL(cparams, ppf_in, /*jpeg_bytes=*/nullptr,
+ &compressed));
+ size_t decoded_bytes = 0;
+ JXL_CHECK(extras::DecodeImageJXL(compressed.data(), compressed.size(),
+ dparams, &decoded_bytes, ppf_out));
+ JXL_CHECK(decoded_bytes == compressed.size());
+ return compressed.size();
+}
+
+std::vector<ColorEncodingDescriptor> AllEncodings() {
+ std::vector<ColorEncodingDescriptor> all_encodings;
+ all_encodings.reserve(300);
+ ColorEncoding c;
+
+ for (ColorSpace cs : Values<ColorSpace>()) {
+ if (cs == ColorSpace::kUnknown || cs == ColorSpace::kXYB) continue;
+ c.SetColorSpace(cs);
+
+ for (WhitePoint wp : Values<WhitePoint>()) {
+ if (wp == WhitePoint::kCustom) continue;
+ if (c.ImplicitWhitePoint() && c.white_point != wp) continue;
+ c.white_point = wp;
+
+ for (Primaries primaries : Values<Primaries>()) {
+ if (primaries == Primaries::kCustom) continue;
+ if (!c.HasPrimaries()) continue;
+ c.primaries = primaries;
+
+ for (TransferFunction tf : Values<TransferFunction>()) {
+ if (tf == TransferFunction::kUnknown) continue;
+ if (c.tf.SetImplicit() &&
+ (c.tf.IsGamma() || c.tf.GetTransferFunction() != tf)) {
+ continue;
+ }
+ c.tf.SetTransferFunction(tf);
+
+ for (RenderingIntent ri : Values<RenderingIntent>()) {
+ ColorEncodingDescriptor cdesc;
+ cdesc.color_space = cs;
+ cdesc.white_point = wp;
+ cdesc.primaries = primaries;
+ cdesc.tf = tf;
+ cdesc.rendering_intent = ri;
+ all_encodings.push_back(cdesc);
+ }
+ }
+ }
+ }
+ }
+
+ return all_encodings;
+}
+
+jxl::CodecInOut SomeTestImageToCodecInOut(const std::vector<uint8_t>& buf,
+ size_t num_channels, size_t xsize,
+ size_t ysize) {
+ jxl::CodecInOut io;
+ io.SetSize(xsize, ysize);
+ io.metadata.m.SetAlphaBits(16);
+ io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(
+ /*is_gray=*/num_channels == 1 || num_channels == 2);
+ JxlPixelFormat format = {static_cast<uint32_t>(num_channels), JXL_TYPE_UINT16,
+ JXL_BIG_ENDIAN, 0};
+ JXL_CHECK(ConvertFromExternal(
+ jxl::Span<const uint8_t>(buf.data(), buf.size()), xsize, ysize,
+ jxl::ColorEncoding::SRGB(/*is_gray=*/num_channels < 3),
+ /*bits_per_sample=*/16, format,
+ /*pool=*/nullptr,
+ /*ib=*/&io.Main()));
+ return io;
+}
+
+bool Near(double expected, double value, double max_dist) {
+ double dist = expected > value ? expected - value : value - expected;
+ return dist <= max_dist;
+}
+
+float LoadFloat16(uint16_t bits16) {
+ const uint32_t sign = bits16 >> 15;
+ const uint32_t biased_exp = (bits16 >> 10) & 0x1F;
+ const uint32_t mantissa = bits16 & 0x3FF;
+
+ // Subnormal or zero
+ if (biased_exp == 0) {
+ const float subnormal = (1.0f / 16384) * (mantissa * (1.0f / 1024));
+ return sign ? -subnormal : subnormal;
+ }
+
+ // Normalized: convert the representation directly (faster than ldexp/tables).
+ const uint32_t biased_exp32 = biased_exp + (127 - 15);
+ const uint32_t mantissa32 = mantissa << (23 - 10);
+ const uint32_t bits32 = (sign << 31) | (biased_exp32 << 23) | mantissa32;
+
+ float result;
+ memcpy(&result, &bits32, 4);
+ return result;
+}
+
+float LoadLEFloat16(const uint8_t* p) {
+ uint16_t bits16 = LoadLE16(p);
+ return LoadFloat16(bits16);
+}
+
+float LoadBEFloat16(const uint8_t* p) {
+ uint16_t bits16 = LoadBE16(p);
+ return LoadFloat16(bits16);
+}
+
+size_t GetPrecision(JxlDataType data_type) {
+ switch (data_type) {
+ case JXL_TYPE_UINT8:
+ return 8;
+ case JXL_TYPE_UINT16:
+ return 16;
+ case JXL_TYPE_FLOAT:
+ // Floating point mantissa precision
+ return 24;
+ case JXL_TYPE_FLOAT16:
+ return 11;
+ default:
+ JXL_ABORT("Unhandled JxlDataType");
+ }
+}
+
+size_t GetDataBits(JxlDataType data_type) {
+ switch (data_type) {
+ case JXL_TYPE_UINT8:
+ return 8;
+ case JXL_TYPE_UINT16:
+ return 16;
+ case JXL_TYPE_FLOAT:
+ return 32;
+ case JXL_TYPE_FLOAT16:
+ return 16;
+ default:
+ JXL_ABORT("Unhandled JxlDataType");
+ }
+}
+
+std::vector<double> ConvertToRGBA32(const uint8_t* pixels, size_t xsize,
+ size_t ysize, const JxlPixelFormat& format,
+ double factor) {
+ std::vector<double> result(xsize * ysize * 4);
+ size_t num_channels = format.num_channels;
+ bool gray = num_channels == 1 || num_channels == 2;
+ bool alpha = num_channels == 2 || num_channels == 4;
+ JxlEndianness endianness = format.endianness;
+ // Compute actual type:
+ if (endianness == JXL_NATIVE_ENDIAN) {
+ endianness = IsLittleEndian() ? JXL_LITTLE_ENDIAN : JXL_BIG_ENDIAN;
+ }
+
+ size_t stride =
+ xsize * jxl::DivCeil(GetDataBits(format.data_type) * num_channels,
+ jxl::kBitsPerByte);
+ if (format.align > 1) stride = jxl::RoundUpTo(stride, format.align);
+
+ if (format.data_type == JXL_TYPE_UINT8) {
+ // Multiplier to bring to 0-1.0 range
+ double mul = factor > 0.0 ? factor : 1.0 / 255.0;
+ for (size_t y = 0; y < ysize; ++y) {
+ for (size_t x = 0; x < xsize; ++x) {
+ size_t j = (y * xsize + x) * 4;
+ size_t i = y * stride + x * num_channels;
+ double r = pixels[i];
+ double g = gray ? r : pixels[i + 1];
+ double b = gray ? r : pixels[i + 2];
+ double a = alpha ? pixels[i + num_channels - 1] : 255;
+ result[j + 0] = r * mul;
+ result[j + 1] = g * mul;
+ result[j + 2] = b * mul;
+ result[j + 3] = a * mul;
+ }
+ }
+ } else if (format.data_type == JXL_TYPE_UINT16) {
+ JXL_ASSERT(endianness != JXL_NATIVE_ENDIAN);
+ // Multiplier to bring to 0-1.0 range
+ double mul = factor > 0.0 ? factor : 1.0 / 65535.0;
+ for (size_t y = 0; y < ysize; ++y) {
+ for (size_t x = 0; x < xsize; ++x) {
+ size_t j = (y * xsize + x) * 4;
+ size_t i = y * stride + x * num_channels * 2;
+ double r, g, b, a;
+ if (endianness == JXL_BIG_ENDIAN) {
+ r = (pixels[i + 0] << 8) + pixels[i + 1];
+ g = gray ? r : (pixels[i + 2] << 8) + pixels[i + 3];
+ b = gray ? r : (pixels[i + 4] << 8) + pixels[i + 5];
+ a = alpha ? (pixels[i + num_channels * 2 - 2] << 8) +
+ pixels[i + num_channels * 2 - 1]
+ : 65535;
+ } else {
+ r = (pixels[i + 1] << 8) + pixels[i + 0];
+ g = gray ? r : (pixels[i + 3] << 8) + pixels[i + 2];
+ b = gray ? r : (pixels[i + 5] << 8) + pixels[i + 4];
+ a = alpha ? (pixels[i + num_channels * 2 - 1] << 8) +
+ pixels[i + num_channels * 2 - 2]
+ : 65535;
+ }
+ result[j + 0] = r * mul;
+ result[j + 1] = g * mul;
+ result[j + 2] = b * mul;
+ result[j + 3] = a * mul;
+ }
+ }
+ } else if (format.data_type == JXL_TYPE_FLOAT) {
+ JXL_ASSERT(endianness != JXL_NATIVE_ENDIAN);
+ for (size_t y = 0; y < ysize; ++y) {
+ for (size_t x = 0; x < xsize; ++x) {
+ size_t j = (y * xsize + x) * 4;
+ size_t i = y * stride + x * num_channels * 4;
+ double r, g, b, a;
+ if (endianness == JXL_BIG_ENDIAN) {
+ r = LoadBEFloat(pixels + i);
+ g = gray ? r : LoadBEFloat(pixels + i + 4);
+ b = gray ? r : LoadBEFloat(pixels + i + 8);
+ a = alpha ? LoadBEFloat(pixels + i + num_channels * 4 - 4) : 1.0;
+ } else {
+ r = LoadLEFloat(pixels + i);
+ g = gray ? r : LoadLEFloat(pixels + i + 4);
+ b = gray ? r : LoadLEFloat(pixels + i + 8);
+ a = alpha ? LoadLEFloat(pixels + i + num_channels * 4 - 4) : 1.0;
+ }
+ result[j + 0] = r;
+ result[j + 1] = g;
+ result[j + 2] = b;
+ result[j + 3] = a;
+ }
+ }
+ } else if (format.data_type == JXL_TYPE_FLOAT16) {
+ JXL_ASSERT(endianness != JXL_NATIVE_ENDIAN);
+ for (size_t y = 0; y < ysize; ++y) {
+ for (size_t x = 0; x < xsize; ++x) {
+ size_t j = (y * xsize + x) * 4;
+ size_t i = y * stride + x * num_channels * 2;
+ double r, g, b, a;
+ if (endianness == JXL_BIG_ENDIAN) {
+ r = LoadBEFloat16(pixels + i);
+ g = gray ? r : LoadBEFloat16(pixels + i + 2);
+ b = gray ? r : LoadBEFloat16(pixels + i + 4);
+ a = alpha ? LoadBEFloat16(pixels + i + num_channels * 2 - 2) : 1.0;
+ } else {
+ r = LoadLEFloat16(pixels + i);
+ g = gray ? r : LoadLEFloat16(pixels + i + 2);
+ b = gray ? r : LoadLEFloat16(pixels + i + 4);
+ a = alpha ? LoadLEFloat16(pixels + i + num_channels * 2 - 2) : 1.0;
+ }
+ result[j + 0] = r;
+ result[j + 1] = g;
+ result[j + 2] = b;
+ result[j + 3] = a;
+ }
+ }
+ } else {
+ JXL_ASSERT(false); // Unsupported type
+ }
+ return result;
+}
+
+size_t ComparePixels(const uint8_t* a, const uint8_t* b, size_t xsize,
+ size_t ysize, const JxlPixelFormat& format_a,
+ const JxlPixelFormat& format_b,
+ double threshold_multiplier) {
+ // Convert both images to equal full precision for comparison.
+ std::vector<double> a_full = ConvertToRGBA32(a, xsize, ysize, format_a);
+ std::vector<double> b_full = ConvertToRGBA32(b, xsize, ysize, format_b);
+ bool gray_a = format_a.num_channels < 3;
+ bool gray_b = format_b.num_channels < 3;
+ bool alpha_a = !(format_a.num_channels & 1);
+ bool alpha_b = !(format_b.num_channels & 1);
+ size_t bits_a = GetPrecision(format_a.data_type);
+ size_t bits_b = GetPrecision(format_b.data_type);
+ size_t bits = std::min(bits_a, bits_b);
+ // How much distance is allowed in case of pixels with lower bit depths, given
+ // that the double precision float images use range 0-1.0.
+ // E.g. in case of 1-bit this is 0.5 since 0.499 must map to 0 and 0.501 must
+ // map to 1.
+ double precision = 0.5 * threshold_multiplier / ((1ull << bits) - 1ull);
+ if (format_a.data_type == JXL_TYPE_FLOAT16 ||
+ format_b.data_type == JXL_TYPE_FLOAT16) {
+ // Lower the precision for float16, because it currently looks like the
+ // scalar and wasm implementations of hwy have 1 less bit of precision
+ // than the x86 implementations.
+ // TODO(lode): Set the required precision back to 11 bits when possible.
+ precision = 0.5 * threshold_multiplier / ((1ull << (bits - 1)) - 1ull);
+ }
+ size_t numdiff = 0;
+ for (size_t y = 0; y < ysize; y++) {
+ for (size_t x = 0; x < xsize; x++) {
+ size_t i = (y * xsize + x) * 4;
+ bool ok = true;
+ if (gray_a || gray_b) {
+ if (!Near(a_full[i + 0], b_full[i + 0], precision)) ok = false;
+ // If the input was grayscale and the output not, then the output must
+ // have all channels equal.
+ if (gray_a && b_full[i + 0] != b_full[i + 1] &&
+ b_full[i + 2] != b_full[i + 2]) {
+ ok = false;
+ }
+ } else {
+ if (!Near(a_full[i + 0], b_full[i + 0], precision) ||
+ !Near(a_full[i + 1], b_full[i + 1], precision) ||
+ !Near(a_full[i + 2], b_full[i + 2], precision)) {
+ ok = false;
+ }
+ }
+ if (alpha_a && alpha_b) {
+ if (!Near(a_full[i + 3], b_full[i + 3], precision)) ok = false;
+ } else {
+ // If the input had no alpha channel, the output should be opaque
+ // after roundtrip.
+ if (alpha_b && !Near(1.0, b_full[i + 3], precision)) ok = false;
+ }
+ if (!ok) numdiff++;
+ }
+ }
+ return numdiff;
+}
+
+double DistanceRMS(const uint8_t* a, const uint8_t* b, size_t xsize,
+ size_t ysize, const JxlPixelFormat& format) {
+ // Convert both images to equal full precision for comparison.
+ std::vector<double> a_full = ConvertToRGBA32(a, xsize, ysize, format);
+ std::vector<double> b_full = ConvertToRGBA32(b, xsize, ysize, format);
+ double sum = 0.0;
+ for (size_t y = 0; y < ysize; y++) {
+ double row_sum = 0.0;
+ for (size_t x = 0; x < xsize; x++) {
+ size_t i = (y * xsize + x) * 4;
+ for (size_t c = 0; c < format.num_channels; ++c) {
+ double diff = a_full[i + c] - b_full[i + c];
+ row_sum += diff * diff;
+ }
+ }
+ sum += row_sum;
+ }
+ sum /= (xsize * ysize);
+ return sqrt(sum);
+}
+
+float ButteraugliDistance(const extras::PackedPixelFile& a,
+ const extras::PackedPixelFile& b, ThreadPool* pool) {
+ CodecInOut io0;
+ JXL_CHECK(ConvertPackedPixelFileToCodecInOut(a, pool, &io0));
+ CodecInOut io1;
+ JXL_CHECK(ConvertPackedPixelFileToCodecInOut(b, pool, &io1));
+ // TODO(eustas): simplify?
+ return ButteraugliDistance(io0.frames, io1.frames, ButteraugliParams(),
+ GetJxlCms(),
+ /*distmap=*/nullptr, pool);
+}
+
+float Butteraugli3Norm(const extras::PackedPixelFile& a,
+ const extras::PackedPixelFile& b, ThreadPool* pool) {
+ CodecInOut io0;
+ JXL_CHECK(ConvertPackedPixelFileToCodecInOut(a, pool, &io0));
+ CodecInOut io1;
+ JXL_CHECK(ConvertPackedPixelFileToCodecInOut(b, pool, &io1));
+ ButteraugliParams ba;
+ ImageF distmap;
+ ButteraugliDistance(io0.frames, io1.frames, ba, GetJxlCms(), &distmap, pool);
+ return ComputeDistanceP(distmap, ba, 3);
+}
+
+float ComputeDistance2(const extras::PackedPixelFile& a,
+ const extras::PackedPixelFile& b) {
+ CodecInOut io0;
+ JXL_CHECK(ConvertPackedPixelFileToCodecInOut(a, nullptr, &io0));
+ CodecInOut io1;
+ JXL_CHECK(ConvertPackedPixelFileToCodecInOut(b, nullptr, &io1));
+ return ComputeDistance2(io0.Main(), io1.Main(), GetJxlCms());
+}
+
+bool SameAlpha(const extras::PackedPixelFile& a,
+ const extras::PackedPixelFile& b) {
+ JXL_CHECK(a.info.xsize == b.info.xsize);
+ JXL_CHECK(a.info.ysize == b.info.ysize);
+ JXL_CHECK(a.info.alpha_bits == b.info.alpha_bits);
+ JXL_CHECK(a.info.alpha_exponent_bits == b.info.alpha_exponent_bits);
+ JXL_CHECK(a.info.alpha_bits > 0);
+ JXL_CHECK(a.frames.size() == b.frames.size());
+ for (size_t i = 0; i < a.frames.size(); ++i) {
+ const extras::PackedImage& color_a = a.frames[i].color;
+ const extras::PackedImage& color_b = b.frames[i].color;
+ JXL_CHECK(color_a.format.num_channels == color_b.format.num_channels);
+ JXL_CHECK(color_a.format.data_type == color_b.format.data_type);
+ JXL_CHECK(color_a.format.endianness == color_b.format.endianness);
+ JXL_CHECK(color_a.pixels_size == color_b.pixels_size);
+ size_t pwidth =
+ extras::PackedImage::BitsPerChannel(color_a.format.data_type) / 8;
+ size_t num_color = color_a.format.num_channels < 3 ? 1 : 3;
+ const uint8_t* p_a = reinterpret_cast<const uint8_t*>(color_a.pixels());
+ const uint8_t* p_b = reinterpret_cast<const uint8_t*>(color_b.pixels());
+ for (size_t y = 0; y < a.info.ysize; ++y) {
+ for (size_t x = 0; x < a.info.xsize; ++x) {
+ size_t idx =
+ ((y * a.info.xsize + x) * color_a.format.num_channels + num_color) *
+ pwidth;
+ if (memcmp(&p_a[idx], &p_b[idx], pwidth) != 0) {
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+}
+
+bool SamePixels(const extras::PackedImage& a, const extras::PackedImage& b) {
+ JXL_CHECK(a.xsize == b.xsize);
+ JXL_CHECK(a.ysize == b.ysize);
+ JXL_CHECK(a.format.num_channels == b.format.num_channels);
+ JXL_CHECK(a.format.data_type == b.format.data_type);
+ JXL_CHECK(a.format.endianness == b.format.endianness);
+ JXL_CHECK(a.pixels_size == b.pixels_size);
+ const uint8_t* p_a = reinterpret_cast<const uint8_t*>(a.pixels());
+ const uint8_t* p_b = reinterpret_cast<const uint8_t*>(b.pixels());
+ for (size_t y = 0; y < a.ysize; ++y) {
+ for (size_t x = 0; x < a.xsize; ++x) {
+ size_t idx = (y * a.xsize + x) * a.pixel_stride();
+ if (memcmp(&p_a[idx], &p_b[idx], a.pixel_stride()) != 0) {
+ printf("Mismatch at row %" PRIuS " col %" PRIuS "\n", y, x);
+ printf(" a: ");
+ for (size_t j = 0; j < a.pixel_stride(); ++j) {
+ printf(" %3u", p_a[idx + j]);
+ }
+ printf("\n b: ");
+ for (size_t j = 0; j < a.pixel_stride(); ++j) {
+ printf(" %3u", p_b[idx + j]);
+ }
+ printf("\n");
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+bool SamePixels(const extras::PackedPixelFile& a,
+ const extras::PackedPixelFile& b) {
+ JXL_CHECK(a.info.xsize == b.info.xsize);
+ JXL_CHECK(a.info.ysize == b.info.ysize);
+ JXL_CHECK(a.info.bits_per_sample == b.info.bits_per_sample);
+ JXL_CHECK(a.info.exponent_bits_per_sample == b.info.exponent_bits_per_sample);
+ JXL_CHECK(a.frames.size() == b.frames.size());
+ for (size_t i = 0; i < a.frames.size(); ++i) {
+ const auto& frame_a = a.frames[i];
+ const auto& frame_b = b.frames[i];
+ if (!SamePixels(frame_a.color, frame_b.color)) {
+ return false;
+ }
+ JXL_CHECK(frame_a.extra_channels.size() == frame_b.extra_channels.size());
+ for (size_t j = 0; j < frame_a.extra_channels.size(); ++j) {
+ if (!SamePixels(frame_a.extra_channels[i], frame_b.extra_channels[i])) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+} // namespace test
+
+bool operator==(const jxl::PaddedBytes& a, const jxl::PaddedBytes& b) {
+ if (a.size() != b.size()) return false;
+ if (memcmp(a.data(), b.data(), a.size()) != 0) return false;
+ return true;
+}
+
+// Allow using EXPECT_EQ on jxl::PaddedBytes
+bool operator!=(const jxl::PaddedBytes& a, const jxl::PaddedBytes& b) {
+ return !(a == b);
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/test_utils.h b/third_party/jpeg-xl/lib/jxl/test_utils.h
new file mode 100644
index 0000000000..8c5cd434f5
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/test_utils.h
@@ -0,0 +1,175 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_TEST_UTILS_H_
+#define LIB_JXL_TEST_UTILS_H_
+
+// TODO(eustas): reduce includes (move to .cc)
+
+// Macros and functions useful for tests.
+
+#include <jxl/codestream_header.h>
+#include <jxl/thread_parallel_runner_cxx.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <ostream>
+#include <vector>
+
+#include "lib/extras/dec/jxl.h"
+#include "lib/extras/enc/jxl.h"
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/codec_in_out.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/enc_params.h"
+
+namespace jxl {
+
+struct AuxOut;
+
+namespace test {
+
+std::string GetTestDataPath(const std::string& filename);
+PaddedBytes ReadTestData(const std::string& filename);
+
+void JxlBasicInfoSetFromPixelFormat(JxlBasicInfo* basic_info,
+ const JxlPixelFormat* pixel_format);
+
+template <typename Params>
+void SetThreadParallelRunner(Params params, ThreadPool* pool) {
+ if (pool && !params.runner_opaque) {
+ params.runner = pool->runner();
+ params.runner_opaque = pool->runner_opaque();
+ }
+}
+
+Status DecodeFile(extras::JXLDecompressParams dparams,
+ const Span<const uint8_t> file, CodecInOut* JXL_RESTRICT io,
+ ThreadPool* pool = nullptr);
+
+bool Roundtrip(const CodecInOut* io, const CompressParams& cparams,
+ extras::JXLDecompressParams dparams,
+ CodecInOut* JXL_RESTRICT io2, std::stringstream& failures,
+ size_t* compressed_size = nullptr, ThreadPool* pool = nullptr,
+ AuxOut* aux_out = nullptr);
+
+// Returns compressed size [bytes].
+size_t Roundtrip(const extras::PackedPixelFile& ppf_in,
+ extras::JXLCompressParams cparams,
+ extras::JXLDecompressParams dparams, ThreadPool* pool,
+ extras::PackedPixelFile* ppf_out);
+
+// A POD descriptor of a ColorEncoding. Only used in tests as the return value
+// of AllEncodings().
+struct ColorEncodingDescriptor {
+ ColorSpace color_space;
+ WhitePoint white_point;
+ Primaries primaries;
+ TransferFunction tf;
+ RenderingIntent rendering_intent;
+};
+
+ColorEncoding ColorEncodingFromDescriptor(const ColorEncodingDescriptor& desc);
+
+// Define the operator<< for tests.
+static inline ::std::ostream& operator<<(::std::ostream& os,
+ const ColorEncodingDescriptor& c) {
+ return os << "ColorEncoding/" << Description(ColorEncodingFromDescriptor(c));
+}
+
+// Returns ColorEncodingDescriptors, which are only used in tests. To obtain a
+// ColorEncoding object call ColorEncodingFromDescriptor and then call
+// ColorEncoding::CreateProfile() on that object to generate a profile.
+std::vector<ColorEncodingDescriptor> AllEncodings();
+
+// Returns a CodecInOut based on the buf, xsize, ysize, and the assumption
+// that the buffer was created using `GetSomeTestImage`.
+jxl::CodecInOut SomeTestImageToCodecInOut(const std::vector<uint8_t>& buf,
+ size_t num_channels, size_t xsize,
+ size_t ysize);
+
+bool Near(double expected, double value, double max_dist);
+
+// Based on highway scalar implementation, for testing
+float LoadFloat16(uint16_t bits16);
+
+float LoadLEFloat16(const uint8_t* p);
+
+float LoadBEFloat16(const uint8_t* p);
+
+size_t GetPrecision(JxlDataType data_type);
+
+size_t GetDataBits(JxlDataType data_type);
+
+// Procedure to convert pixels to double precision, not efficient, but
+// well-controlled for testing. It uses double, to be able to represent all
+// precisions needed for the maximum data types the API supports: uint32_t
+// integers, and, single precision float. The values are in range 0-1 for SDR.
+std::vector<double> ConvertToRGBA32(const uint8_t* pixels, size_t xsize,
+ size_t ysize, const JxlPixelFormat& format,
+ double factor = 0.0);
+
+// Returns amount of pixels which differ between the two pictures. Image b is
+// the image after roundtrip after roundtrip, image a before roundtrip. There
+// are more strict requirements for the alpha channel and grayscale values of
+// the output image.
+size_t ComparePixels(const uint8_t* a, const uint8_t* b, size_t xsize,
+ size_t ysize, const JxlPixelFormat& format_a,
+ const JxlPixelFormat& format_b,
+ double threshold_multiplier = 1.0);
+
+double DistanceRMS(const uint8_t* a, const uint8_t* b, size_t xsize,
+ size_t ysize, const JxlPixelFormat& format);
+
+float ButteraugliDistance(const extras::PackedPixelFile& a,
+ const extras::PackedPixelFile& b,
+ ThreadPool* pool = nullptr);
+
+float Butteraugli3Norm(const extras::PackedPixelFile& a,
+ const extras::PackedPixelFile& b,
+ ThreadPool* pool = nullptr);
+
+float ComputeDistance2(const extras::PackedPixelFile& a,
+ const extras::PackedPixelFile& b);
+
+bool SameAlpha(const extras::PackedPixelFile& a,
+ const extras::PackedPixelFile& b);
+
+bool SamePixels(const extras::PackedImage& a, const extras::PackedImage& b);
+
+bool SamePixels(const extras::PackedPixelFile& a,
+ const extras::PackedPixelFile& b);
+
+class ThreadPoolForTests {
+ public:
+ explicit ThreadPoolForTests(int num_threads) {
+ runner_ =
+ JxlThreadParallelRunnerMake(/* memory_manager */ nullptr, num_threads);
+ pool_ =
+ jxl::make_unique<ThreadPool>(JxlThreadParallelRunner, runner_.get());
+ }
+ ThreadPoolForTests(const ThreadPoolForTests&) = delete;
+ ThreadPoolForTests& operator&(const ThreadPoolForTests&) = delete;
+ ThreadPool* operator&() { return pool_.get(); }
+
+ private:
+ JxlThreadParallelRunnerPtr runner_;
+ std::unique_ptr<ThreadPool> pool_;
+};
+
+} // namespace test
+
+bool operator==(const jxl::PaddedBytes& a, const jxl::PaddedBytes& b);
+
+// Allow using EXPECT_EQ on jxl::PaddedBytes
+bool operator!=(const jxl::PaddedBytes& a, const jxl::PaddedBytes& b);
+
+} // namespace jxl
+
+#endif // LIB_JXL_TEST_UTILS_H_
diff --git a/third_party/jpeg-xl/lib/jxl/testing.h b/third_party/jpeg-xl/lib/jxl/testing.h
new file mode 100644
index 0000000000..d10b0c3c54
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/testing.h
@@ -0,0 +1,73 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_TESTING_H_
+#define LIB_JXL_TESTING_H_
+
+// GTest/GMock specific macros / wrappers.
+
+// gmock unconditionally redefines those macros (to wrong values).
+// Lets include it only here and mitigate the problem.
+#pragma push_macro("PRIdS")
+#pragma push_macro("PRIuS")
+#include "gmock/gmock.h"
+#pragma pop_macro("PRIuS")
+#pragma pop_macro("PRIdS")
+
+#include <sstream>
+
+#include "gtest/gtest.h"
+
+#ifdef JXL_DISABLE_SLOW_TESTS
+#define JXL_SLOW_TEST(X) DISABLED_##X
+#else
+#define JXL_SLOW_TEST(X) X
+#endif // JXL_DISABLE_SLOW_TESTS
+
+#if JPEGXL_ENABLE_TRANSCODE_JPEG
+#define JXL_TRANSCODE_JPEG_TEST(X) X
+#else
+#define JXL_TRANSCODE_JPEG_TEST(X) DISABLED_##X
+#endif // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+#if JPEGXL_ENABLE_BOXES
+#define JXL_BOXES_TEST(X) X
+#else
+#define JXL_BOXES_TEST(X) DISABLED_##X
+#endif // JPEGXL_ENABLE_BOXES
+
+#ifdef THREAD_SANITIZER
+#define JXL_TSAN_SLOW_TEST(X) DISABLED_##X
+#else
+#define JXL_TSAN_SLOW_TEST(X) X
+#endif // THREAD_SANITIZER
+
+// googletest before 1.10 didn't define INSTANTIATE_TEST_SUITE_P() but instead
+// used INSTANTIATE_TEST_CASE_P which is now deprecated.
+#ifdef INSTANTIATE_TEST_SUITE_P
+#define JXL_GTEST_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_SUITE_P
+#else
+#define JXL_GTEST_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_CASE_P
+#endif
+
+// Ensures that we don't make our test bounds too lax, effectively disabling the
+// tests.
+MATCHER_P(IsSlightlyBelow, max, "") {
+ return max * 0.75 <= arg && arg <= max * 1.0;
+}
+
+#define JXL_EXPECT_OK(F) \
+ { \
+ std::stringstream _; \
+ EXPECT_TRUE(F) << _.str(); \
+ }
+
+#define JXL_ASSERT_OK(F) \
+ { \
+ std::stringstream _; \
+ ASSERT_TRUE(F) << _.str(); \
+ }
+
+#endif // LIB_JXL_TESTING_H_
diff --git a/third_party/jpeg-xl/lib/jxl/tf_gbench.cc b/third_party/jpeg-xl/lib/jxl/tf_gbench.cc
new file mode 100644
index 0000000000..9c010d460a
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/tf_gbench.cc
@@ -0,0 +1,143 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "benchmark/benchmark.h"
+#include "lib/jxl/image_ops.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/tf_gbench.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jxl/transfer_functions-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+#define RUN_BENCHMARK(F) \
+ constexpr size_t kNum = 1 << 12; \
+ HWY_FULL(float) d; \
+ /* Three parallel runs, as this will run on R, G and B. */ \
+ auto sum1 = Zero(d); \
+ auto sum2 = Zero(d); \
+ auto sum3 = Zero(d); \
+ for (auto _ : state) { \
+ auto x = Set(d, 1e-5); \
+ auto v1 = Set(d, 1e-5); \
+ auto v2 = Set(d, 1.1e-5); \
+ auto v3 = Set(d, 1.2e-5); \
+ for (size_t i = 0; i < kNum; i++) { \
+ sum1 += F(d, v1); \
+ sum2 += F(d, v2); \
+ sum3 += F(d, v3); \
+ v1 += x; \
+ v2 += x; \
+ v3 += x; \
+ } \
+ } \
+ /* floats per second */ \
+ state.SetItemsProcessed(kNum* state.iterations() * Lanes(d) * 3); \
+ benchmark::DoNotOptimize(sum1 + sum2 + sum3);
+
+#define RUN_BENCHMARK_SCALAR(F) \
+ constexpr size_t kNum = 1 << 12; \
+ /* Three parallel runs, as this will run on R, G and B. */ \
+ float sum1 = 0, sum2 = 0, sum3 = 0; \
+ for (auto _ : state) { \
+ float x = 1e-5; \
+ float v1 = 1e-5; \
+ float v2 = 1.1e-5; \
+ float v3 = 1.2e-5; \
+ for (size_t i = 0; i < kNum; i++) { \
+ sum1 += F(v1); \
+ sum2 += F(v2); \
+ sum3 += F(v3); \
+ v1 += x; \
+ v2 += x; \
+ v3 += x; \
+ } \
+ } \
+ /* floats per second */ \
+ state.SetItemsProcessed(kNum* state.iterations() * 3); \
+ benchmark::DoNotOptimize(sum1 + sum2 + sum3);
+
+HWY_NOINLINE void BM_FastSRGB(benchmark::State& state) {
+ RUN_BENCHMARK(FastLinearToSRGB);
+}
+
+HWY_NOINLINE void BM_TFSRGB(benchmark::State& state) {
+ RUN_BENCHMARK(TF_SRGB().EncodedFromDisplay);
+}
+
+HWY_NOINLINE void BM_PQDFE(benchmark::State& state) {
+ RUN_BENCHMARK(TF_PQ().DisplayFromEncoded);
+}
+
+HWY_NOINLINE void BM_PQEFD(benchmark::State& state) {
+ RUN_BENCHMARK(TF_PQ().EncodedFromDisplay);
+}
+
+HWY_NOINLINE void BM_PQSlowDFE(benchmark::State& state) {
+ RUN_BENCHMARK_SCALAR(TF_PQ().DisplayFromEncoded);
+}
+
+HWY_NOINLINE void BM_PQSlowEFD(benchmark::State& state) {
+ RUN_BENCHMARK_SCALAR(TF_PQ().EncodedFromDisplay);
+}
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+namespace {
+
+HWY_EXPORT(BM_FastSRGB);
+HWY_EXPORT(BM_TFSRGB);
+HWY_EXPORT(BM_PQDFE);
+HWY_EXPORT(BM_PQEFD);
+HWY_EXPORT(BM_PQSlowDFE);
+HWY_EXPORT(BM_PQSlowEFD);
+
+float SRGB_pow(float x) {
+ return x < 0.0031308f ? 12.92f * x : 1.055f * powf(x, 1.0f / 2.4f) - 0.055f;
+}
+
+void BM_FastSRGB(benchmark::State& state) {
+ HWY_DYNAMIC_DISPATCH(BM_FastSRGB)(state);
+}
+void BM_TFSRGB(benchmark::State& state) {
+ HWY_DYNAMIC_DISPATCH(BM_TFSRGB)(state);
+}
+void BM_PQDFE(benchmark::State& state) {
+ HWY_DYNAMIC_DISPATCH(BM_PQDFE)(state);
+}
+void BM_PQEFD(benchmark::State& state) {
+ HWY_DYNAMIC_DISPATCH(BM_PQEFD)(state);
+}
+void BM_PQSlowDFE(benchmark::State& state) {
+ HWY_DYNAMIC_DISPATCH(BM_PQSlowDFE)(state);
+}
+void BM_PQSlowEFD(benchmark::State& state) {
+ HWY_DYNAMIC_DISPATCH(BM_PQSlowEFD)(state);
+}
+
+void BM_SRGB_pow(benchmark::State& state) { RUN_BENCHMARK_SCALAR(SRGB_pow); }
+
+BENCHMARK(BM_FastSRGB);
+BENCHMARK(BM_TFSRGB);
+BENCHMARK(BM_SRGB_pow);
+BENCHMARK(BM_PQDFE);
+BENCHMARK(BM_PQEFD);
+BENCHMARK(BM_PQSlowDFE);
+BENCHMARK(BM_PQSlowEFD);
+
+} // namespace
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl/toc.cc b/third_party/jpeg-xl/lib/jxl/toc.cc
new file mode 100644
index 0000000000..fd7740c144
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/toc.cc
@@ -0,0 +1,105 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/toc.h"
+
+#include <stdint.h>
+
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+size_t MaxBits(const size_t num_sizes) {
+ const size_t entry_bits = U32Coder::MaxEncodedBits(kTocDist) * num_sizes;
+ // permutation bit (not its tokens!), padding, entries, padding.
+ return 1 + kBitsPerByte + entry_bits + kBitsPerByte;
+}
+
+Status ReadToc(size_t toc_entries, BitReader* JXL_RESTRICT reader,
+ std::vector<uint32_t>* JXL_RESTRICT sizes,
+ std::vector<coeff_order_t>* JXL_RESTRICT permutation) {
+ if (toc_entries > 65536) {
+ // Prevent out of memory if invalid JXL codestream causes a bogus amount
+ // of toc_entries such as 2720436919446 to be computed.
+ // TODO(lode): verify whether 65536 is a reasonable upper bound
+ return JXL_FAILURE("too many toc entries");
+ }
+
+ sizes->clear();
+ sizes->resize(toc_entries);
+ if (reader->TotalBitsConsumed() >= reader->TotalBytes() * kBitsPerByte) {
+ return JXL_STATUS(StatusCode::kNotEnoughBytes, "Not enough bytes for TOC");
+ }
+ const auto check_bit_budget = [&](size_t num_entries) -> Status {
+ // U32Coder reads 2 bits to recognize variant and kTocDist cheapest variant
+ // is Bits(10), this way at least 12 bits are required per toc-entry.
+ size_t minimal_bit_cost = num_entries * (2 + 10);
+ size_t bit_budget = reader->TotalBytes() * 8;
+ size_t expenses = reader->TotalBitsConsumed();
+ if ((expenses <= bit_budget) &&
+ (minimal_bit_cost <= bit_budget - expenses)) {
+ return true;
+ }
+ return JXL_STATUS(StatusCode::kNotEnoughBytes, "Not enough bytes for TOC");
+ };
+
+ JXL_DASSERT(toc_entries > 0);
+ if (reader->ReadFixedBits<1>() == 1) {
+ JXL_RETURN_IF_ERROR(check_bit_budget(toc_entries));
+ permutation->resize(toc_entries);
+ JXL_RETURN_IF_ERROR(DecodePermutation(/*skip=*/0, toc_entries,
+ permutation->data(), reader));
+ }
+ JXL_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+ JXL_RETURN_IF_ERROR(check_bit_budget(toc_entries));
+ for (size_t i = 0; i < toc_entries; ++i) {
+ (*sizes)[i] = U32Coder::Read(kTocDist, reader);
+ }
+ JXL_RETURN_IF_ERROR(reader->JumpToByteBoundary());
+ JXL_RETURN_IF_ERROR(check_bit_budget(0));
+ return true;
+}
+
+Status ReadGroupOffsets(size_t toc_entries, BitReader* JXL_RESTRICT reader,
+ std::vector<uint64_t>* JXL_RESTRICT offsets,
+ std::vector<uint32_t>* JXL_RESTRICT sizes,
+ uint64_t* total_size) {
+ std::vector<coeff_order_t> permutation;
+ JXL_RETURN_IF_ERROR(ReadToc(toc_entries, reader, sizes, &permutation));
+
+ offsets->clear();
+ offsets->resize(toc_entries);
+
+ // Prefix sum starting with 0 and ending with the offset of the last group
+ uint64_t offset = 0;
+ for (size_t i = 0; i < toc_entries; ++i) {
+ if (offset + (*sizes)[i] < offset) {
+ return JXL_FAILURE("group offset overflow");
+ }
+ (*offsets)[i] = offset;
+ offset += (*sizes)[i];
+ }
+ if (total_size) {
+ *total_size = offset;
+ }
+
+ if (!permutation.empty()) {
+ std::vector<uint64_t> permuted_offsets;
+ std::vector<uint32_t> permuted_sizes;
+ permuted_offsets.reserve(toc_entries);
+ permuted_sizes.reserve(toc_entries);
+ for (coeff_order_t index : permutation) {
+ permuted_offsets.push_back((*offsets)[index]);
+ permuted_sizes.push_back((*sizes)[index]);
+ }
+ std::swap(*offsets, permuted_offsets);
+ std::swap(*sizes, permuted_sizes);
+ }
+
+ return true;
+}
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/toc.h b/third_party/jpeg-xl/lib/jxl/toc.h
new file mode 100644
index 0000000000..a97197ad45
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/toc.h
@@ -0,0 +1,55 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_TOC_H_
+#define LIB_JXL_TOC_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <vector>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/coeff_order_fwd.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/field_encodings.h"
+
+namespace jxl {
+
+// (2+bits) = 2,3,4 bytes so encoders can patch TOC after encoding.
+// 30 is sufficient for 4K channels of uncompressed 16-bit samples.
+constexpr U32Enc kTocDist(Bits(10), BitsOffset(14, 1024), BitsOffset(22, 17408),
+ BitsOffset(30, 4211712));
+
+size_t MaxBits(const size_t num_sizes);
+
+// TODO(veluca): move these to FrameDimensions.
+static JXL_INLINE size_t AcGroupIndex(size_t pass, size_t group,
+ size_t num_groups, size_t num_dc_groups,
+ bool has_ac_global) {
+ return 1 + num_dc_groups + static_cast<size_t>(has_ac_global) +
+ pass * num_groups + group;
+}
+
+static JXL_INLINE size_t NumTocEntries(size_t num_groups, size_t num_dc_groups,
+ size_t num_passes, bool has_ac_global) {
+ if (num_groups == 1 && num_passes == 1) return 1;
+ return AcGroupIndex(0, 0, num_groups, num_dc_groups, has_ac_global) +
+ num_groups * num_passes;
+}
+
+Status ReadToc(size_t toc_entries, BitReader* JXL_RESTRICT reader,
+ std::vector<uint32_t>* JXL_RESTRICT sizes,
+ std::vector<coeff_order_t>* JXL_RESTRICT permutation);
+
+Status ReadGroupOffsets(size_t toc_entries, BitReader* JXL_RESTRICT reader,
+ std::vector<uint64_t>* JXL_RESTRICT offsets,
+ std::vector<uint32_t>* JXL_RESTRICT sizes,
+ uint64_t* total_size);
+
+} // namespace jxl
+
+#endif // LIB_JXL_TOC_H_
diff --git a/third_party/jpeg-xl/lib/jxl/toc_test.cc b/third_party/jpeg-xl/lib/jxl/toc_test.cc
new file mode 100644
index 0000000000..a7f0f2c27b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/toc_test.cc
@@ -0,0 +1,92 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/toc.h"
+
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/enc_aux_out.h"
+#include "lib/jxl/enc_toc.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+void Roundtrip(size_t num_entries, bool permute, Rng* rng) {
+ // Generate a random permutation.
+ std::vector<coeff_order_t> permutation(num_entries);
+ std::vector<coeff_order_t> inv_permutation(num_entries);
+ for (size_t i = 0; i < num_entries; i++) {
+ permutation[i] = i;
+ inv_permutation[i] = i;
+ }
+ if (permute) {
+ rng->Shuffle(permutation.data(), permutation.size());
+ for (size_t i = 0; i < num_entries; i++) {
+ inv_permutation[permutation[i]] = i;
+ }
+ }
+
+ // Generate num_entries groups of random (byte-aligned) length
+ std::vector<BitWriter> group_codes(num_entries);
+ for (BitWriter& writer : group_codes) {
+ const size_t max_bits = (*rng)() & 0xFFF;
+ BitWriter::Allotment allotment(&writer, max_bits + kBitsPerByte);
+ size_t i = 0;
+ for (; i + BitWriter::kMaxBitsPerCall < max_bits;
+ i += BitWriter::kMaxBitsPerCall) {
+ writer.Write(BitWriter::kMaxBitsPerCall, 0);
+ }
+ for (; i < max_bits; i += 1) {
+ writer.Write(/*n_bits=*/1, 0);
+ }
+ writer.ZeroPadToByte();
+ AuxOut aux_out;
+ allotment.ReclaimAndCharge(&writer, 0, &aux_out);
+ }
+
+ BitWriter writer;
+ AuxOut aux_out;
+ ASSERT_TRUE(WriteGroupOffsets(group_codes, permute ? &permutation : nullptr,
+ &writer, &aux_out));
+
+ BitReader reader(writer.GetSpan());
+ std::vector<uint64_t> group_offsets;
+ std::vector<uint32_t> group_sizes;
+ uint64_t total_size;
+ ASSERT_TRUE(ReadGroupOffsets(num_entries, &reader, &group_offsets,
+ &group_sizes, &total_size));
+ ASSERT_EQ(num_entries, group_offsets.size());
+ ASSERT_EQ(num_entries, group_sizes.size());
+ EXPECT_TRUE(reader.Close());
+
+ uint64_t prefix_sum = 0;
+ for (size_t i = 0; i < num_entries; ++i) {
+ EXPECT_EQ(prefix_sum, group_offsets[inv_permutation[i]]);
+
+ EXPECT_EQ(0u, group_codes[i].BitsWritten() % kBitsPerByte);
+ prefix_sum += group_codes[i].BitsWritten() / kBitsPerByte;
+
+ if (i + 1 < num_entries) {
+ EXPECT_EQ(
+ group_offsets[inv_permutation[i]] + group_sizes[inv_permutation[i]],
+ group_offsets[inv_permutation[i + 1]]);
+ }
+ }
+ EXPECT_EQ(prefix_sum, total_size);
+}
+
+TEST(TocTest, Test) {
+ Rng rng(0);
+ for (size_t num_entries = 1; num_entries < 10; ++num_entries) {
+ for (bool permute : std::vector<bool>{false, true}) {
+ Roundtrip(num_entries, permute, &rng);
+ }
+ }
+}
+
+} // namespace
+} // namespace jxl
diff --git a/third_party/jpeg-xl/lib/jxl/transfer_functions-inl.h b/third_party/jpeg-xl/lib/jxl/transfer_functions-inl.h
new file mode 100644
index 0000000000..9f4c10c76d
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/transfer_functions-inl.h
@@ -0,0 +1,413 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Transfer functions for color encodings.
+
+#if defined(LIB_JXL_TRANSFER_FUNCTIONS_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_TRANSFER_FUNCTIONS_INL_H_
+#undef LIB_JXL_TRANSFER_FUNCTIONS_INL_H_
+#else
+#define LIB_JXL_TRANSFER_FUNCTIONS_INL_H_
+#endif
+
+#include <algorithm>
+#include <cmath>
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/fast_math-inl.h"
+#include "lib/jxl/rational_polynomial-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::And;
+using hwy::HWY_NAMESPACE::AndNot;
+using hwy::HWY_NAMESPACE::Gt;
+using hwy::HWY_NAMESPACE::IfThenElse;
+using hwy::HWY_NAMESPACE::Lt;
+using hwy::HWY_NAMESPACE::Or;
+using hwy::HWY_NAMESPACE::Sqrt;
+using hwy::HWY_NAMESPACE::TableLookupBytes;
+
+// Definitions for BT.2100-2 transfer functions (used inside/outside SIMD):
+// "display" is linear light (nits) normalized to [0, 1].
+// "encoded" is a nonlinear encoding (e.g. PQ) in [0, 1].
+// "scene" is a linear function of photon counts, normalized to [0, 1].
+
+// Despite the stated ranges, we need unbounded transfer functions: see
+// http://www.littlecms.com/CIC18_UnboundedCMM.pdf. Inputs can be negative or
+// above 1 due to chromatic adaptation. To avoid severe round-trip errors caused
+// by clamping, we mirror negative inputs via copysign (f(-x) = -f(x), see
+// https://developer.apple.com/documentation/coregraphics/cgcolorspace/1644735-extendedsrgb)
+// and extend the function domains above 1.
+
+// Hybrid Log-Gamma.
+class TF_HLG {
+ public:
+ // EOTF. e = encoded.
+ JXL_INLINE double DisplayFromEncoded(const double e) const {
+ return OOTF(InvOETF(e));
+ }
+
+ // Inverse EOTF. d = display.
+ JXL_INLINE double EncodedFromDisplay(const double d) const {
+ return OETF(InvOOTF(d));
+ }
+
+ // Maximum error 5e-7.
+ template <class D, class V>
+ JXL_INLINE V EncodedFromDisplay(D d, V x) const {
+ const hwy::HWY_NAMESPACE::Rebind<uint32_t, D> du;
+ const V kSign = BitCast(d, Set(du, 0x80000000u));
+ const V original_sign = And(x, kSign);
+ x = AndNot(kSign, x); // abs
+ const V below_div12 = Sqrt(Mul(Set(d, 3.0f), x));
+ const V e =
+ MulAdd(Set(d, kA * 0.693147181f),
+ FastLog2f(d, MulAdd(Set(d, 12), x, Set(d, -kB))), Set(d, kC));
+ const V magnitude = IfThenElse(Le(x, Set(d, kDiv12)), below_div12, e);
+ return Or(AndNot(kSign, magnitude), original_sign);
+ }
+
+ private:
+ // OETF (defines the HLG approach). s = scene, returns encoded.
+ JXL_INLINE double OETF(double s) const {
+ if (s == 0.0) return 0.0;
+ const double original_sign = s;
+ s = std::abs(s);
+
+ if (s <= kDiv12) return copysignf(std::sqrt(3.0 * s), original_sign);
+
+ const double e = kA * std::log(12 * s - kB) + kC;
+ JXL_ASSERT(e > 0.0);
+ return copysignf(e, original_sign);
+ }
+
+ // e = encoded, returns scene.
+ JXL_INLINE double InvOETF(double e) const {
+ if (e == 0.0) return 0.0;
+ const double original_sign = e;
+ e = std::abs(e);
+
+ if (e <= 0.5) return copysignf(e * e * (1.0 / 3), original_sign);
+
+ const double s = (std::exp((e - kC) * kRA) + kB) * kDiv12;
+ JXL_ASSERT(s >= 0);
+ return copysignf(s, original_sign);
+ }
+
+ // s = scene, returns display.
+ JXL_INLINE double OOTF(const double s) const {
+ // The actual (red channel) OOTF is RD = alpha * YS^(gamma-1) * RS, where
+ // YS = 0.2627 * RS + 0.6780 * GS + 0.0593 * BS. Let alpha = 1 so we return
+ // "display" (normalized [0, 1]) instead of nits. Our transfer function
+ // interface does not allow a dependency on YS. Fortunately, the system
+ // gamma at 334 nits is 1.0, so this reduces to RD = RS.
+ return s;
+ }
+
+ // d = display, returns scene.
+ JXL_INLINE double InvOOTF(const double d) const {
+ return d; // see OOTF().
+ }
+
+ static constexpr double kA = 0.17883277;
+ static constexpr double kRA = 1.0 / kA;
+ static constexpr double kB = 1 - 4 * kA;
+ static constexpr double kC = 0.5599107295;
+ static constexpr double kDiv12 = 1.0 / 12;
+};
+
+class TF_709 {
+ public:
+ JXL_INLINE double EncodedFromDisplay(const double d) const {
+ if (d < kThresh) return kMulLow * d;
+ return kMulHi * std::pow(d, kPowHi) + kSub;
+ }
+
+ // Maximum error 1e-6.
+ template <class D, class V>
+ JXL_INLINE V EncodedFromDisplay(D d, V x) const {
+ auto low = Mul(Set(d, kMulLow), x);
+ auto hi =
+ MulAdd(Set(d, kMulHi), FastPowf(d, x, Set(d, kPowHi)), Set(d, kSub));
+ return IfThenElse(Le(x, Set(d, kThresh)), low, hi);
+ }
+
+ template <class D, class V>
+ JXL_INLINE V DisplayFromEncoded(D d, V x) const {
+ auto low = Mul(Set(d, kInvMulLow), x);
+ auto hi = FastPowf(d, MulAdd(x, Set(d, kInvMulHi), Set(d, kInvAdd)),
+ Set(d, kInvPowHi));
+ return IfThenElse(Lt(x, Set(d, kInvThresh)), low, hi);
+ }
+
+ private:
+ static constexpr double kThresh = 0.018;
+ static constexpr double kMulLow = 4.5;
+ static constexpr double kMulHi = 1.099;
+ static constexpr double kPowHi = 0.45;
+ static constexpr double kSub = -0.099;
+
+ static constexpr double kInvThresh = 0.081;
+ static constexpr double kInvMulLow = 1 / 4.5;
+ static constexpr double kInvMulHi = 1 / 1.099;
+ static constexpr double kInvPowHi = 1 / 0.45;
+ static constexpr double kInvAdd = 0.099 * kInvMulHi;
+};
+
+// Perceptual Quantization
+class TF_PQ {
+ public:
+ // EOTF (defines the PQ approach). e = encoded.
+ JXL_INLINE double DisplayFromEncoded(double e) const {
+ if (e == 0.0) return 0.0;
+ const double original_sign = e;
+ e = std::abs(e);
+
+ const double xp = std::pow(e, 1.0 / kM2);
+ const double num = std::max(xp - kC1, 0.0);
+ const double den = kC2 - kC3 * xp;
+ JXL_DASSERT(den != 0.0);
+ const double d = std::pow(num / den, 1.0 / kM1);
+ JXL_DASSERT(d >= 0.0); // Equal for e ~= 1E-9
+ return copysignf(d, original_sign);
+ }
+
+ // Maximum error 3e-6
+ template <class D, class V>
+ JXL_INLINE V DisplayFromEncoded(D d, V x) const {
+ const hwy::HWY_NAMESPACE::Rebind<uint32_t, D> du;
+ const V kSign = BitCast(d, Set(du, 0x80000000u));
+ const V original_sign = And(x, kSign);
+ x = AndNot(kSign, x); // abs
+ // 4-over-4-degree rational polynomial approximation on x+x*x. This improves
+ // the maximum error by about 5x over a rational polynomial for x.
+ auto xpxx = MulAdd(x, x, x);
+ HWY_ALIGN constexpr float p[(4 + 1) * 4] = {
+ HWY_REP4(2.62975656e-04f), HWY_REP4(-6.23553089e-03f),
+ HWY_REP4(7.38602301e-01f), HWY_REP4(2.64553172e+00f),
+ HWY_REP4(5.50034862e-01f),
+ };
+ HWY_ALIGN constexpr float q[(4 + 1) * 4] = {
+ HWY_REP4(4.21350107e+02f), HWY_REP4(-4.28736818e+02f),
+ HWY_REP4(1.74364667e+02f), HWY_REP4(-3.39078883e+01f),
+ HWY_REP4(2.67718770e+00f),
+ };
+ auto magnitude = EvalRationalPolynomial(d, xpxx, p, q);
+ return Or(AndNot(kSign, magnitude), original_sign);
+ }
+
+ // Inverse EOTF. d = display.
+ JXL_INLINE double EncodedFromDisplay(double d) const {
+ if (d == 0.0) return 0.0;
+ const double original_sign = d;
+ d = std::abs(d);
+
+ const double xp = std::pow(d, kM1);
+ const double num = kC1 + xp * kC2;
+ const double den = 1.0 + xp * kC3;
+ const double e = std::pow(num / den, kM2);
+ JXL_DASSERT(e > 0.0);
+ return copysignf(e, original_sign);
+ }
+
+ // Maximum error 7e-7.
+ template <class D, class V>
+ JXL_INLINE V EncodedFromDisplay(D d, V x) const {
+ const hwy::HWY_NAMESPACE::Rebind<uint32_t, D> du;
+ const V kSign = BitCast(d, Set(du, 0x80000000u));
+ const V original_sign = And(x, kSign);
+ x = AndNot(kSign, x); // abs
+ // 4-over-4-degree rational polynomial approximation on x**0.25, with two
+ // different polynomials above and below 1e-4.
+ auto xto025 = Sqrt(Sqrt(x));
+ HWY_ALIGN constexpr float p[(4 + 1) * 4] = {
+ HWY_REP4(1.351392e-02f), HWY_REP4(-1.095778e+00f),
+ HWY_REP4(5.522776e+01f), HWY_REP4(1.492516e+02f),
+ HWY_REP4(4.838434e+01f),
+ };
+ HWY_ALIGN constexpr float q[(4 + 1) * 4] = {
+ HWY_REP4(1.012416e+00f), HWY_REP4(2.016708e+01f),
+ HWY_REP4(9.263710e+01f), HWY_REP4(1.120607e+02f),
+ HWY_REP4(2.590418e+01f),
+ };
+
+ HWY_ALIGN constexpr float plo[(4 + 1) * 4] = {
+ HWY_REP4(9.863406e-06f), HWY_REP4(3.881234e-01f),
+ HWY_REP4(1.352821e+02f), HWY_REP4(6.889862e+04f),
+ HWY_REP4(-2.864824e+05f),
+ };
+ HWY_ALIGN constexpr float qlo[(4 + 1) * 4] = {
+ HWY_REP4(3.371868e+01f), HWY_REP4(1.477719e+03f),
+ HWY_REP4(1.608477e+04f), HWY_REP4(-4.389884e+04f),
+ HWY_REP4(-2.072546e+05f),
+ };
+
+ auto magnitude = IfThenElse(Lt(x, Set(d, 1e-4f)),
+ EvalRationalPolynomial(d, xto025, plo, qlo),
+ EvalRationalPolynomial(d, xto025, p, q));
+ return Or(AndNot(kSign, magnitude), original_sign);
+ }
+
+ private:
+ static constexpr double kM1 = 2610.0 / 16384;
+ static constexpr double kM2 = (2523.0 / 4096) * 128;
+ static constexpr double kC1 = 3424.0 / 4096;
+ static constexpr double kC2 = (2413.0 / 4096) * 32;
+ static constexpr double kC3 = (2392.0 / 4096) * 32;
+};
+
+// sRGB
+class TF_SRGB {
+ public:
+ template <typename V>
+ JXL_INLINE V DisplayFromEncoded(V x) const {
+ const HWY_FULL(float) d;
+ const HWY_FULL(uint32_t) du;
+ const V kSign = BitCast(d, Set(du, 0x80000000u));
+ const V original_sign = And(x, kSign);
+ x = AndNot(kSign, x); // abs
+
+ // TODO(janwas): range reduction
+ // Computed via af_cheb_rational (k=100); replicated 4x.
+ HWY_ALIGN constexpr float p[(4 + 1) * 4] = {
+ 2.200248328e-04f, 2.200248328e-04f, 2.200248328e-04f, 2.200248328e-04f,
+ 1.043637593e-02f, 1.043637593e-02f, 1.043637593e-02f, 1.043637593e-02f,
+ 1.624820318e-01f, 1.624820318e-01f, 1.624820318e-01f, 1.624820318e-01f,
+ 7.961564959e-01f, 7.961564959e-01f, 7.961564959e-01f, 7.961564959e-01f,
+ 8.210152774e-01f, 8.210152774e-01f, 8.210152774e-01f, 8.210152774e-01f,
+ };
+ HWY_ALIGN constexpr float q[(4 + 1) * 4] = {
+ 2.631846970e-01f, 2.631846970e-01f, 2.631846970e-01f,
+ 2.631846970e-01f, 1.076976492e+00f, 1.076976492e+00f,
+ 1.076976492e+00f, 1.076976492e+00f, 4.987528350e-01f,
+ 4.987528350e-01f, 4.987528350e-01f, 4.987528350e-01f,
+ -5.512498495e-02f, -5.512498495e-02f, -5.512498495e-02f,
+ -5.512498495e-02f, 6.521209011e-03f, 6.521209011e-03f,
+ 6.521209011e-03f, 6.521209011e-03f,
+ };
+ const V linear = Mul(x, Set(d, kLowDivInv));
+ const V poly = EvalRationalPolynomial(d, x, p, q);
+ const V magnitude =
+ IfThenElse(Gt(x, Set(d, kThreshSRGBToLinear)), poly, linear);
+ return Or(AndNot(kSign, magnitude), original_sign);
+ }
+
+ // Error ~5e-07
+ template <class D, class V>
+ JXL_INLINE V EncodedFromDisplay(D d, V x) const {
+ const hwy::HWY_NAMESPACE::Rebind<uint32_t, D> du;
+ const V kSign = BitCast(d, Set(du, 0x80000000u));
+ const V original_sign = And(x, kSign);
+ x = AndNot(kSign, x); // abs
+
+ // Computed via af_cheb_rational (k=100); replicated 4x.
+ HWY_ALIGN constexpr float p[(4 + 1) * 4] = {
+ -5.135152395e-04f, -5.135152395e-04f, -5.135152395e-04f,
+ -5.135152395e-04f, 5.287254571e-03f, 5.287254571e-03f,
+ 5.287254571e-03f, 5.287254571e-03f, 3.903842876e-01f,
+ 3.903842876e-01f, 3.903842876e-01f, 3.903842876e-01f,
+ 1.474205315e+00f, 1.474205315e+00f, 1.474205315e+00f,
+ 1.474205315e+00f, 7.352629620e-01f, 7.352629620e-01f,
+ 7.352629620e-01f, 7.352629620e-01f,
+ };
+ HWY_ALIGN constexpr float q[(4 + 1) * 4] = {
+ 1.004519624e-02f, 1.004519624e-02f, 1.004519624e-02f, 1.004519624e-02f,
+ 3.036675394e-01f, 3.036675394e-01f, 3.036675394e-01f, 3.036675394e-01f,
+ 1.340816930e+00f, 1.340816930e+00f, 1.340816930e+00f, 1.340816930e+00f,
+ 9.258482155e-01f, 9.258482155e-01f, 9.258482155e-01f, 9.258482155e-01f,
+ 2.424867759e-02f, 2.424867759e-02f, 2.424867759e-02f, 2.424867759e-02f,
+ };
+ const V linear = Mul(x, Set(d, kLowDiv));
+ const V poly = EvalRationalPolynomial(d, Sqrt(x), p, q);
+ const V magnitude =
+ IfThenElse(Gt(x, Set(d, kThreshLinearToSRGB)), poly, linear);
+ return Or(AndNot(kSign, magnitude), original_sign);
+ }
+
+ private:
+ static constexpr float kThreshSRGBToLinear = 0.04045f;
+ static constexpr float kThreshLinearToSRGB = 0.0031308f;
+ static constexpr float kLowDiv = 12.92f;
+ static constexpr float kLowDivInv = 1.0f / kLowDiv;
+};
+
+// Linear to sRGB conversion with error of at most 1.2e-4.
+template <typename D, typename V>
+V FastLinearToSRGB(D d, V v) {
+ const hwy::HWY_NAMESPACE::Rebind<uint32_t, D> du;
+ const hwy::HWY_NAMESPACE::Rebind<int32_t, D> di;
+ // Convert to 0.25 - 0.5 range.
+ auto v025_05 = BitCast(
+ d, And(Or(BitCast(du, v), Set(du, 0x3e800000)), Set(du, 0x3effffff)));
+ // third degree polynomial approximation between 0.25 and 0.5
+ // of 1.055/2^(7/2.4) * x^(1/2.4) * 0.5. A degree 4 polynomial only improves
+ // accuracy by about 3x.
+ auto d1 = MulAdd(v025_05, Set(d, 0.059914046f), Set(d, -0.108894556f));
+ auto d2 = MulAdd(d1, v025_05, Set(d, 0.107963754f));
+ auto pow = MulAdd(d2, v025_05, Set(d, 0.018092343f));
+ // Compute extra multiplier depending on exponent. Valid exponent range for
+ // [0.0031308f, 1.0) is 0...8 after subtracting 118.
+ // The next three constants contain a representation of the powers of
+ // 2**(1/2.4) = 2**(5/12) times two; in particular, bits from 26 to 31 are
+ // always the same and in k2to512powers_basebits, and the two arrays contain
+ // the next groups of 8 bits. This ends up being a 22-bit representation (with
+ // a mantissa of 13 bits). The choice of polynomial to approximate is such
+ // that the multiplication factor has the highest 5 bits constant, and that
+ // the factor for the lowest possible exponent is a power of two (thus making
+ // the additional bits 0, which is used to correctly merge back together the
+ // floats).
+ constexpr uint32_t k2to512powers_basebits = 0x40000000;
+ HWY_ALIGN constexpr uint8_t k2to512powers_25to18bits[16] = {
+ 0x0, 0xa, 0x19, 0x26, 0x32, 0x41, 0x4d, 0x5c,
+ 0x68, 0x75, 0x83, 0x8f, 0xa0, 0xaa, 0xb9, 0xc6,
+ };
+ HWY_ALIGN constexpr uint8_t k2to512powers_17to10bits[16] = {
+ 0x0, 0xb7, 0x4, 0xd, 0xcb, 0xe7, 0x41, 0x68,
+ 0x51, 0xd1, 0xeb, 0xf2, 0x0, 0xb7, 0x4, 0xd,
+ };
+ // Note that vld1q_s8_x2 on ARM seems to actually be slower.
+#if HWY_TARGET != HWY_SCALAR
+ using hwy::HWY_NAMESPACE::ShiftLeft;
+ using hwy::HWY_NAMESPACE::ShiftRight;
+ // Every lane of exp is now (if cast to byte) {0, 0, 0, <index for lookup>}.
+ auto exp = Sub(ShiftRight<23>(BitCast(di, v)), Set(di, 118));
+ auto pow25to18bits = TableLookupBytes(
+ LoadDup128(di,
+ reinterpret_cast<const int32_t*>(k2to512powers_25to18bits)),
+ exp);
+ auto pow17to10bits = TableLookupBytes(
+ LoadDup128(di,
+ reinterpret_cast<const int32_t*>(k2to512powers_17to10bits)),
+ exp);
+ // Now, pow* contain {0, 0, 0, <part of float repr of multiplier>}. Here
+ // we take advantage of the fact that each table has its position 0 equal to
+ // 0.
+ // We can now just reassemble the float.
+ auto mul = BitCast(
+ d, Or(Or(ShiftLeft<18>(pow25to18bits), ShiftLeft<10>(pow17to10bits)),
+ Set(di, k2to512powers_basebits)));
+#else
+ // Fallback for scalar.
+ uint32_t exp = ((BitCast(di, v).raw >> 23) - 118) & 0xf;
+ auto mul = BitCast(d, Set(di, (k2to512powers_25to18bits[exp] << 18) |
+ (k2to512powers_17to10bits[exp] << 10) |
+ k2to512powers_basebits));
+#endif
+ return IfThenElse(Lt(v, Set(d, 0.0031308f)), Mul(v, Set(d, 12.92f)),
+ MulAdd(pow, mul, Set(d, -0.055)));
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif // LIB_JXL_TRANSFER_FUNCTIONS_INL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/transpose-inl.h b/third_party/jpeg-xl/lib/jxl/transpose-inl.h
new file mode 100644
index 0000000000..4674420737
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/transpose-inl.h
@@ -0,0 +1,203 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Block transpose for DCT/IDCT
+
+#if defined(LIB_JXL_TRANSPOSE_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_TRANSPOSE_INL_H_
+#undef LIB_JXL_TRANSPOSE_INL_H_
+#else
+#define LIB_JXL_TRANSPOSE_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+#include <type_traits>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dct_block-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+#ifndef JXL_INLINE_TRANSPOSE
+// Workaround for issue #42 - (excessive?) inlining causes invalid codegen.
+#if defined(__arm__)
+#define JXL_INLINE_TRANSPOSE HWY_NOINLINE
+#else
+#define JXL_INLINE_TRANSPOSE HWY_INLINE
+#endif
+#endif // JXL_INLINE_TRANSPOSE
+
+// Simple wrapper that ensures that a function will not be inlined.
+template <typename T, typename... Args>
+JXL_NOINLINE void NoInlineWrapper(const T& f, const Args&... args) {
+ return f(args...);
+}
+
+template <bool enabled>
+struct TransposeSimdTag {};
+
+// TODO(veluca): it's not super useful to have this in the SIMD namespace.
+template <size_t ROWS_or_0, size_t COLS_or_0, class From, class To>
+JXL_INLINE_TRANSPOSE void GenericTransposeBlock(TransposeSimdTag<false>,
+ const From& from, const To& to,
+ size_t ROWSp, size_t COLSp) {
+ size_t ROWS = ROWS_or_0 == 0 ? ROWSp : ROWS_or_0;
+ size_t COLS = COLS_or_0 == 0 ? COLSp : COLS_or_0;
+ for (size_t n = 0; n < ROWS; ++n) {
+ for (size_t m = 0; m < COLS; ++m) {
+ to.Write(from.Read(n, m), m, n);
+ }
+ }
+}
+
+// TODO(veluca): AVX3?
+#if HWY_CAP_GE256
+constexpr bool TransposeUseSimd(size_t ROWS, size_t COLS) {
+ return ROWS % 8 == 0 && COLS % 8 == 0;
+}
+
+template <size_t ROWS_or_0, size_t COLS_or_0, class From, class To>
+JXL_INLINE_TRANSPOSE void GenericTransposeBlock(TransposeSimdTag<true>,
+ const From& from, const To& to,
+ size_t ROWSp, size_t COLSp) {
+ size_t ROWS = ROWS_or_0 == 0 ? ROWSp : ROWS_or_0;
+ size_t COLS = COLS_or_0 == 0 ? COLSp : COLS_or_0;
+ static_assert(MaxLanes(BlockDesc<8>()) == 8, "Invalid descriptor size");
+ static_assert(ROWS_or_0 % 8 == 0, "Invalid number of rows");
+ static_assert(COLS_or_0 % 8 == 0, "Invalid number of columns");
+ for (size_t n = 0; n < ROWS; n += 8) {
+ for (size_t m = 0; m < COLS; m += 8) {
+ const BlockDesc<8> d;
+ auto i0 = from.LoadPart(d, n + 0, m + 0);
+ auto i1 = from.LoadPart(d, n + 1, m + 0);
+ auto i2 = from.LoadPart(d, n + 2, m + 0);
+ auto i3 = from.LoadPart(d, n + 3, m + 0);
+ auto i4 = from.LoadPart(d, n + 4, m + 0);
+ auto i5 = from.LoadPart(d, n + 5, m + 0);
+ auto i6 = from.LoadPart(d, n + 6, m + 0);
+ auto i7 = from.LoadPart(d, n + 7, m + 0);
+ // Surprisingly, this straightforward implementation (24 cycles on port5)
+ // is faster than load128+insert and LoadDup128+ConcatUpperLower+blend.
+ const auto q0 = InterleaveLower(d, i0, i2);
+ const auto q1 = InterleaveLower(d, i1, i3);
+ const auto q2 = InterleaveUpper(d, i0, i2);
+ const auto q3 = InterleaveUpper(d, i1, i3);
+ const auto q4 = InterleaveLower(d, i4, i6);
+ const auto q5 = InterleaveLower(d, i5, i7);
+ const auto q6 = InterleaveUpper(d, i4, i6);
+ const auto q7 = InterleaveUpper(d, i5, i7);
+
+ const auto r0 = InterleaveLower(d, q0, q1);
+ const auto r1 = InterleaveUpper(d, q0, q1);
+ const auto r2 = InterleaveLower(d, q2, q3);
+ const auto r3 = InterleaveUpper(d, q2, q3);
+ const auto r4 = InterleaveLower(d, q4, q5);
+ const auto r5 = InterleaveUpper(d, q4, q5);
+ const auto r6 = InterleaveLower(d, q6, q7);
+ const auto r7 = InterleaveUpper(d, q6, q7);
+
+ i0 = ConcatLowerLower(d, r4, r0);
+ i1 = ConcatLowerLower(d, r5, r1);
+ i2 = ConcatLowerLower(d, r6, r2);
+ i3 = ConcatLowerLower(d, r7, r3);
+ i4 = ConcatUpperUpper(d, r4, r0);
+ i5 = ConcatUpperUpper(d, r5, r1);
+ i6 = ConcatUpperUpper(d, r6, r2);
+ i7 = ConcatUpperUpper(d, r7, r3);
+ to.StorePart(d, i0, m + 0, n + 0);
+ to.StorePart(d, i1, m + 1, n + 0);
+ to.StorePart(d, i2, m + 2, n + 0);
+ to.StorePart(d, i3, m + 3, n + 0);
+ to.StorePart(d, i4, m + 4, n + 0);
+ to.StorePart(d, i5, m + 5, n + 0);
+ to.StorePart(d, i6, m + 6, n + 0);
+ to.StorePart(d, i7, m + 7, n + 0);
+ }
+ }
+}
+#elif HWY_TARGET != HWY_SCALAR
+constexpr bool TransposeUseSimd(size_t ROWS, size_t COLS) {
+ return ROWS % 4 == 0 && COLS % 4 == 0;
+}
+
+template <size_t ROWS_or_0, size_t COLS_or_0, class From, class To>
+JXL_INLINE_TRANSPOSE void GenericTransposeBlock(TransposeSimdTag<true>,
+ const From& from, const To& to,
+ size_t ROWSp, size_t COLSp) {
+ size_t ROWS = ROWS_or_0 == 0 ? ROWSp : ROWS_or_0;
+ size_t COLS = COLS_or_0 == 0 ? COLSp : COLS_or_0;
+ static_assert(MaxLanes(BlockDesc<4>()) == 4, "Invalid descriptor size");
+ static_assert(ROWS_or_0 % 4 == 0, "Invalid number of rows");
+ static_assert(COLS_or_0 % 4 == 0, "Invalid number of columns");
+ for (size_t n = 0; n < ROWS; n += 4) {
+ for (size_t m = 0; m < COLS; m += 4) {
+ const BlockDesc<4> d;
+ const auto p0 = from.LoadPart(d, n + 0, m + 0);
+ const auto p1 = from.LoadPart(d, n + 1, m + 0);
+ const auto p2 = from.LoadPart(d, n + 2, m + 0);
+ const auto p3 = from.LoadPart(d, n + 3, m + 0);
+
+ const auto q0 = InterleaveLower(d, p0, p2);
+ const auto q1 = InterleaveLower(d, p1, p3);
+ const auto q2 = InterleaveUpper(d, p0, p2);
+ const auto q3 = InterleaveUpper(d, p1, p3);
+
+ const auto r0 = InterleaveLower(d, q0, q1);
+ const auto r1 = InterleaveUpper(d, q0, q1);
+ const auto r2 = InterleaveLower(d, q2, q3);
+ const auto r3 = InterleaveUpper(d, q2, q3);
+
+ to.StorePart(d, r0, m + 0, n + 0);
+ to.StorePart(d, r1, m + 1, n + 0);
+ to.StorePart(d, r2, m + 2, n + 0);
+ to.StorePart(d, r3, m + 3, n + 0);
+ }
+ }
+}
+#else
+constexpr bool TransposeUseSimd(size_t ROWS, size_t COLS) { return false; }
+#endif
+
+template <size_t N, size_t M, typename = void>
+struct Transpose {
+ template <typename From, typename To>
+ static void Run(const From& from, const To& to) {
+ // This does not guarantee anything, just saves from the most stupid
+ // mistakes.
+ JXL_DASSERT(from.Address(0, 0) != to.Address(0, 0));
+ TransposeSimdTag<TransposeUseSimd(N, M)> tag;
+ GenericTransposeBlock<N, M>(tag, from, to, N, M);
+ }
+};
+
+// Avoid inlining and unrolling transposes for large blocks.
+template <size_t N, size_t M>
+struct Transpose<
+ N, M, typename std::enable_if<(N >= 8 && M >= 8 && N * M >= 512)>::type> {
+ template <typename From, typename To>
+ static void Run(const From& from, const To& to) {
+ // This does not guarantee anything, just saves from the most stupid
+ // mistakes.
+ JXL_DASSERT(from.Address(0, 0) != to.Address(0, 0));
+ TransposeSimdTag<TransposeUseSimd(N, M)> tag;
+ constexpr void (*transpose)(TransposeSimdTag<TransposeUseSimd(N, M)>,
+ const From&, const To&, size_t, size_t) =
+ GenericTransposeBlock<0, 0, From, To>;
+ NoInlineWrapper(transpose, tag, from, to, N, M);
+ }
+};
+
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif // LIB_JXL_TRANSPOSE_INL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/version.h.in b/third_party/jpeg-xl/lib/jxl/version.h.in
new file mode 100644
index 0000000000..d077abec79
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/version.h.in
@@ -0,0 +1,39 @@
+/* Copyright (c) the JPEG XL Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style
+ * license that can be found in the LICENSE file.
+ */
+
+/** @addtogroup libjxl_common
+ * @{
+ * @file version.h
+ * @brief libjxl version information
+ */
+
+#ifndef JXL_VERSION_H_
+#define JXL_VERSION_H_
+
+#define JPEGXL_MAJOR_VERSION @JPEGXL_MAJOR_VERSION@ ///< JPEG XL Major version
+#define JPEGXL_MINOR_VERSION @JPEGXL_MINOR_VERSION@ ///< JPEG XL Minor version
+#define JPEGXL_PATCH_VERSION @JPEGXL_PATCH_VERSION@ ///< JPEG XL Patch version
+
+/** Can be used to conditionally compile code for a specific JXL version
+ * @param[maj] major version
+ * @param[min] minor version
+ *
+ * @code
+ * #if JPEGXL_NUMERIC_VERSION < JPEGXL_COMPUTE_NUMERIC_VERSION(0,8,0)
+ * // use old/deprecated api
+ * #else
+ * // use current api
+ * #endif
+ * @endcode
+ */
+#define JPEGXL_COMPUTE_NUMERIC_VERSION(major,minor,patch) ((major<<24) | (minor<<16) | (patch<<8) | 0)
+
+/* Numeric representation of the version */
+#define JPEGXL_NUMERIC_VERSION JPEGXL_COMPUTE_NUMERIC_VERSION(JPEGXL_MAJOR_VERSION,JPEGXL_MINOR_VERSION,JPEGXL_PATCH_VERSION)
+
+#endif /* JXL_VERSION_H_ */
+
+/** @}*/
diff --git a/third_party/jpeg-xl/lib/jxl/xorshift128plus-inl.h b/third_party/jpeg-xl/lib/jxl/xorshift128plus-inl.h
new file mode 100644
index 0000000000..a473d591f2
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/xorshift128plus-inl.h
@@ -0,0 +1,103 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Fast but weak random generator.
+
+#if defined(LIB_JXL_XORSHIFT128PLUS_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_XORSHIFT128PLUS_INL_H_
+#undef LIB_JXL_XORSHIFT128PLUS_INL_H_
+#else
+#define LIB_JXL_XORSHIFT128PLUS_INL_H_
+#endif
+
+#include <stddef.h>
+
+#include <hwy/highway.h>
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Add;
+using hwy::HWY_NAMESPACE::ShiftLeft;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Xor;
+
+// Adapted from https://github.com/vpxyz/xorshift/blob/master/xorshift128plus/
+// (MIT-license)
+class Xorshift128Plus {
+ public:
+ // 8 independent generators (= single iteration for AVX-512)
+ enum { N = 8 };
+
+ explicit HWY_MAYBE_UNUSED Xorshift128Plus(const uint64_t seed) {
+ // Init state using SplitMix64 generator
+ s0_[0] = SplitMix64(seed + 0x9E3779B97F4A7C15ull);
+ s1_[0] = SplitMix64(s0_[0]);
+ for (size_t i = 1; i < N; ++i) {
+ s0_[i] = SplitMix64(s1_[i - 1]);
+ s1_[i] = SplitMix64(s0_[i]);
+ }
+ }
+
+ HWY_MAYBE_UNUSED Xorshift128Plus(const uint32_t seed1, const uint32_t seed2,
+ const uint32_t seed3, const uint32_t seed4) {
+ // Init state using SplitMix64 generator
+ s0_[0] = SplitMix64(((static_cast<uint64_t>(seed1) << 32) + seed2) +
+ 0x9E3779B97F4A7C15ull);
+ s1_[0] = SplitMix64(((static_cast<uint64_t>(seed3) << 32) + seed4) +
+ 0x9E3779B97F4A7C15ull);
+ for (size_t i = 1; i < N; ++i) {
+ s0_[i] = SplitMix64(s0_[i - 1]);
+ s1_[i] = SplitMix64(s1_[i - 1]);
+ }
+ }
+
+ HWY_INLINE HWY_MAYBE_UNUSED void Fill(uint64_t* HWY_RESTRICT random_bits) {
+#if HWY_CAP_INTEGER64
+ const HWY_FULL(uint64_t) d;
+ for (size_t i = 0; i < N; i += Lanes(d)) {
+ auto s1 = Load(d, s0_ + i);
+ const auto s0 = Load(d, s1_ + i);
+ const auto bits = Add(s1, s0); // b, c
+ Store(s0, d, s0_ + i);
+ s1 = Xor(s1, ShiftLeft<23>(s1));
+ Store(bits, d, random_bits + i);
+ s1 = Xor(s1, Xor(s0, Xor(ShiftRight<18>(s1), ShiftRight<5>(s0))));
+ Store(s1, d, s1_ + i);
+ }
+#else
+ for (size_t i = 0; i < N; ++i) {
+ auto s1 = s0_[i];
+ const auto s0 = s1_[i];
+ const auto bits = s1 + s0; // b, c
+ s0_[i] = s0;
+ s1 ^= s1 << 23;
+ random_bits[i] = bits;
+ s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5);
+ s1_[i] = s1;
+ }
+#endif
+ }
+
+ private:
+ static uint64_t SplitMix64(uint64_t z) {
+ z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ull;
+ z = (z ^ (z >> 27)) * 0x94D049BB133111EBull;
+ return z ^ (z >> 31);
+ }
+
+ HWY_ALIGN uint64_t s0_[N];
+ HWY_ALIGN uint64_t s1_[N];
+};
+
+} // namespace
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#endif // LIB_JXL_XORSHIFT128PLUS_INL_H_
diff --git a/third_party/jpeg-xl/lib/jxl/xorshift128plus_test.cc b/third_party/jpeg-xl/lib/jxl/xorshift128plus_test.cc
new file mode 100644
index 0000000000..2b0c78b1d1
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl/xorshift128plus_test.cc
@@ -0,0 +1,378 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <vector>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jxl/xorshift128plus_test.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+#include <hwy/tests/test_util-inl.h>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/xorshift128plus-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Or;
+using hwy::HWY_NAMESPACE::ShiftRight;
+using hwy::HWY_NAMESPACE::Sub;
+
+// Define to nonzero in order to print the (new) golden outputs.
+#define PRINT_RESULTS 0
+
+const size_t kVectors = 64;
+
+#if PRINT_RESULTS
+
+template <int kNumLanes>
+void Print(const uint64_t (&result)[kNumLanes]) {
+ printf("{ ");
+ for (int i = 0; i < kNumLanes; ++i) {
+ if (i != 0) {
+ printf(", ");
+ }
+ printf("0x%016llXull", result[i]);
+ }
+ printf("},\n");
+}
+
+#else // PRINT_RESULTS
+
+const uint64_t kExpected[kVectors][Xorshift128Plus::N] = {
+ {0x6E901576D477CBB1ull, 0xE9E53789195DA2A2ull, 0xB681F6DDA5E0AE99ull,
+ 0x8EFD18CE21FD6896ull, 0xA898A80DF75CF532ull, 0x50CEB2C9E2DE7E32ull,
+ 0x3CA7C2FEB25C0DD0ull, 0xA4D0866B80B4D836ull},
+ {0x8CD6A1E6233D3A26ull, 0x3D4603ADE98B112Dull, 0xDC427AF674019E36ull,
+ 0xE28B4D230705AC53ull, 0x7297E9BBA88783DDull, 0x34D3D23CFCD9B41Aull,
+ 0x5A223615ADBE96B8ull, 0xE5EB529027CFBD01ull},
+ {0xC1894CF00DFAC6A2ull, 0x18EDF8AE9085E404ull, 0x8E936625296B4CCDull,
+ 0x31971EF3A14A899Bull, 0xBE87535FCE0BF26Aull, 0x576F7A752BC6649Full,
+ 0xA44CBADCE0C6B937ull, 0x3DBA819BB17A353Aull},
+ {0x27CE38DFCC1C5EB6ull, 0x920BEB5606340256ull, 0x3986CBC40C9AFC2Cull,
+ 0xE22BCB3EEB1E191Eull, 0x6E1FCDD3602A8FBAull, 0x052CB044E5415A29ull,
+ 0x46266646EFB9ECD7ull, 0x8F44914618D29335ull},
+ {0xDD30AEDF72A362C5ull, 0xBC1D824E16BB98F4ull, 0x9EA6009C2AA3D2F1ull,
+ 0xF65C0FBBE17AF081ull, 0x22424D06A8738991ull, 0x8A62763F2B7611D2ull,
+ 0x2F3E89F722637939ull, 0x84D338BEF50AFD50ull},
+ {0x00F46494898E2B0Bull, 0x81239DC4FB8E8003ull, 0x414AD93EC5773FE7ull,
+ 0x791473C450E4110Full, 0x87F127BF68C959ACull, 0x6429282D695EF67Bull,
+ 0x661082E11546CBA8ull, 0x5815D53FA5436BFDull},
+ {0xB3DEADAB9BE6E0F9ull, 0xAA1B7B8F7CED0202ull, 0x4C5ED437699D279Eull,
+ 0xA4471727F1CB39D3ull, 0xE439DA193F802F70ull, 0xF89401BB04FA6493ull,
+ 0x3B08045A4FE898BAull, 0x32137BFE98227950ull},
+ {0xFBAE4A092897FEF3ull, 0x0639F6CE56E71C8Eull, 0xF0AD6465C07F0C1Eull,
+ 0xFF8E28563361DCE5ull, 0xC2013DB7F86BC6B9ull, 0x8EFCC0503330102Full,
+ 0x3F6B767EA5C4DA40ull, 0xB9864B950B2232E1ull},
+ {0x76EB58DE8E5EC22Aull, 0x9BBBF49A18B32F4Full, 0xC8405F02B2B2FAB9ull,
+ 0xC3E122A5F146BC34ull, 0xC90BB046660F5765ull, 0xB933981310DBECCFull,
+ 0x5A2A7BFC9126FD1Cull, 0x8BB388C94DF87901ull},
+ {0x753EB89AD63EF3C3ull, 0xF24AAF40C89D65ADull, 0x23F68931C1A6AA6Dull,
+ 0xF47E79BF702C6DD0ull, 0xA3AD113244EE7EAEull, 0xD42CBEA28F793DC3ull,
+ 0xD896FCF1820F497Cull, 0x042B86D2818948C1ull},
+ {0x8F2A4FC5A4265763ull, 0xEC499E6F95EAA10Cull, 0xE3786D4ECCD0DEB5ull,
+ 0xC725C53D3AC4CC43ull, 0x065A4ACBBF83610Eull, 0x35C61C9FEF167129ull,
+ 0x7B720AEAA7D70048ull, 0x14206B841377D039ull},
+ {0xAD27D78BF96055F6ull, 0x5F43B20FF47ADCD4ull, 0xE184C2401E2BF71Eull,
+ 0x30B263D78990045Dull, 0xC22F00EBFF9BA201ull, 0xAE7F86522B53A562ull,
+ 0x2853312BC039F0A4ull, 0x868D619E6549C3C8ull},
+ {0xFD5493D8AE9A8371ull, 0x773D5E224DF61B3Bull, 0x5377C54FBB1A8280ull,
+ 0xCAD4DE3B8265CAFAull, 0xCDF3F19C91EBD5F6ull, 0xC8EA0F182D73BD78ull,
+ 0x220502D593433FF1ull, 0xB81205E612DC31B1ull},
+ {0x8F32A39EAEDA4C70ull, 0x1D4B0914AA4DAC7Full, 0x56EF1570F3A8B405ull,
+ 0x29812CB17404A592ull, 0x97A2AAF69CAE90F2ull, 0x12BF5E02778BBFE5ull,
+ 0x9D4B55AD42A05FD2ull, 0x06C2BAB5E6086620ull},
+ {0x8DB4B9648302B253ull, 0xD756AD9E3AEA12C7ull, 0x68709B7F11D4B188ull,
+ 0x7CC299DDCD707A4Bull, 0x97B860C370A7661Dull, 0xCECD314FC20E64F5ull,
+ 0x55F412CDFB4C7EC3ull, 0x55EE97591193B525ull},
+ {0xCF70F3ACA96E6254ull, 0x022FEDECA2E09F46ull, 0x686823DB60AE1ECFull,
+ 0xFD36190D3739830Eull, 0x74E1C09027F68120ull, 0xB5883A835C093842ull,
+ 0x93E1EFB927E9E4E3ull, 0xB2721E249D7E5EBEull},
+ {0x69B6E21C44188CB8ull, 0x5D6CFB853655A7AAull, 0x3E001A0B425A66DCull,
+ 0x8C57451103A5138Full, 0x7BF8B4BE18EAB402ull, 0x494102EB8761A365ull,
+ 0xB33796A9F6A81F0Eull, 0x10005AB3BCCFD960ull},
+ {0xB2CF25740AE965DCull, 0x6F7C1DF7EF53D670ull, 0x648DD6087AC2251Eull,
+ 0x040955D9851D487Dull, 0xBD550FC7E21A7F66ull, 0x57408F484DEB3AB5ull,
+ 0x481E24C150B506C1ull, 0x72C0C3EAF91A40D6ull},
+ {0x1997A481858A5D39ull, 0x539718F4BEF50DC1ull, 0x2EC4DC4787E7E368ull,
+ 0xFF1CE78879419845ull, 0xE219A93DD6F6DD30ull, 0x85328618D02FEC1Aull,
+ 0xC86E02D969181B20ull, 0xEBEC8CD8BBA34E6Eull},
+ {0x28B55088A16CE947ull, 0xDD25AC11E6350195ull, 0xBD1F176694257B1Cull,
+ 0x09459CCF9FCC9402ull, 0xF8047341E386C4E4ull, 0x7E8E9A9AD984C6C0ull,
+ 0xA4661E95062AA092ull, 0x70A9947005ED1152ull},
+ {0x4C01CF75DBE98CCDull, 0x0BA076CDFC7373B9ull, 0x6C5E7A004B57FB59ull,
+ 0x336B82297FD3BC56ull, 0x7990C0BE74E8D60Full, 0xF0275CC00EC5C8C8ull,
+ 0x6CF29E682DFAD2E9ull, 0xFA4361524BD95D72ull},
+ {0x631D2A19FF62F018ull, 0x41C43863B985B3FAull, 0xE052B2267038EFD9ull,
+ 0xE2A535FAC575F430ull, 0xE004EEA90B1FF5B8ull, 0x42DFE2CA692A1F26ull,
+ 0x90FB0BFC9A189ECCull, 0x4484102BD3536BD0ull},
+ {0xD027134E9ACCA5A5ull, 0xBBAB4F966D476A9Bull, 0x713794A96E03D693ull,
+ 0x9F6335E6B94CD44Aull, 0xC5090C80E7471617ull, 0x6D9C1B0C87B58E33ull,
+ 0x1969CE82E31185A5ull, 0x2099B97E87754EBEull},
+ {0x60EBAF4ED934350Full, 0xC26FBF0BA5E6ECFFull, 0x9E54150F0312EC57ull,
+ 0x0973B48364ED0041ull, 0x800A523241426CFCull, 0x03AB5EC055F75989ull,
+ 0x8CF315935DEEB40Aull, 0x83D3FC0190BD1409ull},
+ {0x26D35394CF720A51ull, 0xCE9EAA15243CBAFEull, 0xE2B45FBAF21B29E0ull,
+ 0xDB92E98EDE73F9E0ull, 0x79B16F5101C26387ull, 0x1AC15959DE88C86Full,
+ 0x387633AEC6D6A580ull, 0xA6FC05807BFC5EB8ull},
+ {0x2D26C8E47C6BADA9ull, 0x820E6EC832D52D73ull, 0xB8432C3E0ED0EE5Bull,
+ 0x0F84B3C4063AAA87ull, 0xF393E4366854F651ull, 0x749E1B4D2366A567ull,
+ 0x805EACA43480D004ull, 0x244EBF3AA54400A5ull},
+ {0xBFDC3763AA79F75Aull, 0x9E3A74CC751F41DBull, 0xF401302A149DBC55ull,
+ 0x6B25F7973D7BF7BCull, 0x13371D34FDBC3DAEull, 0xC5E1998C8F484DCDull,
+ 0x7031B8AE5C364464ull, 0x3847F0C4F3DA2C25ull},
+ {0x24C6387D2C0F1225ull, 0x77CCE960255C67A4ull, 0x21A0947E497B10EBull,
+ 0xBB5DB73A825A9D7Eull, 0x26294A41999E553Dull, 0x3953E0089F87D925ull,
+ 0x3DAE6E5D4E5EAAFEull, 0x74B545460341A7AAull},
+ {0x710E5EB08A7DB820ull, 0x7E43C4E77CAEA025ull, 0xD4C91529C8B060C1ull,
+ 0x09AE26D8A7B0CA29ull, 0xAB9F356BB360A772ull, 0xB68834A25F19F6E9ull,
+ 0x79B8D9894C5734E2ull, 0xC6847E7C8FFD265Full},
+ {0x10C4BCB06A5111E6ull, 0x57CB50955B6A2516ull, 0xEF53C87798B6995Full,
+ 0xAB38E15BBD8D0197ull, 0xA51C6106EFF73C93ull, 0x83D7F0E2270A7134ull,
+ 0x0923FD330397FCE5ull, 0xF9DE54EDFE58FB45ull},
+ {0x07D44833ACCD1A94ull, 0xAAD3C9E945E2F9F3ull, 0xABF4C879B876AA37ull,
+ 0xF29C69A21B301619ull, 0x2DDCE959111C788Bull, 0x7CEDB48F8AC1729Bull,
+ 0x93F3BA9A02B659BEull, 0xF20A87FF17933CBEull},
+ {0x8E96EBE93180CFE6ull, 0x94CAA12873937079ull, 0x05F613D9380D4189ull,
+ 0xBCAB40C1DC79F38Aull, 0x0AD8907B7C61D19Eull, 0x88534E189D103910ull,
+ 0x2DB2FAABA160AB8Full, 0xA070E7506B06F15Cull},
+ {0x6FB1FCDAFFEF87A9ull, 0xE735CF25337A090Dull, 0x172C6EDCEFEF1825ull,
+ 0x76957EA49EF0542Dull, 0x819BF4CD250F7C49ull, 0xD6FF23E4AD00C4D4ull,
+ 0xE79673C1EC358FF0ull, 0xAC9C048144337938ull},
+ {0x4C5387FF258B3AF4ull, 0xEDB68FAEC2CB1AA3ull, 0x02A624E67B4E1DA4ull,
+ 0x5C44797A38E08AF2ull, 0x36546A70E9411B4Bull, 0x47C17B24D2FD9675ull,
+ 0x101957AAA020CA26ull, 0x47A1619D4779F122ull},
+ {0xF84B8BCDC92D9A3Cull, 0x951D7D2C74B3066Bull, 0x7AC287C06EDDD9B2ull,
+ 0x4C38FC476608D38Full, 0x224D793B19CB4BCDull, 0x835A255899BF1A41ull,
+ 0x4AD250E9F62DB4ABull, 0xD9B44F4B58781096ull},
+ {0xABBAF99A8EB5C6B8ull, 0xFB568E900D3A9F56ull, 0x11EDF63D23C5DF11ull,
+ 0xA9C3011D3FA7C5A8ull, 0xAEDD3CF11AFFF725ull, 0xABCA472B5F1EDD6Bull,
+ 0x0600B6BB5D879804ull, 0xDB4DE007F22191A0ull},
+ {0xD76CC9EFF0CE9392ull, 0xF5E0A772B59BA49Aull, 0x7D1AE1ED0C1261B5ull,
+ 0x79224A33B5EA4F4Aull, 0x6DD825D80C40EA60ull, 0x47FC8E747E51C953ull,
+ 0x695C05F72888BF98ull, 0x1A012428440B9015ull},
+ {0xD754DD61F9B772BFull, 0xC4A2FCF4C0F9D4EBull, 0x461167CDF67A24A2ull,
+ 0x434748490EBCB9D4ull, 0x274DD9CDCA5781DEull, 0x36BAC63BA9A85209ull,
+ 0x30324DAFDA36B70Full, 0x337570DB4FE6DAB3ull},
+ {0xF46CBDD57C551546ull, 0x8E02507E676DA3E3ull, 0xD826245A8C15406Dull,
+ 0xDFB38A5B71113B72ull, 0x5EA38454C95B16B5ull, 0x28C054FB87ABF3E1ull,
+ 0xAA2724C0BA1A8096ull, 0xECA83EC980304F2Full},
+ {0x6AA76EC294EB3303ull, 0x42D4CDB2A8032E3Bull, 0x7999EDF75DCD8735ull,
+ 0xB422BFFE696CCDCCull, 0x8F721461FD7CCDFEull, 0x148E1A5814FDE253ull,
+ 0x4DC941F4375EF8FFull, 0x27B2A9E0EB5B49CFull},
+ {0xCEA592EF9343EBE1ull, 0xF7D38B5FA7698903ull, 0x6CCBF352203FEAB6ull,
+ 0x830F3095FCCDA9C5ull, 0xDBEEF4B81B81C8F4ull, 0x6D7EB9BCEECA5CF9ull,
+ 0xC58ABB0FBE436C69ull, 0xE4B97E6DB2041A4Bull},
+ {0x7E40FC772978AF14ull, 0xCDDA4BBAE28354A1ull, 0xE4F993B832C32613ull,
+ 0xD3608093C68A4B35ull, 0x9A3B60E01BEE3699ull, 0x03BEF248F3288713ull,
+ 0x70B9294318F3E9B4ull, 0x8D2ABB913B8610DEull},
+ {0x37F209128E7D8B2Cull, 0x81D2AB375BD874BCull, 0xA716A1B7373F7408ull,
+ 0x0CEE97BEC4706540ull, 0xA40C5FD9CDBC1512ull, 0x73CAF6C8918409E7ull,
+ 0x45E11BCEDF0BBAA1ull, 0x612C612BFF6E6605ull},
+ {0xF8ECB14A12D0F649ull, 0xDA683CD7C01BA1ACull, 0xA2203F7510E124C1ull,
+ 0x7F83E52E162F3C78ull, 0x77D2BB73456ACADBull, 0x37FC34FC840BBA6Full,
+ 0x3076BC7D4C6EBC1Full, 0x4F514123632B5FA9ull},
+ {0x44D789DED935E884ull, 0xF8291591E09FEC9Full, 0xD9CED2CF32A2E4B7ull,
+ 0x95F70E1EB604904Aull, 0xDE438FE43C14F6ABull, 0x4C8D23E4FAFCF8D8ull,
+ 0xC716910A3067EB86ull, 0x3D6B7915315095D3ull},
+ {0x3170FDBADAB92095ull, 0x8F1963933FC5650Bull, 0x72F94F00ABECFEABull,
+ 0x6E3AE826C6AAB4CEull, 0xA677A2BF31068258ull, 0x9660CDC4F363AF10ull,
+ 0xD81A15A152379EF1ull, 0x5D7D285E1080A3F9ull},
+ {0xDAD5DDFF9A2249B3ull, 0x6F9721D926103FAEull, 0x1418CBB83FFA349Aull,
+ 0xE71A30AD48C012B2ull, 0xBE76376C63751132ull, 0x3496467ACA713AE6ull,
+ 0x8D7EC01369F991A3ull, 0xD8C73A88B96B154Eull},
+ {0x8B5D9C74AEB4833Aull, 0xF914FB3F867B912Full, 0xB894EA034936B1DCull,
+ 0x8A16D21BE51C4F5Bull, 0x31FF048ED582D98Eull, 0xB95AB2F4DC65B820ull,
+ 0x04082B9170561AF7ull, 0xA215610A5DC836FAull},
+ {0xB2ADE592C092FAACull, 0x7A1E683BCBF13294ull, 0xC7A4DBF86858C096ull,
+ 0x3A49940F97BFF316ull, 0xCAE5C06B82C46703ull, 0xC7F413A0F951E2BDull,
+ 0x6665E7BB10EB5916ull, 0x86F84A5A94EDE319ull},
+ {0x4EA199D8FAA79CA3ull, 0xDFA26E5BF1981704ull, 0x0F5E081D37FA4E01ull,
+ 0x9CB632F89CD675CDull, 0x4A09DB89D48C0304ull, 0x88142742EA3C7672ull,
+ 0xAC4F149E6D2E9BDBull, 0x6D9E1C23F8B1C6C6ull},
+ {0xD58BE47B92DEC0E9ull, 0x8E57573645E34328ull, 0x4CC094CCB5FB5126ull,
+ 0x5F1D66AF6FB40E3Cull, 0x2BA15509132D3B00ull, 0x0D6545646120E567ull,
+ 0x3CF680C45C223666ull, 0x96B28E32930179DAull},
+ {0x5900C45853AC7990ull, 0x61881E3E8B7FF169ull, 0x4DE5F835DF2230FFull,
+ 0x4427A9E7932F73FFull, 0x9B641BAD379A8C8Dull, 0xDF271E5BF98F4E5Cull,
+ 0xDFDA16DB830FF5EEull, 0x371C7E7CFB89C0E9ull},
+ {0x4410A8576247A250ull, 0x6AD2DA12B45AC0D9ull, 0x18DFC72AAC85EECCull,
+ 0x06FC8BB2A0EF25C8ull, 0xEB287619C85E6118ull, 0x19553ECA67F25A2Cull,
+ 0x3B9557F1DCEC5BAAull, 0x7BAD9E8B710D1079ull},
+ {0x34F365D66BD22B28ull, 0xE6E124B9F10F835Dull, 0x0573C38ABF2B24DCull,
+ 0xD32E6AF10A0125AEull, 0x383590ACEA979519ull, 0x8376ED7A39E28205ull,
+ 0xF0B7F184DCBDA435ull, 0x062A203390E31794ull},
+ {0xA2AFFD7E41918760ull, 0x7F90FC1BD0819C86ull, 0x5033C08E5A969533ull,
+ 0x2707AF5C6D039590ull, 0x57BBD5980F17DF9Cull, 0xD3FE6E61D763268Aull,
+ 0x9E0A0AE40F335A3Bull, 0x43CF4EB0A99613C5ull},
+ {0xD4D2A397CE1A7C2Eull, 0x3DF7CE7CC3212DADull, 0x0880F0D5D356C75Aull,
+ 0xA8AFC44DD03B1346ull, 0x79263B46C13A29E0ull, 0x11071B3C0ED58E7Aull,
+ 0xED46DC9F538406BFull, 0x2C94974F2B94843Dull},
+ {0xE246E13C39AB5D5Eull, 0xAC1018489D955B20ull, 0x8601B558771852B8ull,
+ 0x110BD4C06DB40173ull, 0x738FC8A18CCA0EBBull, 0x6673E09BE0EA76E5ull,
+ 0x024BC7A0C7527877ull, 0x45E6B4652E2EC34Eull},
+ {0xD1ED26A1A375CDC8ull, 0xAABC4E896A617CB8ull, 0x0A9C9E8E57D753C6ull,
+ 0xA3774A75FEB4C30Eull, 0x30B816C01C93E49Eull, 0xF405BABC06D2408Cull,
+ 0xCC0CE6B4CE788ABCull, 0x75E7922D0447956Cull},
+ {0xD07C1676A698BC95ull, 0x5F9AEA4840E2D860ull, 0xD5FC10D58BDF6F02ull,
+ 0xF190A2AD4BC2EEA7ull, 0x0C24D11F51726931ull, 0xDB646899A16B6512ull,
+ 0x7BC10670047B1DD8ull, 0x2413A5ABCD45F092ull},
+ {0x4E66892190CFD923ull, 0xF10162440365EC8Eull, 0x158ACA5A6A2280AEull,
+ 0x0D60ED11C0224166ull, 0x7CD2E9A71B9D7488ull, 0x450D7289706AB2A3ull,
+ 0x88FAE34EC9A0D7DCull, 0x96FF9103575A97DAull},
+ {0x77990FAC6046C446ull, 0xB174B5FB30C76676ull, 0xE352CE3EB56CF82Aull,
+ 0xC6039B6873A9A082ull, 0xE3F80F3AE333148Aull, 0xB853BA24BA3539B9ull,
+ 0xE8863E52ECCB0C74ull, 0x309B4CC1092CC245ull},
+ {0xBC2B70BEE8388D9Full, 0xE48D92AE22216DCEull, 0xF15F3BF3E2C15D8Full,
+ 0x1DD964D4812D8B24ull, 0xD56AF02FB4665E4Cull, 0x98002200595BD9A3ull,
+ 0x049246D50BB8FA12ull, 0x1B542DF485B579B9ull},
+ {0x2347409ADFA8E497ull, 0x36015C2211D62498ull, 0xE9F141F32EB82690ull,
+ 0x1F839912D0449FB9ull, 0x4E4DCFFF2D02D97Cull, 0xF8A03AB4C0F625C9ull,
+ 0x0605F575795DAC5Cull, 0x4746C9BEA0DDA6B1ull},
+ {0xCA5BB519ECE7481Bull, 0xFD496155E55CA945ull, 0xF753B9DBB1515F81ull,
+ 0x50549E8BAC0F70E7ull, 0x8614FB0271E21C60ull, 0x60C72947EB0F0070ull,
+ 0xA6511C10AEE742B6ull, 0x48FB48F2CACCB43Eull}};
+
+#endif // PRINT_RESULTS
+
+// Ensures Xorshift128+ returns consistent and unchanging values.
+void TestGolden() {
+ HWY_ALIGN Xorshift128Plus rng(12345);
+ for (uint64_t vector = 0; vector < kVectors; ++vector) {
+ HWY_ALIGN uint64_t lanes[Xorshift128Plus::N];
+ rng.Fill(lanes);
+#if PRINT_RESULTS
+ Print(lanes);
+#else
+ for (size_t i = 0; i < Xorshift128Plus::N; ++i) {
+ ASSERT_EQ(kExpected[vector][i], lanes[i])
+ << "Where vector=" << vector << " i=" << i;
+ }
+#endif
+ }
+}
+
+// Output changes when given different seeds
+void TestSeedChanges() {
+ HWY_ALIGN uint64_t lanes[Xorshift128Plus::N];
+
+ std::vector<uint64_t> first;
+ constexpr size_t kNumSeeds = 16384;
+ first.reserve(kNumSeeds);
+
+ // All 14-bit seeds
+ for (size_t seed = 0; seed < kNumSeeds; ++seed) {
+ HWY_ALIGN Xorshift128Plus rng(seed);
+
+ rng.Fill(lanes);
+ first.push_back(lanes[0]);
+ }
+
+ // All outputs are unique
+ ASSERT_EQ(kNumSeeds, first.size());
+ std::sort(first.begin(), first.end());
+ first.erase(std::unique(first.begin(), first.end()), first.end());
+ EXPECT_EQ(kNumSeeds, first.size());
+}
+
+void TestFloat() {
+ test::ThreadPoolForTests pool(8);
+
+#ifdef JXL_DISABLE_SLOW_TESTS
+ const uint32_t kMaxSeed = 256;
+#else // JXL_DISABLE_SLOW_TESTS
+ const uint32_t kMaxSeed = 4096;
+#endif // JXL_DISABLE_SLOW_TESTS
+ EXPECT_TRUE(RunOnPool(
+ &pool, 0, kMaxSeed, ThreadPool::NoInit,
+ [](const uint32_t seed, size_t /*thread*/) {
+ HWY_ALIGN Xorshift128Plus rng(seed);
+
+ const HWY_FULL(uint32_t) du;
+ const HWY_FULL(float) df;
+ HWY_ALIGN uint64_t batch[Xorshift128Plus::N];
+ HWY_ALIGN float lanes[MaxLanes(df)];
+ double sum = 0.0;
+ size_t count = 0;
+ const size_t kReps = 2000;
+ for (size_t reps = 0; reps < kReps; ++reps) {
+ rng.Fill(batch);
+ for (size_t i = 0; i < Xorshift128Plus::N * 2; i += Lanes(df)) {
+ const auto bits =
+ Load(du, reinterpret_cast<const uint32_t*>(batch) + i);
+ // 1.0 + 23 random mantissa bits = [1, 2)
+ const auto rand12 =
+ BitCast(df, Or(ShiftRight<9>(bits), Set(du, 0x3F800000)));
+ const auto rand01 = Sub(rand12, Set(df, 1.0f));
+ Store(rand01, df, lanes);
+ for (float lane : lanes) {
+ sum += lane;
+ count += 1;
+ EXPECT_LE(lane, 1.0f);
+ EXPECT_GE(lane, 0.0f);
+ }
+ }
+ }
+
+ // Verify average (uniform distribution)
+ EXPECT_NEAR(0.5, sum / count, 0.00702);
+ },
+ "TestXorShift"));
+}
+
+// Not more than one 64-bit zero
+void TestNotZero() {
+ test::ThreadPoolForTests pool(8);
+
+#ifdef JXL_DISABLE_SLOW_TESTS
+ const uint32_t kMaxSeed = 500;
+#else // JXL_DISABLE_SLOW_TESTS
+ const uint32_t kMaxSeed = 2000;
+#endif // JXL_DISABLE_SLOW_TESTS
+ EXPECT_TRUE(RunOnPool(
+ &pool, 0, kMaxSeed, ThreadPool::NoInit,
+ [](const uint32_t task, size_t /*thread*/) {
+ HWY_ALIGN uint64_t lanes[Xorshift128Plus::N];
+
+ HWY_ALIGN Xorshift128Plus rng(task);
+ size_t num_zero = 0;
+ for (size_t vectors = 0; vectors < 10000; ++vectors) {
+ rng.Fill(lanes);
+ for (uint64_t lane : lanes) {
+ num_zero += static_cast<size_t>(lane == 0);
+ }
+ }
+ EXPECT_LE(num_zero, 1u);
+ },
+ "TestNotZero"));
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+class Xorshift128Test : public hwy::TestWithParamTarget {};
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(Xorshift128Test);
+
+HWY_EXPORT_AND_TEST_P(Xorshift128Test, TestNotZero);
+HWY_EXPORT_AND_TEST_P(Xorshift128Test, TestGolden);
+HWY_EXPORT_AND_TEST_P(Xorshift128Test, TestSeedChanges);
+HWY_EXPORT_AND_TEST_P(Xorshift128Test, TestFloat);
+
+} // namespace jxl
+#endif
diff --git a/third_party/jpeg-xl/lib/jxl_benchmark.cmake b/third_party/jpeg-xl/lib/jxl_benchmark.cmake
new file mode 100644
index 0000000000..10871e3073
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl_benchmark.cmake
@@ -0,0 +1,36 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include(jxl_lists.cmake)
+
+# benchmark.h doesn't work in our MINGW set up since it ends up including the
+# wrong stdlib header. We don't run gbench on MINGW targets anyway.
+if(NOT MINGW)
+
+# This is the Google benchmark project (https://github.com/google/benchmark).
+find_package(benchmark QUIET)
+
+if(benchmark_FOUND)
+ if(JPEGXL_STATIC AND NOT MINGW)
+ # benchmark::benchmark hardcodes the librt.so which obviously doesn't
+ # compile in static mode.
+ set_target_properties(benchmark::benchmark PROPERTIES
+ INTERFACE_LINK_LIBRARIES "Threads::Threads;-lrt")
+ endif()
+
+ # Compiles all the benchmark files into a single binary. Individual benchmarks
+ # can be run with --benchmark_filter.
+ add_executable(jxl_gbench "${JPEGXL_INTERNAL_GBENCH_SOURCES}" gbench_main.cc)
+
+ target_compile_definitions(jxl_gbench PRIVATE
+ -DTEST_DATA_PATH="${JPEGXL_TEST_DATA_PATH}")
+ target_link_libraries(jxl_gbench
+ jxl_extras-static
+ jxl-static
+ benchmark::benchmark
+ )
+endif() # benchmark_FOUND
+
+endif() # MINGW
diff --git a/third_party/jpeg-xl/lib/jxl_extras.cmake b/third_party/jpeg-xl/lib/jxl_extras.cmake
new file mode 100644
index 0000000000..c1071278e4
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl_extras.cmake
@@ -0,0 +1,169 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include(compatibility.cmake)
+include(jxl_lists.cmake)
+
+list(APPEND JPEGXL_EXTRAS_CORE_SOURCES
+ "${JPEGXL_INTERNAL_EXTRAS_SOURCES}"
+ "${JPEGXL_INTERNAL_CODEC_JXL_SOURCES}"
+ "${JPEGXL_INTERNAL_CODEC_PGX_SOURCES}"
+ "${JPEGXL_INTERNAL_CODEC_PNM_SOURCES}"
+ "${JPEGXL_INTERNAL_CODEC_NPY_SOURCES}"
+)
+
+add_library(jxl_extras_codec-obj OBJECT "${JPEGXL_EXTRAS_CORE_SOURCES}")
+target_compile_options(jxl_extras_codec-obj PRIVATE "${JPEGXL_INTERNAL_FLAGS}")
+target_compile_definitions(jxl_extras_codec-obj PRIVATE -DJXL_EXPORT=)
+set_property(TARGET jxl_extras_codec-obj PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_include_directories(jxl_extras_codec-obj PUBLIC
+ ${PROJECT_SOURCE_DIR}
+ ${CMAKE_CURRENT_SOURCE_DIR}/include
+ ${CMAKE_CURRENT_BINARY_DIR}/include
+ ${JXL_HWY_INCLUDE_DIRS}
+)
+set(JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES)
+set(JXL_EXTRAS_CODEC_PUBLIC_COMPILE_DEFINITIONS)
+
+# We only define a static library for jxl_extras since it uses internal parts
+# of jxl library which are not accessible from outside the library in the
+# shared library case.
+add_library(jxl_extras-static STATIC EXCLUDE_FROM_ALL
+ "${JPEGXL_EXTRAS_CORE_SOURCES}"
+ "${JPEGXL_INTERNAL_EXTRAS_FOR_TOOLS_SOURCES}"
+)
+target_compile_options(jxl_extras-static PRIVATE "${JPEGXL_INTERNAL_FLAGS}")
+set_property(TARGET jxl_extras-static PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_include_directories(jxl_extras-static PUBLIC "${PROJECT_SOURCE_DIR}")
+target_link_libraries(jxl_extras-static PUBLIC
+ jxl-static
+ jxl_threads-static
+)
+
+# Define an extras library that does not have the image codecs, only the core
+# extras code. This is needed for some of the fuzzers.
+add_library(jxl_extras_nocodec-static STATIC EXCLUDE_FROM_ALL
+ "${JPEGXL_EXTRAS_CORE_SOURCES}"
+ "${JPEGXL_INTERNAL_EXTRAS_FOR_TOOLS_SOURCES}"
+)
+target_compile_options(jxl_extras_nocodec-static PRIVATE "${JPEGXL_INTERNAL_FLAGS}")
+set_property(TARGET jxl_extras_nocodec-static PROPERTY POSITION_INDEPENDENT_CODE ON)
+target_include_directories(jxl_extras_nocodec-static PUBLIC "${PROJECT_SOURCE_DIR}")
+target_link_libraries(jxl_extras_nocodec-static PUBLIC
+ jxl-static
+ jxl_threads-static
+)
+
+find_package(GIF 5.1)
+if(GIF_FOUND)
+ target_sources(jxl_extras_codec-obj PRIVATE
+ extras/dec/gif.cc
+ extras/dec/gif.h
+ )
+ target_include_directories(jxl_extras_codec-obj PRIVATE "${GIF_INCLUDE_DIRS}")
+ list(APPEND JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES ${GIF_LIBRARIES})
+ list(APPEND JXL_EXTRAS_CODEC_PUBLIC_DEFINITIONS -DJPEGXL_ENABLE_GIF=1)
+ if(JPEGXL_DEP_LICENSE_DIR)
+ configure_file("${JPEGXL_DEP_LICENSE_DIR}/libgif-dev/copyright"
+ ${PROJECT_BINARY_DIR}/LICENSE.libgif COPYONLY)
+ endif() # JPEGXL_DEP_LICENSE_DIR
+endif()
+
+find_package(JPEG)
+if(JPEG_FOUND)
+ target_sources(jxl_extras_codec-obj PRIVATE
+ "${JPEGXL_INTERNAL_CODEC_JPG_SOURCES}"
+ )
+ target_include_directories(jxl_extras_codec-obj PRIVATE "${JPEG_INCLUDE_DIRS}")
+ list(APPEND JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES ${JPEG_LIBRARIES})
+ list(APPEND JXL_EXTRAS_CODEC_PUBLIC_DEFINITIONS -DJPEGXL_ENABLE_JPEG=1)
+ target_sources(jxl_extras-static PRIVATE
+ "${JPEGXL_INTERNAL_CODEC_JPG_SOURCES}"
+ )
+ target_include_directories(jxl_extras-static PRIVATE "${JPEG_INCLUDE_DIRS}")
+ target_link_libraries(jxl_extras-static PRIVATE ${JPEG_LIBRARIES})
+ target_compile_definitions(jxl_extras-static PUBLIC -DJPEGXL_ENABLE_JPEG=1)
+ if(JPEGXL_ENABLE_JPEGLI)
+ target_sources(jxl_extras-static PRIVATE
+ "${JPEGXL_INTERNAL_CODEC_JPEGLI_SOURCES}"
+ )
+ target_link_libraries(jxl_extras-static PRIVATE jpegli-static)
+ target_compile_definitions(jxl_extras-static PUBLIC -DJPEGXL_ENABLE_JPEGLI=1)
+ endif()
+ if(JPEGXL_DEP_LICENSE_DIR)
+ configure_file("${JPEGXL_DEP_LICENSE_DIR}/libjpeg-dev/copyright"
+ ${PROJECT_BINARY_DIR}/LICENSE.libjpeg COPYONLY)
+ endif() # JPEGXL_DEP_LICENSE_DIR
+endif()
+
+if(NOT JPEGXL_BUNDLE_LIBPNG)
+ find_package(PNG)
+endif()
+if(PNG_FOUND)
+ target_sources(jxl_extras_codec-obj PRIVATE
+ "${JPEGXL_INTERNAL_CODEC_APNG_SOURCES}"
+ )
+ target_include_directories(jxl_extras_codec-obj PRIVATE "${PNG_INCLUDE_DIRS}")
+ list(APPEND JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES ${PNG_LIBRARIES})
+ list(APPEND JXL_EXTRAS_CODEC_PUBLIC_DEFINITIONS -DJPEGXL_ENABLE_APNG=1)
+ target_sources(jxl_extras-static PRIVATE
+ "${JPEGXL_INTERNAL_CODEC_APNG_SOURCES}"
+ )
+ target_include_directories(jxl_extras-static PUBLIC "${PNG_INCLUDE_DIRS}")
+ target_link_libraries(jxl_extras-static PUBLIC ${PNG_LIBRARIES})
+ target_compile_definitions(jxl_extras-static PUBLIC -DJPEGXL_ENABLE_APNG=1)
+ configure_file(extras/LICENSE.apngdis
+ ${PROJECT_BINARY_DIR}/LICENSE.apngdis COPYONLY)
+endif()
+
+if (JPEGXL_ENABLE_SJPEG)
+ target_compile_definitions(jxl_extras-static PUBLIC -DJPEGXL_ENABLE_SJPEG=1)
+ target_link_libraries(jxl_extras-static PRIVATE sjpeg)
+endif ()
+
+if (JPEGXL_ENABLE_OPENEXR)
+pkg_check_modules(OpenEXR IMPORTED_TARGET OpenEXR)
+if (OpenEXR_FOUND)
+ target_sources(jxl_extras_codec-obj PRIVATE
+ "${JPEGXL_INTERNAL_CODEC_EXR_SOURCES}"
+ )
+ list(APPEND JXL_EXTRAS_CODEC_PUBLIC_DEFINITIONS -DJPEGXL_ENABLE_EXR=1)
+ target_include_directories(jxl_extras_codec-obj PRIVATE "${OpenEXR_INCLUDE_DIRS}")
+ list(APPEND JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES PkgConfig::OpenEXR)
+ target_sources(jxl_extras-static PRIVATE
+ "${JPEGXL_INTERNAL_CODEC_EXR_SOURCES}"
+ )
+ target_compile_definitions(jxl_extras-static PUBLIC -DJPEGXL_ENABLE_EXR=1)
+ target_link_libraries(jxl_extras-static PRIVATE PkgConfig::OpenEXR)
+ if(JPEGXL_DEP_LICENSE_DIR)
+ configure_file("${JPEGXL_DEP_LICENSE_DIR}/libopenexr-dev/copyright"
+ ${PROJECT_BINARY_DIR}/LICENSE.libopenexr COPYONLY)
+ endif() # JPEGXL_DEP_LICENSE_DIR
+ # OpenEXR generates exceptions, so we need exception support to catch them.
+ # Actually those flags counteract the ones set in JPEGXL_INTERNAL_FLAGS.
+ if (NOT WIN32)
+ set_source_files_properties(extras/dec/exr.cc extras/enc/exr.cc PROPERTIES COMPILE_FLAGS -fexceptions)
+ if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ set_source_files_properties(extras/dec/exr.cc extras/enc/exr.cc PROPERTIES COMPILE_FLAGS -fcxx-exceptions)
+ endif()
+ endif()
+endif() # OpenEXR_FOUND
+endif() # JPEGXL_ENABLE_OPENEXR
+
+target_compile_definitions(jxl_extras_codec-obj PRIVATE ${JXL_EXTRAS_CODEC_PUBLIC_DEFINITIONS})
+
+### Static library.
+add_library(jxl_extras_codec-static STATIC $<TARGET_OBJECTS:jxl_extras_codec-obj>)
+target_compile_definitions(jxl_extras_codec-static PUBLIC ${JXL_EXTRAS_CODEC_PUBLIC_DEFINITIONS})
+target_link_libraries(jxl_extras_codec-static PRIVATE ${JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES} jxl)
+
+### Shared library.
+if (BUILD_SHARED_LIBS)
+add_library(jxl_extras_codec SHARED $<TARGET_OBJECTS:jxl_extras_codec-obj>)
+target_compile_definitions(jxl_extras_codec PUBLIC ${JXL_EXTRAS_CODEC_PUBLIC_DEFINITIONS})
+target_link_libraries(jxl_extras_codec PRIVATE ${JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES} jxl)
+else()
+add_library(jxl_extras_codec ALIAS jxl_extras_codec-static)
+endif() # BUILD_SHARED_LIBS
diff --git a/third_party/jpeg-xl/lib/jxl_lists.bzl b/third_party/jpeg-xl/lib/jxl_lists.bzl
new file mode 100644
index 0000000000..dbaf90a659
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl_lists.bzl
@@ -0,0 +1,637 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# This file is generated, do not modify by manually.
+# Run `tools/scripts/build_cleaner.py --update` to regenerate it.
+
+libjxl_base_sources = [
+ "jxl/base/arch_macros.h",
+ "jxl/base/bits.h",
+ "jxl/base/byte_order.h",
+ "jxl/base/cache_aligned.cc",
+ "jxl/base/cache_aligned.h",
+ "jxl/base/compiler_specific.h",
+ "jxl/base/data_parallel.cc",
+ "jxl/base/data_parallel.h",
+ "jxl/base/file_io.h",
+ "jxl/base/float.h",
+ "jxl/base/iaca.h",
+ "jxl/base/os_macros.h",
+ "jxl/base/override.h",
+ "jxl/base/padded_bytes.cc",
+ "jxl/base/padded_bytes.h",
+ "jxl/base/printf_macros.h",
+ "jxl/base/profiler.cc",
+ "jxl/base/profiler.h",
+ "jxl/base/random.cc",
+ "jxl/base/random.h",
+ "jxl/base/sanitizer_definitions.h",
+ "jxl/base/scope_guard.h",
+ "jxl/base/span.h",
+ "jxl/base/status.h",
+ "jxl/base/tsc_timer.h",
+]
+
+libjxl_codec_apng_sources = [
+ "extras/dec/apng.cc",
+ "extras/dec/apng.h",
+ "extras/enc/apng.cc",
+ "extras/enc/apng.h",
+]
+
+libjxl_codec_exr_sources = [
+ "extras/dec/exr.cc",
+ "extras/dec/exr.h",
+ "extras/enc/exr.cc",
+ "extras/enc/exr.h",
+]
+
+libjxl_codec_gif_sources = [
+ "extras/dec/gif.cc",
+ "extras/dec/gif.h",
+]
+
+libjxl_codec_jpegli_sources = [
+ "extras/dec/jpegli.cc",
+ "extras/dec/jpegli.h",
+ "extras/enc/jpegli.cc",
+ "extras/enc/jpegli.h",
+]
+
+libjxl_codec_jpg_sources = [
+ "extras/dec/jpg.cc",
+ "extras/dec/jpg.h",
+ "extras/enc/jpg.cc",
+ "extras/enc/jpg.h",
+]
+
+libjxl_codec_jxl_sources = [
+ "extras/dec/jxl.cc",
+ "extras/dec/jxl.h",
+ "extras/enc/jxl.cc",
+ "extras/enc/jxl.h",
+]
+
+libjxl_codec_npy_sources = [
+ "extras/enc/npy.cc",
+ "extras/enc/npy.h",
+]
+
+libjxl_codec_pgx_sources = [
+ "extras/dec/pgx.cc",
+ "extras/dec/pgx.h",
+ "extras/enc/pgx.cc",
+ "extras/enc/pgx.h",
+]
+
+libjxl_codec_pnm_sources = [
+ "extras/dec/pnm.cc",
+ "extras/dec/pnm.h",
+ "extras/enc/pnm.cc",
+ "extras/enc/pnm.h",
+]
+
+libjxl_dec_box_sources = [
+ "jxl/box_content_decoder.cc",
+ "jxl/box_content_decoder.h",
+]
+
+libjxl_dec_jpeg_sources = [
+ "jxl/decode_to_jpeg.cc",
+ "jxl/decode_to_jpeg.h",
+ "jxl/jpeg/dec_jpeg_data.cc",
+ "jxl/jpeg/dec_jpeg_data.h",
+ "jxl/jpeg/dec_jpeg_data_writer.cc",
+ "jxl/jpeg/dec_jpeg_data_writer.h",
+ "jxl/jpeg/dec_jpeg_output_chunk.h",
+ "jxl/jpeg/dec_jpeg_serialization_state.h",
+ "jxl/jpeg/jpeg_data.cc",
+ "jxl/jpeg/jpeg_data.h",
+]
+
+libjxl_dec_sources = [
+ "jxl/ac_context.h",
+ "jxl/ac_strategy.cc",
+ "jxl/ac_strategy.h",
+ "jxl/alpha.cc",
+ "jxl/alpha.h",
+ "jxl/ans_common.cc",
+ "jxl/ans_common.h",
+ "jxl/ans_params.h",
+ "jxl/blending.cc",
+ "jxl/blending.h",
+ "jxl/chroma_from_luma.cc",
+ "jxl/chroma_from_luma.h",
+ "jxl/codec_in_out.h",
+ "jxl/coeff_order.cc",
+ "jxl/coeff_order.h",
+ "jxl/coeff_order_fwd.h",
+ "jxl/color_encoding_internal.cc",
+ "jxl/color_encoding_internal.h",
+ "jxl/color_management.cc",
+ "jxl/color_management.h",
+ "jxl/common.h",
+ "jxl/compressed_dc.cc",
+ "jxl/compressed_dc.h",
+ "jxl/convolve-inl.h",
+ "jxl/convolve.h",
+ "jxl/convolve_separable5.cc",
+ "jxl/convolve_separable7.cc",
+ "jxl/convolve_slow.cc",
+ "jxl/convolve_symmetric3.cc",
+ "jxl/convolve_symmetric5.cc",
+ "jxl/dct-inl.h",
+ "jxl/dct_block-inl.h",
+ "jxl/dct_scales.cc",
+ "jxl/dct_scales.h",
+ "jxl/dct_util.h",
+ "jxl/dec_ans.cc",
+ "jxl/dec_ans.h",
+ "jxl/dec_bit_reader.h",
+ "jxl/dec_cache.cc",
+ "jxl/dec_cache.h",
+ "jxl/dec_context_map.cc",
+ "jxl/dec_context_map.h",
+ "jxl/dec_external_image.cc",
+ "jxl/dec_external_image.h",
+ "jxl/dec_frame.cc",
+ "jxl/dec_frame.h",
+ "jxl/dec_group.cc",
+ "jxl/dec_group.h",
+ "jxl/dec_group_border.cc",
+ "jxl/dec_group_border.h",
+ "jxl/dec_huffman.cc",
+ "jxl/dec_huffman.h",
+ "jxl/dec_modular.cc",
+ "jxl/dec_modular.h",
+ "jxl/dec_noise.cc",
+ "jxl/dec_noise.h",
+ "jxl/dec_patch_dictionary.cc",
+ "jxl/dec_patch_dictionary.h",
+ "jxl/dec_tone_mapping-inl.h",
+ "jxl/dec_transforms-inl.h",
+ "jxl/dec_xyb-inl.h",
+ "jxl/dec_xyb.cc",
+ "jxl/dec_xyb.h",
+ "jxl/decode.cc",
+ "jxl/entropy_coder.cc",
+ "jxl/entropy_coder.h",
+ "jxl/epf.cc",
+ "jxl/epf.h",
+ "jxl/exif.h",
+ "jxl/fast_dct-inl.h",
+ "jxl/fast_dct.cc",
+ "jxl/fast_dct.h",
+ "jxl/fast_dct128-inl.h",
+ "jxl/fast_dct16-inl.h",
+ "jxl/fast_dct256-inl.h",
+ "jxl/fast_dct32-inl.h",
+ "jxl/fast_dct64-inl.h",
+ "jxl/fast_dct8-inl.h",
+ "jxl/fast_math-inl.h",
+ "jxl/field_encodings.h",
+ "jxl/fields.cc",
+ "jxl/fields.h",
+ "jxl/frame_header.cc",
+ "jxl/frame_header.h",
+ "jxl/gauss_blur.cc",
+ "jxl/gauss_blur.h",
+ "jxl/headers.cc",
+ "jxl/headers.h",
+ "jxl/huffman_table.cc",
+ "jxl/huffman_table.h",
+ "jxl/icc_codec.cc",
+ "jxl/icc_codec.h",
+ "jxl/icc_codec_common.cc",
+ "jxl/icc_codec_common.h",
+ "jxl/image.cc",
+ "jxl/image.h",
+ "jxl/image_bundle.cc",
+ "jxl/image_bundle.h",
+ "jxl/image_metadata.cc",
+ "jxl/image_metadata.h",
+ "jxl/image_ops.h",
+ "jxl/inverse_mtf-inl.h",
+ "jxl/jxl_inspection.h",
+ "jxl/lehmer_code.h",
+ "jxl/loop_filter.cc",
+ "jxl/loop_filter.h",
+ "jxl/luminance.cc",
+ "jxl/luminance.h",
+ "jxl/matrix_ops.h",
+ "jxl/memory_manager_internal.cc",
+ "jxl/memory_manager_internal.h",
+ "jxl/modular/encoding/context_predict.h",
+ "jxl/modular/encoding/dec_ma.cc",
+ "jxl/modular/encoding/dec_ma.h",
+ "jxl/modular/encoding/encoding.cc",
+ "jxl/modular/encoding/encoding.h",
+ "jxl/modular/encoding/ma_common.h",
+ "jxl/modular/modular_image.cc",
+ "jxl/modular/modular_image.h",
+ "jxl/modular/options.h",
+ "jxl/modular/transform/palette.cc",
+ "jxl/modular/transform/palette.h",
+ "jxl/modular/transform/rct.cc",
+ "jxl/modular/transform/rct.h",
+ "jxl/modular/transform/squeeze.cc",
+ "jxl/modular/transform/squeeze.h",
+ "jxl/modular/transform/transform.cc",
+ "jxl/modular/transform/transform.h",
+ "jxl/noise.h",
+ "jxl/opsin_params.cc",
+ "jxl/opsin_params.h",
+ "jxl/passes_state.cc",
+ "jxl/passes_state.h",
+ "jxl/patch_dictionary_internal.h",
+ "jxl/quant_weights.cc",
+ "jxl/quant_weights.h",
+ "jxl/quantizer-inl.h",
+ "jxl/quantizer.cc",
+ "jxl/quantizer.h",
+ "jxl/rational_polynomial-inl.h",
+ "jxl/render_pipeline/low_memory_render_pipeline.cc",
+ "jxl/render_pipeline/low_memory_render_pipeline.h",
+ "jxl/render_pipeline/render_pipeline.cc",
+ "jxl/render_pipeline/render_pipeline.h",
+ "jxl/render_pipeline/render_pipeline_stage.h",
+ "jxl/render_pipeline/simple_render_pipeline.cc",
+ "jxl/render_pipeline/simple_render_pipeline.h",
+ "jxl/render_pipeline/stage_blending.cc",
+ "jxl/render_pipeline/stage_blending.h",
+ "jxl/render_pipeline/stage_chroma_upsampling.cc",
+ "jxl/render_pipeline/stage_chroma_upsampling.h",
+ "jxl/render_pipeline/stage_epf.cc",
+ "jxl/render_pipeline/stage_epf.h",
+ "jxl/render_pipeline/stage_from_linear.cc",
+ "jxl/render_pipeline/stage_from_linear.h",
+ "jxl/render_pipeline/stage_gaborish.cc",
+ "jxl/render_pipeline/stage_gaborish.h",
+ "jxl/render_pipeline/stage_noise.cc",
+ "jxl/render_pipeline/stage_noise.h",
+ "jxl/render_pipeline/stage_patches.cc",
+ "jxl/render_pipeline/stage_patches.h",
+ "jxl/render_pipeline/stage_splines.cc",
+ "jxl/render_pipeline/stage_splines.h",
+ "jxl/render_pipeline/stage_spot.cc",
+ "jxl/render_pipeline/stage_spot.h",
+ "jxl/render_pipeline/stage_to_linear.cc",
+ "jxl/render_pipeline/stage_to_linear.h",
+ "jxl/render_pipeline/stage_tone_mapping.cc",
+ "jxl/render_pipeline/stage_tone_mapping.h",
+ "jxl/render_pipeline/stage_upsampling.cc",
+ "jxl/render_pipeline/stage_upsampling.h",
+ "jxl/render_pipeline/stage_write.cc",
+ "jxl/render_pipeline/stage_write.h",
+ "jxl/render_pipeline/stage_xyb.cc",
+ "jxl/render_pipeline/stage_xyb.h",
+ "jxl/render_pipeline/stage_ycbcr.cc",
+ "jxl/render_pipeline/stage_ycbcr.h",
+ "jxl/sanitizers.h",
+ "jxl/simd_util-inl.h",
+ "jxl/splines.cc",
+ "jxl/splines.h",
+ "jxl/toc.cc",
+ "jxl/toc.h",
+ "jxl/transfer_functions-inl.h",
+ "jxl/transpose-inl.h",
+ "jxl/xorshift128plus-inl.h",
+]
+
+libjxl_enc_sources = [
+ "jxl/butteraugli/butteraugli.cc",
+ "jxl/butteraugli/butteraugli.h",
+ "jxl/butteraugli_wrapper.cc",
+ "jxl/enc_ac_strategy.cc",
+ "jxl/enc_ac_strategy.h",
+ "jxl/enc_adaptive_quantization.cc",
+ "jxl/enc_adaptive_quantization.h",
+ "jxl/enc_ans.cc",
+ "jxl/enc_ans.h",
+ "jxl/enc_ans_params.h",
+ "jxl/enc_ar_control_field.cc",
+ "jxl/enc_ar_control_field.h",
+ "jxl/enc_aux_out.cc",
+ "jxl/enc_aux_out.h",
+ "jxl/enc_bit_writer.cc",
+ "jxl/enc_bit_writer.h",
+ "jxl/enc_butteraugli_comparator.cc",
+ "jxl/enc_butteraugli_comparator.h",
+ "jxl/enc_butteraugli_pnorm.cc",
+ "jxl/enc_butteraugli_pnorm.h",
+ "jxl/enc_cache.cc",
+ "jxl/enc_cache.h",
+ "jxl/enc_chroma_from_luma.cc",
+ "jxl/enc_chroma_from_luma.h",
+ "jxl/enc_cluster.cc",
+ "jxl/enc_cluster.h",
+ "jxl/enc_coeff_order.cc",
+ "jxl/enc_coeff_order.h",
+ "jxl/enc_color_management.cc",
+ "jxl/enc_color_management.h",
+ "jxl/enc_comparator.cc",
+ "jxl/enc_comparator.h",
+ "jxl/enc_context_map.cc",
+ "jxl/enc_context_map.h",
+ "jxl/enc_detect_dots.cc",
+ "jxl/enc_detect_dots.h",
+ "jxl/enc_dot_dictionary.cc",
+ "jxl/enc_dot_dictionary.h",
+ "jxl/enc_entropy_coder.cc",
+ "jxl/enc_entropy_coder.h",
+ "jxl/enc_external_image.cc",
+ "jxl/enc_external_image.h",
+ "jxl/enc_fast_lossless.cc",
+ "jxl/enc_fast_lossless.h",
+ "jxl/enc_fields.cc",
+ "jxl/enc_fields.h",
+ "jxl/enc_file.cc",
+ "jxl/enc_file.h",
+ "jxl/enc_frame.cc",
+ "jxl/enc_frame.h",
+ "jxl/enc_gaborish.cc",
+ "jxl/enc_gaborish.h",
+ "jxl/enc_gamma_correct.h",
+ "jxl/enc_group.cc",
+ "jxl/enc_group.h",
+ "jxl/enc_heuristics.cc",
+ "jxl/enc_heuristics.h",
+ "jxl/enc_huffman.cc",
+ "jxl/enc_huffman.h",
+ "jxl/enc_huffman_tree.cc",
+ "jxl/enc_huffman_tree.h",
+ "jxl/enc_icc_codec.cc",
+ "jxl/enc_icc_codec.h",
+ "jxl/enc_image_bundle.cc",
+ "jxl/enc_image_bundle.h",
+ "jxl/enc_jxl_skcms.h",
+ "jxl/enc_linalg.cc",
+ "jxl/enc_linalg.h",
+ "jxl/enc_modular.cc",
+ "jxl/enc_modular.h",
+ "jxl/enc_noise.cc",
+ "jxl/enc_noise.h",
+ "jxl/enc_optimize.cc",
+ "jxl/enc_optimize.h",
+ "jxl/enc_params.h",
+ "jxl/enc_patch_dictionary.cc",
+ "jxl/enc_patch_dictionary.h",
+ "jxl/enc_photon_noise.cc",
+ "jxl/enc_photon_noise.h",
+ "jxl/enc_progressive_split.cc",
+ "jxl/enc_progressive_split.h",
+ "jxl/enc_quant_weights.cc",
+ "jxl/enc_quant_weights.h",
+ "jxl/enc_splines.cc",
+ "jxl/enc_splines.h",
+ "jxl/enc_toc.cc",
+ "jxl/enc_toc.h",
+ "jxl/enc_transforms-inl.h",
+ "jxl/enc_transforms.cc",
+ "jxl/enc_transforms.h",
+ "jxl/enc_xyb.cc",
+ "jxl/enc_xyb.h",
+ "jxl/encode.cc",
+ "jxl/encode_internal.h",
+ "jxl/jpeg/enc_jpeg_data.cc",
+ "jxl/jpeg/enc_jpeg_data.h",
+ "jxl/jpeg/enc_jpeg_data_reader.cc",
+ "jxl/jpeg/enc_jpeg_data_reader.h",
+ "jxl/jpeg/enc_jpeg_huffman_decode.cc",
+ "jxl/jpeg/enc_jpeg_huffman_decode.h",
+ "jxl/modular/encoding/enc_debug_tree.cc",
+ "jxl/modular/encoding/enc_debug_tree.h",
+ "jxl/modular/encoding/enc_encoding.cc",
+ "jxl/modular/encoding/enc_encoding.h",
+ "jxl/modular/encoding/enc_ma.cc",
+ "jxl/modular/encoding/enc_ma.h",
+ "jxl/modular/transform/enc_palette.cc",
+ "jxl/modular/transform/enc_palette.h",
+ "jxl/modular/transform/enc_rct.cc",
+ "jxl/modular/transform/enc_rct.h",
+ "jxl/modular/transform/enc_squeeze.cc",
+ "jxl/modular/transform/enc_squeeze.h",
+ "jxl/modular/transform/enc_transform.cc",
+ "jxl/modular/transform/enc_transform.h",
+]
+
+libjxl_extras_for_tools_sources = [
+ "extras/codec.cc",
+ "extras/codec.h",
+ "extras/hlg.cc",
+ "extras/hlg.h",
+ "extras/packed_image_convert.cc",
+ "extras/packed_image_convert.h",
+ "extras/tone_mapping.cc",
+ "extras/tone_mapping.h",
+]
+
+libjxl_extras_sources = [
+ "extras/dec/color_description.cc",
+ "extras/dec/color_description.h",
+ "extras/dec/color_hints.cc",
+ "extras/dec/color_hints.h",
+ "extras/dec/decode.cc",
+ "extras/dec/decode.h",
+ "extras/enc/encode.cc",
+ "extras/enc/encode.h",
+ "extras/exif.cc",
+ "extras/exif.h",
+ "extras/packed_image.h",
+ "extras/size_constraints.h",
+ "extras/time.cc",
+ "extras/time.h",
+]
+
+libjxl_gbench_sources = [
+ "extras/tone_mapping_gbench.cc",
+ "jxl/dec_external_image_gbench.cc",
+ "jxl/enc_external_image_gbench.cc",
+ "jxl/gauss_blur_gbench.cc",
+ "jxl/splines_gbench.cc",
+ "jxl/tf_gbench.cc",
+]
+
+libjxl_jpegli_sources = [
+ "jpegli/adaptive_quantization.cc",
+ "jpegli/adaptive_quantization.h",
+ "jpegli/bit_writer.cc",
+ "jpegli/bit_writer.h",
+ "jpegli/bitstream.cc",
+ "jpegli/bitstream.h",
+ "jpegli/color_quantize.cc",
+ "jpegli/color_quantize.h",
+ "jpegli/color_transform.cc",
+ "jpegli/color_transform.h",
+ "jpegli/common.cc",
+ "jpegli/common.h",
+ "jpegli/common_internal.h",
+ "jpegli/dct-inl.h",
+ "jpegli/dct.cc",
+ "jpegli/dct.h",
+ "jpegli/decode.cc",
+ "jpegli/decode.h",
+ "jpegli/decode_internal.h",
+ "jpegli/decode_marker.cc",
+ "jpegli/decode_marker.h",
+ "jpegli/decode_scan.cc",
+ "jpegli/decode_scan.h",
+ "jpegli/destination_manager.cc",
+ "jpegli/downsample.cc",
+ "jpegli/downsample.h",
+ "jpegli/encode.cc",
+ "jpegli/encode.h",
+ "jpegli/encode_internal.h",
+ "jpegli/entropy_coding.cc",
+ "jpegli/entropy_coding.h",
+ "jpegli/error.cc",
+ "jpegli/error.h",
+ "jpegli/huffman.cc",
+ "jpegli/huffman.h",
+ "jpegli/idct.cc",
+ "jpegli/idct.h",
+ "jpegli/input.cc",
+ "jpegli/input.h",
+ "jpegli/memory_manager.cc",
+ "jpegli/memory_manager.h",
+ "jpegli/quant.cc",
+ "jpegli/quant.h",
+ "jpegli/render.cc",
+ "jpegli/render.h",
+ "jpegli/simd.cc",
+ "jpegli/simd.h",
+ "jpegli/source_manager.cc",
+ "jpegli/transpose-inl.h",
+ "jpegli/upsample.cc",
+ "jpegli/upsample.h",
+]
+
+libjxl_jpegli_testlib_files = [
+ "jpegli/test_utils.cc",
+ "jpegli/test_utils.h",
+]
+
+libjxl_jpegli_tests = [
+ "jpegli/decode_api_test.cc",
+ "jpegli/encode_api_test.cc",
+ "jpegli/error_handling_test.cc",
+ "jpegli/input_suspension_test.cc",
+ "jpegli/output_suspension_test.cc",
+ "jpegli/source_manager_test.cc",
+ "jpegli/streaming_test.cc",
+ "jpegli/transcode_api_test.cc",
+]
+
+libjxl_jpegli_wrapper_sources = [
+ "jpegli/libjpeg_wrapper.cc",
+]
+
+libjxl_major_version = 0
+
+libjxl_minor_version = 9
+
+libjxl_patch_version = 0
+
+libjxl_public_headers = [
+ "include/jxl/butteraugli.h",
+ "include/jxl/butteraugli_cxx.h",
+ "include/jxl/cms_interface.h",
+ "include/jxl/codestream_header.h",
+ "include/jxl/color_encoding.h",
+ "include/jxl/decode.h",
+ "include/jxl/decode_cxx.h",
+ "include/jxl/encode.h",
+ "include/jxl/encode_cxx.h",
+ "include/jxl/memory_manager.h",
+ "include/jxl/parallel_runner.h",
+ "include/jxl/types.h",
+]
+
+libjxl_testlib_files = [
+ "jxl/dct_for_test.h",
+ "jxl/dec_transforms_testonly.cc",
+ "jxl/dec_transforms_testonly.h",
+ "jxl/fake_parallel_runner_testonly.h",
+ "jxl/image_test_utils.h",
+ "jxl/render_pipeline/test_render_pipeline_stages.h",
+ "jxl/test_image.cc",
+ "jxl/test_image.h",
+ "jxl/test_utils.cc",
+ "jxl/test_utils.h",
+]
+
+libjxl_tests = [
+ "extras/codec_test.cc",
+ "extras/dec/color_description_test.cc",
+ "extras/dec/pgx_test.cc",
+ "extras/jpegli_test.cc",
+ "jxl/ac_strategy_test.cc",
+ "jxl/alpha_test.cc",
+ "jxl/ans_common_test.cc",
+ "jxl/ans_test.cc",
+ "jxl/bit_reader_test.cc",
+ "jxl/bits_test.cc",
+ "jxl/blending_test.cc",
+ "jxl/butteraugli_test.cc",
+ "jxl/byte_order_test.cc",
+ "jxl/coeff_order_test.cc",
+ "jxl/color_encoding_internal_test.cc",
+ "jxl/color_management_test.cc",
+ "jxl/convolve_test.cc",
+ "jxl/data_parallel_test.cc",
+ "jxl/dct_test.cc",
+ "jxl/decode_test.cc",
+ "jxl/enc_external_image_test.cc",
+ "jxl/enc_gaborish_test.cc",
+ "jxl/enc_linalg_test.cc",
+ "jxl/enc_optimize_test.cc",
+ "jxl/enc_photon_noise_test.cc",
+ "jxl/encode_test.cc",
+ "jxl/entropy_coder_test.cc",
+ "jxl/fast_dct_test.cc",
+ "jxl/fast_math_test.cc",
+ "jxl/fields_test.cc",
+ "jxl/gamma_correct_test.cc",
+ "jxl/gauss_blur_test.cc",
+ "jxl/gradient_test.cc",
+ "jxl/iaca_test.cc",
+ "jxl/icc_codec_test.cc",
+ "jxl/image_bundle_test.cc",
+ "jxl/image_ops_test.cc",
+ "jxl/jxl_test.cc",
+ "jxl/lehmer_code_test.cc",
+ "jxl/modular_test.cc",
+ "jxl/opsin_image_test.cc",
+ "jxl/opsin_inverse_test.cc",
+ "jxl/padded_bytes_test.cc",
+ "jxl/passes_test.cc",
+ "jxl/patch_dictionary_test.cc",
+ "jxl/preview_test.cc",
+ "jxl/quant_weights_test.cc",
+ "jxl/quantizer_test.cc",
+ "jxl/rational_polynomial_test.cc",
+ "jxl/render_pipeline/render_pipeline_test.cc",
+ "jxl/roundtrip_test.cc",
+ "jxl/simd_util_test.cc",
+ "jxl/speed_tier_test.cc",
+ "jxl/splines_test.cc",
+ "jxl/toc_test.cc",
+ "jxl/xorshift128plus_test.cc",
+ "threads/thread_parallel_runner_test.cc",
+]
+
+libjxl_threads_public_headers = [
+ "include/jxl/resizable_parallel_runner.h",
+ "include/jxl/resizable_parallel_runner_cxx.h",
+ "include/jxl/thread_parallel_runner.h",
+ "include/jxl/thread_parallel_runner_cxx.h",
+]
+
+libjxl_threads_sources = [
+ "threads/resizable_parallel_runner.cc",
+ "threads/thread_parallel_runner.cc",
+ "threads/thread_parallel_runner_internal.cc",
+ "threads/thread_parallel_runner_internal.h",
+]
diff --git a/third_party/jpeg-xl/lib/jxl_lists.cmake b/third_party/jpeg-xl/lib/jxl_lists.cmake
new file mode 100644
index 0000000000..787684d249
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl_lists.cmake
@@ -0,0 +1,631 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# This file is generated, do not modify by manually.
+# Run `tools/scripts/build_cleaner.py --update` to regenerate it.
+
+set(JPEGXL_INTERNAL_BASE_SOURCES
+ jxl/base/arch_macros.h
+ jxl/base/bits.h
+ jxl/base/byte_order.h
+ jxl/base/cache_aligned.cc
+ jxl/base/cache_aligned.h
+ jxl/base/compiler_specific.h
+ jxl/base/data_parallel.cc
+ jxl/base/data_parallel.h
+ jxl/base/file_io.h
+ jxl/base/float.h
+ jxl/base/iaca.h
+ jxl/base/os_macros.h
+ jxl/base/override.h
+ jxl/base/padded_bytes.cc
+ jxl/base/padded_bytes.h
+ jxl/base/printf_macros.h
+ jxl/base/profiler.cc
+ jxl/base/profiler.h
+ jxl/base/random.cc
+ jxl/base/random.h
+ jxl/base/sanitizer_definitions.h
+ jxl/base/scope_guard.h
+ jxl/base/span.h
+ jxl/base/status.h
+ jxl/base/tsc_timer.h
+)
+
+set(JPEGXL_INTERNAL_CODEC_APNG_SOURCES
+ extras/dec/apng.cc
+ extras/dec/apng.h
+ extras/enc/apng.cc
+ extras/enc/apng.h
+)
+
+set(JPEGXL_INTERNAL_CODEC_EXR_SOURCES
+ extras/dec/exr.cc
+ extras/dec/exr.h
+ extras/enc/exr.cc
+ extras/enc/exr.h
+)
+
+set(JPEGXL_INTERNAL_CODEC_GIF_SOURCES
+ extras/dec/gif.cc
+ extras/dec/gif.h
+)
+
+set(JPEGXL_INTERNAL_CODEC_JPEGLI_SOURCES
+ extras/dec/jpegli.cc
+ extras/dec/jpegli.h
+ extras/enc/jpegli.cc
+ extras/enc/jpegli.h
+)
+
+set(JPEGXL_INTERNAL_CODEC_JPG_SOURCES
+ extras/dec/jpg.cc
+ extras/dec/jpg.h
+ extras/enc/jpg.cc
+ extras/enc/jpg.h
+)
+
+set(JPEGXL_INTERNAL_CODEC_JXL_SOURCES
+ extras/dec/jxl.cc
+ extras/dec/jxl.h
+ extras/enc/jxl.cc
+ extras/enc/jxl.h
+)
+
+set(JPEGXL_INTERNAL_CODEC_NPY_SOURCES
+ extras/enc/npy.cc
+ extras/enc/npy.h
+)
+
+set(JPEGXL_INTERNAL_CODEC_PGX_SOURCES
+ extras/dec/pgx.cc
+ extras/dec/pgx.h
+ extras/enc/pgx.cc
+ extras/enc/pgx.h
+)
+
+set(JPEGXL_INTERNAL_CODEC_PNM_SOURCES
+ extras/dec/pnm.cc
+ extras/dec/pnm.h
+ extras/enc/pnm.cc
+ extras/enc/pnm.h
+)
+
+set(JPEGXL_INTERNAL_DEC_BOX_SOURCES
+ jxl/box_content_decoder.cc
+ jxl/box_content_decoder.h
+)
+
+set(JPEGXL_INTERNAL_DEC_JPEG_SOURCES
+ jxl/decode_to_jpeg.cc
+ jxl/decode_to_jpeg.h
+ jxl/jpeg/dec_jpeg_data.cc
+ jxl/jpeg/dec_jpeg_data.h
+ jxl/jpeg/dec_jpeg_data_writer.cc
+ jxl/jpeg/dec_jpeg_data_writer.h
+ jxl/jpeg/dec_jpeg_output_chunk.h
+ jxl/jpeg/dec_jpeg_serialization_state.h
+ jxl/jpeg/jpeg_data.cc
+ jxl/jpeg/jpeg_data.h
+)
+
+set(JPEGXL_INTERNAL_DEC_SOURCES
+ jxl/ac_context.h
+ jxl/ac_strategy.cc
+ jxl/ac_strategy.h
+ jxl/alpha.cc
+ jxl/alpha.h
+ jxl/ans_common.cc
+ jxl/ans_common.h
+ jxl/ans_params.h
+ jxl/blending.cc
+ jxl/blending.h
+ jxl/chroma_from_luma.cc
+ jxl/chroma_from_luma.h
+ jxl/codec_in_out.h
+ jxl/coeff_order.cc
+ jxl/coeff_order.h
+ jxl/coeff_order_fwd.h
+ jxl/color_encoding_internal.cc
+ jxl/color_encoding_internal.h
+ jxl/color_management.cc
+ jxl/color_management.h
+ jxl/common.h
+ jxl/compressed_dc.cc
+ jxl/compressed_dc.h
+ jxl/convolve-inl.h
+ jxl/convolve.h
+ jxl/convolve_separable5.cc
+ jxl/convolve_separable7.cc
+ jxl/convolve_slow.cc
+ jxl/convolve_symmetric3.cc
+ jxl/convolve_symmetric5.cc
+ jxl/dct-inl.h
+ jxl/dct_block-inl.h
+ jxl/dct_scales.cc
+ jxl/dct_scales.h
+ jxl/dct_util.h
+ jxl/dec_ans.cc
+ jxl/dec_ans.h
+ jxl/dec_bit_reader.h
+ jxl/dec_cache.cc
+ jxl/dec_cache.h
+ jxl/dec_context_map.cc
+ jxl/dec_context_map.h
+ jxl/dec_external_image.cc
+ jxl/dec_external_image.h
+ jxl/dec_frame.cc
+ jxl/dec_frame.h
+ jxl/dec_group.cc
+ jxl/dec_group.h
+ jxl/dec_group_border.cc
+ jxl/dec_group_border.h
+ jxl/dec_huffman.cc
+ jxl/dec_huffman.h
+ jxl/dec_modular.cc
+ jxl/dec_modular.h
+ jxl/dec_noise.cc
+ jxl/dec_noise.h
+ jxl/dec_patch_dictionary.cc
+ jxl/dec_patch_dictionary.h
+ jxl/dec_tone_mapping-inl.h
+ jxl/dec_transforms-inl.h
+ jxl/dec_xyb-inl.h
+ jxl/dec_xyb.cc
+ jxl/dec_xyb.h
+ jxl/decode.cc
+ jxl/entropy_coder.cc
+ jxl/entropy_coder.h
+ jxl/epf.cc
+ jxl/epf.h
+ jxl/exif.h
+ jxl/fast_dct-inl.h
+ jxl/fast_dct.cc
+ jxl/fast_dct.h
+ jxl/fast_dct128-inl.h
+ jxl/fast_dct16-inl.h
+ jxl/fast_dct256-inl.h
+ jxl/fast_dct32-inl.h
+ jxl/fast_dct64-inl.h
+ jxl/fast_dct8-inl.h
+ jxl/fast_math-inl.h
+ jxl/field_encodings.h
+ jxl/fields.cc
+ jxl/fields.h
+ jxl/frame_header.cc
+ jxl/frame_header.h
+ jxl/gauss_blur.cc
+ jxl/gauss_blur.h
+ jxl/headers.cc
+ jxl/headers.h
+ jxl/huffman_table.cc
+ jxl/huffman_table.h
+ jxl/icc_codec.cc
+ jxl/icc_codec.h
+ jxl/icc_codec_common.cc
+ jxl/icc_codec_common.h
+ jxl/image.cc
+ jxl/image.h
+ jxl/image_bundle.cc
+ jxl/image_bundle.h
+ jxl/image_metadata.cc
+ jxl/image_metadata.h
+ jxl/image_ops.h
+ jxl/inverse_mtf-inl.h
+ jxl/jxl_inspection.h
+ jxl/lehmer_code.h
+ jxl/loop_filter.cc
+ jxl/loop_filter.h
+ jxl/luminance.cc
+ jxl/luminance.h
+ jxl/matrix_ops.h
+ jxl/memory_manager_internal.cc
+ jxl/memory_manager_internal.h
+ jxl/modular/encoding/context_predict.h
+ jxl/modular/encoding/dec_ma.cc
+ jxl/modular/encoding/dec_ma.h
+ jxl/modular/encoding/encoding.cc
+ jxl/modular/encoding/encoding.h
+ jxl/modular/encoding/ma_common.h
+ jxl/modular/modular_image.cc
+ jxl/modular/modular_image.h
+ jxl/modular/options.h
+ jxl/modular/transform/palette.cc
+ jxl/modular/transform/palette.h
+ jxl/modular/transform/rct.cc
+ jxl/modular/transform/rct.h
+ jxl/modular/transform/squeeze.cc
+ jxl/modular/transform/squeeze.h
+ jxl/modular/transform/transform.cc
+ jxl/modular/transform/transform.h
+ jxl/noise.h
+ jxl/opsin_params.cc
+ jxl/opsin_params.h
+ jxl/passes_state.cc
+ jxl/passes_state.h
+ jxl/patch_dictionary_internal.h
+ jxl/quant_weights.cc
+ jxl/quant_weights.h
+ jxl/quantizer-inl.h
+ jxl/quantizer.cc
+ jxl/quantizer.h
+ jxl/rational_polynomial-inl.h
+ jxl/render_pipeline/low_memory_render_pipeline.cc
+ jxl/render_pipeline/low_memory_render_pipeline.h
+ jxl/render_pipeline/render_pipeline.cc
+ jxl/render_pipeline/render_pipeline.h
+ jxl/render_pipeline/render_pipeline_stage.h
+ jxl/render_pipeline/simple_render_pipeline.cc
+ jxl/render_pipeline/simple_render_pipeline.h
+ jxl/render_pipeline/stage_blending.cc
+ jxl/render_pipeline/stage_blending.h
+ jxl/render_pipeline/stage_chroma_upsampling.cc
+ jxl/render_pipeline/stage_chroma_upsampling.h
+ jxl/render_pipeline/stage_epf.cc
+ jxl/render_pipeline/stage_epf.h
+ jxl/render_pipeline/stage_from_linear.cc
+ jxl/render_pipeline/stage_from_linear.h
+ jxl/render_pipeline/stage_gaborish.cc
+ jxl/render_pipeline/stage_gaborish.h
+ jxl/render_pipeline/stage_noise.cc
+ jxl/render_pipeline/stage_noise.h
+ jxl/render_pipeline/stage_patches.cc
+ jxl/render_pipeline/stage_patches.h
+ jxl/render_pipeline/stage_splines.cc
+ jxl/render_pipeline/stage_splines.h
+ jxl/render_pipeline/stage_spot.cc
+ jxl/render_pipeline/stage_spot.h
+ jxl/render_pipeline/stage_to_linear.cc
+ jxl/render_pipeline/stage_to_linear.h
+ jxl/render_pipeline/stage_tone_mapping.cc
+ jxl/render_pipeline/stage_tone_mapping.h
+ jxl/render_pipeline/stage_upsampling.cc
+ jxl/render_pipeline/stage_upsampling.h
+ jxl/render_pipeline/stage_write.cc
+ jxl/render_pipeline/stage_write.h
+ jxl/render_pipeline/stage_xyb.cc
+ jxl/render_pipeline/stage_xyb.h
+ jxl/render_pipeline/stage_ycbcr.cc
+ jxl/render_pipeline/stage_ycbcr.h
+ jxl/sanitizers.h
+ jxl/simd_util-inl.h
+ jxl/splines.cc
+ jxl/splines.h
+ jxl/toc.cc
+ jxl/toc.h
+ jxl/transfer_functions-inl.h
+ jxl/transpose-inl.h
+ jxl/xorshift128plus-inl.h
+)
+
+set(JPEGXL_INTERNAL_ENC_SOURCES
+ jxl/butteraugli/butteraugli.cc
+ jxl/butteraugli/butteraugli.h
+ jxl/butteraugli_wrapper.cc
+ jxl/enc_ac_strategy.cc
+ jxl/enc_ac_strategy.h
+ jxl/enc_adaptive_quantization.cc
+ jxl/enc_adaptive_quantization.h
+ jxl/enc_ans.cc
+ jxl/enc_ans.h
+ jxl/enc_ans_params.h
+ jxl/enc_ar_control_field.cc
+ jxl/enc_ar_control_field.h
+ jxl/enc_aux_out.cc
+ jxl/enc_aux_out.h
+ jxl/enc_bit_writer.cc
+ jxl/enc_bit_writer.h
+ jxl/enc_butteraugli_comparator.cc
+ jxl/enc_butteraugli_comparator.h
+ jxl/enc_butteraugli_pnorm.cc
+ jxl/enc_butteraugli_pnorm.h
+ jxl/enc_cache.cc
+ jxl/enc_cache.h
+ jxl/enc_chroma_from_luma.cc
+ jxl/enc_chroma_from_luma.h
+ jxl/enc_cluster.cc
+ jxl/enc_cluster.h
+ jxl/enc_coeff_order.cc
+ jxl/enc_coeff_order.h
+ jxl/enc_color_management.cc
+ jxl/enc_color_management.h
+ jxl/enc_comparator.cc
+ jxl/enc_comparator.h
+ jxl/enc_context_map.cc
+ jxl/enc_context_map.h
+ jxl/enc_detect_dots.cc
+ jxl/enc_detect_dots.h
+ jxl/enc_dot_dictionary.cc
+ jxl/enc_dot_dictionary.h
+ jxl/enc_entropy_coder.cc
+ jxl/enc_entropy_coder.h
+ jxl/enc_external_image.cc
+ jxl/enc_external_image.h
+ jxl/enc_fast_lossless.cc
+ jxl/enc_fast_lossless.h
+ jxl/enc_fields.cc
+ jxl/enc_fields.h
+ jxl/enc_file.cc
+ jxl/enc_file.h
+ jxl/enc_frame.cc
+ jxl/enc_frame.h
+ jxl/enc_gaborish.cc
+ jxl/enc_gaborish.h
+ jxl/enc_gamma_correct.h
+ jxl/enc_group.cc
+ jxl/enc_group.h
+ jxl/enc_heuristics.cc
+ jxl/enc_heuristics.h
+ jxl/enc_huffman.cc
+ jxl/enc_huffman.h
+ jxl/enc_huffman_tree.cc
+ jxl/enc_huffman_tree.h
+ jxl/enc_icc_codec.cc
+ jxl/enc_icc_codec.h
+ jxl/enc_image_bundle.cc
+ jxl/enc_image_bundle.h
+ jxl/enc_jxl_skcms.h
+ jxl/enc_linalg.cc
+ jxl/enc_linalg.h
+ jxl/enc_modular.cc
+ jxl/enc_modular.h
+ jxl/enc_noise.cc
+ jxl/enc_noise.h
+ jxl/enc_optimize.cc
+ jxl/enc_optimize.h
+ jxl/enc_params.h
+ jxl/enc_patch_dictionary.cc
+ jxl/enc_patch_dictionary.h
+ jxl/enc_photon_noise.cc
+ jxl/enc_photon_noise.h
+ jxl/enc_progressive_split.cc
+ jxl/enc_progressive_split.h
+ jxl/enc_quant_weights.cc
+ jxl/enc_quant_weights.h
+ jxl/enc_splines.cc
+ jxl/enc_splines.h
+ jxl/enc_toc.cc
+ jxl/enc_toc.h
+ jxl/enc_transforms-inl.h
+ jxl/enc_transforms.cc
+ jxl/enc_transforms.h
+ jxl/enc_xyb.cc
+ jxl/enc_xyb.h
+ jxl/encode.cc
+ jxl/encode_internal.h
+ jxl/jpeg/enc_jpeg_data.cc
+ jxl/jpeg/enc_jpeg_data.h
+ jxl/jpeg/enc_jpeg_data_reader.cc
+ jxl/jpeg/enc_jpeg_data_reader.h
+ jxl/jpeg/enc_jpeg_huffman_decode.cc
+ jxl/jpeg/enc_jpeg_huffman_decode.h
+ jxl/modular/encoding/enc_debug_tree.cc
+ jxl/modular/encoding/enc_debug_tree.h
+ jxl/modular/encoding/enc_encoding.cc
+ jxl/modular/encoding/enc_encoding.h
+ jxl/modular/encoding/enc_ma.cc
+ jxl/modular/encoding/enc_ma.h
+ jxl/modular/transform/enc_palette.cc
+ jxl/modular/transform/enc_palette.h
+ jxl/modular/transform/enc_rct.cc
+ jxl/modular/transform/enc_rct.h
+ jxl/modular/transform/enc_squeeze.cc
+ jxl/modular/transform/enc_squeeze.h
+ jxl/modular/transform/enc_transform.cc
+ jxl/modular/transform/enc_transform.h
+)
+
+set(JPEGXL_INTERNAL_EXTRAS_FOR_TOOLS_SOURCES
+ extras/codec.cc
+ extras/codec.h
+ extras/hlg.cc
+ extras/hlg.h
+ extras/packed_image_convert.cc
+ extras/packed_image_convert.h
+ extras/tone_mapping.cc
+ extras/tone_mapping.h
+)
+
+set(JPEGXL_INTERNAL_EXTRAS_SOURCES
+ extras/dec/color_description.cc
+ extras/dec/color_description.h
+ extras/dec/color_hints.cc
+ extras/dec/color_hints.h
+ extras/dec/decode.cc
+ extras/dec/decode.h
+ extras/enc/encode.cc
+ extras/enc/encode.h
+ extras/exif.cc
+ extras/exif.h
+ extras/packed_image.h
+ extras/size_constraints.h
+ extras/time.cc
+ extras/time.h
+)
+
+set(JPEGXL_INTERNAL_GBENCH_SOURCES
+ extras/tone_mapping_gbench.cc
+ jxl/dec_external_image_gbench.cc
+ jxl/enc_external_image_gbench.cc
+ jxl/gauss_blur_gbench.cc
+ jxl/splines_gbench.cc
+ jxl/tf_gbench.cc
+)
+
+set(JPEGXL_INTERNAL_JPEGLI_SOURCES
+ jpegli/adaptive_quantization.cc
+ jpegli/adaptive_quantization.h
+ jpegli/bit_writer.cc
+ jpegli/bit_writer.h
+ jpegli/bitstream.cc
+ jpegli/bitstream.h
+ jpegli/color_quantize.cc
+ jpegli/color_quantize.h
+ jpegli/color_transform.cc
+ jpegli/color_transform.h
+ jpegli/common.cc
+ jpegli/common.h
+ jpegli/common_internal.h
+ jpegli/dct-inl.h
+ jpegli/dct.cc
+ jpegli/dct.h
+ jpegli/decode.cc
+ jpegli/decode.h
+ jpegli/decode_internal.h
+ jpegli/decode_marker.cc
+ jpegli/decode_marker.h
+ jpegli/decode_scan.cc
+ jpegli/decode_scan.h
+ jpegli/destination_manager.cc
+ jpegli/downsample.cc
+ jpegli/downsample.h
+ jpegli/encode.cc
+ jpegli/encode.h
+ jpegli/encode_internal.h
+ jpegli/entropy_coding.cc
+ jpegli/entropy_coding.h
+ jpegli/error.cc
+ jpegli/error.h
+ jpegli/huffman.cc
+ jpegli/huffman.h
+ jpegli/idct.cc
+ jpegli/idct.h
+ jpegli/input.cc
+ jpegli/input.h
+ jpegli/memory_manager.cc
+ jpegli/memory_manager.h
+ jpegli/quant.cc
+ jpegli/quant.h
+ jpegli/render.cc
+ jpegli/render.h
+ jpegli/simd.cc
+ jpegli/simd.h
+ jpegli/source_manager.cc
+ jpegli/transpose-inl.h
+ jpegli/upsample.cc
+ jpegli/upsample.h
+)
+
+set(JPEGXL_INTERNAL_JPEGLI_TESTLIB_FILES
+ jpegli/test_utils.cc
+ jpegli/test_utils.h
+)
+
+set(JPEGXL_INTERNAL_JPEGLI_TESTS
+ jpegli/decode_api_test.cc
+ jpegli/encode_api_test.cc
+ jpegli/error_handling_test.cc
+ jpegli/input_suspension_test.cc
+ jpegli/output_suspension_test.cc
+ jpegli/source_manager_test.cc
+ jpegli/streaming_test.cc
+ jpegli/transcode_api_test.cc
+)
+
+set(JPEGXL_INTERNAL_JPEGLI_WRAPPER_SOURCES
+ jpegli/libjpeg_wrapper.cc
+)
+
+set(JPEGXL_INTERNAL_PUBLIC_HEADERS
+ include/jxl/butteraugli.h
+ include/jxl/butteraugli_cxx.h
+ include/jxl/cms_interface.h
+ include/jxl/codestream_header.h
+ include/jxl/color_encoding.h
+ include/jxl/decode.h
+ include/jxl/decode_cxx.h
+ include/jxl/encode.h
+ include/jxl/encode_cxx.h
+ include/jxl/memory_manager.h
+ include/jxl/parallel_runner.h
+ include/jxl/types.h
+)
+
+set(JPEGXL_INTERNAL_TESTLIB_FILES
+ jxl/dct_for_test.h
+ jxl/dec_transforms_testonly.cc
+ jxl/dec_transforms_testonly.h
+ jxl/fake_parallel_runner_testonly.h
+ jxl/image_test_utils.h
+ jxl/render_pipeline/test_render_pipeline_stages.h
+ jxl/test_image.cc
+ jxl/test_image.h
+ jxl/test_utils.cc
+ jxl/test_utils.h
+)
+
+set(JPEGXL_INTERNAL_TESTS
+ extras/codec_test.cc
+ extras/dec/color_description_test.cc
+ extras/dec/pgx_test.cc
+ extras/jpegli_test.cc
+ jxl/ac_strategy_test.cc
+ jxl/alpha_test.cc
+ jxl/ans_common_test.cc
+ jxl/ans_test.cc
+ jxl/bit_reader_test.cc
+ jxl/bits_test.cc
+ jxl/blending_test.cc
+ jxl/butteraugli_test.cc
+ jxl/byte_order_test.cc
+ jxl/coeff_order_test.cc
+ jxl/color_encoding_internal_test.cc
+ jxl/color_management_test.cc
+ jxl/convolve_test.cc
+ jxl/data_parallel_test.cc
+ jxl/dct_test.cc
+ jxl/decode_test.cc
+ jxl/enc_external_image_test.cc
+ jxl/enc_gaborish_test.cc
+ jxl/enc_linalg_test.cc
+ jxl/enc_optimize_test.cc
+ jxl/enc_photon_noise_test.cc
+ jxl/encode_test.cc
+ jxl/entropy_coder_test.cc
+ jxl/fast_dct_test.cc
+ jxl/fast_math_test.cc
+ jxl/fields_test.cc
+ jxl/gamma_correct_test.cc
+ jxl/gauss_blur_test.cc
+ jxl/gradient_test.cc
+ jxl/iaca_test.cc
+ jxl/icc_codec_test.cc
+ jxl/image_bundle_test.cc
+ jxl/image_ops_test.cc
+ jxl/jxl_test.cc
+ jxl/lehmer_code_test.cc
+ jxl/modular_test.cc
+ jxl/opsin_image_test.cc
+ jxl/opsin_inverse_test.cc
+ jxl/padded_bytes_test.cc
+ jxl/passes_test.cc
+ jxl/patch_dictionary_test.cc
+ jxl/preview_test.cc
+ jxl/quant_weights_test.cc
+ jxl/quantizer_test.cc
+ jxl/rational_polynomial_test.cc
+ jxl/render_pipeline/render_pipeline_test.cc
+ jxl/roundtrip_test.cc
+ jxl/simd_util_test.cc
+ jxl/speed_tier_test.cc
+ jxl/splines_test.cc
+ jxl/toc_test.cc
+ jxl/xorshift128plus_test.cc
+ threads/thread_parallel_runner_test.cc
+)
+
+set(JPEGXL_INTERNAL_THREADS_PUBLIC_HEADERS
+ include/jxl/resizable_parallel_runner.h
+ include/jxl/resizable_parallel_runner_cxx.h
+ include/jxl/thread_parallel_runner.h
+ include/jxl/thread_parallel_runner_cxx.h
+)
+
+set(JPEGXL_INTERNAL_THREADS_SOURCES
+ threads/resizable_parallel_runner.cc
+ threads/thread_parallel_runner.cc
+ threads/thread_parallel_runner_internal.cc
+ threads/thread_parallel_runner_internal.h
+)
diff --git a/third_party/jpeg-xl/lib/jxl_tests.cmake b/third_party/jpeg-xl/lib/jxl_tests.cmake
new file mode 100644
index 0000000000..88c5a89f5c
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl_tests.cmake
@@ -0,0 +1,84 @@
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include(compatibility.cmake)
+include(jxl_lists.cmake)
+
+if(BUILD_TESTING OR JPEGXL_ENABLE_TOOLS)
+# Library with test-only code shared between all tests / fuzzers.
+add_library(jxl_testlib-static STATIC ${JPEGXL_INTERNAL_TESTLIB_FILES})
+target_compile_options(jxl_testlib-static PRIVATE
+ ${JPEGXL_INTERNAL_FLAGS}
+ ${JPEGXL_COVERAGE_FLAGS}
+)
+target_compile_definitions(jxl_testlib-static PUBLIC
+ -DTEST_DATA_PATH="${JPEGXL_TEST_DATA_PATH}")
+target_include_directories(jxl_testlib-static PUBLIC
+ "${PROJECT_SOURCE_DIR}"
+)
+target_link_libraries(jxl_testlib-static
+ hwy
+ jxl_extras_nocodec-static
+ jxl-static
+)
+endif()
+
+if(NOT BUILD_TESTING)
+ return()
+endif()
+
+list(APPEND JPEGXL_INTERNAL_TESTS
+ # TODO(deymo): Move this to tools/
+ ../tools/box/box_test.cc
+ ../tools/djxl_fuzzer_test.cc
+)
+
+find_package(GTest)
+
+# Individual test binaries:
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tests)
+foreach (TESTFILE IN LISTS JPEGXL_INTERNAL_TESTS)
+ # The TESTNAME is the name without the extension or directory.
+ get_filename_component(TESTNAME ${TESTFILE} NAME_WE)
+ if(TESTFILE STREQUAL ../tools/djxl_fuzzer_test.cc)
+ add_executable(${TESTNAME} ${TESTFILE} ../tools/djxl_fuzzer.cc)
+ else()
+ add_executable(${TESTNAME} ${TESTFILE})
+ endif()
+ if(JPEGXL_EMSCRIPTEN)
+ # The emscripten linking step takes too much memory and crashes during the
+ # wasm-opt step when using -O2 optimization level
+ set_target_properties(${TESTNAME} PROPERTIES LINK_FLAGS "\
+ -O1 \
+ -s USE_LIBPNG=1 \
+ -s ALLOW_MEMORY_GROWTH=1 \
+ -s SINGLE_FILE=1 \
+ -s PROXY_TO_PTHREAD \
+ -s EXIT_RUNTIME=1 \
+ -s USE_PTHREADS=1 \
+ -s NODERAWFS=1 \
+ ")
+ else()
+ set_target_properties(${TESTNAME} PROPERTIES LINK_FLAGS "${JPEGXL_COVERAGE_LINK_FLAGS}")
+ endif()
+ target_compile_options(${TESTNAME} PRIVATE
+ ${JPEGXL_INTERNAL_FLAGS}
+ # Add coverage flags to the test binary so code in the private headers of
+ # the library is also instrumented when running tests that execute it.
+ ${JPEGXL_COVERAGE_FLAGS}
+ )
+ target_link_libraries(${TESTNAME}
+ box
+ gmock
+ GTest::GTest
+ GTest::Main
+ jxl_extras-static
+ jxl_testlib-static
+ )
+ # Output test targets in the test directory.
+ set_target_properties(${TESTNAME} PROPERTIES PREFIX "tests/")
+ if (WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+ set_target_properties(${TESTNAME} PROPERTIES COMPILE_FLAGS "-Wno-error")
+ endif ()
+ jxl_discover_tests(${TESTNAME})
+endforeach ()
diff --git a/third_party/jpeg-xl/lib/jxl_threads.cmake b/third_party/jpeg-xl/lib/jxl_threads.cmake
new file mode 100644
index 0000000000..2f5ac17c83
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl_threads.cmake
@@ -0,0 +1,120 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+find_package(Threads REQUIRED)
+
+include(jxl_lists.cmake)
+
+### Define the jxl_threads shared or static target library. The ${target}
+# parameter should already be created with add_library(), but this function
+# sets all the remaining common properties.
+function(_set_jxl_threads _target)
+ target_compile_options(${_target} PRIVATE ${JPEGXL_INTERNAL_FLAGS})
+ target_compile_options(${_target} PUBLIC ${JPEGXL_COVERAGE_FLAGS})
+ set_property(TARGET ${_target} PROPERTY POSITION_INDEPENDENT_CODE ON)
+
+ target_include_directories(${_target}
+ PRIVATE
+ "${PROJECT_SOURCE_DIR}"
+ PUBLIC
+ "${CMAKE_CURRENT_SOURCE_DIR}/include"
+ "${CMAKE_CURRENT_BINARY_DIR}/include")
+
+ target_link_libraries(${_target}
+ PUBLIC ${JPEGXL_COVERAGE_FLAGS} Threads::Threads
+ )
+
+ set_target_properties(${_target} PROPERTIES
+ CXX_VISIBILITY_PRESET hidden
+ VISIBILITY_INLINES_HIDDEN 1
+ DEFINE_SYMBOL JXL_THREADS_INTERNAL_LIBRARY_BUILD
+ )
+
+ # Always install the library as jxl_threads.{a,so} file without the "-static"
+ # suffix, except in Windows.
+ if (NOT WIN32 OR MINGW)
+ set_target_properties(${_target} PROPERTIES OUTPUT_NAME "jxl_threads")
+ endif()
+ install(TARGETS ${_target}
+ RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+ LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+ ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+endfunction()
+
+### Static library.
+add_library(jxl_threads-static STATIC ${JPEGXL_INTERNAL_THREADS_SOURCES})
+_set_jxl_threads(jxl_threads-static)
+
+# Make jxl_threads symbols neither imported nor exported when using the static
+# library. These will have hidden visibility anyway in the static library case
+# in unix.
+target_compile_definitions(jxl_threads-static
+ PUBLIC -DJXL_THREADS_STATIC_DEFINE)
+
+
+### Public shared library.
+if (BUILD_SHARED_LIBS)
+add_library(jxl_threads SHARED ${JPEGXL_INTERNAL_THREADS_SOURCES})
+_set_jxl_threads(jxl_threads)
+
+set_target_properties(jxl_threads PROPERTIES
+ VERSION ${JPEGXL_LIBRARY_VERSION}
+ SOVERSION ${JPEGXL_LIBRARY_SOVERSION}
+ LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}"
+ RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
+
+ set_target_properties(jxl_threads PROPERTIES
+ LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/jxl/jxl.version)
+ if(APPLE)
+ set_property(TARGET ${target} APPEND_STRING PROPERTY
+ LINK_FLAGS "-Wl,-exported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/jxl/jxl_osx.syms")
+ elseif(WIN32)
+ # Nothing needed here, we use __declspec(dllexport) (jxl_threads_export.h)
+ else()
+ set_property(TARGET jxl_threads APPEND_STRING PROPERTY
+ LINK_FLAGS " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/jxl/jxl.version")
+ endif() # APPLE
+
+# Compile the shared library such that the JXL_THREADS_EXPORT symbols are
+# exported. Users of the library will not set this flag and therefore import
+# those symbols.
+target_compile_definitions(jxl_threads
+ PRIVATE -DJXL_THREADS_INTERNAL_LIBRARY_BUILD)
+
+# Generate the jxl/jxl_threads_export.h header, we only need to generate it once
+# but we can use it from both libraries.
+generate_export_header(jxl_threads
+ BASE_NAME JXL_THREADS
+ EXPORT_FILE_NAME include/jxl/jxl_threads_export.h)
+else()
+add_library(jxl_threads ALIAS jxl_threads-static)
+# When not building the shared library generate the jxl_threads_export.h header
+# only based on the static target.
+generate_export_header(jxl_threads-static
+ BASE_NAME JXL_THREADS
+ EXPORT_FILE_NAME include/jxl/jxl_threads_export.h)
+endif() # BUILD_SHARED_LIBS
+
+
+### Add a pkg-config file for libjxl_threads.
+
+# Allow adding prefix if CMAKE_INSTALL_INCLUDEDIR not absolute.
+if(IS_ABSOLUTE "${CMAKE_INSTALL_INCLUDEDIR}")
+ set(PKGCONFIG_TARGET_INCLUDES "${CMAKE_INSTALL_INCLUDEDIR}")
+else()
+ set(PKGCONFIG_TARGET_INCLUDES "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}")
+endif()
+# Allow adding prefix if CMAKE_INSTALL_LIBDIR not absolute.
+if(IS_ABSOLUTE "${CMAKE_INSTALL_LIBDIR}")
+ set(PKGCONFIG_TARGET_LIBS "${CMAKE_INSTALL_LIBDIR}")
+else()
+ set(PKGCONFIG_TARGET_LIBS "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}")
+endif()
+
+set(JPEGXL_THREADS_LIBRARY_REQUIRES "")
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/threads/libjxl_threads.pc.in"
+ "libjxl_threads.pc" @ONLY)
+install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libjxl_threads.pc"
+ DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
diff --git a/third_party/jpeg-xl/lib/jxl_vars.bzl b/third_party/jpeg-xl/lib/jxl_vars.bzl
new file mode 100644
index 0000000000..7efa84cc44
--- /dev/null
+++ b/third_party/jpeg-xl/lib/jxl_vars.bzl
@@ -0,0 +1,46 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Extra build variables.
+
+libjxl_root_package = "__main__"
+
+libjxl_deps_brotli = ["@brotli//:brotlidec", "@brotli//:brotlienc"]
+libjxl_deps_gif = ["@gif//:gif"]
+libjxl_deps_gtest = ["@googletest//:gtest_main"]
+libjxl_deps_hwy = ["@highway//:hwy"]
+libjxl_deps_hwy_nanobenchmark = ["@highway//:nanobenchmark"]
+libjxl_deps_hwy_test_util = ["@highway//:hwy_test_util"]
+libjxl_deps_jpeg = ["@libjpeg_turbo//:jpeg"]
+libjxl_deps_jxl_box = ["//tools:box"]
+libjxl_deps_exr = ["@openexr//:OpenEXR"]
+libjxl_deps_png = ["@png//:png"]
+libjxl_deps_runfiles = ["@bazel_tools//tools/cpp/runfiles"]
+libjxl_deps_skcms = ["@skcms//:skcms"]
+libjxl_deps_testdata = ["//:testdata"]
+
+libjxl_test_shards = {
+ "jpegli/decode_api_test": 10,
+ "jpegli/encode_api_test": 4,
+ "jpegli/input_suspension_test": 6,
+ "jpegli/output_suspension_test": 2,
+ "jxl/ans_test": 2,
+ "jxl/linalg_test": 2,
+ "jxl/modular_test": 4,
+ "jxl/roundtrip_test": 4,
+ "jxl/xorshift128plus_test": 2,
+ "jxl/ac_strategy_test": 10, # TODO(eustas): separate heavy shard
+ "jxl/dct_test": 32,
+ "jxl/decode_test": 10, # TODO(eustas): separate heavy shard
+ "jxl/fast_dct_test": 8, # TODO(eustas): separate ultra-heavy shard
+ "jxl/fast_math_test": 10, # TODO(eustas): separate heavy shard
+ "jxl/jxl_test": 10, # TODO(eustas): separate heavy shard
+ "jxl/render_pipeline/render_pipeline_test": 10,
+}
+
+libjxl_test_timeouts = {
+ "jxl/fast_dct_test": "long",
+ "jxl/dct_test": "long",
+}
diff --git a/third_party/jpeg-xl/lib/threads/libjxl_threads.pc.in b/third_party/jpeg-xl/lib/threads/libjxl_threads.pc.in
new file mode 100644
index 0000000000..50b937a840
--- /dev/null
+++ b/third_party/jpeg-xl/lib/threads/libjxl_threads.pc.in
@@ -0,0 +1,13 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
+libdir=@PKGCONFIG_TARGET_LIBS@
+includedir=@PKGCONFIG_TARGET_INCLUDES@
+
+Name: libjxl_threads
+Description: JPEG XL multi-thread runner using std::threads.
+Version: @JPEGXL_LIBRARY_VERSION@
+Requires.private: @JPEGXL_THREADS_LIBRARY_REQUIRES@
+Libs: -L${libdir} -ljxl_threads
+Libs.private: -lm
+Cflags: -I${includedir}
+Cflags.private: -DJXL_THREADS_STATIC_DEFINE
diff --git a/third_party/jpeg-xl/lib/threads/resizable_parallel_runner.cc b/third_party/jpeg-xl/lib/threads/resizable_parallel_runner.cc
new file mode 100644
index 0000000000..db27286dea
--- /dev/null
+++ b/third_party/jpeg-xl/lib/threads/resizable_parallel_runner.cc
@@ -0,0 +1,195 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/resizable_parallel_runner.h>
+
+#include <algorithm>
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+#include <thread>
+#include <vector>
+
+namespace jpegxl {
+namespace {
+
+// A thread pool that allows changing the number of threads it runs. It also
+// runs tasks on the calling thread, which can work better on schedulers for
+// heterogeneous architectures.
+struct ResizeableParallelRunner {
+ void SetNumThreads(size_t num) {
+ if (num > 0) {
+ num -= 1;
+ }
+ {
+ std::unique_lock<std::mutex> l(state_mutex_);
+ num_desired_workers_ = num;
+ workers_can_proceed_.notify_all();
+ }
+ if (workers_.size() < num) {
+ for (size_t i = workers_.size(); i < num; i++) {
+ workers_.emplace_back([this, i]() { WorkerBody(i); });
+ }
+ }
+ if (workers_.size() > num) {
+ for (size_t i = num; i < workers_.size(); i++) {
+ workers_[i].join();
+ }
+ workers_.resize(num);
+ }
+ }
+
+ ~ResizeableParallelRunner() { SetNumThreads(0); }
+
+ JxlParallelRetCode Run(void* jxl_opaque, JxlParallelRunInit init,
+ JxlParallelRunFunction func, uint32_t start,
+ uint32_t end) {
+ if (start + 1 == end) {
+ JxlParallelRetCode ret = init(jxl_opaque, 1);
+ if (ret != 0) return ret;
+
+ func(jxl_opaque, start, 0);
+ return ret;
+ }
+
+ size_t num_workers = std::min<size_t>(workers_.size() + 1, end - start);
+ JxlParallelRetCode ret = init(jxl_opaque, num_workers);
+ if (ret != 0) {
+ return ret;
+ }
+
+ {
+ std::unique_lock<std::mutex> l(state_mutex_);
+ // Avoid waking up more workers than needed.
+ max_running_workers_ = end - start - 1;
+ next_task_ = start;
+ end_task_ = end;
+ func_ = func;
+ jxl_opaque_ = jxl_opaque;
+ work_available_ = true;
+ num_running_workers_++;
+ workers_can_proceed_.notify_all();
+ }
+
+ DequeueTasks(0);
+
+ while (true) {
+ std::unique_lock<std::mutex> l(state_mutex_);
+ if (num_running_workers_ == 0) break;
+ work_done_.wait(l);
+ }
+
+ return ret;
+ }
+
+ private:
+ void WorkerBody(size_t worker_id) {
+ while (true) {
+ {
+ std::unique_lock<std::mutex> l(state_mutex_);
+ // Worker pool was reduced, resize down.
+ if (worker_id >= num_desired_workers_) {
+ return;
+ }
+ // Nothing to do this time.
+ if (!work_available_ || worker_id >= max_running_workers_) {
+ workers_can_proceed_.wait(l);
+ continue;
+ }
+ num_running_workers_++;
+ }
+ DequeueTasks(worker_id + 1);
+ }
+ }
+
+ void DequeueTasks(size_t thread_id) {
+ while (true) {
+ uint32_t task = next_task_++;
+ if (task >= end_task_) {
+ std::unique_lock<std::mutex> l(state_mutex_);
+ num_running_workers_--;
+ work_available_ = false;
+ if (num_running_workers_ == 0) {
+ work_done_.notify_all();
+ }
+ break;
+ }
+ func_(jxl_opaque_, task, thread_id);
+ }
+ }
+
+ // Checks when the worker has something to do, which can be one of:
+ // - quitting (when worker_id >= num_desired_workers_)
+ // - having work available for them (work_available_ is true and worker_id >=
+ // max_running_workers_)
+ std::condition_variable workers_can_proceed_;
+
+ // Workers are done, and the main thread can proceed (num_running_workers_ ==
+ // 0)
+ std::condition_variable work_done_;
+
+ std::vector<std::thread> workers_;
+
+ // Protects all the remaining variables, except for func_, jxl_opaque_ and
+ // end_task_ (for which only the write by the main thread is protected, and
+ // subsequent uses by workers happen-after it) and next_task_ (which is
+ // atomic).
+ std::mutex state_mutex_;
+
+ // Range of tasks still need to be done.
+ std::atomic<uint32_t> next_task_;
+ uint32_t end_task_;
+
+ // Function to run and its argument.
+ JxlParallelRunFunction func_;
+ void* jxl_opaque_; // not owned
+
+ // Variables that control the workers:
+ // - work_available_ is set to true after a call to Run() and to false at the
+ // end of it.
+ // - num_desired_workers_ represents the number of workers that should be
+ // present.
+ // - max_running_workers_ represents the number of workers that should be
+ // executing tasks.
+ // - num_running_workers_ represents the number of workers that are executing
+ // tasks.
+ size_t num_desired_workers_ = 0;
+ size_t max_running_workers_ = 0;
+ size_t num_running_workers_ = 0;
+ bool work_available_ = false;
+};
+} // namespace
+} // namespace jpegxl
+
+extern "C" {
+JXL_THREADS_EXPORT JxlParallelRetCode JxlResizableParallelRunner(
+ void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+ JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range) {
+ return static_cast<jpegxl::ResizeableParallelRunner*>(runner_opaque)
+ ->Run(jpegxl_opaque, init, func, start_range, end_range);
+}
+
+JXL_THREADS_EXPORT void* JxlResizableParallelRunnerCreate(
+ const JxlMemoryManager* memory_manager) {
+ return new jpegxl::ResizeableParallelRunner();
+}
+
+JXL_THREADS_EXPORT void JxlResizableParallelRunnerSetThreads(
+ void* runner_opaque, size_t num_threads) {
+ static_cast<jpegxl::ResizeableParallelRunner*>(runner_opaque)
+ ->SetNumThreads(num_threads);
+}
+
+JXL_THREADS_EXPORT void JxlResizableParallelRunnerDestroy(void* runner_opaque) {
+ delete static_cast<jpegxl::ResizeableParallelRunner*>(runner_opaque);
+}
+
+JXL_THREADS_EXPORT uint32_t
+JxlResizableParallelRunnerSuggestThreads(uint64_t xsize, uint64_t ysize) {
+ // ~one thread per group.
+ return std::min<uint64_t>(std::thread::hardware_concurrency(),
+ xsize * ysize / (256 * 256));
+}
+}
diff --git a/third_party/jpeg-xl/lib/threads/thread_parallel_runner.cc b/third_party/jpeg-xl/lib/threads/thread_parallel_runner.cc
new file mode 100644
index 0000000000..47b81bdb16
--- /dev/null
+++ b/third_party/jpeg-xl/lib/threads/thread_parallel_runner.cc
@@ -0,0 +1,101 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/thread_parallel_runner.h>
+#include <string.h>
+
+#include "lib/threads/thread_parallel_runner_internal.h"
+
+namespace {
+
+// Default JxlMemoryManager using malloc and free for the jpegxl_threads
+// library. Same as the default JxlMemoryManager for the jpegxl library
+// itself.
+
+// Default alloc and free functions.
+void* ThreadMemoryManagerDefaultAlloc(void* opaque, size_t size) {
+ return malloc(size);
+}
+
+void ThreadMemoryManagerDefaultFree(void* opaque, void* address) {
+ free(address);
+}
+
+// Initializes the memory manager instance with the passed one. The
+// MemoryManager passed in |memory_manager| may be NULL or contain NULL
+// functions which will be initialized with the default ones. If either alloc
+// or free are NULL, then both must be NULL, otherwise this function returns an
+// error.
+bool ThreadMemoryManagerInit(JxlMemoryManager* self,
+ const JxlMemoryManager* memory_manager) {
+ if (memory_manager) {
+ *self = *memory_manager;
+ } else {
+ memset(self, 0, sizeof(*self));
+ }
+ if (!self->alloc != !self->free) {
+ return false;
+ }
+ if (!self->alloc) self->alloc = ThreadMemoryManagerDefaultAlloc;
+ if (!self->free) self->free = ThreadMemoryManagerDefaultFree;
+
+ return true;
+}
+
+void* ThreadMemoryManagerAlloc(const JxlMemoryManager* memory_manager,
+ size_t size) {
+ return memory_manager->alloc(memory_manager->opaque, size);
+}
+
+void ThreadMemoryManagerFree(const JxlMemoryManager* memory_manager,
+ void* address) {
+ return memory_manager->free(memory_manager->opaque, address);
+}
+
+} // namespace
+
+JxlParallelRetCode JxlThreadParallelRunner(
+ void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+ JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range) {
+ return jpegxl::ThreadParallelRunner::Runner(
+ runner_opaque, jpegxl_opaque, init, func, start_range, end_range);
+}
+
+/// Starts the given number of worker threads and blocks until they are ready.
+/// "num_worker_threads" defaults to one per hyperthread. If zero, all tasks
+/// run on the main thread.
+void* JxlThreadParallelRunnerCreate(const JxlMemoryManager* memory_manager,
+ size_t num_worker_threads) {
+ JxlMemoryManager local_memory_manager;
+ if (!ThreadMemoryManagerInit(&local_memory_manager, memory_manager))
+ return nullptr;
+
+ void* alloc = ThreadMemoryManagerAlloc(&local_memory_manager,
+ sizeof(jpegxl::ThreadParallelRunner));
+ if (!alloc) return nullptr;
+ // Placement new constructor on allocated memory
+ jpegxl::ThreadParallelRunner* runner =
+ new (alloc) jpegxl::ThreadParallelRunner(num_worker_threads);
+ runner->memory_manager = local_memory_manager;
+
+ return runner;
+}
+
+void JxlThreadParallelRunnerDestroy(void* runner_opaque) {
+ jpegxl::ThreadParallelRunner* runner =
+ reinterpret_cast<jpegxl::ThreadParallelRunner*>(runner_opaque);
+ if (runner) {
+ JxlMemoryManager local_memory_manager = runner->memory_manager;
+ // Call destructor directly since custom free function is used.
+ runner->~ThreadParallelRunner();
+ ThreadMemoryManagerFree(&local_memory_manager, runner);
+ }
+}
+
+// Get default value for num_worker_threads parameter of
+// InitJxlThreadParallelRunner.
+size_t JxlThreadParallelRunnerDefaultNumWorkerThreads() {
+ return std::thread::hardware_concurrency();
+}
diff --git a/third_party/jpeg-xl/lib/threads/thread_parallel_runner_internal.cc b/third_party/jpeg-xl/lib/threads/thread_parallel_runner_internal.cc
new file mode 100644
index 0000000000..f26a9ba263
--- /dev/null
+++ b/third_party/jpeg-xl/lib/threads/thread_parallel_runner_internal.cc
@@ -0,0 +1,215 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/threads/thread_parallel_runner_internal.h"
+
+#include <algorithm>
+
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+ defined(THREAD_SANITIZER)
+#include "sanitizer/common_interface_defs.h" // __sanitizer_print_stack_trace
+#endif // defined(*_SANITIZER)
+
+#include <jxl/thread_parallel_runner.h>
+
+#include "lib/jxl/base/profiler.h"
+
+namespace {
+
+// Important: JXL_ASSERT does not guarantee running the `condition` code,
+// use only for debug mode checks.
+
+#if JXL_ENABLE_ASSERT
+// Exits the program after printing a stack trace when possible.
+bool Abort() {
+#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
+ defined(THREAD_SANITIZER)
+ // If compiled with any sanitizer print a stack trace. This call doesn't crash
+ // the program, instead the trap below will crash it also allowing gdb to
+ // break there.
+ __sanitizer_print_stack_trace();
+#endif // defined(*_SANITIZER)
+
+#ifdef _MSC_VER
+ __debugbreak();
+ abort();
+#else
+ __builtin_trap();
+#endif
+}
+#define JXL_ASSERT(condition) \
+ do { \
+ if (!(condition)) { \
+ Abort(); \
+ } \
+ } while (0)
+#else
+#define JXL_ASSERT(condition) \
+ do { \
+ } while (0)
+#endif
+} // namespace
+
+namespace jpegxl {
+
+// static
+JxlParallelRetCode ThreadParallelRunner::Runner(
+ void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init,
+ JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range) {
+ ThreadParallelRunner* self =
+ static_cast<ThreadParallelRunner*>(runner_opaque);
+ if (start_range > end_range) return -1;
+ if (start_range == end_range) return 0;
+
+ int ret = init(jpegxl_opaque, std::max<size_t>(self->num_worker_threads_, 1));
+ if (ret != 0) return ret;
+
+ // Use a sequential run when num_worker_threads_ is zero since we have no
+ // worker threads.
+ if (self->num_worker_threads_ == 0) {
+ const size_t thread = 0;
+ for (uint32_t task = start_range; task < end_range; ++task) {
+ func(jpegxl_opaque, task, thread);
+ }
+ return 0;
+ }
+
+ if (self->depth_.fetch_add(1, std::memory_order_acq_rel) != 0) {
+ return -1; // Must not re-enter.
+ }
+
+ const WorkerCommand worker_command =
+ (static_cast<WorkerCommand>(start_range) << 32) + end_range;
+ // Ensure the inputs do not result in a reserved command.
+ JXL_ASSERT(worker_command != kWorkerWait);
+ JXL_ASSERT(worker_command != kWorkerOnce);
+ JXL_ASSERT(worker_command != kWorkerExit);
+
+ self->data_func_ = func;
+ self->jpegxl_opaque_ = jpegxl_opaque;
+ self->num_reserved_.store(0, std::memory_order_relaxed);
+
+ self->StartWorkers(worker_command);
+ self->WorkersReadyBarrier();
+
+ if (self->depth_.fetch_add(-1, std::memory_order_acq_rel) != 1) {
+ return -1;
+ }
+ return 0;
+}
+
+// static
+void ThreadParallelRunner::RunRange(ThreadParallelRunner* self,
+ const WorkerCommand command,
+ const int thread) {
+ const uint32_t begin = command >> 32;
+ const uint32_t end = command & 0xFFFFFFFF;
+ const uint32_t num_tasks = end - begin;
+ const uint32_t num_worker_threads = self->num_worker_threads_;
+
+ // OpenMP introduced several "schedule" strategies:
+ // "single" (static assignment of exactly one chunk per thread): slower.
+ // "dynamic" (allocates k tasks at a time): competitive for well-chosen k.
+ // "guided" (allocates k tasks, decreases k): computing k = remaining/n
+ // is faster than halving k each iteration. We prefer this strategy
+ // because it avoids user-specified parameters.
+
+ for (;;) {
+#if 0
+ // dynamic
+ const uint32_t my_size = std::max(num_tasks / (num_worker_threads * 4), 1);
+#else
+ // guided
+ const uint32_t num_reserved =
+ self->num_reserved_.load(std::memory_order_relaxed);
+ // It is possible that more tasks are reserved than ready to run.
+ const uint32_t num_remaining =
+ num_tasks - std::min(num_reserved, num_tasks);
+ const uint32_t my_size =
+ std::max(num_remaining / (num_worker_threads * 4), 1u);
+#endif
+ const uint32_t my_begin = begin + self->num_reserved_.fetch_add(
+ my_size, std::memory_order_relaxed);
+ const uint32_t my_end = std::min(my_begin + my_size, begin + num_tasks);
+ // Another thread already reserved the last task.
+ if (my_begin >= my_end) {
+ break;
+ }
+ for (uint32_t task = my_begin; task < my_end; ++task) {
+ self->data_func_(self->jpegxl_opaque_, task, thread);
+ }
+ }
+}
+
+// static
+void ThreadParallelRunner::ThreadFunc(ThreadParallelRunner* self,
+ const int thread) {
+ // Until kWorkerExit command received:
+ for (;;) {
+ std::unique_lock<std::mutex> lock(self->mutex_);
+ // Notify main thread that this thread is ready.
+ if (++self->workers_ready_ == self->num_threads_) {
+ self->workers_ready_cv_.notify_one();
+ }
+ RESUME_WAIT:
+ // Wait for a command.
+ self->worker_start_cv_.wait(lock);
+ const WorkerCommand command = self->worker_start_command_;
+ switch (command) {
+ case kWorkerWait: // spurious wakeup:
+ goto RESUME_WAIT; // lock still held, avoid incrementing ready.
+ case kWorkerOnce:
+ lock.unlock();
+ self->data_func_(self->jpegxl_opaque_, thread, thread);
+ break;
+ case kWorkerExit:
+ return; // exits thread
+ default:
+ lock.unlock();
+ RunRange(self, command, thread);
+ break;
+ }
+ }
+}
+
+ThreadParallelRunner::ThreadParallelRunner(const int num_worker_threads)
+ : num_worker_threads_(num_worker_threads),
+ num_threads_(std::max(num_worker_threads, 1)) {
+ PROFILER_ZONE("ThreadParallelRunner ctor");
+
+ threads_.reserve(num_worker_threads_);
+
+ // Suppress "unused-private-field" warning.
+ (void)padding1;
+ (void)padding2;
+
+ // Safely handle spurious worker wakeups.
+ worker_start_command_ = kWorkerWait;
+
+ for (uint32_t i = 0; i < num_worker_threads_; ++i) {
+ threads_.emplace_back(ThreadFunc, this, i);
+ }
+
+ if (num_worker_threads_ != 0) {
+ WorkersReadyBarrier();
+ }
+
+ // Warm up profiler on worker threads so its expensive initialization
+ // doesn't count towards other timer measurements.
+ RunOnEachThread(
+ [](const int task, const int thread) { PROFILER_ZONE("@InitWorkers"); });
+}
+
+ThreadParallelRunner::~ThreadParallelRunner() {
+ if (num_worker_threads_ != 0) {
+ StartWorkers(kWorkerExit);
+ }
+
+ for (std::thread& thread : threads_) {
+ JXL_ASSERT(thread.joinable());
+ thread.join();
+ }
+}
+} // namespace jpegxl
diff --git a/third_party/jpeg-xl/lib/threads/thread_parallel_runner_internal.h b/third_party/jpeg-xl/lib/threads/thread_parallel_runner_internal.h
new file mode 100644
index 0000000000..199a5f2a8b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/threads/thread_parallel_runner_internal.h
@@ -0,0 +1,166 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+
+// C++ implementation using std::thread of a ::JxlParallelRunner.
+
+// The main class in this module, ThreadParallelRunner, implements a static
+// method ThreadParallelRunner::Runner than can be passed as a
+// JxlParallelRunner when using the JPEG XL library. This uses std::thread
+// internally and related synchronization functions. The number of threads
+// created is fixed at construction time and the threads are re-used for every
+// ThreadParallelRunner::Runner call. Only one concurrent Runner() call per
+// instance is allowed at a time.
+//
+// This is a scalable, lower-overhead thread pool runner, especially suitable
+// for data-parallel computations in the fork-join model, where clients need to
+// know when all tasks have completed.
+//
+// This thread pool can efficiently load-balance millions of tasks using an
+// atomic counter, thus avoiding per-task virtual or system calls. With 48
+// hyperthreads and 1M tasks that add to an atomic counter, overall runtime is
+// 10-20x higher when using std::async, and ~200x for a queue-based thread
+// pool.
+//
+// Usage:
+// ThreadParallelRunner runner;
+// JxlDecode(
+// ... , &ThreadParallelRunner::Runner, static_cast<void*>(&runner));
+
+#ifndef LIB_THREADS_THREAD_PARALLEL_RUNNER_INTERNAL_H_
+#define LIB_THREADS_THREAD_PARALLEL_RUNNER_INTERNAL_H_
+
+#include <jxl/memory_manager.h>
+#include <jxl/parallel_runner.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <atomic>
+#include <condition_variable> //NOLINT
+#include <mutex> //NOLINT
+#include <thread> //NOLINT
+#include <vector>
+
+namespace jpegxl {
+
+// Main helper class implementing the ::JxlParallelRunner interface.
+class ThreadParallelRunner {
+ public:
+ // ::JxlParallelRunner interface.
+ static JxlParallelRetCode Runner(void* runner_opaque, void* jpegxl_opaque,
+ JxlParallelRunInit init,
+ JxlParallelRunFunction func,
+ uint32_t start_range, uint32_t end_range);
+
+ // Starts the given number of worker threads and blocks until they are ready.
+ // "num_worker_threads" defaults to one per hyperthread. If zero, all tasks
+ // run on the main thread.
+ explicit ThreadParallelRunner(
+ int num_worker_threads = std::thread::hardware_concurrency());
+
+ // Waits for all threads to exit.
+ ~ThreadParallelRunner();
+
+ // Returns maximum number of main/worker threads that may call Func. Useful
+ // for allocating per-thread storage.
+ size_t NumThreads() const { return num_threads_; }
+
+ // Runs func(thread, thread) on all thread(s) that may participate in Run.
+ // If NumThreads() == 0, runs on the main thread with thread == 0, otherwise
+ // concurrently called by each worker thread in [0, NumThreads()).
+ template <class Func>
+ void RunOnEachThread(const Func& func) {
+ if (num_worker_threads_ == 0) {
+ const int thread = 0;
+ func(thread, thread);
+ return;
+ }
+
+ data_func_ = reinterpret_cast<JxlParallelRunFunction>(&CallClosure<Func>);
+ jpegxl_opaque_ = const_cast<void*>(static_cast<const void*>(&func));
+ StartWorkers(kWorkerOnce);
+ WorkersReadyBarrier();
+ }
+
+ JxlMemoryManager memory_manager;
+
+ private:
+ // After construction and between calls to Run, workers are "ready", i.e.
+ // waiting on worker_start_cv_. They are "started" by sending a "command"
+ // and notifying all worker_start_cv_ waiters. (That is why all workers
+ // must be ready/waiting - otherwise, the notification will not reach all of
+ // them and the main thread waits in vain for them to report readiness.)
+ using WorkerCommand = uint64_t;
+
+ // Special values; all others encode the begin/end parameters. Note that all
+ // these are no-op ranges (begin >= end) and therefore never used to encode
+ // ranges.
+ static constexpr WorkerCommand kWorkerWait = ~1ULL;
+ static constexpr WorkerCommand kWorkerOnce = ~2ULL;
+ static constexpr WorkerCommand kWorkerExit = ~3ULL;
+
+ // Calls f(task, thread). Used for type erasure of Func arguments. The
+ // signature must match JxlParallelRunFunction, hence a void* argument.
+ template <class Closure>
+ static void CallClosure(void* f, const uint32_t task, const size_t thread) {
+ (*reinterpret_cast<const Closure*>(f))(task, thread);
+ }
+
+ void WorkersReadyBarrier() {
+ std::unique_lock<std::mutex> lock(mutex_);
+ // Typically only a single iteration.
+ while (workers_ready_ != threads_.size()) {
+ workers_ready_cv_.wait(lock);
+ }
+ workers_ready_ = 0;
+
+ // Safely handle spurious worker wakeups.
+ worker_start_command_ = kWorkerWait;
+ }
+
+ // Precondition: all workers are ready.
+ void StartWorkers(const WorkerCommand worker_command) {
+ mutex_.lock();
+ worker_start_command_ = worker_command;
+ // Workers will need this lock, so release it before they wake up.
+ mutex_.unlock();
+ worker_start_cv_.notify_all();
+ }
+
+ // Attempts to reserve and perform some work from the global range of tasks,
+ // which is encoded within "command". Returns after all tasks are reserved.
+ static void RunRange(ThreadParallelRunner* self, const WorkerCommand command,
+ const int thread);
+
+ static void ThreadFunc(ThreadParallelRunner* self, int thread);
+
+ // Unmodified after ctor, but cannot be const because we call thread::join().
+ std::vector<std::thread> threads_;
+
+ const uint32_t num_worker_threads_; // == threads_.size()
+ const uint32_t num_threads_;
+
+ std::atomic<int> depth_{0}; // detects if Run is re-entered (not supported).
+
+ std::mutex mutex_; // guards both cv and their variables.
+ std::condition_variable workers_ready_cv_;
+ uint32_t workers_ready_ = 0;
+ std::condition_variable worker_start_cv_;
+ WorkerCommand worker_start_command_;
+
+ // Written by main thread, read by workers (after mutex lock/unlock).
+ JxlParallelRunFunction data_func_;
+ void* jpegxl_opaque_;
+
+ // Updated by workers; padding avoids false sharing.
+ uint8_t padding1[64];
+ std::atomic<uint32_t> num_reserved_{0};
+ uint8_t padding2[64];
+};
+
+} // namespace jpegxl
+
+#endif // LIB_THREADS_THREAD_PARALLEL_RUNNER_INTERNAL_H_
diff --git a/third_party/jpeg-xl/lib/threads/thread_parallel_runner_test.cc b/third_party/jpeg-xl/lib/threads/thread_parallel_runner_test.cc
new file mode 100644
index 0000000000..a757c3018b
--- /dev/null
+++ b/third_party/jpeg-xl/lib/threads/thread_parallel_runner_test.cc
@@ -0,0 +1,123 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <atomic>
+
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+using jxl::test::ThreadPoolForTests;
+
+namespace jpegxl {
+namespace {
+
+int PopulationCount(uint64_t bits) {
+ int num_set = 0;
+ while (bits != 0) {
+ num_set += bits & 1;
+ bits >>= 1;
+ }
+ return num_set;
+}
+
+// Ensures task parameter is in bounds, every parameter is reached,
+// pool can be reused (multiple consecutive Run calls), pool can be destroyed
+// (joining with its threads), num_threads=0 works (runs on current thread).
+TEST(ThreadParallelRunnerTest, TestPool) {
+ for (int num_threads = 0; num_threads <= 18; ++num_threads) {
+ ThreadPoolForTests pool(num_threads);
+ for (int num_tasks = 0; num_tasks < 32; ++num_tasks) {
+ std::vector<int> mementos(num_tasks);
+ for (int begin = 0; begin < 32; ++begin) {
+ std::fill(mementos.begin(), mementos.end(), 0);
+ EXPECT_TRUE(RunOnPool(
+ &pool, begin, begin + num_tasks, jxl::ThreadPool::NoInit,
+ [begin, num_tasks, &mementos](const int task, const int thread) {
+ // Parameter is in the given range
+ EXPECT_GE(task, begin);
+ EXPECT_LT(task, begin + num_tasks);
+
+ // Store mementos to be sure we visited each task.
+ mementos.at(task - begin) = 1000 + task;
+ },
+ "TestPool"));
+ for (int task = begin; task < begin + num_tasks; ++task) {
+ EXPECT_EQ(1000 + task, mementos.at(task - begin));
+ }
+ }
+ }
+ }
+}
+
+// Verify "thread" parameter when processing few tasks.
+TEST(ThreadParallelRunnerTest, TestSmallAssignments) {
+ // WARNING: cumulative total threads must not exceed profiler.h kMaxThreads.
+ const int kMaxThreads = 8;
+ for (int num_threads = 1; num_threads <= kMaxThreads; ++num_threads) {
+ ThreadPoolForTests pool(num_threads);
+
+ // (Avoid mutex because it may perturb the worker thread scheduling)
+ std::atomic<uint64_t> id_bits{0};
+ std::atomic<int> num_calls{0};
+
+ EXPECT_TRUE(RunOnPool(
+ &pool, 0, num_threads, jxl::ThreadPool::NoInit,
+ [&num_calls, num_threads, &id_bits](const int task, const int thread) {
+ num_calls.fetch_add(1, std::memory_order_relaxed);
+
+ EXPECT_LT(thread, num_threads);
+ uint64_t bits = id_bits.load(std::memory_order_relaxed);
+ while (
+ !id_bits.compare_exchange_weak(bits, bits | (1ULL << thread))) {
+ }
+ },
+ "TestSmallAssignments"));
+
+ // Correct number of tasks.
+ EXPECT_EQ(num_threads, num_calls.load());
+
+ const int num_participants = PopulationCount(id_bits.load());
+ // Can't expect equality because other workers may have woken up too late.
+ EXPECT_LE(num_participants, num_threads);
+ }
+}
+
+struct Counter {
+ Counter() {
+ // Suppress "unused-field" warning.
+ (void)padding;
+ }
+ void Assimilate(const Counter& victim) { counter += victim.counter; }
+ int counter = 0;
+ int padding[31];
+};
+
+TEST(ThreadParallelRunnerTest, TestCounter) {
+ const int kNumThreads = 12;
+ ThreadPoolForTests pool(kNumThreads);
+ alignas(128) Counter counters[kNumThreads];
+
+ const int kNumTasks = kNumThreads * 19;
+ EXPECT_TRUE(RunOnPool(
+ &pool, 0, kNumTasks, jxl::ThreadPool::NoInit,
+ [&counters](const int task, const int thread) {
+ counters[thread].counter += task;
+ },
+ "TestCounter"));
+
+ int expected = 0;
+ for (int i = 0; i < kNumTasks; ++i) {
+ expected += i;
+ }
+
+ for (int i = 1; i < kNumThreads; ++i) {
+ counters[0].Assimilate(counters[i]);
+ }
+ EXPECT_EQ(expected, counters[0].counter);
+}
+
+} // namespace
+} // namespace jpegxl
diff --git a/third_party/jpeg-xl/plugins/CMakeLists.txt b/third_party/jpeg-xl/plugins/CMakeLists.txt
new file mode 100644
index 0000000000..bff1bff29d
--- /dev/null
+++ b/third_party/jpeg-xl/plugins/CMakeLists.txt
@@ -0,0 +1,21 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+if(NOT MSVC)
+ option(JPEGXL_ENABLE_PLUGIN_GDKPIXBUF "Enable plugin for GdkPixbuf image loading library" ON)
+ if(JPEGXL_ENABLE_PLUGIN_GDKPIXBUF)
+ add_subdirectory(gdk-pixbuf)
+ endif()
+endif()
+
+option(JPEGXL_ENABLE_PLUGIN_GIMP210 "Enable plugin for GIMP 2.10.x series" ON)
+if(JPEGXL_ENABLE_PLUGIN_GIMP210)
+ add_subdirectory(gimp)
+endif()
+
+option(JPEGXL_ENABLE_PLUGIN_MIME "Enable image/jxl declaration for shared-mime-info" ON)
+if(JPEGXL_ENABLE_PLUGIN_MIME)
+ add_subdirectory(mime)
+endif()
diff --git a/third_party/jpeg-xl/plugins/gdk-pixbuf/CMakeLists.txt b/third_party/jpeg-xl/plugins/gdk-pixbuf/CMakeLists.txt
new file mode 100644
index 0000000000..7b53b98c66
--- /dev/null
+++ b/third_party/jpeg-xl/plugins/gdk-pixbuf/CMakeLists.txt
@@ -0,0 +1,83 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+find_package(PkgConfig)
+pkg_check_modules(Gdk-Pixbuf IMPORTED_TARGET gdk-pixbuf-2.0>=2.36)
+
+include(GNUInstallDirs)
+
+if (NOT Gdk-Pixbuf_FOUND)
+ message(WARNING "GDK Pixbuf development libraries not found, \
+ the Gdk-Pixbuf plugin will not be built")
+ return ()
+endif ()
+
+add_library(pixbufloader-jxl MODULE pixbufloader-jxl.c)
+
+# Mark all symbols as hidden by default. The PkgConfig::Gdk-Pixbuf dependency
+# will cause fill_info and fill_vtable entry points to be made public.
+set_target_properties(pixbufloader-jxl PROPERTIES
+ CXX_VISIBILITY_PRESET hidden
+ VISIBILITY_INLINES_HIDDEN 1
+)
+
+# Note: This only needs the decoder library, but we don't install the decoder
+# shared library.
+target_link_libraries(pixbufloader-jxl jxl jxl_threads lcms2 PkgConfig::Gdk-Pixbuf)
+
+execute_process(COMMAND ${PKG_CONFIG_EXECUTABLE} gdk-pixbuf-2.0 --variable gdk_pixbuf_moduledir --define-variable=prefix=${CMAKE_INSTALL_PREFIX} OUTPUT_VARIABLE GDK_PIXBUF_MODULEDIR OUTPUT_STRIP_TRAILING_WHITESPACE)
+install(TARGETS pixbufloader-jxl DESTINATION "${GDK_PIXBUF_MODULEDIR}")
+
+# Instead of the following, we might instead add the
+# mime type image/jxl to
+# /usr/share/thumbnailers/gdk-pixbuf-thumbnailer.thumbnailer
+install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/jxl.thumbnailer DESTINATION "${CMAKE_INSTALL_DATADIR}/thumbnailers/")
+
+if(BUILD_TESTING AND NOT CMAKE_CROSSCOMPILING)
+ pkg_check_modules(Gdk IMPORTED_TARGET gdk-2.0)
+ if (Gdk_FOUND)
+ # Test for loading a .jxl file using the pixbufloader library via GDK. This
+ # requires to have the image/jxl mime type and loader library configured,
+ # which we do in a fake environment in the CMAKE_CURRENT_BINARY_DIR.
+ add_executable(pixbufloader_test pixbufloader_test.cc)
+ target_link_libraries(pixbufloader_test PkgConfig::Gdk)
+
+ # Create a mime cache for test.
+ add_custom_command(
+ OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/mime/mime.cache"
+ COMMAND env XDG_DATA_HOME=${CMAKE_CURRENT_BINARY_DIR}
+ xdg-mime install --novendor
+ "${CMAKE_SOURCE_DIR}/plugins/mime/image-jxl.xml"
+ DEPENDS "${CMAKE_SOURCE_DIR}/plugins/mime/image-jxl.xml"
+ )
+ add_custom_target(pixbufloader_test_mime
+ DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/mime/mime.cache"
+ )
+ add_dependencies(pixbufloader_test pixbufloader_test_mime)
+
+ # Use a fake X server to run the test if xvfb is installed.
+ find_program (XVFB_PROGRAM xvfb-run)
+ if(XVFB_PROGRAM)
+ set(XVFB_PROGRAM_PREFIX "${XVFB_PROGRAM};-a")
+ else()
+ set(XVFB_PROGRAM_PREFIX "")
+ endif()
+
+ # libX11.so and libgdk-x11-2.0.so are not compiled with MSAN -> report
+ # use-of-uninitialized-value for string some internal string value.
+ # TODO(eustas): investigate direct memory leak (32 bytes).
+ if (NOT (SANITIZER STREQUAL "msan") AND NOT (SANITIZER STREQUAL "asan"))
+ add_test(
+ NAME pixbufloader_test_jxl
+ COMMAND
+ ${XVFB_PROGRAM_PREFIX} $<TARGET_FILE:pixbufloader_test>
+ "${CMAKE_CURRENT_SOURCE_DIR}/loaders_test.cache"
+ "${CMAKE_SOURCE_DIR}/testdata/jxl/blending/cropped_traffic_light.jxl"
+ WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+ )
+ set_tests_properties(pixbufloader_test_jxl PROPERTIES SKIP_RETURN_CODE 254)
+ endif()
+ endif() # Gdk_FOUND
+endif() # BUILD_TESTING
diff --git a/third_party/jpeg-xl/plugins/gdk-pixbuf/README.md b/third_party/jpeg-xl/plugins/gdk-pixbuf/README.md
new file mode 100644
index 0000000000..185919436f
--- /dev/null
+++ b/third_party/jpeg-xl/plugins/gdk-pixbuf/README.md
@@ -0,0 +1,50 @@
+## JPEG XL GDK Pixbuf
+
+
+The plugin may already have been installed when following the instructions from the
+[Installing section of BUILDING.md](../../BUILDING.md#installing), in which case it should
+already be in the correct place, e.g.
+
+```/usr/lib/x86_64-linux-gnu/gdk-pixbuf-2.0/2.10.0/loaders/libpixbufloader-jxl.so```
+
+Otherwise we can copy it manually:
+
+```bash
+sudo cp $your_build_directory/plugins/gdk-pixbuf/libpixbufloader-jxl.so /usr/lib/x86_64-linux-gnu/gdk-pixbuf-2.0/2.10.0/loaders/libpixbufloader-jxl.so
+```
+
+
+Then we need to update the cache, for example with:
+
+```bash
+sudo /usr/lib/x86_64-linux-gnu/gdk-pixbuf-2.0/gdk-pixbuf-query-loaders --update-cache
+```
+
+In order to get thumbnails with this, first one has to add the jxl MIME type, see
+[../mime/README.md](../mime/README.md).
+
+Ensure that the thumbnailer file is installed in the correct place,
+`/usr/share/thumbnailers/jxl.thumbnailer` or `/usr/local/share/thumbnailers/jxl.thumbnailer`.
+
+The file should have been copied automatically when following the instructions
+in the [Installing section of README.md](../../README.md#installing), but
+otherwise it can be copied manually:
+
+```bash
+sudo cp plugins/gdk-pixbuf/jxl.thumbnailer /usr/local/share/thumbnailers/jxl.thumbnailer
+```
+
+Update the Mime database with
+```bash
+update-mime --local
+```
+or
+```bash
+sudo update-desktop-database
+```
+
+Then possibly delete the thumbnail cache with
+```bash
+rm -r ~/.cache/thumbnails
+```
+and restart the application displaying thumbnails, e.g. `nautilus -q` to display thumbnails.
diff --git a/third_party/jpeg-xl/plugins/gdk-pixbuf/jxl.thumbnailer b/third_party/jpeg-xl/plugins/gdk-pixbuf/jxl.thumbnailer
new file mode 100644
index 0000000000..1bcaab61fc
--- /dev/null
+++ b/third_party/jpeg-xl/plugins/gdk-pixbuf/jxl.thumbnailer
@@ -0,0 +1,4 @@
+[Thumbnailer Entry]
+TryExec=/usr/bin/gdk-pixbuf-thumbnailer
+Exec=/usr/bin/gdk-pixbuf-thumbnailer -s %s %u %o
+MimeType=image/jxl;
diff --git a/third_party/jpeg-xl/plugins/gdk-pixbuf/loaders_test.cache b/third_party/jpeg-xl/plugins/gdk-pixbuf/loaders_test.cache
new file mode 100644
index 0000000000..95c62c8fc3
--- /dev/null
+++ b/third_party/jpeg-xl/plugins/gdk-pixbuf/loaders_test.cache
@@ -0,0 +1,16 @@
+# GdkPixbuf Image Loader Modules file for testing
+# Automatically generated file, do not edit
+# Created by gdk-pixbuf-query-loaders from gdk-pixbuf-2.42.2
+#
+# Generated with:
+# GDK_PIXBUF_MODULEDIR=`pwd`/build/plugins/gdk-pixbuf/ gdk-pixbuf-query-loaders
+#
+# Modified to use the library from the current working directory at runtime.
+"./libpixbufloader-jxl.so"
+"jxl" 4 "gdk-pixbuf" "JPEG XL image" "BSD-3"
+"image/jxl" ""
+"jxl" ""
+"\377\n" " " 100
+"...\fJXL \r\n\207\n" "zzz " 100
+
+
diff --git a/third_party/jpeg-xl/plugins/gdk-pixbuf/pixbufloader-jxl.c b/third_party/jpeg-xl/plugins/gdk-pixbuf/pixbufloader-jxl.c
new file mode 100644
index 0000000000..9df9611b39
--- /dev/null
+++ b/third_party/jpeg-xl/plugins/gdk-pixbuf/pixbufloader-jxl.c
@@ -0,0 +1,816 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <jxl/codestream_header.h>
+#include <jxl/decode.h>
+#include <jxl/encode.h>
+#include <jxl/resizable_parallel_runner.h>
+#include <jxl/types.h>
+
+#include "lcms2.h"
+
+#define GDK_PIXBUF_ENABLE_BACKEND
+#include <gdk-pixbuf/gdk-pixbuf.h>
+#undef GDK_PIXBUF_ENABLE_BACKEND
+
+G_BEGIN_DECLS
+
+// Information about a single frame.
+typedef struct {
+ uint64_t duration_ms;
+ GdkPixbuf *data;
+ gboolean decoded;
+} GdkPixbufJxlAnimationFrame;
+
+// Represent a whole JPEG XL animation; all its fields are owned; as a GObject,
+// the Animation struct itself is reference counted (as are the GdkPixbufs for
+// individual frames).
+struct _GdkPixbufJxlAnimation {
+ GdkPixbufAnimation parent_instance;
+
+ // GDK interface implementation callbacks.
+ GdkPixbufModuleSizeFunc image_size_callback;
+ GdkPixbufModulePreparedFunc pixbuf_prepared_callback;
+ GdkPixbufModuleUpdatedFunc area_updated_callback;
+ gpointer user_data;
+
+ // All frames known so far; a frame is added when the JXL_DEC_FRAME event is
+ // received from the decoder; initially frame.decoded is FALSE, until
+ // the JXL_DEC_IMAGE event is received.
+ GArray *frames;
+
+ // JPEG XL decoder and related structures.
+ JxlParallelRunner *parallel_runner;
+ JxlDecoder *decoder;
+ JxlPixelFormat pixel_format;
+
+ // Decoding is `done` when JXL_DEC_SUCCESS is received; calling
+ // load_increment afterwards gives an error.
+ gboolean done;
+
+ // Image information.
+ size_t xsize;
+ size_t ysize;
+ gboolean alpha_premultiplied;
+ gboolean has_animation;
+ gboolean has_alpha;
+ uint64_t total_duration_ms;
+ uint64_t tick_duration_us;
+ uint64_t repetition_count; // 0 = loop forever
+
+ gpointer icc_buff;
+ cmsContext context;
+ cmsHPROFILE profile, srgb;
+ cmsHTRANSFORM transform;
+};
+
+#define GDK_TYPE_PIXBUF_JXL_ANIMATION (gdk_pixbuf_jxl_animation_get_type())
+G_DECLARE_FINAL_TYPE(GdkPixbufJxlAnimation, gdk_pixbuf_jxl_animation, GDK,
+ JXL_ANIMATION, GdkPixbufAnimation);
+
+G_DEFINE_TYPE(GdkPixbufJxlAnimation, gdk_pixbuf_jxl_animation,
+ GDK_TYPE_PIXBUF_ANIMATION);
+
+// Iterator to a given point in time in the animation; contains a pointer to the
+// full animation.
+struct _GdkPixbufJxlAnimationIter {
+ GdkPixbufAnimationIter parent_instance;
+ GdkPixbufJxlAnimation *animation;
+ size_t current_frame;
+ uint64_t time_offset;
+};
+
+#define GDK_TYPE_PIXBUF_JXL_ANIMATION_ITER \
+ (gdk_pixbuf_jxl_animation_iter_get_type())
+G_DECLARE_FINAL_TYPE(GdkPixbufJxlAnimationIter, gdk_pixbuf_jxl_animation_iter,
+ GDK, JXL_ANIMATION_ITER, GdkPixbufAnimationIter);
+G_DEFINE_TYPE(GdkPixbufJxlAnimationIter, gdk_pixbuf_jxl_animation_iter,
+ GDK_TYPE_PIXBUF_ANIMATION_ITER);
+
+static void gdk_pixbuf_jxl_animation_init(GdkPixbufJxlAnimation *obj) {
+ // Suppress "unused function" warnings.
+ (void)glib_autoptr_cleanup_GdkPixbufJxlAnimation;
+ (void)GDK_JXL_ANIMATION;
+ (void)GDK_IS_JXL_ANIMATION;
+}
+
+static gboolean gdk_pixbuf_jxl_animation_is_static_image(
+ GdkPixbufAnimation *anim) {
+ GdkPixbufJxlAnimation *jxl_anim = (GdkPixbufJxlAnimation *)anim;
+ return !jxl_anim->has_animation;
+}
+
+static GdkPixbuf *gdk_pixbuf_jxl_animation_get_static_image(
+ GdkPixbufAnimation *anim) {
+ GdkPixbufJxlAnimation *jxl_anim = (GdkPixbufJxlAnimation *)anim;
+ if (jxl_anim->frames == NULL || jxl_anim->frames->len == 0) return NULL;
+ GdkPixbufJxlAnimationFrame *frame =
+ &g_array_index(jxl_anim->frames, GdkPixbufJxlAnimationFrame, 0);
+ return frame->decoded ? frame->data : NULL;
+}
+
+static void gdk_pixbuf_jxl_animation_get_size(GdkPixbufAnimation *anim,
+ int *width, int *height) {
+ GdkPixbufJxlAnimation *jxl_anim = (GdkPixbufJxlAnimation *)anim;
+ if (width) *width = jxl_anim->xsize;
+ if (height) *height = jxl_anim->ysize;
+}
+
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
+static gboolean gdk_pixbuf_jxl_animation_iter_advance(
+ GdkPixbufAnimationIter *iter, const GTimeVal *current_time);
+
+static GdkPixbufAnimationIter *gdk_pixbuf_jxl_animation_get_iter(
+ GdkPixbufAnimation *anim, const GTimeVal *start_time) {
+ GdkPixbufJxlAnimationIter *iter =
+ g_object_new(GDK_TYPE_PIXBUF_JXL_ANIMATION_ITER, NULL);
+ iter->animation = (GdkPixbufJxlAnimation *)anim;
+ iter->time_offset = start_time->tv_sec * 1000ULL + start_time->tv_usec / 1000;
+ g_object_ref(iter->animation);
+ gdk_pixbuf_jxl_animation_iter_advance((GdkPixbufAnimationIter *)iter,
+ start_time);
+ return (GdkPixbufAnimationIter *)iter;
+}
+G_GNUC_END_IGNORE_DEPRECATIONS
+
+static void gdk_pixbuf_jxl_animation_finalize(GObject *obj) {
+ GdkPixbufJxlAnimation *decoder_state = (GdkPixbufJxlAnimation *)obj;
+ if (decoder_state->frames != NULL) {
+ for (size_t i = 0; i < decoder_state->frames->len; i++) {
+ g_object_unref(
+ g_array_index(decoder_state->frames, GdkPixbufJxlAnimationFrame, i)
+ .data);
+ }
+ g_array_free(decoder_state->frames, /*free_segment=*/TRUE);
+ }
+ JxlResizableParallelRunnerDestroy(decoder_state->parallel_runner);
+ JxlDecoderDestroy(decoder_state->decoder);
+ cmsDeleteTransform(decoder_state->transform);
+ cmsCloseProfile(decoder_state->srgb);
+ cmsCloseProfile(decoder_state->profile);
+ cmsDeleteContext(decoder_state->context);
+ g_free(decoder_state->icc_buff);
+}
+
+static void gdk_pixbuf_jxl_animation_class_init(
+ GdkPixbufJxlAnimationClass *klass) {
+ G_OBJECT_CLASS(klass)->finalize = gdk_pixbuf_jxl_animation_finalize;
+ klass->parent_class.is_static_image =
+ gdk_pixbuf_jxl_animation_is_static_image;
+ klass->parent_class.get_static_image =
+ gdk_pixbuf_jxl_animation_get_static_image;
+ klass->parent_class.get_size = gdk_pixbuf_jxl_animation_get_size;
+ klass->parent_class.get_iter = gdk_pixbuf_jxl_animation_get_iter;
+}
+
+static void gdk_pixbuf_jxl_animation_iter_init(GdkPixbufJxlAnimationIter *obj) {
+ (void)glib_autoptr_cleanup_GdkPixbufJxlAnimationIter;
+ (void)GDK_JXL_ANIMATION_ITER;
+ (void)GDK_IS_JXL_ANIMATION_ITER;
+}
+
+static int gdk_pixbuf_jxl_animation_iter_get_delay_time(
+ GdkPixbufAnimationIter *iter) {
+ GdkPixbufJxlAnimationIter *jxl_iter = (GdkPixbufJxlAnimationIter *)iter;
+ if (jxl_iter->animation->frames->len <= jxl_iter->current_frame) {
+ return 0;
+ }
+ return g_array_index(jxl_iter->animation->frames, GdkPixbufJxlAnimationFrame,
+ jxl_iter->current_frame)
+ .duration_ms;
+}
+
+static GdkPixbuf *gdk_pixbuf_jxl_animation_iter_get_pixbuf(
+ GdkPixbufAnimationIter *iter) {
+ GdkPixbufJxlAnimationIter *jxl_iter = (GdkPixbufJxlAnimationIter *)iter;
+ if (jxl_iter->animation->frames->len <= jxl_iter->current_frame) {
+ return NULL;
+ }
+ return g_array_index(jxl_iter->animation->frames, GdkPixbufJxlAnimationFrame,
+ jxl_iter->current_frame)
+ .data;
+}
+
+static gboolean gdk_pixbuf_jxl_animation_iter_on_currently_loading_frame(
+ GdkPixbufAnimationIter *iter) {
+ GdkPixbufJxlAnimationIter *jxl_iter = (GdkPixbufJxlAnimationIter *)iter;
+ if (jxl_iter->animation->frames->len <= jxl_iter->current_frame) {
+ return TRUE;
+ }
+ return !g_array_index(jxl_iter->animation->frames, GdkPixbufJxlAnimationFrame,
+ jxl_iter->current_frame)
+ .decoded;
+}
+
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
+static gboolean gdk_pixbuf_jxl_animation_iter_advance(
+ GdkPixbufAnimationIter *iter, const GTimeVal *current_time) {
+ GdkPixbufJxlAnimationIter *jxl_iter = (GdkPixbufJxlAnimationIter *)iter;
+ size_t old_frame = jxl_iter->current_frame;
+
+ uint64_t current_time_ms = current_time->tv_sec * 1000ULL +
+ current_time->tv_usec / 1000 -
+ jxl_iter->time_offset;
+
+ if (jxl_iter->animation->frames->len == 0) {
+ jxl_iter->current_frame = 0;
+ } else if (!jxl_iter->animation->done &&
+ current_time_ms >= jxl_iter->animation->total_duration_ms) {
+ jxl_iter->current_frame = jxl_iter->animation->frames->len - 1;
+ } else if (jxl_iter->animation->repetition_count != 0 &&
+ current_time_ms > jxl_iter->animation->repetition_count *
+ jxl_iter->animation->total_duration_ms) {
+ jxl_iter->current_frame = jxl_iter->animation->frames->len - 1;
+ } else {
+ uint64_t total_duration_ms = jxl_iter->animation->total_duration_ms;
+ // Guard against divide-by-0 in malicious files.
+ if (total_duration_ms == 0) total_duration_ms = 1;
+ uint64_t loop_offset = current_time_ms % total_duration_ms;
+ jxl_iter->current_frame = 0;
+ while (TRUE) {
+ uint64_t duration =
+ g_array_index(jxl_iter->animation->frames, GdkPixbufJxlAnimationFrame,
+ jxl_iter->current_frame)
+ .duration_ms;
+ if (duration >= loop_offset) {
+ break;
+ }
+ loop_offset -= duration;
+ jxl_iter->current_frame++;
+ }
+ }
+
+ return old_frame != jxl_iter->current_frame;
+}
+G_GNUC_END_IGNORE_DEPRECATIONS
+
+static void gdk_pixbuf_jxl_animation_iter_finalize(GObject *obj) {
+ GdkPixbufJxlAnimationIter *iter = (GdkPixbufJxlAnimationIter *)obj;
+ g_object_unref(iter->animation);
+}
+
+static void gdk_pixbuf_jxl_animation_iter_class_init(
+ GdkPixbufJxlAnimationIterClass *klass) {
+ G_OBJECT_CLASS(klass)->finalize = gdk_pixbuf_jxl_animation_iter_finalize;
+ klass->parent_class.get_delay_time =
+ gdk_pixbuf_jxl_animation_iter_get_delay_time;
+ klass->parent_class.get_pixbuf = gdk_pixbuf_jxl_animation_iter_get_pixbuf;
+ klass->parent_class.on_currently_loading_frame =
+ gdk_pixbuf_jxl_animation_iter_on_currently_loading_frame;
+ klass->parent_class.advance = gdk_pixbuf_jxl_animation_iter_advance;
+}
+
+G_END_DECLS
+
+static gpointer begin_load(GdkPixbufModuleSizeFunc size_func,
+ GdkPixbufModulePreparedFunc prepare_func,
+ GdkPixbufModuleUpdatedFunc update_func,
+ gpointer user_data, GError **error) {
+ GdkPixbufJxlAnimation *decoder_state =
+ g_object_new(GDK_TYPE_PIXBUF_JXL_ANIMATION, NULL);
+ if (decoder_state == NULL) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "Creation of the animation state failed");
+ return NULL;
+ }
+ decoder_state->image_size_callback = size_func;
+ decoder_state->pixbuf_prepared_callback = prepare_func;
+ decoder_state->area_updated_callback = update_func;
+ decoder_state->user_data = user_data;
+ decoder_state->frames =
+ g_array_new(/*zero_terminated=*/FALSE, /*clear_=*/TRUE,
+ sizeof(GdkPixbufJxlAnimationFrame));
+
+ if (decoder_state->frames == NULL) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "Creation of the frame array failed");
+ goto cleanup;
+ }
+
+ if (!(decoder_state->parallel_runner =
+ JxlResizableParallelRunnerCreate(NULL))) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "Creation of the JXL parallel runner failed");
+ goto cleanup;
+ }
+
+ if (!(decoder_state->decoder = JxlDecoderCreate(NULL))) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "Creation of the JXL decoder failed");
+ goto cleanup;
+ }
+
+ JxlDecoderStatus status;
+
+ if ((status = JxlDecoderSetParallelRunner(
+ decoder_state->decoder, JxlResizableParallelRunner,
+ decoder_state->parallel_runner)) != JXL_DEC_SUCCESS) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "JxlDecoderSetParallelRunner failed: %x", status);
+ goto cleanup;
+ }
+ if ((status = JxlDecoderSubscribeEvents(
+ decoder_state->decoder, JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING |
+ JXL_DEC_FULL_IMAGE | JXL_DEC_FRAME)) !=
+ JXL_DEC_SUCCESS) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "JxlDecoderSubscribeEvents failed: %x", status);
+ goto cleanup;
+ }
+
+ decoder_state->pixel_format.data_type = JXL_TYPE_FLOAT;
+ decoder_state->pixel_format.endianness = JXL_NATIVE_ENDIAN;
+
+ return decoder_state;
+cleanup:
+ JxlResizableParallelRunnerDestroy(decoder_state->parallel_runner);
+ JxlDecoderDestroy(decoder_state->decoder);
+ g_object_unref(decoder_state);
+ return NULL;
+}
+
+static gboolean stop_load(gpointer context, GError **error) {
+ g_object_unref(context);
+ return TRUE;
+}
+
+static void draw_pixels(void *context, size_t x, size_t y, size_t num_pixels,
+ const void *pixels) {
+ GdkPixbufJxlAnimation *decoder_state = context;
+
+ GdkPixbuf *output =
+ g_array_index(decoder_state->frames, GdkPixbufJxlAnimationFrame,
+ decoder_state->frames->len - 1)
+ .data;
+
+ guchar *dst = gdk_pixbuf_get_pixels(output) +
+ decoder_state->pixel_format.num_channels * x +
+ gdk_pixbuf_get_rowstride(output) * y;
+
+ cmsDoTransform(decoder_state->transform, pixels, dst, num_pixels);
+}
+
+static gboolean load_increment(gpointer context, const guchar *buf, guint size,
+ GError **error) {
+ GdkPixbufJxlAnimation *decoder_state = context;
+ if (decoder_state->done == TRUE) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "JXL decoder load_increment called after end of file");
+ return FALSE;
+ }
+
+ JxlDecoderStatus status;
+
+ if ((status = JxlDecoderSetInput(decoder_state->decoder, buf, size)) !=
+ JXL_DEC_SUCCESS) {
+ // Should never happen if things are done properly.
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "JXL decoder logic error: %x", status);
+ return FALSE;
+ }
+
+ for (;;) {
+ status = JxlDecoderProcessInput(decoder_state->decoder);
+ switch (status) {
+ case JXL_DEC_NEED_MORE_INPUT: {
+ JxlDecoderReleaseInput(decoder_state->decoder);
+ return TRUE;
+ }
+
+ case JXL_DEC_BASIC_INFO: {
+ JxlBasicInfo info;
+ if (JxlDecoderGetBasicInfo(decoder_state->decoder, &info) !=
+ JXL_DEC_SUCCESS) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "JXLDecoderGetBasicInfo failed");
+ return FALSE;
+ }
+ decoder_state->pixel_format.num_channels = info.alpha_bits > 0 ? 4 : 3;
+ decoder_state->alpha_premultiplied = info.alpha_premultiplied;
+ decoder_state->xsize = info.xsize;
+ decoder_state->ysize = info.ysize;
+ decoder_state->has_animation = info.have_animation;
+ decoder_state->has_alpha = info.alpha_bits > 0;
+ if (info.have_animation) {
+ decoder_state->repetition_count = info.animation.num_loops;
+ decoder_state->tick_duration_us = 1000000ULL *
+ info.animation.tps_denominator /
+ info.animation.tps_numerator;
+ }
+ gint width = info.xsize;
+ gint height = info.ysize;
+ if (decoder_state->image_size_callback) {
+ decoder_state->image_size_callback(&width, &height,
+ decoder_state->user_data);
+ }
+
+ // GDK convention for signaling being interested only in the basic info.
+ if (width == 0 || height == 0) {
+ decoder_state->done = TRUE;
+ return TRUE;
+ }
+
+ // Set an appropriate number of threads for the image size.
+ JxlResizableParallelRunnerSetThreads(
+ decoder_state->parallel_runner,
+ JxlResizableParallelRunnerSuggestThreads(info.xsize, info.ysize));
+ break;
+ }
+
+ case JXL_DEC_COLOR_ENCODING: {
+ // Get the ICC color profile of the pixel data
+ size_t icc_size;
+ if (JXL_DEC_SUCCESS != JxlDecoderGetICCProfileSize(
+ decoder_state->decoder,
+ &decoder_state->pixel_format,
+ JXL_COLOR_PROFILE_TARGET_DATA, &icc_size)) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "JxlDecoderGetICCProfileSize failed");
+ return FALSE;
+ }
+ if (!(decoder_state->icc_buff = g_malloc(icc_size))) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "Allocating ICC profile failed");
+ return FALSE;
+ }
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderGetColorAsICCProfile(decoder_state->decoder,
+ &decoder_state->pixel_format,
+ JXL_COLOR_PROFILE_TARGET_DATA,
+ decoder_state->icc_buff, icc_size)) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "JxlDecoderGetColorAsICCProfile failed");
+ return FALSE;
+ }
+ decoder_state->context = cmsCreateContext(NULL, NULL);
+ if (!decoder_state->context) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "Failed to create LCMS2 context");
+ return FALSE;
+ }
+ decoder_state->profile = cmsOpenProfileFromMemTHR(
+ decoder_state->context, decoder_state->icc_buff, icc_size);
+ if (!decoder_state->profile) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "Invalid ICC profile from JXL image decoder");
+ return FALSE;
+ }
+ decoder_state->srgb = cmsCreate_sRGBProfileTHR(decoder_state->context);
+ if (!decoder_state->srgb) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "Failed to create sRGB profile");
+ return FALSE;
+ }
+ decoder_state->transform = cmsCreateTransformTHR(
+ decoder_state->context, decoder_state->profile,
+ decoder_state->has_alpha ? TYPE_RGBA_FLT : TYPE_RGB_FLT,
+ decoder_state->srgb,
+ decoder_state->has_alpha ? TYPE_RGBA_8 : TYPE_RGB_8,
+ INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_COPY_ALPHA);
+ if (!decoder_state->transform) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "Failed to create LCMS2 color transform");
+ return FALSE;
+ }
+
+ break;
+ }
+
+ case JXL_DEC_FRAME: {
+ // TODO(veluca): support rescaling.
+ JxlFrameHeader frame_header;
+ if (JxlDecoderGetFrameHeader(decoder_state->decoder, &frame_header) !=
+ JXL_DEC_SUCCESS) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "Failed to retrieve frame info");
+ return FALSE;
+ }
+
+ {
+ GdkPixbufJxlAnimationFrame frame;
+ frame.decoded = FALSE;
+ frame.duration_ms =
+ frame_header.duration * decoder_state->tick_duration_us / 1000;
+ decoder_state->total_duration_ms += frame.duration_ms;
+ frame.data =
+ gdk_pixbuf_new(GDK_COLORSPACE_RGB, decoder_state->has_alpha,
+ /*bits_per_sample=*/8, decoder_state->xsize,
+ decoder_state->ysize);
+ if (frame.data == NULL) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "Failed to allocate output pixel buffer");
+ return FALSE;
+ }
+ decoder_state->pixel_format.align =
+ gdk_pixbuf_get_rowstride(frame.data);
+ g_array_append_val(decoder_state->frames, frame);
+ }
+ if (decoder_state->pixbuf_prepared_callback &&
+ decoder_state->frames->len == 1) {
+ decoder_state->pixbuf_prepared_callback(
+ g_array_index(decoder_state->frames, GdkPixbufJxlAnimationFrame,
+ 0)
+ .data,
+ decoder_state->has_animation ? (GdkPixbufAnimation *)decoder_state
+ : NULL,
+ decoder_state->user_data);
+ }
+ break;
+ }
+
+ case JXL_DEC_NEED_IMAGE_OUT_BUFFER: {
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderSetImageOutCallback(decoder_state->decoder,
+ &decoder_state->pixel_format,
+ draw_pixels, decoder_state)) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "JxlDecoderSetImageOutCallback failed");
+ return FALSE;
+ }
+ break;
+ }
+
+ case JXL_DEC_FULL_IMAGE: {
+ // TODO(veluca): consider doing partial updates.
+ if (decoder_state->area_updated_callback) {
+ GdkPixbuf *output = g_array_index(decoder_state->frames,
+ GdkPixbufJxlAnimationFrame, 0)
+ .data;
+ decoder_state->area_updated_callback(
+ output, 0, 0, gdk_pixbuf_get_width(output),
+ gdk_pixbuf_get_height(output), decoder_state->user_data);
+ }
+ g_array_index(decoder_state->frames, GdkPixbufJxlAnimationFrame,
+ decoder_state->frames->len - 1)
+ .decoded = TRUE;
+ break;
+ }
+
+ case JXL_DEC_SUCCESS: {
+ decoder_state->done = TRUE;
+ return TRUE;
+ }
+
+ default: {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "Unexpected JxlDecoderProcessInput return code: %x",
+ status);
+ return FALSE;
+ }
+ }
+ }
+ return TRUE;
+}
+
+static gboolean jxl_is_save_option_supported(const gchar *option_key) {
+ if (g_strcmp0(option_key, "quality") == 0) {
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+static gboolean jxl_image_saver(FILE *f, GdkPixbuf *pixbuf, gchar **keys,
+ gchar **values, GError **error) {
+ long quality = 90; /* default; must be between 0 and 100 */
+ double distance;
+ gboolean save_alpha;
+ JxlEncoder *encoder;
+ void *parallel_runner;
+ JxlEncoderFrameSettings *frame_settings;
+ JxlBasicInfo output_info;
+ JxlPixelFormat pixel_format;
+ JxlColorEncoding color_profile;
+ JxlEncoderStatus status;
+
+ GByteArray *compressed;
+ size_t offset = 0;
+ uint8_t *next_out;
+ size_t avail_out;
+
+ if (f == NULL || pixbuf == NULL) {
+ return FALSE;
+ }
+
+ if (keys && *keys) {
+ gchar **kiter = keys;
+ gchar **viter = values;
+
+ while (*kiter) {
+ if (strcmp(*kiter, "quality") == 0) {
+ char *endptr = NULL;
+ quality = strtol(*viter, &endptr, 10);
+
+ if (endptr == *viter) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_BAD_OPTION,
+ "JXL quality must be a value between 0 and 100; value "
+ "\"%s\" could not be parsed.",
+ *viter);
+
+ return FALSE;
+ }
+
+ if (quality < 0 || quality > 100) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_BAD_OPTION,
+ "JXL quality must be a value between 0 and 100; value "
+ "\"%ld\" is not allowed.",
+ quality);
+
+ return FALSE;
+ }
+ } else {
+ g_warning("Unrecognized parameter (%s) passed to JXL saver.", *kiter);
+ }
+
+ ++kiter;
+ ++viter;
+ }
+ }
+
+ if (gdk_pixbuf_get_bits_per_sample(pixbuf) != 8) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_UNKNOWN_TYPE,
+ "Sorry, only 8bit images are supported by this JXL saver");
+ return FALSE;
+ }
+
+ JxlEncoderInitBasicInfo(&output_info);
+ output_info.have_container = JXL_FALSE;
+ output_info.xsize = gdk_pixbuf_get_width(pixbuf);
+ output_info.ysize = gdk_pixbuf_get_height(pixbuf);
+ output_info.bits_per_sample = 8;
+ output_info.orientation = JXL_ORIENT_IDENTITY;
+ output_info.num_color_channels = 3;
+
+ if (output_info.xsize == 0 || output_info.ysize == 0) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_CORRUPT_IMAGE,
+ "Empty image, nothing to save");
+ return FALSE;
+ }
+
+ save_alpha = gdk_pixbuf_get_has_alpha(pixbuf);
+
+ pixel_format.data_type = JXL_TYPE_UINT8;
+ pixel_format.endianness = JXL_NATIVE_ENDIAN;
+ pixel_format.align = gdk_pixbuf_get_rowstride(pixbuf);
+
+ if (save_alpha) {
+ if (gdk_pixbuf_get_n_channels(pixbuf) != 4) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_UNKNOWN_TYPE,
+ "Unsupported number of channels");
+ return FALSE;
+ }
+
+ output_info.num_extra_channels = 1;
+ output_info.alpha_bits = 8;
+ pixel_format.num_channels = 4;
+ } else {
+ if (gdk_pixbuf_get_n_channels(pixbuf) != 3) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_UNKNOWN_TYPE,
+ "Unsupported number of channels");
+ return FALSE;
+ }
+
+ output_info.num_extra_channels = 0;
+ output_info.alpha_bits = 0;
+ pixel_format.num_channels = 3;
+ }
+
+ encoder = JxlEncoderCreate(NULL);
+ if (!encoder) {
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "Creation of the JXL encoder failed");
+ return FALSE;
+ }
+
+ parallel_runner = JxlResizableParallelRunnerCreate(NULL);
+ if (!parallel_runner) {
+ JxlEncoderDestroy(encoder);
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "Creation of the JXL decoder failed");
+ return FALSE;
+ }
+
+ JxlResizableParallelRunnerSetThreads(
+ parallel_runner, JxlResizableParallelRunnerSuggestThreads(
+ output_info.xsize, output_info.ysize));
+
+ status = JxlEncoderSetParallelRunner(encoder, JxlResizableParallelRunner,
+ parallel_runner);
+ if (status != JXL_ENC_SUCCESS) {
+ JxlResizableParallelRunnerDestroy(parallel_runner);
+ JxlEncoderDestroy(encoder);
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "JxlDecoderSetParallelRunner failed: %x", status);
+ return FALSE;
+ }
+
+ if (quality > 99) {
+ output_info.uses_original_profile = JXL_TRUE;
+ distance = 0;
+ } else {
+ output_info.uses_original_profile = JXL_FALSE;
+ if (quality >= 30) {
+ distance = 0.1 + (100 - quality) * 0.09;
+ } else {
+ distance =
+ 53.0 / 3000.0 * quality * quality - 23.0 / 20.0 * quality + 25.0;
+ }
+ }
+
+ status = JxlEncoderSetBasicInfo(encoder, &output_info);
+ if (status != JXL_ENC_SUCCESS) {
+ JxlResizableParallelRunnerDestroy(parallel_runner);
+ JxlEncoderDestroy(encoder);
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "JxlEncoderSetBasicInfo failed: %x", status);
+ return FALSE;
+ }
+
+ JxlColorEncodingSetToSRGB(&color_profile, JXL_FALSE);
+ status = JxlEncoderSetColorEncoding(encoder, &color_profile);
+ if (status != JXL_ENC_SUCCESS) {
+ JxlResizableParallelRunnerDestroy(parallel_runner);
+ JxlEncoderDestroy(encoder);
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "JxlEncoderSetColorEncoding failed: %x", status);
+ return FALSE;
+ }
+
+ frame_settings = JxlEncoderFrameSettingsCreate(encoder, NULL);
+ JxlEncoderSetFrameDistance(frame_settings, distance);
+ JxlEncoderSetFrameLossless(frame_settings, output_info.uses_original_profile);
+
+ status = JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+ gdk_pixbuf_read_pixels(pixbuf),
+ gdk_pixbuf_get_byte_length(pixbuf));
+ if (status != JXL_ENC_SUCCESS) {
+ JxlResizableParallelRunnerDestroy(parallel_runner);
+ JxlEncoderDestroy(encoder);
+ g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+ "JxlEncoderAddImageFrame failed: %x", status);
+ return FALSE;
+ }
+
+ JxlEncoderCloseInput(encoder);
+
+ compressed = g_byte_array_sized_new(4096);
+ g_byte_array_set_size(compressed, 4096);
+ do {
+ next_out = compressed->data + offset;
+ avail_out = compressed->len - offset;
+ status = JxlEncoderProcessOutput(encoder, &next_out, &avail_out);
+
+ if (status == JXL_ENC_NEED_MORE_OUTPUT) {
+ offset = next_out - compressed->data;
+ g_byte_array_set_size(compressed, compressed->len * 2);
+ } else if (status == JXL_ENC_ERROR) {
+ JxlResizableParallelRunnerDestroy(parallel_runner);
+ JxlEncoderDestroy(encoder);
+ g_set_error(error, G_FILE_ERROR, 0, "JxlEncoderProcessOutput failed: %x",
+ status);
+ return FALSE;
+ }
+ } while (status != JXL_ENC_SUCCESS);
+
+ JxlResizableParallelRunnerDestroy(parallel_runner);
+ JxlEncoderDestroy(encoder);
+
+ g_byte_array_set_size(compressed, next_out - compressed->data);
+ if (compressed->len > 0) {
+ fwrite(compressed->data, 1, compressed->len, f);
+ g_byte_array_free(compressed, TRUE);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+void fill_vtable(GdkPixbufModule *module) {
+ module->begin_load = begin_load;
+ module->stop_load = stop_load;
+ module->load_increment = load_increment;
+ module->is_save_option_supported = jxl_is_save_option_supported;
+ module->save = jxl_image_saver;
+}
+
+void fill_info(GdkPixbufFormat *info) {
+ static GdkPixbufModulePattern signature[] = {
+ {"\xFF\x0A", " ", 100},
+ {"...\x0CJXL \x0D\x0A\x87\x0A", "zzz ", 100},
+ {NULL, NULL, 0},
+ };
+
+ static gchar *mime_types[] = {"image/jxl", NULL};
+
+ static gchar *extensions[] = {"jxl", NULL};
+
+ info->name = "jxl";
+ info->signature = signature;
+ info->description = "JPEG XL image";
+ info->mime_types = mime_types;
+ info->extensions = extensions;
+ info->flags = GDK_PIXBUF_FORMAT_WRITABLE | GDK_PIXBUF_FORMAT_THREADSAFE;
+ info->license = "BSD-3";
+}
diff --git a/third_party/jpeg-xl/plugins/gdk-pixbuf/pixbufloader_test.cc b/third_party/jpeg-xl/plugins/gdk-pixbuf/pixbufloader_test.cc
new file mode 100644
index 0000000000..5e5642d491
--- /dev/null
+++ b/third_party/jpeg-xl/plugins/gdk-pixbuf/pixbufloader_test.cc
@@ -0,0 +1,41 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <gdk-pixbuf/gdk-pixbuf.h>
+#include <gdk/gdk.h>
+#include <glib.h>
+#include <stdlib.h>
+
+int main(int argc, char* argv[]) {
+ if (argc != 3) {
+ fprintf(stderr, "Usage: %s <loaders.cache> <image.jxl>\n", argv[0]);
+ return 1;
+ }
+
+ const char* loaders_cache = argv[1];
+ const char* filename = argv[2];
+ setenv("GDK_PIXBUF_MODULE_FILE", loaders_cache, true);
+
+ // XDG_DATA_HOME is the path where we look for the mime cache.
+ // XDG_DATA_DIRS directories are used in addition to XDG_DATA_HOME.
+ setenv("XDG_DATA_HOME", ".", true);
+ setenv("XDG_DATA_DIRS", "", true);
+
+ if (!gdk_init_check(nullptr, nullptr)) {
+ fprintf(stderr, "This test requires a DISPLAY\n");
+ // Signals ctest that we should mark this test as skipped.
+ return 254;
+ }
+ GError* error = nullptr;
+ GdkPixbuf* pb = gdk_pixbuf_new_from_file(filename, &error);
+ if (pb != nullptr) {
+ g_object_unref(pb);
+ return 0;
+ } else {
+ fprintf(stderr, "Error loading file: %s\n", filename);
+ g_assert_no_error(error);
+ return 1;
+ }
+}
diff --git a/third_party/jpeg-xl/plugins/gimp/CMakeLists.txt b/third_party/jpeg-xl/plugins/gimp/CMakeLists.txt
new file mode 100644
index 0000000000..f0a49005ed
--- /dev/null
+++ b/third_party/jpeg-xl/plugins/gimp/CMakeLists.txt
@@ -0,0 +1,28 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+find_package(PkgConfig)
+pkg_check_modules(Gimp IMPORTED_TARGET gimp-2.0>=2.10 gimpui-2.0>=2.10)
+
+if (NOT Gimp_FOUND)
+ message(WARNING "Gimp development libraries not found, the Gimp plugin will not be built")
+ return ()
+endif ()
+
+add_executable(file-jxl WIN32
+ common.h
+ common.cc
+ file-jxl-load.cc
+ file-jxl-load.h
+ file-jxl-save.cc
+ file-jxl-save.h
+ file-jxl.cc)
+target_link_libraries(file-jxl jxl jxl_threads PkgConfig::Gimp)
+
+target_include_directories(file-jxl PUBLIC
+ ${PROJECT_SOURCE_DIR}) # for plugins/gimp absolute paths.
+
+pkg_get_variable(GIMP_LIB_DIR gimp-2.0 gimplibdir)
+install(TARGETS file-jxl RUNTIME DESTINATION "${GIMP_LIB_DIR}/plug-ins/file-jxl/")
diff --git a/third_party/jpeg-xl/plugins/gimp/common.cc b/third_party/jpeg-xl/plugins/gimp/common.cc
new file mode 100644
index 0000000000..1a884570cb
--- /dev/null
+++ b/third_party/jpeg-xl/plugins/gimp/common.cc
@@ -0,0 +1,27 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "plugins/gimp/common.h"
+
+namespace jxl {
+
+JpegXlGimpProgress::JpegXlGimpProgress(const char *message) {
+ cur_progress = 0;
+ max_progress = 100;
+
+ gimp_progress_init_printf("%s\n", message);
+}
+
+void JpegXlGimpProgress::update() {
+ gimp_progress_update((float)++cur_progress / (float)max_progress);
+ return;
+}
+
+void JpegXlGimpProgress::finished() {
+ gimp_progress_update(1.0);
+ return;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/plugins/gimp/common.h b/third_party/jpeg-xl/plugins/gimp/common.h
new file mode 100644
index 0000000000..3fe63c1a47
--- /dev/null
+++ b/third_party/jpeg-xl/plugins/gimp/common.h
@@ -0,0 +1,45 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef PLUGINS_GIMP_COMMON_H_
+#define PLUGINS_GIMP_COMMON_H_
+
+#include <libgimp/gimp.h>
+#include <libgimp/gimpui.h>
+#include <math.h>
+
+#include <fstream>
+#include <iterator>
+#include <string>
+#include <vector>
+
+#define PLUG_IN_BINARY "file-jxl"
+#define SAVE_PROC "file-jxl-save"
+
+// Defined by both FUIF and glib.
+#undef MAX
+#undef MIN
+#undef CLAMP
+
+#include <jxl/resizable_parallel_runner.h>
+#include <jxl/resizable_parallel_runner_cxx.h>
+
+namespace jxl {
+
+class JpegXlGimpProgress {
+ public:
+ explicit JpegXlGimpProgress(const char *message);
+ void update();
+ void finished();
+
+ private:
+ int cur_progress;
+ int max_progress;
+
+}; // class JpegXlGimpProgress
+
+} // namespace jxl
+
+#endif // PLUGINS_GIMP_COMMON_H_
diff --git a/third_party/jpeg-xl/plugins/gimp/file-jxl-load.cc b/third_party/jpeg-xl/plugins/gimp/file-jxl-load.cc
new file mode 100644
index 0000000000..361a74920c
--- /dev/null
+++ b/third_party/jpeg-xl/plugins/gimp/file-jxl-load.cc
@@ -0,0 +1,487 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "plugins/gimp/file-jxl-load.h"
+
+#include <jxl/decode.h>
+#include <jxl/decode_cxx.h>
+
+#define _PROFILE_ORIGIN_ JXL_COLOR_PROFILE_TARGET_ORIGINAL
+#define _PROFILE_TARGET_ JXL_COLOR_PROFILE_TARGET_DATA
+#define LOAD_PROC "file-jxl-load"
+
+namespace jxl {
+
+bool SetJpegXlOutBuffer(
+ std::unique_ptr<JxlDecoderStruct, JxlDecoderDestroyStruct> *dec,
+ JxlPixelFormat *format, size_t *buffer_size, gpointer *pixels_buffer_1) {
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderImageOutBufferSize(dec->get(), format, buffer_size)) {
+ g_printerr(LOAD_PROC " Error: JxlDecoderImageOutBufferSize failed\n");
+ return false;
+ }
+ *pixels_buffer_1 = g_malloc(*buffer_size);
+ if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(dec->get(), format,
+ *pixels_buffer_1,
+ *buffer_size)) {
+ g_printerr(LOAD_PROC " Error: JxlDecoderSetImageOutBuffer failed\n");
+ return false;
+ }
+ return true;
+}
+
+bool LoadJpegXlImage(const gchar *const filename, gint32 *const image_id) {
+ bool stop_processing = false;
+ JxlDecoderStatus status = JXL_DEC_NEED_MORE_INPUT;
+ std::vector<uint8_t> icc_profile;
+ GimpColorProfile *profile_icc = nullptr;
+ GimpColorProfile *profile_int = nullptr;
+ bool is_linear = false;
+ unsigned long xsize = 0, ysize = 0;
+ long crop_x0 = 0, crop_y0 = 0;
+ size_t layer_idx = 0;
+ uint32_t frame_duration = 0;
+ double tps_denom = 1.f, tps_numer = 1.f;
+
+ gint32 layer;
+
+ gpointer pixels_buffer_1 = nullptr;
+ gpointer pixels_buffer_2 = nullptr;
+ size_t buffer_size = 0;
+
+ GimpImageBaseType image_type = GIMP_RGB;
+ GimpImageType layer_type = GIMP_RGB_IMAGE;
+ GimpPrecision precision = GIMP_PRECISION_U16_GAMMA;
+ JxlBasicInfo info = {};
+ JxlPixelFormat format = {};
+ JxlAnimationHeader animation = {};
+ JxlBlendMode blend_mode = JXL_BLEND_BLEND;
+ char *frame_name = nullptr; // will be realloced
+ size_t frame_name_len = 0;
+
+ format.num_channels = 4;
+ format.data_type = JXL_TYPE_FLOAT;
+ format.endianness = JXL_NATIVE_ENDIAN;
+ format.align = 0;
+
+ bool is_gray = false;
+
+ JpegXlGimpProgress gimp_load_progress(
+ ("Opening JPEG XL file:" + std::string(filename)).c_str());
+ gimp_load_progress.update();
+
+ // read file
+ std::ifstream instream(filename, std::ios::in | std::ios::binary);
+ std::vector<uint8_t> compressed((std::istreambuf_iterator<char>(instream)),
+ std::istreambuf_iterator<char>());
+ instream.close();
+
+ gimp_load_progress.update();
+
+ // multi-threaded parallel runner.
+ auto runner = JxlResizableParallelRunnerMake(nullptr);
+
+ auto dec = JxlDecoderMake(nullptr);
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderSubscribeEvents(
+ dec.get(), JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING |
+ JXL_DEC_FULL_IMAGE | JXL_DEC_FRAME_PROGRESSION |
+ JXL_DEC_FRAME)) {
+ g_printerr(LOAD_PROC " Error: JxlDecoderSubscribeEvents failed\n");
+ return false;
+ }
+
+ if (JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(dec.get(),
+ JxlResizableParallelRunner,
+ runner.get())) {
+ g_printerr(LOAD_PROC " Error: JxlDecoderSetParallelRunner failed\n");
+ return false;
+ }
+ // TODO: make this work with coalescing set to false, while handling frames
+ // with duration 0 and references to earlier frames correctly.
+ if (JXL_DEC_SUCCESS != JxlDecoderSetCoalescing(dec.get(), JXL_TRUE)) {
+ g_printerr(LOAD_PROC " Error: JxlDecoderSetCoalescing failed\n");
+ return false;
+ }
+
+ // grand decode loop...
+ JxlDecoderSetInput(dec.get(), compressed.data(), compressed.size());
+
+ if (JXL_DEC_SUCCESS != JxlDecoderSetProgressiveDetail(
+ dec.get(), JxlProgressiveDetail::kPasses)) {
+ g_printerr(LOAD_PROC " Error: JxlDecoderSetProgressiveDetail failed\n");
+ return false;
+ }
+
+ while (true) {
+ gimp_load_progress.update();
+
+ if (!stop_processing) status = JxlDecoderProcessInput(dec.get());
+
+ if (status == JXL_DEC_BASIC_INFO) {
+ if (JXL_DEC_SUCCESS != JxlDecoderGetBasicInfo(dec.get(), &info)) {
+ g_printerr(LOAD_PROC " Error: JxlDecoderGetBasicInfo failed\n");
+ return false;
+ }
+
+ xsize = info.xsize;
+ ysize = info.ysize;
+ if (info.have_animation) {
+ animation = info.animation;
+ tps_denom = animation.tps_denominator;
+ tps_numer = animation.tps_numerator;
+ }
+
+ JxlResizableParallelRunnerSetThreads(
+ runner.get(), JxlResizableParallelRunnerSuggestThreads(xsize, ysize));
+ } else if (status == JXL_DEC_COLOR_ENCODING) {
+ // check for ICC profile
+ size_t icc_size = 0;
+ JxlColorEncoding color_encoding;
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderGetColorAsEncodedProfile(
+ dec.get(), &format, _PROFILE_ORIGIN_, &color_encoding)) {
+ // Attempt to load ICC profile when no internal color encoding
+ if (JXL_DEC_SUCCESS != JxlDecoderGetICCProfileSize(dec.get(), &format,
+ _PROFILE_ORIGIN_,
+ &icc_size)) {
+ g_printerr(LOAD_PROC
+ " Warning: JxlDecoderGetICCProfileSize failed\n");
+ }
+
+ if (icc_size > 0) {
+ icc_profile.resize(icc_size);
+ if (JXL_DEC_SUCCESS != JxlDecoderGetColorAsICCProfile(
+ dec.get(), &format, _PROFILE_ORIGIN_,
+ icc_profile.data(), icc_profile.size())) {
+ g_printerr(LOAD_PROC
+ " Warning: JxlDecoderGetColorAsICCProfile failed\n");
+ }
+
+ profile_icc = gimp_color_profile_new_from_icc_profile(
+ icc_profile.data(), icc_profile.size(), nullptr);
+
+ if (profile_icc) {
+ is_linear = gimp_color_profile_is_linear(profile_icc);
+ g_printerr(LOAD_PROC " Info: Color profile is_linear = %d\n",
+ is_linear);
+ } else {
+ g_printerr(LOAD_PROC " Warning: Failed to read ICC profile.\n");
+ }
+ } else {
+ g_printerr(LOAD_PROC " Warning: Empty ICC data.\n");
+ }
+ }
+
+ // Internal color profile detection...
+ if (JXL_DEC_SUCCESS ==
+ JxlDecoderGetColorAsEncodedProfile(
+ dec.get(), &format, _PROFILE_TARGET_, &color_encoding)) {
+ g_printerr(LOAD_PROC " Info: Internal color encoding detected.\n");
+
+ // figure out linearity of internal profile
+ switch (color_encoding.transfer_function) {
+ case JXL_TRANSFER_FUNCTION_LINEAR:
+ is_linear = true;
+ break;
+
+ case JXL_TRANSFER_FUNCTION_709:
+ case JXL_TRANSFER_FUNCTION_PQ:
+ case JXL_TRANSFER_FUNCTION_HLG:
+ case JXL_TRANSFER_FUNCTION_GAMMA:
+ case JXL_TRANSFER_FUNCTION_DCI:
+ case JXL_TRANSFER_FUNCTION_SRGB:
+ is_linear = false;
+ break;
+
+ case JXL_TRANSFER_FUNCTION_UNKNOWN:
+ default:
+ if (profile_icc) {
+ g_printerr(LOAD_PROC
+ " Info: Unknown transfer function. "
+ "ICC profile is present.");
+ } else {
+ g_printerr(LOAD_PROC
+ " Info: Unknown transfer function. "
+ "No ICC profile present.");
+ }
+ break;
+ }
+
+ switch (color_encoding.color_space) {
+ case JXL_COLOR_SPACE_RGB:
+ if (color_encoding.white_point == JXL_WHITE_POINT_D65 &&
+ color_encoding.primaries == JXL_PRIMARIES_SRGB) {
+ if (is_linear) {
+ profile_int = gimp_color_profile_new_rgb_srgb_linear();
+ } else {
+ profile_int = gimp_color_profile_new_rgb_srgb();
+ }
+ } else if (!is_linear &&
+ color_encoding.white_point == JXL_WHITE_POINT_D65 &&
+ (color_encoding.primaries_green_xy[0] == 0.2100 ||
+ color_encoding.primaries_green_xy[1] == 0.7100)) {
+ // Probably Adobe RGB
+ profile_int = gimp_color_profile_new_rgb_adobe();
+ } else if (profile_icc) {
+ g_printerr(LOAD_PROC
+ " Info: Unknown RGB colorspace. "
+ "Using ICC profile.\n");
+ } else {
+ g_printerr(LOAD_PROC
+ " Info: Unknown RGB colorspace. "
+ "Treating as sRGB.\n");
+ if (is_linear) {
+ profile_int = gimp_color_profile_new_rgb_srgb_linear();
+ } else {
+ profile_int = gimp_color_profile_new_rgb_srgb();
+ }
+ }
+ break;
+
+ case JXL_COLOR_SPACE_GRAY:
+ is_gray = true;
+ if (!profile_icc ||
+ color_encoding.white_point == JXL_WHITE_POINT_D65) {
+ if (is_linear) {
+ profile_int = gimp_color_profile_new_d65_gray_linear();
+ } else {
+ profile_int = gimp_color_profile_new_d65_gray_srgb_trc();
+ }
+ }
+ break;
+ case JXL_COLOR_SPACE_XYB:
+ case JXL_COLOR_SPACE_UNKNOWN:
+ default:
+ if (profile_icc) {
+ g_printerr(LOAD_PROC
+ " Info: Unknown colorspace. Using ICC profile.\n");
+ } else {
+ g_error(
+ LOAD_PROC
+ " Warning: Unknown colorspace. Treating as sRGB profile.\n");
+
+ if (is_linear) {
+ profile_int = gimp_color_profile_new_rgb_srgb_linear();
+ } else {
+ profile_int = gimp_color_profile_new_rgb_srgb();
+ }
+ }
+ break;
+ }
+ }
+
+ // set pixel format
+ if (info.num_color_channels > 1) {
+ if (info.alpha_bits == 0) {
+ image_type = GIMP_RGB;
+ layer_type = GIMP_RGB_IMAGE;
+ format.num_channels = info.num_color_channels;
+ } else {
+ image_type = GIMP_RGB;
+ layer_type = GIMP_RGBA_IMAGE;
+ format.num_channels = info.num_color_channels + 1;
+ }
+ } else if (info.num_color_channels == 1) {
+ if (info.alpha_bits == 0) {
+ image_type = GIMP_GRAY;
+ layer_type = GIMP_GRAY_IMAGE;
+ format.num_channels = info.num_color_channels;
+ } else {
+ image_type = GIMP_GRAY;
+ layer_type = GIMP_GRAYA_IMAGE;
+ format.num_channels = info.num_color_channels + 1;
+ }
+ }
+
+ // Set image bit depth and linearity
+ if (info.bits_per_sample <= 8) {
+ if (is_linear) {
+ precision = GIMP_PRECISION_U8_LINEAR;
+ } else {
+ precision = GIMP_PRECISION_U8_GAMMA;
+ }
+ } else if (info.bits_per_sample <= 16) {
+ if (info.exponent_bits_per_sample > 0) {
+ if (is_linear) {
+ precision = GIMP_PRECISION_HALF_LINEAR;
+ } else {
+ precision = GIMP_PRECISION_HALF_GAMMA;
+ }
+ } else if (is_linear) {
+ precision = GIMP_PRECISION_U16_LINEAR;
+ } else {
+ precision = GIMP_PRECISION_U16_GAMMA;
+ }
+ } else {
+ if (info.exponent_bits_per_sample > 0) {
+ if (is_linear) {
+ precision = GIMP_PRECISION_FLOAT_LINEAR;
+ } else {
+ precision = GIMP_PRECISION_FLOAT_GAMMA;
+ }
+ } else if (is_linear) {
+ precision = GIMP_PRECISION_U32_LINEAR;
+ } else {
+ precision = GIMP_PRECISION_U32_GAMMA;
+ }
+ }
+
+ // create new image
+ if (is_linear) {
+ *image_id = gimp_image_new_with_precision(xsize, ysize, image_type,
+ GIMP_PRECISION_FLOAT_LINEAR);
+ } else {
+ *image_id = gimp_image_new_with_precision(xsize, ysize, image_type,
+ GIMP_PRECISION_FLOAT_GAMMA);
+ }
+
+ if (profile_int) {
+ gimp_image_set_color_profile(*image_id, profile_int);
+ } else if (!profile_icc) {
+ g_printerr(LOAD_PROC " Warning: No color profile.\n");
+ }
+ } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+ // get image from decoder in FLOAT
+ format.data_type = JXL_TYPE_FLOAT;
+ if (!SetJpegXlOutBuffer(&dec, &format, &buffer_size, &pixels_buffer_1))
+ return false;
+ } else if (status == JXL_DEC_FULL_IMAGE) {
+ // create and insert layer
+ gchar *layer_name;
+ if (layer_idx == 0 && !info.have_animation) {
+ layer_name = g_strdup_printf("Background");
+ } else {
+ const GString *blend_null_flag = g_string_new("");
+ const GString *blend_replace_flag = g_string_new(" (replace)");
+ const GString *blend_combine_flag = g_string_new(" (combine)");
+ GString *blend;
+ if (blend_mode == JXL_BLEND_REPLACE) {
+ blend = (GString *)blend_replace_flag;
+ } else if (blend_mode == JXL_BLEND_BLEND) {
+ blend = (GString *)blend_combine_flag;
+ } else {
+ blend = (GString *)blend_null_flag;
+ }
+ char *temp_frame_name = nullptr;
+ bool must_free_frame_name = false;
+ if (frame_name_len == 0) {
+ temp_frame_name = g_strdup_printf("Frame %lu", layer_idx + 1);
+ must_free_frame_name = true;
+ } else {
+ temp_frame_name = frame_name;
+ }
+ double fduration = frame_duration * 1000.f * tps_denom / tps_numer;
+ layer_name = g_strdup_printf("%s (%.15gms)%s", temp_frame_name,
+ fduration, blend->str);
+ if (must_free_frame_name) free(temp_frame_name);
+ }
+ layer = gimp_layer_new(*image_id, layer_name, xsize, ysize, layer_type,
+ /*opacity=*/100,
+ gimp_image_get_default_new_layer_mode(*image_id));
+
+ gimp_image_insert_layer(*image_id, layer, /*parent_id=*/-1,
+ /*position=*/0);
+
+ pixels_buffer_2 = g_malloc(buffer_size);
+ GeglBuffer *buffer = gimp_drawable_get_buffer(layer);
+ const Babl *destination_format = gegl_buffer_set_format(buffer, nullptr);
+
+ std::string babl_format_str = "";
+ if (is_gray) {
+ babl_format_str += "Y'";
+ } else {
+ babl_format_str += "R'G'B'";
+ }
+ if (info.alpha_bits > 0) {
+ babl_format_str += "A";
+ }
+ babl_format_str += " float";
+
+ const Babl *source_format = babl_format(babl_format_str.c_str());
+
+ babl_process(babl_fish(source_format, destination_format),
+ pixels_buffer_1, pixels_buffer_2, xsize * ysize);
+
+ gegl_buffer_set(buffer, GEGL_RECTANGLE(0, 0, xsize, ysize), 0, nullptr,
+ pixels_buffer_2, GEGL_AUTO_ROWSTRIDE);
+ gimp_item_transform_translate(layer, crop_x0, crop_y0);
+
+ g_clear_object(&buffer);
+ g_free(pixels_buffer_1);
+ g_free(pixels_buffer_2);
+ if (stop_processing) status = JXL_DEC_SUCCESS;
+ g_free(layer_name);
+ layer_idx++;
+ } else if (status == JXL_DEC_FRAME) {
+ JxlFrameHeader frame_header;
+ if (JxlDecoderGetFrameHeader(dec.get(), &frame_header) !=
+ JXL_DEC_SUCCESS) {
+ g_printerr(LOAD_PROC " Error: JxlDecoderSetImageOutBuffer failed\n");
+ return false;
+ }
+ xsize = frame_header.layer_info.xsize;
+ ysize = frame_header.layer_info.ysize;
+ crop_x0 = frame_header.layer_info.crop_x0;
+ crop_y0 = frame_header.layer_info.crop_y0;
+ frame_duration = frame_header.duration;
+ blend_mode = frame_header.layer_info.blend_info.blendmode;
+ if (blend_mode != JXL_BLEND_BLEND && blend_mode != JXL_BLEND_REPLACE) {
+ g_printerr(
+ LOAD_PROC
+ " Warning: JxlDecoderGetFrameHeader: Unhandled blend mode: %d\n",
+ blend_mode);
+ }
+ if ((frame_name_len = frame_header.name_length) > 0) {
+ frame_name = (char *)realloc(frame_name, frame_name_len);
+ if (JXL_DEC_SUCCESS !=
+ JxlDecoderGetFrameName(dec.get(), frame_name, frame_name_len)) {
+ g_printerr(LOAD_PROC "Error: JxlDecoderGetFrameName failed");
+ return false;
+ };
+ }
+ } else if (status == JXL_DEC_SUCCESS) {
+ // All decoding successfully finished.
+ // It's not required to call JxlDecoderReleaseInput(dec.get())
+ // since the decoder will be destroyed.
+ break;
+ } else if (status == JXL_DEC_NEED_MORE_INPUT ||
+ status == JXL_DEC_FRAME_PROGRESSION) {
+ stop_processing = status != JXL_DEC_FRAME_PROGRESSION;
+ if (JxlDecoderFlushImage(dec.get()) == JXL_DEC_SUCCESS) {
+ status = JXL_DEC_FULL_IMAGE;
+ continue;
+ }
+ g_printerr(LOAD_PROC " Error: Already provided all input\n");
+ return false;
+ } else if (status == JXL_DEC_ERROR) {
+ g_printerr(LOAD_PROC " Error: Decoder error\n");
+ return false;
+ } else {
+ g_printerr(LOAD_PROC " Error: Unknown decoder status\n");
+ return false;
+ }
+ } // end grand decode loop
+
+ gimp_load_progress.update();
+
+ if (profile_icc) {
+ gimp_image_set_color_profile(*image_id, profile_icc);
+ }
+
+ gimp_load_progress.update();
+
+ // TODO(xiota): Add option to keep image as float
+ if (info.bits_per_sample < 32) {
+ gimp_image_convert_precision(*image_id, precision);
+ }
+
+ gimp_image_set_filename(*image_id, filename);
+
+ gimp_load_progress.finished();
+ return true;
+}
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/plugins/gimp/file-jxl-load.h b/third_party/jpeg-xl/plugins/gimp/file-jxl-load.h
new file mode 100644
index 0000000000..ef5b92fef6
--- /dev/null
+++ b/third_party/jpeg-xl/plugins/gimp/file-jxl-load.h
@@ -0,0 +1,17 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef PLUGINS_GIMP_FILE_JXL_LOAD_H_
+#define PLUGINS_GIMP_FILE_JXL_LOAD_H_
+
+#include "plugins/gimp/common.h"
+
+namespace jxl {
+
+bool LoadJpegXlImage(const gchar* filename, gint32* image_id);
+
+} // namespace jxl
+
+#endif // PLUGINS_GIMP_FILE_JXL_LOAD_H_
diff --git a/third_party/jpeg-xl/plugins/gimp/file-jxl-save.cc b/third_party/jpeg-xl/plugins/gimp/file-jxl-save.cc
new file mode 100644
index 0000000000..c1e1ebd9af
--- /dev/null
+++ b/third_party/jpeg-xl/plugins/gimp/file-jxl-save.cc
@@ -0,0 +1,895 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "plugins/gimp/file-jxl-save.h"
+
+#include <jxl/encode.h>
+#include <jxl/encode_cxx.h>
+
+#include <cmath>
+#include <utility>
+
+#include "gobject/gsignal.h"
+
+#define PLUG_IN_BINARY "file-jxl"
+#define SAVE_PROC "file-jxl-save"
+
+#define SCALE_WIDTH 200
+
+namespace jxl {
+
+namespace {
+
+#ifndef g_clear_signal_handler
+// g_clear_signal_handler was added in glib 2.62
+void g_clear_signal_handler(gulong* handler, gpointer instance) {
+ if (handler != nullptr && *handler != 0) {
+ g_signal_handler_disconnect(instance, *handler);
+ *handler = 0;
+ }
+}
+#endif // g_clear_signal_handler
+
+class JpegXlSaveOpts {
+ public:
+ float distance;
+ float quality;
+
+ bool lossless = false;
+ bool is_linear = false;
+ bool has_alpha = false;
+ bool is_gray = false;
+ bool icc_attached = false;
+
+ bool advanced_mode = false;
+ bool use_container = true;
+ bool save_exif = false;
+ int encoding_effort = 7;
+ int faster_decoding = 0;
+
+ std::string babl_format_str = "RGB u16";
+ std::string babl_type_str = "u16";
+ std::string babl_model_str = "RGB";
+
+ JxlPixelFormat pixel_format;
+ JxlBasicInfo basic_info;
+
+ // functions
+ JpegXlSaveOpts();
+
+ bool SetDistance(float dist);
+ bool SetQuality(float qual);
+ bool SetDimensions(int x, int y);
+ bool SetNumChannels(int channels);
+
+ bool UpdateDistance();
+ bool UpdateQuality();
+
+ bool SetModel(bool is_linear_);
+
+ bool UpdateBablFormat();
+ bool SetBablModel(std::string model);
+ bool SetBablType(std::string type);
+
+ bool SetPrecision(int gimp_precision);
+
+ private:
+}; // class JpegXlSaveOpts
+
+JpegXlSaveOpts jxl_save_opts;
+
+class JpegXlSaveGui {
+ public:
+ bool SaveDialog();
+
+ private:
+ GtkWidget* toggle_lossless = nullptr;
+ GtkAdjustment* entry_distance = nullptr;
+ GtkAdjustment* entry_quality = nullptr;
+ GtkAdjustment* entry_effort = nullptr;
+ GtkAdjustment* entry_faster = nullptr;
+ GtkWidget* frame_advanced = nullptr;
+ GtkWidget* toggle_no_xyb = nullptr;
+ GtkWidget* toggle_raw = nullptr;
+ gulong handle_toggle_lossless = 0;
+ gulong handle_entry_quality = 0;
+ gulong handle_entry_distance = 0;
+
+ static bool GuiOnChangeQuality(GtkAdjustment* adj_qual, void* this_pointer);
+
+ static bool GuiOnChangeDistance(GtkAdjustment* adj_dist, void* this_pointer);
+
+ static bool GuiOnChangeEffort(GtkAdjustment* adj_effort);
+ static bool GuiOnChangeLossless(GtkWidget* toggle, void* this_pointer);
+ static bool GuiOnChangeCodestream(GtkWidget* toggle);
+ static bool GuiOnChangeNoXYB(GtkWidget* toggle);
+
+ static bool GuiOnChangeAdvancedMode(GtkWidget* toggle, void* this_pointer);
+}; // class JpegXlSaveGui
+
+JpegXlSaveGui jxl_save_gui;
+
+bool JpegXlSaveGui::GuiOnChangeQuality(GtkAdjustment* adj_qual,
+ void* this_pointer) {
+ JpegXlSaveGui* self = static_cast<JpegXlSaveGui*>(this_pointer);
+
+ g_clear_signal_handler(&self->handle_entry_distance, self->entry_distance);
+ g_clear_signal_handler(&self->handle_entry_quality, self->entry_quality);
+ g_clear_signal_handler(&self->handle_toggle_lossless, self->toggle_lossless);
+
+ GtkAdjustment* adj_dist = self->entry_distance;
+ jxl_save_opts.SetQuality(gtk_adjustment_get_value(adj_qual));
+ gtk_adjustment_set_value(adj_dist, jxl_save_opts.distance);
+
+ self->handle_toggle_lossless = g_signal_connect(
+ self->toggle_lossless, "toggled", G_CALLBACK(GuiOnChangeLossless), self);
+ self->handle_entry_distance =
+ g_signal_connect(self->entry_distance, "value-changed",
+ G_CALLBACK(GuiOnChangeDistance), self);
+ self->handle_entry_quality =
+ g_signal_connect(self->entry_quality, "value-changed",
+ G_CALLBACK(GuiOnChangeQuality), self);
+ return true;
+}
+
+bool JpegXlSaveGui::GuiOnChangeDistance(GtkAdjustment* adj_dist,
+ void* this_pointer) {
+ JpegXlSaveGui* self = static_cast<JpegXlSaveGui*>(this_pointer);
+ GtkAdjustment* adj_qual = self->entry_quality;
+
+ g_clear_signal_handler(&self->handle_entry_distance, self->entry_distance);
+ g_clear_signal_handler(&self->handle_entry_quality, self->entry_quality);
+ g_clear_signal_handler(&self->handle_toggle_lossless, self->toggle_lossless);
+
+ jxl_save_opts.SetDistance(gtk_adjustment_get_value(adj_dist));
+ gtk_adjustment_set_value(adj_qual, jxl_save_opts.quality);
+
+ if (!(jxl_save_opts.distance < 0.001)) {
+ gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(self->toggle_lossless),
+ false);
+ gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(self->toggle_no_xyb), false);
+ }
+
+ self->handle_toggle_lossless = g_signal_connect(
+ self->toggle_lossless, "toggled", G_CALLBACK(GuiOnChangeLossless), self);
+ self->handle_entry_distance =
+ g_signal_connect(self->entry_distance, "value-changed",
+ G_CALLBACK(GuiOnChangeDistance), self);
+ self->handle_entry_quality =
+ g_signal_connect(self->entry_quality, "value-changed",
+ G_CALLBACK(GuiOnChangeQuality), self);
+ return true;
+}
+
+bool JpegXlSaveGui::GuiOnChangeEffort(GtkAdjustment* adj_effort) {
+ float new_effort = 10 - gtk_adjustment_get_value(adj_effort);
+ jxl_save_opts.encoding_effort = new_effort;
+ return true;
+}
+
+bool JpegXlSaveGui::GuiOnChangeLossless(GtkWidget* toggle, void* this_pointer) {
+ JpegXlSaveGui* self = static_cast<JpegXlSaveGui*>(this_pointer);
+ GtkAdjustment* adj_distance = self->entry_distance;
+ GtkAdjustment* adj_quality = self->entry_quality;
+ GtkAdjustment* adj_effort = self->entry_effort;
+
+ jxl_save_opts.lossless =
+ gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(toggle));
+
+ g_clear_signal_handler(&self->handle_entry_distance, self->entry_distance);
+ g_clear_signal_handler(&self->handle_entry_quality, self->entry_quality);
+ g_clear_signal_handler(&self->handle_toggle_lossless, self->toggle_lossless);
+
+ if (jxl_save_opts.lossless) {
+ gtk_adjustment_set_value(adj_quality, 100.0);
+ gtk_adjustment_set_value(adj_distance, 0.0);
+ jxl_save_opts.distance = 0;
+ jxl_save_opts.UpdateQuality();
+ gtk_adjustment_set_value(adj_effort, 7);
+ gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(self->toggle_no_xyb), true);
+ } else {
+ gtk_adjustment_set_value(adj_quality, 90.0);
+ gtk_adjustment_set_value(adj_distance, 1.0);
+ jxl_save_opts.distance = 1.0;
+ jxl_save_opts.UpdateQuality();
+ gtk_adjustment_set_value(adj_effort, 3);
+ gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(self->toggle_no_xyb), false);
+ }
+ self->handle_toggle_lossless = g_signal_connect(
+ self->toggle_lossless, "toggled", G_CALLBACK(GuiOnChangeLossless), self);
+ self->handle_entry_distance =
+ g_signal_connect(self->entry_distance, "value-changed",
+ G_CALLBACK(GuiOnChangeDistance), self);
+ self->handle_entry_quality =
+ g_signal_connect(self->entry_quality, "value-changed",
+ G_CALLBACK(GuiOnChangeQuality), self);
+ return true;
+}
+
+bool JpegXlSaveGui::GuiOnChangeCodestream(GtkWidget* toggle) {
+ jxl_save_opts.use_container =
+ !gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(toggle));
+ return true;
+}
+
+bool JpegXlSaveGui::GuiOnChangeNoXYB(GtkWidget* toggle) {
+ jxl_save_opts.basic_info.uses_original_profile =
+ gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(toggle));
+ return true;
+}
+
+bool JpegXlSaveGui::GuiOnChangeAdvancedMode(GtkWidget* toggle,
+ void* this_pointer) {
+ JpegXlSaveGui* self = static_cast<JpegXlSaveGui*>(this_pointer);
+ jxl_save_opts.advanced_mode =
+ gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(toggle));
+
+ gtk_widget_set_sensitive(self->frame_advanced, jxl_save_opts.advanced_mode);
+
+ if (!jxl_save_opts.advanced_mode) {
+ jxl_save_opts.basic_info.uses_original_profile = false;
+ gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(self->toggle_no_xyb), false);
+
+ jxl_save_opts.use_container = true;
+ gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(self->toggle_raw), false);
+
+ jxl_save_opts.faster_decoding = 0;
+ gtk_adjustment_set_value(GTK_ADJUSTMENT(self->entry_faster), 0);
+ }
+ return true;
+}
+
+bool JpegXlSaveGui::SaveDialog() {
+ gboolean run;
+ GtkWidget* dialog;
+ GtkWidget* content_area;
+ GtkWidget* main_vbox;
+ GtkWidget* frame;
+ GtkWidget* toggle;
+ GtkWidget* table;
+ GtkWidget* vbox;
+ GtkWidget* separator;
+
+ // initialize export dialog
+ gimp_ui_init(PLUG_IN_BINARY, true);
+ dialog = gimp_export_dialog_new("JPEG XL", PLUG_IN_BINARY, SAVE_PROC);
+
+ gtk_window_set_resizable(GTK_WINDOW(dialog), false);
+ content_area = gimp_export_dialog_get_content_area(dialog);
+
+ main_vbox = gtk_vbox_new(false, 6);
+ gtk_container_set_border_width(GTK_CONTAINER(main_vbox), 6);
+ gtk_box_pack_start(GTK_BOX(content_area), main_vbox, true, true, 0);
+ gtk_widget_show(main_vbox);
+
+ // Standard Settings Frame
+ frame = gtk_frame_new(nullptr);
+ gtk_frame_set_shadow_type(GTK_FRAME(frame), GTK_SHADOW_ETCHED_IN);
+ gtk_box_pack_start(GTK_BOX(main_vbox), frame, false, false, 0);
+ gtk_widget_show(frame);
+
+ vbox = gtk_vbox_new(false, 6);
+ gtk_container_set_border_width(GTK_CONTAINER(vbox), 6);
+ gtk_container_add(GTK_CONTAINER(frame), vbox);
+ gtk_widget_show(vbox);
+
+ // Layout Table
+ table = gtk_table_new(20, 3, false);
+ gtk_table_set_col_spacings(GTK_TABLE(table), 6);
+ gtk_box_pack_start(GTK_BOX(vbox), table, false, false, 0);
+ gtk_widget_show(table);
+
+ // Distance Slider
+ static gchar distance_help[] =
+ "Butteraugli distance target. Suggested values:"
+ "\n\td\u00A0=\u00A00.3\tExcellent"
+ "\n\td\u00A0=\u00A01\tVery Good"
+ "\n\td\u00A0=\u00A02\tGood"
+ "\n\td\u00A0=\u00A03\tFair"
+ "\n\td\u00A0=\u00A06\tPoor";
+
+ entry_distance = (GtkAdjustment*)gimp_scale_entry_new(
+ GTK_TABLE(table), 0, 0, "Distance", SCALE_WIDTH, 0,
+ jxl_save_opts.distance, 0.0, 15.0, 0.001, 1.0, 3, true, 0.0, 0.0,
+ distance_help, SAVE_PROC);
+ gimp_scale_entry_set_logarithmic((GtkObject*)entry_distance, true);
+
+ // Quality Slider
+ static gchar quality_help[] =
+ "JPEG-style Quality is remapped to distance. "
+ "Values roughly match libjpeg quality settings.";
+ entry_quality = (GtkAdjustment*)gimp_scale_entry_new(
+ GTK_TABLE(table), 0, 1, "Quality", SCALE_WIDTH, 0, jxl_save_opts.quality,
+ 8.26, 100.0, 1.0, 10.0, 2, true, 0.0, 0.0, quality_help, SAVE_PROC);
+
+ // Distance and Quality Signals
+ handle_entry_distance = g_signal_connect(
+ entry_distance, "value-changed", G_CALLBACK(GuiOnChangeDistance), this);
+ handle_entry_quality = g_signal_connect(entry_quality, "value-changed",
+ G_CALLBACK(GuiOnChangeQuality), this);
+
+ // ----------
+ separator = gtk_vseparator_new();
+ gtk_table_attach(GTK_TABLE(table), separator, 0, 2, 2, 3, GTK_EXPAND,
+ GTK_EXPAND, 9, 9);
+ gtk_widget_show(separator);
+
+ // Encoding Effort / Speed
+ static gchar effort_help[] =
+ "Adjust encoding speed. Higher values are faster because "
+ "the encoder uses less effort to hit distance targets. "
+ "As\u00A0a\u00A0result, image quality may be decreased. "
+ "Default\u00A0=\u00A03.";
+ entry_effort = (GtkAdjustment*)gimp_scale_entry_new(
+ GTK_TABLE(table), 0, 3, "Speed", SCALE_WIDTH, 0,
+ 10 - jxl_save_opts.encoding_effort, 1, 9, 1, 2, 0, true, 0.0, 0.0,
+ effort_help, SAVE_PROC);
+
+ // effort signal
+ g_signal_connect(entry_effort, "value-changed", G_CALLBACK(GuiOnChangeEffort),
+ nullptr);
+
+ // ----------
+ separator = gtk_vseparator_new();
+ gtk_table_attach(GTK_TABLE(table), separator, 0, 2, 4, 5, GTK_EXPAND,
+ GTK_EXPAND, 9, 9);
+ gtk_widget_show(separator);
+
+ // Lossless Mode Convenience Checkbox
+ static gchar lossless_help[] =
+ "Compress using modular lossless mode. "
+ "Speed\u00A0is adjusted to improve performance.";
+ toggle_lossless = gtk_check_button_new_with_label("Lossless Mode");
+ gimp_help_set_help_data(toggle_lossless, lossless_help, nullptr);
+ gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(toggle_lossless),
+ jxl_save_opts.lossless);
+ gtk_table_attach_defaults(GTK_TABLE(table), toggle_lossless, 0, 2, 5, 6);
+ gtk_widget_show(toggle_lossless);
+
+ // lossless signal
+ handle_toggle_lossless = g_signal_connect(
+ toggle_lossless, "toggled", G_CALLBACK(GuiOnChangeLossless), this);
+
+ // ----------
+ separator = gtk_vseparator_new();
+ gtk_box_pack_start(GTK_BOX(main_vbox), separator, false, false, 1);
+ gtk_widget_show(separator);
+
+ // Advanced Settings Frame
+ std::vector<GtkWidget*> advanced_opts;
+
+ frame_advanced = gtk_frame_new("Advanced Settings");
+ gimp_help_set_help_data(frame_advanced,
+ "Some advanced settings may produce malformed files.",
+ nullptr);
+ gtk_frame_set_shadow_type(GTK_FRAME(frame_advanced), GTK_SHADOW_ETCHED_IN);
+ gtk_box_pack_start(GTK_BOX(main_vbox), frame_advanced, true, true, 0);
+ gtk_widget_show(frame_advanced);
+
+ gtk_widget_set_sensitive(frame_advanced, false);
+
+ vbox = gtk_vbox_new(false, 6);
+ gtk_container_set_border_width(GTK_CONTAINER(vbox), 6);
+ gtk_container_add(GTK_CONTAINER(frame_advanced), vbox);
+ gtk_widget_show(vbox);
+
+ // uses_original_profile
+ static gchar uses_original_profile_help[] =
+ "Prevents conversion to the XYB colorspace. "
+ "File sizes are approximately doubled.";
+ toggle_no_xyb = gtk_check_button_new_with_label("Do not use XYB colorspace");
+ gimp_help_set_help_data(toggle_no_xyb, uses_original_profile_help, nullptr);
+ gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(toggle_no_xyb),
+ jxl_save_opts.basic_info.uses_original_profile);
+ gtk_box_pack_start(GTK_BOX(vbox), toggle_no_xyb, false, false, 0);
+ gtk_widget_show(toggle_no_xyb);
+
+ g_signal_connect(toggle_no_xyb, "toggled", G_CALLBACK(GuiOnChangeNoXYB),
+ nullptr);
+
+ // save raw codestream
+ static gchar codestream_help[] =
+ "Save the raw codestream, without a container. "
+ "The container is required for metadata and some other features.";
+ toggle_raw = gtk_check_button_new_with_label("Save Raw Codestream");
+ gimp_help_set_help_data(toggle_raw, codestream_help, nullptr);
+ gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(toggle_raw),
+ !jxl_save_opts.use_container);
+ gtk_box_pack_start(GTK_BOX(vbox), toggle_raw, false, false, 0);
+ gtk_widget_show(toggle_raw);
+
+ g_signal_connect(toggle_raw, "toggled", G_CALLBACK(GuiOnChangeCodestream),
+ nullptr);
+
+ // ----------
+ separator = gtk_vseparator_new();
+ gtk_box_pack_start(GTK_BOX(vbox), separator, false, false, 1);
+ gtk_widget_show(separator);
+
+ // Faster Decoding / Decoding Speed
+ static gchar faster_help[] =
+ "Improve decoding speed at the expense of quality. "
+ "Default\u00A0=\u00A00.";
+ table = gtk_table_new(1, 3, false);
+ gtk_table_set_col_spacings(GTK_TABLE(table), 6);
+ gtk_container_add(GTK_CONTAINER(vbox), table);
+ gtk_widget_show(table);
+
+ entry_faster = (GtkAdjustment*)gimp_scale_entry_new(
+ GTK_TABLE(table), 0, 0, "Faster Decoding", SCALE_WIDTH, 0,
+ jxl_save_opts.faster_decoding, 0, 4, 1, 1, 0, true, 0.0, 0.0, faster_help,
+ SAVE_PROC);
+
+ // Faster Decoding Signals
+ g_signal_connect(entry_faster, "value-changed",
+ G_CALLBACK(gimp_int_adjustment_update),
+ &jxl_save_opts.faster_decoding);
+
+ // Enable Advanced Settings
+ frame = gtk_frame_new(nullptr);
+ gtk_frame_set_shadow_type(GTK_FRAME(frame), GTK_SHADOW_NONE);
+ gtk_box_pack_start(GTK_BOX(main_vbox), frame, true, true, 0);
+ gtk_widget_show(frame);
+
+ vbox = gtk_vbox_new(false, 6);
+ gtk_container_set_border_width(GTK_CONTAINER(vbox), 6);
+ gtk_container_add(GTK_CONTAINER(frame), vbox);
+ gtk_widget_show(vbox);
+
+ static gchar advanced_help[] =
+ "Some advanced settings may produce malformed files.";
+ toggle = gtk_check_button_new_with_label("Enable Advanced Settings");
+ gimp_help_set_help_data(toggle, advanced_help, nullptr);
+ gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(toggle),
+ jxl_save_opts.advanced_mode);
+ gtk_box_pack_start(GTK_BOX(vbox), toggle, false, false, 0);
+ gtk_widget_show(toggle);
+
+ g_signal_connect(toggle, "toggled", G_CALLBACK(GuiOnChangeAdvancedMode),
+ this);
+
+ // show dialog
+ gtk_widget_show(dialog);
+
+ GtkAllocation allocation;
+ gtk_widget_get_allocation(dialog, &allocation);
+
+ int height = allocation.height;
+ gtk_widget_set_size_request(dialog, height * 1.5, height);
+
+ run = (gimp_dialog_run(GIMP_DIALOG(dialog)) == GTK_RESPONSE_OK);
+ gtk_widget_destroy(dialog);
+
+ return run;
+} // JpegXlSaveGui::SaveDialog
+
+JpegXlSaveOpts::JpegXlSaveOpts() {
+ SetDistance(1.0);
+
+ pixel_format.num_channels = 4;
+ pixel_format.data_type = JXL_TYPE_FLOAT;
+ pixel_format.endianness = JXL_NATIVE_ENDIAN;
+ pixel_format.align = 0;
+
+ JxlEncoderInitBasicInfo(&basic_info);
+ return;
+} // JpegXlSaveOpts constructor
+
+bool JpegXlSaveOpts::SetModel(bool is_linear_) {
+ int channels;
+ std::string model;
+
+ if (is_gray) {
+ channels = 1;
+ if (is_linear_) {
+ model = "Y";
+ } else {
+ model = "Y'";
+ }
+ } else {
+ channels = 3;
+ if (is_linear_) {
+ model = "RGB";
+ } else {
+ model = "R'G'B'";
+ }
+ }
+ if (has_alpha) {
+ SetBablModel(model + "A");
+ SetNumChannels(channels + 1);
+ } else {
+ SetBablModel(model);
+ SetNumChannels(channels);
+ }
+ return true;
+} // JpegXlSaveOpts::SetModel
+
+bool JpegXlSaveOpts::SetDistance(float dist) {
+ distance = dist;
+ return UpdateQuality();
+}
+
+bool JpegXlSaveOpts::SetQuality(float qual) {
+ quality = qual;
+ return UpdateDistance();
+}
+
+bool JpegXlSaveOpts::UpdateQuality() {
+ float qual;
+
+ if (distance < 0.1) {
+ qual = 100;
+ } else if (distance > 6.4) {
+ qual = -5.0 / 53.0 * sqrt(6360.0 * distance - 39975.0) + 1725.0 / 53.0;
+ lossless = false;
+ } else {
+ qual = 100 - (distance - 0.1) / 0.09;
+ lossless = false;
+ }
+
+ if (qual < 0) {
+ quality = 0.0;
+ } else if (qual >= 100) {
+ quality = 100.0;
+ } else {
+ quality = qual;
+ }
+
+ return true;
+}
+
+bool JpegXlSaveOpts::UpdateDistance() {
+ float dist;
+ if (quality >= 30) {
+ dist = 0.1 + (100 - quality) * 0.09;
+ } else {
+ dist = 53.0 / 3000.0 * quality * quality - 23.0 / 20.0 * quality + 25.0;
+ }
+
+ if (dist > 25) {
+ distance = 25;
+ } else {
+ distance = dist;
+ }
+ return true;
+}
+
+bool JpegXlSaveOpts::SetDimensions(int x, int y) {
+ basic_info.xsize = x;
+ basic_info.ysize = y;
+ return true;
+}
+
+bool JpegXlSaveOpts::SetNumChannels(int channels) {
+ switch (channels) {
+ case 1:
+ pixel_format.num_channels = 1;
+ basic_info.num_color_channels = 1;
+ basic_info.num_extra_channels = 0;
+ basic_info.alpha_bits = 0;
+ basic_info.alpha_exponent_bits = 0;
+ break;
+ case 2:
+ pixel_format.num_channels = 2;
+ basic_info.num_color_channels = 1;
+ basic_info.num_extra_channels = 1;
+ basic_info.alpha_bits = int(std::fmin(16, basic_info.bits_per_sample));
+ basic_info.alpha_exponent_bits = 0;
+ break;
+ case 3:
+ pixel_format.num_channels = 3;
+ basic_info.num_color_channels = 3;
+ basic_info.num_extra_channels = 0;
+ basic_info.alpha_bits = 0;
+ basic_info.alpha_exponent_bits = 0;
+ break;
+ case 4:
+ pixel_format.num_channels = 4;
+ basic_info.num_color_channels = 3;
+ basic_info.num_extra_channels = 1;
+ basic_info.alpha_bits = int(std::fmin(16, basic_info.bits_per_sample));
+ basic_info.alpha_exponent_bits = 0;
+ break;
+ default:
+ SetNumChannels(3);
+ } // switch
+ return true;
+} // JpegXlSaveOpts::SetNumChannels
+
+bool JpegXlSaveOpts::UpdateBablFormat() {
+ babl_format_str = babl_model_str + " " + babl_type_str;
+ return true;
+}
+
+bool JpegXlSaveOpts::SetBablModel(std::string model) {
+ babl_model_str = std::move(model);
+ return UpdateBablFormat();
+}
+
+bool JpegXlSaveOpts::SetBablType(std::string type) {
+ babl_type_str = std::move(type);
+ return UpdateBablFormat();
+}
+
+bool JpegXlSaveOpts::SetPrecision(int gimp_precision) {
+ switch (gimp_precision) {
+ case GIMP_PRECISION_HALF_GAMMA:
+ case GIMP_PRECISION_HALF_LINEAR:
+ basic_info.bits_per_sample = 16;
+ basic_info.exponent_bits_per_sample = 5;
+ break;
+
+ // UINT32 not supported by encoder; using FLOAT instead
+ case GIMP_PRECISION_U32_GAMMA:
+ case GIMP_PRECISION_U32_LINEAR:
+ case GIMP_PRECISION_FLOAT_GAMMA:
+ case GIMP_PRECISION_FLOAT_LINEAR:
+ basic_info.bits_per_sample = 32;
+ basic_info.exponent_bits_per_sample = 8;
+ break;
+
+ case GIMP_PRECISION_U16_GAMMA:
+ case GIMP_PRECISION_U16_LINEAR:
+ basic_info.bits_per_sample = 16;
+ basic_info.exponent_bits_per_sample = 0;
+ break;
+
+ default:
+ case GIMP_PRECISION_U8_LINEAR:
+ case GIMP_PRECISION_U8_GAMMA:
+ basic_info.bits_per_sample = 8;
+ basic_info.exponent_bits_per_sample = 0;
+ break;
+ }
+ return true;
+} // JpegXlSaveOpts::SetPrecision
+
+} // namespace
+
+bool SaveJpegXlImage(const gint32 image_id, const gint32 drawable_id,
+ const gint32 orig_image_id, const gchar* const filename) {
+ if (!jxl_save_gui.SaveDialog()) {
+ return true;
+ }
+
+ gint32 nlayers;
+ gint32* layers;
+ gint32 duplicate = gimp_image_duplicate(image_id);
+
+ JpegXlGimpProgress gimp_save_progress(
+ ("Saving JPEG XL file:" + std::string(filename)).c_str());
+ gimp_save_progress.update();
+
+ // try to get ICC color profile...
+ std::vector<uint8_t> icc;
+
+ GimpColorProfile* profile = gimp_image_get_effective_color_profile(image_id);
+ jxl_save_opts.is_gray = gimp_color_profile_is_gray(profile);
+ jxl_save_opts.is_linear = gimp_color_profile_is_linear(profile);
+
+ profile = gimp_image_get_color_profile(image_id);
+ if (profile) {
+ g_printerr(SAVE_PROC " Info: Extracting ICC Profile...\n");
+ gsize icc_size;
+ const guint8* const icc_bytes =
+ gimp_color_profile_get_icc_profile(profile, &icc_size);
+
+ icc.assign(icc_bytes, icc_bytes + icc_size);
+ } else {
+ g_printerr(SAVE_PROC " Info: No ICC profile. Exporting image anyway.\n");
+ }
+
+ gimp_save_progress.update();
+
+ jxl_save_opts.SetDimensions(gimp_image_width(image_id),
+ gimp_image_height(image_id));
+
+ jxl_save_opts.SetPrecision(gimp_image_get_precision(image_id));
+ layers = gimp_image_get_layers(duplicate, &nlayers);
+
+ for (int i = 0; i < nlayers; i++) {
+ if (gimp_drawable_has_alpha(layers[i])) {
+ jxl_save_opts.has_alpha = true;
+ break;
+ }
+ }
+
+ gimp_save_progress.update();
+
+ // layers need to match image size, for now
+ for (int i = 0; i < nlayers; i++) {
+ gimp_layer_resize_to_image_size(layers[i]);
+ }
+
+ // treat layers as animation frames, for now
+ if (nlayers > 1) {
+ jxl_save_opts.basic_info.have_animation = true;
+ jxl_save_opts.basic_info.animation.tps_numerator = 100;
+ }
+
+ gimp_save_progress.update();
+
+ // multi-threaded parallel runner.
+ auto runner = JxlResizableParallelRunnerMake(nullptr);
+
+ JxlResizableParallelRunnerSetThreads(
+ runner.get(),
+ JxlResizableParallelRunnerSuggestThreads(jxl_save_opts.basic_info.xsize,
+ jxl_save_opts.basic_info.ysize));
+
+ auto enc = JxlEncoderMake(/*memory_manager=*/nullptr);
+ JxlEncoderUseContainer(enc.get(), jxl_save_opts.use_container);
+
+ if (JXL_ENC_SUCCESS != JxlEncoderSetParallelRunner(enc.get(),
+ JxlResizableParallelRunner,
+ runner.get())) {
+ g_printerr(SAVE_PROC " Error: JxlEncoderSetParallelRunner failed\n");
+ return false;
+ }
+
+ // this sets some basic_info properties
+ jxl_save_opts.SetModel(jxl_save_opts.is_linear);
+
+ if (JXL_ENC_SUCCESS !=
+ JxlEncoderSetBasicInfo(enc.get(), &jxl_save_opts.basic_info)) {
+ g_printerr(SAVE_PROC " Error: JxlEncoderSetBasicInfo failed\n");
+ return false;
+ }
+
+ // try to use ICC profile
+ if (!icc.empty() && !jxl_save_opts.is_gray) {
+ if (JXL_ENC_SUCCESS ==
+ JxlEncoderSetICCProfile(enc.get(), icc.data(), icc.size())) {
+ jxl_save_opts.icc_attached = true;
+ } else {
+ g_printerr(SAVE_PROC " Warning: JxlEncoderSetICCProfile failed.\n");
+ jxl_save_opts.basic_info.uses_original_profile = false;
+ jxl_save_opts.lossless = false;
+ }
+ } else {
+ g_printerr(SAVE_PROC " Warning: Using internal profile.\n");
+ jxl_save_opts.basic_info.uses_original_profile = false;
+ jxl_save_opts.lossless = false;
+ }
+
+ // set up internal color profile
+ JxlColorEncoding color_encoding = {};
+
+ if (jxl_save_opts.is_linear) {
+ JxlColorEncodingSetToLinearSRGB(&color_encoding, jxl_save_opts.is_gray);
+ } else {
+ JxlColorEncodingSetToSRGB(&color_encoding, jxl_save_opts.is_gray);
+ }
+
+ if (JXL_ENC_SUCCESS !=
+ JxlEncoderSetColorEncoding(enc.get(), &color_encoding)) {
+ g_printerr(SAVE_PROC " Warning: JxlEncoderSetColorEncoding failed\n");
+ }
+
+ // set encoder options
+ JxlEncoderFrameSettings* frame_settings;
+ frame_settings = JxlEncoderFrameSettingsCreate(enc.get(), nullptr);
+
+ JxlEncoderFrameSettingsSetOption(frame_settings, JXL_ENC_FRAME_SETTING_EFFORT,
+ jxl_save_opts.encoding_effort);
+ JxlEncoderFrameSettingsSetOption(frame_settings,
+ JXL_ENC_FRAME_SETTING_DECODING_SPEED,
+ jxl_save_opts.faster_decoding);
+
+ // lossless mode
+ if (jxl_save_opts.lossless || jxl_save_opts.distance < 0.01) {
+ if (jxl_save_opts.basic_info.exponent_bits_per_sample > 0) {
+ // lossless mode doesn't work well with floating point
+ jxl_save_opts.distance = 0.01;
+ jxl_save_opts.lossless = false;
+ JxlEncoderSetFrameLossless(frame_settings, false);
+ JxlEncoderSetFrameDistance(frame_settings, 0.01);
+ } else {
+ JxlEncoderSetFrameDistance(frame_settings, 0);
+ JxlEncoderSetFrameLossless(frame_settings, true);
+ }
+ } else {
+ jxl_save_opts.lossless = false;
+ JxlEncoderSetFrameLossless(frame_settings, false);
+ JxlEncoderSetFrameDistance(frame_settings, jxl_save_opts.distance);
+ }
+
+ // convert precision and colorspace
+ if (jxl_save_opts.is_linear &&
+ jxl_save_opts.basic_info.bits_per_sample < 32) {
+ gimp_image_convert_precision(duplicate, GIMP_PRECISION_FLOAT_LINEAR);
+ } else {
+ gimp_image_convert_precision(duplicate, GIMP_PRECISION_FLOAT_GAMMA);
+ }
+
+ // process layers and compress into JXL
+ size_t buffer_size =
+ jxl_save_opts.basic_info.xsize * jxl_save_opts.basic_info.ysize *
+ jxl_save_opts.pixel_format.num_channels * 4; // bytes per sample
+
+ for (int i = nlayers - 1; i >= 0; i--) {
+ gimp_save_progress.update();
+
+ // copy image into buffer...
+ gpointer pixels_buffer_1;
+ gpointer pixels_buffer_2;
+ pixels_buffer_1 = g_malloc(buffer_size);
+ pixels_buffer_2 = g_malloc(buffer_size);
+
+ gimp_layer_resize_to_image_size(layers[i]);
+
+ GeglBuffer* buffer = gimp_drawable_get_buffer(layers[i]);
+
+ // using gegl_buffer_set_format to get the format because
+ // gegl_buffer_get_format doesn't always get the original format
+ const Babl* native_format = gegl_buffer_set_format(buffer, nullptr);
+
+ gegl_buffer_get(buffer,
+ GEGL_RECTANGLE(0, 0, jxl_save_opts.basic_info.xsize,
+ jxl_save_opts.basic_info.ysize),
+ 1.0, native_format, pixels_buffer_1, GEGL_AUTO_ROWSTRIDE,
+ GEGL_ABYSS_NONE);
+ g_clear_object(&buffer);
+
+ // use babl to fix gamma mismatch issues
+ jxl_save_opts.SetModel(jxl_save_opts.is_linear);
+ jxl_save_opts.pixel_format.data_type = JXL_TYPE_FLOAT;
+ jxl_save_opts.SetBablType("float");
+ const Babl* destination_format =
+ babl_format(jxl_save_opts.babl_format_str.c_str());
+
+ babl_process(
+ babl_fish(native_format, destination_format), pixels_buffer_1,
+ pixels_buffer_2,
+ jxl_save_opts.basic_info.xsize * jxl_save_opts.basic_info.ysize);
+
+ gimp_save_progress.update();
+
+ // send layer to encoder
+ if (JXL_ENC_SUCCESS !=
+ JxlEncoderAddImageFrame(frame_settings, &jxl_save_opts.pixel_format,
+ pixels_buffer_2, buffer_size)) {
+ g_printerr(SAVE_PROC " Error: JxlEncoderAddImageFrame failed\n");
+ return false;
+ }
+ }
+
+ JxlEncoderCloseInput(enc.get());
+
+ // get data from encoder
+ std::vector<uint8_t> compressed;
+ compressed.resize(262144);
+ uint8_t* next_out = compressed.data();
+ size_t avail_out = compressed.size();
+
+ JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+ while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ gimp_save_progress.update();
+
+ process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out);
+ if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+ size_t offset = next_out - compressed.data();
+ compressed.resize(compressed.size() + 262144);
+ next_out = compressed.data() + offset;
+ avail_out = compressed.size() - offset;
+ }
+ }
+ compressed.resize(next_out - compressed.data());
+
+ if (JXL_ENC_SUCCESS != process_result) {
+ g_printerr(SAVE_PROC " Error: JxlEncoderProcessOutput failed\n");
+ return false;
+ }
+
+ // write file
+ std::ofstream outstream(filename, std::ios::out | std::ios::binary);
+ copy(compressed.begin(), compressed.end(),
+ std::ostream_iterator<uint8_t>(outstream));
+
+ gimp_save_progress.finished();
+ return true;
+} // SaveJpegXlImage()
+
+} // namespace jxl
diff --git a/third_party/jpeg-xl/plugins/gimp/file-jxl-save.h b/third_party/jpeg-xl/plugins/gimp/file-jxl-save.h
new file mode 100644
index 0000000000..c9d0e8091f
--- /dev/null
+++ b/third_party/jpeg-xl/plugins/gimp/file-jxl-save.h
@@ -0,0 +1,18 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef PLUGINS_GIMP_FILE_JXL_SAVE_H_
+#define PLUGINS_GIMP_FILE_JXL_SAVE_H_
+
+#include "plugins/gimp/common.h"
+
+namespace jxl {
+
+bool SaveJpegXlImage(gint32 image_id, gint32 drawable_id, gint32 orig_image_id,
+ const gchar* filename);
+
+} // namespace jxl
+
+#endif // PLUGINS_GIMP_FILE_JXL_SAVE_H_
diff --git a/third_party/jpeg-xl/plugins/gimp/file-jxl.cc b/third_party/jpeg-xl/plugins/gimp/file-jxl.cc
new file mode 100644
index 0000000000..743495a2e0
--- /dev/null
+++ b/third_party/jpeg-xl/plugins/gimp/file-jxl.cc
@@ -0,0 +1,157 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <string.h>
+
+#include <string>
+
+#include "plugins/gimp/common.h"
+#include "plugins/gimp/file-jxl-load.h"
+#include "plugins/gimp/file-jxl-save.h"
+
+namespace jxl {
+namespace {
+
+constexpr char kLoadProc[] = "file-jxl-load";
+constexpr char kSaveProc[] = "file-jxl-save";
+
+void Query() {
+ {
+ static char run_mode_name[] = "run-mode";
+ static char run_mode_description[] = "Run mode";
+ static char filename_name[] = "filename";
+ static char filename_description[] = "The name of the file to load";
+ static char raw_filename_name[] = "raw-filename";
+ static char raw_filename_description[] =
+ "The name of the file, as entered by the user";
+ static const GimpParamDef load_args[] = {
+ {GIMP_PDB_INT32, run_mode_name, run_mode_description},
+ {GIMP_PDB_STRING, filename_name, filename_description},
+ {GIMP_PDB_STRING, raw_filename_name, raw_filename_description},
+ };
+ static char image_name[] = "image";
+ static char image_description[] = "Loaded image";
+ static const GimpParamDef load_return_vals[] = {
+ {GIMP_PDB_IMAGE, image_name, image_description},
+ };
+
+ gimp_install_procedure(
+ /*name=*/kLoadProc, /*blurb=*/"Loads JPEG XL image files",
+ /*help=*/"Loads JPEG XL image files", /*author=*/"JPEG XL Project",
+ /*copyright=*/"JPEG XL Project", /*date=*/"2019",
+ /*menu_label=*/"JPEG XL image", /*image_types=*/nullptr,
+ /*type=*/GIMP_PLUGIN, /*n_params=*/G_N_ELEMENTS(load_args),
+ /*n_return_vals=*/G_N_ELEMENTS(load_return_vals), /*params=*/load_args,
+ /*return_vals=*/load_return_vals);
+ gimp_register_file_handler_mime(kLoadProc, "image/jxl");
+ gimp_register_magic_load_handler(
+ kLoadProc, "jxl", "",
+ "0,string,\xFF\x0A,"
+ "0,string,\\000\\000\\000\x0CJXL\\040\\015\\012\x87\\012");
+ }
+
+ {
+ static char run_mode_name[] = "run-mode";
+ static char run_mode_description[] = "Run mode";
+ static char image_name[] = "image";
+ static char image_description[] = "Input image";
+ static char drawable_name[] = "drawable";
+ static char drawable_description[] = "Drawable to save";
+ static char filename_name[] = "filename";
+ static char filename_description[] = "The name of the file to save";
+ static char raw_filename_name[] = "raw-filename";
+ static char raw_filename_description[] = "The name of the file to save";
+ static const GimpParamDef save_args[] = {
+ {GIMP_PDB_INT32, run_mode_name, run_mode_description},
+ {GIMP_PDB_IMAGE, image_name, image_description},
+ {GIMP_PDB_DRAWABLE, drawable_name, drawable_description},
+ {GIMP_PDB_STRING, filename_name, filename_description},
+ {GIMP_PDB_STRING, raw_filename_name, raw_filename_description},
+ };
+
+ gimp_install_procedure(
+ /*name=*/kSaveProc, /*blurb=*/"Saves JPEG XL image files",
+ /*help=*/"Saves JPEG XL image files", /*author=*/"JPEG XL Project",
+ /*copyright=*/"JPEG XL Project", /*date=*/"2019",
+ /*menu_label=*/"JPEG XL image", /*image_types=*/"RGB*, GRAY*",
+ /*type=*/GIMP_PLUGIN, /*n_params=*/G_N_ELEMENTS(save_args),
+ /*n_return_vals=*/0, /*params=*/save_args,
+ /*return_vals=*/nullptr);
+ gimp_register_file_handler_mime(kSaveProc, "image/jxl");
+ gimp_register_save_handler(kSaveProc, "jxl", "");
+ }
+}
+
+void Run(const gchar* const name, const gint nparams,
+ const GimpParam* const params, gint* const nreturn_vals,
+ GimpParam** const return_vals) {
+ gegl_init(nullptr, nullptr);
+
+ static GimpParam values[2];
+
+ *nreturn_vals = 1;
+ *return_vals = values;
+
+ values[0].type = GIMP_PDB_STATUS;
+ values[0].data.d_status = GIMP_PDB_EXECUTION_ERROR;
+
+ if (strcmp(name, kLoadProc) == 0) {
+ if (nparams != 3) {
+ values[0].data.d_status = GIMP_PDB_CALLING_ERROR;
+ return;
+ }
+
+ const gchar* const filename = params[1].data.d_string;
+ gint32 image_id;
+ if (!LoadJpegXlImage(filename, &image_id)) {
+ values[0].data.d_status = GIMP_PDB_EXECUTION_ERROR;
+ return;
+ }
+
+ *nreturn_vals = 2;
+ values[0].data.d_status = GIMP_PDB_SUCCESS;
+ values[1].type = GIMP_PDB_IMAGE;
+ values[1].data.d_image = image_id;
+ } else if (strcmp(name, kSaveProc) == 0) {
+ if (nparams != 5) {
+ values[0].data.d_status = GIMP_PDB_CALLING_ERROR;
+ return;
+ }
+
+ gint32 image_id = params[1].data.d_image;
+ gint32 drawable_id = params[2].data.d_drawable;
+ const gchar* const filename = params[3].data.d_string;
+ const gint32 orig_image_id = image_id;
+ const GimpExportReturn export_result = gimp_export_image(
+ &image_id, &drawable_id, "JPEG XL",
+ static_cast<GimpExportCapabilities>(GIMP_EXPORT_CAN_HANDLE_RGB |
+ GIMP_EXPORT_CAN_HANDLE_GRAY |
+ GIMP_EXPORT_CAN_HANDLE_ALPHA));
+ switch (export_result) {
+ case GIMP_EXPORT_CANCEL:
+ values[0].data.d_status = GIMP_PDB_CANCEL;
+ return;
+ case GIMP_EXPORT_IGNORE:
+ break;
+ case GIMP_EXPORT_EXPORT:
+ break;
+ }
+ if (!SaveJpegXlImage(image_id, drawable_id, orig_image_id, filename)) {
+ return;
+ }
+ if (image_id != orig_image_id) {
+ gimp_image_delete(image_id);
+ }
+ values[0].data.d_status = GIMP_PDB_SUCCESS;
+ }
+}
+
+} // namespace
+} // namespace jxl
+
+static const GimpPlugInInfo PLUG_IN_INFO = {nullptr, nullptr, &jxl::Query,
+ &jxl::Run};
+
+MAIN()
diff --git a/third_party/jpeg-xl/plugins/mime/CMakeLists.txt b/third_party/jpeg-xl/plugins/mime/CMakeLists.txt
new file mode 100644
index 0000000000..6f2a0f919c
--- /dev/null
+++ b/third_party/jpeg-xl/plugins/mime/CMakeLists.txt
@@ -0,0 +1,6 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+install(FILES image-jxl.xml DESTINATION share/mime/packages/)
diff --git a/third_party/jpeg-xl/plugins/mime/README.md b/third_party/jpeg-xl/plugins/mime/README.md
new file mode 100644
index 0000000000..4d398c7b90
--- /dev/null
+++ b/third_party/jpeg-xl/plugins/mime/README.md
@@ -0,0 +1,37 @@
+## :warning: Not needed anymore
+
+As `image/jxl` is now supported by [shared-mine-info 2.2](https://gitlab.freedesktop.org/xdg/shared-mime-info/-/releases/2.2), it should not be necessary anymore to install this plugin.
+
+You can test if your system correctly understand the MIME type of JPEG XL image by obtaining a JPEG XL image, e.g. with
+```bash
+wget https://raw.githubusercontent.com/libjxl/conformance/master/testcases/bicycles/input.jxl
+```
+and with that sample JPEG XL file `input.jxl` (or any other valid JPEG XL file), run any of the following commands:
+```bash
+xdg-mime query filetype input.jxl
+file --mime-type input.jxl
+mimetype input.jxl
+```
+If the output contains `image/jxl` you are all set!
+
+
+## JPEG XL MIME type
+
+If not already installed by the [Installing section of BUILDING.md](../../BUILDING.md#installing), then it can be done manually:
+
+### Install
+```bash
+sudo xdg-mime install --novendor image-jxl.xml
+```
+
+Then run:
+```
+update-mime --local
+```
+
+
+### Uninstall
+```bash
+sudo xdg-mime uninstall image-jxl.xml
+```
+
diff --git a/third_party/jpeg-xl/plugins/mime/image-jxl.xml b/third_party/jpeg-xl/plugins/mime/image-jxl.xml
new file mode 100644
index 0000000000..cab9018c7d
--- /dev/null
+++ b/third_party/jpeg-xl/plugins/mime/image-jxl.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<mime-info xmlns="http://www.freedesktop.org/standards/shared-mime-info">
+ <mime-type type="image/jxl">
+ <comment>JPEG XL image</comment>
+ <comment xml:lang="fr">image JPEG XL</comment>
+ <comment xml:lang="nl">JPEG XL afbeelding</comment>
+ <magic priority="50">
+ <match type="string" offset="0" value="\xFF\x0A"/>
+ <match type="string" offset="0" value="\0\0\0\x0CJXL \x0D\x0A\x87\x0A"/>
+ </magic>
+ <glob pattern="*.jxl"/>
+ </mime-type>
+</mime-info>
diff --git a/third_party/jpeg-xl/third_party/CMakeLists.txt b/third_party/jpeg-xl/third_party/CMakeLists.txt
new file mode 100644
index 0000000000..ef24e12995
--- /dev/null
+++ b/third_party/jpeg-xl/third_party/CMakeLists.txt
@@ -0,0 +1,175 @@
+# Copyright (c) the JPEG XL Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if((SANITIZER STREQUAL "asan") OR (SANITIZER STREQUAL "msan"))
+ set(BUILD_TESTING OFF)
+endif()
+
+# Highway
+set(HWY_SYSTEM_GTEST ON CACHE INTERNAL "")
+set(HWY_FORCE_STATIC_LIBS ON CACHE INTERNAL "")
+set(HWY_ENABLE_CONTRIB OFF CACHE INTERNAL "")
+set(HWY_ENABLE_EXAMPLES OFF CACHE INTERNAL "")
+set(HWY_ENABLE_TESTS OFF CACHE INTERNAL "")
+if((SANITIZER STREQUAL "asan") OR (SANITIZER STREQUAL "msan"))
+ set(HWY_ENABLE_INSTALL OFF CACHE INTERNAL "")
+endif()
+if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/highway/CMakeLists.txt" AND
+ NOT JPEGXL_FORCE_SYSTEM_HWY)
+ add_subdirectory(highway)
+ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/highway/LICENSE"
+ ${PROJECT_BINARY_DIR}/LICENSE.highway COPYONLY)
+else()
+ find_package(HWY 1.0.0)
+ if (NOT HWY_FOUND)
+ message(FATAL_ERROR
+ "Highway library (hwy) not found. Install libhwy-dev or download it "
+ "to third_party/highway from https://github.com/google/highway . "
+ "Highway is required to build JPEG XL. You can run "
+ "${PROJECT_SOURCE_DIR}/deps.sh to download this dependency.")
+ endif()
+ if(JPEGXL_DEP_LICENSE_DIR)
+ configure_file("${JPEGXL_DEP_LICENSE_DIR}/libhwy-dev/copyright"
+ ${PROJECT_BINARY_DIR}/LICENSE.highway COPYONLY)
+ endif() # JPEGXL_DEP_LICENSE_DIR
+endif()
+
+# brotli
+if (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/brotli/c/include/brotli/decode.h" OR
+ JPEGXL_FORCE_SYSTEM_BROTLI)
+ find_package(Brotli)
+ if (NOT Brotli_FOUND)
+ message(FATAL_ERROR
+ "Brotli not found, install brotli-dev or download brotli source code to"
+ " third_party/brotli from https://github.com/google/brotli. You can use"
+ " ${PROJECT_SOURCE_DIR}/deps.sh to download this dependency.")
+ endif ()
+ if(JPEGXL_DEP_LICENSE_DIR)
+ configure_file("${JPEGXL_DEP_LICENSE_DIR}/libbrotli-dev/copyright"
+ ${PROJECT_BINARY_DIR}/LICENSE.brotli COPYONLY)
+ endif() # JPEGXL_DEP_LICENSE_DIR
+else()
+ # Compile brotli from sources.
+ set(BROTLI_DISABLE_TESTS ON CACHE STRING "Disable Brotli tests")
+ # Override default "no-install" policy.
+ if((NOT SANITIZER STREQUAL "asan") AND (NOT SANITIZER STREQUAL "msan"))
+ set(BROTLI_BUNDLED_MODE OFF CACHE INTERNAL "")
+ endif()
+ add_subdirectory(brotli)
+ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/brotli/LICENSE"
+ ${PROJECT_BINARY_DIR}/LICENSE.brotli COPYONLY)
+ if(APPLE)
+ if(NOT DEFINED CMAKE_MACOSX_RPATH)
+ # Use @rpath in install_name when CMAKE_MACOSX_RPATH is not set.
+ set_property(TARGET brotlienc PROPERTY MACOSX_RPATH TRUE)
+ set_property(TARGET brotlidec PROPERTY MACOSX_RPATH TRUE)
+ set_property(TARGET brotlicommon PROPERTY MACOSX_RPATH TRUE)
+ endif()
+ if((NOT DEFINED CMAKE_MACOSX_RPATH) OR CMAKE_MACOSX_RPATH)
+ # Set library search path when @rpath is used.
+ if(NOT DEFINED CMAKE_INSTALL_RPATH)
+ set_property(TARGET brotlienc PROPERTY INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
+ set_property(TARGET brotlidec PROPERTY INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
+ set_property(TARGET brotlicommon PROPERTY INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
+ endif()
+ else()
+ # Set conventional install_name when @rpath is not used.
+ if(NOT DEFINED CMAKE_INSTALL_NAME_DIR)
+ set_property(TARGET brotlienc PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib")
+ set_property(TARGET brotlidec PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib")
+ set_property(TARGET brotlicommon PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib")
+ endif()
+ endif()
+ endif() # APPLE
+endif()
+
+# *cms
+if (JPEGXL_ENABLE_SKCMS OR JPEGXL_ENABLE_PLUGINS)
+ if( NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/skcms/skcms.h" )
+ message(FATAL_ERROR "Please run ${PROJECT_SOURCE_DIR}/deps.sh to fetch the "
+ "build dependencies.")
+ endif()
+ include(skcms.cmake)
+ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/skcms/LICENSE"
+ ${PROJECT_BINARY_DIR}/LICENSE.skcms COPYONLY)
+endif ()
+if (JPEGXL_ENABLE_VIEWERS OR NOT JPEGXL_ENABLE_SKCMS OR JPEGXL_ENABLE_PLUGINS)
+ if( NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/lcms/.git" OR JPEGXL_FORCE_SYSTEM_LCMS2 )
+ find_package(LCMS2 2.13)
+ if ( NOT LCMS2_FOUND )
+ message(FATAL_ERROR "Please install lcms2 or run git submodule update --init")
+ endif ()
+ else()
+ include(lcms2.cmake)
+ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lcms/COPYING"
+ ${PROJECT_BINARY_DIR}/LICENSE.lcms COPYONLY)
+ endif()
+endif()
+
+# libpng
+if (JPEGXL_BUNDLE_LIBPNG AND JPEGXL_EMSCRIPTEN)
+ if (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/libpng/CMakeLists.txt")
+ message(FATAL_ERROR "Please run ${PROJECT_SOURCE_DIR}/deps.sh to fetch the "
+ "build dependencies.")
+ endif()
+ file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/libpng/scripts/pnglibconf.h.prebuilt" DESTINATION "${CMAKE_CURRENT_SOURCE_DIR}/libpng")
+ file(RENAME "${CMAKE_CURRENT_SOURCE_DIR}/libpng/pnglibconf.h.prebuilt" "${CMAKE_CURRENT_SOURCE_DIR}/libpng/pnglibconf.h")
+ set(ZLIB_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/zlib/")
+ set(ZLIB_LIBRARY "")
+ set(PNG_FOUND YES PARENT_SCOPE)
+ set(PNG_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/libpng/" PARENT_SCOPE)
+ set(PNG_LIBRARIES "" PARENT_SCOPE)
+elseif (JPEGXL_BUNDLE_LIBPNG)
+ if (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/libpng/CMakeLists.txt")
+ message(FATAL_ERROR "Please run ${PROJECT_SOURCE_DIR}/deps.sh to fetch the "
+ "build dependencies.")
+ endif()
+ add_subdirectory(zlib)
+ set(PNG_STATIC ON CACHE BOOL "")
+ set(PNG_EXECUTABLES OFF CACHE BOOL "")
+ set(PNG_BUILD_ZLIB ON CACHE BOOL "")
+ set(PNG_TESTS OFF CACHE BOOL "")
+ set(SKIP_INSTALL_ALL ON CACHE BOOL "")
+ set(ZLIB_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/zlib/")
+ set(ZLIB_LIBRARY zlibstatic)
+ add_subdirectory(libpng EXCLUDE_FROM_ALL)
+ set(PNG_FOUND YES PARENT_SCOPE)
+ set(PNG_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/libpng/" PARENT_SCOPE)
+ set(PNG_LIBRARIES png_static PARENT_SCOPE)
+ set_property(TARGET png_static PROPERTY POSITION_INDEPENDENT_CODE ON)
+ set_property(TARGET zlibstatic PROPERTY POSITION_INDEPENDENT_CODE ON)
+ if(JPEGXL_DEP_LICENSE_DIR)
+ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/libpng/LICENSE"
+ ${PROJECT_BINARY_DIR}/LICENSE.libpng COPYONLY)
+ endif()
+else()
+ find_package(PNG)
+ if(PNG_FOUND AND JPEGXL_DEP_LICENSE_DIR)
+ configure_file("${JPEGXL_DEP_LICENSE_DIR}/zlib1g-dev/copyright"
+ ${PROJECT_BINARY_DIR}/LICENSE.zlib COPYONLY)
+ configure_file("${JPEGXL_DEP_LICENSE_DIR}/libpng-dev/copyright"
+ ${PROJECT_BINARY_DIR}/LICENSE.libpng COPYONLY)
+ endif() # JPEGXL_DEP_LICENSE_DIR
+endif()
+
+# sjpeg
+if (JPEGXL_ENABLE_SJPEG)
+ if (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/sjpeg/CMakeLists.txt")
+ message(FATAL_ERROR "Please run ${PROJECT_SOURCE_DIR}/deps.sh to fetch the "
+ "build dependencies.")
+ endif()
+ include(sjpeg.cmake)
+ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/sjpeg/COPYING"
+ ${PROJECT_BINARY_DIR}/LICENSE.sjpeg COPYONLY)
+endif ()
diff --git a/third_party/jpeg-xl/third_party/HEVCSoftware/README.md b/third_party/jpeg-xl/third_party/HEVCSoftware/README.md
new file mode 100644
index 0000000000..70ebaeba33
--- /dev/null
+++ b/third_party/jpeg-xl/third_party/HEVCSoftware/README.md
@@ -0,0 +1,2 @@
+This directory contains modified configuration files from the reference HEVC
+encoder, the source code of which can be found at: https://hevc.hhi.fraunhofer.de/svn/svn_HEVCSoftware/
diff --git a/third_party/jpeg-xl/third_party/HEVCSoftware/cfg/LICENSE b/third_party/jpeg-xl/third_party/HEVCSoftware/cfg/LICENSE
new file mode 100644
index 0000000000..a9d8844e42
--- /dev/null
+++ b/third_party/jpeg-xl/third_party/HEVCSoftware/cfg/LICENSE
@@ -0,0 +1,31 @@
+The copyright in this software is being made available under the BSD
+License, included below. This software may be subject to other third party
+and contributor rights, including patent rights, and no such rights are
+granted under this license.  
+
+Copyright (c) 2010-2017, ITU/ISO/IEC
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/third_party/jpeg-xl/third_party/HEVCSoftware/cfg/encoder_intra_main_scc_10.cfg b/third_party/jpeg-xl/third_party/HEVCSoftware/cfg/encoder_intra_main_scc_10.cfg
new file mode 100644
index 0000000000..5f6b95836d
--- /dev/null
+++ b/third_party/jpeg-xl/third_party/HEVCSoftware/cfg/encoder_intra_main_scc_10.cfg
@@ -0,0 +1,136 @@
+#======== File I/O =====================
+BitstreamFile : str.bin
+ReconFile : rec.yuv
+
+#======== Profile definition ==============
+Profile : main-SCC # Profile name to use for encoding. Use main (for FDIS main), main10 (for FDIS main10), main-still-picture, main-RExt, high-throughput-RExt, main-SCC
+Tier : main # Tier to use for interpretation of --Level (main or high only)"
+
+#======== Unit definition ================
+MaxCUWidth : 64 # Maximum coding unit width in pixel
+MaxCUHeight : 64 # Maximum coding unit height in pixel
+MaxPartitionDepth : 4 # Maximum coding unit depth
+QuadtreeTULog2MaxSize : 5 # Log2 of maximum transform size for
+ # quadtree-based TU coding (2...6)
+QuadtreeTULog2MinSize : 2 # Log2 of minimum transform size for
+ # quadtree-based TU coding (2...6)
+QuadtreeTUMaxDepthInter : 3
+QuadtreeTUMaxDepthIntra : 3
+
+#======== Coding Structure =============
+IntraPeriod : 1 # Period of I-Frame ( -1 = only first)
+DecodingRefreshType : 1 # Random Access 0:none, 1:CRA, 2:IDR, 3:Recovery Point SEI
+GOPSize : 1 # GOP Size (number of B slice = GOPSize-1)
+ReWriteParamSetsFlag : 1 # Write parameter sets with every IRAP
+# Type POC QPoffset QPfactor tcOffsetDiv2 betaOffsetDiv2 temporal_id #ref_pics_active #ref_pics reference pictures
+
+#=========== Motion Search =============
+FastSearch : 1 # 0:Full search 1:TZ search
+SearchRange : 64 # (0: Search range is a Full frame)
+HadamardME : 1 # Use of hadamard measure for fractional ME
+FEN : 1 # Fast encoder decision
+FDM : 1 # Fast Decision for Merge RD cost
+
+#======== Quantization =============
+QP : 32 # Quantization parameter(0-51)
+MaxDeltaQP : 0 # CU-based multi-QP optimization
+MaxCuDQPDepth : 0 # Max depth of a minimum CuDQP for sub-LCU-level delta QP
+DeltaQpRD : 0 # Slice-based multi-QP optimization
+RDOQ : 1 # RDOQ
+RDOQTS : 1 # RDOQ for transform skip
+CbQpOffset : 6
+CrQpOffset : 6
+
+#=========== Deblock Filter ============
+LoopFilterOffsetInPPS : 1 # Dbl params: 0=varying params in SliceHeader, param = base_param + GOP_offset_param; 1 (default) =constant params in PPS, param = base_param)
+LoopFilterDisable : 0 # Disable deblocking filter (0=Filter, 1=No Filter)
+LoopFilterBetaOffset_div2 : 0 # base_param: -6 ~ 6
+LoopFilterTcOffset_div2 : 0 # base_param: -6 ~ 6
+DeblockingFilterMetric : 0 # blockiness metric (automatically configures deblocking parameters in bitstream). Applies slice-level loop filter offsets (LoopFilterOffsetInPPS and LoopFilterDisable must be 0)
+
+#=========== Misc. ============
+InternalBitDepth : 10 # codec operating bit-depth
+
+#=========== Coding Tools =================
+SAO : 1 # Sample adaptive offset (0: OFF, 1: ON)
+AMP : 1 # Asymmetric motion partitions (0: OFF, 1: ON)
+TransformSkip : 1 # Transform skipping (0: OFF, 1: ON)
+TransformSkipFast : 1 # Fast Transform skipping (0: OFF, 1: ON)
+SAOLcuBoundary : 0 # SAOLcuBoundary using non-deblocked pixels (0: OFF, 1: ON)
+
+#============ Slices ================
+SliceMode : 0 # 0: Disable all slice options.
+ # 1: Enforce maximum number of LCU in an slice,
+ # 2: Enforce maximum number of bytes in an 'slice'
+ # 3: Enforce maximum number of tiles in a slice
+SliceArgument : 1500 # Argument for 'SliceMode'.
+ # If SliceMode==1 it represents max. SliceGranularity-sized blocks per slice.
+ # If SliceMode==2 it represents max. bytes per slice.
+ # If SliceMode==3 it represents max. tiles per slice.
+
+LFCrossSliceBoundaryFlag : 1 # In-loop filtering, including ALF and DB, is across or not across slice boundary.
+ # 0:not across, 1: across
+
+#============ PCM ================
+PCMEnabledFlag : 0 # 0: No PCM mode
+PCMLog2MaxSize : 5 # Log2 of maximum PCM block size.
+PCMLog2MinSize : 3 # Log2 of minimum PCM block size.
+PCMInputBitDepthFlag : 1 # 0: PCM bit-depth is internal bit-depth. 1: PCM bit-depth is input bit-depth.
+PCMFilterDisableFlag : 0 # 0: Enable loop filtering on I_PCM samples. 1: Disable loop filtering on I_PCM samples.
+
+#============ Tiles ================
+TileUniformSpacing : 0 # 0: the column boundaries are indicated by TileColumnWidth array, the row boundaries are indicated by TileRowHeight array
+ # 1: the column and row boundaries are distributed uniformly
+NumTileColumnsMinus1 : 0 # Number of tile columns in a picture minus 1
+TileColumnWidthArray : 2 3 # Array containing tile column width values in units of CTU (from left to right in picture)
+NumTileRowsMinus1 : 0 # Number of tile rows in a picture minus 1
+TileRowHeightArray : 2 # Array containing tile row height values in units of CTU (from top to bottom in picture)
+
+LFCrossTileBoundaryFlag : 1 # In-loop filtering is across or not across tile boundary.
+ # 0:not across, 1: across
+
+#============ WaveFront ================
+WaveFrontSynchro : 0 # 0: No WaveFront synchronisation (WaveFrontSubstreams must be 1 in this case).
+ # >0: WaveFront synchronises with the LCU above and to the right by this many LCUs.
+
+#=========== Quantization Matrix =================
+ScalingList : 0 # ScalingList 0 : off, 1 : default, 2 : file read
+ScalingListFile : scaling_list.txt # Scaling List file name. If file is not exist, use Default Matrix.
+
+#============ Lossless ================
+TransquantBypassEnable : 0 # Value of PPS flag.
+CUTransquantBypassFlagForce: 0 # Force transquant bypass mode, when transquant_bypass_enable_flag is enabled
+
+#=========== RExt ============
+ExtendedPrecision : 0 # Increased internal accuracies to support high bit depths (not valid in V1 profiles)
+TransformSkipLog2MaxSize : 2 # Specify transform-skip maximum size. Minimum 2. (not valid in V1 profiles)
+ImplicitResidualDPCM : 1 # Enable implicitly signalled residual DPCM for intra (also known as sample-adaptive intra predict) (not valid in V1 profiles)
+ExplicitResidualDPCM : 1 # Enable explicitly signalled residual DPCM for inter and intra-block-copy (not valid in V1 profiles)
+ResidualRotation : 1 # Enable rotation of transform-skipped and transquant-bypassed TUs through 180 degrees prior to entropy coding (not valid in V1 profiles)
+SingleSignificanceMapContext : 1 # Enable, for transform-skipped and transquant-bypassed TUs, the selection of a single significance map context variable for all coefficients (not valid in V1 profiles)
+IntraReferenceSmoothing : 1 # 0: Disable use of intra reference smoothing (not valid in V1 profiles). 1: Enable use of intra reference smoothing (same as V1)
+GolombRiceParameterAdaptation : 1 # Enable the partial retention of the Golomb-Rice parameter value from one coefficient group to the next
+HighPrecisionPredictionWeighting : 1 # Use high precision option for weighted prediction (not valid in V1 profiles)
+CrossComponentPrediction : 1 # Enable the use of cross-component prediction (not valid in V1 profiles)
+
+#=========== SCC ============
+IntraBlockCopyEnabled : 1 # Enable the use of intra block copying
+HashBasedIntraBlockCopySearchEnabled : 1 # Use hash based search for intra block copying on 8x8 blocks
+IntraBlockCopySearchWidthInCTUs : -1 # Search range for IBC (-1: full frame search)
+IntraBlockCopyNonHashSearchWidthInCTUs : 3 # Search range for IBC non-hash search method (i.e., fast/full search)
+MSEBasedSequencePSNR : 1 # 0:Emit sequence PSNR only as a linear average of the frame PSNRs, 1: also emit a sequence PSNR based on an average of the frame MSEs
+PrintClippedPSNR : 1 # 0:Print lossless PSNR values as 999.99 dB, 1: clip lossless PSNR according to resolution
+PrintFrameMSE : 1 # 0:emit only bit count and PSNRs for each frame, 1: also emit MSE values
+PrintSequenceMSE : 1 # 0:emit only bit rate and PSNRs for the whole sequence, 1 = also emit MSE values
+ColourTransform : 1 # Enable the use of color transform(not valid in V1 profiles)
+PaletteMode : 1 # Enable the use of palette mode(not valid in V1 profiles)
+PaletteMaxSize : 63 # Supported maximum palette size (not valid in V1 profiles)
+PaletteMaxPredSize : 128 # Supported maximum palette predictor size (not valid in V1 profiles)
+IntraBoundaryFilterDisabled : 1 # Disable the use of intra boundary filtering (not valid in V1 profiles)
+TransquantBypassInferTUSplit : 1 # Infer TU splitting for transquant bypass CUs
+PalettePredInSPSEnabled : 0 # Transmit palette predictor initializer in SPS (not valid in V1 profiles)
+PalettePredInPPSEnabled : 0 # Transmit palette predictor initializer in PPS (not valid in V1 profiles)
+SelectiveRDOQ : 1 # Selective RDOQ
+
+### DO NOT ADD ANYTHING BELOW THIS LINE ###
+### DO NOT DELETE THE EMPTY LINE BELOW ###
diff --git a/third_party/jpeg-xl/third_party/dirent.cc b/third_party/jpeg-xl/third_party/dirent.cc
new file mode 100644
index 0000000000..81015ed0fb
--- /dev/null
+++ b/third_party/jpeg-xl/third_party/dirent.cc
@@ -0,0 +1,142 @@
+// Copyright (c) the JPEG XL Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#if defined(_WIN32) || defined(_WIN64)
+#include "third_party/dirent.h"
+
+#include "lib/jxl/base/status.h"
+
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif // NOMINMAX
+#include <windows.h>
+
+#include <memory>
+#include <string>
+
+int mkdir(const char* path, mode_t /*mode*/) {
+ const LPSECURITY_ATTRIBUTES sec = nullptr;
+ if (!CreateDirectory(path, sec)) {
+ JXL_NOTIFY_ERROR("Failed to create directory %s", path);
+ return -1;
+ }
+ return 0;
+}
+
+// Modified from code bearing the following notice:
+// https://trac.wildfiregames.com/browser/ps/trunk/source/lib/sysdep/os/
+/* Copyright (C) 2010 Wildfire Games.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+struct DIR {
+ HANDLE hFind;
+
+ WIN32_FIND_DATA findData; // indeterminate if hFind == INVALID_HANDLE_VALUE
+
+ // readdir will return the address of this member.
+ // (must be stored in DIR to allow multiple independent
+ // opendir/readdir sequences).
+ dirent ent;
+
+ // used by readdir to skip the first FindNextFile.
+ size_t numCalls = 0;
+};
+
+static bool IsValidDirectory(const char* path) {
+ const DWORD fileAttributes = GetFileAttributes(path);
+
+ // path not found
+ if (fileAttributes == INVALID_FILE_ATTRIBUTES) return false;
+
+ // not a directory
+ if ((fileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0) return false;
+
+ return true;
+}
+
+DIR* opendir(const char* path) {
+ if (!IsValidDirectory(path)) {
+ errno = ENOENT;
+ return nullptr;
+ }
+
+ std::unique_ptr<DIR> d(new DIR);
+
+ // NB: "c:\\path" only returns information about that directory;
+ // trailing slashes aren't allowed. append "\\*" to retrieve its entries.
+ std::string searchPath(path);
+ if (searchPath.back() != '/' && searchPath.back() != '\\') {
+ searchPath += '\\';
+ }
+ searchPath += '*';
+
+ // (we don't defer FindFirstFile until readdir because callers
+ // expect us to return 0 if directory reading will/did fail.)
+ d->hFind = FindFirstFile(searchPath.c_str(), &d->findData);
+ if (d->hFind != INVALID_HANDLE_VALUE) return d.release();
+ if (GetLastError() == ERROR_NO_MORE_FILES) return d.release(); // empty
+
+ JXL_NOTIFY_ERROR("Failed to open directory %s", searchPath.c_str());
+ return nullptr;
+}
+
+int closedir(DIR* dir) {
+ delete dir;
+ return 0;
+}
+
+dirent* readdir(DIR* d) {
+ // "empty" case from opendir
+ if (d->hFind == INVALID_HANDLE_VALUE) return nullptr;
+
+ // until end of directory or a valid entry was found:
+ for (;;) {
+ if (d->numCalls++ != 0) // (skip first call to FindNextFile - see opendir)
+ {
+ if (!FindNextFile(d->hFind, &d->findData)) {
+ JXL_ASSERT(GetLastError() == ERROR_NO_MORE_FILES);
+ SetLastError(0);
+ return nullptr; // end of directory or error
+ }
+ }
+
+ // only return non-hidden and non-system entries
+ if ((d->findData.dwFileAttributes &
+ (FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM)) == 0) {
+ d->ent.d_name = d->findData.cFileName;
+ return &d->ent;
+ }
+ }
+}
+
+#endif // #if defined(_WIN32) || defined(_WIN64)
diff --git a/third_party/jpeg-xl/third_party/dirent.h b/third_party/jpeg-xl/third_party/dirent.h
new file mode 100644
index 0000000000..37a08f425b
--- /dev/null
+++ b/third_party/jpeg-xl/third_party/dirent.h
@@ -0,0 +1,49 @@
+// Copyright (c) the JPEG XL Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIB_JXL_THIRD_PARTY_DIRENT_H_
+#define LIB_JXL_THIRD_PARTY_DIRENT_H_
+
+// Emulates POSIX readdir for Windows
+
+#if defined(_WIN32) || defined(_WIN64)
+
+#include <sys/stat.h> // S_IFREG
+
+#ifndef _MODE_T_
+typedef unsigned int mode_t;
+#endif // _MODE_T_
+int mkdir(const char* path, mode_t mode);
+
+struct dirent {
+ char* d_name; // no path
+};
+
+#define stat _stat64
+
+#ifndef S_ISDIR
+#define S_ISDIR(m) (m & S_IFDIR)
+#endif // S_ISDIR
+
+#ifndef S_ISREG
+#define S_ISREG(m) (m & S_IFREG)
+#endif // S_ISREG
+
+struct DIR;
+DIR* opendir(const char* path);
+int closedir(DIR* dir);
+dirent* readdir(DIR* d);
+
+#endif // #if defined(_WIN32) || defined(_WIN64)
+#endif // LIB_JXL_THIRD_PARTY_DIRENT_H_
diff --git a/third_party/jpeg-xl/third_party/lcms2.cmake b/third_party/jpeg-xl/third_party/lcms2.cmake
new file mode 100644
index 0000000000..c4551de862
--- /dev/null
+++ b/third_party/jpeg-xl/third_party/lcms2.cmake
@@ -0,0 +1,77 @@
+# Copyright (c) the JPEG XL Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_library(lcms2 STATIC EXCLUDE_FROM_ALL
+ lcms/src/cmsalpha.c
+ lcms/src/cmscam02.c
+ lcms/src/cmscgats.c
+ lcms/src/cmscnvrt.c
+ lcms/src/cmserr.c
+ lcms/src/cmsgamma.c
+ lcms/src/cmsgmt.c
+ lcms/src/cmshalf.c
+ lcms/src/cmsintrp.c
+ lcms/src/cmsio0.c
+ lcms/src/cmsio1.c
+ lcms/src/cmslut.c
+ lcms/src/cmsmd5.c
+ lcms/src/cmsmtrx.c
+ lcms/src/cmsnamed.c
+ lcms/src/cmsopt.c
+ lcms/src/cmspack.c
+ lcms/src/cmspcs.c
+ lcms/src/cmsplugin.c
+ lcms/src/cmsps2.c
+ lcms/src/cmssamp.c
+ lcms/src/cmssm.c
+ lcms/src/cmstypes.c
+ lcms/src/cmsvirt.c
+ lcms/src/cmswtpnt.c
+ lcms/src/cmsxform.c
+ lcms/src/lcms2_internal.h
+)
+target_include_directories(lcms2
+ PUBLIC "${CMAKE_CURRENT_LIST_DIR}/lcms/include")
+# This warning triggers with gcc-8.
+if (CMAKE_C_COMPILER_ID MATCHES "GNU")
+target_compile_options(lcms2
+ PRIVATE
+ # gcc-only flags.
+ -Wno-stringop-truncation
+ -Wno-strict-aliasing
+)
+endif()
+# By default LCMS uses sizeof(void*) for memory alignment, but in arm 32-bits we
+# can't access doubles not aligned to 8 bytes. This forces the alignment to 8
+# bytes.
+target_compile_definitions(lcms2
+ PRIVATE "-DCMS_PTR_ALIGNMENT=8")
+target_compile_definitions(lcms2
+ PUBLIC "-DCMS_NO_REGISTER_KEYWORD=1")
+
+# Ensure that a thread safe alternative of gmtime is used in LCMS
+include(CheckSymbolExists)
+check_symbol_exists(gmtime_r "time.h" HAVE_GMTIME_R)
+if (HAVE_GMTIME_R)
+ target_compile_definitions(lcms2
+ PUBLIC "-DHAVE_GMTIME_R=1")
+else()
+ check_symbol_exists(gmtime_s "time.h" HAVE_GMTIME_S)
+ if (HAVE_GMTIME_S)
+ target_compile_definitions(lcms2
+ PUBLIC "-DHAVE_GMTIME_S=1")
+ endif()
+endif()
+
+set_property(TARGET lcms2 PROPERTY POSITION_INDEPENDENT_CODE ON)
diff --git a/third_party/jpeg-xl/third_party/sjpeg.cmake b/third_party/jpeg-xl/third_party/sjpeg.cmake
new file mode 100644
index 0000000000..f1a69252ba
--- /dev/null
+++ b/third_party/jpeg-xl/third_party/sjpeg.cmake
@@ -0,0 +1,27 @@
+# Copyright (c) the JPEG XL Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# We need to CACHE the SJPEG_BUILD_EXAMPLES to not be removed by the option()
+# inside SJPEG.
+set(SJPEG_BUILD_EXAMPLES NO CACHE BOOL "Examples")
+# SJPEG uses OpenGL which throws a warning if multiple options are installed.
+# This setting makes it prefer the new version.
+set(OpenGL_GL_PREFERENCE GLVND)
+
+# Build SJPEG as a static library.
+set(BUILD_SHARED_LIBS_BACKUP ${BUILD_SHARED_LIBS})
+set(BUILD_SHARED_LIBS OFF)
+add_subdirectory(sjpeg EXCLUDE_FROM_ALL)
+target_include_directories(sjpeg PUBLIC "${CMAKE_CURRENT_LIST_DIR}/sjpeg/src/")
+set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_BACKUP})
diff --git a/third_party/jpeg-xl/third_party/skcms.cmake b/third_party/jpeg-xl/third_party/skcms.cmake
new file mode 100644
index 0000000000..4d2a79cdbc
--- /dev/null
+++ b/third_party/jpeg-xl/third_party/skcms.cmake
@@ -0,0 +1,51 @@
+# Copyright (c) the JPEG XL Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_library(skcms-obj OBJECT EXCLUDE_FROM_ALL skcms/skcms.cc)
+target_include_directories(skcms-obj PUBLIC "${CMAKE_CURRENT_LIST_DIR}/skcms/")
+
+# This library is meant to be compiled/used by external libs (such as plugins)
+# that need to use skcms. We use a wrapper for libjxl.
+add_library(skcms-interface INTERFACE)
+target_sources(skcms-interface INTERFACE ${CMAKE_CURRENT_LIST_DIR}/skcms/skcms.cc)
+target_include_directories(skcms-interface INTERFACE ${CMAKE_CURRENT_LIST_DIR}/skcms)
+
+include(CheckCXXCompilerFlag)
+check_cxx_compiler_flag("-Wno-psabi" CXX_WPSABI_SUPPORTED)
+if(CXX_WPSABI_SUPPORTED)
+ target_compile_options(skcms-obj PRIVATE -Wno-psabi)
+ target_compile_options(skcms-interface INTERFACE -Wno-psabi)
+endif()
+
+if(JPEGXL_BUNDLE_SKCMS)
+ target_compile_options(skcms-obj PRIVATE -DJPEGXL_BUNDLE_SKCMS=1)
+ if(MSVC)
+ target_compile_options(skcms-obj
+ PRIVATE /FI${CMAKE_CURRENT_SOURCE_DIR}/../lib/jxl/enc_jxl_skcms.h)
+ else()
+ target_compile_options(skcms-obj
+ PRIVATE -include ${CMAKE_CURRENT_SOURCE_DIR}/../lib/jxl/enc_jxl_skcms.h)
+ endif()
+endif()
+
+set_target_properties(skcms-obj PROPERTIES
+ POSITION_INDEPENDENT_CODE ON
+ CXX_VISIBILITY_PRESET hidden
+ VISIBILITY_INLINES_HIDDEN 1
+)
+
+add_library(skcms STATIC EXCLUDE_FROM_ALL $<TARGET_OBJECTS:skcms-obj>)
+target_include_directories(skcms
+ PUBLIC $<TARGET_PROPERTY:skcms-obj,INCLUDE_DIRECTORIES>)
+
diff --git a/third_party/jpeg-xl/third_party/testing.cmake b/third_party/jpeg-xl/third_party/testing.cmake
new file mode 100644
index 0000000000..68368675da
--- /dev/null
+++ b/third_party/jpeg-xl/third_party/testing.cmake
@@ -0,0 +1,85 @@
+# Copyright (c) the JPEG XL Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Enable tests in third_party/ as well.
+enable_testing()
+include(CTest)
+
+set(SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third_party")
+
+if(BUILD_TESTING)
+# Add GTest from source and alias it to what the find_package(GTest) workflow
+# defines. Omitting googletest/ directory would require it to be available in
+# the base system instead, but it would work just fine. This makes packages
+# using GTest and calling find_package(GTest) actually work.
+if (EXISTS "${SOURCE_DIR}/googletest/CMakeLists.txt" AND
+ NOT JPEGXL_FORCE_SYSTEM_GTEST)
+ add_subdirectory(third_party/googletest EXCLUDE_FROM_ALL)
+
+ set(GTEST_ROOT "${SOURCE_DIR}/googletest/googletest")
+ set(GTEST_INCLUDE_DIR "$<TARGET_PROPERTY:INCLUDE_DIRECTORIES,gtest>"
+ CACHE STRING "")
+ set(GMOCK_INCLUDE_DIR "$<TARGET_PROPERTY:INCLUDE_DIRECTORIES,gmock>")
+ set(GTEST_LIBRARY "$<TARGET_FILE:gtest>")
+ set(GTEST_MAIN_LIBRARY "$<TARGET_FILE:gtest_main>")
+ add_library(GTest::gtest ALIAS gtest)
+ add_library(GTest::GTest ALIAS gtest)
+ add_library(GTest::gtest_main ALIAS gtest_main)
+ add_library(GTest::Main ALIAS gtest_main)
+
+ set_target_properties(gtest PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
+ set_target_properties(gmock PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
+ set_target_properties(gtest_main PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
+ set_target_properties(gmock_main PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
+
+ # googletest doesn't compile clean with clang-cl (-Wundef)
+ if (WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+ set_target_properties(gtest PROPERTIES COMPILE_FLAGS "-Wno-error")
+ set_target_properties(gmock PROPERTIES COMPILE_FLAGS "-Wno-error")
+ set_target_properties(gtest_main PROPERTIES COMPILE_FLAGS "-Wno-error")
+ set_target_properties(gmock_main PROPERTIES COMPILE_FLAGS "-Wno-error")
+ endif ()
+ configure_file("${SOURCE_DIR}/googletest/LICENSE"
+ ${PROJECT_BINARY_DIR}/LICENSE.googletest COPYONLY)
+else()
+ if(JPEGXL_DEP_LICENSE_DIR)
+ configure_file("${JPEGXL_DEP_LICENSE_DIR}/googletest/copyright"
+ ${PROJECT_BINARY_DIR}/LICENSE.googletest COPYONLY)
+ endif() # JPEGXL_DEP_LICENSE_DIR
+endif()
+find_package(GTest)
+if (NOT GTEST_FOUND)
+ set(BUILD_TESTING OFF CACHE BOOL "Build tests" FORCE)
+ message(SEND_ERROR "GTest not found. Install googletest package "
+ "(libgtest-dev) in the system or download googletest to "
+ "third_party/googletest from https://github.com/google/googletest ."
+ "To disable tests instead re-run cmake with -DBUILD_TESTING=OFF.")
+endif() # NOT GTEST_FOUND
+
+# Look for gmock in the system too.
+if (NOT DEFINED GMOCK_INCLUDE_DIR)
+ find_path(
+ GMOCK_INCLUDE_DIR "gmock/gmock.h"
+ HINTS ${GTEST_INCLUDE_DIRS})
+ if (NOT GMOCK_INCLUDE_DIR)
+ set(BUILD_TESTING OFF CACHE BOOL "Build tests" FORCE)
+ message(SEND_ERROR "GMock not found. Install googletest package "
+ "(libgmock-dev) in the system or download googletest to "
+ "third_party/googletest from https://github.com/google/googletest ."
+ "To disable tests instead re-run cmake with -DBUILD_TESTING=OFF.")
+ else()
+ message(STATUS "Found GMock: ${GMOCK_INCLUDE_DIR}")
+ endif() # NOT GMOCK_INCLUDE_DIR
+endif() # NOT DEFINED GMOCK_INCLUDE_DIR
+endif() # BUILD_TESTING