diff options
Diffstat (limited to 'gfx/qcms')
33 files changed, 6793 insertions, 0 deletions
diff --git a/gfx/qcms/Cargo.toml b/gfx/qcms/Cargo.toml new file mode 100644 index 0000000000..e330244d01 --- /dev/null +++ b/gfx/qcms/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "qcms" +authors = ["Jeff Muizelaar", "Benoit Girard", "Andrew Osmond"] +version = "0.2.0" +edition = "2018" +include = ["src/**/*", "build.rs"] +description = "lightweight color management" +documentation = "https://docs.rs/qcms" +license = "MIT" +repository = "https://github.com/FirefoxGraphics/qcms" +keywords = ["color"] +categories = ["graphics"] + +[features] +default = [] +c_bindings = ["libc"] +iccv4-enabled = [] + +[dependencies] +libc = {version = "0.2", optional = true } diff --git a/gfx/qcms/README.md b/gfx/qcms/README.md new file mode 100644 index 0000000000..1997abcabe --- /dev/null +++ b/gfx/qcms/README.md @@ -0,0 +1,36 @@ +# qcms +[![Crates.io](https://img.shields.io/crates/v/qcms.svg)](https://crates.io/crates/qcms) +[![Documentation](https://docs.rs/qcms/badge.svg)](https://docs.rs/qcms) + + +Firefox's library for transforming image data between ICC profiles. + +## Example +```rust + // Decode the jpeg + let mut d = jpeg_decoder::Decoder::new(std::fs::File::open("/Users/jrmuizel/Desktop/DSCF2460.jpg").unwrap()); + let mut data = d.decode().unwrap(); + let info = d.info().unwrap(); + + // Extract the profile after decode + let profile = d.icc_profile().unwrap(); + + // Create a new qcms Profile + let input = qcms::Profile::new_from_slice(&profile).unwrap(); + let mut output = qcms::Profile::new_sRGB(); + output.precache_output_transform(); + + // Create a transform between input and output profiles and apply it. 
+ let xfm = qcms::Transform::new(&input, &output, qcms::DataType::RGB8, qcms::Intent::default()).unwrap(); + xfm.apply(&mut data); + + // write the result to a PNG + let mut encoder = png::Encoder::new(std::fs::File::create("out.png").unwrap(), info.width as u32, info.height as u32); + encoder.set_color(png::ColorType::Rgb); + encoder.set_srgb(png::SrgbRenderingIntent::Perceptual); + let mut writer = encoder.write_header().unwrap(); + writer.write_image_data(&data).unwrap(); // Save +``` + +This library was originally written in C, was converted to Rust using [c2rust](https://c2rust.com/), and then refactored to be mostly +safe and more idiomatic Rust. diff --git a/gfx/qcms/build.rs b/gfx/qcms/build.rs new file mode 100644 index 0000000000..26ae7dcc22 --- /dev/null +++ b/gfx/qcms/build.rs @@ -0,0 +1,7 @@ +fn main() { + println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1"); + let target = std::env::var("TARGET").expect("TARGET environment variable not defined"); + if target.contains("neon") { + println!("cargo:rustc-cfg=libcore_neon"); + } +} diff --git a/gfx/qcms/fuzz/.gitignore b/gfx/qcms/fuzz/.gitignore new file mode 100644 index 0000000000..572e03bdf3 --- /dev/null +++ b/gfx/qcms/fuzz/.gitignore @@ -0,0 +1,4 @@ + +target +corpus +artifacts diff --git a/gfx/qcms/fuzz/Cargo.lock b/gfx/qcms/fuzz/Cargo.lock new file mode 100644 index 0000000000..6d350aaa53 --- /dev/null +++ b/gfx/qcms/fuzz/Cargo.lock @@ -0,0 +1,45 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+[[package]] +name = "arbitrary" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0922a3e746b5a44e111e5603feb6704e5cc959116f66737f50bb5cbd264e9d87" + +[[package]] +name = "cc" +version = "1.0.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef611cc68ff783f18535d77ddd080185275713d852c4f5cbb6122c462a7a825c" + +[[package]] +name = "libc" +version = "0.2.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f96b10ec2560088a8e76961b00d47107b3a625fecb76dedb29ee7ccbf98235" + +[[package]] +name = "libfuzzer-sys" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee8c42ab62f43795ed77a965ed07994c5584cdc94fd0ebf14b22ac1524077acc" +dependencies = [ + "arbitrary", + "cc", +] + +[[package]] +name = "qcms" +version = "0.2.0" +dependencies = [ + "libc", +] + +[[package]] +name = "qcms-fuzz" +version = "0.0.0" +dependencies = [ + "libc", + "libfuzzer-sys", + "qcms", +] diff --git a/gfx/qcms/fuzz/Cargo.toml b/gfx/qcms/fuzz/Cargo.toml new file mode 100644 index 0000000000..076c4355ff --- /dev/null +++ b/gfx/qcms/fuzz/Cargo.toml @@ -0,0 +1,28 @@ + +[package] +name = "qcms-fuzz" +version = "0.0.0" +authors = ["Automatically generated"] +publish = false +edition = "2018" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.3" +libc = "0.2" + +[dependencies.qcms] +path = ".." 
+features = ["c_bindings"] + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "fuzz_target_qcms" +path = "fuzz_targets/fuzz_target_qcms.rs" +test = false +doc = false diff --git a/gfx/qcms/fuzz/fuzz_targets/fuzz_target_qcms.rs b/gfx/qcms/fuzz/fuzz_targets/fuzz_target_qcms.rs new file mode 100644 index 0000000000..22d9737d3f --- /dev/null +++ b/gfx/qcms/fuzz/fuzz_targets/fuzz_target_qcms.rs @@ -0,0 +1,94 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; +extern crate qcms; +extern crate libc; +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +use qcms::c_bindings::{qcms_profile, icSigRgbData, qcms_profile_is_bogus, icSigGrayData}; +use qcms::c_bindings::{qcms_profile_get_color_space, qcms_profile_get_rendering_intent, qcms_profile_from_memory, qcms_profile_release, qcms_profile_sRGB, qcms_transform_create}; +use qcms::c_bindings::{qcms_profile_precache_output_transform, qcms_transform_data, qcms_transform_release, qcms_enable_iccv4}; + +use qcms::DataType::*; + + unsafe fn transform(src_profile: *mut qcms_profile, dst_profile: *mut qcms_profile, size: usize) + { + // qcms supports GRAY and RGB profiles as input, and RGB as output. 
+ + let src_color_space = qcms_profile_get_color_space(&*src_profile); + let mut src_type = if (size & 1) != 0 { RGBA8 } else { RGB8 }; + if src_color_space == icSigGrayData { + src_type = if (size & 1) != 0 { GrayA8 } else { Gray8 }; + } else if src_color_space != icSigRgbData { + return; + } + + let dst_color_space = qcms_profile_get_color_space(&*dst_profile); + if dst_color_space != icSigRgbData { + return; + } + let dst_type = if (size & 2) != 0 { RGBA8 } else { RGB8 }; + + let intent = qcms_profile_get_rendering_intent(&*src_profile); + // Firefox calls this on the display profile to increase performance. + // Skip with low probability to increase coverage. + if (size % 15) != 0 { + qcms_profile_precache_output_transform(&mut *dst_profile); + } + + let transform = + qcms_transform_create(&*src_profile, src_type, &*dst_profile, dst_type, intent); + if transform == std::ptr::null_mut() { + return; + } + + const SRC_SIZE: usize = 36; + let src:[u8; SRC_SIZE] = [ + 0x7F, 0x7F, 0x7F, 0x00, 0x00, 0x7F, 0x7F, 0xFF, 0x7F, 0x10, 0x20, 0x30, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xB0, 0xBF, 0xEF, 0x6F, + 0x3F, 0xC0, 0x9F, 0xE0, 0x90, 0xCF, 0x40, 0xAF, 0x0F, 0x01, 0x60, 0xF0, + ]; + let mut dst: [u8; 36 * 4] = [0; 144]; // 4x in case of GRAY to RGBA + + qcms_transform_data(&*transform, src.as_ptr() as *const libc::c_void, dst.as_mut_ptr() as *mut libc::c_void, (SRC_SIZE / src_type.bytes_per_pixel()) as usize); + qcms_transform_release(transform); + } + + unsafe fn do_fuzz(data: &[u8]) + { + let size = data.len(); + qcms_enable_iccv4(); + + let profile = qcms_profile_from_memory(data.as_ptr() as *const libc::c_void, size); + if profile == std::ptr::null_mut() { + return; + } + + let srgb_profile = qcms_profile_sRGB(); + if srgb_profile == std::ptr::null_mut() { + qcms_profile_release(profile); + return; + } + + transform(profile, srgb_profile, size); + + // Firefox only checks the display (destination) profile. 
+ if !qcms_profile_is_bogus(&mut *profile) { + + transform(srgb_profile, profile, size); + + } + qcms_profile_release(profile); + qcms_profile_release(srgb_profile); + + return; + } + + + +fuzz_target!(|data: &[u8]| { + unsafe { do_fuzz(data) } +}); diff --git a/gfx/qcms/fuzz/qcms_fuzzer.dict b/gfx/qcms/fuzz/qcms_fuzzer.dict new file mode 100644 index 0000000000..213193c7d1 --- /dev/null +++ b/gfx/qcms/fuzz/qcms_fuzzer.dict @@ -0,0 +1,26 @@ +# v2 +0x41324230="A2B0" +0x42324130="B2A0" +0x47524159="GRAY" +0x4C616220="Lab " +0x52474220="RGB " +0x58595a20="XYZ " +0x62545243="bTRC" +0x6258595a="bXYZ" +0x63686164="chad" +0x63757276="curv" +0x67545243="gTRC" +0x6758595a="gXYZ" +0x6D667431="mft1" +0x6D667432="mft2" +0x6b545243="kTRC" +0x6d6e7472="mntr" +0x72545243="rTRC" +0x7258595a="rXYZ" +0x73636e72="scnr" +0x73663332="sf32" + +# v4 +0x6D414220="mAB " +0x6D424120="mBA " +0x70617261="para" diff --git a/gfx/qcms/fuzz/samples/0220-ca351238d719fd07ef8607d326b398fe.icc b/gfx/qcms/fuzz/samples/0220-ca351238d719fd07ef8607d326b398fe.icc Binary files differnew file mode 100644 index 0000000000..6dcf942ac1 --- /dev/null +++ b/gfx/qcms/fuzz/samples/0220-ca351238d719fd07ef8607d326b398fe.icc diff --git a/gfx/qcms/fuzz/samples/0316-eb3f97ab646cd7b66bee80bdfe6098ac.icc b/gfx/qcms/fuzz/samples/0316-eb3f97ab646cd7b66bee80bdfe6098ac.icc Binary files differnew file mode 100644 index 0000000000..12b096cac0 --- /dev/null +++ b/gfx/qcms/fuzz/samples/0316-eb3f97ab646cd7b66bee80bdfe6098ac.icc diff --git a/gfx/qcms/fuzz/samples/0372-973178997787ee780b4b58ee47cad683.icc b/gfx/qcms/fuzz/samples/0372-973178997787ee780b4b58ee47cad683.icc Binary files differnew file mode 100644 index 0000000000..2d8efe536b --- /dev/null +++ b/gfx/qcms/fuzz/samples/0372-973178997787ee780b4b58ee47cad683.icc diff --git a/gfx/qcms/fuzz/samples/0732-80707d91aea0f8e64ef0286cc7720e99.icc b/gfx/qcms/fuzz/samples/0732-80707d91aea0f8e64ef0286cc7720e99.icc Binary files differnew file mode 100644 index 0000000000..1626458464 --- 
/dev/null +++ b/gfx/qcms/fuzz/samples/0732-80707d91aea0f8e64ef0286cc7720e99.icc diff --git a/gfx/qcms/fuzz/samples/0744-0a5faafe175e682b10c590b03d3f093b.icc b/gfx/qcms/fuzz/samples/0744-0a5faafe175e682b10c590b03d3f093b.icc Binary files differnew file mode 100644 index 0000000000..2db6991c23 --- /dev/null +++ b/gfx/qcms/fuzz/samples/0744-0a5faafe175e682b10c590b03d3f093b.icc diff --git a/gfx/qcms/fuzz/samples/1809-2bd4b77651214ca6110fdbee2502671e.icc b/gfx/qcms/fuzz/samples/1809-2bd4b77651214ca6110fdbee2502671e.icc Binary files differnew file mode 100644 index 0000000000..c13db9b200 --- /dev/null +++ b/gfx/qcms/fuzz/samples/1809-2bd4b77651214ca6110fdbee2502671e.icc diff --git a/gfx/qcms/moz.build b/gfx/qcms/moz.build new file mode 100644 index 0000000000..1e899c7a46 --- /dev/null +++ b/gfx/qcms/moz.build @@ -0,0 +1,10 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +EXPORTS += [ + 'qcms.h', + 'qcmstypes.h', +] diff --git a/gfx/qcms/profiles/B2A0-ident.icc b/gfx/qcms/profiles/B2A0-ident.icc Binary files differnew file mode 100644 index 0000000000..672553030f --- /dev/null +++ b/gfx/qcms/profiles/B2A0-ident.icc diff --git a/gfx/qcms/profiles/displaycal-lut-stripped.icc b/gfx/qcms/profiles/displaycal-lut-stripped.icc Binary files differnew file mode 100644 index 0000000000..79ddef53e9 --- /dev/null +++ b/gfx/qcms/profiles/displaycal-lut-stripped.icc diff --git a/gfx/qcms/profiles/lcms_samsung_syncmaster.icc b/gfx/qcms/profiles/lcms_samsung_syncmaster.icc Binary files differnew file mode 100644 index 0000000000..3dcde88d06 --- /dev/null +++ b/gfx/qcms/profiles/lcms_samsung_syncmaster.icc diff --git a/gfx/qcms/profiles/lcms_thinkpad_w540.icc b/gfx/qcms/profiles/lcms_thinkpad_w540.icc Binary files differnew file mode 100644 index 0000000000..c154e7e589 --- /dev/null +++ b/gfx/qcms/profiles/lcms_thinkpad_w540.icc diff --git a/gfx/qcms/qcms.h b/gfx/qcms/qcms.h new file mode 100644 index 0000000000..ae889680bc --- /dev/null +++ b/gfx/qcms/qcms.h @@ -0,0 +1,191 @@ +#ifndef QCMS_H +#define QCMS_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* if we've already got an ICC_H header we can ignore the following */ +#ifndef ICC_H +/* icc34 defines */ + +/***************************************************************** + Copyright (c) 1994-1996 SunSoft, Inc. 
+ + Rights Reserved + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without restrict- +ion, including without limitation the rights to use, copy, modify, +merge, publish distribute, sublicense, and/or sell copies of the +Software, and to permit persons to whom the Software is furnished +to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON- +INFRINGEMENT. IN NO EVENT SHALL SUNSOFT, INC. OR ITS PARENT +COMPANY BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +Except as contained in this notice, the name of SunSoft, Inc. +shall not be used in advertising or otherwise to promote the +sale, use or other dealings in this Software without written +authorization from SunSoft Inc. +******************************************************************/ + +/* + * QCMS, in general, is not threadsafe. However, it should be safe to create + * profile and transformation objects on different threads, so long as you + * don't use the same objects on different threads at the same time. 
+ */ + +/* + * Color Space Signatures + * Note that only icSigXYZData and icSigLabData are valid + * Profile Connection Spaces (PCSs) + */ +typedef enum { + icSigXYZData = 0x58595A20L, /* 'XYZ ' */ + icSigLabData = 0x4C616220L, /* 'Lab ' */ + icSigLuvData = 0x4C757620L, /* 'Luv ' */ + icSigYCbCrData = 0x59436272L, /* 'YCbr' */ + icSigYxyData = 0x59787920L, /* 'Yxy ' */ + icSigRgbData = 0x52474220L, /* 'RGB ' */ + icSigGrayData = 0x47524159L, /* 'GRAY' */ + icSigHsvData = 0x48535620L, /* 'HSV ' */ + icSigHlsData = 0x484C5320L, /* 'HLS ' */ + icSigCmykData = 0x434D594BL, /* 'CMYK' */ + icSigCmyData = 0x434D5920L, /* 'CMY ' */ + icSig2colorData = 0x32434C52L, /* '2CLR' */ + icSig3colorData = 0x33434C52L, /* '3CLR' */ + icSig4colorData = 0x34434C52L, /* '4CLR' */ + icSig5colorData = 0x35434C52L, /* '5CLR' */ + icSig6colorData = 0x36434C52L, /* '6CLR' */ + icSig7colorData = 0x37434C52L, /* '7CLR' */ + icSig8colorData = 0x38434C52L, /* '8CLR' */ + icSig9colorData = 0x39434C52L, /* '9CLR' */ + icSig10colorData = 0x41434C52L, /* 'ACLR' */ + icSig11colorData = 0x42434C52L, /* 'BCLR' */ + icSig12colorData = 0x43434C52L, /* 'CCLR' */ + icSig13colorData = 0x44434C52L, /* 'DCLR' */ + icSig14colorData = 0x45434C52L, /* 'ECLR' */ + icSig15colorData = 0x46434C52L, /* 'FCLR' */ + icMaxEnumData = 0xFFFFFFFFL +} icColorSpaceSignature; +#endif + +#include <stdio.h> +#include <stdbool.h> + +struct _qcms_transform; +typedef struct _qcms_transform qcms_transform; + +struct _qcms_profile; +typedef struct _qcms_profile qcms_profile; + +/* these values match the Rendering Intent values from the ICC spec */ +typedef enum { + QCMS_INTENT_MIN = 0, + QCMS_INTENT_PERCEPTUAL = 0, + QCMS_INTENT_RELATIVE_COLORIMETRIC = 1, + QCMS_INTENT_SATURATION = 2, + QCMS_INTENT_ABSOLUTE_COLORIMETRIC = 3, + QCMS_INTENT_MAX = 3, + + /* Chris Murphy (CM consultant) suggests this as a default in the event that we + * cannot reproduce relative + Black Point Compensation. 
BPC brings an + * unacceptable performance overhead, so we go with perceptual. */ + QCMS_INTENT_DEFAULT = QCMS_INTENT_PERCEPTUAL, +} qcms_intent; + +//XXX: I don't really like the _DATA_ prefix +typedef enum { + QCMS_DATA_RGB_8, + QCMS_DATA_RGBA_8, + QCMS_DATA_BGRA_8, + QCMS_DATA_GRAY_8, + QCMS_DATA_GRAYA_8 +} qcms_data_type; + +/* the names for the following two types are sort of ugly */ +typedef struct +{ + double x; + double y; + double Y; +} qcms_CIE_xyY; + +typedef struct +{ + qcms_CIE_xyY red; + qcms_CIE_xyY green; + qcms_CIE_xyY blue; +} qcms_CIE_xyYTRIPLE; + +/* The per-channel gammas are positional: red, then green, then blue. */ +qcms_profile* qcms_profile_create_rgb_with_gamma_set( + qcms_CIE_xyY white_point, + qcms_CIE_xyYTRIPLE primaries, + float redGamma, + float greenGamma, + float blueGamma); + +qcms_profile* qcms_profile_create_rgb_with_gamma( + qcms_CIE_xyY white_point, + qcms_CIE_xyYTRIPLE primaries, + float gamma); + +void qcms_data_create_rgb_with_gamma( + qcms_CIE_xyY white_point, + qcms_CIE_xyYTRIPLE primaries, + float gamma, + void **mem, + size_t *size); + +qcms_profile* qcms_profile_from_memory(const void *mem, size_t size); + +qcms_profile* qcms_profile_from_file(FILE *file); +qcms_profile* qcms_profile_from_path(const char *path); + +void qcms_data_from_path(const char *path, void **mem, size_t *size); + +#ifdef _WIN32 +qcms_profile* qcms_profile_from_unicode_path(const wchar_t *path); +void qcms_data_from_unicode_path(const wchar_t *path, void **mem, size_t *size); +#endif + +qcms_CIE_xyY qcms_white_point_sRGB(void); +qcms_profile* qcms_profile_sRGB(void); + +void qcms_profile_release(qcms_profile *profile); + +bool qcms_profile_is_bogus(qcms_profile *profile); +qcms_intent qcms_profile_get_rendering_intent(qcms_profile *profile); +icColorSpaceSignature qcms_profile_get_color_space(qcms_profile *profile); + +void qcms_profile_precache_output_transform(qcms_profile *profile); + +qcms_transform* qcms_transform_create( + qcms_profile *in, qcms_data_type in_type, + qcms_profile* out, qcms_data_type out_type, + 
qcms_intent intent); + +void qcms_transform_release(qcms_transform *); + +void qcms_transform_data(qcms_transform *transform, const void *src, void *dest, size_t length); + +void qcms_enable_iccv4(); +void qcms_enable_neon(); +void qcms_enable_avx(); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/gfx/qcms/qcmsint.h b/gfx/qcms/qcmsint.h new file mode 100644 index 0000000000..b08fc9490b --- /dev/null +++ b/gfx/qcms/qcmsint.h @@ -0,0 +1,120 @@ +/* vim: set ts=8 sw=8 noexpandtab: */ +#ifndef QCMS_INT_H +#define QCMS_INT_H + +#include "qcms.h" +#include "qcmstypes.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _MSC_VER +#define ALIGN __declspec(align(16)) +#else +#define ALIGN __attribute__(( aligned (16) )) +#endif + +struct _qcms_transform; + +typedef void (*transform_fn_t)(const struct _qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length); + + +void qcms_transform_data_rgb_out_lut(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); +void qcms_transform_data_rgba_out_lut(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); +void qcms_transform_data_bgra_out_lut(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); + +void qcms_transform_data_rgb_out_lut_precache(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); +void qcms_transform_data_rgba_out_lut_precache(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); +void qcms_transform_data_bgra_out_lut_precache(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); + +void qcms_transform_data_rgb_out_lut_avx(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); +void qcms_transform_data_rgba_out_lut_avx(const qcms_transform 
*transform, + const unsigned char *src, + unsigned char *dest, + size_t length); +void qcms_transform_data_bgra_out_lut_avx(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); +void qcms_transform_data_rgb_out_lut_sse2(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); +void qcms_transform_data_rgba_out_lut_sse2(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); +void qcms_transform_data_bgra_out_lut_sse2(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); +void qcms_transform_data_rgb_out_lut_sse1(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); +void qcms_transform_data_rgba_out_lut_sse1(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); +void qcms_transform_data_bgra_out_lut_sse1(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); + +void qcms_transform_data_rgb_out_lut_altivec(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); +void qcms_transform_data_rgba_out_lut_altivec(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); +void qcms_transform_data_bgra_out_lut_altivec(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); + +void qcms_transform_data_rgb_out_lut_neon(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); +void qcms_transform_data_rgba_out_lut_neon(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); +void qcms_transform_data_bgra_out_lut_neon(const qcms_transform *transform, + const unsigned char *src, + unsigned char *dest, + size_t length); + +extern 
bool qcms_supports_iccv4; +extern bool qcms_supports_neon; +extern bool qcms_supports_avx; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/gfx/qcms/qcmstypes.h b/gfx/qcms/qcmstypes.h new file mode 100644 index 0000000000..7f6c70f12c --- /dev/null +++ b/gfx/qcms/qcmstypes.h @@ -0,0 +1,59 @@ +#ifndef QCMS_TYPES_H +#define QCMS_TYPES_H + +#ifdef __cplusplus +extern "C" { +#endif + +#if !defined(IS_LITTLE_ENDIAN) && !defined(IS_BIG_ENDIAN) +#if BYTE_ORDER == LITTLE_ENDIAN +#define IS_LITTLE_ENDIAN +#elif BYTE_ORDER == BIG_ENDIAN +#define IS_BIG_ENDIAN +#endif + +/* all of the platforms that we use _MSC_VER on are little endian + * so this is sufficient for now */ +#ifdef _MSC_VER +#define IS_LITTLE_ENDIAN +#endif + +#ifdef __OS2__ +#define IS_LITTLE_ENDIAN +#endif +#endif + +#if !defined(IS_LITTLE_ENDIAN) && !defined(IS_BIG_ENDIAN) +#error Unknown endianess +#endif + +#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi) || defined (__sun) || defined (sun) || defined (__digital__) +# include <inttypes.h> +#elif defined (_MSC_VER) && _MSC_VER < 1600 +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#ifdef _WIN64 +typedef unsigned __int64 uintptr_t; +#else +typedef unsigned long uintptr_t; +#endif + +#elif defined (_AIX) +# include <sys/inttypes.h> +#else +# include <stdint.h> +#endif + +#include <stdbool.h> + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/gfx/qcms/src/c_bindings.rs b/gfx/qcms/src/c_bindings.rs new file mode 100644 index 0000000000..912601ac11 --- /dev/null +++ b/gfx/qcms/src/c_bindings.rs @@ -0,0 +1,368 @@ +use std::{ptr::null_mut, slice}; + +use libc::{fclose, fopen, fread, free, malloc, memset, FILE}; + +use crate::{ + double_to_s15Fixed16Number, + iccread::*, + matrix::Matrix, + transform::get_rgb_colorants, + 
transform::DataType, + transform::{qcms_transform, transform_create}, + Intent, +}; + +#[no_mangle] +pub extern "C" fn qcms_profile_sRGB() -> *mut Profile { + let profile = Profile::new_sRGB(); + Box::into_raw(profile) +} + +//XXX: it would be nice if we had a way of ensuring +// everything in a profile was initialized regardless of how it was created +//XXX: should this also be taking a black_point? +/* similar to CGColorSpaceCreateCalibratedRGB */ +#[no_mangle] +pub unsafe extern "C" fn qcms_profile_create_rgb_with_gamma_set( + white_point: qcms_CIE_xyY, + primaries: qcms_CIE_xyYTRIPLE, + redGamma: f32, + greenGamma: f32, + blueGamma: f32, +) -> *mut Profile { + let profile = + Profile::new_rgb_with_gamma_set(white_point, primaries, redGamma, greenGamma, blueGamma); + match profile { + Some(profile) => Box::into_raw(profile), + None => null_mut(), + } +} + +#[no_mangle] +pub unsafe extern "C" fn qcms_profile_create_gray_with_gamma(gamma: f32) -> *mut Profile { + let profile = Profile::new_gray_with_gamma(gamma); + Box::into_raw(profile) +} + +#[no_mangle] +pub unsafe extern "C" fn qcms_profile_create_rgb_with_gamma( + white_point: qcms_CIE_xyY, + primaries: qcms_CIE_xyYTRIPLE, + gamma: f32, +) -> *mut Profile { + qcms_profile_create_rgb_with_gamma_set(white_point, primaries, gamma, gamma, gamma) +} + +#[no_mangle] +pub unsafe extern "C" fn qcms_profile_create_rgb_with_table( + white_point: qcms_CIE_xyY, + primaries: qcms_CIE_xyYTRIPLE, + table: *const u16, + num_entries: i32, +) -> *mut Profile { + let table = slice::from_raw_parts(table, num_entries as usize); + let profile = Profile::new_rgb_with_table(white_point, primaries, table); + match profile { + Some(profile) => Box::into_raw(profile), + None => null_mut(), + } +} + +/* qcms_profile_from_memory does not hold a reference to the memory passed in */ +#[no_mangle] +pub unsafe extern "C" fn qcms_profile_from_memory( + mem: *const libc::c_void, + size: usize, +) -> *mut Profile { + let mem = 
slice::from_raw_parts(mem as *const libc::c_uchar, size); + let profile = Profile::new_from_slice(mem); + match profile { + Some(profile) => Box::into_raw(profile), + None => null_mut(), + } +} + +#[no_mangle] +pub extern "C" fn qcms_profile_get_rendering_intent(profile: &Profile) -> Intent { + profile.rendering_intent +} +#[no_mangle] +pub extern "C" fn qcms_profile_get_color_space(profile: &Profile) -> icColorSpaceSignature { + profile.color_space +} + +#[no_mangle] +pub unsafe extern "C" fn qcms_profile_release(profile: *mut Profile) { + drop(Box::from_raw(profile)); +} +unsafe extern "C" fn qcms_data_from_file( + file: *mut FILE, + mem: *mut *mut libc::c_void, + size: *mut usize, +) { + let length: u32; + let remaining_length: u32; + let read_length: usize; + let mut length_be: be32 = 0; + let data: *mut libc::c_void; + *mem = std::ptr::null_mut::<libc::c_void>(); + *size = 0; + if fread( + &mut length_be as *mut be32 as *mut libc::c_void, + 1, + ::std::mem::size_of::<be32>(), + file, + ) != ::std::mem::size_of::<be32>() + { + return; + } + length = u32::from_be(length_be); + if length > MAX_PROFILE_SIZE as libc::c_uint + || (length as libc::c_ulong) < ::std::mem::size_of::<be32>() as libc::c_ulong + { + return; + } + /* allocate room for the entire profile */ + data = malloc(length as usize); + if data.is_null() { + return; + } + /* copy in length to the front so that the buffer will contain the entire profile */ + *(data as *mut be32) = length_be; + remaining_length = + (length as libc::c_ulong - ::std::mem::size_of::<be32>() as libc::c_ulong) as u32; + /* read the rest profile */ + read_length = fread( + (data as *mut libc::c_uchar).add(::std::mem::size_of::<be32>()) as *mut libc::c_void, + 1, + remaining_length as usize, + file, + ) as usize; + if read_length != remaining_length as usize { + free(data); + return; + } + /* successfully get the profile.*/ + *mem = data; + *size = length as usize; +} + +#[no_mangle] +pub unsafe extern "C" fn 
qcms_profile_from_file(file: *mut FILE) -> *mut Profile { + let mut length: usize = 0; + let profile: *mut Profile; + let mut data: *mut libc::c_void = std::ptr::null_mut::<libc::c_void>(); + qcms_data_from_file(file, &mut data, &mut length); + if data.is_null() || length == 0 { + return std::ptr::null_mut::<Profile>(); + } + profile = qcms_profile_from_memory(data, length); + free(data); + profile +} +#[no_mangle] +pub unsafe extern "C" fn qcms_profile_from_path(path: *const libc::c_char) -> *mut Profile { + let mut profile: *mut Profile = std::ptr::null_mut::<Profile>(); + let file = fopen(path, b"rb\x00" as *const u8 as *const libc::c_char); + if !file.is_null() { + profile = qcms_profile_from_file(file); + fclose(file); + } + profile +} +#[no_mangle] +pub unsafe extern "C" fn qcms_data_from_path( + path: *const libc::c_char, + mem: *mut *mut libc::c_void, + size: *mut usize, +) { + *mem = std::ptr::null_mut::<libc::c_void>(); + *size = 0; + let file = fopen(path, b"rb\x00" as *const u8 as *const libc::c_char); + if !file.is_null() { + qcms_data_from_file(file, mem, size); + fclose(file); + }; +} + +#[cfg(windows)] +extern "C" { + pub fn _wfopen(filename: *const libc::wchar_t, mode: *const libc::wchar_t) -> *mut FILE; +} + +#[cfg(windows)] +#[no_mangle] +pub unsafe extern "C" fn qcms_profile_from_unicode_path(path: *const libc::wchar_t) { + let file = _wfopen(path, ['r' as u16, 'b' as u16, '\0' as u16].as_ptr()); + if !file.is_null() { + qcms_profile_from_file(file); + fclose(file); + }; +} + +#[cfg(windows)] +#[no_mangle] +pub unsafe extern "C" fn qcms_data_from_unicode_path( + path: *const libc::wchar_t, + mem: *mut *mut libc::c_void, + size: *mut usize, +) { + *mem = 0 as *mut libc::c_void; + *size = 0; + let file = _wfopen(path, ['r' as u16, 'b' as u16, '\0' as u16].as_ptr()); + if !file.is_null() { + qcms_data_from_file(file, mem, size); + fclose(file); + }; +} + +#[no_mangle] +pub extern "C" fn qcms_transform_create( + in_0: &Profile, + in_type: DataType, 
+ out: &Profile, + out_type: DataType, + intent: Intent, +) -> *mut qcms_transform { + let transform = transform_create(in_0, in_type, out, out_type, intent); + match transform { + Some(transform) => Box::into_raw(transform), + None => null_mut(), + } +} + +#[no_mangle] +pub unsafe extern "C" fn qcms_data_create_rgb_with_gamma( + white_point: qcms_CIE_xyY, + primaries: qcms_CIE_xyYTRIPLE, + gamma: f32, + mem: *mut *mut libc::c_void, + size: *mut usize, +) { + let length: u32; + let mut index: u32; + let xyz_count: u32; + let trc_count: u32; + let mut tag_table_offset: usize; + let mut tag_data_offset: usize; + let data: *mut libc::c_void; + let mut colorants: Matrix = Matrix { + m: [[0.; 3]; 3], + invalid: false, + }; + let TAG_XYZ: [u32; 3] = [TAG_rXYZ, TAG_gXYZ, TAG_bXYZ]; + let TAG_TRC: [u32; 3] = [TAG_rTRC, TAG_gTRC, TAG_bTRC]; + if mem.is_null() || size.is_null() { + return; + } + *mem = std::ptr::null_mut::<libc::c_void>(); + *size = 0; + /* + * total length = icc profile header(128) + tag count(4) + + * (tag table item (12) * total tag (6 = 3 rTRC + 3 rXYZ)) + rXYZ elements data (3 * 20) + * + rTRC elements data (3 * 16), and all tag data elements must start at the 4-byte boundary. + */ + xyz_count = 3; // rXYZ, gXYZ, bXYZ + trc_count = 3; // rTRC, gTRC, bTRC + length = + (128 + 4) as libc::c_uint + 12 * (xyz_count + trc_count) + xyz_count * 20 + trc_count * 16; + // reserve the total memory. + data = malloc(length as usize); + if data.is_null() { + return; + } + memset(data, 0, length as usize); + // Part1 : write rXYZ, gXYZ and bXYZ + if !get_rgb_colorants(&mut colorants, white_point, primaries) { + free(data); + return; + } + let data = std::slice::from_raw_parts_mut(data as *mut u8, length as usize); + // the position of first tag's signature in tag table + tag_table_offset = (128 + 4) as usize; // the start of tag data elements. 
+ tag_data_offset = ((128 + 4) as libc::c_uint + 12 * (xyz_count + trc_count)) as usize; + index = 0; + while index < xyz_count { + // tag table + write_u32(data, tag_table_offset, TAG_XYZ[index as usize]); // 20 bytes per TAG_(r/g/b)XYZ tag element + write_u32(data, tag_table_offset + 4, tag_data_offset as u32); + write_u32(data, tag_table_offset + 8, 20); + // tag data element + write_u32(data, tag_data_offset, XYZ_TYPE); + // reserved 4 bytes. + write_u32( + data, + tag_data_offset + 8, + double_to_s15Fixed16Number(colorants.m[0][index as usize] as f64) as u32, + ); + write_u32( + data, + tag_data_offset + 12, + double_to_s15Fixed16Number(colorants.m[1][index as usize] as f64) as u32, + ); + write_u32( + data, + tag_data_offset + 16, + double_to_s15Fixed16Number(colorants.m[2][index as usize] as f64) as u32, + ); + tag_table_offset += 12; + tag_data_offset += 20; + index += 1 + } + // Part2 : write rTRC, gTRC and bTRC + index = 0; + while index < trc_count { + // tag table + write_u32(data, tag_table_offset, TAG_TRC[index as usize]); // 14 bytes per TAG_(r/g/b)TRC element + write_u32(data, tag_table_offset + 4, tag_data_offset as u32); + write_u32(data, tag_table_offset + 8, 14); + // tag data element + write_u32(data, tag_data_offset, CURVE_TYPE); + // reserved 4 bytes. + write_u32(data, tag_data_offset + 8, 1); // count + write_u16(data, tag_data_offset + 12, float_to_u8Fixed8Number(gamma)); + tag_table_offset += 12; + tag_data_offset += 16; + index += 1 + } + /* Part3 : write profile header + * + * Important header fields are left empty. This generates a profile for internal use only. + * We should be generating: Profile version (04300000h), Profile signature (acsp), + * PCS illumiant field. Likewise mandatory profile tags are omitted. 
+ */ + write_u32(data, 0, length); // the total length of this memory + write_u32(data, 12, DISPLAY_DEVICE_PROFILE); // profile->class_type + write_u32(data, 16, RGB_SIGNATURE); // profile->color_space + write_u32(data, 20, XYZ_TYPE); // profile->pcs + write_u32(data, 64, Intent::Perceptual as u32); // profile->rendering_intent + write_u32(data, 128, 6); // total tag count + // prepare the result + *mem = data.as_mut_ptr() as *mut libc::c_void; + *size = length as usize; +} + +#[no_mangle] +pub unsafe extern "C" fn qcms_transform_data( + transform: &qcms_transform, + src: *const libc::c_void, + dest: *mut libc::c_void, + length: usize, +) { + transform.transform_fn.expect("non-null function pointer")( + transform, + src as *const u8, + dest as *mut u8, + length, + ); +} + +pub type icColorSpaceSignature = u32; +pub const icSigGrayData: icColorSpaceSignature = 1196573017; +pub const icSigRgbData: icColorSpaceSignature = 1380401696; + +pub use crate::iccread::qcms_profile_is_bogus; +pub use crate::iccread::Profile as qcms_profile; +pub use crate::transform::{ + qcms_enable_iccv4, qcms_profile_precache_output_transform, qcms_transform_release, +}; diff --git a/gfx/qcms/src/chain.rs b/gfx/qcms/src/chain.rs new file mode 100644 index 0000000000..c1faf9dcd7 --- /dev/null +++ b/gfx/qcms/src/chain.rs @@ -0,0 +1,998 @@ +/* vim: set ts=8 sw=8 noexpandtab: */ +// qcms +// Copyright (C) 2009 Mozilla Corporation +// Copyright (C) 1998-2007 Marti Maria +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be 
included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +use crate::{ + iccread::LAB_SIGNATURE, + iccread::RGB_SIGNATURE, + iccread::XYZ_SIGNATURE, + iccread::{lutType, lutmABType, Profile}, + matrix::Matrix, + s15Fixed16Number_to_float, + transform_util::clamp_float, + transform_util::{ + build_colorant_matrix, build_input_gamma_table, build_output_lut, lut_interp_linear, + lut_interp_linear_float, + }, +}; + +#[derive(Clone, Default)] +pub struct ModularTransform { + matrix: Matrix, + tx: f32, + ty: f32, + tz: f32, + input_clut_table_r: Option<Vec<f32>>, + input_clut_table_g: Option<Vec<f32>>, + input_clut_table_b: Option<Vec<f32>>, + input_clut_table_length: u16, + clut: Option<Vec<f32>>, + grid_size: u16, + output_clut_table_r: Option<Vec<f32>>, + output_clut_table_g: Option<Vec<f32>>, + output_clut_table_b: Option<Vec<f32>>, + output_clut_table_length: u16, + output_gamma_lut_r: Option<Vec<u16>>, + output_gamma_lut_g: Option<Vec<u16>>, + output_gamma_lut_b: Option<Vec<u16>>, + output_gamma_lut_r_length: usize, + output_gamma_lut_g_length: usize, + output_gamma_lut_b_length: usize, + transform_module_fn: TransformModuleFn, + next_transform: Option<Box<ModularTransform>>, +} +pub type TransformModuleFn = + Option<fn(_: &ModularTransform, _: &[f32], _: &mut [f32]) -> ()>; + +#[inline] +fn lerp(a: f32, b: f32, t: f32) -> f32 { + a * (1.0 - t) + b * t +} + +fn build_lut_matrix(lut: Option<&lutType>) -> Matrix { + let mut result: Matrix = Matrix { + m: [[0.; 3]; 3], + 
invalid: false, + }; + if let Some(lut) = lut { + result.m[0][0] = s15Fixed16Number_to_float(lut.e00); + result.m[0][1] = s15Fixed16Number_to_float(lut.e01); + result.m[0][2] = s15Fixed16Number_to_float(lut.e02); + result.m[1][0] = s15Fixed16Number_to_float(lut.e10); + result.m[1][1] = s15Fixed16Number_to_float(lut.e11); + result.m[1][2] = s15Fixed16Number_to_float(lut.e12); + result.m[2][0] = s15Fixed16Number_to_float(lut.e20); + result.m[2][1] = s15Fixed16Number_to_float(lut.e21); + result.m[2][2] = s15Fixed16Number_to_float(lut.e22); + result.invalid = false + } else { + result.m = Default::default(); + result.invalid = true + } + result +} +fn build_mAB_matrix(lut: &lutmABType) -> Matrix { + let mut result: Matrix = Matrix { + m: [[0.; 3]; 3], + invalid: false, + }; + + result.m[0][0] = s15Fixed16Number_to_float(lut.e00); + result.m[0][1] = s15Fixed16Number_to_float(lut.e01); + result.m[0][2] = s15Fixed16Number_to_float(lut.e02); + result.m[1][0] = s15Fixed16Number_to_float(lut.e10); + result.m[1][1] = s15Fixed16Number_to_float(lut.e11); + result.m[1][2] = s15Fixed16Number_to_float(lut.e12); + result.m[2][0] = s15Fixed16Number_to_float(lut.e20); + result.m[2][1] = s15Fixed16Number_to_float(lut.e21); + result.m[2][2] = s15Fixed16Number_to_float(lut.e22); + result.invalid = false; + + result +} +//Based on lcms cmsLab2XYZ +fn f(t: f32) -> f32 { + if t <= 24. / 116. * (24. / 116.) * (24. / 116.) { + (841. / 108. * t) + 16. / 116. + } else { + t.powf(1. / 3.) 
+ } +} +fn f_1(t: f32) -> f32 { + if t <= 24.0 / 116.0 { + (108.0 / 841.0) * (t - 16.0 / 116.0) + } else { + t * t * t + } +} + +fn transform_module_LAB_to_XYZ(_transform: &ModularTransform, src: &[f32], dest: &mut [f32]) { + // lcms: D50 XYZ values + let WhitePointX: f32 = 0.9642; + let WhitePointY: f32 = 1.0; + let WhitePointZ: f32 = 0.8249; + + for (dest, src) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) { + let device_L: f32 = src[0] * 100.0; + let device_a: f32 = src[1] * 255.0 - 128.0; + let device_b: f32 = src[2] * 255.0 - 128.0; + + let y: f32 = (device_L + 16.0) / 116.0; + + let X = f_1(y + 0.002 * device_a) * WhitePointX; + let Y = f_1(y) * WhitePointY; + let Z = f_1(y - 0.005 * device_b) * WhitePointZ; + + dest[0] = (X as f64 / (1.0f64 + 32767.0f64 / 32768.0f64)) as f32; + dest[1] = (Y as f64 / (1.0f64 + 32767.0f64 / 32768.0f64)) as f32; + dest[2] = (Z as f64 / (1.0f64 + 32767.0f64 / 32768.0f64)) as f32; + } +} +//Based on lcms cmsXYZ2Lab +fn transform_module_XYZ_to_LAB(_transform: &ModularTransform, src: &[f32], dest: &mut [f32]) { + // lcms: D50 XYZ values + let WhitePointX: f32 = 0.9642; + let WhitePointY: f32 = 1.0; + let WhitePointZ: f32 = 0.8249; + for (dest, src) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) { + let device_x: f32 = + (src[0] as f64 * (1.0f64 + 32767.0f64 / 32768.0f64) / WhitePointX as f64) as f32; + let device_y: f32 = + (src[1] as f64 * (1.0f64 + 32767.0f64 / 32768.0f64) / WhitePointY as f64) as f32; + let device_z: f32 = + (src[2] as f64 * (1.0f64 + 32767.0f64 / 32768.0f64) / WhitePointZ as f64) as f32; + + let fx = f(device_x); + let fy = f(device_y); + let fz = f(device_z); + + let L: f32 = 116.0 * fy - 16.0; + let a: f32 = 500.0 * (fx - fy); + let b: f32 = 200.0 * (fy - fz); + + dest[0] = L / 100.0; + dest[1] = (a + 128.0) / 255.0; + dest[2] = (b + 128.0) / 255.0; + } +} +fn transform_module_clut_only(transform: &ModularTransform, src: &[f32], dest: &mut [f32]) { + let xy_len: i32 = 1; + let x_len: i32 = 
transform.grid_size as i32; + let len: i32 = x_len * x_len; + + let r_table = &transform.clut.as_ref().unwrap()[0..]; + let g_table = &transform.clut.as_ref().unwrap()[1..]; + let b_table = &transform.clut.as_ref().unwrap()[2..]; + + let CLU = |table: &[f32], x, y, z| table[((x * len + y * x_len + z * xy_len) * 3) as usize]; + + for (dest, src) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) { + debug_assert!(transform.grid_size as i32 >= 1); + let linear_r: f32 = src[0]; + let linear_g: f32 = src[1]; + let linear_b: f32 = src[2]; + let x: i32 = (linear_r * (transform.grid_size as i32 - 1) as f32).floor() as i32; + let y: i32 = (linear_g * (transform.grid_size as i32 - 1) as f32).floor() as i32; + let z: i32 = (linear_b * (transform.grid_size as i32 - 1) as f32).floor() as i32; + let x_n: i32 = (linear_r * (transform.grid_size as i32 - 1) as f32).ceil() as i32; + let y_n: i32 = (linear_g * (transform.grid_size as i32 - 1) as f32).ceil() as i32; + let z_n: i32 = (linear_b * (transform.grid_size as i32 - 1) as f32).ceil() as i32; + let x_d: f32 = linear_r * (transform.grid_size as i32 - 1) as f32 - x as f32; + let y_d: f32 = linear_g * (transform.grid_size as i32 - 1) as f32 - y as f32; + let z_d: f32 = linear_b * (transform.grid_size as i32 - 1) as f32 - z as f32; + + let r_x1: f32 = lerp(CLU(r_table, x, y, z), CLU(r_table, x_n, y, z), x_d); + let r_x2: f32 = lerp(CLU(r_table, x, y_n, z), CLU(r_table, x_n, y_n, z), x_d); + let r_y1: f32 = lerp(r_x1, r_x2, y_d); + let r_x3: f32 = lerp(CLU(r_table, x, y, z_n), CLU(r_table, x_n, y, z_n), x_d); + let r_x4: f32 = lerp(CLU(r_table, x, y_n, z_n), CLU(r_table, x_n, y_n, z_n), x_d); + let r_y2: f32 = lerp(r_x3, r_x4, y_d); + let clut_r: f32 = lerp(r_y1, r_y2, z_d); + + let g_x1: f32 = lerp(CLU(g_table, x, y, z), CLU(g_table, x_n, y, z), x_d); + let g_x2: f32 = lerp(CLU(g_table, x, y_n, z), CLU(g_table, x_n, y_n, z), x_d); + let g_y1: f32 = lerp(g_x1, g_x2, y_d); + let g_x3: f32 = lerp(CLU(g_table, x, y, z_n), 
CLU(g_table, x_n, y, z_n), x_d); + let g_x4: f32 = lerp(CLU(g_table, x, y_n, z_n), CLU(g_table, x_n, y_n, z_n), x_d); + let g_y2: f32 = lerp(g_x3, g_x4, y_d); + let clut_g: f32 = lerp(g_y1, g_y2, z_d); + + let b_x1: f32 = lerp(CLU(b_table, x, y, z), CLU(b_table, x_n, y, z), x_d); + let b_x2: f32 = lerp(CLU(b_table, x, y_n, z), CLU(b_table, x_n, y_n, z), x_d); + let b_y1: f32 = lerp(b_x1, b_x2, y_d); + let b_x3: f32 = lerp(CLU(b_table, x, y, z_n), CLU(b_table, x_n, y, z_n), x_d); + let b_x4: f32 = lerp(CLU(b_table, x, y_n, z_n), CLU(b_table, x_n, y_n, z_n), x_d); + let b_y2: f32 = lerp(b_x3, b_x4, y_d); + let clut_b: f32 = lerp(b_y1, b_y2, z_d); + + dest[0] = clamp_float(clut_r); + dest[1] = clamp_float(clut_g); + dest[2] = clamp_float(clut_b); + } +} +fn transform_module_clut(transform: &ModularTransform, src: &[f32], dest: &mut [f32]) { + let xy_len: i32 = 1; + let x_len: i32 = transform.grid_size as i32; + let len: i32 = x_len * x_len; + + let r_table = &transform.clut.as_ref().unwrap()[0..]; + let g_table = &transform.clut.as_ref().unwrap()[1..]; + let b_table = &transform.clut.as_ref().unwrap()[2..]; + let CLU = |table: &[f32], x, y, z| table[((x * len + y * x_len + z * xy_len) * 3) as usize]; + + let input_clut_table_r = transform.input_clut_table_r.as_ref().unwrap(); + let input_clut_table_g = transform.input_clut_table_g.as_ref().unwrap(); + let input_clut_table_b = transform.input_clut_table_b.as_ref().unwrap(); + for (dest, src) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) { + debug_assert!(transform.grid_size as i32 >= 1); + let device_r: f32 = src[0]; + let device_g: f32 = src[1]; + let device_b: f32 = src[2]; + let linear_r: f32 = lut_interp_linear_float(device_r, &input_clut_table_r); + let linear_g: f32 = lut_interp_linear_float(device_g, &input_clut_table_g); + let linear_b: f32 = lut_interp_linear_float(device_b, &input_clut_table_b); + let x: i32 = (linear_r * (transform.grid_size as i32 - 1) as f32).floor() as i32; + let y: i32 = (linear_g 
* (transform.grid_size as i32 - 1) as f32).floor() as i32; + let z: i32 = (linear_b * (transform.grid_size as i32 - 1) as f32).floor() as i32; + let x_n: i32 = (linear_r * (transform.grid_size as i32 - 1) as f32).ceil() as i32; + let y_n: i32 = (linear_g * (transform.grid_size as i32 - 1) as f32).ceil() as i32; + let z_n: i32 = (linear_b * (transform.grid_size as i32 - 1) as f32).ceil() as i32; + let x_d: f32 = linear_r * (transform.grid_size as i32 - 1) as f32 - x as f32; + let y_d: f32 = linear_g * (transform.grid_size as i32 - 1) as f32 - y as f32; + let z_d: f32 = linear_b * (transform.grid_size as i32 - 1) as f32 - z as f32; + + let r_x1: f32 = lerp(CLU(r_table, x, y, z), CLU(r_table, x_n, y, z), x_d); + let r_x2: f32 = lerp(CLU(r_table, x, y_n, z), CLU(r_table, x_n, y_n, z), x_d); + let r_y1: f32 = lerp(r_x1, r_x2, y_d); + let r_x3: f32 = lerp(CLU(r_table, x, y, z_n), CLU(r_table, x_n, y, z_n), x_d); + let r_x4: f32 = lerp(CLU(r_table, x, y_n, z_n), CLU(r_table, x_n, y_n, z_n), x_d); + let r_y2: f32 = lerp(r_x3, r_x4, y_d); + let clut_r: f32 = lerp(r_y1, r_y2, z_d); + + let g_x1: f32 = lerp(CLU(g_table, x, y, z), CLU(g_table, x_n, y, z), x_d); + let g_x2: f32 = lerp(CLU(g_table, x, y_n, z), CLU(g_table, x_n, y_n, z), x_d); + let g_y1: f32 = lerp(g_x1, g_x2, y_d); + let g_x3: f32 = lerp(CLU(g_table, x, y, z_n), CLU(g_table, x_n, y, z_n), x_d); + let g_x4: f32 = lerp(CLU(g_table, x, y_n, z_n), CLU(g_table, x_n, y_n, z_n), x_d); + let g_y2: f32 = lerp(g_x3, g_x4, y_d); + let clut_g: f32 = lerp(g_y1, g_y2, z_d); + + let b_x1: f32 = lerp(CLU(b_table, x, y, z), CLU(b_table, x_n, y, z), x_d); + let b_x2: f32 = lerp(CLU(b_table, x, y_n, z), CLU(b_table, x_n, y_n, z), x_d); + let b_y1: f32 = lerp(b_x1, b_x2, y_d); + let b_x3: f32 = lerp(CLU(b_table, x, y, z_n), CLU(b_table, x_n, y, z_n), x_d); + let b_x4: f32 = lerp(CLU(b_table, x, y_n, z_n), CLU(b_table, x_n, y_n, z_n), x_d); + let b_y2: f32 = lerp(b_x3, b_x4, y_d); + let clut_b: f32 = lerp(b_y1, b_y2, z_d); + let 
pcs_r: f32 = + lut_interp_linear_float(clut_r, &transform.output_clut_table_r.as_ref().unwrap()); + let pcs_g: f32 = + lut_interp_linear_float(clut_g, &transform.output_clut_table_g.as_ref().unwrap()); + let pcs_b: f32 = + lut_interp_linear_float(clut_b, &transform.output_clut_table_b.as_ref().unwrap()); + dest[0] = clamp_float(pcs_r); + dest[1] = clamp_float(pcs_g); + dest[2] = clamp_float(pcs_b); + } +} +/* NOT USED +static void qcms_transform_module_tetra_clut(struct qcms_modular_transform *transform, float *src, float *dest, size_t length) +{ + size_t i; + int xy_len = 1; + int x_len = transform->grid_size; + int len = x_len * x_len; + float* r_table = transform->r_clut; + float* g_table = transform->g_clut; + float* b_table = transform->b_clut; + float c0_r, c1_r, c2_r, c3_r; + float c0_g, c1_g, c2_g, c3_g; + float c0_b, c1_b, c2_b, c3_b; + float clut_r, clut_g, clut_b; + float pcs_r, pcs_g, pcs_b; + for (i = 0; i < length; i++) { + float device_r = *src++; + float device_g = *src++; + float device_b = *src++; + float linear_r = lut_interp_linear_float(device_r, + transform->input_clut_table_r, transform->input_clut_table_length); + float linear_g = lut_interp_linear_float(device_g, + transform->input_clut_table_g, transform->input_clut_table_length); + float linear_b = lut_interp_linear_float(device_b, + transform->input_clut_table_b, transform->input_clut_table_length); + + int x = floorf(linear_r * (transform->grid_size-1)); + int y = floorf(linear_g * (transform->grid_size-1)); + int z = floorf(linear_b * (transform->grid_size-1)); + int x_n = ceilf(linear_r * (transform->grid_size-1)); + int y_n = ceilf(linear_g * (transform->grid_size-1)); + int z_n = ceilf(linear_b * (transform->grid_size-1)); + float rx = linear_r * (transform->grid_size-1) - x; + float ry = linear_g * (transform->grid_size-1) - y; + float rz = linear_b * (transform->grid_size-1) - z; + + c0_r = CLU(r_table, x, y, z); + c0_g = CLU(g_table, x, y, z); + c0_b = CLU(b_table, x, y, z); + 
if( rx >= ry ) { + if (ry >= rz) { //rx >= ry && ry >= rz + c1_r = CLU(r_table, x_n, y, z) - c0_r; + c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z); + c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); + c1_g = CLU(g_table, x_n, y, z) - c0_g; + c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z); + c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); + c1_b = CLU(b_table, x_n, y, z) - c0_b; + c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z); + c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); + } else { + if (rx >= rz) { //rx >= rz && rz >= ry + c1_r = CLU(r_table, x_n, y, z) - c0_r; + c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); + c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z); + c1_g = CLU(g_table, x_n, y, z) - c0_g; + c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); + c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z); + c1_b = CLU(b_table, x_n, y, z) - c0_b; + c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); + c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z); + } else { //rz > rx && rx >= ry + c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n); + c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); + c3_r = CLU(r_table, x, y, z_n) - c0_r; + c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n); + c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); + c3_g = CLU(g_table, x, y, z_n) - c0_g; + c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n); + c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); + c3_b = CLU(b_table, x, y, z_n) - c0_b; + } + } + } else { + if (rx >= rz) { //ry > rx && rx >= rz + c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z); + c2_r = CLU(r_table, x_n, y_n, z) - c0_r; + c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); + c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z); + c2_g = CLU(g_table, x_n, y_n, z) - c0_g; 
+ c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); + c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z); + c2_b = CLU(b_table, x_n, y_n, z) - c0_b; + c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); + } else { + if (ry >= rz) { //ry >= rz && rz > rx + c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); + c2_r = CLU(r_table, x, y_n, z) - c0_r; + c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z); + c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); + c2_g = CLU(g_table, x, y_n, z) - c0_g; + c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z); + c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); + c2_b = CLU(b_table, x, y_n, z) - c0_b; + c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z); + } else { //rz > ry && ry > rx + c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); + c2_r = CLU(r_table, x, y_n, z) - c0_r; + c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); + c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); + c2_g = CLU(g_table, x, y_n, z) - c0_g; + c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); + c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); + c2_b = CLU(b_table, x, y_n, z) - c0_b; + c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); + } + } + } + + clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz; + clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz; + clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz; + + pcs_r = lut_interp_linear_float(clut_r, + transform->output_clut_table_r, transform->output_clut_table_length); + pcs_g = lut_interp_linear_float(clut_g, + transform->output_clut_table_g, transform->output_clut_table_length); + pcs_b = lut_interp_linear_float(clut_b, + transform->output_clut_table_b, transform->output_clut_table_length); + *dest++ = clamp_float(pcs_r); + *dest++ = clamp_float(pcs_g); + *dest++ = clamp_float(pcs_b); + } +} +*/ +fn transform_module_gamma_table(transform: 
&ModularTransform, src: &[f32], dest: &mut [f32]) { + let mut out_r: f32; + let mut out_g: f32; + let mut out_b: f32; + let input_clut_table_r = transform.input_clut_table_r.as_ref().unwrap(); + let input_clut_table_g = transform.input_clut_table_g.as_ref().unwrap(); + let input_clut_table_b = transform.input_clut_table_b.as_ref().unwrap(); + + for (dest, src) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) { + let in_r: f32 = src[0]; + let in_g: f32 = src[1]; + let in_b: f32 = src[2]; + out_r = lut_interp_linear_float(in_r, input_clut_table_r); + out_g = lut_interp_linear_float(in_g, input_clut_table_g); + out_b = lut_interp_linear_float(in_b, input_clut_table_b); + + dest[0] = clamp_float(out_r); + dest[1] = clamp_float(out_g); + dest[2] = clamp_float(out_b); + } +} +fn transform_module_gamma_lut(transform: &ModularTransform, src: &[f32], dest: &mut [f32]) { + let mut out_r: f32; + let mut out_g: f32; + let mut out_b: f32; + for (dest, src) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) { + let in_r: f32 = src[0]; + let in_g: f32 = src[1]; + let in_b: f32 = src[2]; + out_r = lut_interp_linear(in_r as f64, &transform.output_gamma_lut_r.as_ref().unwrap()); + out_g = lut_interp_linear(in_g as f64, &transform.output_gamma_lut_g.as_ref().unwrap()); + out_b = lut_interp_linear(in_b as f64, &transform.output_gamma_lut_b.as_ref().unwrap()); + dest[0] = clamp_float(out_r); + dest[1] = clamp_float(out_g); + dest[2] = clamp_float(out_b); + } +} +fn transform_module_matrix_translate( + transform: &ModularTransform, + src: &[f32], + dest: &mut [f32], +) { + let mut mat: Matrix = Matrix { + m: [[0.; 3]; 3], + invalid: false, + }; + /* store the results in column major mode + * this makes doing the multiplication with sse easier */ + mat.m[0][0] = transform.matrix.m[0][0]; + mat.m[1][0] = transform.matrix.m[0][1]; + mat.m[2][0] = transform.matrix.m[0][2]; + mat.m[0][1] = transform.matrix.m[1][0]; + mat.m[1][1] = transform.matrix.m[1][1]; + mat.m[2][1] = 
transform.matrix.m[1][2]; + mat.m[0][2] = transform.matrix.m[2][0]; + mat.m[1][2] = transform.matrix.m[2][1]; + mat.m[2][2] = transform.matrix.m[2][2]; + for (dest, src) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) { + let in_r: f32 = src[0]; + let in_g: f32 = src[1]; + let in_b: f32 = src[2]; + let out_r: f32 = + mat.m[0][0] * in_r + mat.m[1][0] * in_g + mat.m[2][0] * in_b + transform.tx; + let out_g: f32 = + mat.m[0][1] * in_r + mat.m[1][1] * in_g + mat.m[2][1] * in_b + transform.ty; + let out_b: f32 = + mat.m[0][2] * in_r + mat.m[1][2] * in_g + mat.m[2][2] * in_b + transform.tz; + dest[0] = clamp_float(out_r); + dest[1] = clamp_float(out_g); + dest[2] = clamp_float(out_b); + } +} + +fn transform_module_matrix(transform: &ModularTransform, src: &[f32], dest: &mut [f32]) { + let mut mat: Matrix = Matrix { + m: [[0.; 3]; 3], + invalid: false, + }; + /* store the results in column major mode + * this makes doing the multiplication with sse easier */ + mat.m[0][0] = transform.matrix.m[0][0]; + mat.m[1][0] = transform.matrix.m[0][1]; + mat.m[2][0] = transform.matrix.m[0][2]; + mat.m[0][1] = transform.matrix.m[1][0]; + mat.m[1][1] = transform.matrix.m[1][1]; + mat.m[2][1] = transform.matrix.m[1][2]; + mat.m[0][2] = transform.matrix.m[2][0]; + mat.m[1][2] = transform.matrix.m[2][1]; + mat.m[2][2] = transform.matrix.m[2][2]; + for (dest, src) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) { + let in_r: f32 = src[0]; + let in_g: f32 = src[1]; + let in_b: f32 = src[2]; + let out_r: f32 = mat.m[0][0] * in_r + mat.m[1][0] * in_g + mat.m[2][0] * in_b; + let out_g: f32 = mat.m[0][1] * in_r + mat.m[1][1] * in_g + mat.m[2][1] * in_b; + let out_b: f32 = mat.m[0][2] * in_r + mat.m[1][2] * in_g + mat.m[2][2] * in_b; + dest[0] = clamp_float(out_r); + dest[1] = clamp_float(out_g); + dest[2] = clamp_float(out_b); + } +} +fn modular_transform_alloc() -> Option<Box<ModularTransform>> { + Some(Box::new(Default::default())) +} +fn modular_transform_release(mut t: 
Option<Box<ModularTransform>>) { + // destroy a list of transforms non-recursively + let mut next_transform; + while let Some(mut transform) = t { + next_transform = std::mem::replace(&mut transform.next_transform, None); + t = next_transform + } +} +/* Set transform to be the next element in the linked list. */ +fn append_transform( + transform: Option<Box<ModularTransform>>, + mut next_transform: &mut Option<Box<ModularTransform>>, +) -> &mut Option<Box<ModularTransform>> { + *next_transform = transform; + while next_transform.is_some() { + next_transform = &mut next_transform.as_mut().unwrap().next_transform; + } + next_transform +} +/* reverse the transformation list (used by mBA) */ +fn reverse_transform( + mut transform: Option<Box<ModularTransform>>, +) -> Option<Box<ModularTransform>> { + let mut prev_transform = None; + while transform.is_some() { + let next_transform = std::mem::replace( + &mut transform.as_mut().unwrap().next_transform, + prev_transform, + ); + prev_transform = transform; + transform = next_transform + } + prev_transform +} +fn modular_transform_create_mAB(lut: &lutmABType) -> Option<Box<ModularTransform>> { + let mut first_transform = None; + let mut next_transform = &mut first_transform; + let mut transform; + if lut.a_curves[0].is_some() { + let clut_length: usize; + // If the A curve is present this also implies the + // presence of a CLUT. + lut.clut_table.as_ref()?; + + // Prepare A curve. 
+ transform = modular_transform_alloc(); + transform.as_ref()?; + transform.as_mut().unwrap().input_clut_table_r = + build_input_gamma_table(lut.a_curves[0].as_deref()); + transform.as_mut().unwrap().input_clut_table_g = + build_input_gamma_table(lut.a_curves[1].as_deref()); + transform.as_mut().unwrap().input_clut_table_b = + build_input_gamma_table(lut.a_curves[2].as_deref()); + transform.as_mut().unwrap().transform_module_fn = Some(transform_module_gamma_table); + next_transform = append_transform(transform, next_transform); + + if lut.num_grid_points[0] as i32 != lut.num_grid_points[1] as i32 + || lut.num_grid_points[1] as i32 != lut.num_grid_points[2] as i32 + { + //XXX: We don't currently support clut that are not squared! + return None; + } + + // Prepare CLUT + transform = modular_transform_alloc(); + transform.as_ref()?; + + clut_length = (lut.num_grid_points[0] as usize).pow(3) * 3; + assert_eq!(clut_length, lut.clut_table.as_ref().unwrap().len()); + transform.as_mut().unwrap().clut = lut.clut_table.clone(); + transform.as_mut().unwrap().grid_size = lut.num_grid_points[0] as u16; + transform.as_mut().unwrap().transform_module_fn = Some(transform_module_clut_only); + next_transform = append_transform(transform, next_transform); + } + + if lut.m_curves[0].is_some() { + // M curve imples the presence of a Matrix + + // Prepare M curve + transform = modular_transform_alloc(); + transform.as_ref()?; + transform.as_mut().unwrap().input_clut_table_r = + build_input_gamma_table(lut.m_curves[0].as_deref()); + transform.as_mut().unwrap().input_clut_table_g = + build_input_gamma_table(lut.m_curves[1].as_deref()); + transform.as_mut().unwrap().input_clut_table_b = + build_input_gamma_table(lut.m_curves[2].as_deref()); + transform.as_mut().unwrap().transform_module_fn = Some(transform_module_gamma_table); + next_transform = append_transform(transform, next_transform); + + // Prepare Matrix + transform = modular_transform_alloc(); + transform.as_ref()?; + 
transform.as_mut().unwrap().matrix = build_mAB_matrix(lut); + if transform.as_mut().unwrap().matrix.invalid { + return None; + } + transform.as_mut().unwrap().tx = s15Fixed16Number_to_float(lut.e03); + transform.as_mut().unwrap().ty = s15Fixed16Number_to_float(lut.e13); + transform.as_mut().unwrap().tz = s15Fixed16Number_to_float(lut.e23); + transform.as_mut().unwrap().transform_module_fn = Some(transform_module_matrix_translate); + next_transform = append_transform(transform, next_transform); + } + + if lut.b_curves[0].is_some() { + // Prepare B curve + transform = modular_transform_alloc(); + transform.as_ref()?; + transform.as_mut().unwrap().input_clut_table_r = + build_input_gamma_table(lut.b_curves[0].as_deref()); + transform.as_mut().unwrap().input_clut_table_g = + build_input_gamma_table(lut.b_curves[1].as_deref()); + transform.as_mut().unwrap().input_clut_table_b = + build_input_gamma_table(lut.b_curves[2].as_deref()); + transform.as_mut().unwrap().transform_module_fn = Some(transform_module_gamma_table); + append_transform(transform, next_transform); + } else { + // B curve is mandatory + return None; + } + + if lut.reversed { + // mBA are identical to mAB except that the transformation order + // is reversed + first_transform = reverse_transform(first_transform) + } + first_transform +} + +fn modular_transform_create_lut(lut: &lutType) -> Option<Box<ModularTransform>> { + let mut first_transform = None; + let mut next_transform = &mut first_transform; + + let _in_curve_len: usize; + let clut_length: usize; + let _out_curve_len: usize; + let _in_curves: *mut f32; + let _out_curves: *mut f32; + let mut transform = modular_transform_alloc(); + if transform.is_some() { + transform.as_mut().unwrap().matrix = build_lut_matrix(Some(lut)); + if !transform.as_mut().unwrap().matrix.invalid { + transform.as_mut().unwrap().transform_module_fn = Some(transform_module_matrix); + next_transform = append_transform(transform, next_transform); + // Prepare input curves + 
transform = modular_transform_alloc(); + if transform.is_some() { + transform.as_mut().unwrap().input_clut_table_r = + Some(lut.input_table[0..lut.num_input_table_entries as usize].to_vec()); + transform.as_mut().unwrap().input_clut_table_g = Some( + lut.input_table[lut.num_input_table_entries as usize + ..lut.num_input_table_entries as usize * 2] + .to_vec(), + ); + transform.as_mut().unwrap().input_clut_table_b = Some( + lut.input_table[lut.num_input_table_entries as usize * 2 + ..lut.num_input_table_entries as usize * 3] + .to_vec(), + ); + transform.as_mut().unwrap().input_clut_table_length = lut.num_input_table_entries; + // Prepare table + clut_length = (lut.num_clut_grid_points as usize).pow(3) * 3; + assert_eq!(clut_length, lut.clut_table.len()); + transform.as_mut().unwrap().clut = Some(lut.clut_table.clone()); + + transform.as_mut().unwrap().grid_size = lut.num_clut_grid_points as u16; + // Prepare output curves + transform.as_mut().unwrap().output_clut_table_r = + Some(lut.output_table[0..lut.num_output_table_entries as usize].to_vec()); + transform.as_mut().unwrap().output_clut_table_g = Some( + lut.output_table[lut.num_output_table_entries as usize + ..lut.num_output_table_entries as usize * 2] + .to_vec(), + ); + transform.as_mut().unwrap().output_clut_table_b = Some( + lut.output_table[lut.num_output_table_entries as usize * 2 + ..lut.num_output_table_entries as usize * 3] + .to_vec(), + ); + transform.as_mut().unwrap().output_clut_table_length = lut.num_output_table_entries; + transform.as_mut().unwrap().transform_module_fn = Some(transform_module_clut); + append_transform(transform, next_transform); + return first_transform; + } + } + } + modular_transform_release(first_transform); + None +} + +fn modular_transform_create_input(input: &Profile) -> Option<Box<ModularTransform>> { + let mut first_transform = None; + let mut next_transform = &mut first_transform; + if input.A2B0.is_some() { + let lut_transform = 
modular_transform_create_lut(input.A2B0.as_deref().unwrap()); + if lut_transform.is_none() { + return None; + } else { + append_transform(lut_transform, next_transform); + } + } else if input.mAB.is_some() + && (*input.mAB.as_deref().unwrap()).num_in_channels == 3 + && (*input.mAB.as_deref().unwrap()).num_out_channels == 3 + { + let mAB_transform = modular_transform_create_mAB(input.mAB.as_deref().unwrap()); + if mAB_transform.is_none() { + return None; + } else { + append_transform(mAB_transform, next_transform); + } + } else { + let mut transform = modular_transform_alloc(); + if transform.is_none() { + return None; + } else { + transform.as_mut().unwrap().input_clut_table_r = + build_input_gamma_table(input.redTRC.as_deref()); + transform.as_mut().unwrap().input_clut_table_g = + build_input_gamma_table(input.greenTRC.as_deref()); + transform.as_mut().unwrap().input_clut_table_b = + build_input_gamma_table(input.blueTRC.as_deref()); + transform.as_mut().unwrap().transform_module_fn = Some(transform_module_gamma_table); + if transform.as_mut().unwrap().input_clut_table_r.is_none() + || transform.as_mut().unwrap().input_clut_table_g.is_none() + || transform.as_mut().unwrap().input_clut_table_b.is_none() + { + append_transform(transform, next_transform); + return None; + } else { + next_transform = append_transform(transform, next_transform); + transform = modular_transform_alloc(); + if transform.is_none() { + return None; + } else { + transform.as_mut().unwrap().matrix.m[0][0] = 1. / 1.999_969_5; + transform.as_mut().unwrap().matrix.m[0][1] = 0.0; + transform.as_mut().unwrap().matrix.m[0][2] = 0.0; + transform.as_mut().unwrap().matrix.m[1][0] = 0.0; + transform.as_mut().unwrap().matrix.m[1][1] = 1. / 1.999_969_5; + transform.as_mut().unwrap().matrix.m[1][2] = 0.0; + transform.as_mut().unwrap().matrix.m[2][0] = 0.0; + transform.as_mut().unwrap().matrix.m[2][1] = 0.0; + transform.as_mut().unwrap().matrix.m[2][2] = 1. 
/ 1.999_969_5; + transform.as_mut().unwrap().matrix.invalid = false; + transform.as_mut().unwrap().transform_module_fn = Some(transform_module_matrix); + next_transform = append_transform(transform, next_transform); + transform = modular_transform_alloc(); + if transform.is_none() { + return None; + } else { + transform.as_mut().unwrap().matrix = build_colorant_matrix(input); + transform.as_mut().unwrap().transform_module_fn = + Some(transform_module_matrix); + append_transform(transform, next_transform); + } + } + } + } + } + first_transform +} +fn modular_transform_create_output(out: &Profile) -> Option<Box<ModularTransform>> { + let mut first_transform = None; + let mut next_transform = &mut first_transform; + if out.B2A0.is_some() { + let lut_transform = modular_transform_create_lut(out.B2A0.as_deref().unwrap()); + if lut_transform.is_none() { + return None; + } else { + append_transform(lut_transform, next_transform); + } + } else if out.mBA.is_some() + && (*out.mBA.as_deref().unwrap()).num_in_channels == 3 + && (*out.mBA.as_deref().unwrap()).num_out_channels == 3 + { + let lut_transform_0 = modular_transform_create_mAB(out.mBA.as_deref().unwrap()); + if lut_transform_0.is_none() { + return None; + } else { + append_transform(lut_transform_0, next_transform); + } + } else if out.redTRC.is_some() && out.greenTRC.is_some() && out.blueTRC.is_some() { + let mut transform = modular_transform_alloc(); + if transform.is_none() { + return None; + } else { + transform.as_mut().unwrap().matrix = build_colorant_matrix(out).invert(); + transform.as_mut().unwrap().transform_module_fn = Some(transform_module_matrix); + next_transform = append_transform(transform, next_transform); + transform = modular_transform_alloc(); + if transform.is_none() { + return None; + } else { + transform.as_mut().unwrap().matrix.m[0][0] = 1.999_969_5; + transform.as_mut().unwrap().matrix.m[0][1] = 0.0; + transform.as_mut().unwrap().matrix.m[0][2] = 0.0; + 
transform.as_mut().unwrap().matrix.m[1][0] = 0.0; + transform.as_mut().unwrap().matrix.m[1][1] = 1.999_969_5; + transform.as_mut().unwrap().matrix.m[1][2] = 0.0; + transform.as_mut().unwrap().matrix.m[2][0] = 0.0; + transform.as_mut().unwrap().matrix.m[2][1] = 0.0; + transform.as_mut().unwrap().matrix.m[2][2] = 1.999_969_5; + transform.as_mut().unwrap().matrix.invalid = false; + transform.as_mut().unwrap().transform_module_fn = Some(transform_module_matrix); + next_transform = append_transform(transform, next_transform); + transform = modular_transform_alloc(); + if transform.is_none() { + return None; + } else { + transform.as_mut().unwrap().output_gamma_lut_r = + Some(build_output_lut(out.redTRC.as_deref().unwrap())); + transform.as_mut().unwrap().output_gamma_lut_g = + Some(build_output_lut(out.greenTRC.as_deref().unwrap())); + transform.as_mut().unwrap().output_gamma_lut_b = + Some(build_output_lut(out.blueTRC.as_deref().unwrap())); + transform.as_mut().unwrap().transform_module_fn = + Some(transform_module_gamma_lut); + if transform.as_mut().unwrap().output_gamma_lut_r.is_none() + || transform.as_mut().unwrap().output_gamma_lut_g.is_none() + || transform.as_mut().unwrap().output_gamma_lut_b.is_none() + { + return None; + } else { + append_transform(transform, next_transform); + } + } + } + } + } else { + debug_assert!(false, "Unsupported output profile workflow."); + return None; + } + first_transform +} +/* Not Completed +// Simplify the transformation chain to an equivalent transformation chain +static struct qcms_modular_transform* qcms_modular_transform_reduce(struct qcms_modular_transform *transform) +{ + struct qcms_modular_transform *first_transform = NULL; + struct qcms_modular_transform *curr_trans = transform; + struct qcms_modular_transform *prev_trans = NULL; + while (curr_trans) { + struct qcms_modular_transform *next_trans = curr_trans->next_transform; + if (curr_trans->transform_module_fn == qcms_transform_module_matrix) { + if (next_trans && 
next_trans->transform_module_fn == qcms_transform_module_matrix) { + curr_trans->matrix = matrix_multiply(curr_trans->matrix, next_trans->matrix); + goto remove_next; + } + } + if (curr_trans->transform_module_fn == qcms_transform_module_gamma_table) { + bool isLinear = true; + uint16_t i; + for (i = 0; isLinear && i < 256; i++) { + isLinear &= (int)(curr_trans->input_clut_table_r[i] * 255) == i; + isLinear &= (int)(curr_trans->input_clut_table_g[i] * 255) == i; + isLinear &= (int)(curr_trans->input_clut_table_b[i] * 255) == i; + } + goto remove_current; + } + +next_transform: + if (!next_trans) break; + prev_trans = curr_trans; + curr_trans = next_trans; + continue; +remove_current: + if (curr_trans == transform) { + //Update head + transform = next_trans; + } else { + prev_trans->next_transform = next_trans; + } + curr_trans->next_transform = NULL; + qcms_modular_transform_release(curr_trans); + //return transform; + return qcms_modular_transform_reduce(transform); +remove_next: + curr_trans->next_transform = next_trans->next_transform; + next_trans->next_transform = NULL; + qcms_modular_transform_release(next_trans); + continue; + } + return transform; +} +*/ +fn modular_transform_create( + input: &Profile, + output: &Profile, +) -> Option<Box<ModularTransform>> { + let mut first_transform = None; + let mut next_transform = &mut first_transform; + if input.color_space == RGB_SIGNATURE { + let rgb_to_pcs = modular_transform_create_input(input); + rgb_to_pcs.as_ref()?; + next_transform = append_transform(rgb_to_pcs, next_transform); + } else { + debug_assert!(false, "input color space not supported"); + return None; + } + + if input.pcs == LAB_SIGNATURE && output.pcs == XYZ_SIGNATURE { + let mut lab_to_pcs = modular_transform_alloc(); + lab_to_pcs.as_ref()?; + lab_to_pcs.as_mut().unwrap().transform_module_fn = Some(transform_module_LAB_to_XYZ); + next_transform = append_transform(lab_to_pcs, next_transform); + } + + // This does not improve accuracy in practice, 
something is wrong here. + //if (in->chromaticAdaption.invalid == false) { + // struct qcms_modular_transform* chromaticAdaption; + // chromaticAdaption = qcms_modular_transform_alloc(); + // if (!chromaticAdaption) + // goto fail; + // append_transform(chromaticAdaption, &next_transform); + // chromaticAdaption->matrix = matrix_invert(in->chromaticAdaption); + // chromaticAdaption->transform_module_fn = qcms_transform_module_matrix; + //} + + if input.pcs == XYZ_SIGNATURE && output.pcs == LAB_SIGNATURE { + let mut pcs_to_lab = modular_transform_alloc(); + pcs_to_lab.as_ref()?; + pcs_to_lab.as_mut().unwrap().transform_module_fn = Some(transform_module_XYZ_to_LAB); + next_transform = append_transform(pcs_to_lab, next_transform); + } + + if output.color_space == RGB_SIGNATURE { + let pcs_to_rgb = modular_transform_create_output(output); + pcs_to_rgb.as_ref()?; + append_transform(pcs_to_rgb, next_transform); + } else { + debug_assert!(false, "output color space not supported"); + } + + // Not Completed + //return qcms_modular_transform_reduce(first_transform); + first_transform +} +fn modular_transform_data( + mut transform: Option<&ModularTransform>, + mut src: Vec<f32>, + mut dest: Vec<f32>, + _len: usize, +) -> Option<Vec<f32>> { + while transform.is_some() { + // Keep swaping src/dest when performing a transform to use less memory. 
+ let _transform_fn: TransformModuleFn = transform.unwrap().transform_module_fn; + transform + .unwrap() + .transform_module_fn + .expect("non-null function pointer")( + transform.as_ref().unwrap(), &src, &mut dest + ); + std::mem::swap(&mut src, &mut dest); + transform = transform.unwrap().next_transform.as_deref(); + } + // The results end up in the src buffer because of the switching + Some(src) +} + +pub fn chain_transform( + input: &Profile, + output: &Profile, + src: Vec<f32>, + dest: Vec<f32>, + lutSize: usize, +) -> Option<Vec<f32>> { + let transform_list = modular_transform_create(input, output); + if transform_list.is_some() { + let lut = modular_transform_data(transform_list.as_deref(), src, dest, lutSize / 3); + modular_transform_release(transform_list); + return lut; + } + None +} diff --git a/gfx/qcms/src/gtest.rs b/gfx/qcms/src/gtest.rs new file mode 100644 index 0000000000..d47a4829e6 --- /dev/null +++ b/gfx/qcms/src/gtest.rs @@ -0,0 +1,887 @@ +#[cfg(all(test, feature = "c_bindings"))] +mod gtest { + use crate::{ + c_bindings::*, iccread::*, transform::DataType::*, transform::*, + transform_util::lut_inverse_interp16, Intent::Perceptual, + }; + use libc::c_void; + use std::ptr::null_mut; + + #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + use crate::transform_neon::{ + qcms_transform_data_bgra_out_lut_neon, qcms_transform_data_rgb_out_lut_neon, + qcms_transform_data_rgba_out_lut_neon, + }; + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + use crate::{ + transform_avx::{ + qcms_transform_data_bgra_out_lut_avx, qcms_transform_data_rgb_out_lut_avx, + qcms_transform_data_rgba_out_lut_avx, + }, + transform_sse2::{ + qcms_transform_data_bgra_out_lut_sse2, qcms_transform_data_rgb_out_lut_sse2, + qcms_transform_data_rgba_out_lut_sse2, + }, + }; + + #[test] + fn test_lut_inverse_crash() { + let lutTable1: [u16; 128] = [ + 0x0000, 0x0000, 0x0000, 0x8000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 
0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + ]; + let lutTable2: [u16; 128] = [ + 0xFFF0, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 
0xFFFF, 0xFFFF, 0xFFFF, + ]; + + // Crash/Assert test + + lut_inverse_interp16(5, &lutTable1); + lut_inverse_interp16(5, &lutTable2); + } + + #[test] + fn test_lut_inverse() { + // mimic sRGB_v4_ICC mBA Output + // + // XXXX + // X + // X + // XXXX + let mut value: u16; + let mut lutTable: [u16; 256] = [0; 256]; + + for i in 0..20 { + lutTable[i] = 0; + } + + for i in 20..200 { + lutTable[i] = ((i - 20) * 0xFFFF / (200 - 20)) as u16; + } + + for i in 200..lutTable.len() { + lutTable[i] = 0xFFFF; + } + + for i in 0..65535 { + lut_inverse_interp16(i, &lutTable); + } + + // Lookup the interesting points + + value = lut_inverse_interp16(0, &lutTable); + assert!(value <= 20 * 256); + + value = lut_inverse_interp16(1, &lutTable); + assert!(value > 20 * 256); + + value = lut_inverse_interp16(65535, &lutTable); + assert!(value < 201 * 256); + } + + // this test takes to long to run on miri + #[cfg(not(miri))] + #[test] + fn test_lut_inverse_non_monotonic() { + // Make sure we behave sanely for non monotic functions + // X X X + // X X X + // X X X + let mut lutTable: [u16; 256] = [0; 256]; + + for i in 0..100 { + lutTable[i] = ((i - 0) * 0xFFFF / (100 - 0)) as u16; + } + + for i in 100..200 { + lutTable[i] = ((i - 100) * 0xFFFF / (200 - 100)) as u16; + } + + for i in 200..256 { + lutTable[i] = ((i - 200) * 0xFFFF / (256 - 200)) as u16; + } + + for i in 0..65535 { + lut_inverse_interp16(i, &lutTable); + } + + // Make sure we don't crash, hang or let sanitizers do their magic + } + /* qcms_data_create_rgb_with_gamma is broken + #[test] + fn profile_from_gamma() { + + let white_point = qcms_CIE_xyY { x: 0.64, y: 0.33, Y: 1.}; + let primaries = qcms_CIE_xyYTRIPLE { + red: qcms_CIE_xyY { x: 0.64, y: 0.33, Y: 1.}, + green: qcms_CIE_xyY { x: 0.21, y: 0.71, Y: 1.}, + blue: qcms_CIE_xyY { x: 0.15, y: 0.06, Y: 1.} + }; + let mut mem: *mut libc::c_void = std::ptr::null_mut(); + let mut size: size_t = 0; + unsafe { qcms_data_create_rgb_with_gamma(white_point, primaries, 2.2, &mut mem, 
&mut size); } + assert!(size != 0) + } + */ + + #[test] + fn alignment() { + assert_eq!(std::mem::align_of::<qcms_transform>(), 16); + } + + #[test] + fn basic() { + let sRGB_profile = crate::c_bindings::qcms_profile_sRGB(); + + let Rec709Primaries = qcms_CIE_xyYTRIPLE { + red: qcms_CIE_xyY { + x: 0.6400f64, + y: 0.3300f64, + Y: 1.0f64, + }, + green: qcms_CIE_xyY { + x: 0.3000f64, + y: 0.6000f64, + Y: 1.0f64, + }, + blue: qcms_CIE_xyY { + x: 0.1500f64, + y: 0.0600f64, + Y: 1.0f64, + }, + }; + let D65 = qcms_white_point_sRGB(); + let other = unsafe { qcms_profile_create_rgb_with_gamma(D65, Rec709Primaries, 2.2) }; + unsafe { qcms_profile_precache_output_transform(&mut *other) }; + + let transform = unsafe { + qcms_transform_create( + &mut *sRGB_profile, + RGB8, + &mut *other, + RGB8, + Perceptual, + ) + }; + let mut data: [u8; 120] = [0; 120]; + + unsafe { + qcms_transform_data( + &*transform, + data.as_ptr() as *const libc::c_void, + data.as_mut_ptr() as *mut libc::c_void, + data.len() / 3, + ) + }; + + unsafe { + qcms_transform_release(transform); + qcms_profile_release(sRGB_profile); + qcms_profile_release(other); + } + } + + #[test] + fn gray_alpha() { + let sRGB_profile = qcms_profile_sRGB(); + let other = unsafe { qcms_profile_create_gray_with_gamma(2.2) }; + unsafe { qcms_profile_precache_output_transform(&mut *other) }; + + let transform = unsafe { + qcms_transform_create( + &mut *other, + GrayA8, + &mut *sRGB_profile, + RGBA8, + Perceptual, + ) + }; + assert!(!transform.is_null()); + + let in_data: [u8; 4] = [0, 255, 255, 0]; + let mut out_data: [u8; 2 * 4] = [0; 8]; + unsafe { + qcms_transform_data( + &*transform, + in_data.as_ptr() as *const libc::c_void, + out_data.as_mut_ptr() as *mut libc::c_void, + in_data.len() / 2, + ) + }; + + assert_eq!(out_data, [0, 0, 0, 255, 255, 255, 255, 0]); + unsafe { + qcms_transform_release(transform); + qcms_profile_release(sRGB_profile); + qcms_profile_release(other); + } + } + #[test] + fn samples() { + use 
libc::c_void; + use std::io::Read; + + let mut d = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); + qcms_enable_iccv4(); + d.push("fuzz"); + d.push("samples"); + let samples = [ + "0220-ca351238d719fd07ef8607d326b398fe.icc", + "0372-973178997787ee780b4b58ee47cad683.icc", + "0744-0a5faafe175e682b10c590b03d3f093b.icc", + "0316-eb3f97ab646cd7b66bee80bdfe6098ac.icc", + "0732-80707d91aea0f8e64ef0286cc7720e99.icc", + "1809-2bd4b77651214ca6110fdbee2502671e.icc", + ]; + for s in samples.iter() { + let mut p = d.clone(); + p.push(s); + let mut file = std::fs::File::open(p.clone()).unwrap(); + let mut data = Vec::new(); + file.read_to_end(&mut data).unwrap(); + let profile = + unsafe { qcms_profile_from_memory(data.as_ptr() as *const c_void, data.len()) }; + assert_ne!(profile, std::ptr::null_mut()); + unsafe { qcms_profile_release(profile) }; + } + } + + #[test] + fn v4() { + use libc::c_void; + use std::io::Read; + + let mut p = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); + qcms_enable_iccv4(); + p.push("profiles"); + // this profile was made by taking the lookup table profile from + // http://displaycal.net/icc-color-management-test/ and removing + // the unneeed tables using lcms + p.push("displaycal-lut-stripped.icc"); + + let mut file = std::fs::File::open(p).unwrap(); + let mut data = Vec::new(); + file.read_to_end(&mut data).unwrap(); + let profile = + unsafe { qcms_profile_from_memory(data.as_ptr() as *const c_void, data.len()) }; + assert_ne!(profile, std::ptr::null_mut()); + + let srgb_profile = qcms_profile_sRGB(); + assert_ne!(srgb_profile, std::ptr::null_mut()); + + unsafe { qcms_profile_precache_output_transform(&mut *srgb_profile) }; + + let intent = unsafe { qcms_profile_get_rendering_intent(&*profile) }; + let transform = unsafe { + qcms_transform_create(&*profile, RGB8, &*srgb_profile, RGB8, intent) + }; + + assert_ne!(transform, std::ptr::null_mut()); + + const SRC_SIZE: usize = 4; + let src: [u8; SRC_SIZE * 3] = [ + 246, 246, 246, // 
gray + 255, 0, 0, // red + 0, 255, 255, // cyan + 255, 255, 0, // yellow + ]; + let mut dst: [u8; SRC_SIZE * 3] = [0; SRC_SIZE * 3]; + + // the reference values here should be adjusted if the accuracy + // of the transformation changes + let reference = [ + 246, 246, 246, // gray + 255, 0, 0, // red + 248, 14, 22, // red + 0, 0, 255, // blue + ]; + + unsafe { + qcms_transform_data( + &*transform, + src.as_ptr() as *const libc::c_void, + dst.as_mut_ptr() as *mut libc::c_void, + SRC_SIZE, + ); + } + + assert_eq!(reference, dst); + unsafe { qcms_transform_release(transform) } + unsafe { qcms_profile_release(profile) } + unsafe { qcms_profile_release(srgb_profile) } + } + + fn CmpRgbChannel(reference: &[u8], test: &[u8], index: usize) -> bool { + (reference[index] as i32 - test[index] as i32).abs() <= 1 + } + + fn CmpRgbBufferImpl( + refBuffer: &[u8], + testBuffer: &[u8], + pixels: usize, + kSwapRB: bool, + hasAlpha: bool, + ) -> bool { + let pixelSize = if hasAlpha { 4 } else { 3 }; + if refBuffer[..pixels * pixelSize] == testBuffer[..pixels * pixelSize] { + return true; + } + + let kRIndex = if kSwapRB { 2 } else { 0 }; + let kGIndex = 1; + let kBIndex = if kSwapRB { 0 } else { 2 }; + let kAIndex = 3; + + let mut remaining = pixels; + let mut reference = &refBuffer[..]; + let mut test = &testBuffer[..]; + while remaining > 0 { + if !CmpRgbChannel(reference, test, kRIndex) + || !CmpRgbChannel(reference, test, kGIndex) + || !CmpRgbChannel(reference, test, kBIndex) + || (hasAlpha && reference[kAIndex] != test[kAIndex]) + { + assert_eq!(test[kRIndex], reference[kRIndex]); + assert_eq!(test[kGIndex], reference[kGIndex]); + assert_eq!(test[kBIndex], reference[kBIndex]); + if hasAlpha { + assert_eq!(test[kAIndex], reference[kAIndex]); + } + return false; + } + remaining -= 1; + reference = &reference[pixelSize..]; + test = &test[pixelSize..]; + } + + true + } + + fn GetRgbInputBufferImpl(kSwapRB: bool, kHasAlpha: bool) -> (usize, Vec<u8>) { + let colorSamples = [0, 5, 16, 
43, 101, 127, 182, 255]; + let colorSampleMax = colorSamples.len(); + let pixelSize = if kHasAlpha { 4 } else { 3 }; + let pixelCount = colorSampleMax * colorSampleMax * 256 * 3; + + let mut outBuffer = vec![0; pixelCount * pixelSize]; + + let kRIndex = if kSwapRB { 2 } else { 0 }; + let kGIndex = 1; + let kBIndex = if kSwapRB { 0 } else { 2 }; + let kAIndex = 3; + + // Sample every red pixel value with a subset of green and blue. + // we use a u16 for r to avoid https://github.com/rust-lang/rust/issues/78283 + let mut color: &mut [u8] = &mut outBuffer[..]; + for r in 0..=255u16 { + for &g in colorSamples.iter() { + for &b in colorSamples.iter() { + color[kRIndex] = r as u8; + color[kGIndex] = g; + color[kBIndex] = b; + if kHasAlpha { + color[kAIndex] = 0x80; + } + color = &mut color[pixelSize..]; + } + } + } + + // Sample every green pixel value with a subset of red and blue. + let mut color = &mut outBuffer[..]; + for &r in colorSamples.iter() { + for g in 0..=255u16 { + for &b in colorSamples.iter() { + color[kRIndex] = r; + color[kGIndex] = g as u8; + color[kBIndex] = b; + if kHasAlpha { + color[kAIndex] = 0x80; + } + color = &mut color[pixelSize..]; + } + } + } + + // Sample every blue pixel value with a subset of red and green. 
+ let mut color = &mut outBuffer[..]; + for &r in colorSamples.iter() { + for &g in colorSamples.iter() { + for b in 0..=255u16 { + color[kRIndex] = r; + color[kGIndex] = g; + color[kBIndex] = b as u8; + if kHasAlpha { + color[kAIndex] = 0x80; + } + color = &mut color[pixelSize..]; + } + } + } + + (pixelCount, outBuffer) + } + + fn GetRgbInputBuffer() -> (usize, Vec<u8>) { + GetRgbInputBufferImpl(false, false) + } + + fn GetRgbaInputBuffer() -> (usize, Vec<u8>) { + GetRgbInputBufferImpl(false, true) + } + + fn GetBgraInputBuffer() -> (usize, Vec<u8>) { + GetRgbInputBufferImpl(true, true) + } + + fn CmpRgbBuffer(refBuffer: &[u8], testBuffer: &[u8], pixels: usize) -> bool { + CmpRgbBufferImpl(refBuffer, testBuffer, pixels, false, false) + } + + fn CmpRgbaBuffer(refBuffer: &[u8], testBuffer: &[u8], pixels: usize) -> bool { + CmpRgbBufferImpl(refBuffer, testBuffer, pixels, false, true) + } + + fn CmpBgraBuffer(refBuffer: &[u8], testBuffer: &[u8], pixels: usize) -> bool { + CmpRgbBufferImpl(refBuffer, testBuffer, pixels, true, true) + } + + fn ClearRgbBuffer(buffer: &mut [u8], pixels: usize) { + for i in 0..pixels * 3 { + buffer[i] = 0; + } + } + + fn ClearRgbaBuffer(buffer: &mut [u8], pixels: usize) { + for i in 0..pixels * 4 { + buffer[i] = 0; + } + } + + fn GetRgbOutputBuffer(pixels: usize) -> Vec<u8> { + vec![0; pixels * 3] + } + + fn GetRgbaOutputBuffer(pixels: usize) -> Vec<u8> { + vec![0; pixels * 4] + } + + struct QcmsProfileTest { + in_profile: *mut Profile, + out_profile: *mut Profile, + transform: *mut qcms_transform, + + input: Vec<u8>, + output: Vec<u8>, + reference: Vec<u8>, + + pixels: usize, + storage_type: DataType, + precache: bool, + } + + impl QcmsProfileTest { + fn new() -> QcmsProfileTest { + QcmsProfileTest { + in_profile: null_mut(), + out_profile: null_mut(), + transform: null_mut(), + input: Vec::new(), + output: Vec::new(), + reference: Vec::new(), + + pixels: 0, + storage_type: RGB8, + precache: false, + } + } + + fn SetUp(&mut self) { + 
qcms_enable_iccv4(); + } + + unsafe fn TearDown(&mut self) { + if self.in_profile != null_mut() { + qcms_profile_release(self.in_profile) + } + + if self.out_profile != null_mut() { + qcms_profile_release(self.out_profile) + } + + if self.transform != null_mut() { + qcms_transform_release(self.transform) + } + } + + unsafe fn SetTransform(&mut self, transform: *mut qcms_transform) -> bool { + if self.transform != null_mut() { + qcms_transform_release(self.transform) + } + self.transform = transform; + self.transform != null_mut() + } + + unsafe fn SetTransformForType(&mut self, ty: DataType) -> bool { + self.SetTransform(qcms_transform_create( + &*self.in_profile, + ty, + &*self.out_profile, + ty, + Perceptual, + )) + } + + unsafe fn SetBuffers(&mut self, ty: DataType) -> bool { + match ty { + RGB8 => { + let (pixels, input) = GetRgbInputBuffer(); + self.input = input; + self.pixels = pixels; + self.reference = GetRgbOutputBuffer(self.pixels); + self.output = GetRgbOutputBuffer(self.pixels) + } + RGBA8 => { + let (pixels, input) = GetBgraInputBuffer(); + self.input = input; + self.pixels = pixels; + self.reference = GetRgbaOutputBuffer(self.pixels); + self.output = GetRgbaOutputBuffer(self.pixels); + } + BGRA8 => { + let (pixels, input) = GetRgbaInputBuffer(); + self.input = input; + self.pixels = pixels; + self.reference = GetRgbaOutputBuffer(self.pixels); + self.output = GetRgbaOutputBuffer(self.pixels); + } + _ => unreachable!("Unknown type!"), + } + self.storage_type = ty; + self.pixels > 0 + } + + unsafe fn ClearOutputBuffer(&mut self) { + match self.storage_type { + RGB8 => ClearRgbBuffer(&mut self.output, self.pixels), + RGBA8 | BGRA8 => ClearRgbaBuffer(&mut self.output, self.pixels), + _ => unreachable!("Unknown type!"), + } + } + + unsafe fn ProduceRef(&mut self, trans_fn: transform_fn_t) { + trans_fn.unwrap()( + &*self.transform, + self.input.as_mut_ptr(), + self.reference.as_mut_ptr(), + self.pixels, + ) + } + + fn CopyInputToRef(&mut self) { + let 
pixelSize = match self.storage_type { + RGB8 => 3, + RGBA8 | BGRA8 => 4, + _ => unreachable!("Unknown type!"), + }; + self.reference + .copy_from_slice(&self.input[..self.pixels * pixelSize]) + } + + unsafe fn ProduceOutput(&mut self, trans_fn: transform_fn_t) { + self.ClearOutputBuffer(); + trans_fn.unwrap()( + &*self.transform, + self.input.as_mut_ptr(), + self.output.as_mut_ptr(), + self.pixels, + ) + } + + unsafe fn VerifyOutput(&self, buf: &[u8]) -> bool { + match self.storage_type { + RGB8 => CmpRgbBuffer(buf, &self.output, self.pixels), + RGBA8 => CmpRgbaBuffer(buf, &self.output, self.pixels), + BGRA8 => CmpBgraBuffer(buf, &self.output, self.pixels), + _ => unreachable!("Unknown type!"), + } + } + + unsafe fn ProduceVerifyOutput(&mut self, trans_fn: transform_fn_t) -> bool { + self.ProduceOutput(trans_fn); + self.VerifyOutput(&self.reference) + } + + unsafe fn PrecacheOutput(&mut self) { + qcms_profile_precache_output_transform(&mut *self.out_profile); + self.precache = true; + } + unsafe fn TransformPrecache(&mut self) { + assert_eq!(self.precache, false); + assert!(self.SetBuffers(RGB8)); + assert!(self.SetTransformForType(RGB8)); + self.ProduceRef(Some(qcms_transform_data_rgb_out_lut)); + + self.PrecacheOutput(); + assert!(self.SetTransformForType(RGB8)); + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgb_out_lut_precache))) + } + + unsafe fn TransformPrecachePlatformExt(&mut self) { + self.PrecacheOutput(); + + // Verify RGB transforms. 
+ assert!(self.SetBuffers(RGB8)); + assert!(self.SetTransformForType(RGB8)); + self.ProduceRef(Some(qcms_transform_data_rgb_out_lut_precache)); + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse2") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgb_out_lut_sse2))); + } + if is_x86_feature_detected!("avx") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgb_out_lut_avx))) + } + } + + #[cfg(target_arch = "arm")] + { + if is_arm_feature_detected!("neon") { + assert!(self.ProduceVerifyOutput(qcms_transform_data_rgb_out_lut_neon)) + } + } + + #[cfg(target_arch = "aarch64")] + { + if is_aarch64_feature_detected!("neon") { + assert!(self.ProduceVerifyOutput(qcms_transform_data_rgb_out_lut_neon)) + } + } + + // Verify RGBA transform. + assert!(self.SetBuffers(RGBA8)); + assert!(self.SetTransformForType(RGBA8)); + self.ProduceRef(Some(qcms_transform_data_rgba_out_lut_precache)); + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse2") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgba_out_lut_sse2))); + } + if is_x86_feature_detected!("avx") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgba_out_lut_avx))) + } + } + + #[cfg(target_arch = "arm")] + { + if is_arm_feature_detected!("neon") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgba_out_lut_neon))) + } + } + + #[cfg(target_arch = "aarch64")] + { + if is_aarch64_feature_detected!("neon") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgba_out_lut_neon))) + } + } + + // Verify BGRA transform. 
+ assert!(self.SetBuffers(BGRA8)); + assert!(self.SetTransformForType(BGRA8)); + self.ProduceRef(Some(qcms_transform_data_bgra_out_lut_precache)); + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse2") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_bgra_out_lut_sse2))); + } + if is_x86_feature_detected!("avx") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_bgra_out_lut_avx))) + } + } + + #[cfg(target_arch = "arm")] + { + if is_arm_feature_detected!("neon") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_bgra_out_lut_neon))) + } + } + + #[cfg(target_arch = "aarch64")] + { + if is_aarch64_feature_detected!("neon") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_bgra_out_lut_neon))) + } + } + } + } + + #[test] + fn sRGB_to_sRGB_precache() { + unsafe { + let mut pt = QcmsProfileTest::new(); + pt.SetUp(); + pt.in_profile = qcms_profile_sRGB(); + pt.out_profile = qcms_profile_sRGB(); + pt.TransformPrecache(); + pt.TearDown(); + } + } + + #[test] + fn sRGB_to_sRGB_transform_identity() { + unsafe { + let mut pt = QcmsProfileTest::new(); + pt.SetUp(); + pt.in_profile = qcms_profile_sRGB(); + pt.out_profile = qcms_profile_sRGB(); + pt.PrecacheOutput(); + pt.SetBuffers(RGB8); + pt.SetTransformForType(RGB8); + qcms_transform_data( + &*pt.transform, + pt.input.as_mut_ptr() as *mut c_void, + pt.output.as_mut_ptr() as *mut c_void, + pt.pixels, + ); + assert!(pt.VerifyOutput(&pt.input)); + pt.TearDown(); + } + } + + fn profile_from_path(file: &str) -> *mut Profile { + use std::io::Read; + let mut path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("profiles"); + path.push(file); + let mut file = std::fs::File::open(path).unwrap(); + let mut data = Vec::new(); + file.read_to_end(&mut data).unwrap(); + let profile = + unsafe { qcms_profile_from_memory(data.as_ptr() as *const c_void, data.len()) }; + assert_ne!(profile, std::ptr::null_mut()); + profile + } + 
+ #[test] + fn sRGB_to_ThinkpadW540() { + unsafe { + let mut pt = QcmsProfileTest::new(); + pt.SetUp(); + pt.in_profile = qcms_profile_sRGB(); + pt.out_profile = profile_from_path("lcms_thinkpad_w540.icc"); + pt.TransformPrecachePlatformExt(); + pt.TearDown(); + } + } + + #[test] + fn sRGB_to_SamsungSyncmaster() { + unsafe { + let mut pt = QcmsProfileTest::new(); + pt.SetUp(); + pt.in_profile = qcms_profile_sRGB(); + pt.out_profile = profile_from_path("lcms_samsung_syncmaster.icc"); + pt.TransformPrecachePlatformExt(); + pt.TearDown(); + } + } + + #[test] + fn v4_output() { + qcms_enable_iccv4(); + let input = qcms_profile_sRGB(); + // B2A0-ident.icc was created from the profile in bug 1679621 + // manually edited using iccToXML/iccFromXML + let output = profile_from_path("B2A0-ident.icc"); + + let transform = unsafe { + qcms_transform_create( + &*input, + RGB8, + &*output, + RGB8, + Perceptual, + ) + }; + let src = [0u8, 60, 195]; + let mut dst = [0u8, 0, 0]; + unsafe { + qcms_transform_data( + &*transform, + src.as_ptr() as *const libc::c_void, + dst.as_mut_ptr() as *mut libc::c_void, + 1, + ); + } + assert_eq!(dst, [15, 16, 122]); + unsafe { + qcms_transform_release(transform); + qcms_profile_release(input); + qcms_profile_release(output); + } + } + + #[test] + fn gray_smoke_test() { + let input = crate::Profile::new_gray_with_gamma(2.2); + let output = crate::Profile::new_sRGB(); + let xfm = + transform_create(&input, GrayA8, &output, RGBA8, crate::Intent::default()).unwrap(); + let src = [20u8, 20u8]; + let mut dst = [0u8, 0, 0, 0]; + unsafe { + qcms_transform_data( + &xfm, + src.as_ptr() as *const libc::c_void, + dst.as_mut_ptr() as *mut libc::c_void, + src.len() / GrayA8.bytes_per_pixel(), + ); + } + + } +} + +#[cfg(test)] +mod test { + #[test] + fn identity() { + let p1 = crate::Profile::new_sRGB(); + let p2 = crate::Profile::new_sRGB(); + let xfm = crate::Transform::new( + &p1, + &p2, + crate::DataType::RGB8, + crate::Intent::default(), + ) + .unwrap(); + 
let mut data = [4, 30, 80]; + xfm.apply(&mut data); + assert_eq!(data, [4, 30, 80]); + } +} diff --git a/gfx/qcms/src/iccread.rs b/gfx/qcms/src/iccread.rs new file mode 100644 index 0000000000..f1107d50a8 --- /dev/null +++ b/gfx/qcms/src/iccread.rs @@ -0,0 +1,1179 @@ +/* vim: set ts=8 sw=8 noexpandtab: */ +// qcms +// Copyright (C) 2009 Mozilla Foundation +// Copyright (C) 1998-2007 Marti Maria +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/// Process-wide flag for ICC v4 support. Its initial value is `true` only
/// when the crate is compiled with the `iccv4-enabled` feature.
pub static SUPPORTS_ICCV4: AtomicBool = AtomicBool::new(cfg!(feature = "iccv4-enabled"));

// Four-character signatures packed big-endian into a u32.
pub const RGB_SIGNATURE: u32 = 0x52474220; // 'RGB '
pub const GRAY_SIGNATURE: u32 = 0x47524159; // 'GRAY'
pub const XYZ_SIGNATURE: u32 = 0x58595A20; // 'XYZ '
pub const LAB_SIGNATURE: u32 = 0x4C616220; // 'Lab '

/// A color profile
#[repr(C)]
#[derive(Default)]
pub struct Profile {
    // Profile/device class signature (header offset 12).
    pub(crate) class_type: u32,
    // Device color space signature (header offset 16).
    pub(crate) color_space: u32,
    // Profile connection space signature (header offset 20).
    pub(crate) pcs: u32,
    // Rendering intent (header offset 64).
    pub(crate) rendering_intent: Intent,
    // Colorants; note the declaration order is red, blue, green.
    pub(crate) redColorant: XYZNumber,
    pub(crate) blueColorant: XYZNumber,
    pub(crate) greenColorant: XYZNumber,
    // Per-channel tone reproduction curves; `None` when not set.
    pub(crate) redTRC: Option<Box<curveType>>,
    pub(crate) blueTRC: Option<Box<curveType>>,
    pub(crate) greenTRC: Option<Box<curveType>>,
    pub(crate) grayTRC: Option<Box<curveType>>,
    // Lookup-table transforms in lut8/lut16 form.
    pub(crate) A2B0: Option<Box<lutType>>,
    pub(crate) B2A0: Option<Box<lutType>>,
    // Lookup-table transforms in 'mAB '/'mBA ' form.
    pub(crate) mAB: Option<Box<lutmABType>>,
    pub(crate) mBA: Option<Box<lutmABType>>,
    // Chromatic adaptation matrix; its `invalid` flag marks "not present".
    pub(crate) chromaticAdaption: Matrix,
    // Precached per-channel output tables, shared through `Arc`.
    pub(crate) output_table_r: Option<Arc<PrecacheOuput>>,
    pub(crate) output_table_g: Option<Arc<PrecacheOuput>>,
    pub(crate) output_table_b: Option<Arc<PrecacheOuput>>,
}

/// Parsed contents of an 'mAB ' or 'mBA ' multi-stage lookup-table tag.
#[derive(Default)]
pub(crate) struct lutmABType {
    pub num_in_channels: u8,
    pub num_out_channels: u8,
    // 16 is the upperbound, actual is 0..num_in_channels.
    pub num_grid_points: [u8; 16],
    // Matrix entries as raw s15Fixed16 values: eRC for row R, column C.
    // Column 3 (e03, e13, e23) is stored after the 3x3 part in the tag.
    pub e00: s15Fixed16Number,
    pub e01: s15Fixed16Number,
    pub e02: s15Fixed16Number,
    pub e03: s15Fixed16Number,
    pub e10: s15Fixed16Number,
    pub e11: s15Fixed16Number,
    pub e12: s15Fixed16Number,
    pub e13: s15Fixed16Number,
    pub e20: s15Fixed16Number,
    pub e21: s15Fixed16Number,
    pub e22: s15Fixed16Number,
    pub e23: s15Fixed16Number,
    // reversed elements (for mBA)
    pub reversed: bool,
    // CLUT samples normalized to floats; `None` when the tag has no CLUT.
    pub clut_table: Option<Vec<f32>>,
    // Optional per-channel curves; unused slots stay `None`.
    pub a_curves: [Option<Box<curveType>>; MAX_CHANNELS],
    pub b_curves: [Option<Box<curveType>>; MAX_CHANNELS],
    pub m_curves: [Option<Box<curveType>>; MAX_CHANNELS],
}

/// A tone curve in one of the two encodings the parser accepts.
pub(crate) enum curveType {
    // 'curv': table of 16-bit samples. A single entry is used by
    // `curve_from_gamma` to hold a u8Fixed8 gamma value.
    Curve(Vec<uInt16Number>),
    // 'para': parameters of a parametric curve, converted to floats.
    Parametric(Vec<f32>),
}
type uInt16Number = u16;

/* should lut8Type and lut16Type be different types? */
/// Parsed contents of a lut8Type ('mft1') or lut16Type ('mft2') tag.
pub(crate) struct lutType {
    // used by lut8Type/lut16Type (mft2) only
    pub num_input_channels: u8,
    pub num_output_channels: u8,
    pub num_clut_grid_points: u8,
    // 3x3 matrix entries as raw s15Fixed16 values: eRC for row R, column C.
    pub e00: s15Fixed16Number,
    pub e01: s15Fixed16Number,
    pub e02: s15Fixed16Number,
    pub e10: s15Fixed16Number,
    pub e11: s15Fixed16Number,
    pub e12: s15Fixed16Number,
    pub e20: s15Fixed16Number,
    pub e21: s15Fixed16Number,
    pub e22: s15Fixed16Number,
    // Entries per channel in the input/output tables (256 for lut8Type).
    pub num_input_table_entries: u16,
    pub num_output_table_entries: u16,
    // Table samples normalized to floats.
    pub input_table: Vec<f32>,
    pub clut_table: Vec<f32>,
    pub output_table: Vec<f32>,
}

/// An XYZ triple stored as raw s15Fixed16 values.
#[repr(C)]
#[derive(Copy, Clone, Default)]
pub struct XYZNumber {
    pub X: s15Fixed16Number,
    pub Y: s15Fixed16Number,
    pub Z: s15Fixed16Number,
}

/// A color in the CIE xyY color space
/* the names for the following two types are sort of ugly */
#[repr(C)]
#[derive(Copy, Clone)]
pub struct qcms_CIE_xyY {
    pub x: f64,
    pub y: f64,
    pub Y: f64,
}
qcms_CIE_xyY, +} + +struct Tag { + signature: u32, + offset: u32, + size: u32, +} + +/* It might be worth having a unified limit on content controlled + * allocation per profile. This would remove the need for many + * of the arbitrary limits that we used */ +pub type be32 = u32; +pub type be16 = u16; + +type TagIndex = [Tag]; + +/* a wrapper around the memory that we are going to parse + * into a qcms_profile */ +struct MemSource<'a> { + buf: &'a [u8], + valid: bool, + invalid_reason: Option<&'static str>, +} +pub type uInt8Number = u8; +#[inline] +fn uInt8Number_to_float(a: uInt8Number) -> f32 { + a as f32 / 255.0 +} + +#[inline] +fn uInt16Number_to_float(a: uInt16Number) -> f32 { + a as f32 / 65535.0 +} + +fn cpu_to_be32(v: u32) -> be32 { + v.to_be() +} +fn cpu_to_be16(v: u16) -> be16 { + v.to_be() +} +fn be32_to_cpu(v: be32) -> u32 { + u32::from_be(v) +} +fn be16_to_cpu(v: be16) -> u16 { + u16::from_be(v) +} +fn invalid_source(mut mem: &mut MemSource, reason: &'static str) { + mem.valid = false; + mem.invalid_reason = Some(reason); +} +fn read_u32(mem: &mut MemSource, offset: usize) -> u32 { + /* Subtract from mem->size instead of the more intuitive adding to offset. + * This avoids overflowing offset. 
The subtraction is safe because + * mem->size is guaranteed to be > 4 */ + if offset > mem.buf.len() - 4 { + invalid_source(mem, "Invalid offset"); + 0 + } else { + let k = unsafe { std::ptr::read_unaligned(mem.buf.as_ptr().add(offset) as *const be32) }; + be32_to_cpu(k) + } +} +fn read_u16(mem: &mut MemSource, offset: usize) -> u16 { + if offset > mem.buf.len() - 2 { + invalid_source(mem, "Invalid offset"); + 0u16 + } else { + let k = unsafe { std::ptr::read_unaligned(mem.buf.as_ptr().add(offset) as *const be16) }; + be16_to_cpu(k) + } +} +fn read_u8(mem: &mut MemSource, offset: usize) -> u8 { + if offset > mem.buf.len() - 1 { + invalid_source(mem, "Invalid offset"); + 0u8 + } else { + unsafe { *(mem.buf.as_ptr().add(offset) as *mut u8) } + } +} +fn read_s15Fixed16Number(mem: &mut MemSource, offset: usize) -> s15Fixed16Number { + read_u32(mem, offset) as s15Fixed16Number +} +fn read_uInt8Number(mem: &mut MemSource, offset: usize) -> uInt8Number { + read_u8(mem, offset) +} +fn read_uInt16Number(mem: &mut MemSource, offset: usize) -> uInt16Number { + read_u16(mem, offset) +} +pub fn write_u32(mem: &mut [u8], offset: usize, value: u32) { + if offset > mem.len() - std::mem::size_of_val(&value) { + panic!("OOB"); + } + let mem = mem.as_mut_ptr(); + unsafe { + std::ptr::write_unaligned(mem.add(offset) as *mut u32, cpu_to_be32(value)); + } +} +pub fn write_u16(mem: &mut [u8], offset: usize, value: u16) { + if offset > mem.len() - std::mem::size_of_val(&value) { + panic!("OOB"); + } + let mem = mem.as_mut_ptr(); + unsafe { + std::ptr::write_unaligned(mem.add(offset) as *mut u16, cpu_to_be16(value)); + } +} + +/* An arbitrary 4MB limit on profile size */ +pub(crate) const MAX_PROFILE_SIZE: usize = 1024 * 1024 * 4; +const MAX_TAG_COUNT: u32 = 1024; + +fn check_CMM_type_signature(_src: &mut MemSource) { + //uint32_t CMM_type_signature = read_u32(src, 4); + //TODO: do the check? 
+} +fn check_profile_version(src: &mut MemSource) { + /* + uint8_t major_revision = read_u8(src, 8 + 0); + uint8_t minor_revision = read_u8(src, 8 + 1); + */ + let reserved1: u8 = read_u8(src, (8 + 2) as usize); + let reserved2: u8 = read_u8(src, (8 + 3) as usize); + /* Checking the version doesn't buy us anything + if (major_revision != 0x4) { + if (major_revision > 0x2) + invalid_source(src, "Unsupported major revision"); + if (minor_revision > 0x40) + invalid_source(src, "Unsupported minor revision"); + } + */ + if reserved1 != 0 || reserved2 != 0 { + invalid_source(src, "Invalid reserved bytes"); + }; +} + +const INPUT_DEVICE_PROFILE: u32 = 0x73636e72; // 'scnr' +pub const DISPLAY_DEVICE_PROFILE: u32 = 0x6d6e7472; // 'mntr' +const OUTPUT_DEVICE_PROFILE: u32 = 0x70727472; // 'prtr' +const DEVICE_LINK_PROFILE: u32 = 0x6c696e6b; // 'link' +const COLOR_SPACE_PROFILE: u32 = 0x73706163; // 'spac' +const ABSTRACT_PROFILE: u32 = 0x61627374; // 'abst' +const NAMED_COLOR_PROFILE: u32 = 0x6e6d636c; // 'nmcl' + +fn read_class_signature(mut profile: &mut Profile, mem: &mut MemSource) { + profile.class_type = read_u32(mem, 12); + match profile.class_type { + DISPLAY_DEVICE_PROFILE + | INPUT_DEVICE_PROFILE + | OUTPUT_DEVICE_PROFILE + | COLOR_SPACE_PROFILE => {} + _ => { + invalid_source(mem, "Invalid Profile/Device Class signature"); + } + }; +} +fn read_color_space(mut profile: &mut Profile, mem: &mut MemSource) { + profile.color_space = read_u32(mem, 16); + match profile.color_space { + RGB_SIGNATURE | GRAY_SIGNATURE => {} + _ => { + invalid_source(mem, "Unsupported colorspace"); + } + }; +} +fn read_pcs(mut profile: &mut Profile, mem: &mut MemSource) { + profile.pcs = read_u32(mem, 20); + match profile.pcs { + XYZ_SIGNATURE | LAB_SIGNATURE => {} + _ => { + invalid_source(mem, "Unsupported pcs"); + } + }; +} +fn read_tag_table(_profile: &mut Profile, mem: &mut MemSource) -> Vec<Tag> { + let count = read_u32(mem, 128); + if count > MAX_TAG_COUNT { + invalid_source(mem, "max 
number of tags exceeded"); + return Vec::new(); + } + let mut index = Vec::with_capacity(count as usize); + for i in 0..count { + index.push(Tag { + signature: read_u32(mem, (128 + 4 + 4 * i * 3) as usize), + offset: read_u32(mem, (128 + 4 + 4 * i * 3 + 4) as usize), + size: read_u32(mem, (128 + 4 + 4 * i * 3 + 8) as usize), + }); + } + + index +} + +/// Checks a profile for obvious inconsistencies and returns +/// true if the profile looks bogus and should probably be +/// ignored. +#[no_mangle] +pub extern "C" fn qcms_profile_is_bogus(profile: &mut Profile) -> bool { + let mut sum: [f32; 3] = [0.; 3]; + let mut target: [f32; 3] = [0.; 3]; + let mut tolerance: [f32; 3] = [0.; 3]; + let rX: f32; + let rY: f32; + let rZ: f32; + let gX: f32; + let gY: f32; + let gZ: f32; + let bX: f32; + let bY: f32; + let bZ: f32; + let negative: bool; + let mut i: u32; + // We currently only check the bogosity of RGB profiles + if profile.color_space != RGB_SIGNATURE { + return false; + } + if profile.A2B0.is_some() + || profile.B2A0.is_some() + || profile.mAB.is_some() + || profile.mBA.is_some() + { + return false; + } + rX = s15Fixed16Number_to_float(profile.redColorant.X); + rY = s15Fixed16Number_to_float(profile.redColorant.Y); + rZ = s15Fixed16Number_to_float(profile.redColorant.Z); + gX = s15Fixed16Number_to_float(profile.greenColorant.X); + gY = s15Fixed16Number_to_float(profile.greenColorant.Y); + gZ = s15Fixed16Number_to_float(profile.greenColorant.Z); + bX = s15Fixed16Number_to_float(profile.blueColorant.X); + bY = s15Fixed16Number_to_float(profile.blueColorant.Y); + bZ = s15Fixed16Number_to_float(profile.blueColorant.Z); + // Sum the values; they should add up to something close to white + sum[0] = rX + gX + bX; + sum[1] = rY + gY + bY; + sum[2] = rZ + gZ + bZ; + // Build our target vector (see mozilla bug 460629) + target[0] = 0.96420; + target[1] = 1.00000; + target[2] = 0.82491; + // Our tolerance vector - Recommended by Chris Murphy based on + // conversion from the 
LAB space criterion of no more than 3 in any one + // channel. This is similar to, but slightly more tolerant than Adobe's + // criterion. + tolerance[0] = 0.02; + tolerance[1] = 0.02; + tolerance[2] = 0.04; + // Compare with our tolerance + i = 0; + while i < 3 { + if !(sum[i as usize] - tolerance[i as usize] <= target[i as usize] + && sum[i as usize] + tolerance[i as usize] >= target[i as usize]) + { + return true; + } + i += 1 + } + if !cfg!(target_os = "macos") { + negative = (rX < 0.) + || (rY < 0.) + || (rZ < 0.) + || (gX < 0.) + || (gY < 0.) + || (gZ < 0.) + || (bX < 0.) + || (bY < 0.) + || (bZ < 0.); + } else { + // Chromatic adaption to D50 can result in negative XYZ, but the white + // point D50 tolerance test has passed. Accept negative values herein. + // See https://bugzilla.mozilla.org/show_bug.cgi?id=498245#c18 onwards + // for discussion about whether profile XYZ can or cannot be negative, + // per the spec. Also the https://bugzil.la/450923 user report. + + // FIXME: allow this relaxation on all ports? 
+ negative = false; // bogus + } + if negative { + return true; + } + // All Good + false +} + +pub const TAG_bXYZ: u32 = 0x6258595a; +pub const TAG_gXYZ: u32 = 0x6758595a; +pub const TAG_rXYZ: u32 = 0x7258595a; +pub const TAG_rTRC: u32 = 0x72545243; +pub const TAG_bTRC: u32 = 0x62545243; +pub const TAG_gTRC: u32 = 0x67545243; +pub const TAG_kTRC: u32 = 0x6b545243; +pub const TAG_A2B0: u32 = 0x41324230; +pub const TAG_B2A0: u32 = 0x42324130; +pub const TAG_CHAD: u32 = 0x63686164; + +fn find_tag(index: &TagIndex, tag_id: u32) -> Option<&Tag> { + for t in index { + if t.signature == tag_id { + return Some(t); + } + } + None +} + +pub const XYZ_TYPE: u32 = 0x58595a20; // 'XYZ ' +pub const CURVE_TYPE: u32 = 0x63757276; // 'curv' +pub const PARAMETRIC_CURVE_TYPE: u32 = 0x70617261; // 'para' +pub const LUT16_TYPE: u32 = 0x6d667432; // 'mft2' +pub const LUT8_TYPE: u32 = 0x6d667431; // 'mft1' +pub const LUT_MAB_TYPE: u32 = 0x6d414220; // 'mAB ' +pub const LUT_MBA_TYPE: u32 = 0x6d424120; // 'mBA ' +pub const CHROMATIC_TYPE: u32 = 0x73663332; // 'sf32' + +fn read_tag_s15Fixed16ArrayType(src: &mut MemSource, index: &TagIndex, tag_id: u32) -> Matrix { + let tag = find_tag(index, tag_id); + let mut matrix: Matrix = Matrix { + m: [[0.; 3]; 3], + invalid: false, + }; + if let Some(tag) = tag { + let offset: u32 = tag.offset; + let type_0: u32 = read_u32(src, offset as usize); + // Check mandatory type signature for s16Fixed16ArrayType + if type_0 != CHROMATIC_TYPE { + invalid_source(src, "unexpected type, expected \'sf32\'"); + } + for i in 0..=8 { + matrix.m[(i / 3) as usize][(i % 3) as usize] = s15Fixed16Number_to_float( + read_s15Fixed16Number(src, (offset + 8 + (i * 4) as u32) as usize), + ); + } + matrix.invalid = false + } else { + matrix.invalid = true; + invalid_source(src, "missing sf32tag"); + } + matrix +} +fn read_tag_XYZType(src: &mut MemSource, index: &TagIndex, tag_id: u32) -> XYZNumber { + let mut num: XYZNumber = { + let init = XYZNumber { X: 0, Y: 0, Z: 0 }; + 
init + }; + let tag = find_tag(&index, tag_id); + if let Some(tag) = tag { + let offset: u32 = tag.offset; + let type_0: u32 = read_u32(src, offset as usize); + if type_0 != XYZ_TYPE { + invalid_source(src, "unexpected type, expected XYZ"); + } + num.X = read_s15Fixed16Number(src, (offset + 8) as usize); + num.Y = read_s15Fixed16Number(src, (offset + 12) as usize); + num.Z = read_s15Fixed16Number(src, (offset + 16) as usize) + } else { + invalid_source(src, "missing xyztag"); + } + num +} +// Read the tag at a given offset rather then the tag_index. +// This method is used when reading mAB tags where nested curveType are +// present that are not part of the tag_index. +fn read_curveType(src: &mut MemSource, offset: u32, len: &mut u32) -> Option<Box<curveType>> { + const COUNT_TO_LENGTH: [u32; 5] = [1, 3, 4, 5, 7]; //PARAMETRIC_CURVE_TYPE + let type_0: u32 = read_u32(src, offset as usize); + let count: u32; + if type_0 != CURVE_TYPE && type_0 != PARAMETRIC_CURVE_TYPE { + invalid_source(src, "unexpected type, expected CURV or PARA"); + return None; + } + if type_0 == CURVE_TYPE { + count = read_u32(src, (offset + 8) as usize); + //arbitrary + if count > 40000 { + invalid_source(src, "curve size too large"); + return None; + } + let mut table = Vec::with_capacity(count as usize); + for i in 0..count { + table.push(read_u16(src, (offset + 12 + i * 2) as usize)); + } + *len = 12 + count * 2; + Some(Box::new(curveType::Curve(table))) + } else { + count = read_u16(src, (offset + 8) as usize) as u32; + if count > 4 { + invalid_source(src, "parametric function type not supported."); + return None; + } + let mut params = Vec::with_capacity(count as usize); + for i in 0..COUNT_TO_LENGTH[count as usize] { + params.push(s15Fixed16Number_to_float(read_s15Fixed16Number( + src, + (offset + 12 + i * 4) as usize, + ))); + } + *len = 12 + COUNT_TO_LENGTH[count as usize] * 4; + if count == 1 || count == 2 { + /* we have a type 1 or type 2 function that has a division by 'a' */ + let 
a: f32 = params[1]; + if a == 0.0 { + invalid_source(src, "parametricCurve definition causes division by zero"); + } + } + Some(Box::new(curveType::Parametric(params))) + } +} +fn read_tag_curveType( + src: &mut MemSource, + index: &TagIndex, + tag_id: u32, +) -> Option<Box<curveType>> { + let tag = find_tag(index, tag_id); + if let Some(tag) = tag { + let mut len: u32 = 0; + return read_curveType(src, tag.offset, &mut len); + } else { + invalid_source(src, "missing curvetag"); + } + None +} + +const MAX_LUT_SIZE: u32 = 500000; // arbitrary +const MAX_CHANNELS: usize = 10; // arbitrary +fn read_nested_curveType( + src: &mut MemSource, + curveArray: &mut [Option<Box<curveType>>; MAX_CHANNELS], + num_channels: u8, + curve_offset: u32, +) { + let mut channel_offset: u32 = 0; + for i in 0..usize::from(num_channels) { + let mut tag_len: u32 = 0; + curveArray[i] = read_curveType(src, curve_offset + channel_offset, &mut tag_len); + if curveArray[i].is_none() { + invalid_source(src, "invalid nested curveType curve"); + break; + } else { + channel_offset += tag_len; + // 4 byte aligned + if tag_len % 4 != 0 { + channel_offset += 4 - tag_len % 4 + } + } + } +} + +/* See section 10.10 for specs */ +fn read_tag_lutmABType(src: &mut MemSource, tag: &Tag) -> Option<Box<lutmABType>> { + let offset: u32 = tag.offset; + let mut clut_size: u32 = 1; + let type_0: u32 = read_u32(src, offset as usize); + if type_0 != LUT_MAB_TYPE && type_0 != LUT_MBA_TYPE { + return None; + } + let num_in_channels = read_u8(src, (offset + 8) as usize); + let num_out_channels = read_u8(src, (offset + 9) as usize); + if num_in_channels > 10 || num_out_channels > 10 { + return None; + } + // We require 3in/out channels since we only support RGB->XYZ (or RGB->LAB) + // XXX: If we remove this restriction make sure that the number of channels + // is less or equal to the maximum number of mAB curves in qcmsint.h + // also check for clut_size overflow. 
Also make sure it's != 0 + if num_in_channels != 3 || num_out_channels != 3 { + return None; + } + // some of this data is optional and is denoted by a zero offset + // we also use this to track their existance + let mut a_curve_offset = read_u32(src, (offset + 28) as usize); + let mut clut_offset = read_u32(src, (offset + 24) as usize); + let mut m_curve_offset = read_u32(src, (offset + 20) as usize); + let mut matrix_offset = read_u32(src, (offset + 16) as usize); + let mut b_curve_offset = read_u32(src, (offset + 12) as usize); + // Convert offsets relative to the tag to relative to the profile + // preserve zero for optional fields + if a_curve_offset != 0 { + a_curve_offset += offset + } + if clut_offset != 0 { + clut_offset += offset + } + if m_curve_offset != 0 { + m_curve_offset += offset + } + if matrix_offset != 0 { + matrix_offset += offset + } + if b_curve_offset != 0 { + b_curve_offset += offset + } + if clut_offset != 0 { + debug_assert!(num_in_channels == 3); + // clut_size can not overflow since lg(256^num_in_channels) = 24 bits. + for i in 0..u32::from(num_in_channels) { + clut_size *= read_u8(src, (clut_offset + i) as usize) as u32; + if clut_size == 0 { + invalid_source(src, "bad clut_size"); + } + } + } else { + clut_size = 0 + } + // 24bits * 3 won't overflow either + clut_size *= num_out_channels as u32; + if clut_size > MAX_LUT_SIZE { + return None; + } + + let mut lut = Box::new(lutmABType::default()); + + if clut_offset != 0 { + for i in 0..usize::from(num_in_channels) { + lut.num_grid_points[i] = read_u8(src, clut_offset as usize + i); + if lut.num_grid_points[i] == 0 { + invalid_source(src, "bad grid_points"); + } + } + } + // Reverse the processing of transformation elements for mBA type. 
+ lut.reversed = type_0 == LUT_MBA_TYPE; + lut.num_in_channels = num_in_channels; + lut.num_out_channels = num_out_channels; + if matrix_offset != 0 { + // read the matrix if we have it + lut.e00 = read_s15Fixed16Number(src, (matrix_offset + (4 * 0) as u32) as usize); // the caller checks that this doesn't happen + lut.e01 = read_s15Fixed16Number(src, (matrix_offset + (4 * 1) as u32) as usize); + lut.e02 = read_s15Fixed16Number(src, (matrix_offset + (4 * 2) as u32) as usize); + lut.e10 = read_s15Fixed16Number(src, (matrix_offset + (4 * 3) as u32) as usize); + lut.e11 = read_s15Fixed16Number(src, (matrix_offset + (4 * 4) as u32) as usize); + lut.e12 = read_s15Fixed16Number(src, (matrix_offset + (4 * 5) as u32) as usize); + lut.e20 = read_s15Fixed16Number(src, (matrix_offset + (4 * 6) as u32) as usize); + lut.e21 = read_s15Fixed16Number(src, (matrix_offset + (4 * 7) as u32) as usize); + lut.e22 = read_s15Fixed16Number(src, (matrix_offset + (4 * 8) as u32) as usize); + lut.e03 = read_s15Fixed16Number(src, (matrix_offset + (4 * 9) as u32) as usize); + lut.e13 = read_s15Fixed16Number(src, (matrix_offset + (4 * 10) as u32) as usize); + lut.e23 = read_s15Fixed16Number(src, (matrix_offset + (4 * 11) as u32) as usize) + } + if a_curve_offset != 0 { + read_nested_curveType(src, &mut lut.a_curves, num_in_channels, a_curve_offset); + } + if m_curve_offset != 0 { + read_nested_curveType(src, &mut lut.m_curves, num_out_channels, m_curve_offset); + } + if b_curve_offset != 0 { + read_nested_curveType(src, &mut lut.b_curves, num_out_channels, b_curve_offset); + } else { + invalid_source(src, "B curves required"); + } + if clut_offset != 0 { + let clut_precision = read_u8(src, (clut_offset + 16) as usize); + let mut clut_table = Vec::with_capacity(clut_size as usize); + if clut_precision == 1 { + for i in 0..clut_size { + clut_table.push(uInt8Number_to_float(read_uInt8Number( + src, + (clut_offset + 20 + i * 1) as usize, + ))); + } + lut.clut_table = Some(clut_table); + } else if 
clut_precision == 2 { + for i in 0..clut_size { + clut_table.push(uInt16Number_to_float(read_uInt16Number( + src, + (clut_offset + 20 + i * 2) as usize, + ))); + } + lut.clut_table = Some(clut_table); + } else { + invalid_source(src, "Invalid clut precision"); + } + } + if !src.valid { + return None; + } + Some(lut) +} +fn read_tag_lutType(src: &mut MemSource, tag: &Tag) -> Option<Box<lutType>> { + let offset: u32 = tag.offset; + let type_0: u32 = read_u32(src, offset as usize); + let num_input_table_entries: u16; + let num_output_table_entries: u16; + let input_offset: u32; + let entry_size: usize; + if type_0 == LUT8_TYPE { + num_input_table_entries = 256u16; + num_output_table_entries = 256u16; + entry_size = 1; + input_offset = 48 + } else if type_0 == LUT16_TYPE { + num_input_table_entries = read_u16(src, (offset + 48) as usize); + num_output_table_entries = read_u16(src, (offset + 50) as usize); + + // these limits come from the spec + if num_input_table_entries < 2 + || num_input_table_entries > 4096 + || num_output_table_entries < 2 + || num_output_table_entries > 4096 + { + invalid_source(src, "Bad channel count"); + return None; + } + entry_size = 2; + input_offset = 52 + } else { + debug_assert!(false); + invalid_source(src, "Unexpected lut type"); + return None; + } + let in_chan = read_u8(src, (offset + 8) as usize); + let out_chan = read_u8(src, (offset + 9) as usize); + let grid_points = read_u8(src, (offset + 10) as usize); + let clut_size = (grid_points as f64).powf(in_chan as f64) as u32; + if clut_size > MAX_LUT_SIZE { + invalid_source(src, "CLUT too large"); + return None; + } + if clut_size <= 0 { + invalid_source(src, "CLUT must not be empty."); + return None; + } + if in_chan != 3 || out_chan != 3 { + invalid_source(src, "CLUT only supports RGB"); + return None; + } + + let e00 = read_s15Fixed16Number(src, (offset + 12) as usize); + let e01 = read_s15Fixed16Number(src, (offset + 16) as usize); + let e02 = read_s15Fixed16Number(src, (offset + 
20) as usize); + let e10 = read_s15Fixed16Number(src, (offset + 24) as usize); + let e11 = read_s15Fixed16Number(src, (offset + 28) as usize); + let e12 = read_s15Fixed16Number(src, (offset + 32) as usize); + let e20 = read_s15Fixed16Number(src, (offset + 36) as usize); + let e21 = read_s15Fixed16Number(src, (offset + 40) as usize); + let e22 = read_s15Fixed16Number(src, (offset + 44) as usize); + + let mut input_table = Vec::with_capacity((num_input_table_entries * in_chan as u16) as usize); + for i in 0..(num_input_table_entries * in_chan as u16) { + if type_0 == LUT8_TYPE { + input_table.push(uInt8Number_to_float(read_uInt8Number( + src, + (offset + input_offset) as usize + i as usize * entry_size, + ))) + } else { + input_table.push(uInt16Number_to_float(read_uInt16Number( + src, + (offset + input_offset) as usize + i as usize * entry_size, + ))) + } + } + let clut_offset = ((offset + input_offset) as usize + + (num_input_table_entries as i32 * in_chan as i32) as usize * entry_size) + as u32; + + let mut clut_table = Vec::with_capacity((clut_size * out_chan as u32) as usize); + for i in (0..clut_size * out_chan as u32).step_by(3) { + if type_0 == LUT8_TYPE { + clut_table.push(uInt8Number_to_float(read_uInt8Number( + src, + clut_offset as usize + i as usize * entry_size + 0, + ))); + clut_table.push(uInt8Number_to_float(read_uInt8Number( + src, + clut_offset as usize + i as usize * entry_size + 1, + ))); + clut_table.push(uInt8Number_to_float(read_uInt8Number( + src, + clut_offset as usize + i as usize * entry_size + 2, + ))) + } else { + clut_table.push(uInt16Number_to_float(read_uInt16Number( + src, + clut_offset as usize + i as usize * entry_size + 0, + ))); + clut_table.push(uInt16Number_to_float(read_uInt16Number( + src, + clut_offset as usize + i as usize * entry_size + 2, + ))); + clut_table.push(uInt16Number_to_float(read_uInt16Number( + src, + clut_offset as usize + i as usize * entry_size + 4, + ))) + } + } + let output_offset = + (clut_offset as usize 
+ (clut_size * out_chan as u32) as usize * entry_size) as u32; + + let mut output_table = + Vec::with_capacity((num_output_table_entries * out_chan as u16) as usize); + for i in 0..num_output_table_entries as i32 * out_chan as i32 { + if type_0 == LUT8_TYPE { + output_table.push(uInt8Number_to_float(read_uInt8Number( + src, + output_offset as usize + i as usize * entry_size, + ))) + } else { + output_table.push(uInt16Number_to_float(read_uInt16Number( + src, + output_offset as usize + i as usize * entry_size, + ))) + } + } + Some(Box::new(lutType { + num_input_table_entries, + num_output_table_entries, + num_input_channels: in_chan, + num_output_channels: out_chan, + num_clut_grid_points: grid_points, + e00, + e01, + e02, + e10, + e11, + e12, + e20, + e21, + e22, + input_table, + clut_table, + output_table, + })) +} +fn read_rendering_intent(mut profile: &mut Profile, src: &mut MemSource) { + let intent = read_u32(src, 64); + profile.rendering_intent = match intent { + x if x == Perceptual as u32 => Perceptual, + x if x == RelativeColorimetric as u32 => RelativeColorimetric, + x if x == Saturation as u32 => Saturation, + x if x == AbsoluteColorimetric as u32 => AbsoluteColorimetric, + _ => { + invalid_source(src, "unknown rendering intent"); + Intent::default() + } + }; +} +fn profile_create() -> Box<Profile> { + Box::new(Profile::default()) +} +/* build sRGB gamma table */ +/* based on cmsBuildParametricGamma() */ +fn build_sRGB_gamma_table(num_entries: i32) -> Vec<u16> { + /* taken from lcms: Build_sRGBGamma() */ + let gamma: f64 = 2.4; + let a: f64 = 1.0 / 1.055; + let b: f64 = 0.055 / 1.055; + let c: f64 = 1.0 / 12.92; + let d: f64 = 0.04045; + let mut table = Vec::with_capacity(num_entries as usize); + + for i in 0..num_entries { + let x: f64 = i as f64 / (num_entries - 1) as f64; + let y: f64; + let mut output: f64; + // IEC 61966-2.1 (sRGB) + // Y = (aX + b)^Gamma | X >= d + // Y = cX | X < d + if x >= d { + let e: f64 = a * x + b; + if e > 0. 
{ + y = e.powf(gamma) + } else { + y = 0. + } + } else { + y = c * x + } + // Saturate -- this could likely move to a separate function + output = y * 65535.0 + 0.5; + if output > 65535.0 { + output = 65535.0 + } + if output < 0.0 { + output = 0.0 + } + table.push(output.floor() as u16); + } + table +} +fn curve_from_table(table: &[u16]) -> Box<curveType> { + Box::new(curveType::Curve(table.to_vec())) +} +pub fn float_to_u8Fixed8Number(a: f32) -> u16 { + if a > 255.0 + 255.0 / 256f32 { + 0xffffu16 + } else if a < 0.0 { + 0u16 + } else { + (a * 256.0 + 0.5).floor() as u16 + } +} + +fn curve_from_gamma(gamma: f32) -> Box<curveType> { + Box::new(curveType::Curve(vec![float_to_u8Fixed8Number(gamma)])) +} + +/* from lcms: cmsWhitePointFromTemp */ +/* tempK must be >= 4000. and <= 25000. + * Invalid values of tempK will return + * (x,y,Y) = (-1.0, -1.0, -1.0) + * similar to argyll: icx_DTEMP2XYZ() */ +fn white_point_from_temp(temp_K: i32) -> qcms_CIE_xyY { + let mut white_point: qcms_CIE_xyY = qcms_CIE_xyY { + x: 0., + y: 0., + Y: 0., + }; + // No optimization provided. 
+ let T = temp_K as f64; // Square + let T2 = T * T; // Cube + let T3 = T2 * T; + // For correlated color temperature (T) between 4000K and 7000K: + let x = if T >= 4000.0 && T <= 7000.0 { + -4.6070 * (1E9 / T3) + 2.9678 * (1E6 / T2) + 0.09911 * (1E3 / T) + 0.244063 + } else if T > 7000.0 && T <= 25000.0 { + -2.0064 * (1E9 / T3) + 1.9018 * (1E6 / T2) + 0.24748 * (1E3 / T) + 0.237040 + } else { + // or for correlated color temperature (T) between 7000K and 25000K: + // Invalid tempK + white_point.x = -1.0; + white_point.y = -1.0; + white_point.Y = -1.0; + debug_assert!(false, "invalid temp"); + return white_point; + }; + // Obtain y(x) + let y = -3.000 * (x * x) + 2.870 * x - 0.275; + // wave factors (not used, but here for futures extensions) + // let M1 = (-1.3515 - 1.7703*x + 5.9114 *y)/(0.0241 + 0.2562*x - 0.7341*y); + // let M2 = (0.0300 - 31.4424*x + 30.0717*y)/(0.0241 + 0.2562*x - 0.7341*y); + // Fill white_point struct + white_point.x = x; + white_point.y = y; + white_point.Y = 1.0; + white_point +} +#[no_mangle] +pub extern "C" fn qcms_white_point_sRGB() -> qcms_CIE_xyY { + white_point_from_temp(6504) +} + +impl Profile { + //XXX: it would be nice if we had a way of ensuring + // everything in a profile was initialized regardless of how it was created + //XXX: should this also be taking a black_point? 
+ /* similar to CGColorSpaceCreateCalibratedRGB */ + pub fn new_rgb_with_table( + white_point: qcms_CIE_xyY, + primaries: qcms_CIE_xyYTRIPLE, + table: &[u16], + ) -> Option<Box<Profile>> { + let mut profile = profile_create(); + //XXX: should store the whitepoint + if !set_rgb_colorants(&mut profile, white_point, primaries) { + return None; + } + profile.redTRC = Some(curve_from_table(table)); + profile.blueTRC = Some(curve_from_table(table)); + profile.greenTRC = Some(curve_from_table(table)); + profile.class_type = DISPLAY_DEVICE_PROFILE; + profile.rendering_intent = Perceptual; + profile.color_space = RGB_SIGNATURE; + profile.pcs = XYZ_TYPE; + Some(profile) + } + pub fn new_sRGB() -> Box<Profile> { + let Rec709Primaries = qcms_CIE_xyYTRIPLE { + red: { + qcms_CIE_xyY { + x: 0.6400, + y: 0.3300, + Y: 1.0, + } + }, + green: { + qcms_CIE_xyY { + x: 0.3000, + y: 0.6000, + Y: 1.0, + } + }, + blue: { + qcms_CIE_xyY { + x: 0.1500, + y: 0.0600, + Y: 1.0, + } + }, + }; + let D65 = qcms_white_point_sRGB(); + let table = build_sRGB_gamma_table(1024); + + Profile::new_rgb_with_table(D65, Rec709Primaries, &table).unwrap() + } + + pub fn new_gray_with_gamma(gamma: f32) -> Box<Profile> { + let mut profile = profile_create(); + + profile.grayTRC = Some(curve_from_gamma(gamma)); + profile.class_type = DISPLAY_DEVICE_PROFILE; + profile.rendering_intent = Perceptual; + profile.color_space = GRAY_SIGNATURE; + profile.pcs = XYZ_TYPE; + profile + } + + pub fn new_rgb_with_gamma_set( + white_point: qcms_CIE_xyY, + primaries: qcms_CIE_xyYTRIPLE, + redGamma: f32, + greenGamma: f32, + blueGamma: f32, + ) -> Option<Box<Profile>> { + let mut profile = profile_create(); + + //XXX: should store the whitepoint + if !set_rgb_colorants(&mut profile, white_point, primaries) { + return None; + } + profile.redTRC = Some(curve_from_gamma(redGamma)); + profile.blueTRC = Some(curve_from_gamma(blueGamma)); + profile.greenTRC = Some(curve_from_gamma(greenGamma)); + profile.class_type = 
DISPLAY_DEVICE_PROFILE; + profile.rendering_intent = Perceptual; + profile.color_space = RGB_SIGNATURE; + profile.pcs = XYZ_TYPE; + Some(profile) + } + + pub fn new_from_slice(mem: &[u8]) -> Option<Box<Profile>> { + let length: u32; + let mut source: MemSource = MemSource { + buf: mem, + valid: false, + invalid_reason: None, + }; + let index; + source.valid = true; + let mut src: &mut MemSource = &mut source; + if mem.len() < 4 { + return None; + } + length = read_u32(src, 0); + if length as usize <= mem.len() { + // shrink the area that we can read if appropriate + src.buf = &src.buf[0..length as usize]; + } else { + return None; + } + /* ensure that the profile size is sane so it's easier to reason about */ + if src.buf.len() <= 64 || src.buf.len() >= MAX_PROFILE_SIZE { + return None; + } + let mut profile = profile_create(); + + check_CMM_type_signature(src); + check_profile_version(src); + read_class_signature(&mut profile, src); + read_rendering_intent(&mut profile, src); + read_color_space(&mut profile, src); + read_pcs(&mut profile, src); + //TODO read rest of profile stuff + if !src.valid { + return None; + } + + index = read_tag_table(&mut profile, src); + if !src.valid || index.is_empty() { + return None; + } + + if find_tag(&index, TAG_CHAD).is_some() { + profile.chromaticAdaption = read_tag_s15Fixed16ArrayType(src, &index, TAG_CHAD) + } else { + profile.chromaticAdaption.invalid = true //Signal the data is not present + } + + if profile.class_type == DISPLAY_DEVICE_PROFILE + || profile.class_type == INPUT_DEVICE_PROFILE + || profile.class_type == OUTPUT_DEVICE_PROFILE + || profile.class_type == COLOR_SPACE_PROFILE + { + if profile.color_space == RGB_SIGNATURE { + if let Some(A2B0) = find_tag(&index, TAG_A2B0) { + let lut_type = read_u32(src, A2B0.offset as usize); + if lut_type == LUT8_TYPE || lut_type == LUT16_TYPE { + profile.A2B0 = read_tag_lutType(src, A2B0) + } else if lut_type == LUT_MAB_TYPE { + profile.mAB = read_tag_lutmABType(src, A2B0) + } + 
} + if let Some(B2A0) = find_tag(&index, TAG_B2A0) { + let lut_type = read_u32(src, B2A0.offset as usize); + if lut_type == LUT8_TYPE || lut_type == LUT16_TYPE { + profile.B2A0 = read_tag_lutType(src, B2A0) + } else if lut_type == LUT_MBA_TYPE { + profile.mBA = read_tag_lutmABType(src, B2A0) + } + } + if find_tag(&index, TAG_rXYZ).is_some() || !SUPPORTS_ICCV4.load(Ordering::Relaxed) { + profile.redColorant = read_tag_XYZType(src, &index, TAG_rXYZ); + profile.greenColorant = read_tag_XYZType(src, &index, TAG_gXYZ); + profile.blueColorant = read_tag_XYZType(src, &index, TAG_bXYZ) + } + if !src.valid { + return None; + } + + if find_tag(&index, TAG_rTRC).is_some() || !SUPPORTS_ICCV4.load(Ordering::Relaxed) { + profile.redTRC = read_tag_curveType(src, &index, TAG_rTRC); + profile.greenTRC = read_tag_curveType(src, &index, TAG_gTRC); + profile.blueTRC = read_tag_curveType(src, &index, TAG_bTRC); + if profile.redTRC.is_none() + || profile.blueTRC.is_none() + || profile.greenTRC.is_none() + { + return None; + } + } + } else if profile.color_space == GRAY_SIGNATURE { + profile.grayTRC = read_tag_curveType(src, &index, TAG_kTRC); + profile.grayTRC.as_ref()?; + } else { + debug_assert!(false, "read_color_space protects against entering here"); + return None; + } + } else { + return None; + } + + if !src.valid { + return None; + } + Some(profile) + } + /// Precomputes the information needed for this profile to be + /// used as the output profile when constructing a `Transform`. + pub fn precache_output_transform(&mut self) { + crate::transform::qcms_profile_precache_output_transform(self); + } +} diff --git a/gfx/qcms/src/lib.rs b/gfx/qcms/src/lib.rs new file mode 100644 index 0000000000..0b7a5f6989 --- /dev/null +++ b/gfx/qcms/src/lib.rs @@ -0,0 +1,73 @@ +/*! A pure Rust color management library. 
/// Rendering intent requested for a colour transform.
///
/// These values match the Rendering Intent values from the ICC spec, so the
/// enum can be converted to and from the raw `u32` with a plain cast.
#[repr(u32)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Intent {
    AbsoluteColorimetric = 3,
    Saturation = 2,
    RelativeColorimetric = 1,
    Perceptual = 0,
}

use Intent::*;

impl Default for Intent {
    /// Returns `Perceptual`.
    fn default() -> Self {
        /* Chris Murphy (CM consultant) suggests this as a default in the event that we
         * cannot reproduce relative + Black Point Compensation.  BPC brings an
         * unacceptable performance overhead, so we go with perceptual. */
        Perceptual
    }
}
a/gfx/qcms/src/matrix.rs b/gfx/qcms/src/matrix.rs new file mode 100644 index 0000000000..a7d4bc6455 --- /dev/null +++ b/gfx/qcms/src/matrix.rs @@ -0,0 +1,147 @@ +/* vim: set ts=8 sw=8 noexpandtab: */ +// qcms +// Copyright (C) 2009 Mozilla Foundation +// Copyright (C) 1998-2007 Marti Maria +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
/// A 3x3 single-precision matrix, stored row-major in `m`.
///
/// `invalid` is set by operations that could not produce a meaningful result
/// (for example inverting a singular matrix) and is propagated by
/// [`Matrix::multiply`].
#[derive(Copy, Clone, Debug, Default)]
pub struct Matrix {
    pub m: [[f32; 3]; 3],
    pub invalid: bool,
}

/// A 3-component single-precision column vector.
#[derive(Copy, Clone, Debug)]
pub struct Vector {
    pub v: [f32; 3],
}

impl Matrix {
    /// Returns the matrix-vector product `self * v`.
    pub fn eval(&self, v: Vector) -> Vector {
        let mut result = Vector { v: [0.; 3] };
        for (out, row) in result.v.iter_mut().zip(self.m.iter()) {
            *out = row[0] * v.v[0] + row[1] * v.v[1] + row[2] * v.v[2];
        }
        result
    }

    /// Returns the determinant, computed in `f32` by the rule of Sarrus.
    //probably reuse this computation in matrix_invert
    pub fn det(&self) -> f32 {
        let m = &self.m;
        m[0][0] * m[1][1] * m[2][2]
            + m[0][1] * m[1][2] * m[2][0]
            + m[0][2] * m[1][0] * m[2][1]
            - m[0][0] * m[1][2] * m[2][1]
            - m[0][1] * m[1][0] * m[2][2]
            - m[0][2] * m[1][1] * m[2][0]
    }

    /// Returns the inverse computed as `adj(self) / det(self)`.
    ///
    /// When the determinant is exactly zero the result is the zero matrix
    /// with `invalid` set to `true`.
    /* from pixman and cairo and Mathematics for Game Programmers */
    /* lcms uses gauss-jordan elimination with partial pivoting which is
     * less efficient and not as numerically stable. See Mathematics for
     * Game Programmers. */
    pub fn invert(&self) -> Matrix {
        // Index tables picking the two rows/columns that remain for each
        // 2x2 minor.
        const A: [usize; 3] = [2, 2, 1];
        const B: [usize; 3] = [1, 0, 0];

        /* inv (A) = 1/det (A) * adj (A) */
        let det = self.det();
        if det == 0. {
            return Matrix {
                m: [[0.; 3]; 3],
                invalid: true,
            };
        }
        let inv_det = 1. / det;

        let mut dest_mat = Matrix {
            m: [[0.; 3]; 3],
            invalid: false,
        };
        for j in 0..3 {
            for i in 0..3 {
                let (ai, aj, bi, bj) = (A[i], A[j], B[i], B[j]);
                // The minor is evaluated in f32 and only then widened.
                let mut p =
                    (self.m[ai][aj] * self.m[bi][bj] - self.m[ai][bj] * self.m[bi][aj]) as f64;
                // Checkerboard sign of the cofactor.
                if (i + j) & 1 != 0 {
                    p = -p;
                }
                dest_mat.m[j][i] = (inv_det as f64 * p) as f32;
            }
        }
        dest_mat
    }

    /// Returns the (valid) identity matrix.
    pub fn identity() -> Matrix {
        Matrix {
            m: [[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]],
            invalid: false,
        }
    }

    /// Returns an identity matrix flagged as `invalid`.
    pub fn invalid() -> Matrix {
        Matrix {
            invalid: true,
            ..Self::identity()
        }
    }

    /// Returns the product `a * b`; the result is invalid if either operand is.
    ///
    /// Each dot product is accumulated in `f64` from `f32` products and
    /// narrowed once at the end.
    /* from pixman */
    /* MAT3per... */
    pub fn multiply(a: Matrix, b: Matrix) -> Matrix {
        let mut result = Matrix {
            m: [[0.; 3]; 3],
            invalid: a.invalid || b.invalid,
        };
        for dy in 0..3 {
            for dx in 0..3 {
                let mut acc = 0f64;
                for o in 0..3 {
                    acc += (a.m[dy][o] * b.m[o][dx]) as f64;
                }
                result.m[dy][dx] = acc as f32;
            }
        }
        result
    }
}
restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +use crate::transform_neon::{ + qcms_transform_data_bgra_out_lut_neon, qcms_transform_data_rgb_out_lut_neon, + qcms_transform_data_rgba_out_lut_neon, +}; +use crate::{ + chain::chain_transform, + double_to_s15Fixed16Number, + iccread::SUPPORTS_ICCV4, + matrix::*, + transform_util::{ + build_colorant_matrix, build_input_gamma_table, build_output_lut, compute_precache, + lut_interp_linear, + }, +}; +use crate::{ + iccread::{qcms_CIE_xyY, qcms_CIE_xyYTRIPLE, Profile, GRAY_SIGNATURE, RGB_SIGNATURE}, + transform_util::clamp_float, + Intent, +}; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use crate::{ + transform_avx::{ + qcms_transform_data_bgra_out_lut_avx, qcms_transform_data_rgb_out_lut_avx, + qcms_transform_data_rgba_out_lut_avx, + }, + transform_sse2::{ + qcms_transform_data_bgra_out_lut_sse2, qcms_transform_data_rgb_out_lut_sse2, + qcms_transform_data_rgba_out_lut_sse2, + }, +}; + +use std::sync::atomic::Ordering; +use std::sync::Arc; + +pub const PRECACHE_OUTPUT_SIZE: usize = 8192; +pub const 
PRECACHE_OUTPUT_MAX: usize = PRECACHE_OUTPUT_SIZE - 1; +pub const FLOATSCALE: f32 = PRECACHE_OUTPUT_SIZE as f32; +pub const CLAMPMAXVAL: f32 = ((PRECACHE_OUTPUT_SIZE - 1) as f32) / PRECACHE_OUTPUT_SIZE as f32; + +#[repr(C)] +pub struct PrecacheOuput { + /* We previously used a count of 65536 here but that seems like more + * precision than we actually need. By reducing the size we can + * improve startup performance and reduce memory usage. ColorSync on + * 10.5 uses 4097 which is perhaps because they use a fixed point + * representation where 1. is represented by 0x1000. */ + pub data: [u8; PRECACHE_OUTPUT_SIZE], +} + +impl Default for PrecacheOuput { + fn default() -> PrecacheOuput { + PrecacheOuput { + data: [0; PRECACHE_OUTPUT_SIZE], + } + } +} + +/* used as a lookup table for the output transformation. + * we refcount them so we only need to have one around per output + * profile, instead of duplicating them per transform */ + +#[repr(C)] +#[repr(align(16))] +#[derive(Clone, Default)] +pub struct qcms_transform { + pub matrix: [[f32; 4]; 3], + pub input_gamma_table_r: Option<Vec<f32>>, + pub input_gamma_table_g: Option<Vec<f32>>, + pub input_gamma_table_b: Option<Vec<f32>>, + pub input_clut_table_length: u16, + pub clut: Option<Vec<f32>>, + pub grid_size: u16, + pub output_clut_table_length: u16, + pub input_gamma_table_gray: Option<Vec<f32>>, + pub out_gamma_r: f32, + pub out_gamma_g: f32, + pub out_gamma_b: f32, + pub out_gamma_gray: f32, + pub output_gamma_lut_r: Option<Vec<u16>>, + pub output_gamma_lut_g: Option<Vec<u16>>, + pub output_gamma_lut_b: Option<Vec<u16>>, + pub output_gamma_lut_gray: Option<Vec<u16>>, + pub output_gamma_lut_r_length: usize, + pub output_gamma_lut_g_length: usize, + pub output_gamma_lut_b_length: usize, + pub output_gamma_lut_gray_length: usize, + pub output_table_r: Option<Arc<PrecacheOuput>>, + pub output_table_g: Option<Arc<PrecacheOuput>>, + pub output_table_b: Option<Arc<PrecacheOuput>>, + pub transform_fn: transform_fn_t, 
/// The format of pixel data
#[repr(u32)]
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum DataType {
    RGB8 = 0,
    RGBA8 = 1,
    BGRA8 = 2,
    Gray8 = 3,
    GrayA8 = 4,
}

impl DataType {
    /// Returns the number of bytes one pixel occupies in this format.
    pub fn bytes_per_pixel(&self) -> usize {
        // Arms are path-qualified so they stay variant patterns regardless of
        // which glob imports are in scope.
        match self {
            DataType::Gray8 => 1,
            DataType::GrayA8 => 2,
            DataType::RGB8 => 3,
            DataType::RGBA8 | DataType::BGRA8 => 4,
        }
    }
}

use DataType::*;
This matrix is next inverted +// - Then I eval the source white point across this matrix +// obtaining the coeficients of the transformation +// - Then, I apply these coeficients to the original matrix +fn build_RGB_to_XYZ_transfer_matrix(white: qcms_CIE_xyY, primrs: qcms_CIE_xyYTRIPLE) -> Matrix { + let mut primaries: Matrix = Matrix { + m: [[0.; 3]; 3], + invalid: false, + }; + + let mut result: Matrix = Matrix { + m: [[0.; 3]; 3], + invalid: false, + }; + let mut white_point: Vector = Vector { v: [0.; 3] }; + + let xn: f64 = white.x; + let yn: f64 = white.y; + if yn == 0.0f64 { + return Matrix::invalid(); + } + + let xr: f64 = primrs.red.x; + let yr: f64 = primrs.red.y; + let xg: f64 = primrs.green.x; + let yg: f64 = primrs.green.y; + let xb: f64 = primrs.blue.x; + let yb: f64 = primrs.blue.y; + primaries.m[0][0] = xr as f32; + primaries.m[0][1] = xg as f32; + primaries.m[0][2] = xb as f32; + primaries.m[1][0] = yr as f32; + primaries.m[1][1] = yg as f32; + primaries.m[1][2] = yb as f32; + primaries.m[2][0] = (1f64 - xr - yr) as f32; + primaries.m[2][1] = (1f64 - xg - yg) as f32; + primaries.m[2][2] = (1f64 - xb - yb) as f32; + primaries.invalid = false; + white_point.v[0] = (xn / yn) as f32; + white_point.v[1] = 1.; + white_point.v[2] = ((1.0f64 - xn - yn) / yn) as f32; + let primaries_invert: Matrix = primaries.invert(); + if primaries_invert.invalid { + return Matrix::invalid(); + } + let coefs: Vector = primaries_invert.eval(white_point); + result.m[0][0] = (coefs.v[0] as f64 * xr) as f32; + result.m[0][1] = (coefs.v[1] as f64 * xg) as f32; + result.m[0][2] = (coefs.v[2] as f64 * xb) as f32; + result.m[1][0] = (coefs.v[0] as f64 * yr) as f32; + result.m[1][1] = (coefs.v[1] as f64 * yg) as f32; + result.m[1][2] = (coefs.v[2] as f64 * yb) as f32; + result.m[2][0] = (coefs.v[0] as f64 * (1.0f64 - xr - yr)) as f32; + result.m[2][1] = (coefs.v[1] as f64 * (1.0f64 - xg - yg)) as f32; + result.m[2][2] = (coefs.v[2] as f64 * (1.0f64 - xb - yb)) as f32; + 
result.invalid = primaries_invert.invalid; + result +} +/* CIE Illuminant D50 */ +const D50_XYZ: CIE_XYZ = CIE_XYZ { + X: 0.9642f64, + Y: 1.0000f64, + Z: 0.8249f64, +}; +/* from lcms: xyY2XYZ() + * corresponds to argyll: icmYxy2XYZ() */ +fn xyY2XYZ(source: qcms_CIE_xyY) -> CIE_XYZ { + let mut dest: CIE_XYZ = CIE_XYZ { + X: 0., + Y: 0., + Z: 0., + }; + dest.X = source.x / source.y * source.Y; + dest.Y = source.Y; + dest.Z = (1f64 - source.x - source.y) / source.y * source.Y; + dest +} +/* from lcms: ComputeChromaticAdaption */ +// Compute chromatic adaption matrix using chad as cone matrix +fn compute_chromatic_adaption( + source_white_point: CIE_XYZ, + dest_white_point: CIE_XYZ, + chad: Matrix, +) -> Matrix { + let mut cone_source_XYZ: Vector = Vector { v: [0.; 3] }; + + let mut cone_dest_XYZ: Vector = Vector { v: [0.; 3] }; + + let mut cone: Matrix = Matrix { + m: [[0.; 3]; 3], + invalid: false, + }; + + let tmp: Matrix = chad; + let chad_inv: Matrix = tmp.invert(); + if chad_inv.invalid { + return Matrix::invalid(); + } + cone_source_XYZ.v[0] = source_white_point.X as f32; + cone_source_XYZ.v[1] = source_white_point.Y as f32; + cone_source_XYZ.v[2] = source_white_point.Z as f32; + cone_dest_XYZ.v[0] = dest_white_point.X as f32; + cone_dest_XYZ.v[1] = dest_white_point.Y as f32; + cone_dest_XYZ.v[2] = dest_white_point.Z as f32; + + let cone_source_rgb: Vector = chad.eval(cone_source_XYZ); + let cone_dest_rgb: Vector = chad.eval(cone_dest_XYZ); + cone.m[0][0] = cone_dest_rgb.v[0] / cone_source_rgb.v[0]; + cone.m[0][1] = 0.; + cone.m[0][2] = 0.; + cone.m[1][0] = 0.; + cone.m[1][1] = cone_dest_rgb.v[1] / cone_source_rgb.v[1]; + cone.m[1][2] = 0.; + cone.m[2][0] = 0.; + cone.m[2][1] = 0.; + cone.m[2][2] = cone_dest_rgb.v[2] / cone_source_rgb.v[2]; + cone.invalid = false; + // Normalize + Matrix::multiply(chad_inv, Matrix::multiply(cone, chad)) +} +/* from lcms: cmsAdaptionMatrix */ +// Returns the final chrmatic adaptation from illuminant FromIll to Illuminant ToIll 
+// Bradford is assumed +fn adaption_matrix(source_illumination: CIE_XYZ, target_illumination: CIE_XYZ) -> Matrix { + let lam_rigg: Matrix = { + let init = Matrix { + m: [ + [0.8951, 0.2664, -0.1614], + [-0.7502, 1.7135, 0.0367], + [0.0389, -0.0685, 1.0296], + ], + invalid: false, + }; + init + }; + compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg) +} +/* from lcms: cmsAdaptMatrixToD50 */ +fn adapt_matrix_to_D50(r: Matrix, source_white_pt: qcms_CIE_xyY) -> Matrix { + if source_white_pt.y == 0.0f64 { + return Matrix::invalid(); + } + + let Dn: CIE_XYZ = xyY2XYZ(source_white_pt); + let Bradford: Matrix = adaption_matrix(Dn, D50_XYZ); + if Bradford.invalid { + return Matrix::invalid(); + } + Matrix::multiply(Bradford, r) +} +pub(crate) fn set_rgb_colorants( + mut profile: &mut Profile, + white_point: qcms_CIE_xyY, + primaries: qcms_CIE_xyYTRIPLE, +) -> bool { + let mut colorants: Matrix = build_RGB_to_XYZ_transfer_matrix(white_point, primaries); + colorants = adapt_matrix_to_D50(colorants, white_point); + if colorants.invalid { + return false; + } + /* note: there's a transpose type of operation going on here */ + profile.redColorant.X = double_to_s15Fixed16Number(colorants.m[0][0] as f64); + profile.redColorant.Y = double_to_s15Fixed16Number(colorants.m[1][0] as f64); + profile.redColorant.Z = double_to_s15Fixed16Number(colorants.m[2][0] as f64); + profile.greenColorant.X = double_to_s15Fixed16Number(colorants.m[0][1] as f64); + profile.greenColorant.Y = double_to_s15Fixed16Number(colorants.m[1][1] as f64); + profile.greenColorant.Z = double_to_s15Fixed16Number(colorants.m[2][1] as f64); + profile.blueColorant.X = double_to_s15Fixed16Number(colorants.m[0][2] as f64); + profile.blueColorant.Y = double_to_s15Fixed16Number(colorants.m[1][2] as f64); + profile.blueColorant.Z = double_to_s15Fixed16Number(colorants.m[2][2] as f64); + true +} +pub(crate) fn get_rgb_colorants( + colorants: &mut Matrix, + white_point: qcms_CIE_xyY, + primaries: 
qcms_CIE_xyYTRIPLE, +) -> bool { + *colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries); + *colorants = adapt_matrix_to_D50(*colorants, white_point); + colorants.invalid +} +/* Alpha is not corrected. + A rationale for this is found in Alvy Ray's "Should Alpha Be Nonlinear If + RGB Is?" Tech Memo 17 (December 14, 1998). + See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf +*/ +unsafe extern "C" fn qcms_transform_data_gray_template_lut<I: GrayFormat, F: Format>( + transform: &qcms_transform, + mut src: *const u8, + mut dest: *mut u8, + length: usize, +) { + let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32; + let input_gamma_table_gray = (*transform) + .input_gamma_table_gray + .as_ref() + .unwrap() + .as_ptr(); + + let mut i: u32 = 0; + while (i as usize) < length { + let fresh0 = src; + src = src.offset(1); + let device: u8 = *fresh0; + let mut alpha: u8 = 0xffu8; + if I::has_alpha { + let fresh1 = src; + src = src.offset(1); + alpha = *fresh1 + } + let linear: f32 = *input_gamma_table_gray.offset(device as isize); + + let out_device_r: f32 = lut_interp_linear( + linear as f64, + &(*transform).output_gamma_lut_r.as_ref().unwrap(), + ); + let out_device_g: f32 = lut_interp_linear( + linear as f64, + &(*transform).output_gamma_lut_g.as_ref().unwrap(), + ); + let out_device_b: f32 = lut_interp_linear( + linear as f64, + &(*transform).output_gamma_lut_b.as_ref().unwrap(), + ); + *dest.add(F::kRIndex) = clamp_u8(out_device_r * 255f32); + *dest.add(F::kGIndex) = clamp_u8(out_device_g * 255f32); + *dest.add(F::kBIndex) = clamp_u8(out_device_b * 255f32); + if F::kAIndex != 0xff { + *dest.add(F::kAIndex) = alpha + } + dest = dest.offset(components as isize); + i += 1 + } +} +unsafe extern "C" fn qcms_transform_data_gray_out_lut( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_lut::<Gray, RGB>(transform, src, dest, length); +} +unsafe extern "C" fn 
qcms_transform_data_gray_rgba_out_lut( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_lut::<Gray, RGBA>(transform, src, dest, length); +} +unsafe extern "C" fn qcms_transform_data_gray_bgra_out_lut( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_lut::<Gray, BGRA>(transform, src, dest, length); +} +unsafe extern "C" fn qcms_transform_data_graya_rgba_out_lut( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_lut::<GrayAlpha, RGBA>(transform, src, dest, length); +} +unsafe extern "C" fn qcms_transform_data_graya_bgra_out_lut( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_lut::<GrayAlpha, BGRA>(transform, src, dest, length); +} +unsafe extern "C" fn qcms_transform_data_gray_template_precache<I: GrayFormat, F: Format>( + transform: *const qcms_transform, + mut src: *const u8, + mut dest: *mut u8, + length: usize, +) { + let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32; + let output_table_r = ((*transform).output_table_r).as_deref().unwrap(); + let output_table_g = ((*transform).output_table_g).as_deref().unwrap(); + let output_table_b = ((*transform).output_table_b).as_deref().unwrap(); + + let input_gamma_table_gray = (*transform) + .input_gamma_table_gray + .as_ref() + .unwrap() + .as_ptr(); + + let mut i: u32 = 0; + while (i as usize) < length { + let fresh2 = src; + src = src.offset(1); + let device: u8 = *fresh2; + let mut alpha: u8 = 0xffu8; + if I::has_alpha { + let fresh3 = src; + src = src.offset(1); + alpha = *fresh3 + } + + let linear: f32 = *input_gamma_table_gray.offset(device as isize); + /* we could round here... 
*/ + let gray: u16 = (linear * PRECACHE_OUTPUT_MAX as f32) as u16; + *dest.add(F::kRIndex) = (output_table_r).data[gray as usize]; + *dest.add(F::kGIndex) = (output_table_g).data[gray as usize]; + *dest.add(F::kBIndex) = (output_table_b).data[gray as usize]; + if F::kAIndex != 0xff { + *dest.add(F::kAIndex) = alpha + } + dest = dest.offset(components as isize); + i += 1 + } +} +unsafe extern "C" fn qcms_transform_data_gray_out_precache( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_precache::<Gray, RGB>(transform, src, dest, length); +} +unsafe extern "C" fn qcms_transform_data_gray_rgba_out_precache( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_precache::<Gray, RGBA>(transform, src, dest, length); +} +unsafe extern "C" fn qcms_transform_data_gray_bgra_out_precache( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_precache::<Gray, BGRA>(transform, src, dest, length); +} +unsafe extern "C" fn qcms_transform_data_graya_rgba_out_precache( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_precache::<GrayAlpha, RGBA>(transform, src, dest, length); +} +unsafe extern "C" fn qcms_transform_data_graya_bgra_out_precache( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_precache::<GrayAlpha, BGRA>(transform, src, dest, length); +} +unsafe extern "C" fn qcms_transform_data_template_lut_precache<F: Format>( + transform: &qcms_transform, + mut src: *const u8, + mut dest: *mut u8, + length: usize, +) { + let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32; + let output_table_r = ((*transform).output_table_r).as_deref().unwrap(); + let output_table_g = 
((*transform).output_table_g).as_deref().unwrap(); + let output_table_b = ((*transform).output_table_b).as_deref().unwrap(); + let input_gamma_table_r = (*transform).input_gamma_table_r.as_ref().unwrap().as_ptr(); + let input_gamma_table_g = (*transform).input_gamma_table_g.as_ref().unwrap().as_ptr(); + let input_gamma_table_b = (*transform).input_gamma_table_b.as_ref().unwrap().as_ptr(); + + let mat: *const [f32; 4] = (*transform).matrix.as_ptr(); + let mut i: u32 = 0; + while (i as usize) < length { + let device_r: u8 = *src.add(F::kRIndex); + let device_g: u8 = *src.add(F::kGIndex); + let device_b: u8 = *src.add(F::kBIndex); + let mut alpha: u8 = 0; + if F::kAIndex != 0xff { + alpha = *src.add(F::kAIndex) + } + src = src.offset(components as isize); + + let linear_r: f32 = *input_gamma_table_r.offset(device_r as isize); + let linear_g: f32 = *input_gamma_table_g.offset(device_g as isize); + let linear_b: f32 = *input_gamma_table_b.offset(device_b as isize); + let mut out_linear_r: f32 = (*mat.offset(0isize))[0] * linear_r + + (*mat.offset(1isize))[0] * linear_g + + (*mat.offset(2isize))[0] * linear_b; + let mut out_linear_g: f32 = (*mat.offset(0isize))[1] * linear_r + + (*mat.offset(1isize))[1] * linear_g + + (*mat.offset(2isize))[1] * linear_b; + let mut out_linear_b: f32 = (*mat.offset(0isize))[2] * linear_r + + (*mat.offset(1isize))[2] * linear_g + + (*mat.offset(2isize))[2] * linear_b; + out_linear_r = clamp_float(out_linear_r); + out_linear_g = clamp_float(out_linear_g); + out_linear_b = clamp_float(out_linear_b); + /* we could round here... 
*/ + + let r: u16 = (out_linear_r * PRECACHE_OUTPUT_MAX as f32) as u16; + let g: u16 = (out_linear_g * PRECACHE_OUTPUT_MAX as f32) as u16; + let b: u16 = (out_linear_b * PRECACHE_OUTPUT_MAX as f32) as u16; + *dest.add(F::kRIndex) = (output_table_r).data[r as usize]; + *dest.add(F::kGIndex) = (output_table_g).data[g as usize]; + *dest.add(F::kBIndex) = (output_table_b).data[b as usize]; + if F::kAIndex != 0xff { + *dest.add(F::kAIndex) = alpha + } + dest = dest.offset(components as isize); + i += 1 + } +} +#[no_mangle] +pub unsafe extern "C" fn qcms_transform_data_rgb_out_lut_precache( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_template_lut_precache::<RGB>(transform, src, dest, length); +} +#[no_mangle] +pub unsafe extern "C" fn qcms_transform_data_rgba_out_lut_precache( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_template_lut_precache::<RGBA>(transform, src, dest, length); +} +#[no_mangle] +pub unsafe extern "C" fn qcms_transform_data_bgra_out_lut_precache( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_template_lut_precache::<BGRA>(transform, src, dest, length); +} +// Not used +/* +static void qcms_transform_data_clut(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length) { + unsigned int i; + int xy_len = 1; + int x_len = transform->grid_size; + int len = x_len * x_len; + const float* r_table = transform->r_clut; + const float* g_table = transform->g_clut; + const float* b_table = transform->b_clut; + + for (i = 0; i < length; i++) { + unsigned char in_r = *src++; + unsigned char in_g = *src++; + unsigned char in_b = *src++; + float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f; + + int x = floorf(linear_r * (transform->grid_size-1)); + int y = floorf(linear_g * (transform->grid_size-1)); + int z = 
floorf(linear_b * (transform->grid_size-1)); + int x_n = ceilf(linear_r * (transform->grid_size-1)); + int y_n = ceilf(linear_g * (transform->grid_size-1)); + int z_n = ceilf(linear_b * (transform->grid_size-1)); + float x_d = linear_r * (transform->grid_size-1) - x; + float y_d = linear_g * (transform->grid_size-1) - y; + float z_d = linear_b * (transform->grid_size-1) - z; + + float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d); + float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d); + float r_y1 = lerp(r_x1, r_x2, y_d); + float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d); + float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_n), x_d); + float r_y2 = lerp(r_x3, r_x4, y_d); + float clut_r = lerp(r_y1, r_y2, z_d); + + float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d); + float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d); + float g_y1 = lerp(g_x1, g_x2, y_d); + float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d); + float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_n), x_d); + float g_y2 = lerp(g_x3, g_x4, y_d); + float clut_g = lerp(g_y1, g_y2, z_d); + + float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d); + float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d); + float b_y1 = lerp(b_x1, b_x2, y_d); + float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d); + float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_n), x_d); + float b_y2 = lerp(b_x3, b_x4, y_d); + float clut_b = lerp(b_y1, b_y2, z_d); + + *dest++ = clamp_u8(clut_r*255.0f); + *dest++ = clamp_u8(clut_g*255.0f); + *dest++ = clamp_u8(clut_b*255.0f); + } +} +*/ +fn int_div_ceil(value: i32, div: i32) -> i32 { + (value + div - 1) / div +} +// Using lcms' tetra interpolation algorithm. 
+unsafe extern "C" fn qcms_transform_data_tetra_clut_template<F: Format>( + transform: *const qcms_transform, + mut src: *const u8, + mut dest: *mut u8, + length: usize, +) { + let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32; + + let xy_len: i32 = 1; + let x_len: i32 = (*transform).grid_size as i32; + let len: i32 = x_len * x_len; + let table = (*transform).clut.as_ref().unwrap().as_ptr(); + let r_table: *const f32 = table; + let g_table: *const f32 = table.offset(1); + let b_table: *const f32 = table.offset(2); + let mut c0_r: f32; + let mut c1_r: f32; + let mut c2_r: f32; + let mut c3_r: f32; + let mut c0_g: f32; + let mut c1_g: f32; + let mut c2_g: f32; + let mut c3_g: f32; + let mut c0_b: f32; + let mut c1_b: f32; + let mut c2_b: f32; + let mut c3_b: f32; + let mut clut_r: f32; + let mut clut_g: f32; + let mut clut_b: f32; + let mut i: u32 = 0; + while (i as usize) < length { + let in_r: u8 = *src.add(F::kRIndex); + let in_g: u8 = *src.add(F::kGIndex); + let in_b: u8 = *src.add(F::kBIndex); + let mut in_a: u8 = 0; + if F::kAIndex != 0xff { + in_a = *src.add(F::kAIndex) + } + src = src.offset(components as isize); + let linear_r: f32 = in_r as i32 as f32 / 255.0; + let linear_g: f32 = in_g as i32 as f32 / 255.0; + let linear_b: f32 = in_b as i32 as f32 / 255.0; + let x: i32 = in_r as i32 * ((*transform).grid_size as i32 - 1) / 255; + let y: i32 = in_g as i32 * ((*transform).grid_size as i32 - 1) / 255; + let z: i32 = in_b as i32 * ((*transform).grid_size as i32 - 1) / 255; + let x_n: i32 = int_div_ceil(in_r as i32 * ((*transform).grid_size as i32 - 1), 255); + let y_n: i32 = int_div_ceil(in_g as i32 * ((*transform).grid_size as i32 - 1), 255); + let z_n: i32 = int_div_ceil(in_b as i32 * ((*transform).grid_size as i32 - 1), 255); + let rx: f32 = linear_r * ((*transform).grid_size as i32 - 1) as f32 - x as f32; + let ry: f32 = linear_g * ((*transform).grid_size as i32 - 1) as f32 - y as f32; + let rz: f32 = linear_b * ((*transform).grid_size as 
i32 - 1) as f32 - z as f32; + c0_r = *r_table.offset(((x * len + y * x_len + z * xy_len) * 3) as isize); + c0_g = *g_table.offset(((x * len + y * x_len + z * xy_len) * 3) as isize); + c0_b = *b_table.offset(((x * len + y * x_len + z * xy_len) * 3) as isize); + if rx >= ry { + if ry >= rz { + //rx >= ry && ry >= rz + c1_r = *r_table.offset(((x_n * len + y * x_len + z * xy_len) * 3) as isize) - c0_r; //rz > rx && rx >= ry + c2_r = *r_table.offset(((x_n * len + y_n * x_len + z * xy_len) * 3) as isize) + - *r_table.offset(((x_n * len + y * x_len + z * xy_len) * 3) as isize); + c3_r = *r_table.offset(((x_n * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *r_table.offset(((x_n * len + y_n * x_len + z * xy_len) * 3) as isize); + c1_g = *g_table.offset(((x_n * len + y * x_len + z * xy_len) * 3) as isize) - c0_g; + c2_g = *g_table.offset(((x_n * len + y_n * x_len + z * xy_len) * 3) as isize) + - *g_table.offset(((x_n * len + y * x_len + z * xy_len) * 3) as isize); + c3_g = *g_table.offset(((x_n * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *g_table.offset(((x_n * len + y_n * x_len + z * xy_len) * 3) as isize); + c1_b = *b_table.offset(((x_n * len + y * x_len + z * xy_len) * 3) as isize) - c0_b; + c2_b = *b_table.offset(((x_n * len + y_n * x_len + z * xy_len) * 3) as isize) + - *b_table.offset(((x_n * len + y * x_len + z * xy_len) * 3) as isize); + c3_b = *b_table.offset(((x_n * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *b_table.offset(((x_n * len + y_n * x_len + z * xy_len) * 3) as isize) + } else if rx >= rz { + //rx >= rz && rz >= ry + c1_r = *r_table.offset(((x_n * len + y * x_len + z * xy_len) * 3) as isize) - c0_r; + c2_r = *r_table.offset(((x_n * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *r_table.offset(((x_n * len + y * x_len + z_n * xy_len) * 3) as isize); + c3_r = *r_table.offset(((x_n * len + y * x_len + z_n * xy_len) * 3) as isize) + - *r_table.offset(((x_n * len + y * x_len + z * xy_len) * 3) as isize); + c1_g = 
*g_table.offset(((x_n * len + y * x_len + z * xy_len) * 3) as isize) - c0_g; + c2_g = *g_table.offset(((x_n * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *g_table.offset(((x_n * len + y * x_len + z_n * xy_len) * 3) as isize); + c3_g = *g_table.offset(((x_n * len + y * x_len + z_n * xy_len) * 3) as isize) + - *g_table.offset(((x_n * len + y * x_len + z * xy_len) * 3) as isize); + c1_b = *b_table.offset(((x_n * len + y * x_len + z * xy_len) * 3) as isize) - c0_b; + c2_b = *b_table.offset(((x_n * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *b_table.offset(((x_n * len + y * x_len + z_n * xy_len) * 3) as isize); + c3_b = *b_table.offset(((x_n * len + y * x_len + z_n * xy_len) * 3) as isize) + - *b_table.offset(((x_n * len + y * x_len + z * xy_len) * 3) as isize) + } else { + c1_r = *r_table.offset(((x_n * len + y * x_len + z_n * xy_len) * 3) as isize) + - *r_table.offset(((x * len + y * x_len + z_n * xy_len) * 3) as isize); + c2_r = *r_table.offset(((x_n * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *r_table.offset(((x_n * len + y * x_len + z_n * xy_len) * 3) as isize); + c3_r = *r_table.offset(((x * len + y * x_len + z_n * xy_len) * 3) as isize) - c0_r; + c1_g = *g_table.offset(((x_n * len + y * x_len + z_n * xy_len) * 3) as isize) + - *g_table.offset(((x * len + y * x_len + z_n * xy_len) * 3) as isize); + c2_g = *g_table.offset(((x_n * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *g_table.offset(((x_n * len + y * x_len + z_n * xy_len) * 3) as isize); + c3_g = *g_table.offset(((x * len + y * x_len + z_n * xy_len) * 3) as isize) - c0_g; + c1_b = *b_table.offset(((x_n * len + y * x_len + z_n * xy_len) * 3) as isize) + - *b_table.offset(((x * len + y * x_len + z_n * xy_len) * 3) as isize); + c2_b = *b_table.offset(((x_n * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *b_table.offset(((x_n * len + y * x_len + z_n * xy_len) * 3) as isize); + c3_b = *b_table.offset(((x * len + y * x_len + z_n * xy_len) * 3) as isize) - c0_b + } + } 
else if rx >= rz { + //ry > rx && rx >= rz + c1_r = *r_table.offset(((x_n * len + y_n * x_len + z * xy_len) * 3) as isize) + - *r_table.offset(((x * len + y_n * x_len + z * xy_len) * 3) as isize); //rz > ry && ry > rx + c2_r = *r_table.offset(((x * len + y_n * x_len + z * xy_len) * 3) as isize) - c0_r; + c3_r = *r_table.offset(((x_n * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *r_table.offset(((x_n * len + y_n * x_len + z * xy_len) * 3) as isize); + c1_g = *g_table.offset(((x_n * len + y_n * x_len + z * xy_len) * 3) as isize) + - *g_table.offset(((x * len + y_n * x_len + z * xy_len) * 3) as isize); + c2_g = *g_table.offset(((x * len + y_n * x_len + z * xy_len) * 3) as isize) - c0_g; + c3_g = *g_table.offset(((x_n * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *g_table.offset(((x_n * len + y_n * x_len + z * xy_len) * 3) as isize); + c1_b = *b_table.offset(((x_n * len + y_n * x_len + z * xy_len) * 3) as isize) + - *b_table.offset(((x * len + y_n * x_len + z * xy_len) * 3) as isize); + c2_b = *b_table.offset(((x * len + y_n * x_len + z * xy_len) * 3) as isize) - c0_b; + c3_b = *b_table.offset(((x_n * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *b_table.offset(((x_n * len + y_n * x_len + z * xy_len) * 3) as isize) + } else if ry >= rz { + //ry >= rz && rz > rx + c1_r = *r_table.offset(((x_n * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *r_table.offset(((x * len + y_n * x_len + z_n * xy_len) * 3) as isize); + c2_r = *r_table.offset(((x * len + y_n * x_len + z * xy_len) * 3) as isize) - c0_r; + c3_r = *r_table.offset(((x * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *r_table.offset(((x * len + y_n * x_len + z * xy_len) * 3) as isize); + c1_g = *g_table.offset(((x_n * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *g_table.offset(((x * len + y_n * x_len + z_n * xy_len) * 3) as isize); + c2_g = *g_table.offset(((x * len + y_n * x_len + z * xy_len) * 3) as isize) - c0_g; + c3_g = *g_table.offset(((x * len + y_n * x_len + 
z_n * xy_len) * 3) as isize) + - *g_table.offset(((x * len + y_n * x_len + z * xy_len) * 3) as isize); + c1_b = *b_table.offset(((x_n * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *b_table.offset(((x * len + y_n * x_len + z_n * xy_len) * 3) as isize); + c2_b = *b_table.offset(((x * len + y_n * x_len + z * xy_len) * 3) as isize) - c0_b; + c3_b = *b_table.offset(((x * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *b_table.offset(((x * len + y_n * x_len + z * xy_len) * 3) as isize) + } else { + c1_r = *r_table.offset(((x_n * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *r_table.offset(((x * len + y_n * x_len + z_n * xy_len) * 3) as isize); + c2_r = *r_table.offset(((x * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *r_table.offset(((x * len + y * x_len + z_n * xy_len) * 3) as isize); + c3_r = *r_table.offset(((x * len + y * x_len + z_n * xy_len) * 3) as isize) - c0_r; + c1_g = *g_table.offset(((x_n * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *g_table.offset(((x * len + y_n * x_len + z_n * xy_len) * 3) as isize); + c2_g = *g_table.offset(((x * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *g_table.offset(((x * len + y * x_len + z_n * xy_len) * 3) as isize); + c3_g = *g_table.offset(((x * len + y * x_len + z_n * xy_len) * 3) as isize) - c0_g; + c1_b = *b_table.offset(((x_n * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *b_table.offset(((x * len + y_n * x_len + z_n * xy_len) * 3) as isize); + c2_b = *b_table.offset(((x * len + y_n * x_len + z_n * xy_len) * 3) as isize) + - *b_table.offset(((x * len + y * x_len + z_n * xy_len) * 3) as isize); + c3_b = *b_table.offset(((x * len + y * x_len + z_n * xy_len) * 3) as isize) - c0_b + } + clut_r = c0_r + c1_r * rx + c2_r * ry + c3_r * rz; + clut_g = c0_g + c1_g * rx + c2_g * ry + c3_g * rz; + clut_b = c0_b + c1_b * rx + c2_b * ry + c3_b * rz; + *dest.add(F::kRIndex) = clamp_u8(clut_r * 255.0); + *dest.add(F::kGIndex) = clamp_u8(clut_g * 255.0); + *dest.add(F::kBIndex) = 
clamp_u8(clut_b * 255.0); + if F::kAIndex != 0xff { + *dest.add(F::kAIndex) = in_a + } + dest = dest.offset(components as isize); + i += 1 + } +} +unsafe extern "C" fn qcms_transform_data_tetra_clut_rgb( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_tetra_clut_template::<RGB>(transform, src, dest, length); +} +unsafe extern "C" fn qcms_transform_data_tetra_clut_rgba( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_tetra_clut_template::<RGBA>(transform, src, dest, length); +} +unsafe extern "C" fn qcms_transform_data_tetra_clut_bgra( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_tetra_clut_template::<BGRA>(transform, src, dest, length); +} +unsafe extern "C" fn qcms_transform_data_template_lut<F: Format>( + transform: &qcms_transform, + mut src: *const u8, + mut dest: *mut u8, + length: usize, +) { + let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32; + + let mat: *const [f32; 4] = (*transform).matrix.as_ptr(); + let mut i: u32 = 0; + let input_gamma_table_r = (*transform).input_gamma_table_r.as_ref().unwrap().as_ptr(); + let input_gamma_table_g = (*transform).input_gamma_table_g.as_ref().unwrap().as_ptr(); + let input_gamma_table_b = (*transform).input_gamma_table_b.as_ref().unwrap().as_ptr(); + while (i as usize) < length { + let device_r: u8 = *src.add(F::kRIndex); + let device_g: u8 = *src.add(F::kGIndex); + let device_b: u8 = *src.add(F::kBIndex); + let mut alpha: u8 = 0; + if F::kAIndex != 0xff { + alpha = *src.add(F::kAIndex) + } + src = src.offset(components as isize); + + let linear_r: f32 = *input_gamma_table_r.offset(device_r as isize); + let linear_g: f32 = *input_gamma_table_g.offset(device_g as isize); + let linear_b: f32 = *input_gamma_table_b.offset(device_b as isize); + let mut out_linear_r: f32 = (*mat.offset(0isize))[0] * linear_r + + 
(*mat.offset(1isize))[0] * linear_g + + (*mat.offset(2isize))[0] * linear_b; + let mut out_linear_g: f32 = (*mat.offset(0isize))[1] * linear_r + + (*mat.offset(1isize))[1] * linear_g + + (*mat.offset(2isize))[1] * linear_b; + let mut out_linear_b: f32 = (*mat.offset(0isize))[2] * linear_r + + (*mat.offset(1isize))[2] * linear_g + + (*mat.offset(2isize))[2] * linear_b; + out_linear_r = clamp_float(out_linear_r); + out_linear_g = clamp_float(out_linear_g); + out_linear_b = clamp_float(out_linear_b); + + let out_device_r: f32 = lut_interp_linear( + out_linear_r as f64, + &(*transform).output_gamma_lut_r.as_ref().unwrap(), + ); + let out_device_g: f32 = lut_interp_linear( + out_linear_g as f64, + (*transform).output_gamma_lut_g.as_ref().unwrap(), + ); + let out_device_b: f32 = lut_interp_linear( + out_linear_b as f64, + (*transform).output_gamma_lut_b.as_ref().unwrap(), + ); + *dest.add(F::kRIndex) = clamp_u8(out_device_r * 255f32); + *dest.add(F::kGIndex) = clamp_u8(out_device_g * 255f32); + *dest.add(F::kBIndex) = clamp_u8(out_device_b * 255f32); + if F::kAIndex != 0xff { + *dest.add(F::kAIndex) = alpha + } + dest = dest.offset(components as isize); + i += 1 + } +} +#[no_mangle] +pub unsafe extern "C" fn qcms_transform_data_rgb_out_lut( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_template_lut::<RGB>(transform, src, dest, length); +} +#[no_mangle] +pub unsafe extern "C" fn qcms_transform_data_rgba_out_lut( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_template_lut::<RGBA>(transform, src, dest, length); +} +#[no_mangle] +pub unsafe extern "C" fn qcms_transform_data_bgra_out_lut( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_template_lut::<BGRA>(transform, src, dest, length); +} + +fn precache_create() -> Arc<PrecacheOuput> { + Arc::new(PrecacheOuput::default()) +} + +#[no_mangle] 
+pub unsafe extern "C" fn qcms_transform_release(t: *mut qcms_transform) { + let t = Box::from_raw(t); + drop(t) +} + +const bradford_matrix: Matrix = Matrix { + m: [ + [0.8951, 0.2664, -0.1614], + [-0.7502, 1.7135, 0.0367], + [0.0389, -0.0685, 1.0296], + ], + invalid: false, +}; + +const bradford_matrix_inv: Matrix = Matrix { + m: [ + [0.9869929, -0.1470543, 0.1599627], + [0.4323053, 0.5183603, 0.0492912], + [-0.0085287, 0.0400428, 0.9684867], + ], + invalid: false, +}; + +// See ICCv4 E.3 +fn compute_whitepoint_adaption(X: f32, Y: f32, Z: f32) -> Matrix { + let p: f32 = (0.96422 * bradford_matrix.m[0][0] + + 1.000 * bradford_matrix.m[1][0] + + 0.82521 * bradford_matrix.m[2][0]) + / (X * bradford_matrix.m[0][0] + Y * bradford_matrix.m[1][0] + Z * bradford_matrix.m[2][0]); + let y: f32 = (0.96422 * bradford_matrix.m[0][1] + + 1.000 * bradford_matrix.m[1][1] + + 0.82521 * bradford_matrix.m[2][1]) + / (X * bradford_matrix.m[0][1] + Y * bradford_matrix.m[1][1] + Z * bradford_matrix.m[2][1]); + let b: f32 = (0.96422 * bradford_matrix.m[0][2] + + 1.000 * bradford_matrix.m[1][2] + + 0.82521 * bradford_matrix.m[2][2]) + / (X * bradford_matrix.m[0][2] + Y * bradford_matrix.m[1][2] + Z * bradford_matrix.m[2][2]); + let white_adaption = Matrix { + m: [[p, 0., 0.], [0., y, 0.], [0., 0., b]], + invalid: false, + }; + Matrix::multiply( + bradford_matrix_inv, + Matrix::multiply(white_adaption, bradford_matrix), + ) +} +#[no_mangle] +pub extern "C" fn qcms_profile_precache_output_transform(mut profile: &mut Profile) { + /* we only support precaching on rgb profiles */ + if profile.color_space != RGB_SIGNATURE { + return; + } + if SUPPORTS_ICCV4.load(Ordering::Relaxed) { + /* don't precache since we will use the B2A LUT */ + if profile.B2A0.is_some() { + return; + } + /* don't precache since we will use the mBA LUT */ + if profile.mBA.is_some() { + return; + } + } + /* don't precache if we do not have the TRC curves */ + if profile.redTRC.is_none() || profile.greenTRC.is_none() || 
profile.blueTRC.is_none() { + return; + } + if profile.output_table_r.is_none() { + let mut output_table_r = precache_create(); + if compute_precache( + profile.redTRC.as_deref().unwrap(), + &mut Arc::get_mut(&mut output_table_r).unwrap().data, + ) { + profile.output_table_r = Some(output_table_r); + } + } + if profile.output_table_g.is_none() { + let mut output_table_g = precache_create(); + if compute_precache( + profile.greenTRC.as_deref().unwrap(), + &mut Arc::get_mut(&mut output_table_g).unwrap().data, + ) { + profile.output_table_g = Some(output_table_g); + } + } + if profile.output_table_b.is_none() { + let mut output_table_b = precache_create(); + if compute_precache( + profile.blueTRC.as_deref().unwrap(), + &mut Arc::get_mut(&mut output_table_b).unwrap().data, + ) { + profile.output_table_b = Some(output_table_b); + } + }; +} +/* Replace the current transformation with a LUT transformation using a given number of sample points */ +fn transform_precacheLUT_float( + mut transform: Box<qcms_transform>, + input: &Profile, + output: &Profile, + samples: i32, + in_type: DataType, +) -> Option<Box<qcms_transform>> { + /* The range between which 2 consecutive sample points can be used to interpolate */ + let lutSize: u32 = (3 * samples * samples * samples) as u32; + + let mut src = Vec::with_capacity(lutSize as usize); + let dest = vec![0.; lutSize as usize]; + /* Prepare a list of points we want to sample */ + for x in 0..samples { + for y in 0..samples { + for z in 0..samples { + src.push(x as f32 / (samples - 1) as f32); + src.push(y as f32 / (samples - 1) as f32); + src.push(z as f32 / (samples - 1) as f32); + } + } + } + let lut = chain_transform(input, output, src, dest, lutSize as usize); + if let Some(lut) = lut { + (*transform).clut = Some(lut); + (*transform).grid_size = samples as u16; + if in_type == RGBA8 { + (*transform).transform_fn = Some(qcms_transform_data_tetra_clut_rgba) + } else if in_type == BGRA8 { + (*transform).transform_fn = 
Some(qcms_transform_data_tetra_clut_bgra) + } else if in_type == RGB8 { + (*transform).transform_fn = Some(qcms_transform_data_tetra_clut_rgb) + } + debug_assert!((*transform).transform_fn.is_some()); + } else { + return None; + } + + Some(transform) +} + +pub fn transform_create( + input: &Profile, + in_type: DataType, + output: &Profile, + out_type: DataType, + _intent: Intent, +) -> Option<Box<qcms_transform>> { + // Ensure the requested input and output types make sense. + let matching_format = match (in_type, out_type) { + (RGB8, RGB8) => true, + (RGBA8, RGBA8) => true, + (BGRA8, BGRA8) => true, + (Gray8, out_type) => matches!(out_type, RGB8 | RGBA8 | BGRA8), + (GrayA8, out_type) => matches!(out_type, RGBA8 | BGRA8), + _ => false, + }; + if !matching_format { + debug_assert!(false, "input/output type"); + return None; + } + let mut transform: Box<qcms_transform> = Box::new(Default::default()); + let mut precache: bool = false; + if output.output_table_r.is_some() + && output.output_table_g.is_some() + && output.output_table_b.is_some() + { + precache = true + } + // This precache assumes RGB_SIGNATURE (fails on GRAY_SIGNATURE, for instance) + if SUPPORTS_ICCV4.load(Ordering::Relaxed) + && (in_type == RGB8 || in_type == RGBA8 || in_type == BGRA8) + && (input.A2B0.is_some() + || output.B2A0.is_some() + || input.mAB.is_some() + || output.mAB.is_some()) + { + // Precache the transformation to a CLUT 33x33x33 in size. + // 33 is used by many profiles and works well in pratice. + // This evenly divides 256 into blocks of 8x8x8. + // TODO For transforming small data sets of about 200x200 or less + // precaching should be avoided. 
+ let result = transform_precacheLUT_float(transform, input, output, 33, in_type); + debug_assert!(result.is_some(), "precacheLUT failed"); + return result; + } + if precache { + transform.output_table_r = Some(Arc::clone(output.output_table_r.as_ref().unwrap())); + transform.output_table_g = Some(Arc::clone(output.output_table_g.as_ref().unwrap())); + transform.output_table_b = Some(Arc::clone(output.output_table_b.as_ref().unwrap())); + } else { + if output.redTRC.is_none() || output.greenTRC.is_none() || output.blueTRC.is_none() { + return None; + } + transform.output_gamma_lut_r = Some(build_output_lut(output.redTRC.as_deref().unwrap())); + transform.output_gamma_lut_g = Some(build_output_lut(output.greenTRC.as_deref().unwrap())); + transform.output_gamma_lut_b = Some(build_output_lut(output.blueTRC.as_deref().unwrap())); + + if transform.output_gamma_lut_r.is_none() + || transform.output_gamma_lut_g.is_none() + || transform.output_gamma_lut_b.is_none() + { + return None; + } + } + if input.color_space == RGB_SIGNATURE { + if precache { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if is_x86_feature_detected!("avx") { + if in_type == RGB8 { + transform.transform_fn = Some(qcms_transform_data_rgb_out_lut_avx) + } else if in_type == RGBA8 { + transform.transform_fn = Some(qcms_transform_data_rgba_out_lut_avx) + } else if in_type == BGRA8 { + transform.transform_fn = Some(qcms_transform_data_bgra_out_lut_avx) + } + } else if cfg!(not(miri)) && is_x86_feature_detected!("sse2") { + if in_type == RGB8 { + transform.transform_fn = Some(qcms_transform_data_rgb_out_lut_sse2) + } else if in_type == RGBA8 { + transform.transform_fn = Some(qcms_transform_data_rgba_out_lut_sse2) + } else if in_type == BGRA8 { + transform.transform_fn = Some(qcms_transform_data_bgra_out_lut_sse2) + } + } + + #[cfg(target_arch = "arm")] + let neon_supported = is_arm_feature_detected!("neon"); + #[cfg(target_arch = "aarch64")] + let neon_supported = 
is_aarch64_feature_detected!("neon"); + + #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + if neon_supported { + if in_type == RGB8 { + transform.transform_fn = Some(qcms_transform_data_rgb_out_lut_neon) + } else if in_type == RGBA8 { + transform.transform_fn = Some(qcms_transform_data_rgba_out_lut_neon) + } else if in_type == BGRA8 { + transform.transform_fn = Some(qcms_transform_data_bgra_out_lut_neon) + } + } + + if transform.transform_fn.is_none() { + if in_type == RGB8 { + transform.transform_fn = Some(qcms_transform_data_rgb_out_lut_precache) + } else if in_type == RGBA8 { + transform.transform_fn = Some(qcms_transform_data_rgba_out_lut_precache) + } else if in_type == BGRA8 { + transform.transform_fn = Some(qcms_transform_data_bgra_out_lut_precache) + } + } + } else if in_type == RGB8 { + transform.transform_fn = Some(qcms_transform_data_rgb_out_lut) + } else if in_type == RGBA8 { + transform.transform_fn = Some(qcms_transform_data_rgba_out_lut) + } else if in_type == BGRA8 { + transform.transform_fn = Some(qcms_transform_data_bgra_out_lut) + } + //XXX: avoid duplicating tables if we can + transform.input_gamma_table_r = build_input_gamma_table(input.redTRC.as_deref()); + transform.input_gamma_table_g = build_input_gamma_table(input.greenTRC.as_deref()); + transform.input_gamma_table_b = build_input_gamma_table(input.blueTRC.as_deref()); + if transform.input_gamma_table_r.is_none() + || transform.input_gamma_table_g.is_none() + || transform.input_gamma_table_b.is_none() + { + return None; + } + /* build combined colorant matrix */ + + let in_matrix: Matrix = build_colorant_matrix(input); + let mut out_matrix: Matrix = build_colorant_matrix(output); + out_matrix = out_matrix.invert(); + if out_matrix.invalid { + return None; + } + let result_0: Matrix = Matrix::multiply(out_matrix, in_matrix); + /* check for NaN values in the matrix and bail if we find any */ + let mut i: u32 = 0; + while i < 3 { + let mut j: u32 = 0; + while j < 3 { + if 
result_0.m[i as usize][j as usize] != result_0.m[i as usize][j as usize] { + return None; + } + j += 1 + } + i += 1 + } + /* store the results in column major mode + * this makes doing the multiplication with sse easier */ + transform.matrix[0][0] = result_0.m[0][0]; + transform.matrix[1][0] = result_0.m[0][1]; + transform.matrix[2][0] = result_0.m[0][2]; + transform.matrix[0][1] = result_0.m[1][0]; + transform.matrix[1][1] = result_0.m[1][1]; + transform.matrix[2][1] = result_0.m[1][2]; + transform.matrix[0][2] = result_0.m[2][0]; + transform.matrix[1][2] = result_0.m[2][1]; + transform.matrix[2][2] = result_0.m[2][2] + } else if input.color_space == GRAY_SIGNATURE { + transform.input_gamma_table_gray = build_input_gamma_table(input.grayTRC.as_deref()); + transform.input_gamma_table_gray.as_ref()?; + if precache { + if out_type == RGB8 { + transform.transform_fn = Some(qcms_transform_data_gray_out_precache) + } else if out_type == RGBA8 { + if in_type == Gray8 { + transform.transform_fn = Some(qcms_transform_data_gray_rgba_out_precache) + } else { + transform.transform_fn = Some(qcms_transform_data_graya_rgba_out_precache) + } + } else if out_type == BGRA8 { + if in_type == Gray8 { + transform.transform_fn = Some(qcms_transform_data_gray_bgra_out_precache) + } else { + transform.transform_fn = Some(qcms_transform_data_graya_bgra_out_precache) + } + } + } else if out_type == RGB8 { + transform.transform_fn = Some(qcms_transform_data_gray_out_lut) + } else if out_type == RGBA8 { + if in_type == Gray8 { + transform.transform_fn = Some(qcms_transform_data_gray_rgba_out_lut) + } else { + transform.transform_fn = Some(qcms_transform_data_graya_rgba_out_lut) + } + } else if out_type == BGRA8 { + if in_type == Gray8 { + transform.transform_fn = Some(qcms_transform_data_gray_bgra_out_lut) + } else { + transform.transform_fn = Some(qcms_transform_data_graya_bgra_out_lut) + } + } + } else { + debug_assert!(false, "unexpected colorspace"); + return None; + } + 
debug_assert!(transform.transform_fn.is_some()); + Some(transform) +} + /// A transform from an input profile to an output one. +pub struct Transform { + ty: DataType, + xfm: Box<qcms_transform>, +} + +impl Transform { + /// Create a new transform from `input` to `output` for pixels of `DataType` `ty` with `intent` + pub fn new( + input: &Profile, + output: &Profile, + ty: DataType, + intent: Intent, + ) -> Option<Self> { + transform_create(input, ty, output, ty, intent).map(|xfm| Transform { ty, xfm }) + } + + /// Apply the color space transform to `data` + pub fn apply(&self, data: &mut [u8]) { + if data.len() % self.ty.bytes_per_pixel() != 0 { + panic!( + "incomplete pixels: should be a multiple of {} got {}", + self.ty.bytes_per_pixel(), + data.len() + ) + } + unsafe { + self.xfm.transform_fn.expect("non-null function pointer")( + &*self.xfm, + data.as_ptr(), + data.as_mut_ptr(), + data.len() / self.ty.bytes_per_pixel(), + ); + } + } +} + +#[no_mangle] +pub extern "C" fn qcms_enable_iccv4() { + SUPPORTS_ICCV4.store(true, Ordering::Relaxed); +} diff --git a/gfx/qcms/src/transform_avx.rs b/gfx/qcms/src/transform_avx.rs new file mode 100644 index 0000000000..d104e4426a --- /dev/null +++ b/gfx/qcms/src/transform_avx.rs @@ -0,0 +1,230 @@ +use crate::transform::{qcms_transform, Format, BGRA, CLAMPMAXVAL, FLOATSCALE, RGB, RGBA}; +#[cfg(target_arch = "x86")] +pub use std::arch::x86::{ + __m128, __m128i, __m256, __m256i, _mm256_add_ps, _mm256_broadcast_ps, _mm256_castps128_ps256, + _mm256_castps256_ps128, _mm256_cvtps_epi32, _mm256_insertf128_ps, _mm256_max_ps, _mm256_min_ps, + _mm256_mul_ps, _mm256_set1_ps, _mm256_set_ps, _mm256_setzero_ps, _mm256_store_si256, + _mm_add_ps, _mm_broadcast_ss, _mm_cvtps_epi32, _mm_loadu_ps, _mm_max_ps, _mm_min_ps, + _mm_mul_ps, _mm_setzero_ps, _mm_store_si128, +}; +#[cfg(target_arch = "x86_64")] +pub use std::arch::x86_64::{ + __m128, __m128i, __m256, __m256i, _mm256_add_ps, _mm256_broadcast_ps, _mm256_castps128_ps256, + 
/// Applies the matrix + lookup-table colour transform to `length` pixels using
/// AVX, handling two pixels per iteration.
///
/// For every pixel the R, G and B source bytes index the per-channel input
/// gamma tables. The looked-up values are multiplied by `matrix[0]`,
/// `matrix[1]` and `matrix[2]` respectively and summed, the sum is clamped to
/// `0.0..=CLAMPMAXVAL`, scaled by `FLOATSCALE`, converted to integers and used
/// as indices into the per-channel output tables. When the format has an alpha
/// channel (`F::kAIndex != 0xff`) the alpha byte is copied through unchanged.
///
/// Each 256-bit register carries two pixels: the low 128-bit half holds the
/// first pixel and the high half holds the second, so the converted indices
/// land in slots 0..=2 and 4..=6 of the scratch buffer.
///
/// # Panics
/// Panics if any input gamma table or output table of `transform` is `None`.
///
/// # Safety
/// * The CPU must support AVX.
/// * `src` must be readable and `dest` writable for `length` pixels of
///   3 bytes (no alpha) or 4 bytes (with alpha) each.
/// * The input gamma tables are indexed with a raw `u8`, so each must hold at
///   least 256 entries.
/// * NOTE(review): the matrix rows are reinterpreted as `&__m128`, which
///   presumably requires `transform.matrix` to be 16-byte aligned — confirm
///   against the `qcms_transform` definition.
/// * NOTE(review): the output tables must cover every index up to
///   `CLAMPMAXVAL * FLOATSCALE`; both constants are defined elsewhere.
#[target_feature(enable = "avx")]
unsafe extern "C" fn qcms_transform_data_template_lut_avx<F: Format>(
    transform: &qcms_transform,
    mut src: *const u8,
    mut dest: *mut u8,
    mut length: usize,
) {
    let mat: *const [f32; 4] = (*transform).matrix.as_ptr();
    // 32-byte aligned scratch buffer that receives the converted table indices.
    let mut input: Output = std::mem::zeroed();
    /* share input and output locations to save having to keep the
     * locations in separate registers */
    let output: *const u32 = &mut input as *mut Output as *mut u32;
    /* deref *transform now to avoid it in loop */
    let igtbl_r: *const f32 = (*transform).input_gamma_table_r.as_ref().unwrap().as_ptr();
    let igtbl_g: *const f32 = (*transform).input_gamma_table_g.as_ref().unwrap().as_ptr();
    let igtbl_b: *const f32 = (*transform).input_gamma_table_b.as_ref().unwrap().as_ptr();
    /* deref *transform now to avoid it in loop */
    let otdata_r: *const u8 = (*transform)
        .output_table_r
        .as_deref()
        .unwrap()
        .data
        .as_ptr();
    let otdata_g: *const u8 = (*transform)
        .output_table_g
        .as_deref()
        .unwrap()
        .data
        .as_ptr();
    let otdata_b: *const u8 = (*transform)
        .output_table_b
        .as_deref()
        .unwrap()
        .data
        .as_ptr();
    /* input matrix values never change; each row is duplicated into both
     * 128-bit halves so that the same row applies to both pixels */
    let mat0: __m256 = _mm256_broadcast_ps(&*((*mat.offset(0isize)).as_ptr() as *const __m128));
    let mat1: __m256 = _mm256_broadcast_ps(&*((*mat.offset(1isize)).as_ptr() as *const __m128));
    let mat2: __m256 = _mm256_broadcast_ps(&*((*mat.offset(2isize)).as_ptr() as *const __m128));
    /* these values don't change, either */
    let max: __m256 = _mm256_set1_ps(CLAMPMAXVAL);
    let min: __m256 = _mm256_setzero_ps();
    let scale: __m256 = _mm256_set1_ps(FLOATSCALE);
    // Bytes per pixel: 3 when the format has no alpha channel, otherwise 4.
    let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32;
    /* working variables */
    let mut vec_r: __m256 = _mm256_setzero_ps();
    let mut vec_g: __m256 = _mm256_setzero_ps();
    let mut vec_b: __m256 = _mm256_setzero_ps();
    let mut result: __m256;
    let mut vec_r0: __m128;
    let mut vec_g0: __m128;
    let mut vec_b0: __m128;
    let mut vec_r1: __m128;
    let mut vec_g1: __m128;
    let mut vec_b1: __m128;
    let mut alpha1: u8 = 0;
    let mut alpha2: u8 = 0;
    /* nothing to do for an empty buffer */
    if length == 0 {
        return;
    }
    /* If there are at least 2 pixels, then we can load their components into
    a single 256-bit register for processing. */
    if length > 1 {
        vec_r0 = _mm_broadcast_ss(&*igtbl_r.offset(*src.add(F::kRIndex) as isize));
        vec_g0 = _mm_broadcast_ss(&*igtbl_g.offset(*src.add(F::kGIndex) as isize));
        vec_b0 = _mm_broadcast_ss(&*igtbl_b.offset(*src.add(F::kBIndex) as isize));
        vec_r1 =
            _mm_broadcast_ss(&*igtbl_r.offset(*src.add(F::kRIndex + components as usize) as isize));
        vec_g1 =
            _mm_broadcast_ss(&*igtbl_g.offset(*src.add(F::kGIndex + components as usize) as isize));
        vec_b1 =
            _mm_broadcast_ss(&*igtbl_b.offset(*src.add(F::kBIndex + components as usize) as isize));
        vec_r = _mm256_insertf128_ps(_mm256_castps128_ps256(vec_r0), vec_r1, 1);
        vec_g = _mm256_insertf128_ps(_mm256_castps128_ps256(vec_g0), vec_g1, 1);
        vec_b = _mm256_insertf128_ps(_mm256_castps128_ps256(vec_b0), vec_b1, 1);
        if F::kAIndex != 0xff {
            alpha1 = *src.add(F::kAIndex);
            alpha2 = *src.add(F::kAIndex + components as usize)
        }
    }
    /* If there are at least 4 pixels, then we can iterate and preload the
    next 2 while we store the result of the current 2. */
    while length > 3 {
        /* Ensure we are pointing at the next 2 pixels for the next load. */
        src = src.offset((2 * components) as isize);
        /* gamma * matrix */
        vec_r = _mm256_mul_ps(vec_r, mat0);
        vec_g = _mm256_mul_ps(vec_g, mat1);
        vec_b = _mm256_mul_ps(vec_b, mat2);
        /* store alpha for these pixels; load alpha for next two */
        if F::kAIndex != 0xff {
            *dest.add(F::kAIndex) = alpha1;
            *dest.add(F::kAIndex + components as usize) = alpha2;
            alpha1 = *src.add(F::kAIndex);
            alpha2 = *src.add(F::kAIndex + components as usize)
        }
        /* sum the three products, clamp to [0, CLAMPMAXVAL] and scale */
        vec_r = _mm256_add_ps(vec_r, _mm256_add_ps(vec_g, vec_b));
        vec_r = _mm256_max_ps(min, vec_r);
        vec_r = _mm256_min_ps(max, vec_r);
        result = _mm256_mul_ps(vec_r, scale);
        /* store calc'd output tables indices */
        _mm256_store_si256(output as *mut __m256i, _mm256_cvtps_epi32(result));
        /* load gamma values for next loop while store completes */
        vec_r0 = _mm_broadcast_ss(&*igtbl_r.offset(*src.add(F::kRIndex) as isize));
        vec_g0 = _mm_broadcast_ss(&*igtbl_g.offset(*src.add(F::kGIndex) as isize));
        vec_b0 = _mm_broadcast_ss(&*igtbl_b.offset(*src.add(F::kBIndex) as isize));
        vec_r1 =
            _mm_broadcast_ss(&*igtbl_r.offset(*src.add(F::kRIndex + components as usize) as isize));
        vec_g1 =
            _mm_broadcast_ss(&*igtbl_g.offset(*src.add(F::kGIndex + components as usize) as isize));
        vec_b1 =
            _mm_broadcast_ss(&*igtbl_b.offset(*src.add(F::kBIndex + components as usize) as isize));
        vec_r = _mm256_insertf128_ps(_mm256_castps128_ps256(vec_r0), vec_r1, 1);
        vec_g = _mm256_insertf128_ps(_mm256_castps128_ps256(vec_g0), vec_g1, 1);
        vec_b = _mm256_insertf128_ps(_mm256_castps128_ps256(vec_b0), vec_b1, 1);
        /* use calc'd indices to output RGB values: slots 0..=2 belong to the
         * first pixel, slots 4..=6 to the second */
        *dest.add(F::kRIndex) = *otdata_r.offset(*output.offset(0isize) as isize);
        *dest.add(F::kGIndex) = *otdata_g.offset(*output.offset(1isize) as isize);
        *dest.add(F::kBIndex) = *otdata_b.offset(*output.offset(2isize) as isize);
        *dest.add(F::kRIndex + components as usize) =
            *otdata_r.offset(*output.offset(4isize) as isize);
        *dest.add(F::kGIndex + components as usize) =
            *otdata_g.offset(*output.offset(5isize) as isize);
        *dest.add(F::kBIndex + components as usize) =
            *otdata_b.offset(*output.offset(6isize) as isize);
        dest = dest.offset((2 * components) as isize);
        length -= 2
    }
    /* There are 0-3 pixels remaining. If there are 2-3 remaining, then we know
    we have already populated the necessary registers to start the transform. */
    if length > 1 {
        vec_r = _mm256_mul_ps(vec_r, mat0);
        vec_g = _mm256_mul_ps(vec_g, mat1);
        vec_b = _mm256_mul_ps(vec_b, mat2);
        if F::kAIndex != 0xff {
            *dest.add(F::kAIndex) = alpha1;
            *dest.add(F::kAIndex + components as usize) = alpha2
        }
        vec_r = _mm256_add_ps(vec_r, _mm256_add_ps(vec_g, vec_b));
        vec_r = _mm256_max_ps(min, vec_r);
        vec_r = _mm256_min_ps(max, vec_r);
        result = _mm256_mul_ps(vec_r, scale);
        _mm256_store_si256(output as *mut __m256i, _mm256_cvtps_epi32(result));
        *dest.add(F::kRIndex) = *otdata_r.offset(*output.offset(0isize) as isize);
        *dest.add(F::kGIndex) = *otdata_g.offset(*output.offset(1isize) as isize);
        *dest.add(F::kBIndex) = *otdata_b.offset(*output.offset(2isize) as isize);
        *dest.add(F::kRIndex + components as usize) =
            *otdata_r.offset(*output.offset(4isize) as isize);
        *dest.add(F::kGIndex + components as usize) =
            *otdata_g.offset(*output.offset(5isize) as isize);
        *dest.add(F::kBIndex + components as usize) =
            *otdata_b.offset(*output.offset(6isize) as isize);
        src = src.offset((2 * components) as isize);
        dest = dest.offset((2 * components) as isize);
        length -= 2
    }
    /* There may be 0-1 pixels remaining; the odd pixel is processed with the
     * low 128-bit halves of the constant registers. */
    if length == 1 {
        vec_r0 = _mm_broadcast_ss(&*igtbl_r.offset(*src.add(F::kRIndex) as isize));
        vec_g0 = _mm_broadcast_ss(&*igtbl_g.offset(*src.add(F::kGIndex) as isize));
        vec_b0 = _mm_broadcast_ss(&*igtbl_b.offset(*src.add(F::kBIndex) as isize));
        vec_r0 = _mm_mul_ps(vec_r0, _mm256_castps256_ps128(mat0));
        vec_g0 = _mm_mul_ps(vec_g0, _mm256_castps256_ps128(mat1));
        vec_b0 = _mm_mul_ps(vec_b0, _mm256_castps256_ps128(mat2));
        if F::kAIndex != 0xff {
            *dest.add(F::kAIndex) = *src.add(F::kAIndex)
        }
        vec_r0 = _mm_add_ps(vec_r0, _mm_add_ps(vec_g0, vec_b0));
        vec_r0 = _mm_max_ps(_mm256_castps256_ps128(min), vec_r0);
        vec_r0 = _mm_min_ps(_mm256_castps256_ps128(max), vec_r0);
        vec_r0 = _mm_mul_ps(vec_r0, _mm256_castps256_ps128(scale));
        _mm_store_si128(output as *mut __m128i, _mm_cvtps_epi32(vec_r0));
        *dest.add(F::kRIndex) = *otdata_r.offset(*output.offset(0isize) as isize);
        *dest.add(F::kGIndex) = *otdata_g.offset(*output.offset(1isize) as isize);
        *dest.add(F::kBIndex) = *otdata_b.offset(*output.offset(2isize) as isize)
    };
}
/// Applies the matrix + lookup-table colour transform to `length` pixels using
/// NEON, one pixel per iteration.
///
/// For every pixel the R, G and B source bytes index the per-channel input
/// gamma tables. The looked-up values are replicated across a vector,
/// multiplied by `matrix[0]`, `matrix[1]` and `matrix[2]` respectively and
/// summed; the sum is clamped between zero and `clampMaxValue`, scaled by
/// `floatScale`, converted to integers and lanes 0..=2 are used as indices
/// into the R, G and B output tables. When the format has an alpha channel
/// (`F::kAIndex != 0xff`) the alpha byte is copied through unchanged.
///
/// The loop is software-pipelined: the gamma values of the *next* pixel are
/// loaded at the end of each iteration, and the last pixel is finished after
/// the loop.
///
/// # Panics
/// Panics if any input gamma table or output table of `transform` is `None`.
///
/// # Safety
/// * The CPU must support NEON.
/// * `src` must be readable and `dest` writable for `length` pixels of
///   3 bytes (no alpha) or 4 bytes (with alpha) each.
/// * The input gamma tables are indexed with a raw `u8`, so each must hold at
///   least 256 entries.
/// * NOTE(review): the loop counter `i` is a `u32` advanced with
///   `wrapping_add`; for `length - 1 > u32::MAX` it would wrap and never reach
///   the bound — confirm callers never pass such lengths.
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
unsafe extern "C" fn qcms_transform_data_template_lut_neon<F: Format>(
    transform: &qcms_transform,
    mut src: *const u8,
    mut dest: *mut u8,
    mut length: usize,
) {
    let mat: *const [f32; 4] = (*transform).matrix.as_ptr();
    /* deref *transform now to avoid it in loop */
    let igtbl_r: *const f32 = (*transform).input_gamma_table_r.as_ref().unwrap().as_ptr();
    let igtbl_g: *const f32 = (*transform).input_gamma_table_g.as_ref().unwrap().as_ptr();
    let igtbl_b: *const f32 = (*transform).input_gamma_table_b.as_ref().unwrap().as_ptr();
    /* deref *transform now to avoid it in loop */
    let otdata_r: *const u8 = (*transform)
        .output_table_r
        .as_deref()
        .unwrap()
        .data
        .as_ptr();
    let otdata_g: *const u8 = (*transform)
        .output_table_g
        .as_deref()
        .unwrap()
        .data
        .as_ptr();
    let otdata_b: *const u8 = (*transform)
        .output_table_b
        .as_deref()
        .unwrap()
        .data
        .as_ptr();
    /* input matrix values never change */
    let mat0: float32x4_t = vld1q_f32((*mat.offset(0isize)).as_ptr());
    let mat1: float32x4_t = vld1q_f32((*mat.offset(1isize)).as_ptr());
    let mat2: float32x4_t = vld1q_f32((*mat.offset(2isize)).as_ptr());
    /* these values don't change, either */
    let max: float32x4_t = vld1q_dup_f32(&clampMaxValue);
    let min: float32x4_t = zeroed();
    let scale: float32x4_t = vld1q_dup_f32(&floatScale);
    // Bytes per pixel: 3 when the format has no alpha channel, otherwise 4.
    let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32;
    /* working variables */
    let mut vec_r: float32x4_t;
    let mut vec_g: float32x4_t;
    let mut vec_b: float32x4_t;
    let mut result: int32x4_t;
    let mut alpha: u8 = 0;
    /* nothing to do for an empty buffer */
    if length == 0 {
        return;
    }
    /* one pixel is handled outside of the loop */
    length = length.wrapping_sub(1);
    /* setup for transforming 1st pixel */
    vec_r = vld1q_dup_f32(&*igtbl_r.offset(*src.offset(F::kRIndex as isize) as isize));
    vec_g = vld1q_dup_f32(&*igtbl_g.offset(*src.offset(F::kGIndex as isize) as isize));
    vec_b = vld1q_dup_f32(&*igtbl_b.offset(*src.offset(F::kBIndex as isize) as isize));
    if F::kAIndex != 0xff {
        alpha = *src.offset(F::kAIndex as isize)
    }
    src = src.offset(components as isize);
    let mut i: u32 = 0;
    while (i as usize) < length {
        /* gamma * matrix */
        vec_r = vmulq_f32(vec_r, mat0);
        vec_g = vmulq_f32(vec_g, mat1);
        vec_b = vmulq_f32(vec_b, mat2);
        /* store alpha for this pixel; load alpha for next */
        if F::kAIndex != 0xff {
            *dest.offset(F::kAIndex as isize) = alpha;
            alpha = *src.offset(F::kAIndex as isize)
        }
        /* sum the three products, clamp and scale to table indices */
        vec_r = vaddq_f32(vec_r, vaddq_f32(vec_g, vec_b));
        vec_r = vmaxq_f32(min, vec_r);
        vec_r = vminq_f32(max, vec_r);
        result = vcvtq_s32_f32(vmulq_f32(vec_r, scale));

        /* use calc'd indices to output RGB values */
        *dest.offset(F::kRIndex as isize) = *otdata_r.offset(vgetq_lane_s32(result, 0) as isize);
        *dest.offset(F::kGIndex as isize) = *otdata_g.offset(vgetq_lane_s32(result, 1) as isize);
        *dest.offset(F::kBIndex as isize) = *otdata_b.offset(vgetq_lane_s32(result, 2) as isize);

        /* load gamma values for next loop while store completes */
        vec_r = vld1q_dup_f32(&*igtbl_r.offset(*src.offset(F::kRIndex as isize) as isize));
        vec_g = vld1q_dup_f32(&*igtbl_g.offset(*src.offset(F::kGIndex as isize) as isize));
        vec_b = vld1q_dup_f32(&*igtbl_b.offset(*src.offset(F::kBIndex as isize) as isize));

        dest = dest.offset(components as isize);
        src = src.offset(components as isize);
        i = i.wrapping_add(1)
    }
    /* handle final (maybe only) pixel, whose gamma values are already loaded */
    vec_r = vmulq_f32(vec_r, mat0);
    vec_g = vmulq_f32(vec_g, mat1);
    vec_b = vmulq_f32(vec_b, mat2);
    if F::kAIndex != 0xff {
        *dest.offset(F::kAIndex as isize) = alpha
    }
    vec_r = vaddq_f32(vec_r, vaddq_f32(vec_g, vec_b));
    vec_r = vmaxq_f32(min, vec_r);
    vec_r = vminq_f32(max, vec_r);
    result = vcvtq_s32_f32(vmulq_f32(vec_r, scale));

    *dest.offset(F::kRIndex as isize) = *otdata_r.offset(vgetq_lane_s32(result, 0) as isize);
    *dest.offset(F::kGIndex as isize) = *otdata_g.offset(vgetq_lane_s32(result, 1) as isize);
    *dest.offset(F::kBIndex as isize) = *otdata_b.offset(vgetq_lane_s32(result, 2) as isize);
}
/// Move vector element to general-purpose register.
///
/// Returns lane `imm5` of `v`. Panics (via `assert!`) unless `imm5` is in
/// `0..=3`; the check happens at run time because `imm5` is an ordinary
/// argument here.
///
/// NOTE(review): unlike the sibling wrappers this function does not carry
/// `#[target_feature(enable = "neon")]` — confirm whether that is intentional.
#[inline]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
pub unsafe fn vgetq_lane_s32(v: int32x4_t, imm5: i32) -> i32 {
    assert!(imm5 >= 0 && imm5 <= 3);
    simd_extract(v, imm5 as u32)
}

/// Multiply: lane-wise product of `a` and `b`, implemented with the
/// `simd_mul` platform intrinsic.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
pub unsafe fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
    simd_mul(a, b)
}

/// Floating-point minimum (vector).
///
/// Forwards to the `vminq_f32_` extern, which is bound to the LLVM
/// `vmins`/`fmin` NEON intrinsic for the current architecture.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
pub unsafe fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
    vminq_f32_(a, b)
}
/// Applies the matrix + lookup-table colour transform to `length` pixels using
/// SSE/SSE2 intrinsics, one pixel per iteration.
///
/// For every pixel the R, G and B source bytes index the per-channel input
/// gamma tables. Each looked-up value is splatted across a register,
/// multiplied by `matrix[0]`, `matrix[1]` and `matrix[2]` respectively and
/// summed; the sum is clamped to `0.0..=CLAMPMAXVAL`, scaled by `FLOATSCALE`,
/// converted to integers and slots 0..=2 are used as indices into the R, G
/// and B output tables. When the format has an alpha channel
/// (`F::kAIndex != 0xff`) the alpha byte is copied through unchanged.
///
/// The loop is software-pipelined: the gamma values of the *next* pixel are
/// loaded while the current indices are being stored, and the last pixel is
/// finished after the loop.
///
/// # Panics
/// Panics if any input gamma table or output table of `transform` is `None`.
///
/// # Safety
/// * `src` must be readable and `dest` writable for `length` pixels of
///   3 bytes (no alpha) or 4 bytes (with alpha) each.
/// * The input gamma tables are indexed with a raw `u8`, so each must hold at
///   least 256 entries.
/// * NOTE(review): the matrix rows are read with `_mm_load_ps`, an aligned
///   load, so `transform.matrix` presumably has to be 16-byte aligned —
///   confirm against the `qcms_transform` definition.
/// * NOTE(review): the loop counter `i` is a `u32`; `i += 1` would overflow
///   for `length - 1 > u32::MAX` — confirm callers never pass such lengths.
unsafe extern "C" fn qcms_transform_data_template_lut_sse2<F: Format>(
    transform: &qcms_transform,
    mut src: *const u8,
    mut dest: *mut u8,
    mut length: usize,
) {
    let mat: *const [f32; 4] = (*transform).matrix.as_ptr();
    // 16-byte aligned scratch buffer that receives the converted table indices.
    let mut input: Output = std::mem::zeroed();
    /* share input and output locations to save having to keep the
     * locations in separate registers */
    let output: *const u32 = &mut input as *mut Output as *mut u32;
    /* deref *transform now to avoid it in loop */
    let igtbl_r: *const f32 = (*transform).input_gamma_table_r.as_ref().unwrap().as_ptr();
    let igtbl_g: *const f32 = (*transform).input_gamma_table_g.as_ref().unwrap().as_ptr();
    let igtbl_b: *const f32 = (*transform).input_gamma_table_b.as_ref().unwrap().as_ptr();
    /* deref *transform now to avoid it in loop */
    let otdata_r: *const u8 = (*transform)
        .output_table_r
        .as_deref()
        .unwrap()
        .data
        .as_ptr();
    let otdata_g: *const u8 = (*transform)
        .output_table_g
        .as_deref()
        .unwrap()
        .data
        .as_ptr();
    let otdata_b: *const u8 = (*transform)
        .output_table_b
        .as_deref()
        .unwrap()
        .data
        .as_ptr();
    /* input matrix values never change */
    let mat0: __m128 = _mm_load_ps((*mat.offset(0isize)).as_ptr());
    let mat1: __m128 = _mm_load_ps((*mat.offset(1isize)).as_ptr());
    let mat2: __m128 = _mm_load_ps((*mat.offset(2isize)).as_ptr());
    /* these values don't change, either */
    let max: __m128 = _mm_set1_ps(CLAMPMAXVAL);
    let min: __m128 = _mm_setzero_ps();
    let scale: __m128 = _mm_set1_ps(FLOATSCALE);
    // Bytes per pixel: 3 when the format has no alpha channel, otherwise 4.
    let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32;
    /* working variables */
    let mut vec_r: __m128;
    let mut vec_g: __m128;
    let mut vec_b: __m128;
    let mut result: __m128;
    let mut alpha: u8 = 0;
    /* nothing to do for an empty buffer */
    if length == 0 {
        return;
    }
    /* one pixel is handled outside of the loop */
    length -= 1;
    /* setup for transforming 1st pixel */
    vec_r = _mm_load_ss(&*igtbl_r.offset(*src.add(F::kRIndex) as isize));
    vec_g = _mm_load_ss(&*igtbl_g.offset(*src.add(F::kGIndex) as isize));
    vec_b = _mm_load_ss(&*igtbl_b.offset(*src.add(F::kBIndex) as isize));
    if F::kAIndex != 0xff {
        alpha = *src.add(F::kAIndex)
    }
    src = src.offset(components as isize);
    let mut i: u32 = 0;
    while (i as usize) < length {
        /* position values from gamma tables: splat lane 0 to all lanes */
        vec_r = _mm_shuffle_ps(vec_r, vec_r, 0);
        vec_g = _mm_shuffle_ps(vec_g, vec_g, 0);
        vec_b = _mm_shuffle_ps(vec_b, vec_b, 0);
        /* gamma * matrix */
        vec_r = _mm_mul_ps(vec_r, mat0);
        vec_g = _mm_mul_ps(vec_g, mat1);
        vec_b = _mm_mul_ps(vec_b, mat2);
        /* store alpha for this pixel; load alpha for next */
        if F::kAIndex != 0xff {
            *dest.add(F::kAIndex) = alpha;
            alpha = *src.add(F::kAIndex)
        }
        /* sum the three products, clamp to [0, CLAMPMAXVAL] and scale */
        vec_r = _mm_add_ps(vec_r, _mm_add_ps(vec_g, vec_b));
        vec_r = _mm_max_ps(min, vec_r);
        vec_r = _mm_min_ps(max, vec_r);
        result = _mm_mul_ps(vec_r, scale);
        /* store calc'd output tables indices */
        _mm_store_si128(output as *mut __m128i, _mm_cvtps_epi32(result));
        /* load gamma values for next loop while store completes */
        vec_r = _mm_load_ss(&*igtbl_r.offset(*src.add(F::kRIndex) as isize));
        vec_g = _mm_load_ss(&*igtbl_g.offset(*src.add(F::kGIndex) as isize));
        vec_b = _mm_load_ss(&*igtbl_b.offset(*src.add(F::kBIndex) as isize));
        src = src.offset(components as isize);
        /* use calc'd indices to output RGB values */
        *dest.add(F::kRIndex) = *otdata_r.offset(*output.offset(0isize) as isize);
        *dest.add(F::kGIndex) = *otdata_g.offset(*output.offset(1isize) as isize);
        *dest.add(F::kBIndex) = *otdata_b.offset(*output.offset(2isize) as isize);
        dest = dest.offset(components as isize);
        i += 1
    }
    /* handle final (maybe only) pixel, whose gamma values are already loaded */
    vec_r = _mm_shuffle_ps(vec_r, vec_r, 0);
    vec_g = _mm_shuffle_ps(vec_g, vec_g, 0);
    vec_b = _mm_shuffle_ps(vec_b, vec_b, 0);
    vec_r = _mm_mul_ps(vec_r, mat0);
    vec_g = _mm_mul_ps(vec_g, mat1);
    vec_b = _mm_mul_ps(vec_b, mat2);
    if F::kAIndex != 0xff {
        *dest.add(F::kAIndex) = alpha
    }
    vec_r = _mm_add_ps(vec_r, _mm_add_ps(vec_g, vec_b));
    vec_r = _mm_max_ps(min, vec_r);
    vec_r = _mm_min_ps(max, vec_r);
    result = _mm_mul_ps(vec_r, scale);
    _mm_store_si128(output as *mut __m128i, _mm_cvtps_epi32(result));
    *dest.add(F::kRIndex) = *otdata_r.offset(*output.offset(0isize) as isize);
    *dest.add(F::kGIndex) = *otdata_g.offset(*output.offset(1isize) as isize);
    *dest.add(F::kBIndex) = *otdata_b.offset(*output.offset(2isize) as isize);
}
/// Converts an ICC `u8Fixed8Number` (8 integer bits, 8 fractional bits) to a
/// float.
///
/// `0x0000` maps to `0.0`, `0x0100` to `1.0` and `0xffff` to `255 + 255/256`.
#[inline]
fn u8Fixed8Number_to_float(x: u16) -> f32 {
    let scaled = f64::from(i32::from(x)) / 256.0;
    scaled as f32
}

/// Clamps `a` to the closed range `0.0..=1.0`, mapping NaN to `0.0`.
///
/// The comparisons are ordered so that NaN, which fails every comparison,
/// falls through to the final `0.0` instead of being returned unchanged.
#[inline]
pub fn clamp_float(a: f32) -> f32 {
    if a > 1. {
        return 1.;
    }
    if a >= 0. {
        return a;
    }
    // Reached for negative values and for NaN.
    0.
}
/// Linearly interpolates `table` at `input_value`, where both the input and
/// the result are 16-bit fractions representing the range `0..=1`
/// (`0` is `0.0`, `65535` is `1.0`).
///
/// The input is first scaled to `input_value * (table.len() - 1)`, a position
/// expressed in units of 1/65535 of a table cell; the two surrounding entries
/// are then blended with integer arithmetic.
///
/// Returns `0` for an empty table. The arithmetic is done in `u64`, so large
/// tables cannot overflow the intermediate product.
#[no_mangle]
pub fn lut_interp_linear16(input_value: u16, table: &[u16]) -> u16 {
    let last_index = match table.len().checked_sub(1) {
        Some(last) => last as u64,
        // Nothing to interpolate in an empty table.
        None => return 0,
    };
    /* Start scaling input_value to the length of the array: 65535*(length-1).
     * We'll divide out the 65535 next */
    let value = u64::from(input_value) * last_index;
    /* equivalent to ceil(value/65535) */
    let upper = ((value + 65534) / 65535) as usize;
    /* equivalent to floor(value/65535) */
    let lower = (value / 65535) as usize;
    /* interp is the weight of the upper entry, in 0..=65534 */
    let interp = value % 65535;
    let blended = (u64::from(table[upper]) * interp
        + u64::from(table[lower]) * (65535 - interp))
        / 65535;
    // `blended` is a weighted mean of two u16 values, so it always fits.
    blended as u16
}
/// Linearly interpolates the float lookup table `table` at `value`.
///
/// `value` is expected to lie between 0 and 1; it is scaled to a position in
/// `0..=table.len() - 1` and the two neighbouring entries are blended.
/// Positions outside the table lead to an out-of-bounds index panic.
pub fn lut_interp_linear_float(value: f32, table: &[f32]) -> f32 {
    let position = value * (table.len() - 1) as f32;

    let hi: i32 = position.ceil() as i32;
    let lo: i32 = position.floor() as i32;
    // Distance from the upper neighbour; it is the weight of the lower entry.
    let distance_to_hi: f32 = hi as f32 - position;

    // The upper term is accumulated in f64, the lower product in f32, exactly
    // as the mixed-precision expression this replaces.
    let hi_term: f64 = f64::from(table[hi as usize]) * (1.0f64 - f64::from(distance_to_hi));
    let lo_term: f64 = f64::from(table[lo as usize] * distance_to_hi);
    (hi_term + lo_term) as f32
}
+ gamma_table.push(clamp_float( + (((a * X as f32) as f64 / 255.0f64 + b as f64).powf(y as f64) + c as f64 + e as f64) + as f32, + )); + } else { + gamma_table.push(clamp_float( + ((c * X as f32) as f64 / 255.0f64 + f as f64) as f32, + )); + } + } +} + +fn compute_curve_gamma_table_type0(gamma_table: &mut Vec<f32>) { + for i in 0..256 { + gamma_table.push((i as f64 / 255.0f64) as f32); + } +} +pub(crate) fn build_input_gamma_table(TRC: Option<&curveType>) -> Option<Vec<f32>> { + let TRC = match TRC { + Some(TRC) => TRC, + None => return None, + }; + let mut gamma_table = Vec::with_capacity(256); + match TRC { + curveType::Parametric(params) => { + compute_curve_gamma_table_type_parametric(&mut gamma_table, params) + } + curveType::Curve(data) => match data.len() { + 0 => compute_curve_gamma_table_type0(&mut gamma_table), + 1 => compute_curve_gamma_table_type1(&mut gamma_table, data[0]), + _ => compute_curve_gamma_table_type2(&mut gamma_table, data), + }, + } + + Some(gamma_table) +} +pub fn build_colorant_matrix(p: &Profile) -> Matrix { + let mut result: Matrix = Matrix { + m: [[0.; 3]; 3], + invalid: false, + }; + result.m[0][0] = s15Fixed16Number_to_float(p.redColorant.X); + result.m[0][1] = s15Fixed16Number_to_float(p.greenColorant.X); + result.m[0][2] = s15Fixed16Number_to_float(p.blueColorant.X); + result.m[1][0] = s15Fixed16Number_to_float(p.redColorant.Y); + result.m[1][1] = s15Fixed16Number_to_float(p.greenColorant.Y); + result.m[1][2] = s15Fixed16Number_to_float(p.blueColorant.Y); + result.m[2][0] = s15Fixed16Number_to_float(p.redColorant.Z); + result.m[2][1] = s15Fixed16Number_to_float(p.greenColorant.Z); + result.m[2][2] = s15Fixed16Number_to_float(p.blueColorant.Z); + result.invalid = false; + result +} +/* The following code is copied nearly directly from lcms. + * I think it could be much better. For example, Argyll seems to have better code in + * icmTable_lookup_bwd and icmTable_setup_bwd. 
/// Finds an input (as a 16-bit fraction) for which the forward lookup
/// `lut_interp_linear16(input, LutTable)` yields approximately `Value`.
///
/// The search runs in three stages:
/// 1. Leading zero entries and trailing `0xffff` entries are counted; when
///    either run is longer than one entry the search interval is narrowed
///    to the part of the table between those flat runs.
/// 2. A binary search over the interval evaluates the forward table at `x - 1`
///    and returns immediately on an exact match.
/// 3. Otherwise the table segment around the last probe is inverted
///    analytically (straight line through the two surrounding nodes).
///
/// Returns `0` when `Value` is `0` and the table either does not start with a
/// zero entry or has a flat run, and when the narrowed interval is empty
/// (table not invertible).
///
/// # Panics
/// Panics on an empty `LutTable`, because entry 0 is read before the length
/// is consulted.
#[no_mangle]
pub fn lut_inverse_interp16(Value: u16, LutTable: &[u16]) -> uint16_fract_t {
    // Search interval [l, r]; probes are evaluated at `x - 1`, hence the
    // offset of one relative to the 0..=0xffff input domain.
    let mut l: i32 = 1; // 'int' Give spacing for negative values
    let mut r: i32 = 0x10000;
    let mut x: i32 = 0;
    let mut res: i32;
    let length = LutTable.len() as i32;

    // Count the zero entries at the start of the table (at most length - 1).
    let mut NumZeroes: i32 = 0;
    while LutTable[NumZeroes as usize] as i32 == 0 && NumZeroes < length - 1 {
        NumZeroes += 1
    }
    // There are no zeros at the beginning and we are trying to find a zero, so
    // return anything. It seems zero would be the less destructive choice
    /* I'm not sure that this makes sense, but oh well... */
    if NumZeroes == 0 && Value as i32 == 0 {
        return 0u16;
    }
    // Count the 0xffff entries at the end of the table (at most length - 1).
    let mut NumPoles: i32 = 0;
    while LutTable[(length - 1 - NumPoles) as usize] as i32 == 0xffff && NumPoles < length - 1 {
        NumPoles += 1
    }
    // Does the curve belong to this case?
    if NumZeroes > 1 || NumPoles > 1 {
        let a_0: i32;
        let b_0: i32;
        // Identify if value fall downto 0 or FFFF zone
        if Value as i32 == 0 {
            return 0u16;
        }
        // if (Value == 0xFFFF) return 0xFFFF;
        // else restrict to valid zone
        if NumZeroes > 1 {
            a_0 = (NumZeroes - 1) * 0xffff / (length - 1);
            l = a_0 - 1
        }
        if NumPoles > 1 {
            b_0 = (length - 1 - NumPoles) * 0xffff / (length - 1);
            r = b_0 + 1
        }
    }
    if r <= l {
        // If this happens LutTable is not invertible
        return 0u16;
    }
    // Seems not a degenerated case... apply binary search
    while r > l {
        x = (l + r) / 2;
        res = lut_interp_linear16((x - 1) as uint16_fract_t, LutTable) as i32;
        if res == Value as i32 {
            // Found exact match.
            return (x - 1) as uint16_fract_t;
        }
        if res > Value as i32 {
            r = x - 1
        } else {
            l = x + 1
        }
    }

    // Not found, should we interpolate?

    // Get surrounding nodes
    debug_assert!(x >= 1);

    // Position of the last probe in table-cell units, and the two table nodes
    // that enclose it.
    let val2: f64 = (length - 1) as f64 * ((x - 1) as f64 / 65535.0f64);
    let cell0: i32 = val2.floor() as i32;
    let cell1: i32 = val2.ceil() as i32;
    if cell0 == cell1 {
        // The probe sits exactly on a node; there is no segment to invert.
        return x as uint16_fract_t;
    }

    // Line y = a * x + b through the two enclosing nodes.
    let y0: f64 = LutTable[cell0 as usize] as f64;
    let x0: f64 = 65535.0f64 * cell0 as f64 / (length - 1) as f64;
    let y1: f64 = LutTable[cell1 as usize] as f64;
    let x1: f64 = 65535.0f64 * cell1 as f64 / (length - 1) as f64;
    let a: f64 = (y1 - y0) / (x1 - x0);
    let b: f64 = y0 - a * x0;
    if a.abs() < 0.01f64 {
        // Nearly flat segment: dividing by `a` would be unstable.
        return x as uint16_fract_t;
    }
    // Solve the line for the input and clamp/round it to a 16-bit fraction.
    let f: f64 = (Value as i32 as f64 - b) / a;
    if f < 0.0f64 {
        return 0u16;
    }
    if f >= 65535.0f64 {
        return 0xffffu16;
    }
    (f + 0.5f64).floor() as uint16_fract_t
}
/*
The number of entries needed to invert a lookup table should not
necessarily be the same as the original number of entries. This is
especially true of lookup tables that have a small number of entries.

For example:
Using a table like:
   {0, 3104, 14263, 34802, 65535}
invert_lut will produce an inverse of:
   {3, 34459, 47529, 56801, 65535}
which has a maximum error of about 9855 (pixel difference of ~38.346)

For now, we punt the decision of output size to the caller. */
/// Builds an `out_length`-entry table that approximates the inverse of
/// `table` by sampling `lut_inverse_interp16` at evenly spaced inputs from
/// `0` to `65535`.
fn invert_lut(table: &[u16], out_length: i32) -> Vec<u16> {
    /* for now we invert the lut by creating a lut of size out_length
     * and attempting to lookup a value for each entry using lut_inverse_interp16 */
    let mut output = Vec::with_capacity(out_length as usize);
    for i in 0..out_length {
        // Evenly spaced sample position, rounded to the nearest 16-bit fraction.
        let x: f64 = i as f64 * 65535.0f64 / (out_length - 1) as f64;
        let input: uint16_fract_t = (x + 0.5f64).floor() as uint16_fract_t;
        output.push(lut_inverse_interp16(input, table));
    }
    output
}
* (v as f32 / PRECACHE_OUTPUT_MAX as f32).powf(gamma)) as u8; + } +} +pub fn compute_precache_lut(output: &mut [u8; PRECACHE_OUTPUT_SIZE], table: &[u16]) { + for v in 0..PRECACHE_OUTPUT_SIZE { + output[v] = lut_interp_linear_precache_output(v as u32, table); + } +} +pub fn compute_precache_linear(output: &mut [u8; PRECACHE_OUTPUT_SIZE]) { + for v in 0..PRECACHE_OUTPUT_SIZE { + //XXX: round? + output[v] = (v / (PRECACHE_OUTPUT_SIZE / 256)) as u8; + } +} +pub(crate) fn compute_precache(trc: &curveType, output: &mut [u8; PRECACHE_OUTPUT_SIZE]) -> bool { + match trc { + curveType::Parametric(params) => { + let mut gamma_table = Vec::with_capacity(256); + let mut gamma_table_uint: [u16; 256] = [0; 256]; + + let mut inverted_size: i32 = 256; + compute_curve_gamma_table_type_parametric(&mut gamma_table, params); + let mut i: u16 = 0u16; + while (i as i32) < 256 { + gamma_table_uint[i as usize] = (gamma_table[i as usize] * 65535f32) as u16; + i += 1 + } + //XXX: the choice of a minimum of 256 here is not backed by any theory, + // measurement or data, howeve r it is what lcms uses. + // the maximum number we would need is 65535 because that's the + // accuracy used for computing the pre cache table + if inverted_size < 256 { + inverted_size = 256 + } + let inverted = invert_lut(&gamma_table_uint, inverted_size); + compute_precache_lut(output, &inverted); + } + curveType::Curve(data) => { + match data.len() { + 0 => compute_precache_linear(output), + 1 => compute_precache_pow(output, 1. / u8Fixed8Number_to_float(data[0])), + _ => { + let mut inverted_size = data.len() as i32; + //XXX: the choice of a minimum of 256 here is not backed by any theory, + // measurement or data, howeve r it is what lcms uses. 
+ // the maximum number we would need is 65535 because that's the + // accuracy used for computing the pre cache table + if inverted_size < 256 { + inverted_size = 256 + } //XXX turn this conversion into a function + let inverted = invert_lut(data, inverted_size); + compute_precache_lut(output, &inverted); + } + } + } + } + true +} +fn build_linear_table(length: i32) -> Vec<u16> { + let mut output = Vec::with_capacity(length as usize); + for i in 0..length { + let x: f64 = i as f64 * 65535.0f64 / (length - 1) as f64; + let input: uint16_fract_t = (x + 0.5f64).floor() as uint16_fract_t; + output.push(input); + } + output +} +fn build_pow_table(gamma: f32, length: i32) -> Vec<u16> { + let mut output = Vec::with_capacity(length as usize); + for i in 0..length { + let mut x: f64 = i as f64 / (length - 1) as f64; + x = x.powf(gamma as f64); + let result: uint16_fract_t = (x * 65535.0f64 + 0.5f64).floor() as uint16_fract_t; + output.push(result); + } + output +} + +pub(crate) fn build_output_lut(trc: &curveType) -> Vec<u16> { + match trc { + curveType::Parametric(params) => { + let mut gamma_table = Vec::with_capacity(256); + let mut output = Vec::with_capacity(256); + compute_curve_gamma_table_type_parametric(&mut gamma_table, params); + for i in 0..256 { + output.push((gamma_table[i as usize] * 65535f32) as u16); + } + output + } + curveType::Curve(data) => { + match data.len() { + 0 => build_linear_table(4096), + 1 => { + let gamma = 1. / u8Fixed8Number_to_float(data[0]); + build_pow_table(gamma, 4096) + } + _ => { + //XXX: the choice of a minimum of 256 here is not backed by any theory, + // measurement or data, however it is what lcms uses. + let mut output_gamma_lut_length = data.len(); + if output_gamma_lut_length < 256 { + output_gamma_lut_length = 256 + } + invert_lut(data, output_gamma_lut_length as i32) + } + } + } + } +} |