From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- gfx/qcms/src/c_bindings.rs | 507 ++++++++++++ gfx/qcms/src/chain.rs | 1029 ++++++++++++++++++++++++ gfx/qcms/src/gtest.rs | 962 ++++++++++++++++++++++ gfx/qcms/src/iccread.rs | 1718 ++++++++++++++++++++++++++++++++++++++++ gfx/qcms/src/lib.rs | 72 ++ gfx/qcms/src/matrix.rs | 134 ++++ gfx/qcms/src/transform.rs | 1571 ++++++++++++++++++++++++++++++++++++ gfx/qcms/src/transform_avx.rs | 230 ++++++ gfx/qcms/src/transform_neon.rs | 158 ++++ gfx/qcms/src/transform_sse2.rs | 159 ++++ gfx/qcms/src/transform_util.rs | 608 ++++++++++++++ 11 files changed, 7148 insertions(+) create mode 100644 gfx/qcms/src/c_bindings.rs create mode 100644 gfx/qcms/src/chain.rs create mode 100644 gfx/qcms/src/gtest.rs create mode 100644 gfx/qcms/src/iccread.rs create mode 100644 gfx/qcms/src/lib.rs create mode 100644 gfx/qcms/src/matrix.rs create mode 100644 gfx/qcms/src/transform.rs create mode 100644 gfx/qcms/src/transform_avx.rs create mode 100644 gfx/qcms/src/transform_neon.rs create mode 100644 gfx/qcms/src/transform_sse2.rs create mode 100644 gfx/qcms/src/transform_util.rs (limited to 'gfx/qcms/src') diff --git a/gfx/qcms/src/c_bindings.rs b/gfx/qcms/src/c_bindings.rs new file mode 100644 index 0000000000..efe6674fe5 --- /dev/null +++ b/gfx/qcms/src/c_bindings.rs @@ -0,0 +1,507 @@ +#![allow(clippy::missing_safety_doc)] + +use std::{ptr::null_mut, slice}; + +use libc::{fclose, fopen, fread, free, malloc, memset, FILE}; + +use crate::{ + double_to_s15Fixed16Number, + iccread::*, + s15Fixed16Number_to_float, + transform::get_rgb_colorants, + transform::DataType, + transform::{qcms_transform, transform_create}, + transform_util, + Intent, +}; + +#[no_mangle] +pub extern "C" fn qcms_profile_sRGB() -> *mut Profile { + let profile = Profile::new_sRGB(); + Box::into_raw(profile) +} + +//XXX: 
it would be nice if we had a way of ensuring +// everything in a profile was initialized regardless of how it was created +//XXX: should this also be taking a black_point? +/* similar to CGColorSpaceCreateCalibratedRGB */ +#[no_mangle] +pub unsafe extern "C" fn qcms_profile_create_rgb_with_gamma_set( + white_point: qcms_CIE_xyY, + primaries: qcms_CIE_xyYTRIPLE, + redGamma: f32, + greenGamma: f32, + blueGamma: f32, +) -> *mut Profile { + let profile = + Profile::new_rgb_with_gamma_set(white_point, primaries, redGamma, greenGamma, blueGamma); + profile.map_or_else(null_mut, Box::into_raw) +} + +#[no_mangle] +pub unsafe extern "C" fn qcms_profile_create_gray_with_gamma(gamma: f32) -> *mut Profile { + let profile = Profile::new_gray_with_gamma(gamma); + Box::into_raw(profile) +} + +#[no_mangle] +pub unsafe extern "C" fn qcms_profile_create_rgb_with_gamma( + white_point: qcms_CIE_xyY, + primaries: qcms_CIE_xyYTRIPLE, + gamma: f32, +) -> *mut Profile { + qcms_profile_create_rgb_with_gamma_set(white_point, primaries, gamma, gamma, gamma) +} + +#[no_mangle] +pub unsafe extern "C" fn qcms_profile_create_rgb_with_table( + white_point: qcms_CIE_xyY, + primaries: qcms_CIE_xyYTRIPLE, + table: *const u16, + num_entries: i32, +) -> *mut Profile { + let table = slice::from_raw_parts(table, num_entries as usize); + let profile = Profile::new_rgb_with_table(white_point, primaries, table); + profile.map_or_else(null_mut, Box::into_raw) +} + +#[no_mangle] +pub unsafe extern "C" fn qcms_profile_create_cicp( + colour_primaries: u8, + transfer_characteristics: u8, +) -> *mut Profile { + Profile::new_cicp(colour_primaries.into(), transfer_characteristics.into()) + .map_or_else(null_mut, Box::into_raw) +} + +/* qcms_profile_from_memory does not hold a reference to the memory passed in */ +#[no_mangle] +pub unsafe extern "C" fn qcms_profile_from_memory( + mem: *const libc::c_void, + size: usize, +) -> *mut Profile { + let mem = slice::from_raw_parts(mem as *const libc::c_uchar, size); + let 
profile = Profile::new_from_slice(mem, false); + profile.map_or_else(null_mut, Box::into_raw) +} + +#[no_mangle] +pub unsafe extern "C" fn qcms_profile_from_memory_curves_only( + mem: *const libc::c_void, + size: usize, +) -> *mut Profile { + let mem = slice::from_raw_parts(mem as *const libc::c_uchar, size); + let profile = Profile::new_from_slice(mem, true); + profile.map_or_else(null_mut, Box::into_raw) +} + + +#[no_mangle] +pub extern "C" fn qcms_profile_get_rendering_intent(profile: &Profile) -> Intent { + profile.rendering_intent +} +#[no_mangle] +pub extern "C" fn qcms_profile_get_color_space(profile: &Profile) -> icColorSpaceSignature { + profile.color_space +} +#[no_mangle] +pub extern "C" fn qcms_profile_is_sRGB(profile: &Profile) -> bool { + profile.is_sRGB() +} + +#[no_mangle] +pub unsafe extern "C" fn qcms_profile_release(profile: *mut Profile) { + drop(Box::from_raw(profile)); +} +unsafe extern "C" fn qcms_data_from_file( + file: *mut FILE, + mem: *mut *mut libc::c_void, + size: *mut usize, +) { + let length: u32; + let remaining_length: u32; + let read_length: usize; + let mut length_be: u32 = 0; + let data: *mut libc::c_void; + *mem = std::ptr::null_mut::(); + *size = 0; + if fread( + &mut length_be as *mut u32 as *mut libc::c_void, + 1, + ::std::mem::size_of::(), + file, + ) != ::std::mem::size_of::() + { + return; + } + length = u32::from_be(length_be); + if length > MAX_PROFILE_SIZE as libc::c_uint + || (length as libc::c_ulong) < ::std::mem::size_of::() as libc::c_ulong + { + return; + } + /* allocate room for the entire profile */ + data = malloc(length as usize); + if data.is_null() { + return; + } + /* copy in length to the front so that the buffer will contain the entire profile */ + *(data as *mut u32) = length_be; + remaining_length = + (length as libc::c_ulong - ::std::mem::size_of::() as libc::c_ulong) as u32; + /* read the rest profile */ + read_length = fread( + (data as *mut libc::c_uchar).add(::std::mem::size_of::()) as *mut 
/// Opens `path` (a NUL-terminated wide string) with `_wfopen` in `"rb"` mode
/// and loads a profile from it through `qcms_profile_from_file`.
///
/// Returns the loaded profile, or null when the file cannot be opened or
/// `qcms_profile_from_file` returns null. Earlier this function returned
/// nothing and dropped the pointer on the floor, leaking every profile it
/// loaded.
/// NOTE(review): the return type was added to fix that leak; callers that
/// ignore the result keep compiling, but confirm the C header declares a
/// `qcms_profile*` return.
///
/// # Safety
/// `path` must be a valid, NUL-terminated wide-character string.
#[cfg(windows)]
#[no_mangle]
pub unsafe extern "C" fn qcms_profile_from_unicode_path(
    path: *const libc::wchar_t,
) -> *mut Profile {
    // Wide-character equivalent of the "rb" mode string.
    let mode = ['r' as u16, 'b' as u16, '\0' as u16];
    let file = _wfopen(path, mode.as_ptr());
    if file.is_null() {
        return null_mut();
    }
    let profile = qcms_profile_from_file(file);
    fclose(file);
    profile
}
if !file.is_null() { + qcms_data_from_file(file, mem, size); + fclose(file); + }; +} + +#[no_mangle] +pub extern "C" fn qcms_transform_create( + in_0: &Profile, + in_type: DataType, + out: &Profile, + out_type: DataType, + intent: Intent, +) -> *mut qcms_transform { + let transform = transform_create(in_0, in_type, out, out_type, intent); + match transform { + Some(transform) => Box::into_raw(transform), + None => null_mut(), + } +} + +#[no_mangle] +pub unsafe extern "C" fn qcms_data_create_rgb_with_gamma( + white_point: qcms_CIE_xyY, + primaries: qcms_CIE_xyYTRIPLE, + gamma: f32, + mem: *mut *mut libc::c_void, + size: *mut usize, +) { + let length: u32; + let mut index: u32; + let xyz_count: u32; + let trc_count: u32; + let mut tag_table_offset: usize; + let mut tag_data_offset: usize; + let data: *mut libc::c_void; + + let TAG_XYZ: [u32; 3] = [TAG_rXYZ, TAG_gXYZ, TAG_bXYZ]; + let TAG_TRC: [u32; 3] = [TAG_rTRC, TAG_gTRC, TAG_bTRC]; + if mem.is_null() || size.is_null() { + return; + } + *mem = std::ptr::null_mut::(); + *size = 0; + /* + * total length = icc profile header(128) + tag count(4) + + * (tag table item (12) * total tag (6 = 3 rTRC + 3 rXYZ)) + rTRC elements data (3 * 20) + * + rXYZ elements data (3*16), and all tag data elements must start at the 4-byte boundary. + */ + xyz_count = 3; // rXYZ, gXYZ, bXYZ + trc_count = 3; // rTRC, gTRC, bTRC + length = + (128 + 4) as libc::c_uint + 12 * (xyz_count + trc_count) + xyz_count * 20 + trc_count * 16; + // reserve the total memory. 
+ data = malloc(length as usize); + if data.is_null() { + return; + } + memset(data, 0, length as usize); + // Part1 : write rXYZ, gXYZ and bXYZ + let colorants = match get_rgb_colorants(white_point, primaries) { + Some(colorants) => colorants, + None => { + free(data); + return; + } + }; + + let data = std::slice::from_raw_parts_mut(data as *mut u8, length as usize); + // the position of first tag's signature in tag table + tag_table_offset = (128 + 4) as usize; // the start of tag data elements. + tag_data_offset = ((128 + 4) as libc::c_uint + 12 * (xyz_count + trc_count)) as usize; + index = 0; + while index < xyz_count { + // tag table + write_u32(data, tag_table_offset, TAG_XYZ[index as usize]); // 20 bytes per TAG_(r/g/b)XYZ tag element + write_u32(data, tag_table_offset + 4, tag_data_offset as u32); + write_u32(data, tag_table_offset + 8, 20); + // tag data element + write_u32(data, tag_data_offset, XYZ_TYPE); + // reserved 4 bytes. + write_u32( + data, + tag_data_offset + 8, + double_to_s15Fixed16Number(colorants.m[0][index as usize] as f64) as u32, + ); + write_u32( + data, + tag_data_offset + 12, + double_to_s15Fixed16Number(colorants.m[1][index as usize] as f64) as u32, + ); + write_u32( + data, + tag_data_offset + 16, + double_to_s15Fixed16Number(colorants.m[2][index as usize] as f64) as u32, + ); + tag_table_offset += 12; + tag_data_offset += 20; + index += 1 + } + // Part2 : write rTRC, gTRC and bTRC + index = 0; + while index < trc_count { + // tag table + write_u32(data, tag_table_offset, TAG_TRC[index as usize]); // 14 bytes per TAG_(r/g/b)TRC element + write_u32(data, tag_table_offset + 4, tag_data_offset as u32); + write_u32(data, tag_table_offset + 8, 14); + // tag data element + write_u32(data, tag_data_offset, CURVE_TYPE); + // reserved 4 bytes. 
+ write_u32(data, tag_data_offset + 8, 1); // count + write_u16(data, tag_data_offset + 12, float_to_u8Fixed8Number(gamma)); + tag_table_offset += 12; + tag_data_offset += 16; + index += 1 + } + /* Part3 : write profile header + * + * Important header fields are left empty. This generates a profile for internal use only. + * We should be generating: Profile version (04300000h), Profile signature (acsp), + * PCS illumiant field. Likewise mandatory profile tags are omitted. + */ + write_u32(data, 0, length); // the total length of this memory + write_u32(data, 12, DISPLAY_DEVICE_PROFILE); // profile->class_type + write_u32(data, 16, RGB_SIGNATURE); // profile->color_space + write_u32(data, 20, XYZ_TYPE); // profile->pcs + write_u32(data, 64, Intent::Perceptual as u32); // profile->rendering_intent + write_u32(data, 128, 6); // total tag count + // prepare the result + *mem = data.as_mut_ptr() as *mut libc::c_void; + *size = length as usize; +} + +#[no_mangle] +pub unsafe extern "C" fn qcms_transform_data( + transform: &qcms_transform, + src: *const libc::c_void, + dest: *mut libc::c_void, + length: usize, +) { + transform.transform_fn.expect("non-null function pointer")( + transform, + src as *const u8, + dest as *mut u8, + length, + ); +} +/* +use crate::matrix; +#[repr(C)] +#[derive(Clone, Debug, Default)] +pub struct qcms_mat3r3 { + pub rows: [[f32; 3] ; 3], +} +impl qcms_mat3r3 { + fn from(m: matrix::Matrix) -> qcms_mat3r3 { + qcms_mat3r3{ + rows: [ + m.row(0), + m.row(1), + m.row(2), + ], + } + } +} +*/ +#[repr(C)] +#[derive(Clone, Debug, Default)] +#[allow(clippy::upper_case_acronyms)] +pub struct qcms_profile_data { + pub class_type: u32, + pub color_space: u32, + pub pcs: u32, + pub rendering_intent: Intent, + pub red_colorant_xyzd50: [f32; 3], + pub blue_colorant_xyzd50: [f32; 3], + pub green_colorant_xyzd50: [f32; 3], + // Number of samples in the e.g. gamma->linear LUT. 
+ pub linear_from_trc_red_samples: i32, + pub linear_from_trc_blue_samples: i32, + pub linear_from_trc_green_samples: i32, +} + +pub use crate::iccread::Profile as qcms_profile; + +#[no_mangle] +pub extern "C" fn qcms_profile_get_data( + profile: &qcms_profile, + out_data: &mut qcms_profile_data, +) { + out_data.class_type = profile.class_type; + out_data.color_space = profile.color_space; + out_data.pcs = profile.pcs; + out_data.rendering_intent = profile.rendering_intent; + + fn colorant(c: &XYZNumber) -> [f32;3] { + [c.X, c.Y, c.Z].map(s15Fixed16Number_to_float) + } + out_data.red_colorant_xyzd50 = colorant(&profile.redColorant); + out_data.blue_colorant_xyzd50 = colorant(&profile.blueColorant); + out_data.green_colorant_xyzd50 = colorant(&profile.greenColorant); + + fn trc_to_samples(trc: &Option>) -> i32 { + if let Some(ref trc) = *trc { + match &**trc { + curveType::Curve(v) => { + let len = v.len(); + if len <= 1 { + -1 + } else { + len as i32 + } + }, + curveType::Parametric(_) => -1, + } + } else { + 0 + } + } + out_data.linear_from_trc_red_samples = trc_to_samples(&profile.redTRC); + out_data.linear_from_trc_blue_samples = trc_to_samples(&profile.blueTRC); + out_data.linear_from_trc_green_samples = trc_to_samples(&profile.greenTRC); +} + +#[repr(u8)] +pub enum qcms_color_channel { + Red, + Green, + Blue, +} + +#[no_mangle] +pub extern "C" fn qcms_profile_get_lut( + profile: &qcms_profile, + channel: qcms_color_channel, // FYI: UB if you give Rust something out of range! 
/// Color space signature: four ASCII characters packed into a `u32`, first
/// character in the most significant byte (e.g. 0x47 = 'G' in 'GRAY').
pub type icColorSpaceSignature = u32;
/// Signature for grayscale data.
pub const icSigGrayData: icColorSpaceSignature = 0x47524159; // 'GRAY'
/// Signature for RGB data (note the trailing space, 0x20).
pub const icSigRgbData: icColorSpaceSignature = 0x52474220; // 'RGB '
/// Signature for CMYK data.
pub const icSigCmykData: icColorSpaceSignature = 0x434d594b; // 'CMYK'
/// Linear interpolation between `a` (at `t == 0`) and `b` (at `t == 1`),
/// computed as `a * (1 - t) + b * t`.
#[inline]
fn lerp(a: f32, b: f32, t: f32) -> f32 {
    let weight_a = 1.0 - t;
    a * weight_a + b * t
}
/// Inverse of the Lab companding function `f`: linear below the `24/116`
/// breakpoint, cubic above it.
fn f_1(t: f32) -> f32 {
    const BREAKPOINT: f32 = 24.0 / 116.0;
    if t <= BREAKPOINT {
        // Linear toe: undo `841/108 * v + 16/116`.
        let shifted = t - 16.0 / 116.0;
        (108.0 / 841.0) * shifted
    } else {
        t * t * t
    }
}
(dest, src) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) { + let device_x: f32 = + (src[0] as f64 * (1.0f64 + 32767.0f64 / 32768.0f64) / WhitePointX as f64) as f32; + let device_y: f32 = + (src[1] as f64 * (1.0f64 + 32767.0f64 / 32768.0f64) / WhitePointY as f64) as f32; + let device_z: f32 = + (src[2] as f64 * (1.0f64 + 32767.0f64 / 32768.0f64) / WhitePointZ as f64) as f32; + + let fx = f(device_x); + let fy = f(device_y); + let fz = f(device_z); + + let L: f32 = 116.0 * fy - 16.0; + let a: f32 = 500.0 * (fx - fy); + let b: f32 = 200.0 * (fy - fz); + + dest[0] = L / 100.0; + dest[1] = (a + 128.0) / 255.0; + dest[2] = (b + 128.0) / 255.0; + } + } +} +#[derive(Default)] +struct ClutOnly { + clut: Option>, + grid_size: u16, +} +impl ModularTransform for ClutOnly { + fn transform(&self, src: &[f32], dest: &mut [f32]) { + let xy_len: i32 = 1; + let x_len: i32 = self.grid_size as i32; + let len: i32 = x_len * x_len; + + let r_table = &self.clut.as_ref().unwrap()[0..]; + let g_table = &self.clut.as_ref().unwrap()[1..]; + let b_table = &self.clut.as_ref().unwrap()[2..]; + + let CLU = |table: &[f32], x, y, z| table[((x * len + y * x_len + z * xy_len) * 3) as usize]; + + for (dest, src) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) { + debug_assert!(self.grid_size as i32 >= 1); + let linear_r: f32 = src[0]; + let linear_g: f32 = src[1]; + let linear_b: f32 = src[2]; + let x: i32 = (linear_r * (self.grid_size as i32 - 1) as f32).floor() as i32; + let y: i32 = (linear_g * (self.grid_size as i32 - 1) as f32).floor() as i32; + let z: i32 = (linear_b * (self.grid_size as i32 - 1) as f32).floor() as i32; + let x_n: i32 = (linear_r * (self.grid_size as i32 - 1) as f32).ceil() as i32; + let y_n: i32 = (linear_g * (self.grid_size as i32 - 1) as f32).ceil() as i32; + let z_n: i32 = (linear_b * (self.grid_size as i32 - 1) as f32).ceil() as i32; + let x_d: f32 = linear_r * (self.grid_size as i32 - 1) as f32 - x as f32; + let y_d: f32 = linear_g * (self.grid_size as i32 - 
1) as f32 - y as f32; + let z_d: f32 = linear_b * (self.grid_size as i32 - 1) as f32 - z as f32; + + let r_x1: f32 = lerp(CLU(r_table, x, y, z), CLU(r_table, x_n, y, z), x_d); + let r_x2: f32 = lerp(CLU(r_table, x, y_n, z), CLU(r_table, x_n, y_n, z), x_d); + let r_y1: f32 = lerp(r_x1, r_x2, y_d); + let r_x3: f32 = lerp(CLU(r_table, x, y, z_n), CLU(r_table, x_n, y, z_n), x_d); + let r_x4: f32 = lerp(CLU(r_table, x, y_n, z_n), CLU(r_table, x_n, y_n, z_n), x_d); + let r_y2: f32 = lerp(r_x3, r_x4, y_d); + let clut_r: f32 = lerp(r_y1, r_y2, z_d); + + let g_x1: f32 = lerp(CLU(g_table, x, y, z), CLU(g_table, x_n, y, z), x_d); + let g_x2: f32 = lerp(CLU(g_table, x, y_n, z), CLU(g_table, x_n, y_n, z), x_d); + let g_y1: f32 = lerp(g_x1, g_x2, y_d); + let g_x3: f32 = lerp(CLU(g_table, x, y, z_n), CLU(g_table, x_n, y, z_n), x_d); + let g_x4: f32 = lerp(CLU(g_table, x, y_n, z_n), CLU(g_table, x_n, y_n, z_n), x_d); + let g_y2: f32 = lerp(g_x3, g_x4, y_d); + let clut_g: f32 = lerp(g_y1, g_y2, z_d); + + let b_x1: f32 = lerp(CLU(b_table, x, y, z), CLU(b_table, x_n, y, z), x_d); + let b_x2: f32 = lerp(CLU(b_table, x, y_n, z), CLU(b_table, x_n, y_n, z), x_d); + let b_y1: f32 = lerp(b_x1, b_x2, y_d); + let b_x3: f32 = lerp(CLU(b_table, x, y, z_n), CLU(b_table, x_n, y, z_n), x_d); + let b_x4: f32 = lerp(CLU(b_table, x, y_n, z_n), CLU(b_table, x_n, y_n, z_n), x_d); + let b_y2: f32 = lerp(b_x3, b_x4, y_d); + let clut_b: f32 = lerp(b_y1, b_y2, z_d); + + dest[0] = clamp_float(clut_r); + dest[1] = clamp_float(clut_g); + dest[2] = clamp_float(clut_b); + } + } +} +#[derive(Default)] +struct Clut3x3 { + input_clut_table: [Option>; 3], + clut: Option>, + grid_size: u16, + output_clut_table: [Option>; 3], +} +impl ModularTransform for Clut3x3 { + fn transform(&self, src: &[f32], dest: &mut [f32]) { + let xy_len: i32 = 1; + let x_len: i32 = self.grid_size as i32; + let len: i32 = x_len * x_len; + + let r_table = &self.clut.as_ref().unwrap()[0..]; + let g_table = &self.clut.as_ref().unwrap()[1..]; 
+ let b_table = &self.clut.as_ref().unwrap()[2..]; + let CLU = |table: &[f32], x, y, z| table[((x * len + y * x_len + z * xy_len) * 3) as usize]; + + let input_clut_table_r = self.input_clut_table[0].as_ref().unwrap(); + let input_clut_table_g = self.input_clut_table[1].as_ref().unwrap(); + let input_clut_table_b = self.input_clut_table[2].as_ref().unwrap(); + for (dest, src) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) { + debug_assert!(self.grid_size as i32 >= 1); + let device_r: f32 = src[0]; + let device_g: f32 = src[1]; + let device_b: f32 = src[2]; + let linear_r: f32 = lut_interp_linear_float(device_r, &input_clut_table_r); + let linear_g: f32 = lut_interp_linear_float(device_g, &input_clut_table_g); + let linear_b: f32 = lut_interp_linear_float(device_b, &input_clut_table_b); + let x: i32 = (linear_r * (self.grid_size as i32 - 1) as f32).floor() as i32; + let y: i32 = (linear_g * (self.grid_size as i32 - 1) as f32).floor() as i32; + let z: i32 = (linear_b * (self.grid_size as i32 - 1) as f32).floor() as i32; + let x_n: i32 = (linear_r * (self.grid_size as i32 - 1) as f32).ceil() as i32; + let y_n: i32 = (linear_g * (self.grid_size as i32 - 1) as f32).ceil() as i32; + let z_n: i32 = (linear_b * (self.grid_size as i32 - 1) as f32).ceil() as i32; + let x_d: f32 = linear_r * (self.grid_size as i32 - 1) as f32 - x as f32; + let y_d: f32 = linear_g * (self.grid_size as i32 - 1) as f32 - y as f32; + let z_d: f32 = linear_b * (self.grid_size as i32 - 1) as f32 - z as f32; + + let r_x1: f32 = lerp(CLU(r_table, x, y, z), CLU(r_table, x_n, y, z), x_d); + let r_x2: f32 = lerp(CLU(r_table, x, y_n, z), CLU(r_table, x_n, y_n, z), x_d); + let r_y1: f32 = lerp(r_x1, r_x2, y_d); + let r_x3: f32 = lerp(CLU(r_table, x, y, z_n), CLU(r_table, x_n, y, z_n), x_d); + let r_x4: f32 = lerp(CLU(r_table, x, y_n, z_n), CLU(r_table, x_n, y_n, z_n), x_d); + let r_y2: f32 = lerp(r_x3, r_x4, y_d); + let clut_r: f32 = lerp(r_y1, r_y2, z_d); + + let g_x1: f32 = lerp(CLU(g_table, x, y, 
z), CLU(g_table, x_n, y, z), x_d); + let g_x2: f32 = lerp(CLU(g_table, x, y_n, z), CLU(g_table, x_n, y_n, z), x_d); + let g_y1: f32 = lerp(g_x1, g_x2, y_d); + let g_x3: f32 = lerp(CLU(g_table, x, y, z_n), CLU(g_table, x_n, y, z_n), x_d); + let g_x4: f32 = lerp(CLU(g_table, x, y_n, z_n), CLU(g_table, x_n, y_n, z_n), x_d); + let g_y2: f32 = lerp(g_x3, g_x4, y_d); + let clut_g: f32 = lerp(g_y1, g_y2, z_d); + + let b_x1: f32 = lerp(CLU(b_table, x, y, z), CLU(b_table, x_n, y, z), x_d); + let b_x2: f32 = lerp(CLU(b_table, x, y_n, z), CLU(b_table, x_n, y_n, z), x_d); + let b_y1: f32 = lerp(b_x1, b_x2, y_d); + let b_x3: f32 = lerp(CLU(b_table, x, y, z_n), CLU(b_table, x_n, y, z_n), x_d); + let b_x4: f32 = lerp(CLU(b_table, x, y_n, z_n), CLU(b_table, x_n, y_n, z_n), x_d); + let b_y2: f32 = lerp(b_x3, b_x4, y_d); + let clut_b: f32 = lerp(b_y1, b_y2, z_d); + let pcs_r: f32 = + lut_interp_linear_float(clut_r, &self.output_clut_table[0].as_ref().unwrap()); + let pcs_g: f32 = + lut_interp_linear_float(clut_g, &self.output_clut_table[1].as_ref().unwrap()); + let pcs_b: f32 = + lut_interp_linear_float(clut_b, &self.output_clut_table[2].as_ref().unwrap()); + dest[0] = clamp_float(pcs_r); + dest[1] = clamp_float(pcs_g); + dest[2] = clamp_float(pcs_b); + } + } +} +#[derive(Default)] +struct Clut4x3 { + input_clut_table: [Option>; 4], + clut: Option>, + grid_size: u16, + output_clut_table: [Option>; 3], +} +impl ModularTransform for Clut4x3 { + fn transform(&self, src: &[f32], dest: &mut [f32]) { + let z_stride: i32 = self.grid_size as i32; + let y_stride: i32 = z_stride * z_stride; + let x_stride: i32 = z_stride * z_stride * z_stride; + + let r_tbl = &self.clut.as_ref().unwrap()[0..]; + let g_tbl = &self.clut.as_ref().unwrap()[1..]; + let b_tbl = &self.clut.as_ref().unwrap()[2..]; + + let CLU = |table: &[f32], x, y, z, w| { + table[((x * x_stride + y * y_stride + z * z_stride + w) * 3) as usize] + }; + + let input_clut_table_0 = self.input_clut_table[0].as_ref().unwrap(); + let 
input_clut_table_1 = self.input_clut_table[1].as_ref().unwrap(); + let input_clut_table_2 = self.input_clut_table[2].as_ref().unwrap(); + let input_clut_table_3 = self.input_clut_table[3].as_ref().unwrap(); + for (dest, src) in dest.chunks_exact_mut(3).zip(src.chunks_exact(4)) { + debug_assert!(self.grid_size as i32 >= 1); + let linear_x: f32 = lut_interp_linear_float(src[0], &input_clut_table_0); + let linear_y: f32 = lut_interp_linear_float(src[1], &input_clut_table_1); + let linear_z: f32 = lut_interp_linear_float(src[2], &input_clut_table_2); + let linear_w: f32 = lut_interp_linear_float(src[3], &input_clut_table_3); + + let x: i32 = (linear_x * (self.grid_size as i32 - 1) as f32).floor() as i32; + let y: i32 = (linear_y * (self.grid_size as i32 - 1) as f32).floor() as i32; + let z: i32 = (linear_z * (self.grid_size as i32 - 1) as f32).floor() as i32; + let w: i32 = (linear_w * (self.grid_size as i32 - 1) as f32).floor() as i32; + + let x_n: i32 = (linear_x * (self.grid_size as i32 - 1) as f32).ceil() as i32; + let y_n: i32 = (linear_y * (self.grid_size as i32 - 1) as f32).ceil() as i32; + let z_n: i32 = (linear_z * (self.grid_size as i32 - 1) as f32).ceil() as i32; + let w_n: i32 = (linear_w * (self.grid_size as i32 - 1) as f32).ceil() as i32; + + let x_d: f32 = linear_x * (self.grid_size as i32 - 1) as f32 - x as f32; + let y_d: f32 = linear_y * (self.grid_size as i32 - 1) as f32 - y as f32; + let z_d: f32 = linear_z * (self.grid_size as i32 - 1) as f32 - z as f32; + let w_d: f32 = linear_w * (self.grid_size as i32 - 1) as f32 - w as f32; + + let quadlinear = |tbl| { + let CLU = |x, y, z, w| CLU(tbl, x, y, z, w); + let r_x1 = lerp(CLU(x, y, z, w), CLU(x_n, y, z, w), x_d); + let r_x2 = lerp(CLU(x, y_n, z, w), CLU(x_n, y_n, z, w), x_d); + let r_y1 = lerp(r_x1, r_x2, y_d); + let r_x3 = lerp(CLU(x, y, z_n, w), CLU(x_n, y, z_n, w), x_d); + let r_x4 = lerp(CLU(x, y_n, z_n, w), CLU(x_n, y_n, z_n, w), x_d); + let r_y2 = lerp(r_x3, r_x4, y_d); + let r_z1 = lerp(r_y1, 
r_y2, z_d); + + let r_x1 = lerp(CLU(x, y, z, w_n), CLU(x_n, y, z, w_n), x_d); + let r_x2 = lerp(CLU(x, y_n, z, w_n), CLU(x_n, y_n, z, w_n), x_d); + let r_y1 = lerp(r_x1, r_x2, y_d); + let r_x3 = lerp(CLU(x, y, z_n, w_n), CLU(x_n, y, z_n, w_n), x_d); + let r_x4 = lerp(CLU(x, y_n, z_n, w_n), CLU(x_n, y_n, z_n, w_n), x_d); + let r_y2 = lerp(r_x3, r_x4, y_d); + let r_z2 = lerp(r_y1, r_y2, z_d); + lerp(r_z1, r_z2, w_d) + }; + // TODO: instead of reading each component separately we should read all three components at once. + let clut_r = quadlinear(r_tbl); + let clut_g = quadlinear(g_tbl); + let clut_b = quadlinear(b_tbl); + + let pcs_r = + lut_interp_linear_float(clut_r, &self.output_clut_table[0].as_ref().unwrap()); + let pcs_g = + lut_interp_linear_float(clut_g, &self.output_clut_table[1].as_ref().unwrap()); + let pcs_b = + lut_interp_linear_float(clut_b, &self.output_clut_table[2].as_ref().unwrap()); + dest[0] = clamp_float(pcs_r); + dest[1] = clamp_float(pcs_g); + dest[2] = clamp_float(pcs_b); + } + } +} +/* NOT USED +static void qcms_transform_module_tetra_clut(struct qcms_modular_transform *transform, float *src, float *dest, size_t length) +{ + size_t i; + int xy_len = 1; + int x_len = transform->grid_size; + int len = x_len * x_len; + float* r_table = transform->r_clut; + float* g_table = transform->g_clut; + float* b_table = transform->b_clut; + float c0_r, c1_r, c2_r, c3_r; + float c0_g, c1_g, c2_g, c3_g; + float c0_b, c1_b, c2_b, c3_b; + float clut_r, clut_g, clut_b; + float pcs_r, pcs_g, pcs_b; + for (i = 0; i < length; i++) { + float device_r = *src++; + float device_g = *src++; + float device_b = *src++; + float linear_r = lut_interp_linear_float(device_r, + transform->input_clut_table_r, transform->input_clut_table_length); + float linear_g = lut_interp_linear_float(device_g, + transform->input_clut_table_g, transform->input_clut_table_length); + float linear_b = lut_interp_linear_float(device_b, + transform->input_clut_table_b, 
transform->input_clut_table_length); + + int x = floorf(linear_r * (transform->grid_size-1)); + int y = floorf(linear_g * (transform->grid_size-1)); + int z = floorf(linear_b * (transform->grid_size-1)); + int x_n = ceilf(linear_r * (transform->grid_size-1)); + int y_n = ceilf(linear_g * (transform->grid_size-1)); + int z_n = ceilf(linear_b * (transform->grid_size-1)); + float rx = linear_r * (transform->grid_size-1) - x; + float ry = linear_g * (transform->grid_size-1) - y; + float rz = linear_b * (transform->grid_size-1) - z; + + c0_r = CLU(r_table, x, y, z); + c0_g = CLU(g_table, x, y, z); + c0_b = CLU(b_table, x, y, z); + if( rx >= ry ) { + if (ry >= rz) { //rx >= ry && ry >= rz + c1_r = CLU(r_table, x_n, y, z) - c0_r; + c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z); + c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); + c1_g = CLU(g_table, x_n, y, z) - c0_g; + c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z); + c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); + c1_b = CLU(b_table, x_n, y, z) - c0_b; + c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z); + c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); + } else { + if (rx >= rz) { //rx >= rz && rz >= ry + c1_r = CLU(r_table, x_n, y, z) - c0_r; + c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); + c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z); + c1_g = CLU(g_table, x_n, y, z) - c0_g; + c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); + c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z); + c1_b = CLU(b_table, x_n, y, z) - c0_b; + c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); + c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z); + } else { //rz > rx && rx >= ry + c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n); + c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); + c3_r = CLU(r_table, x, y, z_n) - c0_r; + c1_g = CLU(g_table, x_n, y, z_n) - 
CLU(g_table, x, y, z_n); + c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); + c3_g = CLU(g_table, x, y, z_n) - c0_g; + c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n); + c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); + c3_b = CLU(b_table, x, y, z_n) - c0_b; + } + } + } else { + if (rx >= rz) { //ry > rx && rx >= rz + c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z); + c2_r = CLU(r_table, x_n, y_n, z) - c0_r; + c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); + c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z); + c2_g = CLU(g_table, x_n, y_n, z) - c0_g; + c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); + c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z); + c2_b = CLU(b_table, x_n, y_n, z) - c0_b; + c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); + } else { + if (ry >= rz) { //ry >= rz && rz > rx + c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); + c2_r = CLU(r_table, x, y_n, z) - c0_r; + c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z); + c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); + c2_g = CLU(g_table, x, y_n, z) - c0_g; + c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z); + c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); + c2_b = CLU(b_table, x, y_n, z) - c0_b; + c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z); + } else { //rz > ry && ry > rx + c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); + c2_r = CLU(r_table, x, y_n, z) - c0_r; + c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); + c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); + c2_g = CLU(g_table, x, y_n, z) - c0_g; + c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); + c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); + c2_b = CLU(b_table, x, y_n, z) - c0_b; + c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); + } + } + } + + 
clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz; + clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz; + clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz; + + pcs_r = lut_interp_linear_float(clut_r, + transform->output_clut_table_r, transform->output_clut_table_length); + pcs_g = lut_interp_linear_float(clut_g, + transform->output_clut_table_g, transform->output_clut_table_length); + pcs_b = lut_interp_linear_float(clut_b, + transform->output_clut_table_b, transform->output_clut_table_length); + *dest++ = clamp_float(pcs_r); + *dest++ = clamp_float(pcs_g); + *dest++ = clamp_float(pcs_b); + } +} +*/ +#[derive(Default)] +struct GammaTable { + input_clut_table: [Option>; 3], +} +impl ModularTransform for GammaTable { + fn transform(&self, src: &[f32], dest: &mut [f32]) { + let mut out_r: f32; + let mut out_g: f32; + let mut out_b: f32; + let input_clut_table_r = self.input_clut_table[0].as_ref().unwrap(); + let input_clut_table_g = self.input_clut_table[1].as_ref().unwrap(); + let input_clut_table_b = self.input_clut_table[2].as_ref().unwrap(); + + for (dest, src) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) { + let in_r: f32 = src[0]; + let in_g: f32 = src[1]; + let in_b: f32 = src[2]; + out_r = lut_interp_linear_float(in_r, input_clut_table_r); + out_g = lut_interp_linear_float(in_g, input_clut_table_g); + out_b = lut_interp_linear_float(in_b, input_clut_table_b); + + dest[0] = clamp_float(out_r); + dest[1] = clamp_float(out_g); + dest[2] = clamp_float(out_b); + } + } +} +#[derive(Default)] +struct GammaLut { + output_gamma_lut_r: Option>, + output_gamma_lut_g: Option>, + output_gamma_lut_b: Option>, +} +impl ModularTransform for GammaLut { + fn transform(&self, src: &[f32], dest: &mut [f32]) { + let mut out_r: f32; + let mut out_g: f32; + let mut out_b: f32; + for (dest, src) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) { + let in_r: f32 = src[0]; + let in_g: f32 = src[1]; + let in_b: f32 = src[2]; + out_r = lut_interp_linear(in_r as f64, 
&self.output_gamma_lut_r.as_ref().unwrap()); + out_g = lut_interp_linear(in_g as f64, &self.output_gamma_lut_g.as_ref().unwrap()); + out_b = lut_interp_linear(in_b as f64, &self.output_gamma_lut_b.as_ref().unwrap()); + dest[0] = clamp_float(out_r); + dest[1] = clamp_float(out_g); + dest[2] = clamp_float(out_b); + } + } +} +#[derive(Default)] +struct MatrixTranslate { + matrix: Matrix, + tx: f32, + ty: f32, + tz: f32, +} +impl ModularTransform for MatrixTranslate { + fn transform(&self, src: &[f32], dest: &mut [f32]) { + let mut mat: Matrix = Matrix { m: [[0.; 3]; 3] }; + /* store the results in column major mode + * this makes doing the multiplication with sse easier */ + mat.m[0][0] = self.matrix.m[0][0]; + mat.m[1][0] = self.matrix.m[0][1]; + mat.m[2][0] = self.matrix.m[0][2]; + mat.m[0][1] = self.matrix.m[1][0]; + mat.m[1][1] = self.matrix.m[1][1]; + mat.m[2][1] = self.matrix.m[1][2]; + mat.m[0][2] = self.matrix.m[2][0]; + mat.m[1][2] = self.matrix.m[2][1]; + mat.m[2][2] = self.matrix.m[2][2]; + for (dest, src) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) { + let in_r: f32 = src[0]; + let in_g: f32 = src[1]; + let in_b: f32 = src[2]; + let out_r: f32 = mat.m[0][0] * in_r + mat.m[1][0] * in_g + mat.m[2][0] * in_b + self.tx; + let out_g: f32 = mat.m[0][1] * in_r + mat.m[1][1] * in_g + mat.m[2][1] * in_b + self.ty; + let out_b: f32 = mat.m[0][2] * in_r + mat.m[1][2] * in_g + mat.m[2][2] * in_b + self.tz; + dest[0] = clamp_float(out_r); + dest[1] = clamp_float(out_g); + dest[2] = clamp_float(out_b); + } + } +} +#[derive(Default)] +struct MatrixTransform { + matrix: Matrix, +} +impl ModularTransform for MatrixTransform { + fn transform(&self, src: &[f32], dest: &mut [f32]) { + let mut mat: Matrix = Matrix { m: [[0.; 3]; 3] }; + /* store the results in column major mode + * this makes doing the multiplication with sse easier */ + mat.m[0][0] = self.matrix.m[0][0]; + mat.m[1][0] = self.matrix.m[0][1]; + mat.m[2][0] = self.matrix.m[0][2]; + mat.m[0][1] = 
self.matrix.m[1][0]; + mat.m[1][1] = self.matrix.m[1][1]; + mat.m[2][1] = self.matrix.m[1][2]; + mat.m[0][2] = self.matrix.m[2][0]; + mat.m[1][2] = self.matrix.m[2][1]; + mat.m[2][2] = self.matrix.m[2][2]; + for (dest, src) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) { + let in_r: f32 = src[0]; + let in_g: f32 = src[1]; + let in_b: f32 = src[2]; + let out_r: f32 = mat.m[0][0] * in_r + mat.m[1][0] * in_g + mat.m[2][0] * in_b; + let out_g: f32 = mat.m[0][1] * in_r + mat.m[1][1] * in_g + mat.m[2][1] * in_b; + let out_b: f32 = mat.m[0][2] * in_r + mat.m[1][2] * in_g + mat.m[2][2] * in_b; + dest[0] = clamp_float(out_r); + dest[1] = clamp_float(out_g); + dest[2] = clamp_float(out_b); + } + } +} + +fn modular_transform_create_mAB(lut: &lutmABType) -> Option>> { + let mut transforms: Vec> = Vec::new(); + if lut.a_curves[0].is_some() { + let clut_length: usize; + // If the A curve is present this also implies the + // presence of a CLUT. + lut.clut_table.as_ref()?; + + // Prepare A curve. + let mut transform = Box::new(GammaTable::default()); + transform.input_clut_table[0] = build_input_gamma_table(lut.a_curves[0].as_deref()) + .map(|x| (x as Box<[f32]>).into_vec()); + transform.input_clut_table[1] = build_input_gamma_table(lut.a_curves[1].as_deref()) + .map(|x| (x as Box<[f32]>).into_vec()); + transform.input_clut_table[2] = build_input_gamma_table(lut.a_curves[2].as_deref()) + .map(|x| (x as Box<[f32]>).into_vec()); + + if lut.num_grid_points[0] as i32 != lut.num_grid_points[1] as i32 + || lut.num_grid_points[1] as i32 != lut.num_grid_points[2] as i32 + { + //XXX: We don't currently support clut that are not squared! 
+ return None; + } + transforms.push(transform); + + // Prepare CLUT + let mut transform = Box::new(ClutOnly::default()); + clut_length = (lut.num_grid_points[0] as usize).pow(3) * 3; + assert_eq!(clut_length, lut.clut_table.as_ref().unwrap().len()); + transform.clut = lut.clut_table.clone(); + transform.grid_size = lut.num_grid_points[0] as u16; + transforms.push(transform); + } + + if lut.m_curves[0].is_some() { + // M curve imples the presence of a Matrix + + // Prepare M curve + let mut transform = Box::new(GammaTable::default()); + transform.input_clut_table[0] = build_input_gamma_table(lut.m_curves[0].as_deref()) + .map(|x| (x as Box<[f32]>).into_vec()); + transform.input_clut_table[1] = build_input_gamma_table(lut.m_curves[1].as_deref()) + .map(|x| (x as Box<[f32]>).into_vec()); + transform.input_clut_table[2] = build_input_gamma_table(lut.m_curves[2].as_deref()) + .map(|x| (x as Box<[f32]>).into_vec()); + transforms.push(transform); + + // Prepare Matrix + let mut transform = Box::new(MatrixTranslate::default()); + transform.matrix = build_mAB_matrix(lut); + transform.tx = s15Fixed16Number_to_float(lut.e03); + transform.ty = s15Fixed16Number_to_float(lut.e13); + transform.tz = s15Fixed16Number_to_float(lut.e23); + transforms.push(transform); + } + + if lut.b_curves[0].is_some() { + // Prepare B curve + let mut transform = Box::new(GammaTable::default()); + transform.input_clut_table[0] = build_input_gamma_table(lut.b_curves[0].as_deref()) + .map(|x| (x as Box<[f32]>).into_vec()); + transform.input_clut_table[1] = build_input_gamma_table(lut.b_curves[1].as_deref()) + .map(|x| (x as Box<[f32]>).into_vec()); + transform.input_clut_table[2] = build_input_gamma_table(lut.b_curves[2].as_deref()) + .map(|x| (x as Box<[f32]>).into_vec()); + transforms.push(transform); + } else { + // B curve is mandatory + return None; + } + + if lut.reversed { + // mBA are identical to mAB except that the transformation order + // is reversed + transforms.reverse(); + } + 
Some(transforms) +} + +fn modular_transform_create_lut(lut: &lutType) -> Option>> { + let mut transforms: Vec> = Vec::new(); + + let clut_length: usize; + let mut transform = Box::new(MatrixTransform::default()); + + transform.matrix = build_lut_matrix(lut); + if true { + transforms.push(transform); + + // Prepare input curves + let mut transform = Box::new(Clut3x3::default()); + transform.input_clut_table[0] = + Some(lut.input_table[0..lut.num_input_table_entries as usize].to_vec()); + transform.input_clut_table[1] = Some( + lut.input_table + [lut.num_input_table_entries as usize..lut.num_input_table_entries as usize * 2] + .to_vec(), + ); + transform.input_clut_table[2] = Some( + lut.input_table[lut.num_input_table_entries as usize * 2 + ..lut.num_input_table_entries as usize * 3] + .to_vec(), + ); + // Prepare table + clut_length = (lut.num_clut_grid_points as usize).pow(3) * 3; + assert_eq!(clut_length, lut.clut_table.len()); + transform.clut = Some(lut.clut_table.clone()); + + transform.grid_size = lut.num_clut_grid_points as u16; + // Prepare output curves + transform.output_clut_table[0] = + Some(lut.output_table[0..lut.num_output_table_entries as usize].to_vec()); + transform.output_clut_table[1] = Some( + lut.output_table + [lut.num_output_table_entries as usize..lut.num_output_table_entries as usize * 2] + .to_vec(), + ); + transform.output_clut_table[2] = Some( + lut.output_table[lut.num_output_table_entries as usize * 2 + ..lut.num_output_table_entries as usize * 3] + .to_vec(), + ); + transforms.push(transform); + return Some(transforms); + } + None +} + +fn modular_transform_create_lut4x3(lut: &lutType) -> Vec> { + let mut transforms: Vec> = Vec::new(); + + let clut_length: usize; + // the matrix of lutType is only used when the input color space is XYZ. 
+ + // Prepare input curves + let mut transform = Box::new(Clut4x3::default()); + transform.input_clut_table[0] = + Some(lut.input_table[0..lut.num_input_table_entries as usize].to_vec()); + transform.input_clut_table[1] = Some( + lut.input_table + [lut.num_input_table_entries as usize..lut.num_input_table_entries as usize * 2] + .to_vec(), + ); + transform.input_clut_table[2] = Some( + lut.input_table + [lut.num_input_table_entries as usize * 2..lut.num_input_table_entries as usize * 3] + .to_vec(), + ); + transform.input_clut_table[3] = Some( + lut.input_table + [lut.num_input_table_entries as usize * 3..lut.num_input_table_entries as usize * 4] + .to_vec(), + ); + // Prepare table + clut_length = (lut.num_clut_grid_points as usize).pow(lut.num_input_channels as u32) + * lut.num_output_channels as usize; + assert_eq!(clut_length, lut.clut_table.len()); + transform.clut = Some(lut.clut_table.clone()); + + transform.grid_size = lut.num_clut_grid_points as u16; + // Prepare output curves + transform.output_clut_table[0] = + Some(lut.output_table[0..lut.num_output_table_entries as usize].to_vec()); + transform.output_clut_table[1] = Some( + lut.output_table + [lut.num_output_table_entries as usize..lut.num_output_table_entries as usize * 2] + .to_vec(), + ); + transform.output_clut_table[2] = Some( + lut.output_table + [lut.num_output_table_entries as usize * 2..lut.num_output_table_entries as usize * 3] + .to_vec(), + ); + transforms.push(transform); + transforms +} + +fn modular_transform_create_input(input: &Profile) -> Option>> { + let mut transforms = Vec::new(); + if let Some(A2B0) = &input.A2B0 { + let lut_transform; + if A2B0.num_input_channels == 4 { + lut_transform = Some(modular_transform_create_lut4x3(&A2B0)); + } else { + lut_transform = modular_transform_create_lut(&A2B0); + } + if let Some(lut_transform) = lut_transform { + transforms.extend(lut_transform); + } else { + return None; + } + } else if input.mAB.is_some() + && 
(*input.mAB.as_deref().unwrap()).num_in_channels == 3 + && (*input.mAB.as_deref().unwrap()).num_out_channels == 3 + { + let mAB_transform = modular_transform_create_mAB(input.mAB.as_deref().unwrap()); + if let Some(mAB_transform) = mAB_transform { + transforms.extend(mAB_transform); + } else { + return None; + } + } else { + let mut transform = Box::new(GammaTable::default()); + transform.input_clut_table[0] = + build_input_gamma_table(input.redTRC.as_deref()).map(|x| (x as Box<[f32]>).into_vec()); + transform.input_clut_table[1] = build_input_gamma_table(input.greenTRC.as_deref()) + .map(|x| (x as Box<[f32]>).into_vec()); + transform.input_clut_table[2] = + build_input_gamma_table(input.blueTRC.as_deref()).map(|x| (x as Box<[f32]>).into_vec()); + if transform.input_clut_table[0].is_none() + || transform.input_clut_table[1].is_none() + || transform.input_clut_table[2].is_none() + { + return None; + } else { + transforms.push(transform); + + let mut transform = Box::new(MatrixTransform::default()); + transform.matrix.m[0][0] = 1. / 1.999_969_5; + transform.matrix.m[0][1] = 0.0; + transform.matrix.m[0][2] = 0.0; + transform.matrix.m[1][0] = 0.0; + transform.matrix.m[1][1] = 1. / 1.999_969_5; + transform.matrix.m[1][2] = 0.0; + transform.matrix.m[2][0] = 0.0; + transform.matrix.m[2][1] = 0.0; + transform.matrix.m[2][2] = 1. 
/ 1.999_969_5; + transforms.push(transform); + + let mut transform = Box::new(MatrixTransform::default()); + transform.matrix = build_colorant_matrix(input); + transforms.push(transform); + } + } + Some(transforms) +} +fn modular_transform_create_output(out: &Profile) -> Option>> { + let mut transforms = Vec::new(); + if let Some(B2A0) = &out.B2A0 { + if B2A0.num_input_channels != 3 || B2A0.num_output_channels != 3 { + return None; + } + let lut_transform = modular_transform_create_lut(B2A0); + if let Some(lut_transform) = lut_transform { + transforms.extend(lut_transform); + } else { + return None; + } + } else if out.mBA.is_some() + && (*out.mBA.as_deref().unwrap()).num_in_channels == 3 + && (*out.mBA.as_deref().unwrap()).num_out_channels == 3 + { + let lut_transform = modular_transform_create_mAB(out.mBA.as_deref().unwrap()); + if let Some(lut_transform) = lut_transform { + transforms.extend(lut_transform) + } else { + return None; + } + } else if let (Some(redTRC), Some(greenTRC), Some(blueTRC)) = + (&out.redTRC, &out.greenTRC, &out.blueTRC) + { + let mut transform = Box::new(MatrixTransform::default()); + transform.matrix = build_colorant_matrix(out).invert()?; + transforms.push(transform); + + let mut transform = Box::new(MatrixTransform::default()); + transform.matrix.m[0][0] = 1.999_969_5; + transform.matrix.m[0][1] = 0.0; + transform.matrix.m[0][2] = 0.0; + transform.matrix.m[1][0] = 0.0; + transform.matrix.m[1][1] = 1.999_969_5; + transform.matrix.m[1][2] = 0.0; + transform.matrix.m[2][0] = 0.0; + transform.matrix.m[2][1] = 0.0; + transform.matrix.m[2][2] = 1.999_969_5; + transforms.push(transform); + + let mut transform = Box::new(GammaLut::default()); + transform.output_gamma_lut_r = Some(build_output_lut(redTRC)?); + transform.output_gamma_lut_g = Some(build_output_lut(greenTRC)?); + transform.output_gamma_lut_b = Some(build_output_lut(blueTRC)?); + transforms.push(transform); + } else { + debug_assert!(false, "Unsupported output profile workflow."); + 
return None; + } + Some(transforms) +} +/* Not Completed +// Simplify the transformation chain to an equivalent transformation chain +static struct qcms_modular_transform* qcms_modular_transform_reduce(struct qcms_modular_transform *transform) +{ + struct qcms_modular_transform *first_transform = NULL; + struct qcms_modular_transform *curr_trans = transform; + struct qcms_modular_transform *prev_trans = NULL; + while (curr_trans) { + struct qcms_modular_transform *next_trans = curr_trans->next_transform; + if (curr_trans->transform_module_fn == qcms_transform_module_matrix) { + if (next_trans && next_trans->transform_module_fn == qcms_transform_module_matrix) { + curr_trans->matrix = matrix_multiply(curr_trans->matrix, next_trans->matrix); + goto remove_next; + } + } + if (curr_trans->transform_module_fn == qcms_transform_module_gamma_table) { + bool isLinear = true; + uint16_t i; + for (i = 0; isLinear && i < 256; i++) { + isLinear &= (int)(curr_trans->input_clut_table_r[i] * 255) == i; + isLinear &= (int)(curr_trans->input_clut_table_g[i] * 255) == i; + isLinear &= (int)(curr_trans->input_clut_table_b[i] * 255) == i; + } + goto remove_current; + } + +next_transform: + if (!next_trans) break; + prev_trans = curr_trans; + curr_trans = next_trans; + continue; +remove_current: + if (curr_trans == transform) { + //Update head + transform = next_trans; + } else { + prev_trans->next_transform = next_trans; + } + curr_trans->next_transform = NULL; + qcms_modular_transform_release(curr_trans); + //return transform; + return qcms_modular_transform_reduce(transform); +remove_next: + curr_trans->next_transform = next_trans->next_transform; + next_trans->next_transform = NULL; + qcms_modular_transform_release(next_trans); + continue; + } + return transform; +} +*/ +fn modular_transform_create( + input: &Profile, + output: &Profile, +) -> Option>> { + let mut transforms = Vec::new(); + if input.color_space == RGB_SIGNATURE || input.color_space == CMYK_SIGNATURE { + let 
rgb_to_pcs = modular_transform_create_input(input); + if let Some(rgb_to_pcs) = rgb_to_pcs { + transforms.extend(rgb_to_pcs); + } else { + return None; + } + } else { + debug_assert!(false, "input color space not supported"); + return None; + } + + if input.pcs == LAB_SIGNATURE && output.pcs == XYZ_SIGNATURE { + transforms.push(Box::new(LABtoXYZ {})); + } + + // This does not improve accuracy in practice, something is wrong here. + //if (in->chromaticAdaption.invalid == false) { + // struct qcms_modular_transform* chromaticAdaption; + // chromaticAdaption = qcms_modular_transform_alloc(); + // if (!chromaticAdaption) + // goto fail; + // append_transform(chromaticAdaption, &next_transform); + // chromaticAdaption->matrix = matrix_invert(in->chromaticAdaption); + // chromaticAdaption->transform_module_fn = qcms_transform_module_matrix; + //} + + if input.pcs == XYZ_SIGNATURE && output.pcs == LAB_SIGNATURE { + transforms.push(Box::new(XYZtoLAB {})); + } + + if output.color_space == RGB_SIGNATURE { + let pcs_to_rgb = modular_transform_create_output(output); + if let Some(pcs_to_rgb) = pcs_to_rgb { + transforms.extend(pcs_to_rgb); + } else { + return None; + } + } else if output.color_space == CMYK_SIGNATURE { + let pcs_to_cmyk = modular_transform_create_output(output)?; + transforms.extend(pcs_to_cmyk); + } else { + debug_assert!(false, "output color space not supported"); + } + + // Not Completed + //return qcms_modular_transform_reduce(first_transform); + Some(transforms) +} +fn modular_transform_data( + transforms: Vec>, + mut src: Vec, + mut dest: Vec, + _len: usize, +) -> Vec { + for transform in transforms { + // Keep swaping src/dest when performing a transform to use less memory. 
+ transform.transform(&src, &mut dest); + std::mem::swap(&mut src, &mut dest); + } + // The results end up in the src buffer because of the switching + src +} + +pub fn chain_transform( + input: &Profile, + output: &Profile, + src: Vec, + dest: Vec, + lutSize: usize, +) -> Option> { + let transform_list = modular_transform_create(input, output); + if let Some(transform_list) = transform_list { + let lut = modular_transform_data(transform_list, src, dest, lutSize / 3); + return Some(lut); + } + None +} diff --git a/gfx/qcms/src/gtest.rs b/gfx/qcms/src/gtest.rs new file mode 100644 index 0000000000..bfe350def4 --- /dev/null +++ b/gfx/qcms/src/gtest.rs @@ -0,0 +1,962 @@ +#[cfg(all(test, feature = "c_bindings"))] +#[allow(clippy::all)] +mod gtest { + use crate::{ + c_bindings::*, iccread::*, transform::DataType::*, transform::*, + transform_util::lut_inverse_interp16, Intent::Perceptual, + }; + use libc::c_void; + #[cfg(target_arch = "arm")] + use std::arch::is_arm_feature_detected; + #[cfg(target_arch = "aarch64")] + use std::arch::is_aarch64_feature_detected; + use std::ptr::null_mut; + + #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + use crate::transform_neon::{ + qcms_transform_data_bgra_out_lut_neon, qcms_transform_data_rgb_out_lut_neon, + qcms_transform_data_rgba_out_lut_neon, + }; + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + use crate::{ + transform_avx::{ + qcms_transform_data_bgra_out_lut_avx, qcms_transform_data_rgb_out_lut_avx, + qcms_transform_data_rgba_out_lut_avx, + }, + transform_sse2::{ + qcms_transform_data_bgra_out_lut_sse2, qcms_transform_data_rgb_out_lut_sse2, + qcms_transform_data_rgba_out_lut_sse2, + }, + }; + + #[test] + fn test_lut_inverse_crash() { + let lutTable1: [u16; 128] = [ + 0x0000, 0x0000, 0x0000, 0x8000, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 
0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + ]; + let lutTable2: [u16; 128] = [ + 0xFFF0, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + ]; + + // Crash/Assert test + + lut_inverse_interp16(5, &lutTable1); + lut_inverse_interp16(5, 
&lutTable2); + } + + #[test] + fn test_lut_inverse() { + // mimic sRGB_v4_ICC mBA Output + // + // XXXX + // X + // X + // XXXX + let mut value: u16; + let mut lutTable: [u16; 256] = [0; 256]; + + for i in 0..20 { + lutTable[i] = 0; + } + + for i in 20..200 { + lutTable[i] = ((i - 20) * 0xFFFF / (200 - 20)) as u16; + } + + for i in 200..lutTable.len() { + lutTable[i] = 0xFFFF; + } + + for i in 0..65535 { + lut_inverse_interp16(i, &lutTable); + } + + // Lookup the interesting points + + value = lut_inverse_interp16(0, &lutTable); + assert!(value <= 20 * 256); + + value = lut_inverse_interp16(1, &lutTable); + assert!(value > 20 * 256); + + value = lut_inverse_interp16(65535, &lutTable); + assert!(value < 201 * 256); + } + + // this test takes to long to run on miri + #[cfg(not(miri))] + #[test] + fn test_lut_inverse_non_monotonic() { + // Make sure we behave sanely for non monotic functions + // X X X + // X X X + // X X X + let mut lutTable: [u16; 256] = [0; 256]; + + for i in 0..100 { + lutTable[i] = ((i - 0) * 0xFFFF / (100 - 0)) as u16; + } + + for i in 100..200 { + lutTable[i] = ((i - 100) * 0xFFFF / (200 - 100)) as u16; + } + + for i in 200..256 { + lutTable[i] = ((i - 200) * 0xFFFF / (256 - 200)) as u16; + } + + for i in 0..65535 { + lut_inverse_interp16(i, &lutTable); + } + + // Make sure we don't crash, hang or let sanitizers do their magic + } + /* qcms_data_create_rgb_with_gamma is broken + #[test] + fn profile_from_gamma() { + + let white_point = qcms_CIE_xyY { x: 0.64, y: 0.33, Y: 1.}; + let primaries = qcms_CIE_xyYTRIPLE { + red: qcms_CIE_xyY { x: 0.64, y: 0.33, Y: 1.}, + green: qcms_CIE_xyY { x: 0.21, y: 0.71, Y: 1.}, + blue: qcms_CIE_xyY { x: 0.15, y: 0.06, Y: 1.} + }; + let mut mem: *mut libc::c_void = std::ptr::null_mut(); + let mut size: size_t = 0; + unsafe { qcms_data_create_rgb_with_gamma(white_point, primaries, 2.2, &mut mem, &mut size); } + assert!(size != 0) + } + */ + + #[test] + fn alignment() { + assert_eq!(std::mem::align_of::(), 16); + } 
+ + #[test] + fn basic() { + let sRGB_profile = crate::c_bindings::qcms_profile_sRGB(); + + let Rec709Primaries = qcms_CIE_xyYTRIPLE { + red: qcms_CIE_xyY { + x: 0.6400f64, + y: 0.3300f64, + Y: 1.0f64, + }, + green: qcms_CIE_xyY { + x: 0.3000f64, + y: 0.6000f64, + Y: 1.0f64, + }, + blue: qcms_CIE_xyY { + x: 0.1500f64, + y: 0.0600f64, + Y: 1.0f64, + }, + }; + let D65 = qcms_white_point_sRGB(); + let other = unsafe { qcms_profile_create_rgb_with_gamma(D65, Rec709Primaries, 2.2) }; + unsafe { qcms_profile_precache_output_transform(&mut *other) }; + + let transform = unsafe { + qcms_transform_create(&mut *sRGB_profile, RGB8, &mut *other, RGB8, Perceptual) + }; + let mut data: [u8; 120] = [0; 120]; + + unsafe { + qcms_transform_data( + &*transform, + data.as_ptr() as *const libc::c_void, + data.as_mut_ptr() as *mut libc::c_void, + data.len() / 3, + ) + }; + + unsafe { + qcms_transform_release(transform); + qcms_profile_release(sRGB_profile); + qcms_profile_release(other); + } + } + + #[test] + fn gray_alpha() { + let sRGB_profile = qcms_profile_sRGB(); + let other = unsafe { qcms_profile_create_gray_with_gamma(2.2) }; + unsafe { qcms_profile_precache_output_transform(&mut *other) }; + + let transform = unsafe { + qcms_transform_create(&mut *other, GrayA8, &mut *sRGB_profile, RGBA8, Perceptual) + }; + assert!(!transform.is_null()); + + let in_data: [u8; 4] = [0, 255, 255, 0]; + let mut out_data: [u8; 2 * 4] = [0; 8]; + unsafe { + qcms_transform_data( + &*transform, + in_data.as_ptr() as *const libc::c_void, + out_data.as_mut_ptr() as *mut libc::c_void, + in_data.len() / 2, + ) + }; + + assert_eq!(out_data, [0, 0, 0, 255, 255, 255, 255, 0]); + unsafe { + qcms_transform_release(transform); + qcms_profile_release(sRGB_profile); + qcms_profile_release(other); + } + } + #[test] + fn samples() { + use libc::c_void; + use std::io::Read; + + let mut d = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); + qcms_enable_iccv4(); + d.push("fuzz"); + d.push("samples"); + let 
samples = [ + "0220-ca351238d719fd07ef8607d326b398fe.icc", + "0372-973178997787ee780b4b58ee47cad683.icc", + "0744-0a5faafe175e682b10c590b03d3f093b.icc", + "0316-eb3f97ab646cd7b66bee80bdfe6098ac.icc", + "0732-80707d91aea0f8e64ef0286cc7720e99.icc", + "1809-2bd4b77651214ca6110fdbee2502671e.icc", + ]; + for s in samples.iter() { + let mut p = d.clone(); + p.push(s); + let mut file = std::fs::File::open(p.clone()).unwrap(); + let mut data = Vec::new(); + file.read_to_end(&mut data).unwrap(); + let profile = + unsafe { qcms_profile_from_memory(data.as_ptr() as *const c_void, data.len()) }; + assert_ne!(profile, std::ptr::null_mut()); + unsafe { qcms_profile_release(profile) }; + } + } + + #[test] + fn v4() { + use libc::c_void; + use std::io::Read; + + let mut p = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); + qcms_enable_iccv4(); + p.push("profiles"); + // this profile was made by taking the lookup table profile from + // http://displaycal.net/icc-color-management-test/ and removing + // the unneeed tables using lcms + p.push("displaycal-lut-stripped.icc"); + + let mut file = std::fs::File::open(p).unwrap(); + let mut data = Vec::new(); + file.read_to_end(&mut data).unwrap(); + let profile = + unsafe { qcms_profile_from_memory(data.as_ptr() as *const c_void, data.len()) }; + assert_ne!(profile, std::ptr::null_mut()); + + let srgb_profile = qcms_profile_sRGB(); + assert_ne!(srgb_profile, std::ptr::null_mut()); + + unsafe { qcms_profile_precache_output_transform(&mut *srgb_profile) }; + + let intent = unsafe { qcms_profile_get_rendering_intent(&*profile) }; + let transform = + unsafe { qcms_transform_create(&*profile, RGB8, &*srgb_profile, RGB8, intent) }; + + assert_ne!(transform, std::ptr::null_mut()); + + const SRC_SIZE: usize = 4; + let src: [u8; SRC_SIZE * 3] = [ + 246, 246, 246, // gray + 255, 0, 0, // red + 0, 255, 255, // cyan + 255, 255, 0, // yellow + ]; + let mut dst: [u8; SRC_SIZE * 3] = [0; SRC_SIZE * 3]; + + // the reference values here should be 
adjusted if the accuracy + // of the transformation changes + let reference = [ + 246, 246, 246, // gray + 255, 0, 0, // red + 248, 14, 22, // red + 0, 0, 255, // blue + ]; + + unsafe { + qcms_transform_data( + &*transform, + src.as_ptr() as *const libc::c_void, + dst.as_mut_ptr() as *mut libc::c_void, + SRC_SIZE, + ); + } + + assert_eq!(reference, dst); + unsafe { qcms_transform_release(transform) } + unsafe { qcms_profile_release(profile) } + unsafe { qcms_profile_release(srgb_profile) } + } + + fn CmpRgbChannel(reference: &[u8], test: &[u8], index: usize) -> bool { + (reference[index] as i32 - test[index] as i32).abs() <= 1 + } + + fn CmpRgbBufferImpl( + refBuffer: &[u8], + testBuffer: &[u8], + pixels: usize, + kSwapRB: bool, + hasAlpha: bool, + ) -> bool { + let pixelSize = if hasAlpha { 4 } else { 3 }; + if refBuffer[..pixels * pixelSize] == testBuffer[..pixels * pixelSize] { + return true; + } + + let kRIndex = if kSwapRB { 2 } else { 0 }; + let kGIndex = 1; + let kBIndex = if kSwapRB { 0 } else { 2 }; + let kAIndex = 3; + + let mut remaining = pixels; + let mut reference = &refBuffer[..]; + let mut test = &testBuffer[..]; + while remaining > 0 { + if !CmpRgbChannel(reference, test, kRIndex) + || !CmpRgbChannel(reference, test, kGIndex) + || !CmpRgbChannel(reference, test, kBIndex) + || (hasAlpha && reference[kAIndex] != test[kAIndex]) + { + assert_eq!(test[kRIndex], reference[kRIndex]); + assert_eq!(test[kGIndex], reference[kGIndex]); + assert_eq!(test[kBIndex], reference[kBIndex]); + if hasAlpha { + assert_eq!(test[kAIndex], reference[kAIndex]); + } + return false; + } + remaining -= 1; + reference = &reference[pixelSize..]; + test = &test[pixelSize..]; + } + + true + } + + fn GetRgbInputBufferImpl(kSwapRB: bool, kHasAlpha: bool) -> (usize, Vec) { + let colorSamples = [0, 5, 16, 43, 101, 127, 182, 255]; + let colorSampleMax = colorSamples.len(); + let pixelSize = if kHasAlpha { 4 } else { 3 }; + let pixelCount = colorSampleMax * colorSampleMax * 256 * 3; + 
+ let mut outBuffer = vec![0; pixelCount * pixelSize]; + + let kRIndex = if kSwapRB { 2 } else { 0 }; + let kGIndex = 1; + let kBIndex = if kSwapRB { 0 } else { 2 }; + let kAIndex = 3; + + // Sample every red pixel value with a subset of green and blue. + // we use a u16 for r to avoid https://github.com/rust-lang/rust/issues/78283 + let mut color: &mut [u8] = &mut outBuffer[..]; + for r in 0..=255u16 { + for &g in colorSamples.iter() { + for &b in colorSamples.iter() { + color[kRIndex] = r as u8; + color[kGIndex] = g; + color[kBIndex] = b; + if kHasAlpha { + color[kAIndex] = 0x80; + } + color = &mut color[pixelSize..]; + } + } + } + + // Sample every green pixel value with a subset of red and blue. + let mut color = &mut outBuffer[..]; + for &r in colorSamples.iter() { + for g in 0..=255u16 { + for &b in colorSamples.iter() { + color[kRIndex] = r; + color[kGIndex] = g as u8; + color[kBIndex] = b; + if kHasAlpha { + color[kAIndex] = 0x80; + } + color = &mut color[pixelSize..]; + } + } + } + + // Sample every blue pixel value with a subset of red and green. 
+ let mut color = &mut outBuffer[..]; + for &r in colorSamples.iter() { + for &g in colorSamples.iter() { + for b in 0..=255u16 { + color[kRIndex] = r; + color[kGIndex] = g; + color[kBIndex] = b as u8; + if kHasAlpha { + color[kAIndex] = 0x80; + } + color = &mut color[pixelSize..]; + } + } + } + + (pixelCount, outBuffer) + } + + fn GetRgbInputBuffer() -> (usize, Vec) { + GetRgbInputBufferImpl(false, false) + } + + fn GetRgbaInputBuffer() -> (usize, Vec) { + GetRgbInputBufferImpl(false, true) + } + + fn GetBgraInputBuffer() -> (usize, Vec) { + GetRgbInputBufferImpl(true, true) + } + + fn CmpRgbBuffer(refBuffer: &[u8], testBuffer: &[u8], pixels: usize) -> bool { + CmpRgbBufferImpl(refBuffer, testBuffer, pixels, false, false) + } + + fn CmpRgbaBuffer(refBuffer: &[u8], testBuffer: &[u8], pixels: usize) -> bool { + CmpRgbBufferImpl(refBuffer, testBuffer, pixels, false, true) + } + + fn CmpBgraBuffer(refBuffer: &[u8], testBuffer: &[u8], pixels: usize) -> bool { + CmpRgbBufferImpl(refBuffer, testBuffer, pixels, true, true) + } + + fn ClearRgbBuffer(buffer: &mut [u8], pixels: usize) { + for i in 0..pixels * 3 { + buffer[i] = 0; + } + } + + fn ClearRgbaBuffer(buffer: &mut [u8], pixels: usize) { + for i in 0..pixels * 4 { + buffer[i] = 0; + } + } + + fn GetRgbOutputBuffer(pixels: usize) -> Vec { + vec![0; pixels * 3] + } + + fn GetRgbaOutputBuffer(pixels: usize) -> Vec { + vec![0; pixels * 4] + } + + struct QcmsProfileTest { + in_profile: *mut Profile, + out_profile: *mut Profile, + transform: *mut qcms_transform, + + input: Vec, + output: Vec, + reference: Vec, + + pixels: usize, + storage_type: DataType, + precache: bool, + } + + impl QcmsProfileTest { + fn new() -> QcmsProfileTest { + QcmsProfileTest { + in_profile: null_mut(), + out_profile: null_mut(), + transform: null_mut(), + input: Vec::new(), + output: Vec::new(), + reference: Vec::new(), + + pixels: 0, + storage_type: RGB8, + precache: false, + } + } + + fn SetUp(&mut self) { + qcms_enable_iccv4(); + } + + unsafe 
fn TearDown(&mut self) { + if self.in_profile != null_mut() { + qcms_profile_release(self.in_profile) + } + + if self.out_profile != null_mut() { + qcms_profile_release(self.out_profile) + } + + if self.transform != null_mut() { + qcms_transform_release(self.transform) + } + } + + unsafe fn SetTransform(&mut self, transform: *mut qcms_transform) -> bool { + if self.transform != null_mut() { + qcms_transform_release(self.transform) + } + self.transform = transform; + self.transform != null_mut() + } + + unsafe fn SetTransformForType(&mut self, ty: DataType) -> bool { + self.SetTransform(qcms_transform_create( + &*self.in_profile, + ty, + &*self.out_profile, + ty, + Perceptual, + )) + } + + unsafe fn SetBuffers(&mut self, ty: DataType) -> bool { + match ty { + RGB8 => { + let (pixels, input) = GetRgbInputBuffer(); + self.input = input; + self.pixels = pixels; + self.reference = GetRgbOutputBuffer(self.pixels); + self.output = GetRgbOutputBuffer(self.pixels) + } + RGBA8 => { + let (pixels, input) = GetBgraInputBuffer(); + self.input = input; + self.pixels = pixels; + self.reference = GetRgbaOutputBuffer(self.pixels); + self.output = GetRgbaOutputBuffer(self.pixels); + } + BGRA8 => { + let (pixels, input) = GetRgbaInputBuffer(); + self.input = input; + self.pixels = pixels; + self.reference = GetRgbaOutputBuffer(self.pixels); + self.output = GetRgbaOutputBuffer(self.pixels); + } + _ => unreachable!("Unknown type!"), + } + self.storage_type = ty; + self.pixels > 0 + } + + unsafe fn ClearOutputBuffer(&mut self) { + match self.storage_type { + RGB8 => ClearRgbBuffer(&mut self.output, self.pixels), + RGBA8 | BGRA8 => ClearRgbaBuffer(&mut self.output, self.pixels), + _ => unreachable!("Unknown type!"), + } + } + + unsafe fn ProduceRef(&mut self, trans_fn: transform_fn_t) { + trans_fn.unwrap()( + &*self.transform, + self.input.as_mut_ptr(), + self.reference.as_mut_ptr(), + self.pixels, + ) + } + + fn CopyInputToRef(&mut self) { + let pixelSize = match self.storage_type { + 
RGB8 => 3, + RGBA8 | BGRA8 => 4, + _ => unreachable!("Unknown type!"), + }; + self.reference + .copy_from_slice(&self.input[..self.pixels * pixelSize]) + } + + unsafe fn ProduceOutput(&mut self, trans_fn: transform_fn_t) { + self.ClearOutputBuffer(); + trans_fn.unwrap()( + &*self.transform, + self.input.as_mut_ptr(), + self.output.as_mut_ptr(), + self.pixels, + ) + } + + unsafe fn VerifyOutput(&self, buf: &[u8]) -> bool { + match self.storage_type { + RGB8 => CmpRgbBuffer(buf, &self.output, self.pixels), + RGBA8 => CmpRgbaBuffer(buf, &self.output, self.pixels), + BGRA8 => CmpBgraBuffer(buf, &self.output, self.pixels), + _ => unreachable!("Unknown type!"), + } + } + + unsafe fn ProduceVerifyOutput(&mut self, trans_fn: transform_fn_t) -> bool { + self.ProduceOutput(trans_fn); + self.VerifyOutput(&self.reference) + } + + unsafe fn PrecacheOutput(&mut self) { + qcms_profile_precache_output_transform(&mut *self.out_profile); + self.precache = true; + } + unsafe fn TransformPrecache(&mut self) { + assert_eq!(self.precache, false); + assert!(self.SetBuffers(RGB8)); + assert!(self.SetTransformForType(RGB8)); + self.ProduceRef(Some(qcms_transform_data_rgb_out_lut)); + + self.PrecacheOutput(); + assert!(self.SetTransformForType(RGB8)); + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgb_out_lut_precache))) + } + + unsafe fn TransformPrecachePlatformExt(&mut self) { + self.PrecacheOutput(); + + // Verify RGB transforms. 
+ assert!(self.SetBuffers(RGB8)); + assert!(self.SetTransformForType(RGB8)); + self.ProduceRef(Some(qcms_transform_data_rgb_out_lut_precache)); + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse2") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgb_out_lut_sse2))); + } + if is_x86_feature_detected!("avx") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgb_out_lut_avx))) + } + } + + #[cfg(target_arch = "arm")] + { + if is_arm_feature_detected!("neon") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgb_out_lut_neon))) + } + } + + #[cfg(target_arch = "aarch64")] + { + if is_aarch64_feature_detected!("neon") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgb_out_lut_neon))) + } + } + + // Verify RGBA transform. + assert!(self.SetBuffers(RGBA8)); + assert!(self.SetTransformForType(RGBA8)); + self.ProduceRef(Some(qcms_transform_data_rgba_out_lut_precache)); + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse2") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgba_out_lut_sse2))); + } + if is_x86_feature_detected!("avx") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgba_out_lut_avx))) + } + } + + #[cfg(target_arch = "arm")] + { + if is_arm_feature_detected!("neon") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgba_out_lut_neon))) + } + } + + #[cfg(target_arch = "aarch64")] + { + if is_aarch64_feature_detected!("neon") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgba_out_lut_neon))) + } + } + + // Verify BGRA transform. 
+ assert!(self.SetBuffers(BGRA8)); + assert!(self.SetTransformForType(BGRA8)); + self.ProduceRef(Some(qcms_transform_data_bgra_out_lut_precache)); + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse2") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_bgra_out_lut_sse2))); + } + if is_x86_feature_detected!("avx") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_bgra_out_lut_avx))) + } + } + + #[cfg(target_arch = "arm")] + { + if is_arm_feature_detected!("neon") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_bgra_out_lut_neon))) + } + } + + #[cfg(target_arch = "aarch64")] + { + if is_aarch64_feature_detected!("neon") { + assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_bgra_out_lut_neon))) + } + } + } + } + + #[test] + fn sRGB_to_sRGB_precache() { + unsafe { + let mut pt = QcmsProfileTest::new(); + pt.SetUp(); + pt.in_profile = qcms_profile_sRGB(); + pt.out_profile = qcms_profile_sRGB(); + pt.TransformPrecache(); + pt.TearDown(); + } + } + + #[test] + fn sRGB_to_sRGB_transform_identity() { + unsafe { + let mut pt = QcmsProfileTest::new(); + pt.SetUp(); + pt.in_profile = qcms_profile_sRGB(); + pt.out_profile = qcms_profile_sRGB(); + pt.PrecacheOutput(); + pt.SetBuffers(RGB8); + pt.SetTransformForType(RGB8); + qcms_transform_data( + &*pt.transform, + pt.input.as_mut_ptr() as *mut c_void, + pt.output.as_mut_ptr() as *mut c_void, + pt.pixels, + ); + assert!(pt.VerifyOutput(&pt.input)); + pt.TearDown(); + } + } + + fn profile_from_path(file: &str) -> *mut Profile { + use std::io::Read; + let mut path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("profiles"); + path.push(file); + let mut file = std::fs::File::open(path).unwrap(); + let mut data = Vec::new(); + file.read_to_end(&mut data).unwrap(); + let profile = + unsafe { qcms_profile_from_memory(data.as_ptr() as *const c_void, data.len()) }; + assert_ne!(profile, std::ptr::null_mut()); + profile + } + 
+ #[test] + fn sRGB_to_ThinkpadW540() { + unsafe { + let mut pt = QcmsProfileTest::new(); + pt.SetUp(); + pt.in_profile = qcms_profile_sRGB(); + pt.out_profile = profile_from_path("lcms_thinkpad_w540.icc"); + pt.TransformPrecachePlatformExt(); + pt.TearDown(); + } + } + + #[test] + fn sRGB_to_SamsungSyncmaster() { + unsafe { + let mut pt = QcmsProfileTest::new(); + pt.SetUp(); + pt.in_profile = qcms_profile_sRGB(); + pt.out_profile = profile_from_path("lcms_samsung_syncmaster.icc"); + pt.TransformPrecachePlatformExt(); + pt.TearDown(); + } + } + + #[test] + fn v4_output() { + qcms_enable_iccv4(); + let input = qcms_profile_sRGB(); + // B2A0-ident.icc was created from the profile in bug 1679621 + // manually edited using iccToXML/iccFromXML + let output = profile_from_path("B2A0-ident.icc"); + + let transform = unsafe { qcms_transform_create(&*input, RGB8, &*output, RGB8, Perceptual) }; + let src = [0u8, 60, 195]; + let mut dst = [0u8, 0, 0]; + unsafe { + qcms_transform_data( + &*transform, + src.as_ptr() as *const libc::c_void, + dst.as_mut_ptr() as *mut libc::c_void, + 1, + ); + } + assert_eq!(dst, [15, 16, 122]); + unsafe { + qcms_transform_release(transform); + qcms_profile_release(input); + qcms_profile_release(output); + } + } + + #[test] + fn gray_smoke_test() { + let input = crate::Profile::new_gray_with_gamma(2.2); + let output = crate::Profile::new_sRGB(); + let xfm = + transform_create(&input, GrayA8, &output, RGBA8, crate::Intent::default()).unwrap(); + let src = [20u8, 20u8]; + let mut dst = [0u8, 0, 0, 0]; + unsafe { + qcms_transform_data( + &xfm, + src.as_ptr() as *const libc::c_void, + dst.as_mut_ptr() as *mut libc::c_void, + src.len() / GrayA8.bytes_per_pixel(), + ); + } + } + + #[test] + fn data_create_rgb_with_gamma() { + let Rec709Primaries = qcms_CIE_xyYTRIPLE { + red: { + qcms_CIE_xyY { + x: 0.6400, + y: 0.3300, + Y: 1.0, + } + }, + green: { + qcms_CIE_xyY { + x: 0.3000, + y: 0.6000, + Y: 1.0, + } + }, + blue: { + qcms_CIE_xyY { + x: 0.1500, + 
y: 0.0600, + Y: 1.0, + } + }, + }; + let D65 = qcms_white_point_sRGB(); + let mut mem = std::ptr::null_mut(); + let mut size = 0; + unsafe { + qcms_data_create_rgb_with_gamma(D65, Rec709Primaries, 2.2, &mut mem, &mut size); + } + assert_ne!(size, 0); + unsafe { libc::free(mem) }; + } +} + +#[cfg(test)] +mod test { + use crate::{Profile, Transform}; + #[test] + fn identity() { + let p1 = Profile::new_sRGB(); + let p2 = Profile::new_sRGB(); + let xfm = + Transform::new(&p1, &p2, crate::DataType::RGB8, crate::Intent::default()).unwrap(); + let mut data = [4, 30, 80]; + xfm.apply(&mut data); + assert_eq!(data, [4, 30, 80]); + } + #[test] + fn D50() { + let p1 = Profile::new_sRGB(); + let p2 = Profile::new_XYZD50(); + let xfm = + Transform::new(&p1, &p2, crate::DataType::RGB8, crate::Intent::default()).unwrap(); + let mut data = [4, 30, 80]; + xfm.apply(&mut data); + assert_eq!(data, [4, 4, 15]); + } + + fn profile_from_path(file: &str) -> Box { + use std::io::Read; + let mut path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("profiles"); + path.push(file); + let mut file = std::fs::File::open(path).unwrap(); + let mut data = Vec::new(); + file.read_to_end(&mut data).unwrap(); + Profile::new_from_slice(&data, false).unwrap() + } + + #[test] + fn parametric_threshold() { + let src = profile_from_path("parametric-thresh.icc"); + let dst = crate::Profile::new_sRGB(); + let xfm = + Transform::new(&src, &dst, crate::DataType::RGB8, crate::Intent::default()).unwrap(); + let mut data = [4, 30, 80]; + xfm.apply(&mut data); + assert_eq!(data, [188, 188, 189]); + } + + #[test] + fn cmyk() { + let input = profile_from_path("ps_cmyk_min.icc"); + let output = Profile::new_sRGB(); + let xfm = crate::Transform::new_to( + &input, + &output, + crate::DataType::CMYK, + crate::DataType::RGB8, + crate::Intent::default(), + ) + .unwrap(); + let src = [4, 30, 80, 10]; + let mut dst = [0, 0, 0]; + xfm.convert(&src, &mut dst); + assert_eq!(dst, [252, 237, 211]); + } + + 
#[test] + fn sRGB_parametric() { + let src = Profile::new_sRGB(); + let dst = Profile::new_sRGB_parametric(); + let xfm = + Transform::new(&src, &dst, crate::DataType::RGB8, crate::Intent::default()).unwrap(); + let mut data = [4, 30, 80]; + xfm.apply(&mut data); + assert_eq!(data, [4, 30, 80]); + } +} diff --git a/gfx/qcms/src/iccread.rs b/gfx/qcms/src/iccread.rs new file mode 100644 index 0000000000..d86e9742d4 --- /dev/null +++ b/gfx/qcms/src/iccread.rs @@ -0,0 +1,1718 @@ +// qcms +// Copyright (C) 2009 Mozilla Foundation +// Copyright (C) 1998-2007 Marti Maria +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +use std::{ + convert::{TryInto, TryFrom}, + sync::atomic::AtomicBool, + sync::Arc, +}; + +use crate::{ + double_to_s15Fixed16Number, + transform::{set_rgb_colorants, PrecacheOuput}, +}; +use crate::{matrix::Matrix, s15Fixed16Number, s15Fixed16Number_to_float, Intent, Intent::*}; + +pub static SUPPORTS_ICCV4: AtomicBool = AtomicBool::new(cfg!(feature = "iccv4-enabled")); + +pub const RGB_SIGNATURE: u32 = 0x52474220; +pub const GRAY_SIGNATURE: u32 = 0x47524159; +pub const XYZ_SIGNATURE: u32 = 0x58595A20; +pub const LAB_SIGNATURE: u32 = 0x4C616220; +pub const CMYK_SIGNATURE: u32 = 0x434D594B; // 'CMYK' + +/// A color profile +#[derive(Default, Debug)] +pub struct Profile { + pub(crate) class_type: u32, + pub(crate) color_space: u32, + pub(crate) pcs: u32, + pub(crate) rendering_intent: Intent, + pub(crate) redColorant: XYZNumber, + pub(crate) blueColorant: XYZNumber, + pub(crate) greenColorant: XYZNumber, + // "TRC" is EOTF, e.g. gamma->linear transfer function. + // Because ICC profiles are phrased as decodings to the xyzd50-linear PCS. + pub(crate) redTRC: Option>, + pub(crate) blueTRC: Option>, + pub(crate) greenTRC: Option>, + pub(crate) grayTRC: Option>, + pub(crate) A2B0: Option>, + pub(crate) B2A0: Option>, + pub(crate) mAB: Option>, + pub(crate) mBA: Option>, + pub(crate) chromaticAdaption: Option, + pub(crate) output_table_r: Option>, + pub(crate) output_table_g: Option>, + pub(crate) output_table_b: Option>, + is_srgb: bool, +} + +#[derive(Debug, Default)] +#[allow(clippy::upper_case_acronyms)] +pub(crate) struct lutmABType { + pub num_in_channels: u8, + pub num_out_channels: u8, + // 16 is the upperbound, actual is 0..num_in_channels. 
+ pub num_grid_points: [u8; 16], + pub e00: s15Fixed16Number, + pub e01: s15Fixed16Number, + pub e02: s15Fixed16Number, + pub e03: s15Fixed16Number, + pub e10: s15Fixed16Number, + pub e11: s15Fixed16Number, + pub e12: s15Fixed16Number, + pub e13: s15Fixed16Number, + pub e20: s15Fixed16Number, + pub e21: s15Fixed16Number, + pub e22: s15Fixed16Number, + pub e23: s15Fixed16Number, + // reversed elements (for mBA) + pub reversed: bool, + pub clut_table: Option>, + pub a_curves: [Option>; MAX_CHANNELS], + pub b_curves: [Option>; MAX_CHANNELS], + pub m_curves: [Option>; MAX_CHANNELS], +} +#[derive(Clone, Debug)] +pub(crate) enum curveType { + Curve(Vec), // len=0 => Linear, len=1 => Gamma(v[0]), _ => lut + /// The ICC parametricCurveType is specified in terms of s15Fixed16Number, + /// so it's possible to use this variant to specify greater precision than + /// any raw ICC profile could + Parametric(Vec), +} +type uInt16Number = u16; + +/* should lut8Type and lut16Type be different types? */ +#[derive(Debug)] +pub(crate) struct lutType { + // used by lut8Type/lut16Type (mft2) only + pub num_input_channels: u8, + pub num_output_channels: u8, + pub num_clut_grid_points: u8, + pub e00: s15Fixed16Number, + pub e01: s15Fixed16Number, + pub e02: s15Fixed16Number, + pub e10: s15Fixed16Number, + pub e11: s15Fixed16Number, + pub e12: s15Fixed16Number, + pub e20: s15Fixed16Number, + pub e21: s15Fixed16Number, + pub e22: s15Fixed16Number, + pub num_input_table_entries: u16, + pub num_output_table_entries: u16, + pub input_table: Vec, + pub clut_table: Vec, + pub output_table: Vec, +} + +#[repr(C)] +#[derive(Copy, Clone, Debug, Default)] +#[allow(clippy::upper_case_acronyms)] +pub struct XYZNumber { + pub X: s15Fixed16Number, + pub Y: s15Fixed16Number, + pub Z: s15Fixed16Number, +} + +/// A color in the CIE xyY color space +/* the names for the following two types are sort of ugly */ +#[repr(C)] +#[derive(Copy, Clone)] +#[allow(clippy::upper_case_acronyms)] +pub struct qcms_CIE_xyY 
{ + pub x: f64, + pub y: f64, + pub Y: f64, +} + +/// A more convenient type for specifying primaries and white points where +/// luminosity is irrelevant +struct qcms_chromaticity { + x: f64, + y: f64, +} + +impl qcms_chromaticity { + const D65: Self = Self { + x: 0.3127, + y: 0.3290, + }; +} + +impl From for qcms_CIE_xyY { + fn from(qcms_chromaticity { x, y }: qcms_chromaticity) -> Self { + Self { x, y, Y: 1.0 } + } +} + +/// a set of CIE_xyY values that can use to describe the primaries of a color space +#[repr(C)] +#[derive(Copy, Clone)] +#[allow(clippy::upper_case_acronyms)] +pub struct qcms_CIE_xyYTRIPLE { + pub red: qcms_CIE_xyY, + pub green: qcms_CIE_xyY, + pub blue: qcms_CIE_xyY, +} + +struct Tag { + signature: u32, + offset: u32, + size: u32, +} + +/* It might be worth having a unified limit on content controlled + * allocation per profile. This would remove the need for many + * of the arbitrary limits that we used */ + +type TagIndex = [Tag]; + +/* a wrapper around the memory that we are going to parse + * into a qcms_profile */ +struct MemSource<'a> { + buf: &'a [u8], + valid: bool, + invalid_reason: Option<&'static str>, +} +pub type uInt8Number = u8; +#[inline] +fn uInt8Number_to_float(a: uInt8Number) -> f32 { + a as f32 / 255.0 +} + +#[inline] +fn uInt16Number_to_float(a: uInt16Number) -> f32 { + a as f32 / 65535.0 +} + +fn invalid_source(mut mem: &mut MemSource, reason: &'static str) { + mem.valid = false; + mem.invalid_reason = Some(reason); +} +fn read_u32(mem: &mut MemSource, offset: usize) -> u32 { + let val = mem.buf.get(offset..offset + 4); + if let Some(val) = val { + let val = val.try_into().unwrap(); + u32::from_be_bytes(val) + } else { + invalid_source(mem, "Invalid offset"); + 0 + } +} +fn read_u16(mem: &mut MemSource, offset: usize) -> u16 { + let val = mem.buf.get(offset..offset + 2); + if let Some(val) = val { + let val = val.try_into().unwrap(); + u16::from_be_bytes(val) + } else { + invalid_source(mem, "Invalid offset"); + 0 + } +} 
+fn read_u8(mem: &mut MemSource, offset: usize) -> u8 { + let val = mem.buf.get(offset); + if let Some(val) = val { + *val + } else { + invalid_source(mem, "Invalid offset"); + 0 + } +} +fn read_s15Fixed16Number(mem: &mut MemSource, offset: usize) -> s15Fixed16Number { + read_u32(mem, offset) as s15Fixed16Number +} +fn read_uInt8Number(mem: &mut MemSource, offset: usize) -> uInt8Number { + read_u8(mem, offset) +} +fn read_uInt16Number(mem: &mut MemSource, offset: usize) -> uInt16Number { + read_u16(mem, offset) +} +pub fn write_u32(mem: &mut [u8], offset: usize, value: u32) { + // we use get() and expect() instead of [..] so there's only one call to panic + // instead of two + mem.get_mut(offset..offset + std::mem::size_of_val(&value)) + .expect("OOB") + .copy_from_slice(&value.to_be_bytes()); +} +pub fn write_u16(mem: &mut [u8], offset: usize, value: u16) { + // we use get() and expect() instead of [..] so there's only one call to panic + // intead of two + mem.get_mut(offset..offset + std::mem::size_of_val(&value)) + .expect("OOB") + .copy_from_slice(&value.to_be_bytes()); +} + +/* An arbitrary 4MB limit on profile size */ +pub(crate) const MAX_PROFILE_SIZE: usize = 1024 * 1024 * 4; +const MAX_TAG_COUNT: u32 = 1024; + +fn check_CMM_type_signature(_src: &mut MemSource) { + //uint32_t CMM_type_signature = read_u32(src, 4); + //TODO: do the check? 
+} +fn check_profile_version(src: &mut MemSource) { + /* + uint8_t major_revision = read_u8(src, 8 + 0); + uint8_t minor_revision = read_u8(src, 8 + 1); + */ + let reserved1: u8 = read_u8(src, (8 + 2) as usize); + let reserved2: u8 = read_u8(src, (8 + 3) as usize); + /* Checking the version doesn't buy us anything + if (major_revision != 0x4) { + if (major_revision > 0x2) + invalid_source(src, "Unsupported major revision"); + if (minor_revision > 0x40) + invalid_source(src, "Unsupported minor revision"); + } + */ + if reserved1 != 0 || reserved2 != 0 { + invalid_source(src, "Invalid reserved bytes"); + }; +} + +const INPUT_DEVICE_PROFILE: u32 = 0x73636e72; // 'scnr' +pub const DISPLAY_DEVICE_PROFILE: u32 = 0x6d6e7472; // 'mntr' +const OUTPUT_DEVICE_PROFILE: u32 = 0x70727472; // 'prtr' +const DEVICE_LINK_PROFILE: u32 = 0x6c696e6b; // 'link' +const COLOR_SPACE_PROFILE: u32 = 0x73706163; // 'spac' +const ABSTRACT_PROFILE: u32 = 0x61627374; // 'abst' +const NAMED_COLOR_PROFILE: u32 = 0x6e6d636c; // 'nmcl' + +fn read_class_signature(mut profile: &mut Profile, mem: &mut MemSource) { + profile.class_type = read_u32(mem, 12); + match profile.class_type { + DISPLAY_DEVICE_PROFILE + | INPUT_DEVICE_PROFILE + | OUTPUT_DEVICE_PROFILE + | COLOR_SPACE_PROFILE => {} + _ => { + invalid_source(mem, "Invalid Profile/Device Class signature"); + } + }; +} +fn read_color_space(mut profile: &mut Profile, mem: &mut MemSource) { + profile.color_space = read_u32(mem, 16); + match profile.color_space { + RGB_SIGNATURE | GRAY_SIGNATURE => {} + #[cfg(feature = "cmyk")] + CMYK_SIGNATURE => {} + _ => { + invalid_source(mem, "Unsupported colorspace"); + } + }; +} +fn read_pcs(mut profile: &mut Profile, mem: &mut MemSource) { + profile.pcs = read_u32(mem, 20); + match profile.pcs { + XYZ_SIGNATURE | LAB_SIGNATURE => {} + _ => { + invalid_source(mem, "Unsupported pcs"); + } + }; +} +fn read_tag_table(_profile: &mut Profile, mem: &mut MemSource) -> Vec { + let count = read_u32(mem, 128); + if count 
> MAX_TAG_COUNT { + invalid_source(mem, "max number of tags exceeded"); + return Vec::new(); + } + let mut index = Vec::with_capacity(count as usize); + for i in 0..count { + let tag_start = (128 + 4 + 4 * i * 3) as usize; + let offset = read_u32(mem, tag_start + 4); + if offset as usize > mem.buf.len() { + invalid_source(mem, "tag points beyond the end of the buffer"); + } + index.push(Tag { + signature: read_u32(mem, tag_start), + offset, + size: read_u32(mem, tag_start + 8), + }); + } + + index +} + +/// Checks a profile for obvious inconsistencies and returns +/// true if the profile looks bogus and should probably be +/// ignored. +#[no_mangle] +pub extern "C" fn qcms_profile_is_bogus(profile: &mut Profile) -> bool { + let mut sum: [f32; 3] = [0.; 3]; + let mut target: [f32; 3] = [0.; 3]; + let mut tolerance: [f32; 3] = [0.; 3]; + let rX: f32; + let rY: f32; + let rZ: f32; + let gX: f32; + let gY: f32; + let gZ: f32; + let bX: f32; + let bY: f32; + let bZ: f32; + let negative: bool; + let mut i: u32; + // We currently only check the bogosity of RGB profiles + if profile.color_space != RGB_SIGNATURE { + return false; + } + if profile.A2B0.is_some() + || profile.B2A0.is_some() + || profile.mAB.is_some() + || profile.mBA.is_some() + { + return false; + } + rX = s15Fixed16Number_to_float(profile.redColorant.X); + rY = s15Fixed16Number_to_float(profile.redColorant.Y); + rZ = s15Fixed16Number_to_float(profile.redColorant.Z); + gX = s15Fixed16Number_to_float(profile.greenColorant.X); + gY = s15Fixed16Number_to_float(profile.greenColorant.Y); + gZ = s15Fixed16Number_to_float(profile.greenColorant.Z); + bX = s15Fixed16Number_to_float(profile.blueColorant.X); + bY = s15Fixed16Number_to_float(profile.blueColorant.Y); + bZ = s15Fixed16Number_to_float(profile.blueColorant.Z); + // Sum the values; they should add up to something close to white + sum[0] = rX + gX + bX; + sum[1] = rY + gY + bY; + sum[2] = rZ + gZ + bZ; + // Build our target vector (see mozilla bug 460629) + 
target[0] = 0.96420; + target[1] = 1.00000; + target[2] = 0.82491; + // Our tolerance vector - Recommended by Chris Murphy based on + // conversion from the LAB space criterion of no more than 3 in any one + // channel. This is similar to, but slightly more tolerant than Adobe's + // criterion. + tolerance[0] = 0.02; + tolerance[1] = 0.02; + tolerance[2] = 0.04; + // Compare with our tolerance + i = 0; + while i < 3 { + if !(sum[i as usize] - tolerance[i as usize] <= target[i as usize] + && sum[i as usize] + tolerance[i as usize] >= target[i as usize]) + { + return true; + } + i += 1 + } + if false { + negative = (rX < 0.) + || (rY < 0.) + || (rZ < 0.) + || (gX < 0.) + || (gY < 0.) + || (gZ < 0.) + || (bX < 0.) + || (bY < 0.) + || (bZ < 0.); + } else { + // Chromatic adaption to D50 can result in negative XYZ, but the white + // point D50 tolerance test has passed. Accept negative values herein. + // See https://bugzilla.mozilla.org/show_bug.cgi?id=498245#c18 onwards + // for discussion about whether profile XYZ can or cannot be negative, + // per the spec. Also the https://bugzil.la/450923 user report. 
+ // Also: https://bugzil.la/1799391 and https://bugzil.la/1792469 + negative = false; // bogus + } + if negative { + return true; + } + // All Good + false +} + +pub const TAG_bXYZ: u32 = 0x6258595a; +pub const TAG_gXYZ: u32 = 0x6758595a; +pub const TAG_rXYZ: u32 = 0x7258595a; +pub const TAG_rTRC: u32 = 0x72545243; +pub const TAG_bTRC: u32 = 0x62545243; +pub const TAG_gTRC: u32 = 0x67545243; +pub const TAG_kTRC: u32 = 0x6b545243; +pub const TAG_A2B0: u32 = 0x41324230; +pub const TAG_B2A0: u32 = 0x42324130; +pub const TAG_CHAD: u32 = 0x63686164; + +fn find_tag(index: &TagIndex, tag_id: u32) -> Option<&Tag> { + for t in index { + if t.signature == tag_id { + return Some(t); + } + } + None +} + +pub const XYZ_TYPE: u32 = 0x58595a20; // 'XYZ ' +pub const CURVE_TYPE: u32 = 0x63757276; // 'curv' +pub const PARAMETRIC_CURVE_TYPE: u32 = 0x70617261; // 'para' +pub const LUT16_TYPE: u32 = 0x6d667432; // 'mft2' +pub const LUT8_TYPE: u32 = 0x6d667431; // 'mft1' +pub const LUT_MAB_TYPE: u32 = 0x6d414220; // 'mAB ' +pub const LUT_MBA_TYPE: u32 = 0x6d424120; // 'mBA ' +pub const CHROMATIC_TYPE: u32 = 0x73663332; // 'sf32' + +fn read_tag_s15Fixed16ArrayType(src: &mut MemSource, tag: &Tag) -> Matrix { + let mut matrix: Matrix = Matrix { m: [[0.; 3]; 3] }; + let offset: u32 = tag.offset; + let type_0: u32 = read_u32(src, offset as usize); + // Check mandatory type signature for s16Fixed16ArrayType + if type_0 != CHROMATIC_TYPE { + invalid_source(src, "unexpected type, expected \'sf32\'"); + } + for i in 0..=8 { + matrix.m[(i / 3) as usize][(i % 3) as usize] = s15Fixed16Number_to_float( + read_s15Fixed16Number(src, (offset + 8 + (i * 4) as u32) as usize), + ); + } + matrix +} +fn read_tag_XYZType(src: &mut MemSource, index: &TagIndex, tag_id: u32) -> XYZNumber { + let mut num = XYZNumber { X: 0, Y: 0, Z: 0 }; + let tag = find_tag(&index, tag_id); + if let Some(tag) = tag { + let offset: u32 = tag.offset; + let type_0: u32 = read_u32(src, offset as usize); + if type_0 != XYZ_TYPE { + 
invalid_source(src, "unexpected type, expected XYZ"); + } + num.X = read_s15Fixed16Number(src, (offset + 8) as usize); + num.Y = read_s15Fixed16Number(src, (offset + 12) as usize); + num.Z = read_s15Fixed16Number(src, (offset + 16) as usize) + } else { + invalid_source(src, "missing xyztag"); + } + num +} +// Read the tag at a given offset rather then the tag_index. +// This method is used when reading mAB tags where nested curveType are +// present that are not part of the tag_index. +fn read_curveType(src: &mut MemSource, offset: u32, len: &mut u32) -> Option> { + const COUNT_TO_LENGTH: [u32; 5] = [1, 3, 4, 5, 7]; //PARAMETRIC_CURVE_TYPE + let type_0: u32 = read_u32(src, offset as usize); + let count: u32; + if type_0 != CURVE_TYPE && type_0 != PARAMETRIC_CURVE_TYPE { + invalid_source(src, "unexpected type, expected CURV or PARA"); + return None; + } + if type_0 == CURVE_TYPE { + count = read_u32(src, (offset + 8) as usize); + //arbitrary + if count > 40000 { + invalid_source(src, "curve size too large"); + return None; + } + let mut table = Vec::with_capacity(count as usize); + for i in 0..count { + table.push(read_u16(src, (offset + 12 + i * 2) as usize)); + } + *len = 12 + count * 2; + Some(Box::new(curveType::Curve(table))) + } else { + count = read_u16(src, (offset + 8) as usize) as u32; + if count > 4 { + invalid_source(src, "parametric function type not supported."); + return None; + } + let mut params = Vec::with_capacity(count as usize); + for i in 0..COUNT_TO_LENGTH[count as usize] { + params.push(s15Fixed16Number_to_float(read_s15Fixed16Number( + src, + (offset + 12 + i * 4) as usize, + ))); + } + *len = 12 + COUNT_TO_LENGTH[count as usize] * 4; + if count == 1 || count == 2 { + /* we have a type 1 or type 2 function that has a division by 'a' */ + let a: f32 = params[1]; + if a == 0.0 { + invalid_source(src, "parametricCurve definition causes division by zero"); + } + } + Some(Box::new(curveType::Parametric(params))) + } +} +fn read_tag_curveType( + 
src: &mut MemSource, + index: &TagIndex, + tag_id: u32, +) -> Option> { + let tag = find_tag(index, tag_id); + if let Some(tag) = tag { + let mut len: u32 = 0; + return read_curveType(src, tag.offset, &mut len); + } else { + invalid_source(src, "missing curvetag"); + } + None +} + +const MAX_LUT_SIZE: u32 = 500000; // arbitrary +const MAX_CHANNELS: usize = 10; // arbitrary +fn read_nested_curveType( + src: &mut MemSource, + curveArray: &mut [Option>; MAX_CHANNELS], + num_channels: u8, + curve_offset: u32, +) { + let mut channel_offset: u32 = 0; + #[allow(clippy::needless_range_loop)] + for i in 0..usize::from(num_channels) { + let mut tag_len: u32 = 0; + curveArray[i] = read_curveType(src, curve_offset + channel_offset, &mut tag_len); + if curveArray[i].is_none() { + invalid_source(src, "invalid nested curveType curve"); + break; + } else { + channel_offset += tag_len; + // 4 byte aligned + if tag_len % 4 != 0 { + channel_offset += 4 - tag_len % 4 + } + } + } +} + +/* See section 10.10 for specs */ +fn read_tag_lutmABType(src: &mut MemSource, tag: &Tag) -> Option> { + let offset: u32 = tag.offset; + let mut clut_size: u32 = 1; + let type_0: u32 = read_u32(src, offset as usize); + if type_0 != LUT_MAB_TYPE && type_0 != LUT_MBA_TYPE { + return None; + } + let num_in_channels = read_u8(src, (offset + 8) as usize); + let num_out_channels = read_u8(src, (offset + 9) as usize); + if num_in_channels > 10 || num_out_channels > 10 { + return None; + } + // We require 3in/out channels since we only support RGB->XYZ (or RGB->LAB) + // XXX: If we remove this restriction make sure that the number of channels + // is less or equal to the maximum number of mAB curves in qcmsint.h + // also check for clut_size overflow. 
Also make sure it's != 0 + if num_in_channels != 3 || num_out_channels != 3 { + return None; + } + // some of this data is optional and is denoted by a zero offset + // we also use this to track their existance + let mut a_curve_offset = read_u32(src, (offset + 28) as usize); + let mut clut_offset = read_u32(src, (offset + 24) as usize); + let mut m_curve_offset = read_u32(src, (offset + 20) as usize); + let mut matrix_offset = read_u32(src, (offset + 16) as usize); + let mut b_curve_offset = read_u32(src, (offset + 12) as usize); + // Convert offsets relative to the tag to relative to the profile + // preserve zero for optional fields + if a_curve_offset != 0 { + a_curve_offset += offset + } + if clut_offset != 0 { + clut_offset += offset + } + if m_curve_offset != 0 { + m_curve_offset += offset + } + if matrix_offset != 0 { + matrix_offset += offset + } + if b_curve_offset != 0 { + b_curve_offset += offset + } + if clut_offset != 0 { + debug_assert!(num_in_channels == 3); + // clut_size can not overflow since lg(256^num_in_channels) = 24 bits. + for i in 0..u32::from(num_in_channels) { + clut_size *= read_u8(src, (clut_offset + i) as usize) as u32; + if clut_size == 0 { + invalid_source(src, "bad clut_size"); + } + } + } else { + clut_size = 0 + } + // 24bits * 3 won't overflow either + clut_size *= num_out_channels as u32; + if clut_size > MAX_LUT_SIZE { + return None; + } + + let mut lut = Box::new(lutmABType::default()); + + if clut_offset != 0 { + for i in 0..usize::from(num_in_channels) { + lut.num_grid_points[i] = read_u8(src, clut_offset as usize + i); + if lut.num_grid_points[i] == 0 { + invalid_source(src, "bad grid_points"); + } + } + } + // Reverse the processing of transformation elements for mBA type. 
+ lut.reversed = type_0 == LUT_MBA_TYPE; + lut.num_in_channels = num_in_channels; + lut.num_out_channels = num_out_channels; + #[allow(clippy::identity_op, clippy::erasing_op)] + if matrix_offset != 0 { + // read the matrix if we have it + lut.e00 = read_s15Fixed16Number(src, (matrix_offset + (4 * 0) as u32) as usize); // the caller checks that this doesn't happen + lut.e01 = read_s15Fixed16Number(src, (matrix_offset + (4 * 1) as u32) as usize); + lut.e02 = read_s15Fixed16Number(src, (matrix_offset + (4 * 2) as u32) as usize); + lut.e10 = read_s15Fixed16Number(src, (matrix_offset + (4 * 3) as u32) as usize); + lut.e11 = read_s15Fixed16Number(src, (matrix_offset + (4 * 4) as u32) as usize); + lut.e12 = read_s15Fixed16Number(src, (matrix_offset + (4 * 5) as u32) as usize); + lut.e20 = read_s15Fixed16Number(src, (matrix_offset + (4 * 6) as u32) as usize); + lut.e21 = read_s15Fixed16Number(src, (matrix_offset + (4 * 7) as u32) as usize); + lut.e22 = read_s15Fixed16Number(src, (matrix_offset + (4 * 8) as u32) as usize); + lut.e03 = read_s15Fixed16Number(src, (matrix_offset + (4 * 9) as u32) as usize); + lut.e13 = read_s15Fixed16Number(src, (matrix_offset + (4 * 10) as u32) as usize); + lut.e23 = read_s15Fixed16Number(src, (matrix_offset + (4 * 11) as u32) as usize) + } + if a_curve_offset != 0 { + read_nested_curveType(src, &mut lut.a_curves, num_in_channels, a_curve_offset); + } + if m_curve_offset != 0 { + read_nested_curveType(src, &mut lut.m_curves, num_out_channels, m_curve_offset); + } + if b_curve_offset != 0 { + read_nested_curveType(src, &mut lut.b_curves, num_out_channels, b_curve_offset); + } else { + invalid_source(src, "B curves required"); + } + if clut_offset != 0 { + let clut_precision = read_u8(src, (clut_offset + 16) as usize); + let mut clut_table = Vec::with_capacity(clut_size as usize); + if clut_precision == 1 { + for i in 0..clut_size { + clut_table.push(uInt8Number_to_float(read_uInt8Number( + src, + (clut_offset + 20 + i) as usize, + ))); + } + 
lut.clut_table = Some(clut_table); + } else if clut_precision == 2 { + for i in 0..clut_size { + clut_table.push(uInt16Number_to_float(read_uInt16Number( + src, + (clut_offset + 20 + i * 2) as usize, + ))); + } + lut.clut_table = Some(clut_table); + } else { + invalid_source(src, "Invalid clut precision"); + } + } + if !src.valid { + return None; + } + Some(lut) +} +fn read_tag_lutType(src: &mut MemSource, tag: &Tag) -> Option> { + let offset: u32 = tag.offset; + let type_0: u32 = read_u32(src, offset as usize); + let num_input_table_entries: u16; + let num_output_table_entries: u16; + let input_offset: u32; + let entry_size: usize; + if type_0 == LUT8_TYPE { + num_input_table_entries = 256u16; + num_output_table_entries = 256u16; + entry_size = 1; + input_offset = 48 + } else if type_0 == LUT16_TYPE { + num_input_table_entries = read_u16(src, (offset + 48) as usize); + num_output_table_entries = read_u16(src, (offset + 50) as usize); + + // these limits come from the spec + if !(2..=4096).contains(&num_input_table_entries) + || !(2..=4096).contains(&num_output_table_entries) + { + invalid_source(src, "Bad channel count"); + return None; + } + entry_size = 2; + input_offset = 52 + } else { + debug_assert!(false); + invalid_source(src, "Unexpected lut type"); + return None; + } + let in_chan = read_u8(src, (offset + 8) as usize); + let out_chan = read_u8(src, (offset + 9) as usize); + if !(in_chan == 3 || in_chan == 4) || out_chan != 3 { + invalid_source(src, "CLUT only supports RGB and CMYK"); + return None; + } + + let grid_points = read_u8(src, (offset + 10) as usize); + let clut_size = match (grid_points as u32).checked_pow(in_chan as u32) { + Some(clut_size) => clut_size, + _ => { + invalid_source(src, "CLUT size overflow"); + return None; + } + }; + match clut_size { + 1..=MAX_LUT_SIZE => {} // OK + 0 => { + invalid_source(src, "CLUT must not be empty."); + return None; + } + _ => { + invalid_source(src, "CLUT too large"); + return None; + } + } + + let e00 = 
read_s15Fixed16Number(src, (offset + 12) as usize); + let e01 = read_s15Fixed16Number(src, (offset + 16) as usize); + let e02 = read_s15Fixed16Number(src, (offset + 20) as usize); + let e10 = read_s15Fixed16Number(src, (offset + 24) as usize); + let e11 = read_s15Fixed16Number(src, (offset + 28) as usize); + let e12 = read_s15Fixed16Number(src, (offset + 32) as usize); + let e20 = read_s15Fixed16Number(src, (offset + 36) as usize); + let e21 = read_s15Fixed16Number(src, (offset + 40) as usize); + let e22 = read_s15Fixed16Number(src, (offset + 44) as usize); + + let mut input_table = Vec::with_capacity((num_input_table_entries * in_chan as u16) as usize); + for i in 0..(num_input_table_entries * in_chan as u16) { + if type_0 == LUT8_TYPE { + input_table.push(uInt8Number_to_float(read_uInt8Number( + src, + (offset + input_offset) as usize + i as usize * entry_size, + ))) + } else { + input_table.push(uInt16Number_to_float(read_uInt16Number( + src, + (offset + input_offset) as usize + i as usize * entry_size, + ))) + } + } + let clut_offset = ((offset + input_offset) as usize + + (num_input_table_entries as i32 * in_chan as i32) as usize * entry_size) + as u32; + + let mut clut_table = Vec::with_capacity((clut_size * out_chan as u32) as usize); + for i in 0..clut_size * out_chan as u32 { + if type_0 == LUT8_TYPE { + clut_table.push(uInt8Number_to_float(read_uInt8Number( + src, + clut_offset as usize + i as usize * entry_size, + ))); + } else if type_0 == LUT16_TYPE { + clut_table.push(uInt16Number_to_float(read_uInt16Number( + src, + clut_offset as usize + i as usize * entry_size, + ))); + } + } + + let output_offset = + (clut_offset as usize + (clut_size * out_chan as u32) as usize * entry_size) as u32; + + let mut output_table = + Vec::with_capacity((num_output_table_entries * out_chan as u16) as usize); + for i in 0..num_output_table_entries as i32 * out_chan as i32 { + if type_0 == LUT8_TYPE { + output_table.push(uInt8Number_to_float(read_uInt8Number( + src, + 
output_offset as usize + i as usize * entry_size, + ))) + } else { + output_table.push(uInt16Number_to_float(read_uInt16Number( + src, + output_offset as usize + i as usize * entry_size, + ))) + } + } + Some(Box::new(lutType { + num_input_table_entries, + num_output_table_entries, + num_input_channels: in_chan, + num_output_channels: out_chan, + num_clut_grid_points: grid_points, + e00, + e01, + e02, + e10, + e11, + e12, + e20, + e21, + e22, + input_table, + clut_table, + output_table, + })) +} +fn read_rendering_intent(mut profile: &mut Profile, src: &mut MemSource) { + let intent = read_u32(src, 64); + profile.rendering_intent = match intent { + x if x == Perceptual as u32 => Perceptual, + x if x == RelativeColorimetric as u32 => RelativeColorimetric, + x if x == Saturation as u32 => Saturation, + x if x == AbsoluteColorimetric as u32 => AbsoluteColorimetric, + _ => { + invalid_source(src, "unknown rendering intent"); + Intent::default() + } + }; +} +fn profile_create() -> Box { + Box::new(Profile::default()) +} +/* build sRGB gamma table */ +/* based on cmsBuildParametricGamma() */ +#[allow(clippy::many_single_char_names)] +fn build_sRGB_gamma_table(num_entries: i32) -> Vec { + /* taken from lcms: Build_sRGBGamma() */ + let gamma: f64 = 2.4; + let a: f64 = 1.0 / 1.055; + let b: f64 = 0.055 / 1.055; + let c: f64 = 1.0 / 12.92; + let d: f64 = 0.04045; + + build_trc_table( + num_entries, + // IEC 61966-2.1 (sRGB) + // Y = (aX + b)^Gamma | X >= d + // Y = cX | X < d + |x| { + if x >= d { + let e: f64 = a * x + b; + if e > 0. { + e.powf(gamma) + } else { + 0. + } + } else { + c * x + } + }, + ) +} + +/// eotf: electro-optical transfer characteristic function, maps from [0, 1] +/// in non-linear (voltage) space to [0, 1] in linear (optical) space. Should +/// generally be a concave up function. 
/// Samples `eotf` at `num_entries` evenly spaced points of [0, 1] and
/// quantises each result to a 16-bit table entry.
///
/// eotf: electro-optical transfer characteristic function, maps from [0, 1]
/// in non-linear (voltage) space to [0, 1] in linear (optical) space. Should
/// generally be a concave up function.
///
/// Each sample is scaled by 65535, rounded to nearest and saturated to
/// `0..=65535`. A non-positive `num_entries` yields an empty table, and a
/// single-entry table is sampled at `x = 0`.
fn build_trc_table(num_entries: i32, eotf: impl Fn(f64) -> f64) -> Vec<u16> {
    // Casting a negative count to usize would request an absurd capacity and
    // abort, so bail out before allocating anything.
    if num_entries <= 0 {
        return Vec::new();
    }
    // With one entry there is no step between samples; dividing by
    // `num_entries - 1 == 0` would hand NaN to `eotf`.
    let last_index = f64::from((num_entries - 1).max(1));

    (0..num_entries)
        .map(|i| {
            let x = f64::from(i) / last_index;
            let scaled = eotf(x) * 65535.0 + 0.5;
            // Saturate; a NaN sample stays NaN here and casts to 0.
            scaled.clamp(0.0, 65535.0).floor() as u16
        })
        .collect()
}
/// Colour primaries code points, see
/// [Rec. ITU-T H.273 (12/2016)](https://www.itu.int/rec/T-REC-H.273-201612-I/en) Table 2.
///
/// Values 0, 3, 13–21, 23–255 are all reserved so all map to the same variant
/// ([`ColourPrimaries::Reserved`]). Every other variant's discriminant is its
/// H.273 code point.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ColourPrimaries {
    /// For future use by ITU-T | ISO/IEC
    Reserved,
    /// - Rec. ITU-R BT.709-6
    /// - Rec. ITU-R BT.1361-0 conventional colour gamut system and extended
    ///   colour gamut system (historical)
    /// - IEC 61966-2-1 sRGB or sYCC
    /// - IEC 61966-2-4
    /// - Society of Motion Picture and Television Engineers (SMPTE) RP 177
    ///   (1993) Annex B
    Bt709 = 1,
    /// Unspecified.
    /// Image characteristics are unknown or are determined by the application.
    Unspecified = 2,
    /// - Rec. ITU-R BT.470-6 System M (historical)
    /// - United States National Television System Committee 1953
    ///   Recommendation for transmission standards for color television
    /// - United States Federal Communications Commission (2003) Title 47 Code
    ///   of Federal Regulations 73.682 (a) (20)
    Bt470M = 4,
    /// - Rec. ITU-R BT.470-6 System B, G (historical)
    /// - Rec. ITU-R BT.601-7 625
    /// - Rec. ITU-R BT.1358-0 625 (historical)
    /// - Rec. ITU-R BT.1700-0 625 PAL and 625 SECAM
    Bt470Bg = 5,
    /// - Rec. ITU-R BT.601-7 525
    /// - Rec. ITU-R BT.1358-1 525 or 625 (historical)
    /// - Rec. ITU-R BT.1700-0 NTSC
    /// - SMPTE 170M (2004)
    ///
    /// (functionally the same as the value 7)
    Bt601 = 6,
    /// SMPTE 240M (1999) (historical) (functionally the same as the value 6)
    Smpte240 = 7,
    /// Generic film (colour filters using Illuminant C)
    Generic_film = 8,
    /// - Rec. ITU-R BT.2020-2
    /// - Rec. ITU-R BT.2100-0
    Bt2020 = 9,
    /// SMPTE ST 428-1 (CIE 1931 XYZ as in ISO 11664-1)
    Xyz = 10,
    /// SMPTE RP 431-2 (2011)
    Smpte431 = 11,
    /// SMPTE EG 432-1 (2010)
    Smpte432 = 12,
    /// EBU Tech. 3213-E (1975)
    Ebu3213 = 22,
}

impl From<u8> for ColourPrimaries {
    /// Maps an H.273 code point to its variant; every reserved value becomes
    /// [`ColourPrimaries::Reserved`]. The arms are exhaustive without a
    /// wildcard so that an unhandled code point is a compile error.
    fn from(value: u8) -> Self {
        match value {
            1 => Self::Bt709,
            2 => Self::Unspecified,
            4 => Self::Bt470M,
            5 => Self::Bt470Bg,
            6 => Self::Bt601,
            7 => Self::Smpte240,
            8 => Self::Generic_film,
            9 => Self::Bt2020,
            10 => Self::Xyz,
            11 => Self::Smpte431,
            12 => Self::Smpte432,
            22 => Self::Ebu3213,
            0 | 3 | 13..=21 | 23..=255 => Self::Reserved,
        }
    }
}

#[test]
fn colour_primaries() {
    // Every non-reserved variant must round-trip back to its code point.
    for value in 0..=u8::MAX {
        match ColourPrimaries::from(value) {
            ColourPrimaries::Reserved => {}
            variant => assert_eq!(value, variant as u8),
        }
    }
}
/// Transfer characteristics code points, see
/// [Rec. ITU-T H.273 (12/2016)](https://www.itu.int/rec/T-REC-H.273-201612-I/en) Table 3.
///
/// Values 0, 3, 19–255 are all reserved so all map to the same variant
/// ([`TransferCharacteristics::Reserved`]). Every other variant's discriminant
/// is its H.273 code point.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TransferCharacteristics {
    /// For future use by ITU-T | ISO/IEC
    Reserved,
    /// - Rec. ITU-R BT.709-6
    /// - Rec. ITU-R BT.1361-0 conventional colour gamut system (historical)
    ///
    /// (functionally the same as the values 6, 14 and 15)
    Bt709 = 1,
    /// Image characteristics are unknown or are determined by the application.
    Unspecified = 2,
    /// - Rec. ITU-R BT.470-6 System M (historical)
    /// - United States National Television System Committee 1953
    ///   Recommendation for transmission standards for color television
    /// - United States Federal Communications Commission (2003) Title 47 Code
    ///   of Federal Regulations 73.682 (a) (20)
    /// - Rec. ITU-R BT.1700-0 625 PAL and 625 SECAM
    Bt470M = 4,
    /// Rec. ITU-R BT.470-6 System B, G (historical)
    Bt470Bg = 5,
    /// - Rec. ITU-R BT.601-7 525 or 625
    /// - Rec. ITU-R BT.1358-1 525 or 625 (historical)
    /// - Rec. ITU-R BT.1700-0 NTSC
    /// - SMPTE 170M (2004)
    ///
    /// (functionally the same as the values 1, 14 and 15)
    Bt601 = 6,
    /// SMPTE 240M (1999) (historical)
    Smpte240 = 7,
    /// Linear transfer characteristics
    Linear = 8,
    /// Logarithmic transfer characteristic (100:1 range)
    Log_100 = 9,
    /// Logarithmic transfer characteristic (100 * Sqrt( 10 ) : 1 range)
    Log_100_sqrt10 = 10,
    /// IEC 61966-2-4
    Iec61966 = 11,
    /// Rec. ITU-R BT.1361-0 extended colour gamut system (historical)
    Bt_1361 = 12,
    /// IEC 61966-2-1 sRGB or sYCC
    Srgb = 13,
    /// Rec. ITU-R BT.2020-2 (10-bit system)
    /// (functionally the same as the values 1, 6 and 15)
    Bt2020_10bit = 14,
    /// Rec. ITU-R BT.2020-2 (12-bit system)
    /// (functionally the same as the values 1, 6 and 14)
    Bt2020_12bit = 15,
    /// - SMPTE ST 2084 for 10-, 12-, 14- and 16-bit systems
    /// - Rec. ITU-R BT.2100-0 perceptual quantization (PQ) system
    Smpte2084 = 16,
    /// SMPTE ST 428-1
    Smpte428 = 17,
    /// - ARIB STD-B67
    /// - Rec. ITU-R BT.2100-0 hybrid log-gamma (HLG) system
    Hlg = 18,
}

#[test]
fn transfer_characteristics() {
    // Every non-reserved variant must round-trip back to its code point.
    for value in 0..=u8::MAX {
        match TransferCharacteristics::from(value) {
            TransferCharacteristics::Reserved => {}
            variant => assert_eq!(value, variant as u8),
        }
    }
}

impl From<u8> for TransferCharacteristics {
    /// Maps an H.273 code point to its variant; every reserved value becomes
    /// [`TransferCharacteristics::Reserved`]. The arms are exhaustive without a
    /// wildcard so that an unhandled code point is a compile error.
    fn from(value: u8) -> Self {
        match value {
            1 => Self::Bt709,
            2 => Self::Unspecified,
            4 => Self::Bt470M,
            5 => Self::Bt470Bg,
            6 => Self::Bt601,
            7 => Self::Smpte240, // unimplemented
            8 => Self::Linear,
            9 => Self::Log_100,
            10 => Self::Log_100_sqrt10,
            11 => Self::Iec61966, // unimplemented
            12 => Self::Bt_1361,  // unimplemented
            13 => Self::Srgb,
            14 => Self::Bt2020_10bit,
            15 => Self::Bt2020_12bit,
            16 => Self::Smpte2084,
            17 => Self::Smpte428, // unimplemented
            18 => Self::Hlg,
            0 | 3 | 19..=255 => Self::Reserved,
        }
    }
}
+ // Converting between the two (Lc ↔︎ Y, V ↔︎ X): + // + // Y = (a * X + b)^g for (X >= d) + // Y = c * X for (X < d) + // + // g, a, b, c, d can then be defined in terms of α and β: + // + // g = 1 / 0.45 + // a = 1 / α + // b = 1 - α + // c = 1 / 4.500 + // d = 4.500 * β + // + // α and β are determined by solving the piecewise equations to + // ensure continuity of both value and slope at the value β. + // We use the values specified for 10-bit systems in + // https://www.itu.int/rec/R-REC-BT.2020-2-201510-I Table 4 + // since this results in the similar values as available ICC + // profiles after converting to s15Fixed16Number, providing us + // good test coverage. + + type Float = f32; + + const alpha: Float = 1.099; + const beta: Float = 0.018; + + const linear_coef: Float = 4.500; + const pow_exp: Float = 0.45; + + const g: Float = 1. / pow_exp; + const a: Float = 1. / alpha; + const b: Float = 1. - a; + const c: Float = 1. / linear_coef; + const d: Float = linear_coef * beta; + + curveType::Parametric(vec![g, a, b, c, d]) + } + TransferCharacteristics::Unspecified => panic!("TC={} is unspecified", value as u8), + TransferCharacteristics::Bt470M => *curve_from_gamma(2.2), + TransferCharacteristics::Bt470Bg => *curve_from_gamma(2.8), + TransferCharacteristics::Smpte240 => return Err(()), + TransferCharacteristics::Linear => *curve_from_gamma(1.), + TransferCharacteristics::Log_100 => { + // See log_100_transfer_characteristics() for derivation + // The opto-electronic transfer characteristic function (OETF) + // as defined in ITU-T H.273 table 3, row 9: + // + // V = 1.0 + Log10(Lc) ÷ 2 for 1 >= Lc >= 0.01 + // V = 0.0 for 0.01 > Lc >= 0 + // + // Inverting this to give the EOTF required for the profile gives + // + // Lc = 10^(2*V - 2) for 1 >= V >= 0 + let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, |v| 10f64.powf(2. 
* v - 2.)); + curveType::Curve(table) + } + TransferCharacteristics::Log_100_sqrt10 => { + // The opto-electronic transfer characteristic function (OETF) + // as defined in ITU-T H.273 table 3, row 10: + // + // V = 1.0 + Log10(Lc) ÷ 2.5 for 1 >= Lc >= Sqrt(10) ÷ 1000 + // V = 0.0 for Sqrt(10) ÷ 1000 > Lc >= 0 + // + // Inverting this to give the EOTF required for the profile gives + // + // Lc = 10^(2.5*V - 2.5) for 1 >= V >= 0 + let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, |v| 10f64.powf(2.5 * v - 2.5)); + curveType::Curve(table) + } + TransferCharacteristics::Iec61966 => return Err(()), + TransferCharacteristics::Bt_1361 => return Err(()), + TransferCharacteristics::Srgb => { + // Should we prefer this or curveType::Parametric? + curveType::Curve(build_sRGB_gamma_table(NUM_TRC_TABLE_ENTRIES)) + } + + TransferCharacteristics::Smpte2084 => { + // Despite using Lo rather than Lc, H.273 gives the OETF: + // + // V = ( ( c1 + c2 * (Lo)^n ) ÷ ( 1 + c3 * (Lo)^n ) )^m + const c1: f64 = 0.8359375; + const c2: f64 = 18.8515625; + const c3: f64 = 18.6875; + const m: f64 = 78.84375; + const n: f64 = 0.1593017578125; + + // Inverting this to give the EOTF required for the profile + // (and confirmed by Rec. ITU-R BT.2100-2, Table 4) gives + // + // Y = ( max[( X^(1/m) - c1 ), 0] ÷ ( c2 - c3 * X^(1/m) ) )^(1/n) + let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, |x| { + ((x.powf(1. / m) - c1).max(0.) / (c2 - c3 * x.powf(1. / m))).powf(1. / n) + }); + curveType::Curve(table) + } + TransferCharacteristics::Smpte428 => return Err(()), + TransferCharacteristics::Hlg => { + // The opto-electronic transfer characteristic function (OETF) + // as defined in ITU-T H.273 table 3, row 18: + // + // V = a * Ln(12 * Lc - b) + c for 1 >= Lc > 1 ÷ 12 + // V = Sqrt(3) * Lc^0.5 for 1 ÷ 12 >= Lc >= 0 + const a: f64 = 0.17883277; + const b: f64 = 0.28466892; + const c: f64 = 0.55991073; + + // Inverting this to give the EOTF required for the profile + // (and confirmed by Rec. 
ITU-R BT.2100-2, Table 4) gives + // + // Y = (X^2) / 3 for 0 <= X <= 0.5 + // Y = ((e^((X-c)/a))+b)/12 for 0.5 < X <= 1 + let table = build_trc_table(NUM_TRC_TABLE_ENTRIES, |x| { + if x <= 0.5 { + let y1 = x.powf(2.) / 3.; + assert!((0. ..=1. / 12.).contains(&y1)); + y1 + } else { + (std::f64::consts::E.powf((x - c) / a) + b) / 12. + } + }); + curveType::Curve(table) + } + }) + } +} + +#[cfg(test)] +fn check_transfer_characteristics(cicp: TransferCharacteristics, icc_path: &str) { + let mut cicp_out = [0u8; crate::transform::PRECACHE_OUTPUT_SIZE]; + let mut icc_out = [0u8; crate::transform::PRECACHE_OUTPUT_SIZE]; + let cicp_tc = curveType::try_from(cicp).unwrap(); + let icc = Profile::new_from_path(icc_path).unwrap(); + let icc_tc = icc.redTRC.as_ref().unwrap(); + + eprintln!("cicp_tc: {:?}", cicp_tc); + eprintln!("icc_tc: {:?}", icc_tc); + + crate::transform_util::compute_precache(icc_tc, &mut icc_out); + crate::transform_util::compute_precache(&cicp_tc, &mut cicp_out); + + let mut off_by_one = 0; + for i in 0..cicp_out.len() { + match (cicp_out[i] as i16) - (icc_out[i] as i16) { + 0 => {} + 1 | -1 => { + off_by_one += 1; + } + _ => assert_eq!(cicp_out[i], icc_out[i], "difference at index {}", i), + } + } + eprintln!("{} / {} off by one", off_by_one, cicp_out.len()); +} + +#[test] +fn srgb_transfer_characteristics() { + check_transfer_characteristics(TransferCharacteristics::Srgb, "sRGB_lcms.icc"); +} + +#[test] +fn bt709_transfer_characteristics() { + check_transfer_characteristics(TransferCharacteristics::Bt709, "ITU-709.icc"); +} + +#[test] +fn bt2020_10bit_transfer_characteristics() { + check_transfer_characteristics(TransferCharacteristics::Bt2020_10bit, "ITU-2020.icc"); +} + +#[test] +fn bt2020_12bit_transfer_characteristics() { + check_transfer_characteristics(TransferCharacteristics::Bt2020_12bit, "ITU-2020.icc"); +} + +impl Profile { + //XXX: it would be nice if we had a way of ensuring + // everything in a profile was initialized regardless of how it 
was created + //XXX: should this also be taking a black_point? + /* similar to CGColorSpaceCreateCalibratedRGB */ + pub fn new_rgb_with_table( + white_point: qcms_CIE_xyY, + primaries: qcms_CIE_xyYTRIPLE, + table: &[u16], + ) -> Option> { + let mut profile = profile_create(); + //XXX: should store the whitepoint + if !set_rgb_colorants(&mut profile, white_point, primaries) { + return None; + } + profile.redTRC = Some(curve_from_table(table)); + profile.blueTRC = Some(curve_from_table(table)); + profile.greenTRC = Some(curve_from_table(table)); + profile.class_type = DISPLAY_DEVICE_PROFILE; + profile.rendering_intent = Perceptual; + profile.color_space = RGB_SIGNATURE; + profile.pcs = XYZ_TYPE; + Some(profile) + } + pub fn new_sRGB() -> Box { + let D65 = qcms_white_point_sRGB(); + let table = build_sRGB_gamma_table(1024); + + let mut srgb = Profile::new_rgb_with_table( + D65, + qcms_CIE_xyYTRIPLE::from(ColourPrimaries::Bt709), + &table, + ) + .unwrap(); + srgb.is_srgb = true; + srgb + } + + /// Returns true if this profile is sRGB + pub fn is_sRGB(&self) -> bool { + self.is_srgb + } + + pub(crate) fn new_sRGB_parametric() -> Box { + let primaries = qcms_CIE_xyYTRIPLE::from(ColourPrimaries::Bt709); + let white_point = qcms_white_point_sRGB(); + let mut profile = profile_create(); + set_rgb_colorants(&mut profile, white_point, primaries); + + let curve = Box::new(curveType::Parametric(vec![ + 2.4, + 1. / 1.055, + 0.055 / 1.055, + 1. 
/ 12.92, + 0.04045, + ])); + profile.redTRC = Some(curve.clone()); + profile.blueTRC = Some(curve.clone()); + profile.greenTRC = Some(curve); + profile.class_type = DISPLAY_DEVICE_PROFILE; + profile.rendering_intent = Perceptual; + profile.color_space = RGB_SIGNATURE; + profile.pcs = XYZ_TYPE; + profile.is_srgb = true; + profile + } + + /// Create a new profile with D50 adopted white and identity transform functions + pub fn new_XYZD50() -> Box { + let mut profile = profile_create(); + profile.redColorant.X = double_to_s15Fixed16Number(1.); + profile.redColorant.Y = double_to_s15Fixed16Number(0.); + profile.redColorant.Z = double_to_s15Fixed16Number(0.); + profile.greenColorant.X = double_to_s15Fixed16Number(0.); + profile.greenColorant.Y = double_to_s15Fixed16Number(1.); + profile.greenColorant.Z = double_to_s15Fixed16Number(0.); + profile.blueColorant.X = double_to_s15Fixed16Number(0.); + profile.blueColorant.Y = double_to_s15Fixed16Number(0.); + profile.blueColorant.Z = double_to_s15Fixed16Number(1.); + profile.redTRC = Some(identity_curve()); + profile.blueTRC = Some(identity_curve()); + profile.greenTRC = Some(identity_curve()); + + profile.class_type = DISPLAY_DEVICE_PROFILE; + profile.rendering_intent = Perceptual; + profile.color_space = RGB_SIGNATURE; + profile.pcs = XYZ_TYPE; + profile + } + + pub fn new_cicp(cp: ColourPrimaries, tc: TransferCharacteristics) -> Option> { + let mut profile = profile_create(); + //XXX: should store the whitepoint + if !set_rgb_colorants(&mut profile, cp.white_point(), qcms_CIE_xyYTRIPLE::from(cp)) { + return None; + } + let curve = curveType::try_from(tc).ok()?; + profile.redTRC = Some(Box::new(curve.clone())); + profile.blueTRC = Some(Box::new(curve.clone())); + profile.greenTRC = Some(Box::new(curve)); + profile.class_type = DISPLAY_DEVICE_PROFILE; + profile.rendering_intent = Perceptual; + profile.color_space = RGB_SIGNATURE; + profile.pcs = XYZ_TYPE; + + profile.is_srgb = (cp, tc) == (ColourPrimaries::Bt709, 
TransferCharacteristics::Srgb); + Some(profile) + } + + pub fn new_gray_with_gamma(gamma: f32) -> Box { + let mut profile = profile_create(); + + profile.grayTRC = Some(curve_from_gamma(gamma)); + profile.class_type = DISPLAY_DEVICE_PROFILE; + profile.rendering_intent = Perceptual; + profile.color_space = GRAY_SIGNATURE; + profile.pcs = XYZ_TYPE; + profile + } + + pub fn new_rgb_with_gamma_set( + white_point: qcms_CIE_xyY, + primaries: qcms_CIE_xyYTRIPLE, + redGamma: f32, + greenGamma: f32, + blueGamma: f32, + ) -> Option> { + let mut profile = profile_create(); + + //XXX: should store the whitepoint + if !set_rgb_colorants(&mut profile, white_point, primaries) { + return None; + } + profile.redTRC = Some(curve_from_gamma(redGamma)); + profile.blueTRC = Some(curve_from_gamma(blueGamma)); + profile.greenTRC = Some(curve_from_gamma(greenGamma)); + profile.class_type = DISPLAY_DEVICE_PROFILE; + profile.rendering_intent = Perceptual; + profile.color_space = RGB_SIGNATURE; + profile.pcs = XYZ_TYPE; + Some(profile) + } + + pub fn new_from_path(file: &str) -> Option> { + Profile::new_from_slice(&std::fs::read(file).ok()?, false) + } + + pub fn new_from_slice(mem: &[u8], curves_only: bool) -> Option> { + let length: u32; + let mut source: MemSource = MemSource { + buf: mem, + valid: false, + invalid_reason: None, + }; + let index; + source.valid = true; + let mut src: &mut MemSource = &mut source; + if mem.len() < 4 { + return None; + } + length = read_u32(src, 0); + if length as usize <= mem.len() { + // shrink the area that we can read if appropriate + src.buf = &src.buf[0..length as usize]; + } else { + return None; + } + /* ensure that the profile size is sane so it's easier to reason about */ + if src.buf.len() <= 64 || src.buf.len() >= MAX_PROFILE_SIZE { + return None; + } + let mut profile = profile_create(); + + check_CMM_type_signature(src); + check_profile_version(src); + read_class_signature(&mut profile, src); + read_rendering_intent(&mut profile, src); + 
read_color_space(&mut profile, src); + read_pcs(&mut profile, src); + //TODO read rest of profile stuff + if !src.valid { + return None; + } + + index = read_tag_table(&mut profile, src); + if !src.valid || index.is_empty() { + return None; + } + + if let Some(chad) = find_tag(&index, TAG_CHAD) { + profile.chromaticAdaption = Some(read_tag_s15Fixed16ArrayType(src, chad)) + } else { + profile.chromaticAdaption = None; //Signal the data is not present + } + + if profile.class_type == DISPLAY_DEVICE_PROFILE + || profile.class_type == INPUT_DEVICE_PROFILE + || profile.class_type == OUTPUT_DEVICE_PROFILE + || profile.class_type == COLOR_SPACE_PROFILE + { + if profile.color_space == RGB_SIGNATURE { + if !curves_only { + if let Some(A2B0) = find_tag(&index, TAG_A2B0) { + let lut_type = read_u32(src, A2B0.offset as usize); + if lut_type == LUT8_TYPE || lut_type == LUT16_TYPE { + profile.A2B0 = read_tag_lutType(src, A2B0) + } else if lut_type == LUT_MAB_TYPE { + profile.mAB = read_tag_lutmABType(src, A2B0) + } + } + if let Some(B2A0) = find_tag(&index, TAG_B2A0) { + let lut_type = read_u32(src, B2A0.offset as usize); + if lut_type == LUT8_TYPE || lut_type == LUT16_TYPE { + profile.B2A0 = read_tag_lutType(src, B2A0) + } else if lut_type == LUT_MBA_TYPE { + profile.mBA = read_tag_lutmABType(src, B2A0) + } + } + } + if find_tag(&index, TAG_rXYZ).is_some() || curves_only { + profile.redColorant = read_tag_XYZType(src, &index, TAG_rXYZ); + profile.greenColorant = read_tag_XYZType(src, &index, TAG_gXYZ); + profile.blueColorant = read_tag_XYZType(src, &index, TAG_bXYZ) + } + if !src.valid { + return None; + } + + if find_tag(&index, TAG_rTRC).is_some() || curves_only { + profile.redTRC = read_tag_curveType(src, &index, TAG_rTRC); + profile.greenTRC = read_tag_curveType(src, &index, TAG_gTRC); + profile.blueTRC = read_tag_curveType(src, &index, TAG_bTRC); + if profile.redTRC.is_none() + || profile.blueTRC.is_none() + || profile.greenTRC.is_none() + { + return None; + } + } + } else 
if profile.color_space == GRAY_SIGNATURE { + profile.grayTRC = read_tag_curveType(src, &index, TAG_kTRC); + profile.grayTRC.as_ref()?; + } else if profile.color_space == CMYK_SIGNATURE { + if let Some(A2B0) = find_tag(&index, TAG_A2B0) { + let lut_type = read_u32(src, A2B0.offset as usize); + if lut_type == LUT8_TYPE || lut_type == LUT16_TYPE { + profile.A2B0 = read_tag_lutType(src, A2B0) + } else if lut_type == LUT_MBA_TYPE { + profile.mAB = read_tag_lutmABType(src, A2B0) + } + } + } else { + debug_assert!(false, "read_color_space protects against entering here"); + return None; + } + } else { + return None; + } + + if !src.valid { + return None; + } + Some(profile) + } + /// Precomputes the information needed for this profile to be + /// used as the output profile when constructing a `Transform`. + pub fn precache_output_transform(&mut self) { + crate::transform::qcms_profile_precache_output_transform(self); + } +} diff --git a/gfx/qcms/src/lib.rs b/gfx/qcms/src/lib.rs new file mode 100644 index 0000000000..c311964ee3 --- /dev/null +++ b/gfx/qcms/src/lib.rs @@ -0,0 +1,72 @@ +/*! A pure Rust color management library. +*/ + +#![allow(dead_code)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] +#![allow(non_upper_case_globals)] +// These are needed for the neon SIMD code and can be removed once the MSRV supports the +// instrinsics we use +#![cfg_attr(feature = "neon", feature(stdsimd))] +#![cfg_attr( + feature = "neon", + feature(arm_target_feature, raw_ref_op) + +)] + +/// These values match the Rendering Intent values from the ICC spec +#[repr(C)] +#[derive(Clone, Copy, Debug)] +pub enum Intent { + AbsoluteColorimetric = 3, + Saturation = 2, + RelativeColorimetric = 1, + Perceptual = 0, +} + +use Intent::*; + +impl Default for Intent { + fn default() -> Self { + /* Chris Murphy (CM consultant) suggests this as a default in the event that we + * cannot reproduce relative + Black Point Compensation. 
BPC brings an + * unacceptable performance overhead, so we go with perceptual. */ + Perceptual + } +} + +pub(crate) type s15Fixed16Number = i32; + +/* produces the nearest float to 'a' with a maximum error + * of 1/1024 which happens for large values like 0x40000040 */ +#[inline] +fn s15Fixed16Number_to_float(a: s15Fixed16Number) -> f32 { + a as f32 / 65536.0 +} + +#[inline] +fn double_to_s15Fixed16Number(v: f64) -> s15Fixed16Number { + (v * 65536f64) as i32 +} + +#[cfg(feature = "c_bindings")] +extern crate libc; +#[cfg(feature = "c_bindings")] +pub mod c_bindings; +mod chain; +mod gtest; +mod iccread; +mod matrix; +mod transform; +pub use iccread::qcms_CIE_xyY as CIE_xyY; +pub use iccread::qcms_CIE_xyYTRIPLE as CIE_xyYTRIPLE; +pub use iccread::Profile; +pub use transform::DataType; +pub use transform::Transform; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +mod transform_avx; +#[cfg(all(any(target_arch = "aarch64", target_arch = "arm"), feature = "neon"))] +mod transform_neon; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +mod transform_sse2; +mod transform_util; diff --git a/gfx/qcms/src/matrix.rs b/gfx/qcms/src/matrix.rs new file mode 100644 index 0000000000..8cd450241e --- /dev/null +++ b/gfx/qcms/src/matrix.rs @@ -0,0 +1,134 @@ +// qcms +// Copyright (C) 2009 Mozilla Foundation +// Copyright (C) 1998-2007 Marti Maria +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +#[derive(Copy, Clone, Debug, Default)] +pub struct Matrix { + pub m: [[f32; 3]; 3], // Three rows of three elems. +} + +#[derive(Copy, Clone)] +pub struct Vector { + pub v: [f32; 3], +} + +impl Matrix { + pub fn eval(&self, v: Vector) -> Vector { + let mut result: Vector = Vector { v: [0.; 3] }; + result.v[0] = self.m[0][0] * v.v[0] + self.m[0][1] * v.v[1] + self.m[0][2] * v.v[2]; + result.v[1] = self.m[1][0] * v.v[0] + self.m[1][1] * v.v[1] + self.m[1][2] * v.v[2]; + result.v[2] = self.m[2][0] * v.v[0] + self.m[2][1] * v.v[1] + self.m[2][2] * v.v[2]; + result + } + + pub fn row(&self, r: usize) -> [f32; 3] { + self.m[r] + } + + //probably reuse this computation in matrix_invert + pub fn det(&self) -> f32 { + let det: f32 = self.m[0][0] * self.m[1][1] * self.m[2][2] + + self.m[0][1] * self.m[1][2] * self.m[2][0] + + self.m[0][2] * self.m[1][0] * self.m[2][1] + - self.m[0][0] * self.m[1][2] * self.m[2][1] + - self.m[0][1] * self.m[1][0] * self.m[2][2] + - self.m[0][2] * self.m[1][1] * self.m[2][0]; + det + } + /* from pixman and cairo and Mathematics for Game Programmers */ + /* lcms uses gauss-jordan elimination with partial pivoting which is + * less efficient and not as numerically stable. See Mathematics for + * Game Programmers. 
*/
    /// Returns the inverse of this matrix, computed as `adj(A) / det(A)`,
    /// or `None` when the determinant is exactly zero.
    pub fn invert(&self) -> Option<Matrix> {
        // For an index k, (A[k], B[k]) are the two indices other than k.
        const A: [usize; 3] = [2, 2, 1];
        const B: [usize; 3] = [1, 0, 0];

        /* inv (A) = 1/det (A) * adj (A) */
        let det: f32 = self.det();
        if det == 0. {
            return None;
        }
        let inv_det: f32 = 1. / det;

        let mut dest_mat = Matrix { m: [[0.; 3]; 3] };
        for j in 0..3 {
            for i in 0..3 {
                let (ai, aj, bi, bj) = (A[i], A[j], B[i], B[j]);
                // 2x2 minor obtained by deleting row i and column j; the
                // product is evaluated in f32 and only then widened.
                let mut p: f64 =
                    (self.m[ai][aj] * self.m[bi][bj] - self.m[ai][bj] * self.m[bi][aj]) as f64;
                // Checkerboard sign of the cofactor.
                if (i + j) & 1 != 0 {
                    p = -p;
                }
                // Stored at [j][i]: the adjugate is the transposed cofactor matrix.
                dest_mat.m[j][i] = (inv_det as f64 * p) as f32;
            }
        }
        Some(dest_mat)
    }
    /// Returns the 3x3 identity matrix.
    pub fn identity() -> Matrix {
        Matrix {
            m: [[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]],
        }
    }
    /// Always returns `None`.
    pub fn invalid() -> Option<Matrix> {
        None
    }
    /* from pixman */
    /* MAT3per...
*/ + pub fn multiply(a: Matrix, b: Matrix) -> Matrix { + let mut result: Matrix = Matrix { m: [[0.; 3]; 3] }; + let mut dx: i32; + + let mut o: i32; + let mut dy: i32 = 0; + while dy < 3 { + dx = 0; + while dx < 3 { + let mut v: f64 = 0f64; + o = 0; + while o < 3 { + v += (a.m[dy as usize][o as usize] * b.m[o as usize][dx as usize]) as f64; + o += 1 + } + result.m[dy as usize][dx as usize] = v as f32; + dx += 1 + } + dy += 1 + } + result + } +} diff --git a/gfx/qcms/src/transform.rs b/gfx/qcms/src/transform.rs new file mode 100644 index 0000000000..cfca37be4c --- /dev/null +++ b/gfx/qcms/src/transform.rs @@ -0,0 +1,1571 @@ +// qcms +// Copyright (C) 2009 Mozilla Foundation +// Copyright (C) 1998-2007 Marti Maria +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +#![allow(clippy::missing_safety_doc)] +#[cfg(all(any(target_arch = "arm", target_arch = "aarch64"), feature = "neon"))] +use crate::transform_neon::{ + qcms_transform_data_bgra_out_lut_neon, qcms_transform_data_rgb_out_lut_neon, + qcms_transform_data_rgba_out_lut_neon, +}; +use crate::{ + chain::chain_transform, + double_to_s15Fixed16Number, + iccread::SUPPORTS_ICCV4, + matrix::*, + transform_util::{ + build_colorant_matrix, build_input_gamma_table, build_output_lut, compute_precache, + lut_interp_linear, + }, +}; +use crate::{ + iccread::{qcms_CIE_xyY, qcms_CIE_xyYTRIPLE, Profile, GRAY_SIGNATURE, RGB_SIGNATURE}, + transform_util::clamp_float, + Intent, +}; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use crate::{ + transform_avx::{ + qcms_transform_data_bgra_out_lut_avx, qcms_transform_data_rgb_out_lut_avx, + qcms_transform_data_rgba_out_lut_avx, + }, + transform_sse2::{ + qcms_transform_data_bgra_out_lut_sse2, qcms_transform_data_rgb_out_lut_sse2, + qcms_transform_data_rgba_out_lut_sse2, + }, +}; + +use std::sync::atomic::Ordering; +use std::sync::Arc; +#[cfg(all(target_arch = "arm", feature = "neon"))] +use std::arch::is_arm_feature_detected; +#[cfg(all(target_arch = "aarch64", feature = "neon"))] +use std::arch::is_aarch64_feature_detected; + +pub const PRECACHE_OUTPUT_SIZE: usize = 8192; +pub const PRECACHE_OUTPUT_MAX: usize = PRECACHE_OUTPUT_SIZE - 1; +pub const FLOATSCALE: f32 = PRECACHE_OUTPUT_SIZE as f32; +pub const CLAMPMAXVAL: f32 = ((PRECACHE_OUTPUT_SIZE - 1) as f32) / PRECACHE_OUTPUT_SIZE as f32; + +#[repr(C)] +#[derive(Debug)] +pub struct PrecacheOuput { + /* We previously used a count of 65536 here but that seems like more + * precision than we actually need. By reducing the size we can + * improve startup performance and reduce memory usage. ColorSync on + * 10.5 uses 4097 which is perhaps because they use a fixed point + * representation where 1. is represented by 0x1000. 
*/ + pub data: [u8; PRECACHE_OUTPUT_SIZE], +} + +impl Default for PrecacheOuput { + fn default() -> PrecacheOuput { + PrecacheOuput { + data: [0; PRECACHE_OUTPUT_SIZE], + } + } +} + +/* used as a lookup table for the output transformation. + * we refcount them so we only need to have one around per output + * profile, instead of duplicating them per transform */ + +#[repr(C)] +#[repr(align(16))] +#[derive(Clone, Default)] +pub struct qcms_transform { + pub matrix: [[f32; 4]; 3], + pub input_gamma_table_r: Option>, + pub input_gamma_table_g: Option>, + pub input_gamma_table_b: Option>, + pub input_clut_table_length: u16, + pub clut: Option>, + pub grid_size: u16, + pub output_clut_table_length: u16, + pub input_gamma_table_gray: Option>, + pub out_gamma_r: f32, + pub out_gamma_g: f32, + pub out_gamma_b: f32, + pub out_gamma_gray: f32, + pub output_gamma_lut_r: Option>, + pub output_gamma_lut_g: Option>, + pub output_gamma_lut_b: Option>, + pub output_gamma_lut_gray: Option>, + pub output_gamma_lut_r_length: usize, + pub output_gamma_lut_g_length: usize, + pub output_gamma_lut_b_length: usize, + pub output_gamma_lut_gray_length: usize, + pub output_table_r: Option>, + pub output_table_g: Option>, + pub output_table_b: Option>, + pub transform_fn: transform_fn_t, +} + +pub type transform_fn_t = + Option ()>; +/// The format of pixel data +#[repr(u32)] +#[derive(PartialEq, Eq, Clone, Copy)] +#[allow(clippy::upper_case_acronyms)] +pub enum DataType { + RGB8 = 0, + RGBA8 = 1, + BGRA8 = 2, + Gray8 = 3, + GrayA8 = 4, + CMYK = 5, +} + +impl DataType { + pub fn bytes_per_pixel(&self) -> usize { + match self { + RGB8 => 3, + RGBA8 => 4, + BGRA8 => 4, + Gray8 => 1, + GrayA8 => 2, + CMYK => 4, + } + } +} + +use DataType::*; + +#[repr(C)] +#[derive(Copy, Clone)] +#[allow(clippy::upper_case_acronyms)] +pub struct CIE_XYZ { + pub X: f64, + pub Y: f64, + pub Z: f64, +} + +pub trait Format { + const kRIndex: usize; + const kGIndex: usize; + const kBIndex: usize; + const kAIndex: 
usize; +} + +#[allow(clippy::upper_case_acronyms)] +pub struct BGRA; +impl Format for BGRA { + const kBIndex: usize = 0; + const kGIndex: usize = 1; + const kRIndex: usize = 2; + const kAIndex: usize = 3; +} + +#[allow(clippy::upper_case_acronyms)] +pub struct RGBA; +impl Format for RGBA { + const kRIndex: usize = 0; + const kGIndex: usize = 1; + const kBIndex: usize = 2; + const kAIndex: usize = 3; +} + +#[allow(clippy::upper_case_acronyms)] +pub struct RGB; +impl Format for RGB { + const kRIndex: usize = 0; + const kGIndex: usize = 1; + const kBIndex: usize = 2; + const kAIndex: usize = 0xFF; +} + +pub trait GrayFormat { + const has_alpha: bool; +} + +pub struct Gray; +impl GrayFormat for Gray { + const has_alpha: bool = false; +} + +pub struct GrayAlpha; +impl GrayFormat for GrayAlpha { + const has_alpha: bool = true; +} + +#[inline] +fn clamp_u8(v: f32) -> u8 { + if v > 255. { + 255 + } else if v < 0. { + 0 + } else { + (v + 0.5).floor() as u8 + } +} + +// Build a White point, primary chromas transfer matrix from RGB to CIE XYZ +// This is just an approximation, I am not handling all the non-linear +// aspects of the RGB to XYZ process, and assumming that the gamma correction +// has transitive property in the tranformation chain. +// +// the alghoritm: +// +// - First I build the absolute conversion matrix using +// primaries in XYZ. 
This matrix is next inverted +// - Then I eval the source white point across this matrix +// obtaining the coeficients of the transformation +// - Then, I apply these coeficients to the original matrix +fn build_RGB_to_XYZ_transfer_matrix( + white: qcms_CIE_xyY, + primrs: qcms_CIE_xyYTRIPLE, +) -> Option { + let mut primaries: Matrix = Matrix { m: [[0.; 3]; 3] }; + + let mut result: Matrix = Matrix { m: [[0.; 3]; 3] }; + let mut white_point: Vector = Vector { v: [0.; 3] }; + + let xn: f64 = white.x; + let yn: f64 = white.y; + if yn == 0.0f64 { + return None; + } + + let xr: f64 = primrs.red.x; + let yr: f64 = primrs.red.y; + let xg: f64 = primrs.green.x; + let yg: f64 = primrs.green.y; + let xb: f64 = primrs.blue.x; + let yb: f64 = primrs.blue.y; + primaries.m[0][0] = xr as f32; + primaries.m[0][1] = xg as f32; + primaries.m[0][2] = xb as f32; + primaries.m[1][0] = yr as f32; + primaries.m[1][1] = yg as f32; + primaries.m[1][2] = yb as f32; + primaries.m[2][0] = (1f64 - xr - yr) as f32; + primaries.m[2][1] = (1f64 - xg - yg) as f32; + primaries.m[2][2] = (1f64 - xb - yb) as f32; + white_point.v[0] = (xn / yn) as f32; + white_point.v[1] = 1.; + white_point.v[2] = ((1.0f64 - xn - yn) / yn) as f32; + let primaries_invert: Matrix = primaries.invert()?; + + let coefs: Vector = primaries_invert.eval(white_point); + result.m[0][0] = (coefs.v[0] as f64 * xr) as f32; + result.m[0][1] = (coefs.v[1] as f64 * xg) as f32; + result.m[0][2] = (coefs.v[2] as f64 * xb) as f32; + result.m[1][0] = (coefs.v[0] as f64 * yr) as f32; + result.m[1][1] = (coefs.v[1] as f64 * yg) as f32; + result.m[1][2] = (coefs.v[2] as f64 * yb) as f32; + result.m[2][0] = (coefs.v[0] as f64 * (1.0f64 - xr - yr)) as f32; + result.m[2][1] = (coefs.v[1] as f64 * (1.0f64 - xg - yg)) as f32; + result.m[2][2] = (coefs.v[2] as f64 * (1.0f64 - xb - yb)) as f32; + Some(result) +} +/* CIE Illuminant D50 */ +const D50_XYZ: CIE_XYZ = CIE_XYZ { + X: 0.9642f64, + Y: 1.0000f64, + Z: 0.8249f64, +}; +/* from lcms: 
xyY2XYZ() + * corresponds to argyll: icmYxy2XYZ() */ +fn xyY2XYZ(source: qcms_CIE_xyY) -> CIE_XYZ { + let mut dest: CIE_XYZ = CIE_XYZ { + X: 0., + Y: 0., + Z: 0., + }; + dest.X = source.x / source.y * source.Y; + dest.Y = source.Y; + dest.Z = (1f64 - source.x - source.y) / source.y * source.Y; + dest +} +/* from lcms: ComputeChromaticAdaption */ +// Compute chromatic adaption matrix using chad as cone matrix +fn compute_chromatic_adaption( + source_white_point: CIE_XYZ, + dest_white_point: CIE_XYZ, + chad: Matrix, +) -> Option { + let mut cone_source_XYZ: Vector = Vector { v: [0.; 3] }; + + let mut cone_dest_XYZ: Vector = Vector { v: [0.; 3] }; + + let mut cone: Matrix = Matrix { m: [[0.; 3]; 3] }; + + let chad_inv: Matrix = chad.invert()?; + cone_source_XYZ.v[0] = source_white_point.X as f32; + cone_source_XYZ.v[1] = source_white_point.Y as f32; + cone_source_XYZ.v[2] = source_white_point.Z as f32; + cone_dest_XYZ.v[0] = dest_white_point.X as f32; + cone_dest_XYZ.v[1] = dest_white_point.Y as f32; + cone_dest_XYZ.v[2] = dest_white_point.Z as f32; + + let cone_source_rgb: Vector = chad.eval(cone_source_XYZ); + let cone_dest_rgb: Vector = chad.eval(cone_dest_XYZ); + cone.m[0][0] = cone_dest_rgb.v[0] / cone_source_rgb.v[0]; + cone.m[0][1] = 0.; + cone.m[0][2] = 0.; + cone.m[1][0] = 0.; + cone.m[1][1] = cone_dest_rgb.v[1] / cone_source_rgb.v[1]; + cone.m[1][2] = 0.; + cone.m[2][0] = 0.; + cone.m[2][1] = 0.; + cone.m[2][2] = cone_dest_rgb.v[2] / cone_source_rgb.v[2]; + // Normalize + Some(Matrix::multiply(chad_inv, Matrix::multiply(cone, chad))) +} +/* from lcms: cmsAdaptionMatrix */ +// Returns the final chrmatic adaptation from illuminant FromIll to Illuminant ToIll +// Bradford is assumed +fn adaption_matrix(source_illumination: CIE_XYZ, target_illumination: CIE_XYZ) -> Option { + let lam_rigg: Matrix = { + Matrix { + m: [ + [0.8951, 0.2664, -0.1614], + [-0.7502, 1.7135, 0.0367], + [0.0389, -0.0685, 1.0296], + ], + } + }; + 
compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg) +} +/* from lcms: cmsAdaptMatrixToD50 */ +fn adapt_matrix_to_D50(r: Option, source_white_pt: qcms_CIE_xyY) -> Option { + if source_white_pt.y == 0.0f64 { + return None; + } + + let Dn: CIE_XYZ = xyY2XYZ(source_white_pt); + let Bradford: Matrix = adaption_matrix(Dn, D50_XYZ)?; + Some(Matrix::multiply(Bradford, r?)) +} +pub(crate) fn set_rgb_colorants( + mut profile: &mut Profile, + white_point: qcms_CIE_xyY, + primaries: qcms_CIE_xyYTRIPLE, +) -> bool { + let colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries); + let colorants = match adapt_matrix_to_D50(colorants, white_point) { + Some(colorants) => colorants, + None => return false, + }; + + /* note: there's a transpose type of operation going on here */ + profile.redColorant.X = double_to_s15Fixed16Number(colorants.m[0][0] as f64); + profile.redColorant.Y = double_to_s15Fixed16Number(colorants.m[1][0] as f64); + profile.redColorant.Z = double_to_s15Fixed16Number(colorants.m[2][0] as f64); + profile.greenColorant.X = double_to_s15Fixed16Number(colorants.m[0][1] as f64); + profile.greenColorant.Y = double_to_s15Fixed16Number(colorants.m[1][1] as f64); + profile.greenColorant.Z = double_to_s15Fixed16Number(colorants.m[2][1] as f64); + profile.blueColorant.X = double_to_s15Fixed16Number(colorants.m[0][2] as f64); + profile.blueColorant.Y = double_to_s15Fixed16Number(colorants.m[1][2] as f64); + profile.blueColorant.Z = double_to_s15Fixed16Number(colorants.m[2][2] as f64); + true +} +pub(crate) fn get_rgb_colorants( + white_point: qcms_CIE_xyY, + primaries: qcms_CIE_xyYTRIPLE, +) -> Option { + let colorants = build_RGB_to_XYZ_transfer_matrix(white_point, primaries); + adapt_matrix_to_D50(colorants, white_point) +} +/* Alpha is not corrected. + A rationale for this is found in Alvy Ray's "Should Alpha Be Nonlinear If + RGB Is?" Tech Memo 17 (December 14, 1998). 
+ See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf +*/ +unsafe extern "C" fn qcms_transform_data_gray_template_lut( + transform: &qcms_transform, + mut src: *const u8, + mut dest: *mut u8, + length: usize, +) { + let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32; + let input_gamma_table_gray = transform.input_gamma_table_gray.as_ref().unwrap(); + + let mut i: u32 = 0; + while (i as usize) < length { + let fresh0 = src; + src = src.offset(1); + let device: u8 = *fresh0; + let mut alpha: u8 = 0xffu8; + if I::has_alpha { + let fresh1 = src; + src = src.offset(1); + alpha = *fresh1 + } + let linear: f32 = input_gamma_table_gray[device as usize]; + + let out_device_r: f32 = lut_interp_linear( + linear as f64, + &(*transform).output_gamma_lut_r.as_ref().unwrap(), + ); + let out_device_g: f32 = lut_interp_linear( + linear as f64, + &(*transform).output_gamma_lut_g.as_ref().unwrap(), + ); + let out_device_b: f32 = lut_interp_linear( + linear as f64, + &(*transform).output_gamma_lut_b.as_ref().unwrap(), + ); + *dest.add(F::kRIndex) = clamp_u8(out_device_r * 255f32); + *dest.add(F::kGIndex) = clamp_u8(out_device_g * 255f32); + *dest.add(F::kBIndex) = clamp_u8(out_device_b * 255f32); + if F::kAIndex != 0xff { + *dest.add(F::kAIndex) = alpha + } + dest = dest.offset(components as isize); + i += 1 + } +} +unsafe fn qcms_transform_data_gray_out_lut( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_lut::(transform, src, dest, length); +} +unsafe fn qcms_transform_data_gray_rgba_out_lut( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_lut::(transform, src, dest, length); +} +unsafe fn qcms_transform_data_gray_bgra_out_lut( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_lut::(transform, src, dest, length); +} +unsafe fn 
qcms_transform_data_graya_rgba_out_lut( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_lut::(transform, src, dest, length); +} +unsafe fn qcms_transform_data_graya_bgra_out_lut( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_lut::(transform, src, dest, length); +} +unsafe fn qcms_transform_data_gray_template_precache( + transform: *const qcms_transform, + mut src: *const u8, + mut dest: *mut u8, + length: usize, +) { + let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32; + let output_table_r = ((*transform).output_table_r).as_deref().unwrap(); + let output_table_g = ((*transform).output_table_g).as_deref().unwrap(); + let output_table_b = ((*transform).output_table_b).as_deref().unwrap(); + + let input_gamma_table_gray = (*transform) + .input_gamma_table_gray + .as_ref() + .unwrap() + .as_ptr(); + + let mut i: u32 = 0; + while (i as usize) < length { + let fresh2 = src; + src = src.offset(1); + let device: u8 = *fresh2; + let mut alpha: u8 = 0xffu8; + if I::has_alpha { + let fresh3 = src; + src = src.offset(1); + alpha = *fresh3 + } + + let linear: f32 = *input_gamma_table_gray.offset(device as isize); + /* we could round here... 
*/ + let gray: u16 = (linear * PRECACHE_OUTPUT_MAX as f32) as u16; + *dest.add(F::kRIndex) = (output_table_r).data[gray as usize]; + *dest.add(F::kGIndex) = (output_table_g).data[gray as usize]; + *dest.add(F::kBIndex) = (output_table_b).data[gray as usize]; + if F::kAIndex != 0xff { + *dest.add(F::kAIndex) = alpha + } + dest = dest.offset(components as isize); + i += 1 + } +} +unsafe fn qcms_transform_data_gray_out_precache( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_precache::(transform, src, dest, length); +} +unsafe fn qcms_transform_data_gray_rgba_out_precache( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_precache::(transform, src, dest, length); +} +unsafe fn qcms_transform_data_gray_bgra_out_precache( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_precache::(transform, src, dest, length); +} +unsafe fn qcms_transform_data_graya_rgba_out_precache( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_precache::(transform, src, dest, length); +} +unsafe fn qcms_transform_data_graya_bgra_out_precache( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_gray_template_precache::(transform, src, dest, length); +} +unsafe fn qcms_transform_data_template_lut_precache( + transform: &qcms_transform, + mut src: *const u8, + mut dest: *mut u8, + length: usize, +) { + let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32; + let output_table_r = ((*transform).output_table_r).as_deref().unwrap(); + let output_table_g = ((*transform).output_table_g).as_deref().unwrap(); + let output_table_b = ((*transform).output_table_b).as_deref().unwrap(); + let input_gamma_table_r = 
(*transform).input_gamma_table_r.as_ref().unwrap().as_ptr(); + let input_gamma_table_g = (*transform).input_gamma_table_g.as_ref().unwrap().as_ptr(); + let input_gamma_table_b = (*transform).input_gamma_table_b.as_ref().unwrap().as_ptr(); + + let mat = &transform.matrix; + let mut i: u32 = 0; + while (i as usize) < length { + let device_r: u8 = *src.add(F::kRIndex); + let device_g: u8 = *src.add(F::kGIndex); + let device_b: u8 = *src.add(F::kBIndex); + let mut alpha: u8 = 0; + if F::kAIndex != 0xff { + alpha = *src.add(F::kAIndex) + } + src = src.offset(components as isize); + + let linear_r: f32 = *input_gamma_table_r.offset(device_r as isize); + let linear_g: f32 = *input_gamma_table_g.offset(device_g as isize); + let linear_b: f32 = *input_gamma_table_b.offset(device_b as isize); + let mut out_linear_r = mat[0][0] * linear_r + mat[1][0] * linear_g + mat[2][0] * linear_b; + let mut out_linear_g = mat[0][1] * linear_r + mat[1][1] * linear_g + mat[2][1] * linear_b; + let mut out_linear_b = mat[0][2] * linear_r + mat[1][2] * linear_g + mat[2][2] * linear_b; + out_linear_r = clamp_float(out_linear_r); + out_linear_g = clamp_float(out_linear_g); + out_linear_b = clamp_float(out_linear_b); + /* we could round here... 
*/ + + let r: u16 = (out_linear_r * PRECACHE_OUTPUT_MAX as f32) as u16; + let g: u16 = (out_linear_g * PRECACHE_OUTPUT_MAX as f32) as u16; + let b: u16 = (out_linear_b * PRECACHE_OUTPUT_MAX as f32) as u16; + *dest.add(F::kRIndex) = (output_table_r).data[r as usize]; + *dest.add(F::kGIndex) = (output_table_g).data[g as usize]; + *dest.add(F::kBIndex) = (output_table_b).data[b as usize]; + if F::kAIndex != 0xff { + *dest.add(F::kAIndex) = alpha + } + dest = dest.offset(components as isize); + i += 1 + } +} +#[no_mangle] +pub unsafe fn qcms_transform_data_rgb_out_lut_precache( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_template_lut_precache::(transform, src, dest, length); +} +#[no_mangle] +pub unsafe fn qcms_transform_data_rgba_out_lut_precache( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_template_lut_precache::(transform, src, dest, length); +} +#[no_mangle] +pub unsafe fn qcms_transform_data_bgra_out_lut_precache( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_template_lut_precache::(transform, src, dest, length); +} +// Not used +/* +static void qcms_transform_data_clut(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length) { + unsigned int i; + int xy_len = 1; + int x_len = transform->grid_size; + int len = x_len * x_len; + const float* r_table = transform->r_clut; + const float* g_table = transform->g_clut; + const float* b_table = transform->b_clut; + + for (i = 0; i < length; i++) { + unsigned char in_r = *src++; + unsigned char in_g = *src++; + unsigned char in_b = *src++; + float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f; + + int x = floorf(linear_r * (transform->grid_size-1)); + int y = floorf(linear_g * (transform->grid_size-1)); + int z = floorf(linear_b * (transform->grid_size-1)); + int x_n = 
ceilf(linear_r * (transform->grid_size-1)); + int y_n = ceilf(linear_g * (transform->grid_size-1)); + int z_n = ceilf(linear_b * (transform->grid_size-1)); + float x_d = linear_r * (transform->grid_size-1) - x; + float y_d = linear_g * (transform->grid_size-1) - y; + float z_d = linear_b * (transform->grid_size-1) - z; + + float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d); + float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d); + float r_y1 = lerp(r_x1, r_x2, y_d); + float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d); + float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_n), x_d); + float r_y2 = lerp(r_x3, r_x4, y_d); + float clut_r = lerp(r_y1, r_y2, z_d); + + float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d); + float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d); + float g_y1 = lerp(g_x1, g_x2, y_d); + float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d); + float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_n), x_d); + float g_y2 = lerp(g_x3, g_x4, y_d); + float clut_g = lerp(g_y1, g_y2, z_d); + + float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d); + float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d); + float b_y1 = lerp(b_x1, b_x2, y_d); + float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d); + float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_n), x_d); + float b_y2 = lerp(b_x3, b_x4, y_d); + float clut_b = lerp(b_y1, b_y2, z_d); + + *dest++ = clamp_u8(clut_r*255.0f); + *dest++ = clamp_u8(clut_g*255.0f); + *dest++ = clamp_u8(clut_b*255.0f); + } +} +*/ +fn int_div_ceil(value: i32, div: i32) -> i32 { + (value + div - 1) / div +} +// Using lcms' tetra interpolation algorithm. 
+unsafe extern "C" fn qcms_transform_data_tetra_clut_template( + transform: *const qcms_transform, + mut src: *const u8, + mut dest: *mut u8, + length: usize, +) { + let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32; + + let xy_len: i32 = 1; + let x_len: i32 = (*transform).grid_size as i32; + let len: i32 = x_len * x_len; + let table = (*transform).clut.as_ref().unwrap().as_ptr(); + let r_table: *const f32 = table; + let g_table: *const f32 = table.offset(1); + let b_table: *const f32 = table.offset(2); + + let mut i: u32 = 0; + while (i as usize) < length { + let c0_r: f32; + let c1_r: f32; + let c2_r: f32; + let c3_r: f32; + let c0_g: f32; + let c1_g: f32; + let c2_g: f32; + let c3_g: f32; + let c0_b: f32; + let c1_b: f32; + let c2_b: f32; + let c3_b: f32; + let in_r: u8 = *src.add(F::kRIndex); + let in_g: u8 = *src.add(F::kGIndex); + let in_b: u8 = *src.add(F::kBIndex); + let mut in_a: u8 = 0; + if F::kAIndex != 0xff { + in_a = *src.add(F::kAIndex) + } + src = src.offset(components as isize); + let linear_r: f32 = in_r as i32 as f32 / 255.0; + let linear_g: f32 = in_g as i32 as f32 / 255.0; + let linear_b: f32 = in_b as i32 as f32 / 255.0; + let x: i32 = in_r as i32 * ((*transform).grid_size as i32 - 1) / 255; + let y: i32 = in_g as i32 * ((*transform).grid_size as i32 - 1) / 255; + let z: i32 = in_b as i32 * ((*transform).grid_size as i32 - 1) / 255; + let x_n: i32 = int_div_ceil(in_r as i32 * ((*transform).grid_size as i32 - 1), 255); + let y_n: i32 = int_div_ceil(in_g as i32 * ((*transform).grid_size as i32 - 1), 255); + let z_n: i32 = int_div_ceil(in_b as i32 * ((*transform).grid_size as i32 - 1), 255); + let rx: f32 = linear_r * ((*transform).grid_size as i32 - 1) as f32 - x as f32; + let ry: f32 = linear_g * ((*transform).grid_size as i32 - 1) as f32 - y as f32; + let rz: f32 = linear_b * ((*transform).grid_size as i32 - 1) as f32 - z as f32; + let CLU = |table: *const f32, x, y, z| { + *table.offset(((x * len + y * x_len + z * xy_len) * 
3) as isize) + }; + + c0_r = CLU(r_table, x, y, z); + c0_g = CLU(g_table, x, y, z); + c0_b = CLU(b_table, x, y, z); + if rx >= ry { + if ry >= rz { + //rx >= ry && ry >= rz + c1_r = CLU(r_table, x_n, y, z) - c0_r; + c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z); + c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); + c1_g = CLU(g_table, x_n, y, z) - c0_g; + c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z); + c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); + c1_b = CLU(b_table, x_n, y, z) - c0_b; + c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z); + c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); + } else if rx >= rz { + //rx >= rz && rz >= ry + c1_r = CLU(r_table, x_n, y, z) - c0_r; + c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); + c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z); + c1_g = CLU(g_table, x_n, y, z) - c0_g; + c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); + c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z); + c1_b = CLU(b_table, x_n, y, z) - c0_b; + c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); + c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z); + } else { + //rz > rx && rx >= ry + c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n); + c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); + c3_r = CLU(r_table, x, y, z_n) - c0_r; + c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n); + c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); + c3_g = CLU(g_table, x, y, z_n) - c0_g; + c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n); + c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); + c3_b = CLU(b_table, x, y, z_n) - c0_b; + } + } else if rx >= rz { + //ry > rx && rx >= rz + c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z); + c2_r = CLU(r_table, x, y_n, z) - c0_r; + c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); + 
c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z); + c2_g = CLU(g_table, x, y_n, z) - c0_g; + c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); + c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z); + c2_b = CLU(b_table, x, y_n, z) - c0_b; + c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); + } else if ry >= rz { + //ry >= rz && rz > rx + c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); + c2_r = CLU(r_table, x, y_n, z) - c0_r; + c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z); + c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); + c2_g = CLU(g_table, x, y_n, z) - c0_g; + c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z); + c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); + c2_b = CLU(b_table, x, y_n, z) - c0_b; + c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z); + } else { + //rz > ry && ry > rx + c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); + c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n); + c3_r = CLU(r_table, x, y, z_n) - c0_r; + c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); + c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n); + c3_g = CLU(g_table, x, y, z_n) - c0_g; + c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); + c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n); + c3_b = CLU(b_table, x, y, z_n) - c0_b; + } + let clut_r = c0_r + c1_r * rx + c2_r * ry + c3_r * rz; + let clut_g = c0_g + c1_g * rx + c2_g * ry + c3_g * rz; + let clut_b = c0_b + c1_b * rx + c2_b * ry + c3_b * rz; + *dest.add(F::kRIndex) = clamp_u8(clut_r * 255.0); + *dest.add(F::kGIndex) = clamp_u8(clut_g * 255.0); + *dest.add(F::kBIndex) = clamp_u8(clut_b * 255.0); + if F::kAIndex != 0xff { + *dest.add(F::kAIndex) = in_a + } + dest = dest.offset(components as isize); + i += 1 + } +} + +unsafe fn tetra( + transform: &qcms_transform, + table: *const f32, + in_r: u8, + in_g: u8, + in_b: u8, +) -> 
(f32, f32, f32) { + let r_table: *const f32 = table; + let g_table: *const f32 = table.offset(1); + let b_table: *const f32 = table.offset(2); + let linear_r: f32 = in_r as i32 as f32 / 255.0; + let linear_g: f32 = in_g as i32 as f32 / 255.0; + let linear_b: f32 = in_b as i32 as f32 / 255.0; + let xy_len: i32 = 1; + let x_len: i32 = (*transform).grid_size as i32; + let len: i32 = x_len * x_len; + let x: i32 = in_r as i32 * ((*transform).grid_size as i32 - 1) / 255; + let y: i32 = in_g as i32 * ((*transform).grid_size as i32 - 1) / 255; + let z: i32 = in_b as i32 * ((*transform).grid_size as i32 - 1) / 255; + let x_n: i32 = int_div_ceil(in_r as i32 * ((*transform).grid_size as i32 - 1), 255); + let y_n: i32 = int_div_ceil(in_g as i32 * ((*transform).grid_size as i32 - 1), 255); + let z_n: i32 = int_div_ceil(in_b as i32 * ((*transform).grid_size as i32 - 1), 255); + let rx: f32 = linear_r * ((*transform).grid_size as i32 - 1) as f32 - x as f32; + let ry: f32 = linear_g * ((*transform).grid_size as i32 - 1) as f32 - y as f32; + let rz: f32 = linear_b * ((*transform).grid_size as i32 - 1) as f32 - z as f32; + let CLU = |table: *const f32, x, y, z| { + *table.offset(((x * len + y * x_len + z * xy_len) * 3) as isize) + }; + let c0_r: f32; + let c1_r: f32; + let c2_r: f32; + let c3_r: f32; + let c0_g: f32; + let c1_g: f32; + let c2_g: f32; + let c3_g: f32; + let c0_b: f32; + let c1_b: f32; + let c2_b: f32; + let c3_b: f32; + c0_r = CLU(r_table, x, y, z); + c0_g = CLU(g_table, x, y, z); + c0_b = CLU(b_table, x, y, z); + if rx >= ry { + if ry >= rz { + //rx >= ry && ry >= rz + c1_r = CLU(r_table, x_n, y, z) - c0_r; + c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z); + c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); + c1_g = CLU(g_table, x_n, y, z) - c0_g; + c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z); + c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); + c1_b = CLU(b_table, x_n, y, z) - c0_b; + c2_b = CLU(b_table, x_n, 
y_n, z) - CLU(b_table, x_n, y, z); + c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); + } else if rx >= rz { + //rx >= rz && rz >= ry + c1_r = CLU(r_table, x_n, y, z) - c0_r; + c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); + c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z); + c1_g = CLU(g_table, x_n, y, z) - c0_g; + c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); + c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z); + c1_b = CLU(b_table, x_n, y, z) - c0_b; + c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); + c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z); + } else { + //rz > rx && rx >= ry + c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n); + c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n); + c3_r = CLU(r_table, x, y, z_n) - c0_r; + c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n); + c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n); + c3_g = CLU(g_table, x, y, z_n) - c0_g; + c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n); + c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n); + c3_b = CLU(b_table, x, y, z_n) - c0_b; + } + } else if rx >= rz { + //ry > rx && rx >= rz + c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z); + c2_r = CLU(r_table, x, y_n, z) - c0_r; + c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z); + c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z); + c2_g = CLU(g_table, x, y_n, z) - c0_g; + c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z); + c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z); + c2_b = CLU(b_table, x, y_n, z) - c0_b; + c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z); + } else if ry >= rz { + //ry >= rz && rz > rx + c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); + c2_r = CLU(r_table, x, y_n, z) - c0_r; + c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z); + c1_g = CLU(g_table, x_n, 
y_n, z_n) - CLU(g_table, x, y_n, z_n); + c2_g = CLU(g_table, x, y_n, z) - c0_g; + c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z); + c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); + c2_b = CLU(b_table, x, y_n, z) - c0_b; + c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z); + } else { + //rz > ry && ry > rx + c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n); + c2_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y, z_n); + c3_r = CLU(r_table, x, y, z_n) - c0_r; + c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n); + c2_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y, z_n); + c3_g = CLU(g_table, x, y, z_n) - c0_g; + c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n); + c2_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y, z_n); + c3_b = CLU(b_table, x, y, z_n) - c0_b; + } + let clut_r = c0_r + c1_r * rx + c2_r * ry + c3_r * rz; + let clut_g = c0_g + c1_g * rx + c2_g * ry + c3_g * rz; + let clut_b = c0_b + c1_b * rx + c2_b * ry + c3_b * rz; + (clut_r, clut_g, clut_b) +} + +#[inline] +fn lerp(a: f32, b: f32, t: f32) -> f32 { + a * (1.0 - t) + b * t +} + +// lerp between two tetrahedral interpolations +// See lcms:Eval4InputsFloat +#[allow(clippy::many_single_char_names)] +unsafe fn qcms_transform_data_tetra_clut_cmyk( + transform: &qcms_transform, + mut src: *const u8, + mut dest: *mut u8, + length: usize, +) { + let table = (*transform).clut.as_ref().unwrap().as_ptr(); + assert!( + (*transform).clut.as_ref().unwrap().len() + >= ((transform.grid_size as i32).pow(4) * 3) as usize + ); + for _ in 0..length { + let c: u8 = *src.add(0); + let m: u8 = *src.add(1); + let y: u8 = *src.add(2); + let k: u8 = *src.add(3); + src = src.offset(4); + let linear_k: f32 = k as i32 as f32 / 255.0; + let grid_size = (*transform).grid_size as i32; + let w: i32 = k as i32 * ((*transform).grid_size as i32 - 1) / 255; + let w_n: i32 = int_div_ceil(k as i32 * ((*transform).grid_size as i32 - 1), 255); + let t: f32 = 
linear_k * ((*transform).grid_size as i32 - 1) as f32 - w as f32; + + let table1 = table.offset((w * grid_size * grid_size * grid_size * 3) as isize); + let table2 = table.offset((w_n * grid_size * grid_size * grid_size * 3) as isize); + + let (r1, g1, b1) = tetra(transform, table1, c, m, y); + let (r2, g2, b2) = tetra(transform, table2, c, m, y); + let r = lerp(r1, r2, t); + let g = lerp(g1, g2, t); + let b = lerp(b1, b2, t); + *dest.add(0) = clamp_u8(r * 255.0); + *dest.add(1) = clamp_u8(g * 255.0); + *dest.add(2) = clamp_u8(b * 255.0); + dest = dest.offset(3); + } +} + +unsafe fn qcms_transform_data_tetra_clut_rgb( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_tetra_clut_template::(transform, src, dest, length); +} +unsafe fn qcms_transform_data_tetra_clut_rgba( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_tetra_clut_template::(transform, src, dest, length); +} +unsafe fn qcms_transform_data_tetra_clut_bgra( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_tetra_clut_template::(transform, src, dest, length); +} +unsafe fn qcms_transform_data_template_lut( + transform: &qcms_transform, + mut src: *const u8, + mut dest: *mut u8, + length: usize, +) { + let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32; + + let mat = &transform.matrix; + let mut i: u32 = 0; + let input_gamma_table_r = (*transform).input_gamma_table_r.as_ref().unwrap().as_ptr(); + let input_gamma_table_g = (*transform).input_gamma_table_g.as_ref().unwrap().as_ptr(); + let input_gamma_table_b = (*transform).input_gamma_table_b.as_ref().unwrap().as_ptr(); + while (i as usize) < length { + let device_r: u8 = *src.add(F::kRIndex); + let device_g: u8 = *src.add(F::kGIndex); + let device_b: u8 = *src.add(F::kBIndex); + let mut alpha: u8 = 0; + if F::kAIndex != 0xff { + alpha = *src.add(F::kAIndex) + } + 
src = src.offset(components as isize); + + let linear_r: f32 = *input_gamma_table_r.offset(device_r as isize); + let linear_g: f32 = *input_gamma_table_g.offset(device_g as isize); + let linear_b: f32 = *input_gamma_table_b.offset(device_b as isize); + let mut out_linear_r = mat[0][0] * linear_r + mat[1][0] * linear_g + mat[2][0] * linear_b; + let mut out_linear_g = mat[0][1] * linear_r + mat[1][1] * linear_g + mat[2][1] * linear_b; + let mut out_linear_b = mat[0][2] * linear_r + mat[1][2] * linear_g + mat[2][2] * linear_b; + out_linear_r = clamp_float(out_linear_r); + out_linear_g = clamp_float(out_linear_g); + out_linear_b = clamp_float(out_linear_b); + + let out_device_r: f32 = lut_interp_linear( + out_linear_r as f64, + &(*transform).output_gamma_lut_r.as_ref().unwrap(), + ); + let out_device_g: f32 = lut_interp_linear( + out_linear_g as f64, + (*transform).output_gamma_lut_g.as_ref().unwrap(), + ); + let out_device_b: f32 = lut_interp_linear( + out_linear_b as f64, + (*transform).output_gamma_lut_b.as_ref().unwrap(), + ); + *dest.add(F::kRIndex) = clamp_u8(out_device_r * 255f32); + *dest.add(F::kGIndex) = clamp_u8(out_device_g * 255f32); + *dest.add(F::kBIndex) = clamp_u8(out_device_b * 255f32); + if F::kAIndex != 0xff { + *dest.add(F::kAIndex) = alpha + } + dest = dest.offset(components as isize); + i += 1 + } +} +#[no_mangle] +pub unsafe fn qcms_transform_data_rgb_out_lut( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_template_lut::(transform, src, dest, length); +} +#[no_mangle] +pub unsafe fn qcms_transform_data_rgba_out_lut( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_template_lut::(transform, src, dest, length); +} +#[no_mangle] +pub unsafe fn qcms_transform_data_bgra_out_lut( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_template_lut::(transform, src, dest, length); 
+} + +fn precache_create() -> Arc { + Arc::new(PrecacheOuput::default()) +} + +#[no_mangle] +pub unsafe extern "C" fn qcms_transform_release(t: *mut qcms_transform) { + drop(Box::from_raw(t)); +} + +const bradford_matrix: Matrix = Matrix { + m: [ + [0.8951, 0.2664, -0.1614], + [-0.7502, 1.7135, 0.0367], + [0.0389, -0.0685, 1.0296], + ], +}; + +const bradford_matrix_inv: Matrix = Matrix { + m: [ + [0.9869929, -0.1470543, 0.1599627], + [0.4323053, 0.5183603, 0.0492912], + [-0.0085287, 0.0400428, 0.9684867], + ], +}; + +// See ICCv4 E.3 +fn compute_whitepoint_adaption(X: f32, Y: f32, Z: f32) -> Matrix { + let p: f32 = (0.96422 * bradford_matrix.m[0][0] + + 1.000 * bradford_matrix.m[1][0] + + 0.82521 * bradford_matrix.m[2][0]) + / (X * bradford_matrix.m[0][0] + Y * bradford_matrix.m[1][0] + Z * bradford_matrix.m[2][0]); + let y: f32 = (0.96422 * bradford_matrix.m[0][1] + + 1.000 * bradford_matrix.m[1][1] + + 0.82521 * bradford_matrix.m[2][1]) + / (X * bradford_matrix.m[0][1] + Y * bradford_matrix.m[1][1] + Z * bradford_matrix.m[2][1]); + let b: f32 = (0.96422 * bradford_matrix.m[0][2] + + 1.000 * bradford_matrix.m[1][2] + + 0.82521 * bradford_matrix.m[2][2]) + / (X * bradford_matrix.m[0][2] + Y * bradford_matrix.m[1][2] + Z * bradford_matrix.m[2][2]); + let white_adaption = Matrix { + m: [[p, 0., 0.], [0., y, 0.], [0., 0., b]], + }; + Matrix::multiply( + bradford_matrix_inv, + Matrix::multiply(white_adaption, bradford_matrix), + ) +} +#[no_mangle] +pub extern "C" fn qcms_profile_precache_output_transform(mut profile: &mut Profile) { + /* we only support precaching on rgb profiles */ + if profile.color_space != RGB_SIGNATURE { + return; + } + if SUPPORTS_ICCV4.load(Ordering::Relaxed) { + /* don't precache since we will use the B2A LUT */ + if profile.B2A0.is_some() { + return; + } + /* don't precache since we will use the mBA LUT */ + if profile.mBA.is_some() { + return; + } + } + /* don't precache if we do not have the TRC curves */ + if profile.redTRC.is_none() || 
profile.greenTRC.is_none() || profile.blueTRC.is_none() { + return; + } + if profile.output_table_r.is_none() { + let mut output_table_r = precache_create(); + if compute_precache( + profile.redTRC.as_deref().unwrap(), + &mut Arc::get_mut(&mut output_table_r).unwrap().data, + ) { + profile.output_table_r = Some(output_table_r); + } + } + if profile.output_table_g.is_none() { + let mut output_table_g = precache_create(); + if compute_precache( + profile.greenTRC.as_deref().unwrap(), + &mut Arc::get_mut(&mut output_table_g).unwrap().data, + ) { + profile.output_table_g = Some(output_table_g); + } + } + if profile.output_table_b.is_none() { + let mut output_table_b = precache_create(); + if compute_precache( + profile.blueTRC.as_deref().unwrap(), + &mut Arc::get_mut(&mut output_table_b).unwrap().data, + ) { + profile.output_table_b = Some(output_table_b); + } + }; +} +/* Replace the current transformation with a LUT transformation using a given number of sample points */ +fn transform_precacheLUT_float( + mut transform: Box, + input: &Profile, + output: &Profile, + samples: i32, + in_type: DataType, +) -> Option> { + /* The range between which 2 consecutive sample points can be used to interpolate */ + let lutSize: u32 = (3 * samples * samples * samples) as u32; + + let mut src = Vec::with_capacity(lutSize as usize); + let dest = vec![0.; lutSize as usize]; + /* Prepare a list of points we want to sample */ + for x in 0..samples { + for y in 0..samples { + for z in 0..samples { + src.push(x as f32 / (samples - 1) as f32); + src.push(y as f32 / (samples - 1) as f32); + src.push(z as f32 / (samples - 1) as f32); + } + } + } + let lut = chain_transform(input, output, src, dest, lutSize as usize); + if let Some(lut) = lut { + (*transform).clut = Some(lut); + (*transform).grid_size = samples as u16; + if in_type == RGBA8 { + (*transform).transform_fn = Some(qcms_transform_data_tetra_clut_rgba) + } else if in_type == BGRA8 { + (*transform).transform_fn = 
Some(qcms_transform_data_tetra_clut_bgra) + } else if in_type == RGB8 { + (*transform).transform_fn = Some(qcms_transform_data_tetra_clut_rgb) + } + debug_assert!((*transform).transform_fn.is_some()); + } else { + return None; + } + + Some(transform) +} + +fn transform_precacheLUT_cmyk_float( + mut transform: Box, + input: &Profile, + output: &Profile, + samples: i32, + in_type: DataType, +) -> Option> { + /* The range between which 2 consecutive sample points can be used to interpolate */ + let lutSize: u32 = (4 * samples * samples * samples * samples) as u32; + + let mut src = Vec::with_capacity(lutSize as usize); + let dest = vec![0.; lutSize as usize]; + /* Prepare a list of points we want to sample */ + for k in 0..samples { + for c in 0..samples { + for m in 0..samples { + for y in 0..samples { + src.push(c as f32 / (samples - 1) as f32); + src.push(m as f32 / (samples - 1) as f32); + src.push(y as f32 / (samples - 1) as f32); + src.push(k as f32 / (samples - 1) as f32); + } + } + } + } + let lut = chain_transform(input, output, src, dest, lutSize as usize); + if let Some(lut) = lut { + transform.clut = Some(lut); + transform.grid_size = samples as u16; + assert!(in_type == DataType::CMYK); + transform.transform_fn = Some(qcms_transform_data_tetra_clut_cmyk) + } else { + return None; + } + + Some(transform) +} + +pub fn transform_create( + input: &Profile, + in_type: DataType, + output: &Profile, + out_type: DataType, + _intent: Intent, +) -> Option> { + // Ensure the requested input and output types make sense. 
+ let matching_format = match (in_type, out_type) { + (RGB8, RGB8) => true, + (RGBA8, RGBA8) => true, + (BGRA8, BGRA8) => true, + (Gray8, out_type) => matches!(out_type, RGB8 | RGBA8 | BGRA8), + (GrayA8, out_type) => matches!(out_type, RGBA8 | BGRA8), + (CMYK, RGB8) => true, + _ => false, + }; + if !matching_format { + debug_assert!(false, "input/output type"); + return None; + } + let mut transform: Box = Box::new(Default::default()); + let mut precache: bool = false; + if output.output_table_r.is_some() + && output.output_table_g.is_some() + && output.output_table_b.is_some() + { + precache = true + } + // This precache assumes RGB_SIGNATURE (fails on GRAY_SIGNATURE, for instance) + if SUPPORTS_ICCV4.load(Ordering::Relaxed) + && (in_type == RGB8 || in_type == RGBA8 || in_type == BGRA8 || in_type == CMYK) + && (input.A2B0.is_some() + || output.B2A0.is_some() + || input.mAB.is_some() + || output.mAB.is_some()) + { + if in_type == CMYK { + return transform_precacheLUT_cmyk_float(transform, input, output, 17, in_type); + } + // Precache the transformation to a CLUT 33x33x33 in size. + // 33 is used by many profiles and works well in pratice. + // This evenly divides 256 into blocks of 8x8x8. + // TODO For transforming small data sets of about 200x200 or less + // precaching should be avoided. 
+ let result = transform_precacheLUT_float(transform, input, output, 33, in_type); + debug_assert!(result.is_some(), "precacheLUT failed"); + return result; + } + if precache { + transform.output_table_r = Some(Arc::clone(output.output_table_r.as_ref().unwrap())); + transform.output_table_g = Some(Arc::clone(output.output_table_g.as_ref().unwrap())); + transform.output_table_b = Some(Arc::clone(output.output_table_b.as_ref().unwrap())); + } else { + if output.redTRC.is_none() || output.greenTRC.is_none() || output.blueTRC.is_none() { + return None; + } + transform.output_gamma_lut_r = build_output_lut(output.redTRC.as_deref().unwrap()); + transform.output_gamma_lut_g = build_output_lut(output.greenTRC.as_deref().unwrap()); + transform.output_gamma_lut_b = build_output_lut(output.blueTRC.as_deref().unwrap()); + + if transform.output_gamma_lut_r.is_none() + || transform.output_gamma_lut_g.is_none() + || transform.output_gamma_lut_b.is_none() + { + return None; + } + } + if input.color_space == RGB_SIGNATURE { + if precache { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + if is_x86_feature_detected!("avx") { + if in_type == RGB8 { + transform.transform_fn = Some(qcms_transform_data_rgb_out_lut_avx) + } else if in_type == RGBA8 { + transform.transform_fn = Some(qcms_transform_data_rgba_out_lut_avx) + } else if in_type == BGRA8 { + transform.transform_fn = Some(qcms_transform_data_bgra_out_lut_avx) + } + } else if cfg!(not(miri)) && is_x86_feature_detected!("sse2") { + if in_type == RGB8 { + transform.transform_fn = Some(qcms_transform_data_rgb_out_lut_sse2) + } else if in_type == RGBA8 { + transform.transform_fn = Some(qcms_transform_data_rgba_out_lut_sse2) + } else if in_type == BGRA8 { + transform.transform_fn = Some(qcms_transform_data_bgra_out_lut_sse2) + } + } + + #[cfg(all(target_arch = "arm", feature = "neon"))] + let neon_supported = is_arm_feature_detected!("neon"); + #[cfg(all(target_arch = "aarch64", feature = "neon"))] + let neon_supported = 
is_aarch64_feature_detected!("neon"); + + #[cfg(all(any(target_arch = "arm", target_arch = "aarch64"), feature = "neon"))] + if neon_supported { + if in_type == RGB8 { + transform.transform_fn = Some(qcms_transform_data_rgb_out_lut_neon) + } else if in_type == RGBA8 { + transform.transform_fn = Some(qcms_transform_data_rgba_out_lut_neon) + } else if in_type == BGRA8 { + transform.transform_fn = Some(qcms_transform_data_bgra_out_lut_neon) + } + } + + if transform.transform_fn.is_none() { + if in_type == RGB8 { + transform.transform_fn = Some(qcms_transform_data_rgb_out_lut_precache) + } else if in_type == RGBA8 { + transform.transform_fn = Some(qcms_transform_data_rgba_out_lut_precache) + } else if in_type == BGRA8 { + transform.transform_fn = Some(qcms_transform_data_bgra_out_lut_precache) + } + } + } else if in_type == RGB8 { + transform.transform_fn = Some(qcms_transform_data_rgb_out_lut) + } else if in_type == RGBA8 { + transform.transform_fn = Some(qcms_transform_data_rgba_out_lut) + } else if in_type == BGRA8 { + transform.transform_fn = Some(qcms_transform_data_bgra_out_lut) + } + //XXX: avoid duplicating tables if we can + transform.input_gamma_table_r = build_input_gamma_table(input.redTRC.as_deref()); + transform.input_gamma_table_g = build_input_gamma_table(input.greenTRC.as_deref()); + transform.input_gamma_table_b = build_input_gamma_table(input.blueTRC.as_deref()); + if transform.input_gamma_table_r.is_none() + || transform.input_gamma_table_g.is_none() + || transform.input_gamma_table_b.is_none() + { + return None; + } + /* build combined colorant matrix */ + + let in_matrix: Matrix = build_colorant_matrix(input); + let mut out_matrix: Matrix = build_colorant_matrix(output); + out_matrix = out_matrix.invert()?; + + let result_0: Matrix = Matrix::multiply(out_matrix, in_matrix); + /* check for NaN values in the matrix and bail if we find any */ + let mut i: u32 = 0; + while i < 3 { + let mut j: u32 = 0; + while j < 3 { + #[allow(clippy::eq_op, 
clippy::float_cmp)] + if result_0.m[i as usize][j as usize].is_nan() { + return None; + } + j += 1 + } + i += 1 + } + /* store the results in column major mode + * this makes doing the multiplication with sse easier */ + transform.matrix[0][0] = result_0.m[0][0]; + transform.matrix[1][0] = result_0.m[0][1]; + transform.matrix[2][0] = result_0.m[0][2]; + transform.matrix[0][1] = result_0.m[1][0]; + transform.matrix[1][1] = result_0.m[1][1]; + transform.matrix[2][1] = result_0.m[1][2]; + transform.matrix[0][2] = result_0.m[2][0]; + transform.matrix[1][2] = result_0.m[2][1]; + transform.matrix[2][2] = result_0.m[2][2] + } else if input.color_space == GRAY_SIGNATURE { + transform.input_gamma_table_gray = build_input_gamma_table(input.grayTRC.as_deref()); + transform.input_gamma_table_gray.as_ref()?; + if precache { + if out_type == RGB8 { + transform.transform_fn = Some(qcms_transform_data_gray_out_precache) + } else if out_type == RGBA8 { + if in_type == Gray8 { + transform.transform_fn = Some(qcms_transform_data_gray_rgba_out_precache) + } else { + transform.transform_fn = Some(qcms_transform_data_graya_rgba_out_precache) + } + } else if out_type == BGRA8 { + if in_type == Gray8 { + transform.transform_fn = Some(qcms_transform_data_gray_bgra_out_precache) + } else { + transform.transform_fn = Some(qcms_transform_data_graya_bgra_out_precache) + } + } + } else if out_type == RGB8 { + transform.transform_fn = Some(qcms_transform_data_gray_out_lut) + } else if out_type == RGBA8 { + if in_type == Gray8 { + transform.transform_fn = Some(qcms_transform_data_gray_rgba_out_lut) + } else { + transform.transform_fn = Some(qcms_transform_data_graya_rgba_out_lut) + } + } else if out_type == BGRA8 { + if in_type == Gray8 { + transform.transform_fn = Some(qcms_transform_data_gray_bgra_out_lut) + } else { + transform.transform_fn = Some(qcms_transform_data_graya_bgra_out_lut) + } + } + } else { + debug_assert!(false, "unexpected colorspace"); + return None; + } + 
debug_assert!(transform.transform_fn.is_some());
Some(transform)
}

/// A transform from an input profile to an output one.
pub struct Transform {
    /// Pixel layout the transform reads.
    src_ty: DataType,
    /// Pixel layout the transform writes.
    dst_ty: DataType,
    /// The transform produced by `transform_create`.
    xfm: Box<qcms_transform>,
}

impl Transform {
    /// Create a new transform from `input` to `output` for pixels of `DataType` `ty` with `intent`
    ///
    /// Source and destination pixels share the layout `ty`. Returns `None` when
    /// `transform_create` does.
    pub fn new(input: &Profile, output: &Profile, ty: DataType, intent: Intent) -> Option<Self> {
        Self::new_to(input, output, ty, ty, intent)
    }

    /// Create a new transform from `input` to `output` that reads pixels of `DataType` `src_ty`
    /// and writes pixels of `DataType` `dst_ty`, with `intent`
    ///
    /// Returns `None` when `transform_create` does.
    pub fn new_to(
        input: &Profile,
        output: &Profile,
        src_ty: DataType,
        dst_ty: DataType,
        intent: Intent,
    ) -> Option<Self> {
        transform_create(input, src_ty, output, dst_ty, intent).map(|xfm| Transform {
            src_ty,
            dst_ty,
            xfm,
        })
    }

    /// Apply the color space transform to `data` in place
    ///
    /// # Panics
    ///
    /// Panics if `data.len()` is not a multiple of the source pixel size, or if the
    /// destination pixel size is larger than the source pixel size (the converted pixels
    /// would not fit in `data`; use [`Transform::convert`] with a separate buffer instead).
    pub fn apply(&self, data: &mut [u8]) {
        let src_bpp = self.src_ty.bytes_per_pixel();
        let dst_bpp = self.dst_ty.bytes_per_pixel();
        if data.len() % src_bpp != 0 {
            panic!(
                "incomplete pixels: should be a multiple of {} got {}",
                src_bpp,
                data.len()
            )
        }
        // The kernel is handed `data` as both source and destination together with a pixel
        // count computed from the source pixel size. If destination pixels were wider it
        // would write past the end of `data`, so refuse that combination up front.
        assert!(
            dst_bpp <= src_bpp,
            "cannot transform in place: destination pixels ({} bytes) are larger than source pixels ({} bytes)",
            dst_bpp,
            src_bpp
        );
        // SAFETY: `data` holds exactly `data.len() / src_bpp` source pixels and, per the
        // assertion above, at least as many bytes as that many destination pixels.
        // NOTE(review): this relies on the kernel touching exactly `length` pixels of the
        // formats the transform was created with — confirm against the kernels.
        unsafe {
            self.xfm.transform_fn.expect("non-null function pointer")(
                &*self.xfm,
                data.as_ptr(),
                data.as_mut_ptr(),
                data.len() / src_bpp,
            );
        }
    }

    /// Apply the color space transform to the pixels in `src`, writing the result to `dst`
    ///
    /// # Panics
    ///
    /// Panics if `src` is not a whole number of source pixels, if `dst` is not a whole
    /// number of destination pixels, or if the two buffers hold different pixel counts.
    pub fn convert(&self, src: &[u8], dst: &mut [u8]) {
        let src_bpp = self.src_ty.bytes_per_pixel();
        let dst_bpp = self.dst_ty.bytes_per_pixel();
        if src.len() % src_bpp != 0 {
            panic!(
                "incomplete pixels: should be a multiple of {} got {}",
                src_bpp,
                src.len()
            )
        }
        if dst.len() % dst_bpp != 0 {
            panic!(
                "incomplete pixels: should be a multiple of {} got {}",
                dst_bpp,
                dst.len()
            )
        }
        assert_eq!(src.len() / src_bpp, dst.len() / dst_bpp);
        // SAFETY: both buffers were just checked to hold the same whole number of pixels
        // of their respective formats.
        unsafe {
            self.xfm.transform_fn.expect("non-null function pointer")(
                &*self.xfm,
                src.as_ptr(),
                dst.as_mut_ptr(),
                src.len() / src_bpp,
            );
        }
    }
}

/// Sets the `SUPPORTS_ICCV4` flag.
#[no_mangle]
pub extern "C" fn qcms_enable_iccv4() {
    SUPPORTS_ICCV4.store(true, Ordering::Relaxed);
}

// diff --git a/gfx/qcms/src/transform_avx.rs b/gfx/qcms/src/transform_avx.rs
// new file mode 100644
// index 0000000000..b34fc869d5
// --- /dev/null
// +++ b/gfx/qcms/src/transform_avx.rs
// @@ -0,0 +1,230 @@
use crate::transform::{qcms_transform, Format, BGRA, CLAMPMAXVAL, FLOATSCALE, RGB, RGBA};
#[cfg(target_arch = "x86")]
pub use std::arch::x86::{
    __m128, __m128i, __m256, __m256i, _mm256_add_ps, _mm256_broadcast_ps, _mm256_castps128_ps256,
    _mm256_castps256_ps128, _mm256_cvtps_epi32, _mm256_insertf128_ps, _mm256_max_ps, _mm256_min_ps,
    _mm256_mul_ps, _mm256_set1_ps, _mm256_set_ps, _mm256_setzero_ps, _mm256_store_si256,
    _mm_add_ps, _mm_broadcast_ss, _mm_cvtps_epi32, _mm_loadu_ps, _mm_max_ps, _mm_min_ps,
    _mm_mul_ps, _mm_setzero_ps, _mm_store_si128,
};
#[cfg(target_arch = "x86_64")]
pub use std::arch::x86_64::{
    __m128, __m128i, __m256, __m256i, _mm256_add_ps, _mm256_broadcast_ps, _mm256_castps128_ps256,
    _mm256_castps256_ps128, _mm256_cvtps_epi32, _mm256_insertf128_ps, _mm256_max_ps, _mm256_min_ps,
    _mm256_mul_ps, _mm256_set1_ps, _mm256_set_ps, _mm256_setzero_ps, _mm256_store_si256,
    _mm_add_ps, _mm_broadcast_ss, _mm_cvtps_epi32, _mm_loadu_ps, _mm_max_ps, _mm_min_ps,
    _mm_mul_ps, _mm_setzero_ps, _mm_store_si128,
};

/// 32-byte aligned scratch space that receives the eight `u32` lanes of an aligned
/// 256-bit store.
#[repr(align(32))]
struct Output([u32; 8]);

/// Transforms `length` pixels of format `F` from `src` to `dest`, two pixels per step.
///
/// For every pixel the R, G and B bytes are looked up in the input gamma tables, the
/// three results are multiplied by the rows of `transform.matrix` and summed, the sum is
/// clamped to `0..=CLAMPMAXVAL`, scaled by `FLOATSCALE`, converted to integers and used as
/// indices into the output tables. When `F` has an alpha channel it is copied unchanged.
///
/// # Safety
///
/// * The CPU must support AVX.
/// * `src` must be readable and `dest` writable for `length` pixels of format `F`.
/// * The input gamma tables and output tables of `transform` must be populated; the
///   function panics on `unwrap` otherwise.
/// * NOTE(review): rows of `transform.matrix` are reinterpreted as `__m128` references,
///   which presumes the matrix is 16-byte aligned — confirm against `qcms_transform`.
#[target_feature(enable = "avx")]
unsafe extern "C" fn qcms_transform_data_template_lut_avx<F: Format>(
    transform: &qcms_transform,
    mut src: *const u8,
    mut dest: *mut u8,
    mut length: usize,
) {
    let mat: *const [f32; 4] = (*transform).matrix.as_ptr();
    let mut input: Output = std::mem::zeroed();
    /* share input and output locations to save having to keep the
     * locations in separate registers */
    let output: *const u32 = &mut input as *mut Output as *mut u32;
    /* deref *transform now to avoid it in loop */
    let igtbl_r: *const f32 = (*transform).input_gamma_table_r.as_ref().unwrap().as_ptr();
    let igtbl_g: *const f32 = (*transform).input_gamma_table_g.as_ref().unwrap().as_ptr();
    let igtbl_b: *const f32 = (*transform).input_gamma_table_b.as_ref().unwrap().as_ptr();
    /* deref *transform now to avoid it in loop */
    let otdata_r: *const u8 = (*transform)
        .output_table_r
        .as_deref()
        .unwrap()
        .data
        .as_ptr();
    let otdata_g: *const u8 = (*transform)
        .output_table_g
        .as_deref()
        .unwrap()
        .data
        .as_ptr();
    let otdata_b: *const u8 = (*transform)
        .output_table_b
        .as_deref()
        .unwrap()
        .data
        .as_ptr();
    /* input matrix values never change */
    let mat0: __m256 = _mm256_broadcast_ps(&*((*mat.offset(0isize)).as_ptr() as *const __m128));
    let mat1: __m256 = _mm256_broadcast_ps(&*((*mat.offset(1isize)).as_ptr() as *const __m128));
    let mat2: __m256 = _mm256_broadcast_ps(&*((*mat.offset(2isize)).as_ptr() as *const __m128));
    /* these values don't change, either */
    let max: __m256 = _mm256_set1_ps(CLAMPMAXVAL);
    let min: __m256 = _mm256_setzero_ps();
    let scale: __m256 = _mm256_set1_ps(FLOATSCALE);
    /* an alpha index of 0xff marks a format without an alpha channel */
    let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32;
    /* working variables */
    let mut vec_r: __m256 = _mm256_setzero_ps();
    let mut vec_g: __m256 = _mm256_setzero_ps();
    let mut vec_b: __m256 = _mm256_setzero_ps();
    let mut result: __m256;
    let mut vec_r0: __m128;
    let mut vec_g0: __m128;
    let mut vec_b0: __m128;
    let mut vec_r1: __m128;
    let mut vec_g1: __m128;
    let mut vec_b1: __m128;
    let mut alpha1: u8 = 0;
    let mut alpha2: u8 = 0;
    /* CYA */
    if length == 0 {
        return;
    }
    /* If there are at least 2 pixels, then we can load their components into
    a single 256-bit register for processing. */
    if length > 1 {
        vec_r0 = _mm_broadcast_ss(&*igtbl_r.offset(*src.add(F::kRIndex) as isize));
        vec_g0 = _mm_broadcast_ss(&*igtbl_g.offset(*src.add(F::kGIndex) as isize));
        vec_b0 = _mm_broadcast_ss(&*igtbl_b.offset(*src.add(F::kBIndex) as isize));
        vec_r1 =
            _mm_broadcast_ss(&*igtbl_r.offset(*src.add(F::kRIndex + components as usize) as isize));
        vec_g1 =
            _mm_broadcast_ss(&*igtbl_g.offset(*src.add(F::kGIndex + components as usize) as isize));
        vec_b1 =
            _mm_broadcast_ss(&*igtbl_b.offset(*src.add(F::kBIndex + components as usize) as isize));
        vec_r = _mm256_insertf128_ps(_mm256_castps128_ps256(vec_r0), vec_r1, 1);
        vec_g = _mm256_insertf128_ps(_mm256_castps128_ps256(vec_g0), vec_g1, 1);
        vec_b = _mm256_insertf128_ps(_mm256_castps128_ps256(vec_b0), vec_b1, 1);
        if F::kAIndex != 0xff {
            alpha1 = *src.add(F::kAIndex);
            alpha2 = *src.add(F::kAIndex + components as usize)
        }
    }
    /* If there are at least 4 pixels, then we can iterate and preload the
    next 2 while we store the result of the current 2. */
    while length > 3 {
        /* Ensure we are pointing at the next 2 pixels for the next load. */
        src = src.offset((2 * components) as isize);
        /* gamma * matrix */
        vec_r = _mm256_mul_ps(vec_r, mat0);
        vec_g = _mm256_mul_ps(vec_g, mat1);
        vec_b = _mm256_mul_ps(vec_b, mat2);
        /* store alpha for these pixels; load alpha for next two */
        if F::kAIndex != 0xff {
            *dest.add(F::kAIndex) = alpha1;
            *dest.add(F::kAIndex + components as usize) = alpha2;
            alpha1 = *src.add(F::kAIndex);
            alpha2 = *src.add(F::kAIndex + components as usize)
        }
        /* crunch, crunch, crunch */
        vec_r = _mm256_add_ps(vec_r, _mm256_add_ps(vec_g, vec_b));
        vec_r = _mm256_max_ps(min, vec_r);
        vec_r = _mm256_min_ps(max, vec_r);
        result = _mm256_mul_ps(vec_r, scale);
        /* store calc'd output tables indices */
        _mm256_store_si256(output as *mut __m256i, _mm256_cvtps_epi32(result));
        /* load gamma values for next loop while store completes */
        vec_r0 = _mm_broadcast_ss(&*igtbl_r.offset(*src.add(F::kRIndex) as isize));
        vec_g0 = _mm_broadcast_ss(&*igtbl_g.offset(*src.add(F::kGIndex) as isize));
        vec_b0 = _mm_broadcast_ss(&*igtbl_b.offset(*src.add(F::kBIndex) as isize));
        vec_r1 =
            _mm_broadcast_ss(&*igtbl_r.offset(*src.add(F::kRIndex + components as usize) as isize));
        vec_g1 =
            _mm_broadcast_ss(&*igtbl_g.offset(*src.add(F::kGIndex + components as usize) as isize));
        vec_b1 =
            _mm_broadcast_ss(&*igtbl_b.offset(*src.add(F::kBIndex + components as usize) as isize));
        vec_r = _mm256_insertf128_ps(_mm256_castps128_ps256(vec_r0), vec_r1, 1);
        vec_g = _mm256_insertf128_ps(_mm256_castps128_ps256(vec_g0), vec_g1, 1);
        vec_b = _mm256_insertf128_ps(_mm256_castps128_ps256(vec_b0), vec_b1, 1);
        /* use calc'd indices to output RGB values */
        *dest.add(F::kRIndex) = *otdata_r.offset(*output.offset(0isize) as isize);
        *dest.add(F::kGIndex) = *otdata_g.offset(*output.offset(1isize) as isize);
        *dest.add(F::kBIndex) = *otdata_b.offset(*output.offset(2isize) as isize);
        *dest.add(F::kRIndex + components as usize) =
            *otdata_r.offset(*output.offset(4isize) as isize);
        *dest.add(F::kGIndex + components as usize) =
            *otdata_g.offset(*output.offset(5isize) as isize);
        *dest.add(F::kBIndex + components as usize) =
            *otdata_b.offset(*output.offset(6isize) as isize);
        dest = dest.offset((2 * components) as isize);
        length -= 2
    }
    /* There are 0-3 pixels remaining. If there are 2-3 remaining, then we know
    we have already populated the necessary registers to start the transform. */
    if length > 1 {
        vec_r = _mm256_mul_ps(vec_r, mat0);
        vec_g = _mm256_mul_ps(vec_g, mat1);
        vec_b = _mm256_mul_ps(vec_b, mat2);
        if F::kAIndex != 0xff {
            *dest.add(F::kAIndex) = alpha1;
            *dest.add(F::kAIndex + components as usize) = alpha2
        }
        vec_r = _mm256_add_ps(vec_r, _mm256_add_ps(vec_g, vec_b));
        vec_r = _mm256_max_ps(min, vec_r);
        vec_r = _mm256_min_ps(max, vec_r);
        result = _mm256_mul_ps(vec_r, scale);
        _mm256_store_si256(output as *mut __m256i, _mm256_cvtps_epi32(result));
        *dest.add(F::kRIndex) = *otdata_r.offset(*output.offset(0isize) as isize);
        *dest.add(F::kGIndex) = *otdata_g.offset(*output.offset(1isize) as isize);
        *dest.add(F::kBIndex) = *otdata_b.offset(*output.offset(2isize) as isize);
        *dest.add(F::kRIndex + components as usize) =
            *otdata_r.offset(*output.offset(4isize) as isize);
        *dest.add(F::kGIndex + components as usize) =
            *otdata_g.offset(*output.offset(5isize) as isize);
        *dest.add(F::kBIndex + components as usize) =
            *otdata_b.offset(*output.offset(6isize) as isize);
        src = src.offset((2 * components) as isize);
        dest = dest.offset((2 * components) as isize);
        length -= 2
    }
    /* There may be 0-1 pixels remaining. */
    if length == 1 {
        vec_r0 = _mm_broadcast_ss(&*igtbl_r.offset(*src.add(F::kRIndex) as isize));
        vec_g0 = _mm_broadcast_ss(&*igtbl_g.offset(*src.add(F::kGIndex) as isize));
        vec_b0 = _mm_broadcast_ss(&*igtbl_b.offset(*src.add(F::kBIndex) as isize));
        vec_r0 = _mm_mul_ps(vec_r0, _mm256_castps256_ps128(mat0));
        vec_g0 = _mm_mul_ps(vec_g0, _mm256_castps256_ps128(mat1));
        vec_b0 = _mm_mul_ps(vec_b0, _mm256_castps256_ps128(mat2));
        if F::kAIndex != 0xff {
            *dest.add(F::kAIndex) = *src.add(F::kAIndex)
        }
        vec_r0 = _mm_add_ps(vec_r0, _mm_add_ps(vec_g0, vec_b0));
        vec_r0 = _mm_max_ps(_mm256_castps256_ps128(min), vec_r0);
        vec_r0 = _mm_min_ps(_mm256_castps256_ps128(max), vec_r0);
        vec_r0 = _mm_mul_ps(vec_r0, _mm256_castps256_ps128(scale));
        _mm_store_si128(output as *mut __m128i, _mm_cvtps_epi32(vec_r0));
        *dest.add(F::kRIndex) = *otdata_r.offset(*output.offset(0isize) as isize);
        *dest.add(F::kGIndex) = *otdata_g.offset(*output.offset(1isize) as isize);
        *dest.add(F::kBIndex) = *otdata_b.offset(*output.offset(2isize) as isize)
    };
}

/// AVX kernel for `RGB` pixels; see `qcms_transform_data_template_lut_avx` for the
/// safety requirements.
#[no_mangle]
#[target_feature(enable = "avx")]
pub unsafe fn qcms_transform_data_rgb_out_lut_avx(
    transform: &qcms_transform,
    src: *const u8,
    dest: *mut u8,
    length: usize,
) {
    qcms_transform_data_template_lut_avx::<RGB>(transform, src, dest, length);
}

/// AVX kernel for `RGBA` pixels; see `qcms_transform_data_template_lut_avx` for the
/// safety requirements.
#[no_mangle]
#[target_feature(enable = "avx")]
pub unsafe fn qcms_transform_data_rgba_out_lut_avx(
    transform: &qcms_transform,
    src: *const u8,
    dest: *mut u8,
    length: usize,
) {
    qcms_transform_data_template_lut_avx::<RGBA>(transform, src, dest, length);
}

/// AVX kernel for `BGRA` pixels; see `qcms_transform_data_template_lut_avx` for the
/// safety requirements.
#[no_mangle]
#[target_feature(enable = "avx")]
pub unsafe fn qcms_transform_data_bgra_out_lut_avx(
    transform: &qcms_transform,
    src: *const u8,
    dest: *mut u8,
    length: usize,
) {
    qcms_transform_data_template_lut_avx::<BGRA>(transform, src, dest, length);
}
// diff --git a/gfx/qcms/src/transform_neon.rs b/gfx/qcms/src/transform_neon.rs new file mode 100644 index 0000000000..d3983ba18c --- /dev/null +++
b/gfx/qcms/src/transform_neon.rs @@ -0,0 +1,158 @@ +use crate::transform::{qcms_transform, Format, BGRA, CLAMPMAXVAL, FLOATSCALE, RGB, RGBA}; +#[cfg(target_arch = "aarch64")] +use core::arch::aarch64::{ + float32x4_t, int32x4_t, vaddq_f32, vcvtq_s32_f32, vgetq_lane_s32, vld1q_dup_f32, vld1q_f32, + vmaxq_f32, vminq_f32, vmulq_f32, +}; +#[cfg(target_arch = "arm")] +use core::arch::arm::{ + float32x4_t, int32x4_t, vaddq_f32, vcvtq_s32_f32, vgetq_lane_s32, vld1q_dup_f32, vld1q_f32, + vmaxq_f32, vminq_f32, vmulq_f32, +}; +use std::mem::zeroed; + +static mut floatScale: f32 = FLOATSCALE; +static mut clampMaxValue: f32 = CLAMPMAXVAL; + +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +unsafe fn qcms_transform_data_template_lut_neon( + transform: &qcms_transform, + mut src: *const u8, + mut dest: *mut u8, + mut length: usize, +) { + let mat: *const [f32; 4] = (*transform).matrix.as_ptr(); + /* deref *transform now to avoid it in loop */ + let igtbl_r: *const f32 = (*transform).input_gamma_table_r.as_ref().unwrap().as_ptr(); + let igtbl_g: *const f32 = (*transform).input_gamma_table_g.as_ref().unwrap().as_ptr(); + let igtbl_b: *const f32 = (*transform).input_gamma_table_b.as_ref().unwrap().as_ptr(); + /* deref *transform now to avoid it in loop */ + let otdata_r: *const u8 = (*transform) + .output_table_r + .as_deref() + .unwrap() + .data + .as_ptr(); + let otdata_g: *const u8 = (*transform) + .output_table_g + .as_deref() + .unwrap() + .data + .as_ptr(); + let otdata_b: *const u8 = (*transform) + .output_table_b + .as_deref() + .unwrap() + .data + .as_ptr(); + /* input matrix values never change */ + let mat0: float32x4_t = vld1q_f32((*mat.offset(0isize)).as_ptr()); + let mat1: float32x4_t = vld1q_f32((*mat.offset(1isize)).as_ptr()); + let mat2: float32x4_t = vld1q_f32((*mat.offset(2isize)).as_ptr()); + /* these values don't change, either */ + let max: float32x4_t = vld1q_dup_f32(&clampMaxValue); + let min: float32x4_t = 
zeroed(); + let scale: float32x4_t = vld1q_dup_f32(&floatScale); + let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32; + /* working variables */ + let mut vec_r: float32x4_t; + let mut vec_g: float32x4_t; + let mut vec_b: float32x4_t; + let mut result: int32x4_t; + let mut alpha: u8 = 0; + /* CYA */ + if length == 0 { + return; + } + /* one pixel is handled outside of the loop */ + length = length.wrapping_sub(1); + /* setup for transforming 1st pixel */ + vec_r = vld1q_dup_f32(&*igtbl_r.offset(*src.offset(F::kRIndex as isize) as isize)); + vec_g = vld1q_dup_f32(&*igtbl_g.offset(*src.offset(F::kGIndex as isize) as isize)); + vec_b = vld1q_dup_f32(&*igtbl_b.offset(*src.offset(F::kBIndex as isize) as isize)); + if F::kAIndex != 0xff { + alpha = *src.offset(F::kAIndex as isize) + } + src = src.offset(components as isize); + let mut i: u32 = 0; + while (i as usize) < length { + /* gamma * matrix */ + vec_r = vmulq_f32(vec_r, mat0); + vec_g = vmulq_f32(vec_g, mat1); + vec_b = vmulq_f32(vec_b, mat2); + /* store alpha for this pixel; load alpha for next */ + if F::kAIndex != 0xff { + *dest.offset(F::kAIndex as isize) = alpha; + alpha = *src.offset(F::kAIndex as isize) + } + /* crunch, crunch, crunch */ + vec_r = vaddq_f32(vec_r, vaddq_f32(vec_g, vec_b)); + vec_r = vmaxq_f32(min, vec_r); + vec_r = vminq_f32(max, vec_r); + result = vcvtq_s32_f32(vmulq_f32(vec_r, scale)); + + /* use calc'd indices to output RGB values */ + *dest.offset(F::kRIndex as isize) = *otdata_r.offset(vgetq_lane_s32(result, 0) as isize); + *dest.offset(F::kGIndex as isize) = *otdata_g.offset(vgetq_lane_s32(result, 1) as isize); + *dest.offset(F::kBIndex as isize) = *otdata_b.offset(vgetq_lane_s32(result, 2) as isize); + + /* load gamma values for next loop while store completes */ + vec_r = vld1q_dup_f32(&*igtbl_r.offset(*src.offset(F::kRIndex as isize) as isize)); + vec_g = vld1q_dup_f32(&*igtbl_g.offset(*src.offset(F::kGIndex as isize) as isize)); + vec_b = 
vld1q_dup_f32(&*igtbl_b.offset(*src.offset(F::kBIndex as isize) as isize)); + + dest = dest.offset(components as isize); + src = src.offset(components as isize); + i = i.wrapping_add(1) + } + /* handle final (maybe only) pixel */ + vec_r = vmulq_f32(vec_r, mat0); + vec_g = vmulq_f32(vec_g, mat1); + vec_b = vmulq_f32(vec_b, mat2); + if F::kAIndex != 0xff { + *dest.offset(F::kAIndex as isize) = alpha + } + vec_r = vaddq_f32(vec_r, vaddq_f32(vec_g, vec_b)); + vec_r = vmaxq_f32(min, vec_r); + vec_r = vminq_f32(max, vec_r); + result = vcvtq_s32_f32(vmulq_f32(vec_r, scale)); + + *dest.offset(F::kRIndex as isize) = *otdata_r.offset(vgetq_lane_s32(result, 0) as isize); + *dest.offset(F::kGIndex as isize) = *otdata_g.offset(vgetq_lane_s32(result, 1) as isize); + *dest.offset(F::kBIndex as isize) = *otdata_b.offset(vgetq_lane_s32(result, 2) as isize); +} +#[no_mangle] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +pub unsafe fn qcms_transform_data_rgb_out_lut_neon( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_template_lut_neon::(transform, src, dest, length); +} +#[no_mangle] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +pub unsafe fn qcms_transform_data_rgba_out_lut_neon( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_template_lut_neon::(transform, src, dest, length); +} + +#[no_mangle] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +pub unsafe fn qcms_transform_data_bgra_out_lut_neon( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_template_lut_neon::(transform, src, dest, length); +} diff --git a/gfx/qcms/src/transform_sse2.rs b/gfx/qcms/src/transform_sse2.rs new file mode 100644 index 0000000000..f6bccaadc3 --- /dev/null +++ 
b/gfx/qcms/src/transform_sse2.rs @@ -0,0 +1,159 @@ +use crate::transform::{qcms_transform, Format, BGRA, CLAMPMAXVAL, FLOATSCALE, RGB, RGBA}; +#[cfg(target_arch = "x86")] +pub use std::arch::x86::{ + __m128, __m128i, _mm_add_ps, _mm_cvtps_epi32, _mm_load_ps, _mm_load_ss, _mm_max_ps, _mm_min_ps, + _mm_mul_ps, _mm_set1_ps, _mm_setzero_ps, _mm_shuffle_ps, _mm_store_si128, +}; +#[cfg(target_arch = "x86_64")] +pub use std::arch::x86_64::{ + __m128, __m128i, _mm_add_ps, _mm_cvtps_epi32, _mm_load_ps, _mm_load_ss, _mm_max_ps, _mm_min_ps, + _mm_mul_ps, _mm_set1_ps, _mm_setzero_ps, _mm_shuffle_ps, _mm_store_si128, +}; + +#[repr(align(16))] +struct Output([u32; 4]); + +unsafe extern "C" fn qcms_transform_data_template_lut_sse2( + transform: &qcms_transform, + mut src: *const u8, + mut dest: *mut u8, + mut length: usize, +) { + let mat: *const [f32; 4] = (*transform).matrix.as_ptr(); + let mut input: Output = std::mem::zeroed(); + /* share input and output locations to save having to keep the + * locations in separate registers */ + let output: *const u32 = &mut input as *mut Output as *mut u32; + /* deref *transform now to avoid it in loop */ + let igtbl_r: *const f32 = (*transform).input_gamma_table_r.as_ref().unwrap().as_ptr(); + let igtbl_g: *const f32 = (*transform).input_gamma_table_g.as_ref().unwrap().as_ptr(); + let igtbl_b: *const f32 = (*transform).input_gamma_table_b.as_ref().unwrap().as_ptr(); + /* deref *transform now to avoid it in loop */ + let otdata_r: *const u8 = (*transform) + .output_table_r + .as_deref() + .unwrap() + .data + .as_ptr(); + let otdata_g: *const u8 = (*transform) + .output_table_g + .as_deref() + .unwrap() + .data + .as_ptr(); + let otdata_b: *const u8 = (*transform) + .output_table_b + .as_deref() + .unwrap() + .data + .as_ptr(); + /* input matrix values never change */ + let mat0: __m128 = _mm_load_ps((*mat.offset(0isize)).as_ptr()); + let mat1: __m128 = _mm_load_ps((*mat.offset(1isize)).as_ptr()); + let mat2: __m128 = 
_mm_load_ps((*mat.offset(2isize)).as_ptr()); + /* these values don't change, either */ + let max: __m128 = _mm_set1_ps(CLAMPMAXVAL); + let min: __m128 = _mm_setzero_ps(); + let scale: __m128 = _mm_set1_ps(FLOATSCALE); + let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32; + /* working variables */ + let mut vec_r: __m128; + let mut vec_g: __m128; + let mut vec_b: __m128; + let mut result: __m128; + let mut alpha: u8 = 0; + /* CYA */ + if length == 0 { + return; + } + /* one pixel is handled outside of the loop */ + length -= 1; + /* setup for transforming 1st pixel */ + vec_r = _mm_load_ss(&*igtbl_r.offset(*src.add(F::kRIndex) as isize)); + vec_g = _mm_load_ss(&*igtbl_g.offset(*src.add(F::kGIndex) as isize)); + vec_b = _mm_load_ss(&*igtbl_b.offset(*src.add(F::kBIndex) as isize)); + if F::kAIndex != 0xff { + alpha = *src.add(F::kAIndex) + } + src = src.offset(components as isize); + let mut i: u32 = 0; + while (i as usize) < length { + /* position values from gamma tables */ + vec_r = _mm_shuffle_ps(vec_r, vec_r, 0); + vec_g = _mm_shuffle_ps(vec_g, vec_g, 0); + vec_b = _mm_shuffle_ps(vec_b, vec_b, 0); + /* gamma * matrix */ + vec_r = _mm_mul_ps(vec_r, mat0); + vec_g = _mm_mul_ps(vec_g, mat1); + vec_b = _mm_mul_ps(vec_b, mat2); + /* store alpha for this pixel; load alpha for next */ + if F::kAIndex != 0xff { + *dest.add(F::kAIndex) = alpha; + alpha = *src.add(F::kAIndex) + } + /* crunch, crunch, crunch */ + vec_r = _mm_add_ps(vec_r, _mm_add_ps(vec_g, vec_b)); + vec_r = _mm_max_ps(min, vec_r); + vec_r = _mm_min_ps(max, vec_r); + result = _mm_mul_ps(vec_r, scale); + /* store calc'd output tables indices */ + _mm_store_si128(output as *mut __m128i, _mm_cvtps_epi32(result)); + /* load gamma values for next loop while store completes */ + vec_r = _mm_load_ss(&*igtbl_r.offset(*src.add(F::kRIndex) as isize)); + vec_g = _mm_load_ss(&*igtbl_g.offset(*src.add(F::kGIndex) as isize)); + vec_b = _mm_load_ss(&*igtbl_b.offset(*src.add(F::kBIndex) as isize)); + src = 
src.offset(components as isize); + /* use calc'd indices to output RGB values */ + *dest.add(F::kRIndex) = *otdata_r.offset(*output.offset(0isize) as isize); + *dest.add(F::kGIndex) = *otdata_g.offset(*output.offset(1isize) as isize); + *dest.add(F::kBIndex) = *otdata_b.offset(*output.offset(2isize) as isize); + dest = dest.offset(components as isize); + i += 1 + } + /* handle final (maybe only) pixel */ + vec_r = _mm_shuffle_ps(vec_r, vec_r, 0); + vec_g = _mm_shuffle_ps(vec_g, vec_g, 0); + vec_b = _mm_shuffle_ps(vec_b, vec_b, 0); + vec_r = _mm_mul_ps(vec_r, mat0); + vec_g = _mm_mul_ps(vec_g, mat1); + vec_b = _mm_mul_ps(vec_b, mat2); + if F::kAIndex != 0xff { + *dest.add(F::kAIndex) = alpha + } + vec_r = _mm_add_ps(vec_r, _mm_add_ps(vec_g, vec_b)); + vec_r = _mm_max_ps(min, vec_r); + vec_r = _mm_min_ps(max, vec_r); + result = _mm_mul_ps(vec_r, scale); + _mm_store_si128(output as *mut __m128i, _mm_cvtps_epi32(result)); + *dest.add(F::kRIndex) = *otdata_r.offset(*output.offset(0isize) as isize); + *dest.add(F::kGIndex) = *otdata_g.offset(*output.offset(1isize) as isize); + *dest.add(F::kBIndex) = *otdata_b.offset(*output.offset(2isize) as isize); +} +#[no_mangle] +pub unsafe fn qcms_transform_data_rgb_out_lut_sse2( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_template_lut_sse2::(transform, src, dest, length); +} +#[no_mangle] +pub unsafe fn qcms_transform_data_rgba_out_lut_sse2( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_template_lut_sse2::(transform, src, dest, length); +} + +#[no_mangle] +pub unsafe fn qcms_transform_data_bgra_out_lut_sse2( + transform: &qcms_transform, + src: *const u8, + dest: *mut u8, + length: usize, +) { + qcms_transform_data_template_lut_sse2::(transform, src, dest, length); +} diff --git a/gfx/qcms/src/transform_util.rs b/gfx/qcms/src/transform_util.rs new file mode 100644 index 0000000000..75fd2ca0e2 --- 
/dev/null +++ b/gfx/qcms/src/transform_util.rs @@ -0,0 +1,608 @@ +// qcms +// Copyright (C) 2009 Mozilla Foundation +// Copyright (C) 1998-2007 Marti Maria +// +// Permission is hereby granted, free of charge, to any person obtaining +// a copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +use std::convert::TryInto; + +use crate::{ + iccread::{curveType, Profile}, + s15Fixed16Number_to_float, +}; +use crate::{matrix::Matrix, transform::PRECACHE_OUTPUT_MAX, transform::PRECACHE_OUTPUT_SIZE}; + +//XXX: could use a bettername +pub type uint16_fract_t = u16; + +#[inline] +fn u8Fixed8Number_to_float(x: u16) -> f32 { + // 0x0000 = 0. + // 0x0100 = 1. + // 0xffff = 255 + 255/256 + (x as i32 as f64 / 256.0f64) as f32 +} +#[inline] +pub fn clamp_float(a: f32) -> f32 { + /* One would naturally write this function as the following: + if (a > 1.) + return 1.; + else if (a < 0) + return 0; + else + return a; + + However, that version will let NaNs pass through which is undesirable + for most consumers. + */ + if a > 1. { + 1. 
+ } else if a >= 0. { + a + } else { + // a < 0 or a is NaN + 0. + } +} +/* value must be a value between 0 and 1 */ +//XXX: is the above a good restriction to have? +// the output range of this functions is 0..1 +pub fn lut_interp_linear(mut input_value: f64, table: &[u16]) -> f32 { + input_value *= (table.len() - 1) as f64; + + let upper: i32 = input_value.ceil() as i32; + let lower: i32 = input_value.floor() as i32; + let value: f32 = ((table[upper as usize] as f64) * (1. - (upper as f64 - input_value)) + + (table[lower as usize] as f64 * (upper as f64 - input_value))) + as f32; + /* scale the value */ + value * (1.0 / 65535.0) +} +/* same as above but takes and returns a uint16_t value representing a range from 0..1 */ +#[no_mangle] +pub fn lut_interp_linear16(input_value: u16, table: &[u16]) -> u16 { + /* Start scaling input_value to the length of the array: 65535*(length-1). + * We'll divide out the 65535 next */ + let mut value: u32 = (input_value as i32 * (table.len() as i32 - 1)) as u32; /* equivalent to ceil(value/65535) */ + let upper: u32 = (value + 65534) / 65535; /* equivalent to floor(value/65535) */ + let lower: u32 = value / 65535; + /* interp is the distance from upper to value scaled to 0..65535 */ + let interp: u32 = value % 65535; // 0..65535*65535 + value = (table[upper as usize] as u32 * interp + + table[lower as usize] as u32 * (65535 - interp)) + / 65535; + value as u16 +} +/* same as above but takes an input_value from 0..PRECACHE_OUTPUT_MAX + * and returns a uint8_t value representing a range from 0..1 */ +fn lut_interp_linear_precache_output(input_value: u32, table: &[u16]) -> u8 { + /* Start scaling input_value to the length of the array: PRECACHE_OUTPUT_MAX*(length-1). 
+ * We'll divide out the PRECACHE_OUTPUT_MAX next */ + let mut value: u32 = input_value * (table.len() - 1) as u32; + /* equivalent to ceil(value/PRECACHE_OUTPUT_MAX) */ + let upper: u32 = (value + PRECACHE_OUTPUT_MAX as u32 - 1) / PRECACHE_OUTPUT_MAX as u32; + /* equivalent to floor(value/PRECACHE_OUTPUT_MAX) */ + let lower: u32 = value / PRECACHE_OUTPUT_MAX as u32; + /* interp is the distance from upper to value scaled to 0..PRECACHE_OUTPUT_MAX */ + let interp: u32 = value % PRECACHE_OUTPUT_MAX as u32; + /* the table values range from 0..65535 */ + value = table[upper as usize] as u32 * interp + + table[lower as usize] as u32 * (PRECACHE_OUTPUT_MAX as u32 - interp); // 0..(65535*PRECACHE_OUTPUT_MAX) + /* round and scale */ + value += (PRECACHE_OUTPUT_MAX * 65535 / 255 / 2) as u32; // scale to 0..255 + value /= (PRECACHE_OUTPUT_MAX * 65535 / 255) as u32; + value as u8 +} +/* value must be a value between 0 and 1 */ +//XXX: is the above a good restriction to have? +pub fn lut_interp_linear_float(mut value: f32, table: &[f32]) -> f32 { + value *= (table.len() - 1) as f32; + + let upper: i32 = value.ceil() as i32; + let lower: i32 = value.floor() as i32; + //XXX: can we be more performant here? 
value = (table[upper as usize] as f64 * (1.0f64 - (upper as f32 - value) as f64)
    + (table[lower as usize] * (upper as f32 - value)) as f64) as f32;
/* scale the value */
value
}

/// Collects 256 samples into the fixed-size boxed table used for input gamma curves.
///
/// `sample` is called once for each index in `0..256`, in increasing order.
fn gamma_table_from_fn(sample: impl Fn(usize) -> f32) -> Box<[f32; 256]> {
    let table: Vec<f32> = (0..256).map(sample).collect();
    table
        .into_boxed_slice()
        .try_into()
        .expect("exactly 256 samples were collected")
}

/// Builds the 256-entry table for a curve that holds a single gamma value.
///
/// `gamma` is converted with `u8Fixed8Number_to_float` and used as the exponent
/// applied to `i / 255` for every index `i`.
fn compute_curve_gamma_table_type1(gamma: u16) -> Box<[f32; 256]> {
    let exponent = u8Fixed8Number_to_float(gamma) as f64;
    // 0..1^(0..255 + 255/256) will always be between 0 and 1
    gamma_table_from_fn(|i| (i as f64 / 255.0f64).powf(exponent) as f32)
}

/// Builds the 256-entry table for a sampled curve by evaluating
/// `lut_interp_linear` on `table` at `i / 255` for every index `i`.
fn compute_curve_gamma_table_type2(table: &[u16]) -> Box<[f32; 256]> {
    gamma_table_from_fn(|i| lut_interp_linear(i as f64 / 255.0f64, table))
}

/// Builds the 256-entry table for a parametric curve.
///
/// Each entry is `Param::eval(i / 255)` passed through `clamp_float`.
/// Panics if `params` has a length that `Param::new` does not accept.
fn compute_curve_gamma_table_type_parametric(params: &[f32]) -> Box<[f32; 256]> {
    let curve = Param::new(params);
    gamma_table_from_fn(|i| clamp_float(curve.eval(i as f32 / 255.)))
}

/// Builds the 256-entry identity table (`i / 255`), used for a curve with no entries.
fn compute_curve_gamma_table_type0() -> Box<[f32; 256]> {
    gamma_table_from_fn(|i| (i as f64 / 255.0f64) as f32)
}

/// Builds the 256-entry input gamma table for `TRC`.
///
/// Returns `None` when no curve is given. A sampled curve is dispatched on its
/// entry count: 0 entries gives the identity table, 1 entry is treated as a
/// gamma value, and anything longer is interpolated.
pub(crate) fn build_input_gamma_table(TRC: Option<&curveType>) -> Option<Box<[f32; 256]>> {
    TRC.map(|trc| match trc {
        curveType::Parametric(params) => compute_curve_gamma_table_type_parametric(params),
        curveType::Curve(data) => match data.len() {
            0 => compute_curve_gamma_table_type0(),
            1 => compute_curve_gamma_table_type1(data[0]),
            _ => compute_curve_gamma_table_type2(data),
        },
    })
}

/// Builds the 3x3 matrix whose columns are the red, green and blue colorants
/// of `p` and whose rows are their X, Y and Z components, each converted with
/// `s15Fixed16Number_to_float`.
pub fn build_colorant_matrix(p: &Profile) -> Matrix {
    let to_float = s15Fixed16Number_to_float;
    let (red, green, blue) = (&p.redColorant, &p.greenColorant, &p.blueColorant);
    Matrix {
        m: [
            [to_float(red.X), to_float(green.X), to_float(blue.X)],
            [to_float(red.Y), to_float(green.Y), to_float(blue.Y)],
            [to_float(red.Z), to_float(green.Z), to_float(blue.Z)],
        ],
    }
}

/** Parametric representation of transfer function */
///
/// The function is evaluated piecewise (see [`Param::eval`]):
///
/// * `x < d`:  `c * x + f`
/// * `x >= d`: `(a * x + b)^g + e`
#[derive(Debug)]
struct Param {
    g: f32,
    a: f32,
    b: f32,
    c: f32,
    d: f32,
    e: f32,
    f: f32,
}

impl Param {
    /// Converts the variable number of parameters contained in profiles to the
    /// unified seven-parameter representation.
    ///
    /// Accepted lengths are 1 (`g`), 3 (`g, a, b`), 4 (`g, a, b, c`),
    /// 5 (`g, a, b, c, d`) and 7 (`g, a, b, c, d, e, f`); presumably these
    /// correspond to the ICC parametric curve function types 0 through 4.
    ///
    /// # Panics
    ///
    /// Panics if `params` has any other length (including empty).
    #[allow(clippy::many_single_char_names)]
    fn new(params: &[f32]) -> Param {
        match *params {
            // y = x^g
            [g] => Param {
                g,
                a: 1.,
                b: 0.,
                c: 1.,
                d: 0.,
                e: 0.,
                f: 0.,
            },
            // y = (a*x + b)^g above the cross-over -b/a, 0 below it
            [g, a, b] => Param {
                g,
                a,
                b,
                c: 0.,
                d: -b / a,
                e: 0.,
                f: 0.,
            },
            // y = (a*x + b)^g + c above the cross-over -b/a, c below it
            [g, a, b, c] => Param {
                g,
                a,
                b,
                c: 0.,
                d: -b / a,
                e: c,
                f: c,
            },
            // y = (a*x + b)^g above d, c*x below it
            [g, a, b, c, d] => Param {
                g,
                a,
                b,
                c,
                d,
                e: 0.,
                f: 0.,
            },
            // fully general form
            [g, a, b, c, d, e, f] => Param {
                g,
                a,
                b,
                c,
                d,
                e,
                f,
            },
            _ => panic!(
                "unsupported number of parametric curve parameters: {}",
                params.len()
            ),
        }
    }

    /// Evaluates the transfer function at `x`.
    fn eval(&self, x: f32) -> f32 {
        if x < self.d {
            self.c * x + self.f
        } else {
            (self.a * x + self.b).powf(self.g) + self.e
        }
    }

    /// Returns the parameters of the inverse function, or `None` when the
    /// function is discontinuous at the cross-over point or the inverse
    /// parameters are not all finite.
    #[allow(clippy::many_single_char_names)]
    fn invert(&self) -> Option<Param> {
        // First check if the function is continuous at the cross-over point d.
        let d1 = (self.a * self.d + self.b).powf(self.g) + self.e;
        let d2 = self.c * self.d + self.f;

        if (d1 - d2).abs() > 0.1 {
            return None;
        }
        // The cross-over of the inverse is the value the curve takes at d.
        let d = d1;

        // y = (a * x + b)^g + e
        // y - e = (a * x + b)^g
        // (y - e)^(1/g) = a*x + b
        // (y - e)^(1/g) - b = a*x
        // (y - e)^(1/g)/a - b/a = x
        // ((y - e)/a^g)^(1/g) - b/a = x
        // ((1/(a^g)) * y - e/(a^g))^(1/g) - b/a = x
        let a_pow_g = self.a.powf(self.g);
        let a = 1. / a_pow_g;
        let b = -self.e / a_pow_g;
        let g = 1. / self.g;
        let e = -self.b / self.a;

        // y = c * x + f
        // y - f = c * x
        // y/c - f/c = x
        let (c, f) = if d <= 0. {
            (1., 0.)
        } else {
            (1. / self.c, -self.f / self.c)
        };

        // if self.d > 0. and self.c == 0 as is likely with type 1 and 2 parametric function
        // then c and f will not be finite.
        let all_finite = [g, a, b, c, d, e, f].iter().all(|v| v.is_finite());
        if !all_finite {
            return None;
        }

        Some(Param {
            g,
            a,
            b,
            c,
            d,
            e,
            f,
        })
    }
}

#[test]
fn param_invert() {
    let p3 = Param::new(&[2.4, 0.948, 0.052, 0.077, 0.04]);
    p3.invert().unwrap();
    let pure_gamma = Param::new(&[2.2]);
    pure_gamma.invert().unwrap();
    let offset_gamma = Param::new(&[2.2, 0.9, 0.052]);
    offset_gamma.invert().unwrap();
    let negative_offset_gamma = Param::new(&[2.2, 0.9, -0.52]);
    negative_offset_gamma.invert().unwrap();
    // Below a positive cross-over this curve is constant, so it has no inverse.
    let constant_below_crossover = Param::new(&[2.2, 0.9, -0.52, 0.1]);
    assert!(constant_below_crossover.invert().is_none());
}

/* The following code is copied nearly directly from lcms.
 * I think it could be much better. For example, Argyll seems to have better code in
 * icmTable_lookup_bwd and icmTable_setup_bwd. However, for now this is a quick way
 * to a working solution and allows for easy comparing with lcms.
*/ +#[no_mangle] +#[allow(clippy::many_single_char_names)] +pub fn lut_inverse_interp16(Value: u16, LutTable: &[u16]) -> uint16_fract_t { + let mut l: i32 = 1; // 'int' Give spacing for negative values + let mut r: i32 = 0x10000; + let mut x: i32 = 0; + let mut res: i32; + let length = LutTable.len() as i32; + + let mut NumZeroes: i32 = 0; + while LutTable[NumZeroes as usize] as i32 == 0 && NumZeroes < length - 1 { + NumZeroes += 1 + } + // There are no zeros at the beginning and we are trying to find a zero, so + // return anything. It seems zero would be the less destructive choice + /* I'm not sure that this makes sense, but oh well... */ + if NumZeroes == 0 && Value as i32 == 0 { + return 0u16; + } + let mut NumPoles: i32 = 0; + while LutTable[(length - 1 - NumPoles) as usize] as i32 == 0xffff && NumPoles < length - 1 { + NumPoles += 1 + } + // Does the curve belong to this case? + if NumZeroes > 1 || NumPoles > 1 { + let a_0: i32; + let b_0: i32; + // Identify if value fall downto 0 or FFFF zone + if Value as i32 == 0 { + return 0u16; + } + // if (Value == 0xFFFF) return 0xFFFF; + // else restrict to valid zone + if NumZeroes > 1 { + a_0 = (NumZeroes - 1) * 0xffff / (length - 1); + l = a_0 - 1 + } + if NumPoles > 1 { + b_0 = (length - 1 - NumPoles) * 0xffff / (length - 1); + r = b_0 + 1 + } + } + if r <= l { + // If this happens LutTable is not invertible + return 0u16; + } + // Seems not a degenerated case... apply binary search + while r > l { + x = (l + r) / 2; + res = lut_interp_linear16((x - 1) as uint16_fract_t, LutTable) as i32; + if res == Value as i32 { + // Found exact match. + return (x - 1) as uint16_fract_t; + } + if res > Value as i32 { + r = x - 1 + } else { + l = x + 1 + } + } + + // Not found, should we interpolate? 
+ + // Get surrounding nodes + debug_assert!(x >= 1); + + let val2: f64 = (length - 1) as f64 * ((x - 1) as f64 / 65535.0f64); + let cell0: i32 = val2.floor() as i32; + let cell1: i32 = val2.ceil() as i32; + if cell0 == cell1 { + return x as uint16_fract_t; + } + + let y0: f64 = LutTable[cell0 as usize] as f64; + let x0: f64 = 65535.0f64 * cell0 as f64 / (length - 1) as f64; + let y1: f64 = LutTable[cell1 as usize] as f64; + let x1: f64 = 65535.0f64 * cell1 as f64 / (length - 1) as f64; + let a: f64 = (y1 - y0) / (x1 - x0); + let b: f64 = y0 - a * x0; + if a.abs() < 0.01f64 { + return x as uint16_fract_t; + } + let f: f64 = (Value as i32 as f64 - b) / a; + if f < 0.0f64 { + return 0u16; + } + if f >= 65535.0f64 { + return 0xffffu16; + } + (f + 0.5f64).floor() as uint16_fract_t +} +/* +The number of entries needed to invert a lookup table should not +necessarily be the same as the original number of entries. This is +especially true of lookup tables that have a small number of entries. + +For example: +Using a table like: + {0, 3104, 14263, 34802, 65535} +invert_lut will produce an inverse of: + {3, 34459, 47529, 56801, 65535} +which has an maximum error of about 9855 (pixel difference of ~38.346) + +For now, we punt the decision of output size to the caller. */ +fn invert_lut(table: &[u16], out_length: usize) -> Vec { + /* for now we invert the lut by creating a lut of size out_length + * and attempting to lookup a value for each entry using lut_inverse_interp16 */ + let mut output = Vec::with_capacity(out_length); + for i in 0..out_length { + let x: f64 = i as f64 * 65535.0f64 / (out_length - 1) as f64; + let input: uint16_fract_t = (x + 0.5f64).floor() as uint16_fract_t; + output.push(lut_inverse_interp16(input, table)); + } + output +} +#[allow(clippy::needless_range_loop)] +fn compute_precache_pow(output: &mut [u8; PRECACHE_OUTPUT_SIZE], gamma: f32) { + for v in 0..PRECACHE_OUTPUT_SIZE { + //XXX: don't do integer/float conversion... and round? 
+ output[v] = (255. * (v as f32 / PRECACHE_OUTPUT_MAX as f32).powf(gamma)) as u8; + } +} +#[allow(clippy::needless_range_loop)] +pub fn compute_precache_lut(output: &mut [u8; PRECACHE_OUTPUT_SIZE], table: &[u16]) { + for v in 0..PRECACHE_OUTPUT_SIZE { + output[v] = lut_interp_linear_precache_output(v as u32, table); + } +} +#[allow(clippy::needless_range_loop)] +pub fn compute_precache_linear(output: &mut [u8; PRECACHE_OUTPUT_SIZE]) { + for v in 0..PRECACHE_OUTPUT_SIZE { + //XXX: round? + output[v] = (v / (PRECACHE_OUTPUT_SIZE / 256)) as u8; + } +} +pub(crate) fn compute_precache(trc: &curveType, output: &mut [u8; PRECACHE_OUTPUT_SIZE]) -> bool { + match trc { + curveType::Parametric(params) => { + let mut gamma_table_uint: [u16; 256] = [0; 256]; + + let mut inverted_size: usize = 256; + let gamma_table = compute_curve_gamma_table_type_parametric(params); + let mut i: u16 = 0u16; + while (i as i32) < 256 { + gamma_table_uint[i as usize] = (gamma_table[i as usize] * 65535f32) as u16; + i += 1 + } + //XXX: the choice of a minimum of 256 here is not backed by any theory, + // measurement or data, however it is what lcms uses. + // the maximum number we would need is 65535 because that's the + // accuracy used for computing the pre cache table + if inverted_size < 256 { + inverted_size = 256 + } + let inverted = invert_lut(&gamma_table_uint, inverted_size); + compute_precache_lut(output, &inverted); + } + curveType::Curve(data) => { + match data.len() { + 0 => compute_precache_linear(output), + 1 => compute_precache_pow(output, 1. / u8Fixed8Number_to_float(data[0])), + _ => { + let mut inverted_size = data.len(); + //XXX: the choice of a minimum of 256 here is not backed by any theory, + // measurement or data, however it is what lcms uses. 
+ // the maximum number we would need is 65535 because that's the + // accuracy used for computing the pre cache table + if inverted_size < 256 { + inverted_size = 256 + } //XXX turn this conversion into a function + let inverted = invert_lut(data, inverted_size); + compute_precache_lut(output, &inverted); + } + } + } + } + true +} +fn build_linear_table(length: usize) -> Vec { + let mut output = Vec::with_capacity(length); + for i in 0..length { + let x: f64 = i as f64 * 65535.0f64 / (length - 1) as f64; + let input: uint16_fract_t = (x + 0.5f64).floor() as uint16_fract_t; + output.push(input); + } + output +} +fn build_pow_table(gamma: f32, length: usize) -> Vec { + let mut output = Vec::with_capacity(length); + for i in 0..length { + let mut x: f64 = i as f64 / (length - 1) as f64; + x = x.powf(gamma as f64); + let result: uint16_fract_t = (x * 65535.0f64 + 0.5f64).floor() as uint16_fract_t; + output.push(result); + } + output +} + +fn to_lut(params: &Param, len: usize) -> Vec { + let mut output = Vec::with_capacity(len); + for i in 0..len { + let X = i as f32 / (len-1) as f32; + output.push((params.eval(X) * 65535.) as u16); + } + output +} + +pub(crate) fn build_lut_for_linear_from_tf(trc: &curveType, + lut_len: Option) -> Vec { + match trc { + curveType::Parametric(params) => { + let lut_len = lut_len.unwrap_or(256); + let params = Param::new(params); + to_lut(¶ms, lut_len) + }, + curveType::Curve(data) => { + let autogen_lut_len = lut_len.unwrap_or(4096); + match data.len() { + 0 => build_linear_table(autogen_lut_len), + 1 => { + let gamma = u8Fixed8Number_to_float(data[0]); + build_pow_table(gamma, autogen_lut_len) + } + _ => { + let lut_len = lut_len.unwrap_or(data.len()); + assert_eq!(lut_len, data.len()); + data.clone() // I feel bad about this. 
+ } + } + }, + } +} + +pub(crate) fn build_lut_for_tf_from_linear(trc: &curveType) -> Option> { + match trc { + curveType::Parametric(params) => { + let lut_len = 256; + let params = Param::new(params); + if let Some(inv_params) = params.invert() { + return Some(to_lut(&inv_params, lut_len)); + } + // else return None instead of fallthrough to generic lut inversion. + return None; + }, + curveType::Curve(data) => { + let autogen_lut_len = 4096; + match data.len() { + 0 => { + return Some(build_linear_table(autogen_lut_len)); + }, + 1 => { + let gamma = 1. / u8Fixed8Number_to_float(data[0]); + return Some(build_pow_table(gamma, autogen_lut_len)); + }, + _ => {}, + } + }, + } + + let linear_from_tf = build_lut_for_linear_from_tf(trc, None); + + //XXX: the choice of a minimum of 256 here is not backed by any theory, + // measurement or data, however it is what lcms uses. + let inverted_lut_len = std::cmp::max(linear_from_tf.len(), 256); + Some(invert_lut(&linear_from_tf, inverted_lut_len)) +} + +pub(crate) fn build_output_lut(trc: &curveType) -> Option> { + build_lut_for_tf_from_linear(trc) +} -- cgit v1.2.3