33 files changed, 6793 insertions, 0 deletions
diff --git a/gfx/qcms/Cargo.toml b/gfx/qcms/Cargo.toml
new file mode 100644
index 0000000000..e330244d01
--- /dev/null
+++ b/gfx/qcms/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "qcms"
+authors = ["Jeff Muizelaar", "Benoit Girard", "Andrew Osmond"]
+version = "0.2.0"
+edition = "2018"
+include = ["src/**/*", "build.rs"]
+description = "lightweight color management"
+documentation = "https://docs.rs/qcms"
+license = "MIT"
+repository = "https://github.com/FirefoxGraphics/qcms"
+keywords = ["color"]
+categories = ["graphics"]
+
+[features]
+default = []
+c_bindings = ["libc"]
+iccv4-enabled = []
+
+[dependencies]
+libc = {version = "0.2", optional = true }
diff --git a/gfx/qcms/README.md b/gfx/qcms/README.md
new file mode 100644
index 0000000000..1997abcabe
--- /dev/null
+++ b/gfx/qcms/README.md
@@ -0,0 +1,36 @@
+# qcms
+[![Crates.io](https://img.shields.io/crates/v/qcms.svg)](https://crates.io/crates/qcms)
+[![Documentation](https://docs.rs/qcms/badge.svg)](https://docs.rs/qcms)
+
+
+Firefox's library for transforming image data between ICC profiles.
+
+## Example
+```rust
+    // Decode the jpeg
+    let mut d = jpeg_decoder::Decoder::new(std::fs::File::open("input.jpg").unwrap());
+    let mut data = d.decode().unwrap();
+    let info = d.info().unwrap();
+
+    // Extract the profile after decode
+    let profile = d.icc_profile().unwrap();
+
+    // Create a new qcms Profile
+    let input = qcms::Profile::new_from_slice(&profile).unwrap();
+    let mut output = qcms::Profile::new_sRGB();
+    output.precache_output_transform();
+
+    // Create a transform between input and output profiles and apply it.
+    let xfm = qcms::Transform::new(&input, &output, qcms::DataType::RGB8, qcms::Intent::default()).unwrap();
+    xfm.apply(&mut data);
+
+    // write the result to a PNG
+    let mut encoder = png::Encoder::new(std::fs::File::create("out.png").unwrap(), info.width as u32, info.height as u32);
+    encoder.set_color(png::ColorType::Rgb);
+    encoder.set_srgb(png::SrgbRenderingIntent::Perceptual);
+    let mut writer = encoder.write_header().unwrap();
+    writer.write_image_data(&data).unwrap(); // Save
+```
+
+This library was originally written in C, was converted to Rust using [c2rust](https://c2rust.com/), and then refactored to be mostly
+safe and more idiomatic Rust.
diff --git a/gfx/qcms/build.rs b/gfx/qcms/build.rs
new file mode 100644
index 0000000000..26ae7dcc22
--- /dev/null
+++ b/gfx/qcms/build.rs
@@ -0,0 +1,7 @@
+// Build script: exports RUSTC_BOOTSTRAP=1 and emits cfg `libcore_neon` for "neon" targets.
+fn main() {
+    println!("cargo:rustc-env=RUSTC_BOOTSTRAP=1");
+    let triple = std::env::var("TARGET").expect("TARGET environment variable not defined");
+    let neon = if triple.contains("neon") { "cargo:rustc-cfg=libcore_neon\n" } else { "" };
+    print!("{}", neon);
+}
diff --git a/gfx/qcms/fuzz/.gitignore b/gfx/qcms/fuzz/.gitignore
new file mode 100644
index 0000000000..572e03bdf3
--- /dev/null
+++ b/gfx/qcms/fuzz/.gitignore
@@ -0,0 +1,4 @@
+
+target
+corpus
+artifacts
diff --git a/gfx/qcms/fuzz/Cargo.lock b/gfx/qcms/fuzz/Cargo.lock
new file mode 100644
index 0000000000..6d350aaa53
--- /dev/null
+++ b/gfx/qcms/fuzz/Cargo.lock
@@ -0,0 +1,45 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+[[package]]
+name = "arbitrary"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0922a3e746b5a44e111e5603feb6704e5cc959116f66737f50bb5cbd264e9d87"
+
+[[package]]
+name = "cc"
+version = "1.0.60"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ef611cc68ff783f18535d77ddd080185275713d852c4f5cbb6122c462a7a825c"
+
+[[package]]
+name = "libc"
+version = "0.2.77"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2f96b10ec2560088a8e76961b00d47107b3a625fecb76dedb29ee7ccbf98235"
+
+[[package]]
+name = "libfuzzer-sys"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee8c42ab62f43795ed77a965ed07994c5584cdc94fd0ebf14b22ac1524077acc"
+dependencies = [
+ "arbitrary",
+ "cc",
+]
+
+[[package]]
+name = "qcms"
+version = "0.2.0"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "qcms-fuzz"
+version = "0.0.0"
+dependencies = [
+ "libc",
+ "libfuzzer-sys",
+ "qcms",
+]
diff --git a/gfx/qcms/fuzz/Cargo.toml b/gfx/qcms/fuzz/Cargo.toml
new file mode 100644
index 0000000000..076c4355ff
--- /dev/null
+++ b/gfx/qcms/fuzz/Cargo.toml
@@ -0,0 +1,28 @@
+
+[package]
+name = "qcms-fuzz"
+version = "0.0.0"
+authors = ["Automatically generated"]
+publish = false
+edition = "2018"
+
+[package.metadata]
+cargo-fuzz = true
+
+[dependencies]
+libfuzzer-sys = "0.3"
+libc = "0.2"
+
+[dependencies.qcms]
+path = ".."
+features = ["c_bindings"]
+
+# Prevent this from interfering with workspaces
+[workspace]
+members = ["."]
+
+[[bin]]
+name = "fuzz_target_qcms"
+path = "fuzz_targets/fuzz_target_qcms.rs"
+test = false
+doc = false
diff --git a/gfx/qcms/fuzz/fuzz_targets/fuzz_target_qcms.rs b/gfx/qcms/fuzz/fuzz_targets/fuzz_target_qcms.rs
new file mode 100644
index 0000000000..22d9737d3f
--- /dev/null
+++ b/gfx/qcms/fuzz/fuzz_targets/fuzz_target_qcms.rs
@@ -0,0 +1,94 @@
+#![no_main]
+use libfuzzer_sys::fuzz_target;
+extern crate qcms;
+extern crate libc;
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
+
+use qcms::c_bindings::{qcms_profile, icSigRgbData, qcms_profile_is_bogus, icSigGrayData};
+use qcms::c_bindings::{qcms_profile_get_color_space, qcms_profile_get_rendering_intent, qcms_profile_from_memory, qcms_profile_release, qcms_profile_sRGB, qcms_transform_create};
+use qcms::c_bindings::{qcms_profile_precache_output_transform, qcms_transform_data, qcms_transform_release, qcms_enable_iccv4};
+
+use qcms::DataType::*;
+
+ unsafe fn transform(src_profile: *mut qcms_profile, dst_profile: *mut qcms_profile, size: usize)
+ {
+   // Sources may be GRAY or RGB and destinations must be RGB; anything else bails out.
+   // Bit 0 of `size` adds an alpha channel to the source layout, bit 1 to the destination.
+   let src_has_alpha = (size & 1) != 0;
+   let src_space = qcms_profile_get_color_space(&*src_profile);
+   let src_type = if src_space == icSigGrayData {
+     if src_has_alpha { GrayA8 } else { Gray8 }
+   } else if src_space == icSigRgbData {
+     if src_has_alpha { RGBA8 } else { RGB8 }
+   } else {
+     return;
+   };
+   if qcms_profile_get_color_space(&*dst_profile) != icSigRgbData {
+     return;
+   }
+   let dst_type = if (size & 2) != 0 { RGBA8 } else { RGB8 };
+ 
+   let intent = qcms_profile_get_rendering_intent(&*src_profile);
+   // Precaching mirrors what Firefox does for the display profile; it is skipped
+   // whenever `size` is a multiple of 15 so the non-precached path is covered too.
+   if (size % 15) != 0 {
+     qcms_profile_precache_output_transform(&mut *dst_profile);
+   }
+ 
+   let xform =
+     qcms_transform_create(&*src_profile, src_type, &*dst_profile, dst_type, intent);
+   if xform.is_null() {
+     return;
+   }
+ 
+   const SRC_SIZE: usize = 36;
+   let src:[u8; SRC_SIZE] = [
+     0x7F, 0x7F, 0x7F, 0x00, 0x00, 0x7F, 0x7F, 0xFF, 0x7F, 0x10, 0x20, 0x30,
+     0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xB0, 0xBF, 0xEF, 0x6F,
+     0x3F, 0xC0, 0x9F, 0xE0, 0x90, 0xCF, 0x40, 0xAF, 0x0F, 0x01, 0x60, 0xF0,
+   ];
+   let mut dst = [0u8; SRC_SIZE * 4]; // 4x in case of GRAY to RGBA
+   let pixel_count = (SRC_SIZE / src_type.bytes_per_pixel()) as usize;
+   qcms_transform_data(&*xform, src.as_ptr() as *const libc::c_void, dst.as_mut_ptr() as *mut libc::c_void, pixel_count);
+   qcms_transform_release(xform);
+ }
+ 
+ unsafe fn do_fuzz(data: &[u8])
+ {
+   // The fuzzer input is parsed as a raw ICC profile; its length is also passed to
+   // `transform`, where it selects pixel layouts and whether precaching runs.
+   let size = data.len();
+   qcms_enable_iccv4();
+ 
+   let profile = qcms_profile_from_memory(data.as_ptr() as *const libc::c_void, size);
+   if profile.is_null() {
+     return;
+   }
+ 
+   let srgb_profile = qcms_profile_sRGB();
+   let have_srgb = !srgb_profile.is_null();
+   if have_srgb {
+     // First direction: fuzzed profile as the source, sRGB as the destination.
+     transform(profile, srgb_profile, size);
+ 
+     // Firefox only checks the display (destination) profile.
+     if !qcms_profile_is_bogus(&mut *profile) {
+       transform(srgb_profile, profile, size);
+     }
+   }
+ 
+   qcms_profile_release(profile);
+   if have_srgb {
+     qcms_profile_release(srgb_profile);
+   }
+ }
+ 
+ 
+
+fuzz_target!(|data: &[u8]| {
+    unsafe { do_fuzz(data) }
+});
diff --git a/gfx/qcms/fuzz/qcms_fuzzer.dict b/gfx/qcms/fuzz/qcms_fuzzer.dict
new file mode 100644
index 0000000000..213193c7d1
--- /dev/null
+++ b/gfx/qcms/fuzz/qcms_fuzzer.dict
@@ -0,0 +1,26 @@
+# v2
+0x41324230="A2B0"
+0x42324130="B2A0"
+0x47524159="GRAY"
+0x4C616220="Lab "
+0x52474220="RGB "
+0x58595a20="XYZ "
+0x62545243="bTRC"
+0x6258595a="bXYZ"
+0x63686164="chad"
+0x63757276="curv"
+0x67545243="gTRC"
+0x6758595a="gXYZ"
+0x6D667431="mft1"
+0x6D667432="mft2"
+0x6b545243="kTRC"
+0x6d6e7472="mntr"
+0x72545243="rTRC"
+0x7258595a="rXYZ"
+0x73636e72="scnr"
+0x73663332="sf32"
+
+# v4
+0x6D414220="mAB "
+0x6D424120="mBA "
+0x70617261="para"
diff --git a/gfx/qcms/fuzz/samples/0220-ca351238d719fd07ef8607d326b398fe.icc b/gfx/qcms/fuzz/samples/0220-ca351238d719fd07ef8607d326b398fe.icc
new file mode 100644
index 0000000000..6dcf942ac1
--- /dev/null
+++ b/gfx/qcms/fuzz/samples/0220-ca351238d719fd07ef8607d326b398fe.icc
diff --git a/gfx/qcms/fuzz/samples/0316-eb3f97ab646cd7b66bee80bdfe6098ac.icc b/gfx/qcms/fuzz/samples/0316-eb3f97ab646cd7b66bee80bdfe6098ac.icc
new file mode 100644
index 0000000000..12b096cac0
--- /dev/null
+++ b/gfx/qcms/fuzz/samples/0316-eb3f97ab646cd7b66bee80bdfe6098ac.icc
diff --git a/gfx/qcms/fuzz/samples/0372-973178997787ee780b4b58ee47cad683.icc b/gfx/qcms/fuzz/samples/0372-973178997787ee780b4b58ee47cad683.icc
new file mode 100644
index 0000000000..2d8efe536b
--- /dev/null
+++ b/gfx/qcms/fuzz/samples/0372-973178997787ee780b4b58ee47cad683.icc
diff --git a/gfx/qcms/fuzz/samples/0732-80707d91aea0f8e64ef0286cc7720e99.icc b/gfx/qcms/fuzz/samples/0732-80707d91aea0f8e64ef0286cc7720e99.icc
new file mode 100644
index 0000000000..1626458464
--- /dev/null
+++ b/gfx/qcms/fuzz/samples/0732-80707d91aea0f8e64ef0286cc7720e99.icc
diff --git a/gfx/qcms/fuzz/samples/0744-0a5faafe175e682b10c590b03d3f093b.icc b/gfx/qcms/fuzz/samples/0744-0a5faafe175e682b10c590b03d3f093b.icc
new file mode 100644
index 0000000000..2db6991c23
--- /dev/null
+++ b/gfx/qcms/fuzz/samples/0744-0a5faafe175e682b10c590b03d3f093b.icc
diff --git a/gfx/qcms/fuzz/samples/1809-2bd4b77651214ca6110fdbee2502671e.icc b/gfx/qcms/fuzz/samples/1809-2bd4b77651214ca6110fdbee2502671e.icc
new file mode 100644
index 0000000000..c13db9b200
--- /dev/null
+++ b/gfx/qcms/fuzz/samples/1809-2bd4b77651214ca6110fdbee2502671e.icc
diff --git a/gfx/qcms/moz.build b/gfx/qcms/moz.build
new file mode 100644
index 0000000000..1e899c7a46
--- /dev/null
+++ b/gfx/qcms/moz.build
@@ -0,0 +1,10 @@
+# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+EXPORTS += [
+    'qcms.h',
+    'qcmstypes.h',
+]
diff --git a/gfx/qcms/profiles/B2A0-ident.icc b/gfx/qcms/profiles/B2A0-ident.icc
new file mode 100644
index 0000000000..672553030f
--- /dev/null
+++ b/gfx/qcms/profiles/B2A0-ident.icc
diff --git a/gfx/qcms/profiles/displaycal-lut-stripped.icc b/gfx/qcms/profiles/displaycal-lut-stripped.icc
new file mode 100644
index 0000000000..79ddef53e9
--- /dev/null
+++ b/gfx/qcms/profiles/displaycal-lut-stripped.icc
diff --git a/gfx/qcms/profiles/lcms_samsung_syncmaster.icc b/gfx/qcms/profiles/lcms_samsung_syncmaster.icc
new file mode 100644
index 0000000000..3dcde88d06
--- /dev/null
+++ b/gfx/qcms/profiles/lcms_samsung_syncmaster.icc
diff --git a/gfx/qcms/profiles/lcms_thinkpad_w540.icc b/gfx/qcms/profiles/lcms_thinkpad_w540.icc
new file mode 100644
index 0000000000..c154e7e589
--- /dev/null
+++ b/gfx/qcms/profiles/lcms_thinkpad_w540.icc
diff --git a/gfx/qcms/qcms.h b/gfx/qcms/qcms.h
new file mode 100644
index 0000000000..ae889680bc
--- /dev/null
+++ b/gfx/qcms/qcms.h
@@ -0,0 +1,191 @@
+#ifndef QCMS_H
+#define QCMS_H
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+/* if we've already got an ICC_H header we can ignore the following */
+#ifndef ICC_H
+/* icc34 defines */
+
+/***************************************************************** 
+ Copyright (c) 1994-1996 SunSoft, Inc.
+
+                    Rights Reserved
+
+Permission is hereby granted, free of charge, to any person 
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without restrict- 
+ion, including without limitation the rights to use, copy, modify, 
+merge, publish distribute, sublicense, and/or sell copies of the 
+Software, and to permit persons to whom the Software is furnished 
+to do so, subject to the following conditions: 
+ 
+The above copyright notice and this permission notice shall be 
+included in all copies or substantial portions of the Software. 
+ 
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-
+INFRINGEMENT.  IN NO EVENT SHALL SUNSOFT, INC. OR ITS PARENT 
+COMPANY BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
+OTHER DEALINGS IN THE SOFTWARE. 
+ 
+Except as contained in this notice, the name of SunSoft, Inc. 
+shall not be used in advertising or otherwise to promote the 
+sale, use or other dealings in this Software without written 
+authorization from SunSoft Inc. 
+******************************************************************/
+
+/*
+ * QCMS, in general, is not threadsafe. However, it should be safe to create
+ * profile and transformation objects on different threads, so long as you
+ * don't use the same objects on different threads at the same time.
+ */
+
+/* 
+ * Color Space Signatures
+ * Note that only icSigXYZData and icSigLabData are valid
+ * Profile Connection Spaces (PCSs)
+ */ 
+typedef enum {
+    icSigXYZData                        = 0x58595A20L,  /* 'XYZ ' */
+    icSigLabData                        = 0x4C616220L,  /* 'Lab ' */
+    icSigLuvData                        = 0x4C757620L,  /* 'Luv ' */
+    icSigYCbCrData                      = 0x59436272L,  /* 'YCbr' */
+    icSigYxyData                        = 0x59787920L,  /* 'Yxy ' */
+    icSigRgbData                        = 0x52474220L,  /* 'RGB ' */
+    icSigGrayData                       = 0x47524159L,  /* 'GRAY' */
+    icSigHsvData                        = 0x48535620L,  /* 'HSV ' */
+    icSigHlsData                        = 0x484C5320L,  /* 'HLS ' */
+    icSigCmykData                       = 0x434D594BL,  /* 'CMYK' */
+    icSigCmyData                        = 0x434D5920L,  /* 'CMY ' */
+    icSig2colorData                     = 0x32434C52L,  /* '2CLR' */
+    icSig3colorData                     = 0x33434C52L,  /* '3CLR' */
+    icSig4colorData                     = 0x34434C52L,  /* '4CLR' */
+    icSig5colorData                     = 0x35434C52L,  /* '5CLR' */
+    icSig6colorData                     = 0x36434C52L,  /* '6CLR' */
+    icSig7colorData                     = 0x37434C52L,  /* '7CLR' */
+    icSig8colorData                     = 0x38434C52L,  /* '8CLR' */
+    icSig9colorData                     = 0x39434C52L,  /* '9CLR' */
+    icSig10colorData                    = 0x41434C52L,  /* 'ACLR' */
+    icSig11colorData                    = 0x42434C52L,  /* 'BCLR' */
+    icSig12colorData                    = 0x43434C52L,  /* 'CCLR' */
+    icSig13colorData                    = 0x44434C52L,  /* 'DCLR' */
+    icSig14colorData                    = 0x45434C52L,  /* 'ECLR' */
+    icSig15colorData                    = 0x46434C52L,  /* 'FCLR' */
+    icMaxEnumData                       = 0xFFFFFFFFL   
+} icColorSpaceSignature;
+#endif
+
+#include <stdio.h>
+#include <stdbool.h>
+
+struct _qcms_transform;
+typedef struct _qcms_transform qcms_transform;
+
+struct _qcms_profile;
+typedef struct _qcms_profile qcms_profile;
+
+/* these values match the Rendering Intent values from the ICC spec */
+typedef enum {
+	QCMS_INTENT_MIN = 0,
+	QCMS_INTENT_PERCEPTUAL = 0,
+	QCMS_INTENT_RELATIVE_COLORIMETRIC = 1,
+	QCMS_INTENT_SATURATION = 2,
+	QCMS_INTENT_ABSOLUTE_COLORIMETRIC = 3,
+	QCMS_INTENT_MAX = 3,
+
+	/* Chris Murphy (CM consultant) suggests this as a default in the event that we
+	 * cannot reproduce relative + Black Point Compensation.  BPC brings an
+	 * unacceptable performance overhead, so we go with perceptual. */
+	QCMS_INTENT_DEFAULT = QCMS_INTENT_PERCEPTUAL,
+} qcms_intent;
+
+//XXX: I don't really like the _DATA_ prefix
+typedef enum {
+	QCMS_DATA_RGB_8,
+	QCMS_DATA_RGBA_8,
+	QCMS_DATA_BGRA_8,
+	QCMS_DATA_GRAY_8,
+	QCMS_DATA_GRAYA_8
+} qcms_data_type;
+
+/* the names for the following two types are sort of ugly */
+typedef struct
+{
+	double x;
+	double y;
+	double Y;
+} qcms_CIE_xyY;
+
+typedef struct
+{
+	qcms_CIE_xyY red;
+	qcms_CIE_xyY green;
+	qcms_CIE_xyY blue;
+} qcms_CIE_xyYTRIPLE;
+
+qcms_profile* qcms_profile_create_rgb_with_gamma_set(
+                qcms_CIE_xyY white_point,
+                qcms_CIE_xyYTRIPLE primaries,
+                float redGamma,
+                float greenGamma, /* order matches the definition: red, green, blue */
+                float blueGamma);
+
+qcms_profile* qcms_profile_create_rgb_with_gamma(
+                qcms_CIE_xyY white_point,
+                qcms_CIE_xyYTRIPLE primaries,
+                float gamma);
+
+void qcms_data_create_rgb_with_gamma(
+                qcms_CIE_xyY white_point,
+                qcms_CIE_xyYTRIPLE primaries,
+                float gamma,
+                void **mem,
+                size_t *size);
+
+qcms_profile* qcms_profile_from_memory(const void *mem, size_t size);
+
+qcms_profile* qcms_profile_from_file(FILE *file);
+qcms_profile* qcms_profile_from_path(const char *path);
+
+void qcms_data_from_path(const char *path, void **mem, size_t *size);
+
+#ifdef _WIN32
+qcms_profile* qcms_profile_from_unicode_path(const wchar_t *path);
+void qcms_data_from_unicode_path(const wchar_t *path, void **mem, size_t *size);
+#endif
+
+qcms_CIE_xyY qcms_white_point_sRGB(void);
+qcms_profile* qcms_profile_sRGB(void);
+
+void qcms_profile_release(qcms_profile *profile);
+
+bool qcms_profile_is_bogus(qcms_profile *profile);
+qcms_intent qcms_profile_get_rendering_intent(qcms_profile *profile);
+icColorSpaceSignature qcms_profile_get_color_space(qcms_profile *profile);
+
+void qcms_profile_precache_output_transform(qcms_profile *profile);
+
+qcms_transform* qcms_transform_create(
+		qcms_profile *in, qcms_data_type in_type,
+		qcms_profile* out, qcms_data_type out_type,
+		qcms_intent intent);
+
+void qcms_transform_release(qcms_transform *);
+
+void qcms_transform_data(qcms_transform *transform, const void *src, void *dest, size_t length);
+
+void qcms_enable_iccv4(void);
+void qcms_enable_neon(void);
+void qcms_enable_avx(void);
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif
diff --git a/gfx/qcms/qcmsint.h b/gfx/qcms/qcmsint.h
new file mode 100644
index 0000000000..b08fc9490b
--- /dev/null
+++ b/gfx/qcms/qcmsint.h
@@ -0,0 +1,120 @@
+/* vim: set ts=8 sw=8 noexpandtab: */
+#ifndef QCMS_INT_H
+#define QCMS_INT_H
+
+#include "qcms.h"
+#include "qcmstypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _MSC_VER
+#define ALIGN __declspec(align(16))
+#else
+#define ALIGN __attribute__(( aligned (16) ))
+#endif
+
+struct _qcms_transform;
+
+typedef void (*transform_fn_t)(const struct _qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length);
+
+
+void qcms_transform_data_rgb_out_lut(const qcms_transform *transform,
+                                     const unsigned char *src,
+                                     unsigned char *dest,
+                                     size_t length);
+void qcms_transform_data_rgba_out_lut(const qcms_transform *transform,
+                                      const unsigned char *src,
+                                      unsigned char *dest,
+                                      size_t length);
+void qcms_transform_data_bgra_out_lut(const qcms_transform *transform,
+                                      const unsigned char *src,
+                                      unsigned char *dest,
+                                      size_t length);
+
+void qcms_transform_data_rgb_out_lut_precache(const qcms_transform *transform,
+                                              const unsigned char *src,
+                                              unsigned char *dest,
+                                              size_t length);
+void qcms_transform_data_rgba_out_lut_precache(const qcms_transform *transform,
+                                               const unsigned char *src,
+                                               unsigned char *dest,
+                                               size_t length);
+void qcms_transform_data_bgra_out_lut_precache(const qcms_transform *transform,
+                                               const unsigned char *src,
+                                               unsigned char *dest,
+                                               size_t length);
+
+void qcms_transform_data_rgb_out_lut_avx(const qcms_transform *transform,
+                                         const unsigned char *src,
+                                         unsigned char *dest,
+                                         size_t length);
+void qcms_transform_data_rgba_out_lut_avx(const qcms_transform *transform,
+                                          const unsigned char *src,
+                                          unsigned char *dest,
+                                          size_t length);
+void qcms_transform_data_bgra_out_lut_avx(const qcms_transform *transform,
+                                          const unsigned char *src,
+                                          unsigned char *dest,
+                                          size_t length);
+void qcms_transform_data_rgb_out_lut_sse2(const qcms_transform *transform,
+                                          const unsigned char *src,
+                                          unsigned char *dest,
+                                          size_t length);
+void qcms_transform_data_rgba_out_lut_sse2(const qcms_transform *transform,
+                                          const unsigned char *src,
+                                          unsigned char *dest,
+                                          size_t length);
+void qcms_transform_data_bgra_out_lut_sse2(const qcms_transform *transform,
+                                          const unsigned char *src,
+                                          unsigned char *dest,
+                                          size_t length);
+void qcms_transform_data_rgb_out_lut_sse1(const qcms_transform *transform,
+                                          const unsigned char *src,
+                                          unsigned char *dest,
+                                          size_t length);
+void qcms_transform_data_rgba_out_lut_sse1(const qcms_transform *transform,
+                                          const unsigned char *src,
+                                          unsigned char *dest,
+                                          size_t length);
+void qcms_transform_data_bgra_out_lut_sse1(const qcms_transform *transform,
+                                          const unsigned char *src,
+                                          unsigned char *dest,
+                                          size_t length);
+
+void qcms_transform_data_rgb_out_lut_altivec(const qcms_transform *transform,
+                                             const unsigned char *src,
+                                             unsigned char *dest,
+                                             size_t length);
+void qcms_transform_data_rgba_out_lut_altivec(const qcms_transform *transform,
+                                              const unsigned char *src,
+                                              unsigned char *dest,
+                                              size_t length);
+void qcms_transform_data_bgra_out_lut_altivec(const qcms_transform *transform,
+                                              const unsigned char *src,
+                                              unsigned char *dest,
+                                              size_t length);
+
+void qcms_transform_data_rgb_out_lut_neon(const qcms_transform *transform,
+                                          const unsigned char *src,
+                                          unsigned char *dest,
+                                          size_t length);
+void qcms_transform_data_rgba_out_lut_neon(const qcms_transform *transform,
+                                           const unsigned char *src,
+                                           unsigned char *dest,
+                                           size_t length);
+void qcms_transform_data_bgra_out_lut_neon(const qcms_transform *transform,
+                                           const unsigned char *src,
+                                           unsigned char *dest,
+                                           size_t length);
+
+extern bool qcms_supports_iccv4;
+extern bool qcms_supports_neon;
+extern bool qcms_supports_avx;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/gfx/qcms/qcmstypes.h b/gfx/qcms/qcmstypes.h
new file mode 100644
index 0000000000..7f6c70f12c
--- /dev/null
+++ b/gfx/qcms/qcmstypes.h
@@ -0,0 +1,59 @@
+#ifndef QCMS_TYPES_H
+#define QCMS_TYPES_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if !defined(IS_LITTLE_ENDIAN) && !defined(IS_BIG_ENDIAN)
+#if BYTE_ORDER == LITTLE_ENDIAN
+#define IS_LITTLE_ENDIAN
+#elif BYTE_ORDER == BIG_ENDIAN
+#define IS_BIG_ENDIAN
+#endif
+
+/* all of the platforms that we use _MSC_VER on are little endian
+ * so this is sufficient for now */
+#ifdef _MSC_VER
+#define IS_LITTLE_ENDIAN
+#endif
+
+#ifdef __OS2__
+#define IS_LITTLE_ENDIAN
+#endif
+#endif
+
+#if !defined(IS_LITTLE_ENDIAN) && !defined(IS_BIG_ENDIAN)
+#error Unknown endianess
+#endif
+
+#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || defined (_sgi) || defined (__sun) || defined (sun) || defined (__digital__)
+#  include <inttypes.h>
+#elif defined (_MSC_VER) && _MSC_VER < 1600
+typedef __int8 int8_t;
+typedef unsigned __int8 uint8_t;
+typedef __int16 int16_t;
+typedef unsigned __int16 uint16_t;
+typedef __int32 int32_t;
+typedef unsigned __int32 uint32_t;
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+#ifdef _WIN64
+typedef unsigned __int64 uintptr_t;
+#else
+typedef unsigned long uintptr_t;
+#endif
+
+#elif defined (_AIX)
+#  include <sys/inttypes.h>
+#else
+#  include <stdint.h>
+#endif
+
+#include <stdbool.h>
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/gfx/qcms/src/c_bindings.rs b/gfx/qcms/src/c_bindings.rs
new file mode 100644
index 0000000000..912601ac11
--- /dev/null
+++ b/gfx/qcms/src/c_bindings.rs
@@ -0,0 +1,368 @@
+use std::{ptr::null_mut, slice};
+
+use libc::{fclose, fopen, fread, free, malloc, memset, FILE};
+
+use crate::{
+    double_to_s15Fixed16Number,
+    iccread::*,
+    matrix::Matrix,
+    transform::get_rgb_colorants,
+    transform::DataType,
+    transform::{qcms_transform, transform_create},
+    Intent,
+};
+
+/// Creates the profile returned by `Profile::new_sRGB` and transfers ownership
+/// of it to the caller as a raw pointer (`Box::into_raw`).
+#[no_mangle]
+pub extern "C" fn qcms_profile_sRGB() -> *mut Profile {
+    Box::into_raw(Profile::new_sRGB())
+}
+
+//XXX: it would be nice if we had a way of ensuring
+// everything in a profile was initialized regardless of how it was created
+//XXX: should this also be taking a black_point?
+/* similar to CGColorSpaceCreateCalibratedRGB */
+/// Builds an RGB profile from a white point, primaries and one gamma value per
+/// channel by delegating to `Profile::new_rgb_with_gamma_set`.
+///
+/// Returns an owning raw pointer on success, or null when the constructor
+/// yields `None`.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_profile_create_rgb_with_gamma_set(
+    white_point: qcms_CIE_xyY,
+    primaries: qcms_CIE_xyYTRIPLE,
+    redGamma: f32,
+    greenGamma: f32,
+    blueGamma: f32,
+) -> *mut Profile {
+    Profile::new_rgb_with_gamma_set(white_point, primaries, redGamma, greenGamma, blueGamma)
+        .map_or(null_mut(), Box::into_raw)
+}
+
+/// Creates a profile via `Profile::new_gray_with_gamma(gamma)` and returns it
+/// as an owning raw pointer.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_profile_create_gray_with_gamma(gamma: f32) -> *mut Profile {
+    Box::into_raw(Profile::new_gray_with_gamma(gamma))
+}
+
+/// Convenience wrapper around `qcms_profile_create_rgb_with_gamma_set` that
+/// uses the same `gamma` for the red, green and blue channels.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_profile_create_rgb_with_gamma(
+    white_point: qcms_CIE_xyY,
+    primaries: qcms_CIE_xyYTRIPLE,
+    gamma: f32,
+) -> *mut Profile {
+    let (red, green, blue) = (gamma, gamma, gamma);
+    qcms_profile_create_rgb_with_gamma_set(white_point, primaries, red, green, blue)
+}
+
+/// Builds an RGB profile from a white point, primaries and a caller-supplied
+/// table of `num_entries` `u16` values, via `Profile::new_rgb_with_table`.
+///
+/// Returns an owning raw pointer on success. Returns null when `table` is
+/// null, when `num_entries` is negative, or when the constructor yields `None`.
+///
+/// # Safety
+///
+/// When non-null, `table` must point to at least `num_entries` readable,
+/// properly aligned `u16` values.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_profile_create_rgb_with_table(
+    white_point: qcms_CIE_xyY,
+    primaries: qcms_CIE_xyYTRIPLE,
+    table: *const u16,
+    num_entries: i32,
+) -> *mut Profile {
+    // `slice::from_raw_parts` requires a non-null pointer, and a negative count
+    // would wrap to an enormous length when cast to `usize`; reject both.
+    if table.is_null() || num_entries < 0 {
+        return null_mut();
+    }
+    // SAFETY: `table` is non-null and, per this function's contract, refers to
+    // `num_entries` readable `u16` values.
+    let table = slice::from_raw_parts(table, num_entries as usize);
+    match Profile::new_rgb_with_table(white_point, primaries, table) {
+        Some(profile) => Box::into_raw(profile),
+        None => null_mut(),
+    }
+}
+
+/// Parses an ICC profile from `size` bytes at `mem` using
+/// `Profile::new_from_slice`.
+///
+/// qcms_profile_from_memory does not hold a reference to the memory passed in.
+///
+/// Returns an owning raw pointer on success, or null when `mem` is null or the
+/// data is rejected by the parser.
+///
+/// # Safety
+///
+/// When non-null, `mem` must point to at least `size` readable bytes.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_profile_from_memory(
+    mem: *const libc::c_void,
+    size: usize,
+) -> *mut Profile {
+    // `slice::from_raw_parts` must never be given a null pointer, not even for
+    // a zero-length slice.
+    if mem.is_null() {
+        return null_mut();
+    }
+    // SAFETY: `mem` is non-null and, per this function's contract, refers to
+    // `size` readable bytes.
+    let bytes = slice::from_raw_parts(mem as *const libc::c_uchar, size);
+    match Profile::new_from_slice(bytes) {
+        Some(profile) => Box::into_raw(profile),
+        None => null_mut(),
+    }
+}
+
+/// Returns a copy of the profile's `rendering_intent` field.
+#[no_mangle]
+pub extern "C" fn qcms_profile_get_rendering_intent(profile: &Profile) -> Intent {
+    let Profile {
+        rendering_intent, ..
+    } = profile;
+    *rendering_intent
+}
+/// Returns a copy of the profile's `color_space` field.
+#[no_mangle]
+pub extern "C" fn qcms_profile_get_color_space(profile: &Profile) -> icColorSpaceSignature {
+    let Profile { color_space, .. } = profile;
+    *color_space
+}
+
+/// Takes back ownership of a profile pointer and frees it.
+///
+/// # Safety
+///
+/// `profile` is passed straight to `Box::from_raw`, so it must be a pointer
+/// obtained from `Box::into_raw` that has not been released already.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_profile_release(profile: *mut Profile) {
+    // Rebuild the owning Box; dropping it runs the destructor and frees it.
+    let owned = Box::from_raw(profile);
+    drop(owned);
+}
+/// Reads a whole profile from `file` into a freshly `malloc`ed buffer.
+///
+/// The first four bytes of the stream are interpreted as the big-endian total
+/// length. On success `*mem` receives the buffer (length prefix included) and
+/// `*size` the total length; the buffer must be released with `free`. On any
+/// failure `*mem` is left null and `*size` is left 0.
+///
+/// # Safety
+///
+/// `file` is handed to `fread`, and `mem` and `size` are written through
+/// without a null check, so all three must be valid.
+unsafe extern "C" fn qcms_data_from_file(
+    file: *mut FILE,
+    mem: *mut *mut libc::c_void,
+    size: *mut usize,
+) {
+    let length: u32;
+    let remaining_length: u32;
+    let read_length: usize;
+    let mut length_be: be32 = 0;
+    let data: *mut libc::c_void;
+    // Start from the failure result so every early return reports "no data".
+    *mem = std::ptr::null_mut::<libc::c_void>();
+    *size = 0;
+    // Read the 4-byte length prefix; a short read means there is no profile.
+    if fread(
+        &mut length_be as *mut be32 as *mut libc::c_void,
+        1,
+        ::std::mem::size_of::<be32>(),
+        file,
+    ) != ::std::mem::size_of::<be32>()
+    {
+        return;
+    }
+    length = u32::from_be(length_be);
+    // Reject lengths above MAX_PROFILE_SIZE or too small to hold the prefix itself.
+    if length > MAX_PROFILE_SIZE as libc::c_uint
+        || (length as libc::c_ulong) < ::std::mem::size_of::<be32>() as libc::c_ulong
+    {
+        return;
+    }
+    /* allocate room for the entire profile */
+    data = malloc(length as usize);
+    if data.is_null() {
+        return;
+    }
+    /* copy in length to the front so that the buffer will contain the entire profile */
+    *(data as *mut be32) = length_be;
+    remaining_length =
+        (length as libc::c_ulong - ::std::mem::size_of::<be32>() as libc::c_ulong) as u32;
+    /* read the rest of the profile, placing it right after the length prefix */
+    read_length = fread(
+        (data as *mut libc::c_uchar).add(::std::mem::size_of::<be32>()) as *mut libc::c_void,
+        1,
+        remaining_length as usize,
+        file,
+    ) as usize;
+    // A truncated file is an error: release the buffer and report nothing.
+    if read_length != remaining_length as usize {
+        free(data);
+        return;
+    }
+    /* successfully got the profile. */
+    *mem = data;
+    *size = length as usize;
+}
+
+/// Loads a profile from an open C `FILE` stream.
+///
+/// The raw bytes are fetched with `qcms_data_from_file`, parsed with
+/// `qcms_profile_from_memory`, and the temporary buffer is freed afterwards.
+/// Returns null when no data could be read; otherwise returns whatever the
+/// parser returned.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_profile_from_file(file: *mut FILE) -> *mut Profile {
+    let mut buffer: *mut libc::c_void = null_mut();
+    let mut buffer_len: usize = 0;
+    qcms_data_from_file(file, &mut buffer, &mut buffer_len);
+    if buffer.is_null() || buffer_len == 0 {
+        return null_mut();
+    }
+    let parsed = qcms_profile_from_memory(buffer, buffer_len);
+    // The parser does not keep a reference to the buffer, so release it now.
+    free(buffer);
+    parsed
+}
+/// Opens `path` in binary read mode and loads a profile from it with
+/// `qcms_profile_from_file`.
+///
+/// Returns null when the file cannot be opened; the stream is always closed
+/// before returning.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_profile_from_path(path: *const libc::c_char) -> *mut Profile {
+    let file = fopen(path, b"rb\x00" as *const u8 as *const libc::c_char);
+    if file.is_null() {
+        return null_mut();
+    }
+    let profile = qcms_profile_from_file(file);
+    fclose(file);
+    profile
+}
+/// Reads the raw profile bytes stored at `path`.
+///
+/// `*mem` and `*size` are first reset to null and 0. If the file opens, they
+/// are then filled in by `qcms_data_from_file` and the stream is closed.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_data_from_path(
+    path: *const libc::c_char,
+    mem: *mut *mut libc::c_void,
+    size: *mut usize,
+) {
+    *mem = null_mut();
+    *size = 0;
+    let file = fopen(path, b"rb\x00" as *const u8 as *const libc::c_char);
+    if file.is_null() {
+        return;
+    }
+    qcms_data_from_file(file, mem, size);
+    fclose(file);
+}
+
+// Wide-character counterpart of `fopen`, declared here for the Windows-only
+// `*_unicode_path` entry points; it returns a C `FILE` pointer.
+#[cfg(windows)]
+extern "C" {
+    pub fn _wfopen(filename: *const libc::wchar_t, mode: *const libc::wchar_t) -> *mut FILE;
+}
+
+/// Opens the wide-character `path` in binary read mode and loads a profile
+/// from it with `qcms_profile_from_file`.
+///
+/// Returns the loaded profile as an owning raw pointer, or null when the file
+/// cannot be opened or does not contain a usable profile. Previously the
+/// function returned nothing, so the profile created by
+/// `qcms_profile_from_file` was discarded and leaked.
+///
+/// # Safety
+///
+/// `path` must be a valid, NUL-terminated wide string.
+#[cfg(windows)]
+#[no_mangle]
+pub unsafe extern "C" fn qcms_profile_from_unicode_path(
+    path: *const libc::wchar_t,
+) -> *mut Profile {
+    // "rb" as a NUL-terminated wide string.
+    let mode = ['r' as u16, 'b' as u16, '\0' as u16];
+    let file = _wfopen(path, mode.as_ptr());
+    if file.is_null() {
+        return null_mut();
+    }
+    let profile = qcms_profile_from_file(file);
+    fclose(file);
+    profile
+}
+
+/// Wide-character variant of reading raw profile bytes from a path.
+///
+/// `*mem` and `*size` are first reset to null and 0. If `_wfopen` succeeds,
+/// they are then filled in by `qcms_data_from_file` and the stream is closed.
+#[cfg(windows)]
+#[no_mangle]
+pub unsafe extern "C" fn qcms_data_from_unicode_path(
+    path: *const libc::wchar_t,
+    mem: *mut *mut libc::c_void,
+    size: *mut usize,
+) {
+    *mem = null_mut();
+    *size = 0;
+    // "rb" as a NUL-terminated wide string.
+    let mode = ['r' as u16, 'b' as u16, '\0' as u16];
+    let file = _wfopen(path, mode.as_ptr());
+    if file.is_null() {
+        return;
+    }
+    qcms_data_from_file(file, mem, size);
+    fclose(file);
+}
+
+/// Creates a transform between two profiles by delegating to
+/// `transform_create`.
+///
+/// Returns an owning raw pointer on success, or null when `transform_create`
+/// yields `None`.
+#[no_mangle]
+pub extern "C" fn qcms_transform_create(
+    in_0: &Profile,
+    in_type: DataType,
+    out: &Profile,
+    out_type: DataType,
+    intent: Intent,
+) -> *mut qcms_transform {
+    transform_create(in_0, in_type, out, out_type, intent).map_or(null_mut(), Box::into_raw)
+}
+
+/// Serializes a minimal RGB profile (three XYZ colorant tags and three
+/// single-gamma TRC tags) into a freshly `malloc`ed, zero-initialised buffer.
+///
+/// On success `*mem` receives the buffer and `*size` its length in bytes; the
+/// buffer comes from `malloc`. Nothing is written when `mem` or `size` is
+/// null. When allocation fails or `get_rgb_colorants` reports failure, `*mem`
+/// stays null and `*size` stays 0.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_data_create_rgb_with_gamma(
+    white_point: qcms_CIE_xyY,
+    primaries: qcms_CIE_xyYTRIPLE,
+    gamma: f32,
+    mem: *mut *mut libc::c_void,
+    size: *mut usize,
+) {
+    let length: u32;
+    let mut index: u32;
+    let xyz_count: u32;
+    let trc_count: u32;
+    let mut tag_table_offset: usize;
+    let mut tag_data_offset: usize;
+    let data: *mut libc::c_void;
+    let mut colorants: Matrix = Matrix {
+        m: [[0.; 3]; 3],
+        invalid: false,
+    };
+    // Tag signatures, indexed by channel (red, green, blue).
+    let TAG_XYZ: [u32; 3] = [TAG_rXYZ, TAG_gXYZ, TAG_bXYZ];
+    let TAG_TRC: [u32; 3] = [TAG_rTRC, TAG_gTRC, TAG_bTRC];
+    if mem.is_null() || size.is_null() {
+        return;
+    }
+    // Report "no data" until the profile has been fully written.
+    *mem = std::ptr::null_mut::<libc::c_void>();
+    *size = 0;
+    /*
+     * total length = icc profile header (128) + tag count (4) +
+     * (tag table item (12) * total tags (6 = 3 XYZ + 3 TRC)) + XYZ elements data (3 * 20)
+     * + TRC elements data (3 * 16), and all tag data elements must start at a 4-byte boundary.
+     */
+    xyz_count = 3; // rXYZ, gXYZ, bXYZ
+    trc_count = 3; // rTRC, gTRC, bTRC
+    length =
+        (128 + 4) as libc::c_uint + 12 * (xyz_count + trc_count) + xyz_count * 20 + trc_count * 16;
+    // reserve the total memory.
+    data = malloc(length as usize);
+    if data.is_null() {
+        return;
+    }
+    memset(data, 0, length as usize);
+    // Part1 : write rXYZ, gXYZ and bXYZ
+    if !get_rgb_colorants(&mut colorants, white_point, primaries) {
+        free(data);
+        return;
+    }
+    // From here on the buffer is addressed as a byte slice of `length` bytes.
+    let data = std::slice::from_raw_parts_mut(data as *mut u8, length as usize);
+    // the position of first tag's signature in tag table
+    tag_table_offset = (128 + 4) as usize;
+    // the start of tag data elements.
+    tag_data_offset = ((128 + 4) as libc::c_uint + 12 * (xyz_count + trc_count)) as usize;
+    index = 0;
+    while index < xyz_count {
+        // tag table entry: signature, offset of the element, element size
+        // (20 bytes per TAG_(r/g/b)XYZ tag element)
+        write_u32(data, tag_table_offset, TAG_XYZ[index as usize]);
+        write_u32(data, tag_table_offset + 4, tag_data_offset as u32);
+        write_u32(data, tag_table_offset + 8, 20);
+        // tag data element
+        write_u32(data, tag_data_offset, XYZ_TYPE);
+        // reserved 4 bytes (left zero), then column `index` of the colorant
+        // matrix as three s15Fixed16 numbers.
+        write_u32(
+            data,
+            tag_data_offset + 8,
+            double_to_s15Fixed16Number(colorants.m[0][index as usize] as f64) as u32,
+        );
+        write_u32(
+            data,
+            tag_data_offset + 12,
+            double_to_s15Fixed16Number(colorants.m[1][index as usize] as f64) as u32,
+        );
+        write_u32(
+            data,
+            tag_data_offset + 16,
+            double_to_s15Fixed16Number(colorants.m[2][index as usize] as f64) as u32,
+        );
+        tag_table_offset += 12;
+        tag_data_offset += 20;
+        index += 1
+    }
+    // Part2 : write rTRC, gTRC and bTRC
+    index = 0;
+    while index < trc_count {
+        // tag table entry: signature, offset of the element, element size
+        // (14 bytes per TAG_(r/g/b)TRC element)
+        write_u32(data, tag_table_offset, TAG_TRC[index as usize]);
+        write_u32(data, tag_table_offset + 4, tag_data_offset as u32);
+        write_u32(data, tag_table_offset + 8, 14);
+        // tag data element
+        write_u32(data, tag_data_offset, CURVE_TYPE);
+        // reserved 4 bytes (left zero).
+        write_u32(data, tag_data_offset + 8, 1); // count
+        write_u16(data, tag_data_offset + 12, float_to_u8Fixed8Number(gamma));
+        tag_table_offset += 12;
+        // The 14-byte element occupies 16 bytes so the next one stays 4-byte aligned.
+        tag_data_offset += 16;
+        index += 1
+    }
+    /* Part3 : write profile header
+     *
+     * Important header fields are left empty. This generates a profile for internal use only.
+     * We should be generating: Profile version (04300000h), Profile signature (acsp),
+     * PCS illuminant field. Likewise mandatory profile tags are omitted.
+     */
+    write_u32(data, 0, length); // the total length of this memory
+    write_u32(data, 12, DISPLAY_DEVICE_PROFILE); // profile->class_type
+    write_u32(data, 16, RGB_SIGNATURE); // profile->color_space
+    write_u32(data, 20, XYZ_TYPE); // profile->pcs
+    write_u32(data, 64, Intent::Perceptual as u32); // profile->rendering_intent
+    write_u32(data, 128, 6); // total tag count
+    // prepare the result
+    *mem = data.as_mut_ptr() as *mut libc::c_void;
+    *size = length as usize;
+}
+
+/// Applies `transform` by calling its `transform_fn` with `src`, `dest` and
+/// `length` (the pointers are passed on as byte pointers).
+///
+/// Panics with "non-null function pointer" if `transform_fn` is `None`.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_transform_data(
+    transform: &qcms_transform,
+    src: *const libc::c_void,
+    dest: *mut libc::c_void,
+    length: usize,
+) {
+    let run = transform.transform_fn.expect("non-null function pointer");
+    let input = src as *const u8;
+    let output = dest as *mut u8;
+    run(transform, input, output, length);
+}
+
+/// Colour space signature as exposed to C: a 32-bit value.
+pub type icColorSpaceSignature = u32;
+/// 0x47524159, the ASCII bytes "GRAY".
+pub const icSigGrayData: icColorSpaceSignature = 1196573017;
+/// 0x52474220, the ASCII bytes "RGB " (with a trailing space).
+pub const icSigRgbData: icColorSpaceSignature = 1380401696;
+
+pub use crate::iccread::qcms_profile_is_bogus;
+pub use crate::iccread::Profile as qcms_profile;
+pub use crate::transform::{
+    qcms_enable_iccv4, qcms_profile_precache_output_transform, qcms_transform_release,
+};
diff --git a/gfx/qcms/src/chain.rs b/gfx/qcms/src/chain.rs
new file mode 100644
index 0000000000..c1faf9dcd7
--- /dev/null
+++ b/gfx/qcms/src/chain.rs
@@ -0,0 +1,998 @@
+/* vim: set ts=8 sw=8 noexpandtab: */
+//  qcms
+//  Copyright (C) 2009 Mozilla Corporation
+//  Copyright (C) 1998-2007 Marti Maria
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+use crate::{
+    iccread::LAB_SIGNATURE,
+    iccread::RGB_SIGNATURE,
+    iccread::XYZ_SIGNATURE,
+    iccread::{lutType, lutmABType, Profile},
+    matrix::Matrix,
+    s15Fixed16Number_to_float,
+    transform_util::clamp_float,
+    transform_util::{
+        build_colorant_matrix, build_input_gamma_table, build_output_lut, lut_interp_linear,
+        lut_interp_linear_float,
+    },
+};
+
+/// One stage of a chain of colour transforms.
+///
+/// Stages are linked through `next_transform`; `transform_module_fn` selects
+/// the routine for this stage, and the remaining fields hold whatever data
+/// that routine reads.
+#[derive(Clone, Default)]
+pub struct ModularTransform {
+    // NOTE(review): matrix and tx/ty/tz are not used by the stage functions
+    // visible in this part of the file; presumably a 3x3 matrix plus a
+    // translation -- confirm against the matrix stages.
+    matrix: Matrix,
+    tx: f32,
+    ty: f32,
+    tz: f32,
+    // Per-channel input curves, sampled with `lut_interp_linear_float`.
+    input_clut_table_r: Option<Vec<f32>>,
+    input_clut_table_g: Option<Vec<f32>>,
+    input_clut_table_b: Option<Vec<f32>>,
+    input_clut_table_length: u16,
+    // 3D lookup table: `grid_size` nodes per axis, three interleaved floats per node.
+    clut: Option<Vec<f32>>,
+    grid_size: u16,
+    // Per-channel output curves applied after the CLUT lookup.
+    output_clut_table_r: Option<Vec<f32>>,
+    output_clut_table_g: Option<Vec<f32>>,
+    output_clut_table_b: Option<Vec<f32>>,
+    output_clut_table_length: u16,
+    // NOTE(review): presumably the 16-bit output gamma tables and their
+    // lengths, read by the gamma LUT stage -- confirm.
+    output_gamma_lut_r: Option<Vec<u16>>,
+    output_gamma_lut_g: Option<Vec<u16>>,
+    output_gamma_lut_b: Option<Vec<u16>>,
+    output_gamma_lut_r_length: usize,
+    output_gamma_lut_g_length: usize,
+    output_gamma_lut_b_length: usize,
+    // Routine implementing this stage; `None` by default.
+    transform_module_fn: TransformModuleFn,
+    // Following stage in the chain, if any.
+    next_transform: Option<Box<ModularTransform>>,
+}
+/// Optional stage routine: receives the stage, the source samples and the
+/// destination buffer. The routines in this file process both slices in
+/// groups of three floats.
+pub type TransformModuleFn =
+    Option<fn(_: &ModularTransform, _: &[f32], _: &mut [f32]) -> ()>;
+
+/// Linear interpolation: returns `a` for `t == 0`, `b` for `t == 1`, and the
+/// weighted blend `a * (1 - t) + b * t` in general.
+#[inline]
+fn lerp(a: f32, b: f32, t: f32) -> f32 {
+    let weight_a = 1.0 - t;
+    a * weight_a + b * t
+}
+
+/// Converts the nine s15Fixed16 coefficients `e00..e22` of a `lutType` into a
+/// float `Matrix`.
+///
+/// Without a `lut` the result is an all-zero matrix flagged `invalid`.
+fn build_lut_matrix(lut: Option<&lutType>) -> Matrix {
+    match lut {
+        Some(lut) => Matrix {
+            m: [
+                [
+                    s15Fixed16Number_to_float(lut.e00),
+                    s15Fixed16Number_to_float(lut.e01),
+                    s15Fixed16Number_to_float(lut.e02),
+                ],
+                [
+                    s15Fixed16Number_to_float(lut.e10),
+                    s15Fixed16Number_to_float(lut.e11),
+                    s15Fixed16Number_to_float(lut.e12),
+                ],
+                [
+                    s15Fixed16Number_to_float(lut.e20),
+                    s15Fixed16Number_to_float(lut.e21),
+                    s15Fixed16Number_to_float(lut.e22),
+                ],
+            ],
+            invalid: false,
+        },
+        None => Matrix {
+            m: [[0.; 3]; 3],
+            invalid: true,
+        },
+    }
+}
+/// Converts the nine s15Fixed16 coefficients `e00..e22` of a `lutmABType`
+/// into a float `Matrix`; the result is always flagged valid.
+fn build_mAB_matrix(lut: &lutmABType) -> Matrix {
+    let top = [
+        s15Fixed16Number_to_float(lut.e00),
+        s15Fixed16Number_to_float(lut.e01),
+        s15Fixed16Number_to_float(lut.e02),
+    ];
+    let middle = [
+        s15Fixed16Number_to_float(lut.e10),
+        s15Fixed16Number_to_float(lut.e11),
+        s15Fixed16Number_to_float(lut.e12),
+    ];
+    let bottom = [
+        s15Fixed16Number_to_float(lut.e20),
+        s15Fixed16Number_to_float(lut.e21),
+        s15Fixed16Number_to_float(lut.e22),
+    ];
+
+    Matrix {
+        m: [top, middle, bottom],
+        invalid: false,
+    }
+}
+//Based on lcms cmsLab2XYZ
+/// Piecewise helper used by the XYZ -> Lab conversion: linear at or below
+/// `(24/116)^3`, cube root above it.
+fn f(t: f32) -> f32 {
+    let delta: f32 = 24. / 116.;
+    let threshold = delta * delta * delta;
+    if t > threshold {
+        return t.powf(1. / 3.);
+    }
+    (841. / 108. * t) + 16. / 116.
+}
+/// Piecewise helper used by the Lab -> XYZ conversion: linear at or below
+/// `24/116`, cube above it.
+fn f_1(t: f32) -> f32 {
+    let on_linear_segment = t <= 24.0 / 116.0;
+    if !on_linear_segment {
+        return t * t * t;
+    }
+    (108.0 / 841.0) * (t - 16.0 / 116.0)
+}
+
+/// Converts triples of encoded Lab values into encoded XYZ values.
+///
+/// Each input triple is decoded as `L = v0 * 100`, `a = v1 * 255 - 128`,
+/// `b = v2 * 255 - 128`. The resulting XYZ (relative to the D50 white point
+/// below) is divided by `1 + 32767/32768` before being stored. Only complete
+/// triples of `src`/`dest` are processed.
+fn transform_module_LAB_to_XYZ(_transform: &ModularTransform, src: &[f32], dest: &mut [f32]) {
+    // lcms: D50 XYZ values
+    let white: [f32; 3] = [0.9642, 1.0, 0.8249];
+    // Divisor applied to every output component.
+    let pcs_scale: f64 = 1.0f64 + 32767.0f64 / 32768.0f64;
+
+    for (xyz, lab) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) {
+        let lightness: f32 = lab[0] * 100.0;
+        let a_axis: f32 = lab[1] * 255.0 - 128.0;
+        let b_axis: f32 = lab[2] * 255.0 - 128.0;
+
+        let fy: f32 = (lightness + 16.0) / 116.0;
+
+        let x = f_1(fy + 0.002 * a_axis) * white[0];
+        let y = f_1(fy) * white[1];
+        let z = f_1(fy - 0.005 * b_axis) * white[2];
+
+        xyz[0] = (x as f64 / pcs_scale) as f32;
+        xyz[1] = (y as f64 / pcs_scale) as f32;
+        xyz[2] = (z as f64 / pcs_scale) as f32;
+    }
+}
+//Based on lcms cmsXYZ2Lab
+/// Converts triples of encoded XYZ values into encoded Lab values.
+///
+/// Each input component is multiplied by `1 + 32767/32768` and divided by the
+/// matching D50 white point component. The resulting Lab is stored as
+/// `L / 100`, `(a + 128) / 255`, `(b + 128) / 255`. Only complete triples of
+/// `src`/`dest` are processed.
+fn transform_module_XYZ_to_LAB(_transform: &ModularTransform, src: &[f32], dest: &mut [f32]) {
+    // lcms: D50 XYZ values
+    let white: [f32; 3] = [0.9642, 1.0, 0.8249];
+    // Factor applied to every input component.
+    let pcs_scale: f64 = 1.0f64 + 32767.0f64 / 32768.0f64;
+
+    for (lab, xyz) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) {
+        let rel_x: f32 = (xyz[0] as f64 * pcs_scale / white[0] as f64) as f32;
+        let rel_y: f32 = (xyz[1] as f64 * pcs_scale / white[1] as f64) as f32;
+        let rel_z: f32 = (xyz[2] as f64 * pcs_scale / white[2] as f64) as f32;
+
+        let fx = f(rel_x);
+        let fy = f(rel_y);
+        let fz = f(rel_z);
+
+        let lightness: f32 = 116.0 * fy - 16.0;
+        let a_axis: f32 = 500.0 * (fx - fy);
+        let b_axis: f32 = 200.0 * (fy - fz);
+
+        lab[0] = lightness / 100.0;
+        lab[1] = (a_axis + 128.0) / 255.0;
+        lab[2] = (b_axis + 128.0) / 255.0;
+    }
+}
+/// Maps each input triple through the stage's 3D lookup table using trilinear
+/// interpolation and stores the clamped result.
+///
+/// The table has `grid_size` nodes per axis with three interleaved floats per
+/// node. Panics if `clut` is `None` or if a computed node index falls outside
+/// the table.
+fn transform_module_clut_only(transform: &ModularTransform, src: &[f32], dest: &mut [f32]) {
+    let grid: i32 = transform.grid_size as i32;
+    // Node strides of the three lattice axes.
+    let stride_z: i32 = 1;
+    let stride_y: i32 = grid;
+    let stride_x: i32 = grid * grid;
+
+    // The same table viewed from offsets 0, 1 and 2 selects the channel.
+    let red_plane = &transform.clut.as_ref().unwrap()[0..];
+    let green_plane = &transform.clut.as_ref().unwrap()[1..];
+    let blue_plane = &transform.clut.as_ref().unwrap()[2..];
+
+    let node = |plane: &[f32], x: i32, y: i32, z: i32| {
+        plane[((x * stride_x + y * stride_y + z * stride_z) * 3) as usize]
+    };
+
+    for (out_px, in_px) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) {
+        debug_assert!(transform.grid_size as i32 >= 1);
+        let last_node = (grid - 1) as f32;
+
+        // Position of the sample inside the lattice, per axis.
+        let pos_x: f32 = in_px[0] * last_node;
+        let pos_y: f32 = in_px[1] * last_node;
+        let pos_z: f32 = in_px[2] * last_node;
+
+        // Surrounding lattice nodes and the fractional offset from the lower one.
+        let (x0, y0, z0) = (
+            pos_x.floor() as i32,
+            pos_y.floor() as i32,
+            pos_z.floor() as i32,
+        );
+        let (x1, y1, z1) = (
+            pos_x.ceil() as i32,
+            pos_y.ceil() as i32,
+            pos_z.ceil() as i32,
+        );
+        let frac_x: f32 = pos_x - x0 as f32;
+        let frac_y: f32 = pos_y - y0 as f32;
+        let frac_z: f32 = pos_z - z0 as f32;
+
+        // Trilinear blend of the eight surrounding nodes of one channel.
+        let trilinear = |plane: &[f32]| -> f32 {
+            let near_lo = lerp(node(plane, x0, y0, z0), node(plane, x1, y0, z0), frac_x);
+            let near_hi = lerp(node(plane, x0, y1, z0), node(plane, x1, y1, z0), frac_x);
+            let near = lerp(near_lo, near_hi, frac_y);
+            let far_lo = lerp(node(plane, x0, y0, z1), node(plane, x1, y0, z1), frac_x);
+            let far_hi = lerp(node(plane, x0, y1, z1), node(plane, x1, y1, z1), frac_x);
+            let far = lerp(far_lo, far_hi, frac_y);
+            lerp(near, far, frac_z)
+        };
+
+        let red = trilinear(red_plane);
+        let green = trilinear(green_plane);
+        let blue = trilinear(blue_plane);
+
+        out_px[0] = clamp_float(red);
+        out_px[1] = clamp_float(green);
+        out_px[2] = clamp_float(blue);
+    }
+}
+/// Full CLUT stage: per-channel input curves, trilinear lookup in the 3D
+/// table, per-channel output curves, then clamping.
+///
+/// The table has `grid_size` nodes per axis with three interleaved floats per
+/// node. Panics if `clut` or any input curve is `None`, if an output curve is
+/// `None` when a pixel is processed, or if a computed node index falls outside
+/// the table.
+fn transform_module_clut(transform: &ModularTransform, src: &[f32], dest: &mut [f32]) {
+    let grid: i32 = transform.grid_size as i32;
+    // Node strides of the three lattice axes.
+    let stride_z: i32 = 1;
+    let stride_y: i32 = grid;
+    let stride_x: i32 = grid * grid;
+
+    // The same table viewed from offsets 0, 1 and 2 selects the channel.
+    let red_plane = &transform.clut.as_ref().unwrap()[0..];
+    let green_plane = &transform.clut.as_ref().unwrap()[1..];
+    let blue_plane = &transform.clut.as_ref().unwrap()[2..];
+    let node = |plane: &[f32], x: i32, y: i32, z: i32| {
+        plane[((x * stride_x + y * stride_y + z * stride_z) * 3) as usize]
+    };
+
+    let in_curve_r = transform.input_clut_table_r.as_ref().unwrap();
+    let in_curve_g = transform.input_clut_table_g.as_ref().unwrap();
+    let in_curve_b = transform.input_clut_table_b.as_ref().unwrap();
+    for (out_px, in_px) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) {
+        debug_assert!(transform.grid_size as i32 >= 1);
+        let last_node = (grid - 1) as f32;
+
+        // Run the device values through the input curves first.
+        let linear_r: f32 = lut_interp_linear_float(in_px[0], in_curve_r);
+        let linear_g: f32 = lut_interp_linear_float(in_px[1], in_curve_g);
+        let linear_b: f32 = lut_interp_linear_float(in_px[2], in_curve_b);
+
+        // Position of the sample inside the lattice, per axis.
+        let pos_x: f32 = linear_r * last_node;
+        let pos_y: f32 = linear_g * last_node;
+        let pos_z: f32 = linear_b * last_node;
+
+        // Surrounding lattice nodes and the fractional offset from the lower one.
+        let (x0, y0, z0) = (
+            pos_x.floor() as i32,
+            pos_y.floor() as i32,
+            pos_z.floor() as i32,
+        );
+        let (x1, y1, z1) = (
+            pos_x.ceil() as i32,
+            pos_y.ceil() as i32,
+            pos_z.ceil() as i32,
+        );
+        let frac_x: f32 = pos_x - x0 as f32;
+        let frac_y: f32 = pos_y - y0 as f32;
+        let frac_z: f32 = pos_z - z0 as f32;
+
+        // Trilinear blend of the eight surrounding nodes of one channel.
+        let trilinear = |plane: &[f32]| -> f32 {
+            let near_lo = lerp(node(plane, x0, y0, z0), node(plane, x1, y0, z0), frac_x);
+            let near_hi = lerp(node(plane, x0, y1, z0), node(plane, x1, y1, z0), frac_x);
+            let near = lerp(near_lo, near_hi, frac_y);
+            let far_lo = lerp(node(plane, x0, y0, z1), node(plane, x1, y0, z1), frac_x);
+            let far_hi = lerp(node(plane, x0, y1, z1), node(plane, x1, y1, z1), frac_x);
+            let far = lerp(far_lo, far_hi, frac_y);
+            lerp(near, far, frac_z)
+        };
+
+        let clut_r = trilinear(red_plane);
+        let clut_g = trilinear(green_plane);
+        let clut_b = trilinear(blue_plane);
+
+        // Output curves are looked up per pixel, after the CLUT lookup.
+        let pcs_r: f32 =
+            lut_interp_linear_float(clut_r, transform.output_clut_table_r.as_ref().unwrap());
+        let pcs_g: f32 =
+            lut_interp_linear_float(clut_g, transform.output_clut_table_g.as_ref().unwrap());
+        let pcs_b: f32 =
+            lut_interp_linear_float(clut_b, transform.output_clut_table_b.as_ref().unwrap());
+
+        out_px[0] = clamp_float(pcs_r);
+        out_px[1] = clamp_float(pcs_g);
+        out_px[2] = clamp_float(pcs_b);
+    }
+}
+/* NOT USED
+static void qcms_transform_module_tetra_clut(struct qcms_modular_transform *transform, float *src, float *dest, size_t length)
+{
+    size_t i;
+    int xy_len = 1;
+    int x_len = transform->grid_size;
+    int len = x_len * x_len;
+    float* r_table = transform->r_clut;
+    float* g_table = transform->g_clut;
+    float* b_table = transform->b_clut;
+    float c0_r, c1_r, c2_r, c3_r;
+    float c0_g, c1_g, c2_g, c3_g;
+    float c0_b, c1_b, c2_b, c3_b;
+    float clut_r, clut_g, clut_b;
+    float pcs_r, pcs_g, pcs_b;
+    for (i = 0; i < length; i++) {
+        float device_r = *src++;
+        float device_g = *src++;
+        float device_b = *src++;
+        float linear_r = lut_interp_linear_float(device_r,
+                transform->input_clut_table_r, transform->input_clut_table_length);
+        float linear_g = lut_interp_linear_float(device_g,
+                transform->input_clut_table_g, transform->input_clut_table_length);
+        float linear_b = lut_interp_linear_float(device_b,
+                transform->input_clut_table_b, transform->input_clut_table_length);
+
+        int x = floorf(linear_r * (transform->grid_size-1));
+        int y = floorf(linear_g * (transform->grid_size-1));
+        int z = floorf(linear_b * (transform->grid_size-1));
+        int x_n = ceilf(linear_r * (transform->grid_size-1));
+        int y_n = ceilf(linear_g * (transform->grid_size-1));
+        int z_n = ceilf(linear_b * (transform->grid_size-1));
+        float rx = linear_r * (transform->grid_size-1) - x;
+        float ry = linear_g * (transform->grid_size-1) - y;
+        float rz = linear_b * (transform->grid_size-1) - z;
+
+        c0_r = CLU(r_table, x, y, z);
+        c0_g = CLU(g_table, x, y, z);
+        c0_b = CLU(b_table, x, y, z);
+        if( rx >= ry ) {
+            if (ry >= rz) { //rx >= ry && ry >= rz
+                c1_r = CLU(r_table, x_n, y, z) - c0_r;
+                c2_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x_n, y, z);
+                c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
+                c1_g = CLU(g_table, x_n, y, z) - c0_g;
+                c2_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x_n, y, z);
+                c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
+                c1_b = CLU(b_table, x_n, y, z) - c0_b;
+                c2_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x_n, y, z);
+                c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
+            } else {
+                if (rx >= rz) { //rx >= rz && rz >= ry
+                    c1_r = CLU(r_table, x_n, y, z) - c0_r;
+                    c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
+                    c3_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x_n, y, z);
+                    c1_g = CLU(g_table, x_n, y, z) - c0_g;
+                    c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
+                    c3_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x_n, y, z);
+                    c1_b = CLU(b_table, x_n, y, z) - c0_b;
+                    c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
+                    c3_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x_n, y, z);
+                } else { //rz > rx && rx >= ry
+                    c1_r = CLU(r_table, x_n, y, z_n) - CLU(r_table, x, y, z_n);
+                    c2_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y, z_n);
+                    c3_r = CLU(r_table, x, y, z_n) - c0_r;
+                    c1_g = CLU(g_table, x_n, y, z_n) - CLU(g_table, x, y, z_n);
+                    c2_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y, z_n);
+                    c3_g = CLU(g_table, x, y, z_n) - c0_g;
+                    c1_b = CLU(b_table, x_n, y, z_n) - CLU(b_table, x, y, z_n);
+                    c2_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y, z_n);
+                    c3_b = CLU(b_table, x, y, z_n) - c0_b;
+                }
+            }
+        } else {
+            if (rx >= rz) { //ry > rx && rx >= rz
+                c1_r = CLU(r_table, x_n, y_n, z) - CLU(r_table, x, y_n, z);
+                c2_r = CLU(r_table, x_n, y_n, z) - c0_r;
+                c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
+                c1_g = CLU(g_table, x_n, y_n, z) - CLU(g_table, x, y_n, z);
+                c2_g = CLU(g_table, x_n, y_n, z) - c0_g;
+                c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
+                c1_b = CLU(b_table, x_n, y_n, z) - CLU(b_table, x, y_n, z);
+                c2_b = CLU(b_table, x_n, y_n, z) - c0_b;
+                c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
+            } else {
+                if (ry >= rz) { //ry >= rz && rz > rx
+                    c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
+                    c2_r = CLU(r_table, x, y_n, z) - c0_r;
+                    c3_r = CLU(r_table, x, y_n, z_n) - CLU(r_table, x, y_n, z);
+                    c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
+                    c2_g = CLU(g_table, x, y_n, z) - c0_g;
+                    c3_g = CLU(g_table, x, y_n, z_n) - CLU(g_table, x, y_n, z);
+                    c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
+                    c2_b = CLU(b_table, x, y_n, z) - c0_b;
+                    c3_b = CLU(b_table, x, y_n, z_n) - CLU(b_table, x, y_n, z);
+                } else { //rz > ry && ry > rx
+                    c1_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x, y_n, z_n);
+                    c2_r = CLU(r_table, x, y_n, z) - c0_r;
+                    c3_r = CLU(r_table, x_n, y_n, z_n) - CLU(r_table, x_n, y_n, z);
+                    c1_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x, y_n, z_n);
+                    c2_g = CLU(g_table, x, y_n, z) - c0_g;
+                    c3_g = CLU(g_table, x_n, y_n, z_n) - CLU(g_table, x_n, y_n, z);
+                    c1_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x, y_n, z_n);
+                    c2_b = CLU(b_table, x, y_n, z) - c0_b;
+                    c3_b = CLU(b_table, x_n, y_n, z_n) - CLU(b_table, x_n, y_n, z);
+                }
+            }
+        }
+
+        clut_r = c0_r + c1_r*rx + c2_r*ry + c3_r*rz;
+        clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
+        clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
+
+        pcs_r = lut_interp_linear_float(clut_r,
+                transform->output_clut_table_r, transform->output_clut_table_length);
+        pcs_g = lut_interp_linear_float(clut_g,
+                transform->output_clut_table_g, transform->output_clut_table_length);
+        pcs_b = lut_interp_linear_float(clut_b,
+                transform->output_clut_table_b, transform->output_clut_table_length);
+        *dest++ = clamp_float(pcs_r);
+        *dest++ = clamp_float(pcs_g);
+        *dest++ = clamp_float(pcs_b);
+    }
+}
+*/
+/// Applies the three per-channel input curves of `transform` to packed RGB triples.
+///
+/// `src` and `dest` are walked in lock-step, three floats at a time; each value is
+/// looked up with `lut_interp_linear_float` and the result passed through `clamp_float`.
+///
+/// # Panics
+/// Panics if any of the three `input_clut_table_*` fields is `None`.
+fn transform_module_gamma_table(transform: &ModularTransform, src: &[f32], dest: &mut [f32]) {
+    // Resolve the tables once, outside the pixel loop.
+    let tables = [
+        transform.input_clut_table_r.as_ref().unwrap(),
+        transform.input_clut_table_g.as_ref().unwrap(),
+        transform.input_clut_table_b.as_ref().unwrap(),
+    ];
+
+    for (out_px, in_px) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) {
+        for ((out, &value), &table) in out_px.iter_mut().zip(in_px).zip(&tables) {
+            *out = clamp_float(lut_interp_linear_float(value, table));
+        }
+    }
+}
+/// Applies the per-channel output gamma LUTs of `transform` to packed RGB triples,
+/// interpolating each value with `lut_interp_linear` and clamping it with `clamp_float`.
+///
+/// # Panics
+/// Panics if any of the three `output_gamma_lut_*` fields is `None`.
+fn transform_module_gamma_lut(transform: &ModularTransform, src: &[f32], dest: &mut [f32]) {
+    // Loop-invariant: resolve the LUTs once, as `transform_module_gamma_table` does.
+    let lut_r = transform.output_gamma_lut_r.as_ref().unwrap();
+    let lut_g = transform.output_gamma_lut_g.as_ref().unwrap();
+    let lut_b = transform.output_gamma_lut_b.as_ref().unwrap();
+    for (out_px, in_px) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) {
+        out_px[0] = clamp_float(lut_interp_linear(f64::from(in_px[0]), lut_r));
+        out_px[1] = clamp_float(lut_interp_linear(f64::from(in_px[1]), lut_g));
+        out_px[2] = clamp_float(lut_interp_linear(f64::from(in_px[2]), lut_b));
+    }
+}
+/// Multiplies every RGB triple in `src` by `transform.matrix`, adds the translation
+/// `(tx, ty, tz)` and writes the clamped result to `dest`.
+///
+/// For an input triple `(r, g, b)`, output channel `i` is
+/// `m[i][0] * r + m[i][1] * g + m[i][2] * b + t[i]`, passed through `clamp_float`,
+/// where `t` is `transform.tx`, `transform.ty`, `transform.tz` in channel order.
+///
+/// `src` and `dest` are consumed three floats at a time; a trailing partial triple
+/// in either slice is ignored, and iteration stops at the shorter of the two.
+fn transform_module_matrix_translate(
+    transform: &ModularTransform,
+    src: &[f32],
+    dest: &mut [f32],
+) {
+    // Use the matrix rows directly: output channel `i` only needs row `i`, so no
+    // transposed copy of the matrix is required. Each dot product is summed left
+    // to right and the translation is added last.
+    let [row_r, row_g, row_b] = transform.matrix.m;
+    let (tx, ty, tz) = (transform.tx, transform.ty, transform.tz);
+
+    for (out_px, in_px) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) {
+        let r = in_px[0];
+        let g = in_px[1];
+        let b = in_px[2];
+
+        let x = row_r[0] * r + row_r[1] * g + row_r[2] * b + tx;
+        let y = row_g[0] * r + row_g[1] * g + row_g[2] * b + ty;
+        let z = row_b[0] * r + row_b[1] * g + row_b[2] * b + tz;
+
+        // Clamp only after the full sum has been formed.
+        out_px[0] = clamp_float(x);
+        out_px[1] = clamp_float(y);
+        out_px[2] = clamp_float(z);
+    }
+}
+
+/// Multiplies every RGB triple in `src` by `transform.matrix` and writes the clamped
+/// result to `dest`; no translation is applied.
+///
+/// For an input triple `(r, g, b)`, output channel `i` is
+/// `m[i][0] * r + m[i][1] * g + m[i][2] * b`, passed through `clamp_float`.
+///
+/// `src` and `dest` are consumed three floats at a time; a trailing partial triple
+/// in either slice is ignored, and iteration stops at the shorter of the two.
+fn transform_module_matrix(transform: &ModularTransform, src: &[f32], dest: &mut [f32]) {
+    // Use the matrix rows directly: output channel `i` only needs row `i`.
+    // Each dot product is summed left to right.
+    let [row_r, row_g, row_b] = transform.matrix.m;
+
+    for (out_px, in_px) in dest.chunks_exact_mut(3).zip(src.chunks_exact(3)) {
+        let r = in_px[0];
+        let g = in_px[1];
+        let b = in_px[2];
+
+        let x = row_r[0] * r + row_r[1] * g + row_r[2] * b;
+        let y = row_g[0] * r + row_g[1] * g + row_g[2] * b;
+        let z = row_b[0] * r + row_b[1] * g + row_b[2] * b;
+
+        // Clamp only after the full sum has been formed.
+        out_px[0] = clamp_float(x);
+        out_px[1] = clamp_float(y);
+        out_px[2] = clamp_float(z);
+    }
+}
+fn modular_transform_alloc() -> Option<Box<ModularTransform>> {
+    Some(Box::default()) // always `Some`: a boxed, default-initialised stage
+}
+/// Drops a linked list of transforms one node at a time instead of recursively.
+fn modular_transform_release(mut t: Option<Box<ModularTransform>>) {
+    // Unlink the tail before the current node goes out of scope, so dropping a node
+    // never has to drop the rest of the chain through nested `Drop` calls.
+    while let Some(mut transform) = t {
+        t = transform.next_transform.take();
+    }
+}
+/// Links `transform` into the slot `next_transform`; returns the chain's new empty tail slot.
+fn append_transform(
+    transform: Option<Box<ModularTransform>>,
+    mut next_transform: &mut Option<Box<ModularTransform>>,
+) -> &mut Option<Box<ModularTransform>> {
+    *next_transform = transform;
+    while next_transform.is_some() { // `transform` may be a chain: walk to its last link
+        next_transform = &mut next_transform.as_mut().unwrap().next_transform;
+    }
+    next_transform
+}
+/// Reverses the linked list of transforms and returns the new head.
+///
+/// Used for mBA tags, whose stages run in the opposite order to mAB.
+fn reverse_transform(
+    mut transform: Option<Box<ModularTransform>>,
+) -> Option<Box<ModularTransform>> {
+    let mut reversed = None;
+    // Pop nodes off the front of the input and push each onto the front of `reversed`.
+    while let Some(mut node) = transform {
+        // Re-point the node at the already reversed part; its old tail is what remains.
+        transform = std::mem::replace(&mut node.next_transform, reversed);
+        reversed = Some(node);
+    }
+    reversed
+}
+/// Builds the chain of stages described by an mAB (or, when `lut.reversed`, mBA) tag.
+///
+/// Stages are appended in mAB order and only for the parts the tag provides:
+///
+/// 1. A curves followed by the CLUT (taken when `a_curves[0]` is present),
+/// 2. M curves followed by the matrix and its translation (when `m_curves[0]` is present),
+/// 3. B curves (mandatory).
+///
+/// When `lut.reversed` is set the finished list is reversed before it is returned.
+///
+/// Returns `None` when the tag cannot be turned into a runnable chain: the B curves
+/// are absent, A curves are given without a CLUT, the CLUT grid is not the same size
+/// in all three dimensions, the matrix is flagged invalid, or a gamma table cannot be
+/// built for one of the three channels of a curve set.
+///
+/// # Panics
+/// Panics if the CLUT length differs from `3 * num_grid_points[0]^3`.
+fn modular_transform_create_mAB(lut: &lutmABType) -> Option<Box<ModularTransform>> {
+    let mut first_transform = None;
+    let mut next_transform = &mut first_transform;
+
+    if lut.a_curves[0].is_some() {
+        // If the A curve is present this also implies the presence of a CLUT.
+        let clut_table = lut.clut_table.as_ref()?;
+
+        // Prepare A curve. Every channel needs a table: `transform_module_gamma_table`
+        // unwraps all three, so a missing one must fail here rather than panic later.
+        let mut a_stage = modular_transform_alloc()?;
+        a_stage.input_clut_table_r = Some(build_input_gamma_table(lut.a_curves[0].as_deref())?);
+        a_stage.input_clut_table_g = Some(build_input_gamma_table(lut.a_curves[1].as_deref())?);
+        a_stage.input_clut_table_b = Some(build_input_gamma_table(lut.a_curves[2].as_deref())?);
+        a_stage.transform_module_fn = Some(transform_module_gamma_table);
+        next_transform = append_transform(Some(a_stage), next_transform);
+
+        let grid_points = &lut.num_grid_points;
+        if grid_points[0] != grid_points[1] || grid_points[1] != grid_points[2] {
+            //XXX: We don't currently support clut that are not squared!
+            return None;
+        }
+
+        // Prepare CLUT
+        let mut clut_stage = modular_transform_alloc()?;
+        let clut_length = (grid_points[0] as usize).pow(3) * 3;
+        assert_eq!(clut_length, clut_table.len());
+        clut_stage.clut = lut.clut_table.clone();
+        clut_stage.grid_size = grid_points[0] as u16;
+        clut_stage.transform_module_fn = Some(transform_module_clut_only);
+        next_transform = append_transform(Some(clut_stage), next_transform);
+    }
+
+    if lut.m_curves[0].is_some() {
+        // M curve implies the presence of a Matrix
+
+        // Prepare M curve (all three channel tables are required, as for the A curves)
+        let mut m_stage = modular_transform_alloc()?;
+        m_stage.input_clut_table_r = Some(build_input_gamma_table(lut.m_curves[0].as_deref())?);
+        m_stage.input_clut_table_g = Some(build_input_gamma_table(lut.m_curves[1].as_deref())?);
+        m_stage.input_clut_table_b = Some(build_input_gamma_table(lut.m_curves[2].as_deref())?);
+        m_stage.transform_module_fn = Some(transform_module_gamma_table);
+        next_transform = append_transform(Some(m_stage), next_transform);
+
+        // Prepare Matrix
+        let mut matrix_stage = modular_transform_alloc()?;
+        matrix_stage.matrix = build_mAB_matrix(lut);
+        if matrix_stage.matrix.invalid {
+            return None;
+        }
+        matrix_stage.tx = s15Fixed16Number_to_float(lut.e03);
+        matrix_stage.ty = s15Fixed16Number_to_float(lut.e13);
+        matrix_stage.tz = s15Fixed16Number_to_float(lut.e23);
+        matrix_stage.transform_module_fn = Some(transform_module_matrix_translate);
+        next_transform = append_transform(Some(matrix_stage), next_transform);
+    }
+
+    // B curve is mandatory
+    if lut.b_curves[0].is_none() {
+        return None;
+    }
+
+    // Prepare B curve (all three channel tables are required)
+    let mut b_stage = modular_transform_alloc()?;
+    b_stage.input_clut_table_r = Some(build_input_gamma_table(lut.b_curves[0].as_deref())?);
+    b_stage.input_clut_table_g = Some(build_input_gamma_table(lut.b_curves[1].as_deref())?);
+    b_stage.input_clut_table_b = Some(build_input_gamma_table(lut.b_curves[2].as_deref())?);
+    b_stage.transform_module_fn = Some(transform_module_gamma_table);
+    append_transform(Some(b_stage), next_transform);
+
+    if lut.reversed {
+        // mBA are identical to mAB except that the transformation order
+        // is reversed
+        first_transform = reverse_transform(first_transform)
+    }
+    first_transform
+}
+
+/// Builds the two stages for a `lutType` tag.
+///
+/// The first stage is run by `transform_module_matrix` with the matrix returned by
+/// `build_lut_matrix`. The second stage is run by `transform_module_clut` and carries:
+///
+/// * `input_clut_table_{r,g,b}`: the three input curves,
+/// * `clut` and `grid_size`: the lookup table and `num_clut_grid_points`,
+/// * `output_clut_table_{r,g,b}`: the three output curves.
+///
+/// Returns `None` if the matrix built from the tag is flagged invalid.
+///
+/// # Panics
+/// Panics if the CLUT length differs from `3 * num_clut_grid_points^3`, or if the
+/// input or output table is shorter than three runs of its declared entry count.
+fn modular_transform_create_lut(lut: &lutType) -> Option<Box<ModularTransform>> {
+    let mut first_transform = None;
+    let mut next_transform = &mut first_transform;
+
+    // Matrix stage. On any early return below, whatever has been linked into
+    // `first_transform` so far (at most this one stage) is simply dropped.
+    let mut matrix_stage = modular_transform_alloc()?;
+    matrix_stage.matrix = build_lut_matrix(Some(lut));
+    if matrix_stage.matrix.invalid {
+        return None;
+    }
+    matrix_stage.transform_module_fn = Some(transform_module_matrix);
+    next_transform = append_transform(Some(matrix_stage), next_transform);
+
+    // Everything else lives in a single stage: input curves, CLUT and output curves.
+    let mut clut_stage = modular_transform_alloc()?;
+
+    // Prepare input curves: the table stores the R, G and B curves back to back,
+    // `in_len` entries each.
+    let in_len = lut.num_input_table_entries as usize;
+    let in_table = &lut.input_table;
+    clut_stage.input_clut_table_r = Some(in_table[..in_len].to_vec());
+    clut_stage.input_clut_table_g = Some(in_table[in_len..in_len * 2].to_vec());
+    clut_stage.input_clut_table_b = Some(in_table[in_len * 2..in_len * 3].to_vec());
+    clut_stage.input_clut_table_length = lut.num_input_table_entries;
+
+    // Prepare table: `num_clut_grid_points` cubed grid points, three values for
+    // each of them.
+    let clut_length = (lut.num_clut_grid_points as usize).pow(3) * 3;
+    assert_eq!(clut_length, lut.clut_table.len());
+    clut_stage.clut = Some(lut.clut_table.clone());
+    clut_stage.grid_size = lut.num_clut_grid_points as u16;
+
+    // Prepare output curves, laid out the same way with `out_len` entries each.
+    let out_len = lut.num_output_table_entries as usize;
+    let out_table = &lut.output_table;
+    clut_stage.output_clut_table_r = Some(out_table[..out_len].to_vec());
+    clut_stage.output_clut_table_g = Some(out_table[out_len..out_len * 2].to_vec());
+    clut_stage.output_clut_table_b = Some(out_table[out_len * 2..out_len * 3].to_vec());
+    clut_stage.output_clut_table_length = lut.num_output_table_entries;
+
+    clut_stage.transform_module_fn = Some(transform_module_clut);
+    append_transform(Some(clut_stage), next_transform);
+
+    first_transform
+}
+
+/// Builds the device RGB -> PCS half of the chain for the `input` profile.
+///
+/// The first applicable source wins:
+///
+/// 1. `A2B0`: the chain built by `modular_transform_create_lut`;
+/// 2. `mAB`, when it has exactly 3 input and 3 output channels: the chain built by
+///    `modular_transform_create_mAB`;
+/// 3. otherwise three stages built from the profile's TRC and colorant data:
+///    * the red/green/blue TRC curves (`transform_module_gamma_table`),
+///    * a uniform scale by `1 / 1.999_969_5` (`transform_module_matrix`),
+///    * the matrix from `build_colorant_matrix` (`transform_module_matrix`).
+///
+/// An `mAB` tag with any other channel count is not an error; source 3 is used.
+/// The chain for source 3 always has exactly three stages.
+///
+/// # Returns
+/// The head of the chain, or `None` when the selected source cannot be turned into
+/// one. For source 3 that is the case when `build_input_gamma_table` yields no table
+/// for one of the three curves.
+fn modular_transform_create_input(input: &Profile) -> Option<Box<ModularTransform>> {
+    // 1. LUT-based tag.
+    if let Some(lut) = input.A2B0.as_deref() {
+        return modular_transform_create_lut(lut);
+    }
+
+    // 2. Three-channel mAB tag.
+    if let Some(mab) = input.mAB.as_deref() {
+        if mab.num_in_channels == 3 && mab.num_out_channels == 3 {
+            return modular_transform_create_mAB(mab);
+        }
+    }
+
+    // 3. TRC curves + colorant matrix.
+    let mut first_transform = None;
+    let mut next_transform = &mut first_transform;
+
+    // Per-channel curves. All three tables are required because
+    // `transform_module_gamma_table` unwraps each of them.
+    // `Some(table?)` bails out with `None` as soon as one channel has no table.
+    let mut trc_stage = modular_transform_alloc()?;
+    trc_stage.input_clut_table_r = Some(build_input_gamma_table(input.redTRC.as_deref())?);
+    trc_stage.input_clut_table_g = Some(build_input_gamma_table(input.greenTRC.as_deref())?);
+    trc_stage.input_clut_table_b = Some(build_input_gamma_table(input.blueTRC.as_deref())?);
+    trc_stage.transform_module_fn = Some(transform_module_gamma_table);
+    next_transform = append_transform(Some(trc_stage), next_transform);
+
+    // Uniform scale of all three channels.
+    // Off-diagonal entries are zero, so the channels stay independent.
+    // NOTE(review): 1.999_969_5 looks like 1 + 32767/32768 at f32 precision;
+    // presumably the upper bound of the PCS XYZ encoding. Confirm against the
+    // ICC specification.
+    let mut scale_stage = modular_transform_alloc()?;
+    let scale = 1. / 1.999_969_5;
+    scale_stage.matrix.m = [
+        [scale, 0.0, 0.0],
+        [0.0, scale, 0.0],
+        [0.0, 0.0, scale],
+    ];
+    scale_stage.matrix.invalid = false;
+    scale_stage.transform_module_fn = Some(transform_module_matrix);
+    next_transform = append_transform(Some(scale_stage), next_transform);
+
+    // Colorant matrix: the last stage multiplies by the matrix that
+    // `build_colorant_matrix` derives from the profile.
+    let mut colorant_stage = modular_transform_alloc()?;
+    colorant_stage.matrix = build_colorant_matrix(input);
+    colorant_stage.transform_module_fn = Some(transform_module_matrix);
+    append_transform(Some(colorant_stage), next_transform);
+
+    first_transform
+}
+/// Builds the PCS -> device RGB half of the chain for the `out` profile.
+///
+/// The first applicable source wins:
+///
+/// 1. `B2A0`: the chain built by `modular_transform_create_lut`;
+/// 2. `mBA`, when it has exactly 3 input and 3 output channels: the chain built by
+///    `modular_transform_create_mAB`;
+/// 3. otherwise, when all three TRC curves are present, three stages:
+///    * the inverted matrix from `build_colorant_matrix` (`transform_module_matrix`),
+///    * a uniform scale by `1.999_969_5` (`transform_module_matrix`),
+///    * the output gamma LUTs from `build_output_lut` (`transform_module_gamma_lut`).
+///
+/// # Returns
+/// The head of the chain, or `None` when the selected source cannot be turned into
+/// one or when no source applies (the latter also trips a `debug_assert!`).
+fn modular_transform_create_output(out: &Profile) -> Option<Box<ModularTransform>> {
+    // 1. LUT-based tag.
+    if let Some(lut) = out.B2A0.as_deref() {
+        return modular_transform_create_lut(lut);
+    }
+
+    // 2. Three-channel mBA tag.
+    if let Some(mba) = out.mBA.as_deref() {
+        if mba.num_in_channels == 3 && mba.num_out_channels == 3 {
+            return modular_transform_create_mAB(mba);
+        }
+    }
+
+    // 3. Colorant matrix + TRC curves; every curve must be present.
+    let (red_trc, green_trc, blue_trc) = match (
+        out.redTRC.as_deref(),
+        out.greenTRC.as_deref(),
+        out.blueTRC.as_deref(),
+    ) {
+        (Some(red), Some(green), Some(blue)) => (red, green, blue),
+        _ => {
+            debug_assert!(false, "Unsupported output profile workflow.");
+            return None;
+        }
+    };
+
+    let mut first_transform = None;
+    let mut next_transform = &mut first_transform;
+
+    // Inverse of the colorant matrix.
+    // NOTE(review): the result of `invert()` is used without looking at `matrix.invalid`;
+    // confirm whether a non-invertible colorant matrix needs to be rejected here.
+    let mut colorant_stage = modular_transform_alloc()?;
+    colorant_stage.matrix = build_colorant_matrix(out).invert();
+    colorant_stage.transform_module_fn = Some(transform_module_matrix);
+    next_transform = append_transform(Some(colorant_stage), next_transform);
+
+    // Uniform scale of all three channels.
+    let mut scale_stage = modular_transform_alloc()?;
+    let scale = 1.999_969_5;
+    scale_stage.matrix.m = [
+        [scale, 0.0, 0.0],
+        [0.0, scale, 0.0],
+        [0.0, 0.0, scale],
+    ];
+    scale_stage.matrix.invalid = false;
+    scale_stage.transform_module_fn = Some(transform_module_matrix);
+    next_transform = append_transform(Some(scale_stage), next_transform);
+
+    // Output gamma: `build_output_lut` hands back the LUT itself, so it is always `Some`.
+    let mut gamma_stage = modular_transform_alloc()?;
+    gamma_stage.output_gamma_lut_r = Some(build_output_lut(red_trc));
+    gamma_stage.output_gamma_lut_g = Some(build_output_lut(green_trc));
+    gamma_stage.output_gamma_lut_b = Some(build_output_lut(blue_trc));
+    gamma_stage.transform_module_fn = Some(transform_module_gamma_lut);
+    append_transform(Some(gamma_stage), next_transform);
+    first_transform
+}
+/* Not Completed
+// Simplify the transformation chain to an equivalent transformation chain
+static struct qcms_modular_transform* qcms_modular_transform_reduce(struct qcms_modular_transform *transform)
+{
+    struct qcms_modular_transform *first_transform = NULL;
+    struct qcms_modular_transform *curr_trans = transform;
+    struct qcms_modular_transform *prev_trans = NULL;
+    while (curr_trans) {
+        struct qcms_modular_transform *next_trans = curr_trans->next_transform;
+        if (curr_trans->transform_module_fn == qcms_transform_module_matrix) {
+            if (next_trans && next_trans->transform_module_fn == qcms_transform_module_matrix) {
+                curr_trans->matrix = matrix_multiply(curr_trans->matrix, next_trans->matrix);
+                goto remove_next;
+            }
+        }
+        if (curr_trans->transform_module_fn == qcms_transform_module_gamma_table) {
+            bool isLinear = true;
+            uint16_t i;
+            for (i = 0; isLinear && i < 256; i++) {
+                isLinear &= (int)(curr_trans->input_clut_table_r[i] * 255) == i;
+                isLinear &= (int)(curr_trans->input_clut_table_g[i] * 255) == i;
+                isLinear &= (int)(curr_trans->input_clut_table_b[i] * 255) == i;
+            }
+            goto remove_current;
+        }
+
+next_transform:
+        if (!next_trans) break;
+        prev_trans = curr_trans;
+        curr_trans = next_trans;
+        continue;
+remove_current:
+        if (curr_trans == transform) {
+            //Update head
+            transform = next_trans;
+        } else {
+            prev_trans->next_transform = next_trans;
+        }
+        curr_trans->next_transform = NULL;
+        qcms_modular_transform_release(curr_trans);
+        //return transform;
+        return qcms_modular_transform_reduce(transform);
+remove_next:
+        curr_trans->next_transform = next_trans->next_transform;
+        next_trans->next_transform = NULL;
+        qcms_modular_transform_release(next_trans);
+        continue;
+    }
+    return transform;
+}
+*/
+/// Builds the full chain from `input` device RGB to `output` device RGB: the input
+/// stages, a Lab <-> XYZ step when the two PCS differ, then the output stages.
+///
+/// Returns `None` if either colour space is not RGB or either half cannot be built.
+fn modular_transform_create(
+    input: &Profile,
+    output: &Profile,
+) -> Option<Box<ModularTransform>> {
+    let mut first_transform = None;
+    let mut next_transform = &mut first_transform;
+
+    if input.color_space != RGB_SIGNATURE {
+        debug_assert!(false, "input color space not supported");
+        return None;
+    }
+    let rgb_to_pcs = modular_transform_create_input(input)?;
+    next_transform = append_transform(Some(rgb_to_pcs), next_transform);
+
+    if input.pcs == LAB_SIGNATURE && output.pcs == XYZ_SIGNATURE {
+        let mut lab_to_pcs = modular_transform_alloc()?;
+        lab_to_pcs.transform_module_fn = Some(transform_module_LAB_to_XYZ);
+        next_transform = append_transform(Some(lab_to_pcs), next_transform);
+    }
+
+    // This does not improve accuracy in practice, something is wrong here.
+    //if (in->chromaticAdaption.invalid == false) {
+    //	struct qcms_modular_transform* chromaticAdaption;
+    //	chromaticAdaption = qcms_modular_transform_alloc();
+    //	if (!chromaticAdaption)
+    //		goto fail;
+    //	append_transform(chromaticAdaption, &next_transform);
+    //	chromaticAdaption->matrix = matrix_invert(in->chromaticAdaption);
+    //	chromaticAdaption->transform_module_fn = qcms_transform_module_matrix;
+    //}
+
+    if input.pcs == XYZ_SIGNATURE && output.pcs == LAB_SIGNATURE {
+        let mut pcs_to_lab = modular_transform_alloc()?;
+        pcs_to_lab.transform_module_fn = Some(transform_module_XYZ_to_LAB);
+        next_transform = append_transform(Some(pcs_to_lab), next_transform);
+    }
+
+    if output.color_space != RGB_SIGNATURE {
+        debug_assert!(false, "output color space not supported");
+        // Without output stages the chain would stop at the PCS; fail like the input side does.
+        return None;
+    }
+    let pcs_to_rgb = modular_transform_create_output(output)?;
+    append_transform(Some(pcs_to_rgb), next_transform);
+    // Not Completed
+    //return qcms_modular_transform_reduce(first_transform);
+    first_transform
+}
+/// Runs `src` through every stage of the chain starting at `transform` and returns
+/// the final buffer; `src` and `dest` alternate as each stage's input and output.
+/// `_len` is unused and the result is always `Some`.
+///
+/// Panics if a stage has no `transform_module_fn`.
+fn modular_transform_data(
+    mut transform: Option<&ModularTransform>,
+    mut src: Vec<f32>,
+    mut dest: Vec<f32>,
+    _len: usize,
+) -> Option<Vec<f32>> {
+    while let Some(stage) = transform {
+        let run_stage = stage.transform_module_fn.expect("non-null function pointer");
+        run_stage(stage, &src, &mut dest);
+        // Keep swapping src/dest when performing a transform to use less memory.
+        std::mem::swap(&mut src, &mut dest);
+        transform = stage.next_transform.as_deref();
+    }
+    // The results end up in the src buffer because of the switching
+    Some(src)
+}
+
+/// Builds the chain from `input` to `output` and runs `src` through it, using `dest`
+/// as the second working buffer; the chain is released before returning.
+/// Returns `None` when no chain can be built for the two profiles.
+pub fn chain_transform(
+    input: &Profile,
+    output: &Profile,
+    src: Vec<f32>,
+    dest: Vec<f32>,
+    lutSize: usize,
+) -> Option<Vec<f32>> {
+    let transform_list = modular_transform_create(input, output)?;
+    let lut = modular_transform_data(Some(&*transform_list), src, dest, lutSize / 3);
+    modular_transform_release(Some(transform_list));
+    lut
+}
diff --git a/gfx/qcms/src/gtest.rs b/gfx/qcms/src/gtest.rs
new file mode 100644
index 0000000000..d47a4829e6
--- /dev/null
+++ b/gfx/qcms/src/gtest.rs
@@ -0,0 +1,887 @@
+#[cfg(all(test, feature = "c_bindings"))]
+mod gtest {
+    use crate::{
+        c_bindings::*, iccread::*, transform::DataType::*, transform::*,
+        transform_util::lut_inverse_interp16, Intent::Perceptual,
+    };
+    use libc::c_void;
+    use std::ptr::null_mut;
+
+    #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+    use crate::transform_neon::{
+        qcms_transform_data_bgra_out_lut_neon, qcms_transform_data_rgb_out_lut_neon,
+        qcms_transform_data_rgba_out_lut_neon,
+    };
+
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    use crate::{
+        transform_avx::{
+            qcms_transform_data_bgra_out_lut_avx, qcms_transform_data_rgb_out_lut_avx,
+            qcms_transform_data_rgba_out_lut_avx,
+        },
+        transform_sse2::{
+            qcms_transform_data_bgra_out_lut_sse2, qcms_transform_data_rgb_out_lut_sse2,
+            qcms_transform_data_rgba_out_lut_sse2,
+        },
+    };
+
+    #[test]
+    fn test_lut_inverse_crash() {
+        // Table 1: three zero entries, a single mid-scale step (0x8000), then
+        // saturated at 0xFFFF for the remaining entries.
+        let mut lutTable1 = [0xFFFFu16; 128];
+        lutTable1[..3].copy_from_slice(&[0x0000; 3]);
+        lutTable1[3] = 0x8000;
+
+        // Table 2: starts just below full scale (0xFFF0) and is saturated after that.
+        let mut lutTable2 = [0xFFFFu16; 128];
+        lutTable2[0] = 0xFFF0;
+
+        // Crash/Assert test: only checks that the lookups return.
+        lut_inverse_interp16(5, &lutTable1);
+        lut_inverse_interp16(5, &lutTable2);
+    }
+
+    #[test]
+    fn test_lut_inverse() {
+        // mimic sRGB_v4_ICC mBA Output
+        //
+        //       XXXX
+        //      X
+        //     X
+        // XXXX
+        const RAMP_START: usize = 20;
+        const RAMP_END: usize = 200;
+
+        // Entries below RAMP_START keep their initial value of 0.
+        let mut lutTable: [u16; 256] = [0; 256];
+        for (i, entry) in lutTable.iter_mut().enumerate().skip(RAMP_START) {
+            *entry = if i < RAMP_END {
+                // Linear ramp from 0 up towards 0xFFFF.
+                ((i - RAMP_START) * 0xFFFF / (RAMP_END - RAMP_START)) as u16
+            } else {
+                // Saturated tail.
+                0xFFFF
+            };
+        }
+
+        // Exercise every input in 0..65535 to make sure none of them misbehaves.
+        for i in 0..65535 {
+            lut_inverse_interp16(i, &lutTable);
+        }
+
+        // Lookup the interesting points
+
+        let value = lut_inverse_interp16(0, &lutTable);
+        assert!(value <= 20 * 256);
+
+        let value = lut_inverse_interp16(1, &lutTable);
+        assert!(value > 20 * 256);
+
+        let value = lut_inverse_interp16(65535, &lutTable);
+        assert!(value < 201 * 256);
+    }
+
+    // this test takes too long to run on miri
+    #[cfg(not(miri))]
+    #[test]
+    fn test_lut_inverse_non_monotonic() {
+        // Make sure we behave sanely for non-monotonic functions
+        //   X  X  X
+        //  X  X  X
+        // X  X  X
+        let mut lutTable: [u16; 256] = [0; 256];
+
+        // Three consecutive ramps, each restarting from zero at its first index.
+        let segments: [(usize, usize); 3] = [(0, 100), (100, 200), (200, 256)];
+        for &(start, end) in segments.iter() {
+            for i in start..end {
+                lutTable[i] = ((i - start) * 0xFFFF / (end - start)) as u16;
+            }
+        }
+
+        for i in 0..65535 {
+            lut_inverse_interp16(i, &lutTable);
+        }
+
+        // Make sure we don't crash, hang or let sanitizers do their magic
+    }
+    /* qcms_data_create_rgb_with_gamma is broken
+    #[test]
+    fn profile_from_gamma() {
+
+        let white_point = qcms_CIE_xyY { x: 0.64, y: 0.33, Y: 1.};
+        let primaries = qcms_CIE_xyYTRIPLE {
+            red: qcms_CIE_xyY { x: 0.64, y: 0.33, Y: 1.},
+            green: qcms_CIE_xyY { x: 0.21, y: 0.71, Y: 1.},
+            blue: qcms_CIE_xyY { x: 0.15, y: 0.06, Y: 1.}
+        };
+        let mut mem: *mut libc::c_void = std::ptr::null_mut();
+        let mut size: size_t = 0;
+        unsafe { qcms_data_create_rgb_with_gamma(white_point, primaries, 2.2, &mut mem, &mut size); }
+        assert!(size != 0)
+    }
+    */
+
+    #[test]
+    fn alignment() {
+        // `qcms_transform` is expected to be 16-byte aligned.
+        let transform_align = std::mem::align_of::<qcms_transform>();
+        assert_eq!(transform_align, 16);
+    }
+
+    /// Smoke test: creates an sRGB -> gamma-2.2 Rec.709 transform through the C
+    /// bindings and applies it in place to a zeroed RGB buffer.
+    #[test]
+    fn basic() {
+        let sRGB_profile = crate::c_bindings::qcms_profile_sRGB();
+        assert!(!sRGB_profile.is_null());
+
+        let Rec709Primaries = qcms_CIE_xyYTRIPLE {
+            red: qcms_CIE_xyY {
+                x: 0.6400f64,
+                y: 0.3300f64,
+                Y: 1.0f64,
+            },
+            green: qcms_CIE_xyY {
+                x: 0.3000f64,
+                y: 0.6000f64,
+                Y: 1.0f64,
+            },
+            blue: qcms_CIE_xyY {
+                x: 0.1500f64,
+                y: 0.0600f64,
+                Y: 1.0f64,
+            },
+        };
+        let D65 = qcms_white_point_sRGB();
+        let other = unsafe { qcms_profile_create_rgb_with_gamma(D65, Rec709Primaries, 2.2) };
+        // Every pointer below is dereferenced, so fail loudly instead of
+        // dereferencing null if creation failed.
+        assert!(!other.is_null());
+        unsafe { qcms_profile_precache_output_transform(&mut *other) };
+
+        let transform = unsafe {
+            qcms_transform_create(
+                &mut *sRGB_profile,
+                RGB8,
+                &mut *other,
+                RGB8,
+                Perceptual,
+            )
+        };
+        assert!(!transform.is_null());
+
+        let mut data: [u8; 120] = [0; 120];
+        let pixel_count = data.len() / 3;
+        // The transform runs in place: derive the source and destination pointers
+        // from a single mutable borrow so that taking the write pointer does not
+        // invalidate the read pointer.
+        let data_ptr = data.as_mut_ptr();
+
+        unsafe {
+            qcms_transform_data(
+                &*transform,
+                data_ptr as *const libc::c_void,
+                data_ptr as *mut libc::c_void,
+                pixel_count,
+            )
+        };
+
+        unsafe {
+            qcms_transform_release(transform);
+            qcms_profile_release(sRGB_profile);
+            qcms_profile_release(other);
+        }
+    }
+
+    /// Converts two gray+alpha pixels to RGBA through the C bindings and checks
+    /// that gray is replicated into R, G and B while alpha is passed through.
+    #[test]
+    fn gray_alpha() {
+        let sRGB_profile = qcms_profile_sRGB();
+        let gray_profile = unsafe { qcms_profile_create_gray_with_gamma(2.2) };
+        unsafe { qcms_profile_precache_output_transform(&mut *gray_profile) };
+
+        let transform = unsafe {
+            qcms_transform_create(
+                &mut *gray_profile,
+                GrayA8,
+                &mut *sRGB_profile,
+                RGBA8,
+                Perceptual,
+            )
+        };
+        assert!(!transform.is_null());
+
+        // Two (gray, alpha) pixels: black/opaque and white/transparent.
+        let in_data: [u8; 4] = [0, 255, 255, 0];
+        let mut out_data: [u8; 2 * 4] = [0; 8];
+        let pixel_count = in_data.len() / 2;
+        unsafe {
+            qcms_transform_data(
+                &*transform,
+                in_data.as_ptr() as *const libc::c_void,
+                out_data.as_mut_ptr() as *mut libc::c_void,
+                pixel_count,
+            )
+        };
+
+        assert_eq!(out_data, [0, 0, 0, 255, 255, 255, 255, 0]);
+        unsafe {
+            qcms_transform_release(transform);
+            qcms_profile_release(sRGB_profile);
+            qcms_profile_release(gray_profile);
+        }
+    }
+    /// Checks that every ICC sample under `fuzz/samples` listed below parses into
+    /// a non-null profile once ICC v4 support is enabled.
+    #[test]
+    fn samples() {
+        let mut d = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+        qcms_enable_iccv4();
+        d.push("fuzz");
+        d.push("samples");
+        let samples = [
+            "0220-ca351238d719fd07ef8607d326b398fe.icc",
+            "0372-973178997787ee780b4b58ee47cad683.icc",
+            "0744-0a5faafe175e682b10c590b03d3f093b.icc",
+            "0316-eb3f97ab646cd7b66bee80bdfe6098ac.icc",
+            "0732-80707d91aea0f8e64ef0286cc7720e99.icc",
+            "1809-2bd4b77651214ca6110fdbee2502671e.icc",
+        ];
+        for s in samples.iter() {
+            // `join` builds the per-sample path without cloning the directory twice.
+            let data = std::fs::read(d.join(s)).unwrap();
+            let profile =
+                unsafe { qcms_profile_from_memory(data.as_ptr() as *const c_void, data.len()) };
+            assert_ne!(profile, std::ptr::null_mut());
+            unsafe { qcms_profile_release(profile) };
+        }
+    }
+
+    /// Transforms four reference colours from the stripped DisplayCAL LUT profile
+    /// to sRGB and compares the result against known-good output bytes.
+    #[test]
+    fn v4() {
+        qcms_enable_iccv4();
+
+        // this profile was made by taking the lookup table profile from
+        // http://displaycal.net/icc-color-management-test/ and removing
+        // the unneeded tables using lcms
+        let profile_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
+            .join("profiles")
+            .join("displaycal-lut-stripped.icc");
+        let data = std::fs::read(profile_path).unwrap();
+
+        let profile =
+            unsafe { qcms_profile_from_memory(data.as_ptr() as *const c_void, data.len()) };
+        assert_ne!(profile, std::ptr::null_mut());
+
+        let srgb_profile = qcms_profile_sRGB();
+        assert_ne!(srgb_profile, std::ptr::null_mut());
+
+        unsafe { qcms_profile_precache_output_transform(&mut *srgb_profile) };
+
+        // Use the rendering intent recorded in the input profile itself.
+        let intent = unsafe { qcms_profile_get_rendering_intent(&*profile) };
+        let transform =
+            unsafe { qcms_transform_create(&*profile, RGB8, &*srgb_profile, RGB8, intent) };
+        assert_ne!(transform, std::ptr::null_mut());
+
+        const SRC_SIZE: usize = 4;
+        let src: [u8; SRC_SIZE * 3] = [
+            246, 246, 246, // gray
+            255, 0, 0, // red
+            0, 255, 255, // cyan
+            255, 255, 0, // yellow
+        ];
+        let mut dst: [u8; SRC_SIZE * 3] = [0; SRC_SIZE * 3];
+
+        // the reference values here should be adjusted if the accuracy
+        // of the transformation changes
+        let reference = [
+            246, 246, 246, // gray
+            255, 0, 0, // red
+            248, 14, 22, // red
+            0, 0, 255, // blue
+        ];
+
+        unsafe {
+            qcms_transform_data(
+                &*transform,
+                src.as_ptr() as *const libc::c_void,
+                dst.as_mut_ptr() as *mut libc::c_void,
+                SRC_SIZE,
+            );
+        }
+
+        assert_eq!(reference, dst);
+        unsafe {
+            qcms_transform_release(transform);
+            qcms_profile_release(profile);
+            qcms_profile_release(srgb_profile);
+        }
+    }
+
+    /// Returns `true` when channel `index` of `reference` and `test` differs by
+    /// at most one. Panics if `index` is out of bounds for either slice.
+    fn CmpRgbChannel(reference: &[u8], test: &[u8], index: usize) -> bool {
+        let expected = i32::from(reference[index]);
+        let actual = i32::from(test[index]);
+        let delta = expected - actual;
+        (-1..=1).contains(&delta)
+    }
+
+    /// Compares the first `pixels` pixels of `refBuffer` and `testBuffer`.
+    ///
+    /// Colour channels may differ by at most one (see `CmpRgbChannel`); alpha,
+    /// when `hasAlpha` is set, must match exactly. `kSwapRB` selects BGR(A)
+    /// channel order instead of RGB(A). On the first out-of-tolerance pixel the
+    /// exact-equality assertions fire so that the failure reports the offending
+    /// channel values.
+    fn CmpRgbBufferImpl(
+        refBuffer: &[u8],
+        testBuffer: &[u8],
+        pixels: usize,
+        kSwapRB: bool,
+        hasAlpha: bool,
+    ) -> bool {
+        let pixelSize = if hasAlpha { 4 } else { 3 };
+        let byteLen = pixels * pixelSize;
+        // Fast path: byte-identical buffers need no per-channel work.
+        if refBuffer[..byteLen] == testBuffer[..byteLen] {
+            return true;
+        }
+
+        let (kRIndex, kBIndex) = if kSwapRB { (2, 0) } else { (0, 2) };
+        let kGIndex = 1;
+        let kAIndex = 3;
+
+        let refPixels = refBuffer[..byteLen].chunks_exact(pixelSize);
+        let testPixels = testBuffer[..byteLen].chunks_exact(pixelSize);
+        for (reference, test) in refPixels.zip(testPixels) {
+            let colorOk = [kRIndex, kGIndex, kBIndex]
+                .iter()
+                .all(|&channel| CmpRgbChannel(reference, test, channel));
+            let alphaOk = !hasAlpha || reference[kAIndex] == test[kAIndex];
+            if !(colorOk && alphaOk) {
+                assert_eq!(test[kRIndex], reference[kRIndex]);
+                assert_eq!(test[kGIndex], reference[kGIndex]);
+                assert_eq!(test[kBIndex], reference[kBIndex]);
+                if hasAlpha {
+                    assert_eq!(test[kAIndex], reference[kAIndex]);
+                }
+                return false;
+            }
+        }
+
+        true
+    }
+
+    /// Builds the sample input image used by the transform comparison tests.
+    ///
+    /// The buffer holds three consecutive sweeps of `256 * 8 * 8` pixels each:
+    /// every red value against a subset of green/blue, then every green value
+    /// against a subset of red/blue, then every blue value against a subset of
+    /// red/green. `kSwapRB` stores pixels as BGR(A) instead of RGB(A);
+    /// `kHasAlpha` adds a fourth byte per pixel, always `0x80`.
+    ///
+    /// Returns `(pixel_count, bytes)`.
+    fn GetRgbInputBufferImpl(kSwapRB: bool, kHasAlpha: bool) -> (usize, Vec<u8>) {
+        let colorSamples = [0, 5, 16, 43, 101, 127, 182, 255];
+        let colorSampleMax = colorSamples.len();
+        let pixelSize = if kHasAlpha { 4 } else { 3 };
+        let pixelCount = colorSampleMax * colorSampleMax * 256 * 3;
+
+        let mut outBuffer = vec![0; pixelCount * pixelSize];
+
+        let kRIndex = if kSwapRB { 2 } else { 0 };
+        let kGIndex = 1;
+        let kBIndex = if kSwapRB { 0 } else { 2 };
+        let kAIndex = 3;
+
+        {
+            // A single cursor shared by all three sweeps, so each sweep continues
+            // where the previous one stopped instead of overwriting it.
+            let mut slots = outBuffer.chunks_exact_mut(pixelSize);
+            let mut put = |r: u8, g: u8, b: u8| {
+                let color = slots
+                    .next()
+                    .expect("pixelCount reserves room for all three sweeps");
+                color[kRIndex] = r;
+                color[kGIndex] = g;
+                color[kBIndex] = b;
+                if kHasAlpha {
+                    color[kAIndex] = 0x80;
+                }
+            };
+
+            // Sample every red pixel value with a subset of green and blue.
+            // we use a u16 for r to avoid https://github.com/rust-lang/rust/issues/78283
+            for r in 0..=255u16 {
+                for &g in colorSamples.iter() {
+                    for &b in colorSamples.iter() {
+                        put(r as u8, g, b);
+                    }
+                }
+            }
+
+            // Sample every green pixel value with a subset of red and blue.
+            for &r in colorSamples.iter() {
+                for g in 0..=255u16 {
+                    for &b in colorSamples.iter() {
+                        put(r, g as u8, b);
+                    }
+                }
+            }
+
+            // Sample every blue pixel value with a subset of red and green.
+            for &r in colorSamples.iter() {
+                for &g in colorSamples.iter() {
+                    for b in 0..=255u16 {
+                        put(r, g, b as u8);
+                    }
+                }
+            }
+        }
+
+        (pixelCount, outBuffer)
+    }
+
+    /// Builds the sample input buffer with no R/B swap and no alpha channel.
+    /// Returns whatever `GetRgbInputBufferImpl(false, false)` produces.
+    fn GetRgbInputBuffer() -> (usize, Vec<u8>) {
+        GetRgbInputBufferImpl(false, false)
+    }
+
+    /// Builds the sample input buffer with no R/B swap and with an alpha channel.
+    /// Returns whatever `GetRgbInputBufferImpl(false, true)` produces.
+    fn GetRgbaInputBuffer() -> (usize, Vec<u8>) {
+        GetRgbInputBufferImpl(false, true)
+    }
+
+    /// Builds the sample input buffer with R and B swapped and with an alpha channel.
+    /// Returns whatever `GetRgbInputBufferImpl(true, true)` produces.
+    fn GetBgraInputBuffer() -> (usize, Vec<u8>) {
+        GetRgbInputBufferImpl(true, true)
+    }
+
+    /// Compares `pixels` pixels of two buffers via `CmpRgbBufferImpl`, with no
+    /// R/B swap and no alpha channel.
+    fn CmpRgbBuffer(refBuffer: &[u8], testBuffer: &[u8], pixels: usize) -> bool {
+        CmpRgbBufferImpl(refBuffer, testBuffer, pixels, false, false)
+    }
+
+    /// Compares `pixels` pixels of two buffers via `CmpRgbBufferImpl`, with no
+    /// R/B swap and with an alpha channel.
+    fn CmpRgbaBuffer(refBuffer: &[u8], testBuffer: &[u8], pixels: usize) -> bool {
+        CmpRgbBufferImpl(refBuffer, testBuffer, pixels, false, true)
+    }
+
+    /// Compares `pixels` pixels of two buffers via `CmpRgbBufferImpl`, with R and
+    /// B swapped and with an alpha channel.
+    fn CmpBgraBuffer(refBuffer: &[u8], testBuffer: &[u8], pixels: usize) -> bool {
+        CmpRgbBufferImpl(refBuffer, testBuffer, pixels, true, true)
+    }
+
+    /// Zeroes the first `pixels * 3` bytes of `buffer` (three bytes per pixel).
+    /// Panics if `buffer` is shorter than that.
+    fn ClearRgbBuffer(buffer: &mut [u8], pixels: usize) {
+        for byte in buffer[..pixels * 3].iter_mut() {
+            *byte = 0;
+        }
+    }
+
+    /// Zeroes the first `pixels * 4` bytes of `buffer` (four bytes per pixel).
+    /// Panics if `buffer` is shorter than that.
+    fn ClearRgbaBuffer(buffer: &mut [u8], pixels: usize) {
+        for byte in buffer[..pixels * 4].iter_mut() {
+            *byte = 0;
+        }
+    }
+
+    /// Allocates a zero-filled buffer large enough for `pixels` three-byte pixels.
+    fn GetRgbOutputBuffer(pixels: usize) -> Vec<u8> {
+        let byte_len = pixels * 3;
+        std::iter::repeat(0).take(byte_len).collect()
+    }
+
+    /// Allocates a zero-filled buffer large enough for `pixels` four-byte pixels.
+    fn GetRgbaOutputBuffer(pixels: usize) -> Vec<u8> {
+        let byte_len = pixels * 4;
+        std::iter::repeat(0).take(byte_len).collect()
+    }
+
+    /// Test fixture holding a profile pair, the transform between them and the
+    /// buffers the transform functions are run on.
+    struct QcmsProfileTest {
+        // Raw handles obtained from the C bindings; null until assigned and
+        // released by `TearDown`.
+        in_profile: *mut Profile,
+        out_profile: *mut Profile,
+        transform: *mut qcms_transform,
+
+        // Source pixels, output of the function under test, and output of the
+        // reference function, respectively.
+        input: Vec<u8>,
+        output: Vec<u8>,
+        reference: Vec<u8>,
+
+        // Number of pixels in `input`, as set by `SetBuffers`.
+        pixels: usize,
+        // Pixel layout the buffers were last prepared for.
+        storage_type: DataType,
+        // Set once `PrecacheOutput` has been called.
+        precache: bool,
+    }
+
+    impl QcmsProfileTest {
+        /// Creates a fixture with null handles, empty buffers and `RGB8` storage.
+        fn new() -> QcmsProfileTest {
+            QcmsProfileTest {
+                in_profile: null_mut(),
+                out_profile: null_mut(),
+                transform: null_mut(),
+                input: Vec::new(),
+                output: Vec::new(),
+                reference: Vec::new(),
+
+                pixels: 0,
+                storage_type: RGB8,
+                precache: false,
+            }
+        }
+
+        /// Enables ICC v4 support before a test runs.
+        fn SetUp(&mut self) {
+            qcms_enable_iccv4();
+        }
+
+        /// Releases every handle the fixture holds and resets it to null, so a
+        /// second call (or later use of the fixture) cannot release it twice.
+        unsafe fn TearDown(&mut self) {
+            if !self.in_profile.is_null() {
+                qcms_profile_release(self.in_profile);
+                self.in_profile = null_mut();
+            }
+
+            if !self.out_profile.is_null() {
+                qcms_profile_release(self.out_profile);
+                self.out_profile = null_mut();
+            }
+
+            if !self.transform.is_null() {
+                qcms_transform_release(self.transform);
+                self.transform = null_mut();
+            }
+        }
+
+        /// Replaces the current transform (releasing the previous one, if any).
+        /// Returns `true` when the new transform is non-null.
+        unsafe fn SetTransform(&mut self, transform: *mut qcms_transform) -> bool {
+            if !self.transform.is_null() {
+                qcms_transform_release(self.transform)
+            }
+            self.transform = transform;
+            !self.transform.is_null()
+        }
+
+        /// Creates a perceptual transform between the fixture's profiles using
+        /// `ty` for both input and output, and installs it via `SetTransform`.
+        unsafe fn SetTransformForType(&mut self, ty: DataType) -> bool {
+            self.SetTransform(qcms_transform_create(
+                &*self.in_profile,
+                ty,
+                &*self.out_profile,
+                ty,
+                Perceptual,
+            ))
+        }
+
+        /// Allocates input, reference and output buffers for pixel layout `ty`.
+        /// Returns `true` when the input contains at least one pixel.
+        unsafe fn SetBuffers(&mut self, ty: DataType) -> bool {
+            match ty {
+                RGB8 => {
+                    let (pixels, input) = GetRgbInputBuffer();
+                    self.input = input;
+                    self.pixels = pixels;
+                    self.reference = GetRgbOutputBuffer(self.pixels);
+                    self.output = GetRgbOutputBuffer(self.pixels)
+                }
+                RGBA8 => {
+                    // Input generator matches the requested channel order.
+                    let (pixels, input) = GetRgbaInputBuffer();
+                    self.input = input;
+                    self.pixels = pixels;
+                    self.reference = GetRgbaOutputBuffer(self.pixels);
+                    self.output = GetRgbaOutputBuffer(self.pixels);
+                }
+                BGRA8 => {
+                    let (pixels, input) = GetBgraInputBuffer();
+                    self.input = input;
+                    self.pixels = pixels;
+                    self.reference = GetRgbaOutputBuffer(self.pixels);
+                    self.output = GetRgbaOutputBuffer(self.pixels);
+                }
+                _ => unreachable!("Unknown type!"),
+            }
+            self.storage_type = ty;
+            self.pixels > 0
+        }
+
+        /// Zeroes the output buffer according to the current storage type.
+        unsafe fn ClearOutputBuffer(&mut self) {
+            match self.storage_type {
+                RGB8 => ClearRgbBuffer(&mut self.output, self.pixels),
+                RGBA8 | BGRA8 => ClearRgbaBuffer(&mut self.output, self.pixels),
+                _ => unreachable!("Unknown type!"),
+            }
+        }
+
+        /// Runs `trans_fn` on the input buffer and stores the result in the
+        /// reference buffer. Panics if `trans_fn` is `None`.
+        unsafe fn ProduceRef(&mut self, trans_fn: transform_fn_t) {
+            trans_fn.unwrap()(
+                &*self.transform,
+                self.input.as_mut_ptr(),
+                self.reference.as_mut_ptr(),
+                self.pixels,
+            )
+        }
+
+        /// Copies the input pixels verbatim into the reference buffer.
+        fn CopyInputToRef(&mut self) {
+            let pixelSize = match self.storage_type {
+                RGB8 => 3,
+                RGBA8 | BGRA8 => 4,
+                _ => unreachable!("Unknown type!"),
+            };
+            self.reference
+                .copy_from_slice(&self.input[..self.pixels * pixelSize])
+        }
+
+        /// Clears the output buffer, then runs `trans_fn` on the input buffer and
+        /// stores the result in the output buffer. Panics if `trans_fn` is `None`.
+        unsafe fn ProduceOutput(&mut self, trans_fn: transform_fn_t) {
+            self.ClearOutputBuffer();
+            trans_fn.unwrap()(
+                &*self.transform,
+                self.input.as_mut_ptr(),
+                self.output.as_mut_ptr(),
+                self.pixels,
+            )
+        }
+
+        /// Compares `buf` (the expected pixels) against the output buffer using
+        /// the comparison that matches the current storage type.
+        unsafe fn VerifyOutput(&self, buf: &[u8]) -> bool {
+            match self.storage_type {
+                RGB8 => CmpRgbBuffer(buf, &self.output, self.pixels),
+                RGBA8 => CmpRgbaBuffer(buf, &self.output, self.pixels),
+                BGRA8 => CmpBgraBuffer(buf, &self.output, self.pixels),
+                _ => unreachable!("Unknown type!"),
+            }
+        }
+
+        /// Produces output with `trans_fn` and checks it against the reference buffer.
+        unsafe fn ProduceVerifyOutput(&mut self, trans_fn: transform_fn_t) -> bool {
+            self.ProduceOutput(trans_fn);
+            self.VerifyOutput(&self.reference)
+        }
+
+        /// Precaches the output profile's transform and records that it was done.
+        unsafe fn PrecacheOutput(&mut self) {
+            qcms_profile_precache_output_transform(&mut *self.out_profile);
+            self.precache = true;
+        }
+
+        /// Checks that the precached RGB path matches the non-precached one.
+        /// Must be called before the output profile has been precached.
+        unsafe fn TransformPrecache(&mut self) {
+            assert_eq!(self.precache, false);
+            assert!(self.SetBuffers(RGB8));
+            assert!(self.SetTransformForType(RGB8));
+            self.ProduceRef(Some(qcms_transform_data_rgb_out_lut));
+
+            self.PrecacheOutput();
+            assert!(self.SetTransformForType(RGB8));
+            assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgb_out_lut_precache)))
+        }
+
+        /// Checks that each platform-specific (SSE2/AVX/NEON) transform matches
+        /// the portable precached implementation for RGB, RGBA and BGRA data.
+        unsafe fn TransformPrecachePlatformExt(&mut self) {
+            self.PrecacheOutput();
+
+            // Verify RGB transforms.
+            assert!(self.SetBuffers(RGB8));
+            assert!(self.SetTransformForType(RGB8));
+            self.ProduceRef(Some(qcms_transform_data_rgb_out_lut_precache));
+
+            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+            {
+                if is_x86_feature_detected!("sse2") {
+                    assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgb_out_lut_sse2)));
+                }
+                if is_x86_feature_detected!("avx") {
+                    assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgb_out_lut_avx)))
+                }
+            }
+
+            #[cfg(target_arch = "arm")]
+            {
+                if is_arm_feature_detected!("neon") {
+                    // Wrapped in `Some` like every other call site of `ProduceVerifyOutput`.
+                    assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgb_out_lut_neon)))
+                }
+            }
+
+            #[cfg(target_arch = "aarch64")]
+            {
+                if is_aarch64_feature_detected!("neon") {
+                    // Wrapped in `Some` like every other call site of `ProduceVerifyOutput`.
+                    assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgb_out_lut_neon)))
+                }
+            }
+
+            // Verify RGBA transform.
+            assert!(self.SetBuffers(RGBA8));
+            assert!(self.SetTransformForType(RGBA8));
+            self.ProduceRef(Some(qcms_transform_data_rgba_out_lut_precache));
+
+            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+            {
+                if is_x86_feature_detected!("sse2") {
+                    assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgba_out_lut_sse2)));
+                }
+                if is_x86_feature_detected!("avx") {
+                    assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgba_out_lut_avx)))
+                }
+            }
+
+            #[cfg(target_arch = "arm")]
+            {
+                if is_arm_feature_detected!("neon") {
+                    assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgba_out_lut_neon)))
+                }
+            }
+
+            #[cfg(target_arch = "aarch64")]
+            {
+                if is_aarch64_feature_detected!("neon") {
+                    assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_rgba_out_lut_neon)))
+                }
+            }
+
+            // Verify BGRA transform.
+            assert!(self.SetBuffers(BGRA8));
+            assert!(self.SetTransformForType(BGRA8));
+            self.ProduceRef(Some(qcms_transform_data_bgra_out_lut_precache));
+
+            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+            {
+                if is_x86_feature_detected!("sse2") {
+                    assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_bgra_out_lut_sse2)));
+                }
+                if is_x86_feature_detected!("avx") {
+                    assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_bgra_out_lut_avx)))
+                }
+            }
+
+            #[cfg(target_arch = "arm")]
+            {
+                if is_arm_feature_detected!("neon") {
+                    assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_bgra_out_lut_neon)))
+                }
+            }
+
+            #[cfg(target_arch = "aarch64")]
+            {
+                if is_aarch64_feature_detected!("neon") {
+                    assert!(self.ProduceVerifyOutput(Some(qcms_transform_data_bgra_out_lut_neon)))
+                }
+            }
+        }
+    }
+
+    // Runs the fixture's `TransformPrecache` check with sRGB as both the input
+    // and the output profile.
+    #[test]
+    fn sRGB_to_sRGB_precache() {
+        unsafe {
+            let mut pt = QcmsProfileTest::new();
+            pt.SetUp();
+            pt.in_profile = qcms_profile_sRGB();
+            pt.out_profile = qcms_profile_sRGB();
+            pt.TransformPrecache();
+            pt.TearDown();
+        }
+    }
+
+    /// An sRGB -> sRGB transform must reproduce its input (within the tolerance
+    /// applied by `VerifyOutput`).
+    #[test]
+    fn sRGB_to_sRGB_transform_identity() {
+        unsafe {
+            let mut pt = QcmsProfileTest::new();
+            pt.SetUp();
+            pt.in_profile = qcms_profile_sRGB();
+            pt.out_profile = qcms_profile_sRGB();
+            pt.PrecacheOutput();
+            // Check the setup results: `pt.transform` is dereferenced below and
+            // must not be null.
+            assert!(pt.SetBuffers(RGB8));
+            assert!(pt.SetTransformForType(RGB8));
+            qcms_transform_data(
+                &*pt.transform,
+                pt.input.as_mut_ptr() as *mut c_void,
+                pt.output.as_mut_ptr() as *mut c_void,
+                pt.pixels,
+            );
+            assert!(pt.VerifyOutput(&pt.input));
+            pt.TearDown();
+        }
+    }
+
+    /// Loads `profiles/<file>` (relative to the crate manifest directory) and
+    /// parses it into a profile. Panics if the file cannot be read or the
+    /// profile cannot be parsed; the caller releases the returned handle.
+    fn profile_from_path(file: &str) -> *mut Profile {
+        let path = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
+            .join("profiles")
+            .join(file);
+        let data = std::fs::read(path).unwrap();
+        let profile =
+            unsafe { qcms_profile_from_memory(data.as_ptr() as *const c_void, data.len()) };
+        assert_ne!(profile, std::ptr::null_mut());
+        profile
+    }
+
+    // Runs the platform-extension comparison with sRGB as input and the
+    // `lcms_thinkpad_w540.icc` profile as output.
+    #[test]
+    fn sRGB_to_ThinkpadW540() {
+        unsafe {
+            let mut pt = QcmsProfileTest::new();
+            pt.SetUp();
+            pt.in_profile = qcms_profile_sRGB();
+            pt.out_profile = profile_from_path("lcms_thinkpad_w540.icc");
+            pt.TransformPrecachePlatformExt();
+            pt.TearDown();
+        }
+    }
+
+    // Runs the platform-extension comparison with sRGB as input and the
+    // `lcms_samsung_syncmaster.icc` profile as output.
+    #[test]
+    fn sRGB_to_SamsungSyncmaster() {
+        unsafe {
+            let mut pt = QcmsProfileTest::new();
+            pt.SetUp();
+            pt.in_profile = qcms_profile_sRGB();
+            pt.out_profile = profile_from_path("lcms_samsung_syncmaster.icc");
+            pt.TransformPrecachePlatformExt();
+            pt.TearDown();
+        }
+    }
+
+    /// Transforms one sRGB pixel through the `B2A0-ident.icc` output profile and
+    /// checks the resulting bytes.
+    #[test]
+    fn v4_output() {
+        qcms_enable_iccv4();
+        let input = qcms_profile_sRGB();
+        // B2A0-ident.icc was created from the profile in bug 1679621
+        // manually edited using iccToXML/iccFromXML
+        let output = profile_from_path("B2A0-ident.icc");
+
+        let transform = unsafe {
+            qcms_transform_create(
+                &*input,
+                RGB8,
+                &*output,
+                RGB8,
+                Perceptual,
+            )
+        };
+        // The transform is dereferenced below; fail with a clear assertion
+        // rather than dereferencing null if creation failed.
+        assert!(!transform.is_null());
+
+        let src = [0u8, 60, 195];
+        let mut dst = [0u8, 0, 0];
+        unsafe {
+            qcms_transform_data(
+                &*transform,
+                src.as_ptr() as *const libc::c_void,
+                dst.as_mut_ptr() as *mut libc::c_void,
+                1,
+            );
+        }
+        assert_eq!(dst, [15, 16, 122]);
+        unsafe {
+            qcms_transform_release(transform);
+            qcms_profile_release(input);
+            qcms_profile_release(output);
+        }
+    }
+
+    /// Smoke test: a gray+alpha -> RGBA transform built through the Rust API can
+    /// be run through the C entry point. The output is not inspected.
+    #[test]
+    fn gray_smoke_test() {
+        let gray_profile = crate::Profile::new_gray_with_gamma(2.2);
+        let srgb_profile = crate::Profile::new_sRGB();
+        let xfm = transform_create(
+            &gray_profile,
+            GrayA8,
+            &srgb_profile,
+            RGBA8,
+            crate::Intent::default(),
+        )
+        .unwrap();
+
+        let src = [20u8, 20u8];
+        let mut dst = [0u8, 0, 0, 0];
+        let pixel_count = src.len() / GrayA8.bytes_per_pixel();
+        unsafe {
+            qcms_transform_data(
+                &xfm,
+                src.as_ptr() as *const libc::c_void,
+                dst.as_mut_ptr() as *mut libc::c_void,
+                pixel_count,
+            );
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use crate::{DataType, Intent, Profile, Transform};
+
+    /// An sRGB -> sRGB transform applied in place must leave the pixel unchanged.
+    #[test]
+    fn identity() {
+        let source = Profile::new_sRGB();
+        let target = Profile::new_sRGB();
+        let xfm = Transform::new(&source, &target, DataType::RGB8, Intent::default()).unwrap();
+
+        let mut data = [4, 30, 80];
+        xfm.apply(&mut data);
+        assert_eq!(data, [4, 30, 80]);
+    }
+}
diff --git a/gfx/qcms/src/iccread.rs b/gfx/qcms/src/iccread.rs
new file mode 100644
index 0000000000..f1107d50a8
--- /dev/null
+++ b/gfx/qcms/src/iccread.rs
@@ -0,0 +1,1179 @@
+/* vim: set ts=8 sw=8 noexpandtab: */
+//  qcms
+//  Copyright (C) 2009 Mozilla Foundation
+//  Copyright (C) 1998-2007 Marti Maria
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+use std::{
+    sync::atomic::{AtomicBool, Ordering},
+    sync::Arc,
+};
+
+use crate::transform::{set_rgb_colorants, PrecacheOuput};
+use crate::{matrix::Matrix, s15Fixed16Number, s15Fixed16Number_to_float, Intent, Intent::*};
+
+/// Flag for ICC v4 support. Its initial value is `true` exactly when the crate
+/// is built with the `iccv4-enabled` feature.
+pub static SUPPORTS_ICCV4: AtomicBool = AtomicBool::new(cfg!(feature = "iccv4-enabled"));
+
+// Four-character signatures (ASCII packed big-endian into a u32) that the
+// header parser accepts for the data color space and the PCS.
+pub const RGB_SIGNATURE: u32 = 0x52474220; // 'RGB '
+pub const GRAY_SIGNATURE: u32 = 0x47524159; // 'GRAY'
+pub const XYZ_SIGNATURE: u32 = 0x58595A20; // 'XYZ '
+pub const LAB_SIGNATURE: u32 = 0x4C616220; // 'Lab '
+
+/// A color profile
+///
+/// Holds the header fields and the tag contents that the parser in this file
+/// extracts from an ICC profile.
+#[repr(C)]
+#[derive(Default)]
+pub struct Profile {
+    /// Profile/device class signature (header offset 12).
+    pub(crate) class_type: u32,
+    /// Data color space signature (header offset 16).
+    pub(crate) color_space: u32,
+    /// Profile connection space signature (header offset 20).
+    pub(crate) pcs: u32,
+    /// Rendering intent (header offset 64).
+    pub(crate) rendering_intent: Intent,
+    // Colorants as XYZ triples of s15Fixed16 numbers.
+    pub(crate) redColorant: XYZNumber,
+    pub(crate) blueColorant: XYZNumber,
+    pub(crate) greenColorant: XYZNumber,
+    // Per-channel tone reproduction curves; `None` when absent.
+    pub(crate) redTRC: Option<Box<curveType>>,
+    pub(crate) blueTRC: Option<Box<curveType>>,
+    pub(crate) greenTRC: Option<Box<curveType>>,
+    pub(crate) grayTRC: Option<Box<curveType>>,
+    // LUT based transforms in lut8/lut16 form; `None` when absent.
+    pub(crate) A2B0: Option<Box<lutType>>,
+    pub(crate) B2A0: Option<Box<lutType>>,
+    // LUT based transforms in mAB/mBA form; `None` when absent.
+    pub(crate) mAB: Option<Box<lutmABType>>,
+    pub(crate) mBA: Option<Box<lutmABType>>,
+    pub(crate) chromaticAdaption: Matrix,
+    // Precached output tables, one per channel, shareable through `Arc`.
+    pub(crate) output_table_r: Option<Arc<PrecacheOuput>>,
+    pub(crate) output_table_g: Option<Arc<PrecacheOuput>>,
+    pub(crate) output_table_b: Option<Arc<PrecacheOuput>>,
+}
+
+/// Parsed contents of a 'mAB ' or 'mBA ' lookup-table tag.
+#[derive(Default)]
+pub(crate) struct lutmABType {
+    pub num_in_channels: u8,
+    pub num_out_channels: u8,
+    // 16 is the upperbound, actual is 0..num_in_channels.
+    pub num_grid_points: [u8; 16],
+    // Matrix element values. The parser fills the 3x3 part (e00..e22) from
+    // the first nine stored values and e03/e13/e23 from the last three.
+    pub e00: s15Fixed16Number,
+    pub e01: s15Fixed16Number,
+    pub e02: s15Fixed16Number,
+    pub e03: s15Fixed16Number,
+    pub e10: s15Fixed16Number,
+    pub e11: s15Fixed16Number,
+    pub e12: s15Fixed16Number,
+    pub e13: s15Fixed16Number,
+    pub e20: s15Fixed16Number,
+    pub e21: s15Fixed16Number,
+    pub e22: s15Fixed16Number,
+    pub e23: s15Fixed16Number,
+    // reversed elements (for mBA)
+    pub reversed: bool,
+    // CLUT samples converted to floats; `None` when the tag carries no CLUT.
+    pub clut_table: Option<Vec<f32>>,
+    // Optional per-channel curve sets; only the first `num_*_channels` slots
+    // are ever written by the parser.
+    pub a_curves: [Option<Box<curveType>>; MAX_CHANNELS],
+    pub b_curves: [Option<Box<curveType>>; MAX_CHANNELS],
+    pub m_curves: [Option<Box<curveType>>; MAX_CHANNELS],
+}
+
+/// A tone curve as stored in a profile.
+pub(crate) enum curveType {
+    /// Table of 16-bit entries, as read from a 'curv' element.
+    Curve(Vec<uInt16Number>),
+    /// Parameters of a parametric function, as read from a 'para' element
+    /// and converted to floats.
+    Parametric(Vec<f32>),
+}
+/// ICC `uInt16Number`: an unsigned 16-bit value.
+type uInt16Number = u16;
+
+/* should lut8Type and lut16Type be different types? */
+/// Parsed contents of a lut8Type ('mft1') or lut16Type ('mft2') tag.
+pub(crate) struct lutType {
+    // used by lut8Type/lut16Type (mft2) only
+    pub num_input_channels: u8,
+    pub num_output_channels: u8,
+    pub num_clut_grid_points: u8,
+    // 3x3 matrix elements, stored row by row in the tag.
+    pub e00: s15Fixed16Number,
+    pub e01: s15Fixed16Number,
+    pub e02: s15Fixed16Number,
+    pub e10: s15Fixed16Number,
+    pub e11: s15Fixed16Number,
+    pub e12: s15Fixed16Number,
+    pub e20: s15Fixed16Number,
+    pub e21: s15Fixed16Number,
+    pub e22: s15Fixed16Number,
+    // Entries per channel in the input/output tables (fixed at 256 for lut8).
+    pub num_input_table_entries: u16,
+    pub num_output_table_entries: u16,
+    // Table contents converted to floats by the parser.
+    pub input_table: Vec<f32>,
+    pub clut_table: Vec<f32>,
+    pub output_table: Vec<f32>,
+}
+
+/// An XYZ triple whose components are kept as raw s15Fixed16 numbers.
+#[repr(C)]
+#[derive(Copy, Clone, Default)]
+pub struct XYZNumber {
+    pub X: s15Fixed16Number,
+    pub Y: s15Fixed16Number,
+    pub Z: s15Fixed16Number,
+}
+
+/// A color in the CIE xyY color space
+/* the names for the following two types are sort of ugly */
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct qcms_CIE_xyY {
+    /// The `x` component.
+    pub x: f64,
+    /// The `y` component.
+    pub y: f64,
+    /// The `Y` component.
+    pub Y: f64,
+}
+
+/// A set of CIE xyY values that can be used to describe the primaries of a
+/// color space.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct qcms_CIE_xyYTRIPLE {
+    pub red: qcms_CIE_xyY,
+    pub green: qcms_CIE_xyY,
+    pub blue: qcms_CIE_xyY,
+}
+
+/// One entry of the profile's tag table, as three consecutive u32 values.
+struct Tag {
+    /// Four-character tag signature (compare with the `TAG_*` constants).
+    signature: u32,
+    /// Offset of the tag data; the readers use it as an offset into the
+    /// whole profile buffer.
+    offset: u32,
+    /// Size field of the table entry.
+    size: u32,
+}
+
+/* It might be worth having a unified limit on content controlled
+ * allocation per profile. This would remove the need for many
+ * of the arbitrary limits that we used */
+/// A `u32` that holds a value in big-endian byte order.
+pub type be32 = u32;
+/// A `u16` that holds a value in big-endian byte order.
+pub type be16 = u16;
+
+/// The tag table, viewed as a slice of entries.
+type TagIndex = [Tag];
+
+/* a wrapper around the memory that we are going to parse
+ * into a qcms_profile */
+struct MemSource<'a> {
+    /// The raw profile bytes.
+    buf: &'a [u8],
+    /// Cleared by `invalid_source` once any read or check has failed.
+    valid: bool,
+    /// Message passed to the most recent `invalid_source` call, if any.
+    invalid_reason: Option<&'static str>,
+}
+/// ICC `uInt8Number`: an unsigned 8-bit value.
+pub type uInt8Number = u8;
+/// Maps an 8-bit sample onto the unit interval (0 -> 0.0, 255 -> 1.0).
+#[inline]
+fn uInt8Number_to_float(a: uInt8Number) -> f32 {
+    let sample = f32::from(a);
+    sample / 255.0
+}
+
+/// Maps a 16-bit sample onto the unit interval (0 -> 0.0, 65535 -> 1.0).
+#[inline]
+fn uInt16Number_to_float(a: uInt16Number) -> f32 {
+    let sample = f32::from(a);
+    sample / 65535.0
+}
+
+/// Converts a native-endian `u32` into its big-endian representation.
+fn cpu_to_be32(v: u32) -> be32 {
+    u32::from_ne_bytes(v.to_be_bytes())
+}
+/// Converts a native-endian `u16` into its big-endian representation.
+fn cpu_to_be16(v: u16) -> be16 {
+    u16::from_ne_bytes(v.to_be_bytes())
+}
+/// Converts a big-endian `u32` representation into a native-endian value.
+fn be32_to_cpu(v: be32) -> u32 {
+    u32::from_be_bytes(v.to_ne_bytes())
+}
+/// Converts a big-endian `u16` representation into a native-endian value.
+fn be16_to_cpu(v: be16) -> u16 {
+    u16::from_be_bytes(v.to_ne_bytes())
+}
+/// Marks the source as invalid and records why.
+///
+/// A later call overwrites the reason stored by an earlier one.
+fn invalid_source(mem: &mut MemSource, reason: &'static str) {
+    mem.invalid_reason = Some(reason);
+    mem.valid = false;
+}
+/// Reads a big-endian `u32` at byte `offset` of the source buffer.
+///
+/// If the four bytes do not lie entirely inside the buffer, the source is
+/// marked invalid with "Invalid offset" and 0 is returned.
+fn read_u32(mem: &mut MemSource, offset: usize) -> u32 {
+    // Copy the slice reference out so the lookup does not keep `mem` borrowed.
+    let buf = mem.buf;
+    // Checked arithmetic plus `get` handles both an oversized offset and a
+    // buffer shorter than four bytes (where `len() - 4` would underflow).
+    let bytes = match offset.checked_add(4) {
+        Some(end) => buf.get(offset..end),
+        None => None,
+    };
+    match bytes {
+        Some(b) => u32::from_be_bytes([b[0], b[1], b[2], b[3]]),
+        None => {
+            invalid_source(mem, "Invalid offset");
+            0
+        }
+    }
+}
+/// Reads a big-endian `u16` at byte `offset` of the source buffer.
+///
+/// If the two bytes do not lie entirely inside the buffer, the source is
+/// marked invalid with "Invalid offset" and 0 is returned.
+fn read_u16(mem: &mut MemSource, offset: usize) -> u16 {
+    // Copy the slice reference out so the lookup does not keep `mem` borrowed.
+    let buf = mem.buf;
+    // Checked arithmetic plus `get` handles both an oversized offset and a
+    // buffer shorter than two bytes (where `len() - 2` would underflow).
+    let bytes = match offset.checked_add(2) {
+        Some(end) => buf.get(offset..end),
+        None => None,
+    };
+    match bytes {
+        Some(b) => u16::from_be_bytes([b[0], b[1]]),
+        None => {
+            invalid_source(mem, "Invalid offset");
+            0u16
+        }
+    }
+}
+/// Reads the byte at `offset` of the source buffer.
+///
+/// If `offset` is outside the buffer (including the empty-buffer case, where
+/// `len() - 1` would underflow), the source is marked invalid with
+/// "Invalid offset" and 0 is returned.
+fn read_u8(mem: &mut MemSource, offset: usize) -> u8 {
+    match mem.buf.get(offset).copied() {
+        Some(byte) => byte,
+        None => {
+            invalid_source(mem, "Invalid offset");
+            0u8
+        }
+    }
+}
+/// Reads a big-endian 32-bit value at `offset` via `read_u32` and casts it to
+/// `s15Fixed16Number`. A failed read marks `mem` invalid and yields 0.
+fn read_s15Fixed16Number(mem: &mut MemSource, offset: usize) -> s15Fixed16Number {
+    read_u32(mem, offset) as s15Fixed16Number
+}
+/// Reads a `uInt8Number` at `offset`; a thin alias for `read_u8`.
+fn read_uInt8Number(mem: &mut MemSource, offset: usize) -> uInt8Number {
+    read_u8(mem, offset)
+}
+/// Reads a big-endian `uInt16Number` at `offset`; a thin alias for `read_u16`.
+fn read_uInt16Number(mem: &mut MemSource, offset: usize) -> uInt16Number {
+    read_u16(mem, offset)
+}
+/// Writes `value` in big-endian byte order at byte `offset` of `mem`.
+///
+/// # Panics
+///
+/// Panics with "OOB" if the four bytes starting at `offset` do not fit inside
+/// `mem`; this includes buffers shorter than four bytes.
+pub fn write_u32(mem: &mut [u8], offset: usize, value: u32) {
+    let bytes = value.to_be_bytes();
+    // Checked arithmetic avoids the underflow of `mem.len() - 4` on short
+    // buffers, which used to let an out-of-bounds write through.
+    let target = match offset.checked_add(bytes.len()) {
+        Some(end) => mem.get_mut(offset..end),
+        None => None,
+    };
+    match target {
+        Some(dst) => dst.copy_from_slice(&bytes),
+        None => panic!("OOB"),
+    }
+}
+/// Writes `value` in big-endian byte order at byte `offset` of `mem`.
+///
+/// # Panics
+///
+/// Panics with "OOB" if the two bytes starting at `offset` do not fit inside
+/// `mem`; this includes buffers shorter than two bytes.
+pub fn write_u16(mem: &mut [u8], offset: usize, value: u16) {
+    let bytes = value.to_be_bytes();
+    // Checked arithmetic avoids the underflow of `mem.len() - 2` on short
+    // buffers, which used to let an out-of-bounds write through.
+    let target = match offset.checked_add(bytes.len()) {
+        Some(end) => mem.get_mut(offset..end),
+        None => None,
+    };
+    match target {
+        Some(dst) => dst.copy_from_slice(&bytes),
+        None => panic!("OOB"),
+    }
+}
+
+/* An arbitrary 4MB limit on profile size */
+pub(crate) const MAX_PROFILE_SIZE: usize = 1024 * 1024 * 4;
+/* Largest tag count accepted by read_tag_table */
+const MAX_TAG_COUNT: u32 = 1024;
+
+/// Placeholder for validating the CMM type signature (header offset 4).
+/// Currently performs no check and never touches the source.
+fn check_CMM_type_signature(_src: &mut MemSource) {
+    //uint32_t CMM_type_signature = read_u32(src, 4);
+    //TODO: do the check?
+}
+/// Validates the version field of the profile header.
+///
+/// Only the two bytes at offsets 10 and 11 are inspected; they are reserved
+/// and must both be zero, otherwise the source is marked invalid.
+fn check_profile_version(src: &mut MemSource) {
+    // The major (offset 8) and minor (offset 9) revision bytes are
+    // deliberately ignored: checking the version doesn't buy us anything.
+    let reserved = [read_u8(src, 10), read_u8(src, 11)];
+    if reserved.iter().any(|&byte| byte != 0) {
+        invalid_source(src, "Invalid reserved bytes");
+    }
+}
+
+// Profile/device class signatures (header offset 12). read_class_signature
+// accepts only the 'scnr', 'mntr', 'prtr' and 'spac' classes.
+const INPUT_DEVICE_PROFILE: u32 = 0x73636e72; // 'scnr'
+pub const DISPLAY_DEVICE_PROFILE: u32 = 0x6d6e7472; // 'mntr'
+const OUTPUT_DEVICE_PROFILE: u32 = 0x70727472; // 'prtr'
+const DEVICE_LINK_PROFILE: u32 = 0x6c696e6b; // 'link'
+const COLOR_SPACE_PROFILE: u32 = 0x73706163; // 'spac'
+const ABSTRACT_PROFILE: u32 = 0x61627374; // 'abst'
+const NAMED_COLOR_PROFILE: u32 = 0x6e6d636c; // 'nmcl'
+
+/// Reads the profile/device class (header offset 12) into `profile` and marks
+/// the source invalid unless it is a display, input, output or color space
+/// class.
+fn read_class_signature(profile: &mut Profile, mem: &mut MemSource) {
+    let class_type = read_u32(mem, 12);
+    profile.class_type = class_type;
+    let supported = class_type == DISPLAY_DEVICE_PROFILE
+        || class_type == INPUT_DEVICE_PROFILE
+        || class_type == OUTPUT_DEVICE_PROFILE
+        || class_type == COLOR_SPACE_PROFILE;
+    if !supported {
+        invalid_source(mem, "Invalid  Profile/Device Class signature");
+    }
+}
+/// Reads the data color space (header offset 16) into `profile`; only RGB and
+/// gray are accepted, anything else marks the source invalid.
+fn read_color_space(profile: &mut Profile, mem: &mut MemSource) {
+    let color_space = read_u32(mem, 16);
+    profile.color_space = color_space;
+    if color_space != RGB_SIGNATURE && color_space != GRAY_SIGNATURE {
+        invalid_source(mem, "Unsupported colorspace");
+    }
+}
+/// Reads the profile connection space (header offset 20) into `profile`; only
+/// XYZ and Lab are accepted, anything else marks the source invalid.
+fn read_pcs(profile: &mut Profile, mem: &mut MemSource) {
+    let pcs = read_u32(mem, 20);
+    profile.pcs = pcs;
+    if pcs != XYZ_SIGNATURE && pcs != LAB_SIGNATURE {
+        invalid_source(mem, "Unsupported pcs");
+    }
+}
+/// Reads the tag table that follows the 128-byte header.
+///
+/// The table starts with a u32 entry count followed by 12-byte entries
+/// (signature, offset, size). More than `MAX_TAG_COUNT` entries marks the
+/// source invalid and yields an empty table.
+fn read_tag_table(_profile: &mut Profile, mem: &mut MemSource) -> Vec<Tag> {
+    const FIRST_ENTRY: u32 = 128 + 4;
+    const ENTRY_SIZE: u32 = 4 * 3;
+
+    let count = read_u32(mem, 128);
+    if count > MAX_TAG_COUNT {
+        invalid_source(mem, "max number of tags exceeded");
+        return Vec::new();
+    }
+    (0..count)
+        .map(|i| {
+            let entry = FIRST_ENTRY + ENTRY_SIZE * i;
+            Tag {
+                signature: read_u32(mem, entry as usize),
+                offset: read_u32(mem, (entry + 4) as usize),
+                size: read_u32(mem, (entry + 8) as usize),
+            }
+        })
+        .collect()
+}
+
+/// Checks a profile for obvious inconsistencies and returns
+/// true if the profile looks bogus and should probably be
+/// ignored.
+///
+/// Only RGB profiles without LUT based transforms are examined: the sum of
+/// their colorants has to land close to the target white point and, except on
+/// macOS, no colorant component may be negative.
+#[no_mangle]
+pub extern "C" fn qcms_profile_is_bogus(profile: &mut Profile) -> bool {
+    /// Converts a colorant to floating point `[X, Y, Z]`.
+    fn to_floats(c: XYZNumber) -> [f32; 3] {
+        [
+            s15Fixed16Number_to_float(c.X),
+            s15Fixed16Number_to_float(c.Y),
+            s15Fixed16Number_to_float(c.Z),
+        ]
+    }
+
+    // Build our target vector (see mozilla bug 460629)
+    const TARGET: [f32; 3] = [0.96420, 1.00000, 0.82491];
+    // Our tolerance vector - Recommended by Chris Murphy based on
+    // conversion from the LAB space criterion of no more than 3 in any one
+    // channel. This is similar to, but slightly more tolerant than Adobe's
+    // criterion.
+    const TOLERANCE: [f32; 3] = [0.02, 0.02, 0.04];
+
+    // We currently only check the bogosity of RGB profiles
+    if profile.color_space != RGB_SIGNATURE {
+        return false;
+    }
+    let has_lut = profile.A2B0.is_some()
+        || profile.B2A0.is_some()
+        || profile.mAB.is_some()
+        || profile.mBA.is_some();
+    if has_lut {
+        return false;
+    }
+
+    let red = to_floats(profile.redColorant);
+    let green = to_floats(profile.greenColorant);
+    let blue = to_floats(profile.blueColorant);
+
+    // Sum the values; they should add up to something close to white
+    for axis in 0..3 {
+        let sum = red[axis] + green[axis] + blue[axis];
+        let within_tolerance =
+            sum - TOLERANCE[axis] <= TARGET[axis] && sum + TOLERANCE[axis] >= TARGET[axis];
+        if !within_tolerance {
+            return true;
+        }
+    }
+
+    if cfg!(target_os = "macos") {
+        // Chromatic adaption to D50 can result in negative XYZ, but the white
+        // point D50 tolerance test has passed. Accept negative values herein.
+        // See https://bugzilla.mozilla.org/show_bug.cgi?id=498245#c18 onwards
+        // for discussion about whether profile XYZ can or cannot be negative,
+        // per the spec. Also the https://bugzil.la/450923 user report.
+
+        // FIXME: allow this relaxation on all ports?
+        return false;
+    }
+
+    // Any negative component makes the profile bogus; otherwise all good.
+    red.iter()
+        .chain(green.iter())
+        .chain(blue.iter())
+        .any(|&component| component < 0.)
+}
+
+// Tag signatures (four ASCII characters packed big-endian into a u32).
+pub const TAG_bXYZ: u32 = 0x6258595a; // 'bXYZ'
+pub const TAG_gXYZ: u32 = 0x6758595a; // 'gXYZ'
+pub const TAG_rXYZ: u32 = 0x7258595a; // 'rXYZ'
+pub const TAG_rTRC: u32 = 0x72545243; // 'rTRC'
+pub const TAG_bTRC: u32 = 0x62545243; // 'bTRC'
+pub const TAG_gTRC: u32 = 0x67545243; // 'gTRC'
+pub const TAG_kTRC: u32 = 0x6b545243; // 'kTRC'
+pub const TAG_A2B0: u32 = 0x41324230; // 'A2B0'
+pub const TAG_B2A0: u32 = 0x42324130; // 'B2A0'
+pub const TAG_CHAD: u32 = 0x63686164; // 'chad'
+
+/// Returns the first entry of `index` whose signature equals `tag_id`, or
+/// `None` when the table holds no such tag.
+fn find_tag(index: &TagIndex, tag_id: u32) -> Option<&Tag> {
+    index.iter().find(|entry| entry.signature == tag_id)
+}
+
+// Type signatures found in the first four bytes of a tag's data.
+pub const XYZ_TYPE: u32 = 0x58595a20; // 'XYZ '
+pub const CURVE_TYPE: u32 = 0x63757276; // 'curv'
+pub const PARAMETRIC_CURVE_TYPE: u32 = 0x70617261; // 'para'
+pub const LUT16_TYPE: u32 = 0x6d667432; // 'mft2'
+pub const LUT8_TYPE: u32 = 0x6d667431; // 'mft1'
+pub const LUT_MAB_TYPE: u32 = 0x6d414220; // 'mAB '
+pub const LUT_MBA_TYPE: u32 = 0x6d424120; // 'mBA '
+pub const CHROMATIC_TYPE: u32 = 0x73663332; // 'sf32'
+
+/// Reads the 3x3 matrix stored in the s15Fixed16ArrayType tag `tag_id`.
+///
+/// A missing tag marks the source invalid and returns a zero matrix flagged
+/// `invalid`. A wrong type signature also marks the source invalid, but the
+/// nine elements are still read (row by row, starting 8 bytes into the tag).
+fn read_tag_s15Fixed16ArrayType(src: &mut MemSource, index: &TagIndex, tag_id: u32) -> Matrix {
+    let tag = match find_tag(index, tag_id) {
+        Some(tag) => tag,
+        None => {
+            invalid_source(src, "missing sf32tag");
+            return Matrix {
+                m: [[0.; 3]; 3],
+                invalid: true,
+            };
+        }
+    };
+
+    let base: u32 = tag.offset;
+    // Check mandatory type signature for s15Fixed16ArrayType
+    if read_u32(src, base as usize) != CHROMATIC_TYPE {
+        invalid_source(src, "unexpected type, expected \'sf32\'");
+    }
+
+    let mut m = [[0.; 3]; 3];
+    for (row, cells) in m.iter_mut().enumerate() {
+        for (col, cell) in cells.iter_mut().enumerate() {
+            let element = (row * 3 + col) as u32;
+            *cell = s15Fixed16Number_to_float(read_s15Fixed16Number(
+                src,
+                (base + 8 + element * 4) as usize,
+            ));
+        }
+    }
+    Matrix { m, invalid: false }
+}
+/// Reads the XYZ triple stored in the XYZType tag `tag_id`.
+///
+/// A missing tag marks the source invalid and yields an all-zero triple. A
+/// wrong type signature also marks the source invalid, but the three values
+/// (at 8, 12 and 16 bytes into the tag) are still read.
+fn read_tag_XYZType(src: &mut MemSource, index: &TagIndex, tag_id: u32) -> XYZNumber {
+    match find_tag(index, tag_id) {
+        None => {
+            invalid_source(src, "missing xyztag");
+            XYZNumber { X: 0, Y: 0, Z: 0 }
+        }
+        Some(tag) => {
+            let base: u32 = tag.offset;
+            if read_u32(src, base as usize) != XYZ_TYPE {
+                invalid_source(src, "unexpected type, expected XYZ");
+            }
+            let x = read_s15Fixed16Number(src, (base + 8) as usize);
+            let y = read_s15Fixed16Number(src, (base + 12) as usize);
+            let z = read_s15Fixed16Number(src, (base + 16) as usize);
+            XYZNumber { X: x, Y: y, Z: z }
+        }
+    }
+}
+// Read the curve at a given offset rather than through the tag_index.
+// This method is used when reading mAB tags where nested curveType are
+// present that are not part of the tag_index.
+// On success `*len` is set to the number of bytes the curve occupies
+// (without alignment padding); on failure the source is marked invalid.
+fn read_curveType(src: &mut MemSource, offset: u32, len: &mut u32) -> Option<Box<curveType>> {
+    // Number of stored parameters for each parametric function type (0..=4).
+    const COUNT_TO_LENGTH: [u32; 5] = [1, 3, 4, 5, 7]; //PARAMETRIC_CURVE_TYPE
+
+    match read_u32(src, offset as usize) {
+        CURVE_TYPE => {
+            let entries = read_u32(src, (offset + 8) as usize);
+            //arbitrary
+            if entries > 40000 {
+                invalid_source(src, "curve size too large");
+                return None;
+            }
+            let table: Vec<uInt16Number> = (0..entries)
+                .map(|i| read_u16(src, (offset + 12 + i * 2) as usize))
+                .collect();
+            *len = 12 + entries * 2;
+            Some(Box::new(curveType::Curve(table)))
+        }
+        PARAMETRIC_CURVE_TYPE => {
+            let function_type = read_u16(src, (offset + 8) as usize) as u32;
+            if function_type > 4 {
+                invalid_source(src, "parametric function type not supported.");
+                return None;
+            }
+            let param_count = COUNT_TO_LENGTH[function_type as usize];
+            let params: Vec<f32> = (0..param_count)
+                .map(|i| {
+                    s15Fixed16Number_to_float(read_s15Fixed16Number(
+                        src,
+                        (offset + 12 + i * 4) as usize,
+                    ))
+                })
+                .collect();
+            *len = 12 + param_count * 4;
+            /* a type 1 or type 2 function has a division by 'a' */
+            let divides_by_a = function_type == 1 || function_type == 2;
+            if divides_by_a && params[1] == 0.0 {
+                invalid_source(src, "parametricCurve definition causes division by zero");
+            }
+            Some(Box::new(curveType::Parametric(params)))
+        }
+        _ => {
+            invalid_source(src, "unexpected type, expected CURV or PARA");
+            None
+        }
+    }
+}
+/// Looks up `tag_id` in the tag index and parses the curve it points to.
+///
+/// A missing tag marks the source invalid and returns `None`.
+fn read_tag_curveType(
+    src: &mut MemSource,
+    index: &TagIndex,
+    tag_id: u32,
+) -> Option<Box<curveType>> {
+    match find_tag(index, tag_id) {
+        Some(tag) => {
+            // The curve's byte length is not needed for top-level tags.
+            let mut ignored_len: u32 = 0;
+            read_curveType(src, tag.offset, &mut ignored_len)
+        }
+        None => {
+            invalid_source(src, "missing curvetag");
+            None
+        }
+    }
+}
+
+// Upper bound on the number of CLUT entries accepted by the LUT readers.
+const MAX_LUT_SIZE: u32 = 500000; // arbitrary
+// Number of per-channel curve slots in the mAB/mBA curve arrays.
+const MAX_CHANNELS: usize = 10; // arbitrary
+/// Reads `num_channels` consecutive curves starting at `curve_offset` into
+/// the leading slots of `curveArray`.
+///
+/// Each curve begins on a 4-byte boundary relative to `curve_offset`. Reading
+/// stops at the first curve that fails to parse, after marking the source
+/// invalid.
+fn read_nested_curveType(
+    src: &mut MemSource,
+    curveArray: &mut [Option<Box<curveType>>; MAX_CHANNELS],
+    num_channels: u8,
+    curve_offset: u32,
+) {
+    let mut consumed: u32 = 0;
+    for slot in 0..usize::from(num_channels) {
+        let mut curve_len: u32 = 0;
+        let curve = read_curveType(src, curve_offset + consumed, &mut curve_len);
+        let failed = curve.is_none();
+        curveArray[slot] = curve;
+        if failed {
+            invalid_source(src, "invalid nested curveType curve");
+            break;
+        }
+        consumed += curve_len;
+        // 4 byte aligned
+        let remainder = curve_len % 4;
+        if remainder != 0 {
+            consumed += 4 - remainder
+        }
+    }
+}
+
+/* See section 10.10 for specs */
+/// Parses a 'mAB ' or 'mBA ' lookup-table tag.
+///
+/// Returns `None` when the type signature is neither of the two, when the
+/// channel counts are not 3 in / 3 out, when the CLUT would exceed
+/// `MAX_LUT_SIZE`, or when `src` is invalid by the time parsing finishes.
+/// The first three early returns leave `src.valid` untouched.
+fn read_tag_lutmABType(src: &mut MemSource, tag: &Tag) -> Option<Box<lutmABType>> {
+    let offset: u32 = tag.offset;
+    let mut clut_size: u32 = 1;
+    let type_0: u32 = read_u32(src, offset as usize);
+    if type_0 != LUT_MAB_TYPE && type_0 != LUT_MBA_TYPE {
+        return None;
+    }
+    let num_in_channels = read_u8(src, (offset + 8) as usize);
+    let num_out_channels = read_u8(src, (offset + 9) as usize);
+    if num_in_channels > 10 || num_out_channels > 10 {
+        return None;
+    }
+    // We require 3in/out channels since we only support RGB->XYZ (or RGB->LAB)
+    // XXX: If we remove this restriction make sure that the number of channels
+    //      is less or equal to the maximum number of mAB curves in qcmsint.h
+    //      also check for clut_size overflow. Also make sure it's != 0
+    if num_in_channels != 3 || num_out_channels != 3 {
+        return None;
+    }
+    // some of this data is optional and is denoted by a zero offset
+    // we also use this to track their existence
+    let mut a_curve_offset = read_u32(src, (offset + 28) as usize);
+    let mut clut_offset = read_u32(src, (offset + 24) as usize);
+    let mut m_curve_offset = read_u32(src, (offset + 20) as usize);
+    let mut matrix_offset = read_u32(src, (offset + 16) as usize);
+    let mut b_curve_offset = read_u32(src, (offset + 12) as usize);
+    // Convert offsets relative to the tag to relative to the profile
+    // preserve zero for optional fields
+    if a_curve_offset != 0 {
+        a_curve_offset += offset
+    }
+    if clut_offset != 0 {
+        clut_offset += offset
+    }
+    if m_curve_offset != 0 {
+        m_curve_offset += offset
+    }
+    if matrix_offset != 0 {
+        matrix_offset += offset
+    }
+    if b_curve_offset != 0 {
+        b_curve_offset += offset
+    }
+    // The CLUT entry count is the product of the per-input-channel grid point
+    // counts, which are the first bytes of the CLUT element.
+    if clut_offset != 0 {
+        debug_assert!(num_in_channels == 3);
+        // clut_size can not overflow since lg(256^num_in_channels) = 24 bits.
+        for i in 0..u32::from(num_in_channels) {
+            clut_size *= read_u8(src, (clut_offset + i) as usize) as u32;
+            if clut_size == 0 {
+                invalid_source(src, "bad clut_size");
+            }
+        }
+    } else {
+        clut_size = 0
+    }
+    // 24bits * 3 won't overflow either
+    clut_size *= num_out_channels as u32;
+    if clut_size > MAX_LUT_SIZE {
+        return None;
+    }
+
+    let mut lut = Box::new(lutmABType::default());
+
+    // Record the grid point count of every input channel; zero is rejected.
+    if clut_offset != 0 {
+        for i in 0..usize::from(num_in_channels) {
+            lut.num_grid_points[i] = read_u8(src, clut_offset as usize + i);
+            if lut.num_grid_points[i] == 0 {
+                invalid_source(src, "bad grid_points");
+            }
+        }
+    }
+    // Reverse the processing of transformation elements for mBA type.
+    lut.reversed = type_0 == LUT_MBA_TYPE;
+    lut.num_in_channels = num_in_channels;
+    lut.num_out_channels = num_out_channels;
+    if matrix_offset != 0 {
+        // read the matrix if we have it: nine values for the 3x3 part (row by
+        // row) followed by three values stored in e03, e13 and e23.
+        lut.e00 = read_s15Fixed16Number(src, (matrix_offset + (4 * 0) as u32) as usize); // the caller checks that this doesn't happen
+        lut.e01 = read_s15Fixed16Number(src, (matrix_offset + (4 * 1) as u32) as usize);
+        lut.e02 = read_s15Fixed16Number(src, (matrix_offset + (4 * 2) as u32) as usize);
+        lut.e10 = read_s15Fixed16Number(src, (matrix_offset + (4 * 3) as u32) as usize);
+        lut.e11 = read_s15Fixed16Number(src, (matrix_offset + (4 * 4) as u32) as usize);
+        lut.e12 = read_s15Fixed16Number(src, (matrix_offset + (4 * 5) as u32) as usize);
+        lut.e20 = read_s15Fixed16Number(src, (matrix_offset + (4 * 6) as u32) as usize);
+        lut.e21 = read_s15Fixed16Number(src, (matrix_offset + (4 * 7) as u32) as usize);
+        lut.e22 = read_s15Fixed16Number(src, (matrix_offset + (4 * 8) as u32) as usize);
+        lut.e03 = read_s15Fixed16Number(src, (matrix_offset + (4 * 9) as u32) as usize);
+        lut.e13 = read_s15Fixed16Number(src, (matrix_offset + (4 * 10) as u32) as usize);
+        lut.e23 = read_s15Fixed16Number(src, (matrix_offset + (4 * 11) as u32) as usize)
+    }
+    // A and M curves are optional; B curves are mandatory.
+    if a_curve_offset != 0 {
+        read_nested_curveType(src, &mut lut.a_curves, num_in_channels, a_curve_offset);
+    }
+    if m_curve_offset != 0 {
+        read_nested_curveType(src, &mut lut.m_curves, num_out_channels, m_curve_offset);
+    }
+    if b_curve_offset != 0 {
+        read_nested_curveType(src, &mut lut.b_curves, num_out_channels, b_curve_offset);
+    } else {
+        invalid_source(src, "B curves required");
+    }
+    if clut_offset != 0 {
+        // Byte 16 of the CLUT element is the sample size in bytes (1 or 2);
+        // the samples themselves start at byte 20.
+        let clut_precision = read_u8(src, (clut_offset + 16) as usize);
+        let mut clut_table = Vec::with_capacity(clut_size as usize);
+        if clut_precision == 1 {
+            for i in 0..clut_size {
+                clut_table.push(uInt8Number_to_float(read_uInt8Number(
+                    src,
+                    (clut_offset + 20 + i * 1) as usize,
+                )));
+            }
+            lut.clut_table = Some(clut_table);
+        } else if clut_precision == 2 {
+            for i in 0..clut_size {
+                clut_table.push(uInt16Number_to_float(read_uInt16Number(
+                    src,
+                    (clut_offset + 20 + i * 2) as usize,
+                )));
+            }
+            lut.clut_table = Some(clut_table);
+        } else {
+            invalid_source(src, "Invalid clut precision");
+        }
+    }
+    // Any failure recorded along the way discards the partially built LUT.
+    if !src.valid {
+        return None;
+    }
+    Some(lut)
+}
+/// Parses a lut8Type ('mft1') or lut16Type ('mft2') tag.
+///
+/// Only 3-in / 3-out tables with a non-empty CLUT of at most `MAX_LUT_SIZE`
+/// grid entries are accepted; every rejection marks the source invalid and
+/// returns `None`. Table entries are converted to floats while reading.
+fn read_tag_lutType(src: &mut MemSource, tag: &Tag) -> Option<Box<lutType>> {
+    /// Reads one table entry at `pos` in the width used by the LUT flavour.
+    fn read_entry(src: &mut MemSource, pos: usize, is_lut8: bool) -> f32 {
+        if is_lut8 {
+            uInt8Number_to_float(read_uInt8Number(src, pos))
+        } else {
+            uInt16Number_to_float(read_uInt16Number(src, pos))
+        }
+    }
+
+    let offset: u32 = tag.offset;
+    let type_0: u32 = read_u32(src, offset as usize);
+    let is_lut8 = type_0 == LUT8_TYPE;
+
+    // (input entries, output entries, bytes per entry, start of input table)
+    let (num_input_table_entries, num_output_table_entries, entry_size, input_offset): (
+        u16,
+        u16,
+        usize,
+        u32,
+    ) = match type_0 {
+        LUT8_TYPE => (256, 256, 1, 48),
+        LUT16_TYPE => {
+            let n_in = read_u16(src, (offset + 48) as usize);
+            let n_out = read_u16(src, (offset + 50) as usize);
+            // these limits come from the spec
+            if n_in < 2 || n_in > 4096 || n_out < 2 || n_out > 4096 {
+                invalid_source(src, "Bad channel count");
+                return None;
+            }
+            (n_in, n_out, 2, 52)
+        }
+        _ => {
+            debug_assert!(false);
+            invalid_source(src, "Unexpected lut type");
+            return None;
+        }
+    };
+
+    let in_chan = read_u8(src, (offset + 8) as usize);
+    let out_chan = read_u8(src, (offset + 9) as usize);
+    let grid_points = read_u8(src, (offset + 10) as usize);
+    let clut_size = (grid_points as f64).powf(in_chan as f64) as u32;
+    if clut_size > MAX_LUT_SIZE {
+        invalid_source(src, "CLUT too large");
+        return None;
+    }
+    if clut_size == 0 {
+        invalid_source(src, "CLUT must not be empty.");
+        return None;
+    }
+    if in_chan != 3 || out_chan != 3 {
+        invalid_source(src, "CLUT only supports RGB");
+        return None;
+    }
+
+    // The 3x3 matrix is stored row by row, 12 bytes into the tag.
+    let mut matrix = [0; 9];
+    for (k, element) in matrix.iter_mut().enumerate() {
+        *element = read_s15Fixed16Number(src, (offset + 12 + 4 * k as u32) as usize);
+    }
+    let [e00, e01, e02, e10, e11, e12, e20, e21, e22] = matrix;
+
+    // Input tables: `in_chan` tables of `num_input_table_entries` each.
+    let input_base = (offset + input_offset) as usize;
+    let input_len = (num_input_table_entries * in_chan as u16) as usize;
+    let input_table: Vec<f32> = (0..input_len)
+        .map(|i| read_entry(src, input_base + i * entry_size, is_lut8))
+        .collect();
+
+    // CLUT: `clut_size` grid entries of `out_chan` samples each.
+    let clut_offset = (input_base + input_len * entry_size) as u32;
+    let clut_len = (clut_size * out_chan as u32) as usize;
+    let clut_table: Vec<f32> = (0..clut_len)
+        .map(|i| read_entry(src, clut_offset as usize + i * entry_size, is_lut8))
+        .collect();
+
+    // Output tables: `out_chan` tables of `num_output_table_entries` each.
+    let output_offset = (clut_offset as usize + clut_len * entry_size) as u32;
+    let output_len = (num_output_table_entries as i32 * out_chan as i32) as usize;
+    let output_table: Vec<f32> = (0..output_len)
+        .map(|i| read_entry(src, output_offset as usize + i * entry_size, is_lut8))
+        .collect();
+
+    Some(Box::new(lutType {
+        num_input_channels: in_chan,
+        num_output_channels: out_chan,
+        num_clut_grid_points: grid_points,
+        e00,
+        e01,
+        e02,
+        e10,
+        e11,
+        e12,
+        e20,
+        e21,
+        e22,
+        num_input_table_entries,
+        num_output_table_entries,
+        input_table,
+        clut_table,
+        output_table,
+    }))
+}
+/// Reads the rendering intent (header offset 64) into `profile`.
+///
+/// An unknown value marks the source invalid and stores the default intent.
+fn read_rendering_intent(profile: &mut Profile, src: &mut MemSource) {
+    let raw = read_u32(src, 64);
+    let intent = if raw == Perceptual as u32 {
+        Perceptual
+    } else if raw == RelativeColorimetric as u32 {
+        RelativeColorimetric
+    } else if raw == Saturation as u32 {
+        Saturation
+    } else if raw == AbsoluteColorimetric as u32 {
+        AbsoluteColorimetric
+    } else {
+        invalid_source(src, "unknown rendering intent");
+        Intent::default()
+    };
+    profile.rendering_intent = intent;
+}
+/// Allocates a profile with every field at its default value.
+fn profile_create() -> Box<Profile> {
+    let blank = Profile::default();
+    Box::new(blank)
+}
+/* build sRGB gamma table */
+/* based on cmsBuildParametricGamma() */
+/// Samples the sRGB transfer curve at `num_entries` evenly spaced inputs in
+/// [0, 1] and returns the outputs scaled to 0..=65535.
+fn build_sRGB_gamma_table(num_entries: i32) -> Vec<u16> {
+    /* taken from lcms: Build_sRGBGamma() */
+    const GAMMA: f64 = 2.4;
+    const A: f64 = 1.0 / 1.055;
+    const B: f64 = 0.055 / 1.055;
+    const C: f64 = 1.0 / 12.92;
+    const D: f64 = 0.04045;
+
+    let mut table = Vec::with_capacity(num_entries as usize);
+    table.extend((0..num_entries).map(|i| {
+        let x = i as f64 / (num_entries - 1) as f64;
+        // IEC 61966-2.1 (sRGB)
+        // Y = (aX + b)^Gamma | X >= d
+        // Y = cX             | X < d
+        let y = if x >= D {
+            let base = A * x + B;
+            if base > 0. {
+                base.powf(GAMMA)
+            } else {
+                0.
+            }
+        } else {
+            C * x
+        };
+        // Saturate -- this could likely move to a separate function
+        let scaled = y * 65535.0 + 0.5;
+        let saturated = if scaled > 65535.0 {
+            65535.0
+        } else if scaled < 0.0 {
+            0.0
+        } else {
+            scaled
+        };
+        saturated.floor() as u16
+    }));
+    table
+}
+fn curve_from_table(table: &[u16]) -> Box<curveType> {
+    Box::new(curveType::Curve(Vec::from(table))) // owned copy of the samples
+}
+/// Encodes `a` as u8Fixed8 (`a * 256`, rounded half up), saturating to 0..=0xffff.
+pub fn float_to_u8Fixed8Number(a: f32) -> u16 {
+    const MAX_U8FIXED8: f32 = 255.0 + 255.0 / 256f32;
+    match a {
+        v if v > MAX_U8FIXED8 => 0xffff,
+        v if v < 0.0 => 0,
+        v => (v * 256.0 + 0.5).floor() as u16,
+    }
+}
+
+fn curve_from_gamma(gamma: f32) -> Box<curveType> {
+    Box::new(curveType::Curve([float_to_u8Fixed8Number(gamma)].to_vec())) // one-entry curve
+}
+
+/* from lcms: cmsWhitePointFromTemp */
+/// Returns the xyY white point for the correlated color temperature `temp_K`.
+///
+/// `temp_K` must be >= 4000 and <= 25000. Any other value trips a debug
+/// assertion and, where debug assertions are off, returns (x, y, Y) =
+/// (-1.0, -1.0, -1.0), similar to argyll: icx_DTEMP2XYZ(). Valid inputs give Y = 1.0.
+fn white_point_from_temp(temp_K: i32) -> qcms_CIE_xyY {
+    // No optimization provided.
+    let t = temp_K as f64;
+    let t_squared = t * t;
+    let t_cubed = t_squared * t;
+
+    let x = if (4000.0..=7000.0f64).contains(&t) {
+        // For correlated color temperature (T) between 4000K and 7000K:
+        -4.6070 * (1E9 / t_cubed) + 2.9678 * (1E6 / t_squared) + 0.09911 * (1E3 / t) + 0.244063
+    } else if t > 7000.0 && t <= 25000.0 {
+        // or for correlated color temperature (T) between 7000K and 25000K:
+        -2.0064 * (1E9 / t_cubed) + 1.9018 * (1E6 / t_squared) + 0.24748 * (1E3 / t) + 0.237040
+    } else {
+        // Invalid tempK
+        debug_assert!(false, "invalid temp");
+        return qcms_CIE_xyY {
+            x: -1.0,
+            y: -1.0,
+            Y: -1.0,
+        };
+    };
+
+    // Obtain y(x)
+    let y = -3.000 * (x * x) + 2.870 * x - 0.275;
+
+    // wave factors (not used, but kept here for future extensions)
+    // let M1 = (-1.3515 - 1.7703*x + 5.9114 *y)/(0.0241 + 0.2562*x - 0.7341*y);
+    // let M2 = (0.0300 - 31.4424*x + 30.0717*y)/(0.0241 + 0.2562*x - 0.7341*y);
+    qcms_CIE_xyY {
+        x,
+        y,
+        Y: 1.0,
+    }
+}
+#[no_mangle]
+pub extern "C" fn qcms_white_point_sRGB() -> qcms_CIE_xyY {
+    white_point_from_temp(6504) // white point at 6504 K; `new_sRGB` uses it as D65
+}
+
+impl Profile {
+    //XXX: it would be nice if we had a way of ensuring
+    // everything in a profile was initialized regardless of how it was created
+    //XXX: should this also be taking a black_point?
+    /// Creates an RGB display profile whose three channels all use the tone
+    /// curve `table` (similar to CGColorSpaceCreateCalibratedRGB).
+    ///
+    /// Returns `None` when `set_rgb_colorants` fails for the given white point
+    /// and primaries.
+    pub fn new_rgb_with_table(
+        white_point: qcms_CIE_xyY,
+        primaries: qcms_CIE_xyYTRIPLE,
+        table: &[u16],
+    ) -> Option<Box<Profile>> {
+        let mut rgb = profile_create();
+        //XXX: should store the whitepoint
+        let colorants_ok = set_rgb_colorants(&mut rgb, white_point, primaries);
+        if !colorants_ok {
+            return None;
+        }
+        rgb.redTRC = Some(curve_from_table(table));
+        rgb.greenTRC = Some(curve_from_table(table));
+        rgb.blueTRC = Some(curve_from_table(table));
+        rgb.class_type = DISPLAY_DEVICE_PROFILE;
+        rgb.rendering_intent = Perceptual;
+        rgb.color_space = RGB_SIGNATURE;
+        rgb.pcs = XYZ_TYPE;
+        Some(rgb)
+    }
+
+    /// Creates the built-in sRGB profile: Rec. 709 primaries, the white point
+    /// from `qcms_white_point_sRGB` and a 1024-entry sRGB gamma table.
+    pub fn new_sRGB() -> Box<Profile> {
+        let chromaticity = |x: f64, y: f64| qcms_CIE_xyY { x, y, Y: 1.0 };
+        let Rec709Primaries = qcms_CIE_xyYTRIPLE {
+            red: chromaticity(0.6400, 0.3300),
+            green: chromaticity(0.3000, 0.6000),
+            blue: chromaticity(0.1500, 0.0600),
+        };
+        let D65 = qcms_white_point_sRGB();
+        let table = build_sRGB_gamma_table(1024);
+
+        Profile::new_rgb_with_table(D65, Rec709Primaries, &table).unwrap()
+    }
+
+    /// Creates a grayscale display profile whose tone curve is the single
+    /// gamma value `gamma`.
+    pub fn new_gray_with_gamma(gamma: f32) -> Box<Profile> {
+        let mut gray = profile_create();
+
+        gray.grayTRC = Some(curve_from_gamma(gamma));
+        gray.class_type = DISPLAY_DEVICE_PROFILE;
+        gray.rendering_intent = Perceptual;
+        gray.color_space = GRAY_SIGNATURE;
+        gray.pcs = XYZ_TYPE;
+        gray
+    }
+
+    /// Creates an RGB display profile with a separate gamma value per channel.
+    ///
+    /// Returns `None` when `set_rgb_colorants` fails for the given white point
+    /// and primaries.
+    pub fn new_rgb_with_gamma_set(
+        white_point: qcms_CIE_xyY,
+        primaries: qcms_CIE_xyYTRIPLE,
+        redGamma: f32,
+        greenGamma: f32,
+        blueGamma: f32,
+    ) -> Option<Box<Profile>> {
+        let mut rgb = profile_create();
+
+        //XXX: should store the whitepoint
+        let colorants_ok = set_rgb_colorants(&mut rgb, white_point, primaries);
+        if !colorants_ok {
+            return None;
+        }
+        rgb.redTRC = Some(curve_from_gamma(redGamma));
+        rgb.greenTRC = Some(curve_from_gamma(greenGamma));
+        rgb.blueTRC = Some(curve_from_gamma(blueGamma));
+        rgb.class_type = DISPLAY_DEVICE_PROFILE;
+        rgb.rendering_intent = Perceptual;
+        rgb.color_space = RGB_SIGNATURE;
+        rgb.pcs = XYZ_TYPE;
+        Some(rgb)
+    }
+
+    /// Parses an ICC profile from `mem`.
+    ///
+    /// Returns `None` when the length stored in the first four bytes exceeds
+    /// `mem`, the size is not sane, a read marks the source invalid, the tag
+    /// table is empty, the class or color space is unsupported, or a curve is missing.
+    pub fn new_from_slice(mem: &[u8]) -> Option<Box<Profile>> {
+        if mem.len() < 4 {
+            return None;
+        }
+        let mut source = MemSource {
+            buf: mem,
+            valid: true,
+            invalid_reason: None,
+        };
+        let src = &mut source;
+        let length = read_u32(src, 0) as usize;
+        if length > mem.len() {
+            return None;
+        }
+        // shrink the area that we can read if appropriate
+        src.buf = &src.buf[..length];
+        /* ensure that the profile size is sane so it's easier to reason about */
+        if src.buf.len() <= 64 || src.buf.len() >= MAX_PROFILE_SIZE {
+            return None;
+        }
+        let mut profile = profile_create();
+
+        check_CMM_type_signature(src);
+        check_profile_version(src);
+        read_class_signature(&mut profile, src);
+        read_rendering_intent(&mut profile, src);
+        read_color_space(&mut profile, src);
+        read_pcs(&mut profile, src);
+        //TODO read rest of profile stuff
+        if !src.valid {
+            return None;
+        }
+
+        let index = read_tag_table(&mut profile, src);
+        if !src.valid || index.is_empty() {
+            return None;
+        }
+
+        if find_tag(&index, TAG_CHAD).is_some() {
+            profile.chromaticAdaption = read_tag_s15Fixed16ArrayType(src, &index, TAG_CHAD)
+        } else {
+            profile.chromaticAdaption.invalid = true //Signal the data is not present
+        }
+
+        let supported_class = profile.class_type == DISPLAY_DEVICE_PROFILE
+            || profile.class_type == INPUT_DEVICE_PROFILE
+            || profile.class_type == OUTPUT_DEVICE_PROFILE
+            || profile.class_type == COLOR_SPACE_PROFILE;
+        if !supported_class {
+            return None;
+        }
+        if profile.color_space == RGB_SIGNATURE {
+            if let Some(A2B0) = find_tag(&index, TAG_A2B0) {
+                match read_u32(src, A2B0.offset as usize) {
+                    t if t == LUT8_TYPE || t == LUT16_TYPE => {
+                        profile.A2B0 = read_tag_lutType(src, A2B0)
+                    }
+                    t if t == LUT_MAB_TYPE => profile.mAB = read_tag_lutmABType(src, A2B0),
+                    _ => {}
+                }
+            }
+            if let Some(B2A0) = find_tag(&index, TAG_B2A0) {
+                match read_u32(src, B2A0.offset as usize) {
+                    t if t == LUT8_TYPE || t == LUT16_TYPE => {
+                        profile.B2A0 = read_tag_lutType(src, B2A0)
+                    }
+                    t if t == LUT_MBA_TYPE => profile.mBA = read_tag_lutmABType(src, B2A0),
+                    _ => {}
+                }
+            }
+            if find_tag(&index, TAG_rXYZ).is_some() || !SUPPORTS_ICCV4.load(Ordering::Relaxed) {
+                profile.redColorant = read_tag_XYZType(src, &index, TAG_rXYZ);
+                profile.greenColorant = read_tag_XYZType(src, &index, TAG_gXYZ);
+                profile.blueColorant = read_tag_XYZType(src, &index, TAG_bXYZ)
+            }
+            if !src.valid {
+                return None;
+            }
+
+            if find_tag(&index, TAG_rTRC).is_some() || !SUPPORTS_ICCV4.load(Ordering::Relaxed) {
+                profile.redTRC = read_tag_curveType(src, &index, TAG_rTRC);
+                profile.greenTRC = read_tag_curveType(src, &index, TAG_gTRC);
+                profile.blueTRC = read_tag_curveType(src, &index, TAG_bTRC);
+                let curves = [&profile.redTRC, &profile.greenTRC, &profile.blueTRC];
+                if curves.iter().any(|curve| curve.is_none()) {
+                    return None;
+                }
+            }
+        } else if profile.color_space == GRAY_SIGNATURE {
+            profile.grayTRC = read_tag_curveType(src, &index, TAG_kTRC);
+            if profile.grayTRC.is_none() {
+                return None;
+            }
+        } else {
+            debug_assert!(false, "read_color_space protects against entering here");
+            return None;
+        }
+
+        if !src.valid {
+            return None;
+        }
+        Some(profile)
+    }
+    /// Precomputes the information needed for this profile to be
+    /// used as the output profile when constructing a `Transform`.
+    pub fn precache_output_transform(&mut self) {
+        crate::transform::qcms_profile_precache_output_transform(self);
+    }
+}
diff --git a/gfx/qcms/src/lib.rs b/gfx/qcms/src/lib.rs
new file mode 100644
index 0000000000..0b7a5f6989
--- /dev/null
+++ b/gfx/qcms/src/lib.rs
@@ -0,0 +1,73 @@
+/*! A pure Rust color management library.
+*/
+
+#![allow(dead_code)]
+#![allow(non_camel_case_types)]
+#![allow(non_snake_case)]
+#![allow(non_upper_case_globals)]
+#![feature(stdsimd)]
+// These are needed for the neon intrinsics implementation
+// and can be removed once the MSRV is high enough (1.48)
+#![feature(platform_intrinsics)]
+#![feature(simd_ffi)]
+#![feature(link_llvm_intrinsics)]
+#![feature(aarch64_target_feature)]
+#![feature(arm_target_feature)]
+#![feature(raw_ref_op)]
+
+/// These values match the Rendering Intent values from the ICC spec
+#[repr(u32)]
+#[derive(Clone, Copy)]
+pub enum Intent {
+    Perceptual = 0,
+    RelativeColorimetric = 1,
+    Saturation = 2,
+    AbsoluteColorimetric = 3,
+}
+
+use Intent::*;
+
+impl Default for Intent {
+    /// Chris Murphy (CM consultant) suggests perceptual as the default in the
+    /// event that we cannot reproduce relative + Black Point Compensation. BPC
+    /// brings an unacceptable performance overhead, so we go with perceptual.
+    fn default() -> Self {
+        Intent::Perceptual
+    }
+}
+
+pub(crate) type s15Fixed16Number = i32; // signed fixed point with 16 fractional bits
+
+/// Produces the nearest float to `a`, with a maximum error of 1/1024 which
+/// happens for large values like 0x40000040.
+#[inline]
+fn s15Fixed16Number_to_float(a: s15Fixed16Number) -> f32 {
+    (a as f32) / 65536.0_f32
+}
+
+#[inline]
+fn double_to_s15Fixed16Number(v: f64) -> s15Fixed16Number {
+    (v * 65536.0) as s15Fixed16Number // scaled by 2^16; `as` truncates toward zero
+}
+
+#[cfg(feature = "c_bindings")]
+extern crate libc;
+#[cfg(feature = "c_bindings")]
+pub mod c_bindings;
+mod chain;
+mod gtest;
+mod iccread;
+mod matrix;
+mod transform;
+pub use iccread::qcms_CIE_xyY as CIE_xyY;
+pub use iccread::qcms_CIE_xyYTRIPLE as CIE_xyYTRIPLE;
+pub use iccread::Profile;
+pub use transform::DataType;
+pub use transform::Transform;
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+mod transform_avx;
+#[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
+mod transform_neon;
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+mod transform_sse2;
+mod transform_util;
diff --git a/gfx/qcms/src/matrix.rs b/gfx/qcms/src/matrix.rs
new file mode 100644
index 0000000000..a7d4bc6455
--- /dev/null
+++ b/gfx/qcms/src/matrix.rs
@@ -0,0 +1,147 @@
+/* vim: set ts=8 sw=8 noexpandtab: */
+//  qcms
+//  Copyright (C) 2009 Mozilla Foundation
+//  Copyright (C) 1998-2007 Marti Maria
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+/// A 3x3 single-precision matrix.
+#[derive(Copy, Clone, Default)]
+pub struct Matrix {
+    /// Entries addressed as `m[row][column]`.
+    pub m: [[f32; 3]; 3],
+    /// Set when the matrix is the result of a computation that failed, such
+    /// as inverting a matrix whose determinant is zero.
+    pub invalid: bool,
+}
+
+/// A 3-component single-precision vector.
+#[derive(Copy, Clone)]
+pub struct Vector {
+    pub v: [f32; 3],
+}
+
+impl Matrix {
+    /// Returns the matrix-vector product `self * v`.
+    pub fn eval(&self, v: Vector) -> Vector {
+        // Dot product of one matrix row with `v`, summed left to right.
+        let dot = |row: &[f32; 3]| row[0] * v.v[0] + row[1] * v.v[1] + row[2] * v.v[2];
+        Vector {
+            v: [dot(&self.m[0]), dot(&self.m[1]), dot(&self.m[2])],
+        }
+    }
+
+    //probably reuse this computation in matrix_invert
+    /// Returns the determinant, expanded directly from the nine entries.
+    pub fn det(&self) -> f32 {
+        let m = &self.m;
+        m[0][0] * m[1][1] * m[2][2]
+            + m[0][1] * m[1][2] * m[2][0]
+            + m[0][2] * m[1][0] * m[2][1]
+            - m[0][0] * m[1][2] * m[2][1]
+            - m[0][1] * m[1][0] * m[2][2]
+            - m[0][2] * m[1][1] * m[2][0]
+    }
+
+    /* from pixman and cairo and Mathematics for Game Programmers */
+    /* lcms uses gauss-jordan elimination with partial pivoting which is
+     * less efficient and not as numerically stable. See Mathematics for
+     * Game Programmers. */
+    /// Returns the inverse, computed as `inv(A) = 1/det(A) * adj(A)`.
+    ///
+    /// When the determinant is exactly zero the result has `invalid` set and
+    /// every entry zero.
+    pub fn invert(&self) -> Matrix {
+        // HI[k] and LO[k] are the two indices that remain once index `k` is
+        // struck out; together they select the 2x2 minor behind each cofactor.
+        const HI: [usize; 3] = [2, 2, 1];
+        const LO: [usize; 3] = [1, 0, 0];
+
+        /* inv  (A) = 1/det (A) * adj (A) */
+        let det = self.det();
+        if det == 0. {
+            return Matrix {
+                m: [[0.; 3]; 3],
+                invalid: true,
+            };
+        }
+        let inv_det = 1. / det;
+
+        let mut dest_mat = Matrix {
+            m: [[0.; 3]; 3],
+            invalid: false,
+        };
+        for j in 0..3 {
+            for i in 0..3 {
+                let (ai, aj, bi, bj) = (HI[i], HI[j], LO[i], LO[j]);
+                // The minor is evaluated in f32 and only then widened.
+                let minor =
+                    (self.m[ai][aj] * self.m[bi][bj] - self.m[ai][bj] * self.m[bi][aj]) as f64;
+                // Cofactor signs alternate in a checkerboard pattern.
+                let p = if ((i + j) & 1) != 0 { -minor } else { minor };
+                dest_mat.m[j][i] = (inv_det as f64 * p) as f32;
+            }
+        }
+        dest_mat
+    }
+
+    /// Returns the identity matrix.
+    pub fn identity() -> Matrix {
+        Matrix {
+            m: [
+                [1., 0., 0.],
+                [0., 1., 0.],
+                [0., 0., 1.],
+            ],
+            invalid: false,
+        }
+    }
+
+    /// Returns a matrix flagged as invalid; its entries are those of the
+    /// identity matrix.
+    pub fn invalid() -> Matrix {
+        Matrix {
+            invalid: true,
+            ..Self::identity()
+        }
+    }
+
+    /* from pixman */
+    /* MAT3per... */
+    /// Returns the matrix product `a * b`.
+    ///
+    /// The result is flagged invalid if either operand is. Each entry sums
+    /// its three `f32` products in an `f64` accumulator.
+    pub fn multiply(a: Matrix, b: Matrix) -> Matrix {
+        let mut result = Matrix {
+            m: [[0.; 3]; 3],
+            invalid: a.invalid || b.invalid,
+        };
+        for dy in 0..3 {
+            for dx in 0..3 {
+                let mut v = 0f64;
+                for o in 0..3 {
+                    v += (a.m[dy][o] * b.m[o][dx]) as f64;
+                }
+                result.m[dy][dx] = v as f32;
+            }
+        }
+        result
+    }
+}
diff --git a/gfx/qcms/src/transform.rs b/gfx/qcms/src/transform.rs
new file mode 100644
index 0000000000..faece155c6
--- /dev/null
+++ b/gfx/qcms/src/transform.rs
@@ -0,0 +1,1381 @@
+/* vim: set ts=8 sw=8 noexpandtab: */
+//  qcms
+//  Copyright (C) 2009 Mozilla Foundation
+//  Copyright (C) 1998-2007 Marti Maria
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+use crate::transform_neon::{
+    qcms_transform_data_bgra_out_lut_neon, qcms_transform_data_rgb_out_lut_neon,
+    qcms_transform_data_rgba_out_lut_neon,
+};
+use crate::{
+    chain::chain_transform,
+    double_to_s15Fixed16Number,
+    iccread::SUPPORTS_ICCV4,
+    matrix::*,
+    transform_util::{
+        build_colorant_matrix, build_input_gamma_table, build_output_lut, compute_precache,
+        lut_interp_linear,
+    },
+};
+use crate::{
+    iccread::{qcms_CIE_xyY, qcms_CIE_xyYTRIPLE, Profile, GRAY_SIGNATURE, RGB_SIGNATURE},
+    transform_util::clamp_float,
+    Intent,
+};
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+use crate::{
+    transform_avx::{
+        qcms_transform_data_bgra_out_lut_avx, qcms_transform_data_rgb_out_lut_avx,
+        qcms_transform_data_rgba_out_lut_avx,
+    },
+    transform_sse2::{
+        qcms_transform_data_bgra_out_lut_sse2, qcms_transform_data_rgb_out_lut_sse2,
+        qcms_transform_data_rgba_out_lut_sse2,
+    },
+};
+
+use std::sync::atomic::Ordering;
+use std::sync::Arc;
+
+pub const PRECACHE_OUTPUT_SIZE: usize = 8192; // entries in one `PrecacheOuput` table
+pub const PRECACHE_OUTPUT_MAX: usize = PRECACHE_OUTPUT_SIZE - 1; // last valid table index
+pub const FLOATSCALE: f32 = PRECACHE_OUTPUT_SIZE as f32;
+pub const CLAMPMAXVAL: f32 = PRECACHE_OUTPUT_MAX as f32 / FLOATSCALE; // (SIZE - 1) / SIZE
+
+/// A precomputed lookup table of `PRECACHE_OUTPUT_SIZE` bytes.
+#[repr(C)]
+pub struct PrecacheOuput {
+    /* We previously used a count of 65536 here but that seems like more
+     * precision than we actually need.  By reducing the size we can
+     * improve startup performance and reduce memory usage. ColorSync on
+     * 10.5 uses 4097 which is perhaps because they use a fixed point
+     * representation where 1. is represented by 0x1000. */
+    pub data: [u8; PRECACHE_OUTPUT_SIZE],
+}
+
+impl Default for PrecacheOuput {
+    /// Returns a table whose entries are all zero.
+    fn default() -> Self {
+        Self { data: [0; PRECACHE_OUTPUT_SIZE] }
+    }
+}
+
+/* The precached output tables are used as lookup tables for the output
+ * transformation. They are refcounted so we only need to have one around per
+ * output profile, instead of duplicating them per transform. */
+
+#[repr(C)]
+#[repr(align(16))]
+#[derive(Clone, Default)]
+pub struct qcms_transform {
+    pub matrix: [[f32; 4]; 3],
+    pub input_gamma_table_r: Option<Vec<f32>>,
+    pub input_gamma_table_g: Option<Vec<f32>>,
+    pub input_gamma_table_b: Option<Vec<f32>>,
+    pub input_clut_table_length: u16,
+    pub clut: Option<Vec<f32>>,
+    pub grid_size: u16,
+    pub output_clut_table_length: u16,
+    pub input_gamma_table_gray: Option<Vec<f32>>, // indexed by the 8-bit gray input value
+    pub out_gamma_r: f32,
+    pub out_gamma_g: f32,
+    pub out_gamma_b: f32,
+    pub out_gamma_gray: f32,
+    pub output_gamma_lut_r: Option<Vec<u16>>, // interpolated with lut_interp_linear
+    pub output_gamma_lut_g: Option<Vec<u16>>, // interpolated with lut_interp_linear
+    pub output_gamma_lut_b: Option<Vec<u16>>, // interpolated with lut_interp_linear
+    pub output_gamma_lut_gray: Option<Vec<u16>>,
+    pub output_gamma_lut_r_length: usize,
+    pub output_gamma_lut_g_length: usize,
+    pub output_gamma_lut_b_length: usize,
+    pub output_gamma_lut_gray_length: usize,
+    pub output_table_r: Option<Arc<PrecacheOuput>>, // refcounted precache table
+    pub output_table_g: Option<Arc<PrecacheOuput>>, // refcounted precache table
+    pub output_table_b: Option<Arc<PrecacheOuput>>, // refcounted precache table
+    pub transform_fn: transform_fn_t, // NOTE(review): presumably the routine applying this transform; confirm
+}
+
+/// Type of `qcms_transform::transform_fn`; the arguments are (transform, src, dest, length).
+pub type transform_fn_t = Option<unsafe extern "C" fn(&qcms_transform, *const u8, *mut u8, usize)>;
+/// The format of pixel data
+#[repr(u32)]
+#[derive(PartialEq, Eq, Clone, Copy)]
+pub enum DataType {
+    RGB8 = 0,
+    RGBA8 = 1,
+    BGRA8 = 2,
+    Gray8 = 3,
+    GrayA8 = 4,
+}
+
+impl DataType {
+    /// Bytes per pixel; arms are path-qualified so they cannot become catch-all bindings.
+    pub fn bytes_per_pixel(&self) -> usize {
+        match *self {
+            DataType::Gray8 => 1,
+            DataType::GrayA8 => 2,
+            DataType::RGB8 => 3,
+            DataType::RGBA8 | DataType::BGRA8 => 4,
+        }
+    }
+}
+
+use DataType::*;
+
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct CIE_XYZ { // three f64 components; `D50_XYZ` stores an illuminant in this form
+    pub X: f64,
+    pub Y: f64, // set to 1.0 in `D50_XYZ`
+    pub Z: f64,
+}
+
+pub trait Format {
+    const kRIndex: usize; // byte offset of the red channel within a pixel
+    const kGIndex: usize; // byte offset of the green channel within a pixel
+    const kBIndex: usize; // byte offset of the blue channel within a pixel
+    const kAIndex: usize; // byte offset of alpha, or 0xFF when the layout has none
+}
+
+pub struct BGRA;
+impl Format for BGRA {
+    const kRIndex: usize = 2;
+    const kGIndex: usize = 1;
+    const kBIndex: usize = 0;
+    const kAIndex: usize = 3;
+}
+
+pub struct RGBA;
+impl Format for RGBA {
+    const kRIndex: usize = 0;
+    const kGIndex: usize = 1;
+    const kBIndex: usize = 2;
+    const kAIndex: usize = 3;
+}
+
+pub struct RGB;
+impl Format for RGB {
+    const kRIndex: usize = 0;
+    const kGIndex: usize = 1;
+    const kBIndex: usize = 2;
+    const kAIndex: usize = 0xFF; // sentinel: three-byte pixels carry no alpha
+}
+
+pub trait GrayFormat {
+    const has_alpha: bool; // true when an alpha byte follows each gray byte in the input
+}
+
+pub struct Gray; // gray input, one byte per pixel
+impl GrayFormat for Gray {
+    const has_alpha: bool = false;
+}
+
+pub struct GrayAlpha; // gray input, two bytes per pixel (gray, then alpha)
+impl GrayFormat for GrayAlpha {
+    const has_alpha: bool = true;
+}
+
+/// Rounds `v` to the nearest integer (half up) and saturates it to 0..=255.
+/// The gray transform uses it to turn scaled channel values into output bytes.
+#[inline]
+fn clamp_u8(v: f32) -> u8 {
+    match v {
+        above if above > 255. => 255,
+        below if below < 0. => 0,
+        inside => (inside + 0.5).floor() as u8,
+    }
+}
+
+// Build a White point, primary chromas transfer matrix from RGB to CIE XYZ
+// This is just an approximation, I am not handling all the non-linear
+// aspects of the RGB to XYZ process, and assuming that the gamma correction
+// has transitive property in the transformation chain.
+//
+// the algorithm:
+//
+//            - First I build the absolute conversion matrix using
+//              primaries in XYZ. This matrix is next inverted
+//            - Then I eval the source white point across this matrix
+//              obtaining the coefficients of the transformation
+//            - Then, I apply these coefficients to the original matrix
+//
+// Returns `Matrix::invalid()` when `white.y` is zero or when the matrix of
+// primaries cannot be inverted.
+fn build_RGB_to_XYZ_transfer_matrix(white: qcms_CIE_xyY, primrs: qcms_CIE_xyYTRIPLE) -> Matrix {
+    // Chromaticity of the white point.
+    let (xn, yn): (f64, f64) = (white.x, white.y);
+    if yn == 0.0f64 {
+        return Matrix::invalid();
+    }
+
+    // Chromaticities of the three primaries.
+    let (xr, yr): (f64, f64) = (primrs.red.x, primrs.red.y);
+    let (xg, yg): (f64, f64) = (primrs.green.x, primrs.green.y);
+    let (xb, yb): (f64, f64) = (primrs.blue.x, primrs.blue.y);
+    // The third chromaticity coordinate of each primary: z = 1 - x - y.
+    let zr = 1f64 - xr - yr;
+    let zg = 1f64 - xg - yg;
+    let zb = 1f64 - xb - yb;
+
+    let primaries = Matrix {
+        m: [
+            [xr as f32, xg as f32, xb as f32],
+            [yr as f32, yg as f32, yb as f32],
+            [zr as f32, zg as f32, zb as f32],
+        ],
+        invalid: false,
+    };
+    let white_point = Vector {
+        v: [
+            (xn / yn) as f32,
+            1.,
+            ((1.0f64 - xn - yn) / yn) as f32,
+        ],
+    };
+
+    let primaries_invert = primaries.invert();
+    if primaries_invert.invalid {
+        return Matrix::invalid();
+    }
+
+    // Scale the column of each primary by its coefficient.
+    let coefs = primaries_invert.eval(white_point);
+    let cr = coefs.v[0] as f64;
+    let cg = coefs.v[1] as f64;
+    let cb = coefs.v[2] as f64;
+    Matrix {
+        m: [
+            [(cr * xr) as f32, (cg * xg) as f32, (cb * xb) as f32],
+            [(cr * yr) as f32, (cg * yg) as f32, (cb * yb) as f32],
+            [(cr * zr) as f32, (cg * zg) as f32, (cb * zb) as f32],
+        ],
+        invalid: primaries_invert.invalid,
+    }
+}
+/* CIE Illuminant D50 */
+const D50_XYZ: CIE_XYZ = CIE_XYZ {
+    X: 0.9642,
+    Y: 1.0,
+    Z: 0.8249,
+};
+/* from lcms: xyY2XYZ()
+ * corresponds to argyll: icmYxy2XYZ() */
+/// Converts an xyY color to XYZ: `X = x / y * Y`, `Z = (1 - x - y) / y * Y`.
+///
+/// `source.y` is used as a divisor and is not checked here.
+fn xyY2XYZ(source: qcms_CIE_xyY) -> CIE_XYZ {
+    let qcms_CIE_xyY { x, y, Y } = source;
+    CIE_XYZ {
+        X: x / y * Y,
+        Y,
+        Z: (1f64 - x - y) / y * Y,
+    }
+}
+/* from lcms: ComputeChromaticAdaption */
+/// Computes the chromatic adaption matrix from `source_white_point` to
+/// `dest_white_point`, using `chad` as the cone matrix.
+///
+/// The result is `chad^-1 * D * chad`, where `D` is the diagonal matrix of
+/// destination / source cone responses and a cone response is `chad`
+/// applied to a white point. Returns `Matrix::invalid()` when `chad` cannot
+/// be inverted. No check is made for a zero source cone response.
+fn compute_chromatic_adaption(
+    source_white_point: CIE_XYZ,
+    dest_white_point: CIE_XYZ,
+    chad: Matrix,
+) -> Matrix {
+    let chad_inv = chad.invert();
+    if chad_inv.invalid {
+        return Matrix::invalid();
+    }
+
+    // Narrows a white point to the single-precision vector `chad` works on.
+    let to_vector = |white: CIE_XYZ| Vector {
+        v: [white.X as f32, white.Y as f32, white.Z as f32],
+    };
+
+    // Cone responses of both white points.
+    let cone_source_rgb = chad.eval(to_vector(source_white_point));
+    let cone_dest_rgb = chad.eval(to_vector(dest_white_point));
+
+    // Per-channel gain that maps the source response onto the destination one.
+    let gain = |i: usize| cone_dest_rgb.v[i] / cone_source_rgb.v[i];
+    let cone = Matrix {
+        m: [
+            [gain(0), 0., 0.],
+            [0., gain(1), 0.],
+            [0., 0., gain(2)],
+        ],
+        invalid: false,
+    };
+
+    // Normalize
+    let scaled = Matrix::multiply(cone, chad);
+    Matrix::multiply(chad_inv, scaled)
+}
+/* from lcms: cmsAdaptionMatrix */
+/// Returns the final chromatic adaptation matrix from the illuminant
+/// `source_illumination` to the illuminant `target_illumination`.
+///
+/// Bradford is assumed: the matrix below is handed to
+/// `compute_chromatic_adaption` as its cone matrix.
+fn adaption_matrix(source_illumination: CIE_XYZ, target_illumination: CIE_XYZ) -> Matrix {
+    let lam_rigg = Matrix {
+        m: [
+            [0.8951, 0.2664, -0.1614],
+            [-0.7502, 1.7135, 0.0367],
+            [0.0389, -0.0685, 1.0296],
+        ],
+        invalid: false,
+    };
+    compute_chromatic_adaption(source_illumination, target_illumination, lam_rigg)
+}
+/* from lcms: cmsAdaptMatrixToD50 */
+/// Adapts `r` from `source_white_pt` to D50; `Matrix::invalid()` on failure.
+fn adapt_matrix_to_D50(r: Matrix, source_white_pt: qcms_CIE_xyY) -> Matrix {
+    if source_white_pt.y == 0.0f64 {
+        return Matrix::invalid();
+    }
+    let Bradford = adaption_matrix(xyY2XYZ(source_white_pt), D50_XYZ);
+    if Bradford.invalid {
+        Matrix::invalid()
+    } else {
+        Matrix::multiply(Bradford, r)
+    }
+}
+/// Stores the D50-adapted RGB colorants in `profile`; `false` if the matrix is invalid.
+pub(crate) fn set_rgb_colorants(
+    profile: &mut Profile,
+    white_point: qcms_CIE_xyY,
+    primaries: qcms_CIE_xyYTRIPLE,
+) -> bool {
+    let transfer = build_RGB_to_XYZ_transfer_matrix(white_point, primaries);
+    let adapted = adapt_matrix_to_D50(transfer, white_point);
+    if !adapted.invalid {
+        /* note: there's a transpose type of operation going on here */
+        profile.redColorant.X = double_to_s15Fixed16Number(adapted.m[0][0] as f64);
+        profile.redColorant.Y = double_to_s15Fixed16Number(adapted.m[1][0] as f64);
+        profile.redColorant.Z = double_to_s15Fixed16Number(adapted.m[2][0] as f64);
+        profile.greenColorant.X = double_to_s15Fixed16Number(adapted.m[0][1] as f64);
+        profile.greenColorant.Y = double_to_s15Fixed16Number(adapted.m[1][1] as f64);
+        profile.greenColorant.Z = double_to_s15Fixed16Number(adapted.m[2][1] as f64);
+        profile.blueColorant.X = double_to_s15Fixed16Number(adapted.m[0][2] as f64);
+        profile.blueColorant.Y = double_to_s15Fixed16Number(adapted.m[1][2] as f64);
+        profile.blueColorant.Z = double_to_s15Fixed16Number(adapted.m[2][2] as f64);
+    }
+    !adapted.invalid
+}
+pub(crate) fn get_rgb_colorants(
+    colorants: &mut Matrix,
+    white_point: qcms_CIE_xyY,
+    primaries: qcms_CIE_xyYTRIPLE,
+) -> bool {
+    let transfer = build_RGB_to_XYZ_transfer_matrix(white_point, primaries);
+    *colorants = adapt_matrix_to_D50(transfer, white_point);
+    colorants.invalid // NOTE(review): `true` means INVALID, unlike set_rgb_colorants
+}
+/// Converts `length` gray pixels (layout `I`) into RGB-family pixels (layout
+/// `F`) using the output gamma LUTs.
+///
+/// Every gray sample is linearized through `input_gamma_table_gray`; the same
+/// linear value is then pushed through the red, green and blue output gamma
+/// LUTs, scaled by 255 and clamped to a byte.
+///
+/// Alpha is not corrected: it is copied from the input, or set to `0xff` when
+/// the input format has no alpha channel. A rationale for this is found in
+/// Alvy Ray Smith's "Should Alpha Be Nonlinear If RGB Is?" Tech Memo 17
+/// (December 14, 1998).
+/// See: ftp://ftp.alvyray.com/Acrobat/17_Nonln.pdf
+///
+/// # Safety
+/// `src` must be readable for `length` input pixels (1 byte each, or 2 when
+/// `I::has_alpha`) and `dest` must be writable for `length` output pixels
+/// (3 bytes each, or 4 when `F` has an alpha slot).
+///
+/// # Panics
+/// Panics if `input_gamma_table_gray` is `None`, or if an output gamma LUT is
+/// `None` while `length > 0`.
+unsafe extern "C" fn qcms_transform_data_gray_template_lut<I: GrayFormat, F: Format>(
+    transform: &qcms_transform,
+    mut src: *const u8,
+    mut dest: *mut u8,
+    length: usize,
+) {
+    // Output pixel stride: an alpha index of 0xff marks a format without alpha.
+    let components: usize = if F::kAIndex == 0xff { 3 } else { 4 };
+    let input_gamma_table_gray = transform
+        .input_gamma_table_gray
+        .as_ref()
+        .unwrap()
+        .as_ptr();
+
+    // `length` is a `usize`, so iterate over a `usize` range; a narrower
+    // counter would overflow for buffers longer than `u32::MAX` pixels.
+    for _ in 0..length {
+        let device: u8 = *src;
+        src = src.add(1);
+        let alpha: u8 = if I::has_alpha {
+            let value = *src;
+            src = src.add(1);
+            value
+        } else {
+            0xff
+        };
+        let linear: f32 = *input_gamma_table_gray.add(device as usize);
+
+        let out_device_r: f32 = lut_interp_linear(
+            linear as f64,
+            transform.output_gamma_lut_r.as_ref().unwrap(),
+        );
+        let out_device_g: f32 = lut_interp_linear(
+            linear as f64,
+            transform.output_gamma_lut_g.as_ref().unwrap(),
+        );
+        let out_device_b: f32 = lut_interp_linear(
+            linear as f64,
+            transform.output_gamma_lut_b.as_ref().unwrap(),
+        );
+        *dest.add(F::kRIndex) = clamp_u8(out_device_r * 255f32);
+        *dest.add(F::kGIndex) = clamp_u8(out_device_g * 255f32);
+        *dest.add(F::kBIndex) = clamp_u8(out_device_b * 255f32);
+        if F::kAIndex != 0xff {
+            *dest.add(F::kAIndex) = alpha;
+        }
+        dest = dest.add(components);
+    }
+}
+/// `Gray` input to `RGB` output instantiation of
+/// `qcms_transform_data_gray_template_lut`; all arguments are forwarded as is.
+unsafe extern "C" fn qcms_transform_data_gray_out_lut(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_gray_template_lut::<Gray, RGB>(transform, src, dest, length)
+}
+/// `Gray` input to `RGBA` output instantiation of
+/// `qcms_transform_data_gray_template_lut`; all arguments are forwarded as is.
+unsafe extern "C" fn qcms_transform_data_gray_rgba_out_lut(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_gray_template_lut::<Gray, RGBA>(transform, src, dest, length)
+}
+/// `Gray` input to `BGRA` output instantiation of
+/// `qcms_transform_data_gray_template_lut`; all arguments are forwarded as is.
+unsafe extern "C" fn qcms_transform_data_gray_bgra_out_lut(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_gray_template_lut::<Gray, BGRA>(transform, src, dest, length)
+}
+/// `GrayAlpha` input to `RGBA` output instantiation of
+/// `qcms_transform_data_gray_template_lut`; all arguments are forwarded as is.
+unsafe extern "C" fn qcms_transform_data_graya_rgba_out_lut(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_gray_template_lut::<GrayAlpha, RGBA>(transform, src, dest, length)
+}
+/// `GrayAlpha` input to `BGRA` output instantiation of
+/// `qcms_transform_data_gray_template_lut`; all arguments are forwarded as is.
+unsafe extern "C" fn qcms_transform_data_graya_bgra_out_lut(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_gray_template_lut::<GrayAlpha, BGRA>(transform, src, dest, length)
+}
+/// Converts `length` gray pixels (layout `I`) into RGB-family pixels (layout
+/// `F`) using the precached output tables.
+///
+/// Every gray sample is linearized through `input_gamma_table_gray`, scaled to
+/// `PRECACHE_OUTPUT_MAX` (truncating, not rounding) and used as an index into
+/// the red, green and blue precache tables. Alpha is copied from the input, or
+/// set to `0xff` when the input format has no alpha channel.
+///
+/// # Safety
+/// `transform` must point to a valid `qcms_transform`. `src` must be readable
+/// for `length` input pixels (1 byte each, or 2 when `I::has_alpha`) and
+/// `dest` must be writable for `length` output pixels (3 bytes each, or 4 when
+/// `F` has an alpha slot).
+///
+/// # Panics
+/// Panics if any output precache table or `input_gamma_table_gray` is `None`.
+unsafe extern "C" fn qcms_transform_data_gray_template_precache<I: GrayFormat, F: Format>(
+    transform: *const qcms_transform,
+    mut src: *const u8,
+    mut dest: *mut u8,
+    length: usize,
+) {
+    let transform = &*transform;
+    // Output pixel stride: an alpha index of 0xff marks a format without alpha.
+    let components: usize = if F::kAIndex == 0xff { 3 } else { 4 };
+    let output_table_r = transform.output_table_r.as_deref().unwrap();
+    let output_table_g = transform.output_table_g.as_deref().unwrap();
+    let output_table_b = transform.output_table_b.as_deref().unwrap();
+
+    let input_gamma_table_gray = transform
+        .input_gamma_table_gray
+        .as_ref()
+        .unwrap()
+        .as_ptr();
+
+    // `length` is a `usize`, so iterate over a `usize` range; a narrower
+    // counter would overflow for buffers longer than `u32::MAX` pixels.
+    for _ in 0..length {
+        let device: u8 = *src;
+        src = src.add(1);
+        let alpha: u8 = if I::has_alpha {
+            let value = *src;
+            src = src.add(1);
+            value
+        } else {
+            0xff
+        };
+
+        let linear: f32 = *input_gamma_table_gray.add(device as usize);
+        /* we could round here... */
+        let gray: u16 = (linear * PRECACHE_OUTPUT_MAX as f32) as u16;
+        *dest.add(F::kRIndex) = output_table_r.data[gray as usize];
+        *dest.add(F::kGIndex) = output_table_g.data[gray as usize];
+        *dest.add(F::kBIndex) = output_table_b.data[gray as usize];
+        if F::kAIndex != 0xff {
+            *dest.add(F::kAIndex) = alpha;
+        }
+        dest = dest.add(components);
+    }
+}
+/// `Gray` input to `RGB` output instantiation of
+/// `qcms_transform_data_gray_template_precache`; arguments are forwarded as is.
+unsafe extern "C" fn qcms_transform_data_gray_out_precache(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_gray_template_precache::<Gray, RGB>(transform, src, dest, length)
+}
+/// `Gray` input to `RGBA` output instantiation of
+/// `qcms_transform_data_gray_template_precache`; arguments are forwarded as is.
+unsafe extern "C" fn qcms_transform_data_gray_rgba_out_precache(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_gray_template_precache::<Gray, RGBA>(transform, src, dest, length)
+}
+/// `Gray` input to `BGRA` output instantiation of
+/// `qcms_transform_data_gray_template_precache`; arguments are forwarded as is.
+unsafe extern "C" fn qcms_transform_data_gray_bgra_out_precache(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_gray_template_precache::<Gray, BGRA>(transform, src, dest, length)
+}
+/// `GrayAlpha` input to `RGBA` output instantiation of
+/// `qcms_transform_data_gray_template_precache`; arguments are forwarded as is.
+unsafe extern "C" fn qcms_transform_data_graya_rgba_out_precache(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_gray_template_precache::<GrayAlpha, RGBA>(transform, src, dest, length)
+}
+/// `GrayAlpha` input to `BGRA` output instantiation of
+/// `qcms_transform_data_gray_template_precache`; arguments are forwarded as is.
+unsafe extern "C" fn qcms_transform_data_graya_bgra_out_precache(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_gray_template_precache::<GrayAlpha, BGRA>(transform, src, dest, length)
+}
+/// Transforms `length` pixels of layout `F` from `src` to `dest` using the
+/// input gamma tables, the transform matrix and the precached output tables.
+///
+/// Per pixel: each channel is linearized through its input gamma table, the
+/// linear triple is combined with `transform.matrix`, each result is clamped
+/// with `clamp_float`, scaled to `PRECACHE_OUTPUT_MAX` (truncating) and looked
+/// up in the matching output precache table. Alpha, when `F` has one, is
+/// copied through unchanged.
+///
+/// # Safety
+/// `src` must be readable and `dest` writable for `length` pixels of 3 bytes
+/// each (4 when `F` has an alpha slot).
+///
+/// # Panics
+/// Panics if any output precache table or input gamma table is `None`.
+unsafe extern "C" fn qcms_transform_data_template_lut_precache<F: Format>(
+    transform: &qcms_transform,
+    mut src: *const u8,
+    mut dest: *mut u8,
+    length: usize,
+) {
+    // Pixel stride: an alpha index of 0xff marks a format without alpha.
+    let components: usize = if F::kAIndex == 0xff { 3 } else { 4 };
+    let output_table_r = transform.output_table_r.as_deref().unwrap();
+    let output_table_g = transform.output_table_g.as_deref().unwrap();
+    let output_table_b = transform.output_table_b.as_deref().unwrap();
+    let input_gamma_table_r = transform.input_gamma_table_r.as_ref().unwrap().as_ptr();
+    let input_gamma_table_g = transform.input_gamma_table_g.as_ref().unwrap().as_ptr();
+    let input_gamma_table_b = transform.input_gamma_table_b.as_ref().unwrap().as_ptr();
+
+    let mat = &transform.matrix;
+    // `length` is a `usize`, so iterate over a `usize` range; a narrower
+    // counter would overflow for buffers longer than `u32::MAX` pixels.
+    for _ in 0..length {
+        let device_r: u8 = *src.add(F::kRIndex);
+        let device_g: u8 = *src.add(F::kGIndex);
+        let device_b: u8 = *src.add(F::kBIndex);
+        let alpha: u8 = if F::kAIndex != 0xff {
+            *src.add(F::kAIndex)
+        } else {
+            0
+        };
+        src = src.add(components);
+
+        let linear_r: f32 = *input_gamma_table_r.add(device_r as usize);
+        let linear_g: f32 = *input_gamma_table_g.add(device_g as usize);
+        let linear_b: f32 = *input_gamma_table_b.add(device_b as usize);
+        // Output channel `c` combines matrix entries `mat[0][c]`, `mat[1][c]`, `mat[2][c]`.
+        let out_linear_r: f32 =
+            clamp_float(mat[0][0] * linear_r + mat[1][0] * linear_g + mat[2][0] * linear_b);
+        let out_linear_g: f32 =
+            clamp_float(mat[0][1] * linear_r + mat[1][1] * linear_g + mat[2][1] * linear_b);
+        let out_linear_b: f32 =
+            clamp_float(mat[0][2] * linear_r + mat[1][2] * linear_g + mat[2][2] * linear_b);
+        /* we could round here... */
+
+        let r: u16 = (out_linear_r * PRECACHE_OUTPUT_MAX as f32) as u16;
+        let g: u16 = (out_linear_g * PRECACHE_OUTPUT_MAX as f32) as u16;
+        let b: u16 = (out_linear_b * PRECACHE_OUTPUT_MAX as f32) as u16;
+        *dest.add(F::kRIndex) = output_table_r.data[r as usize];
+        *dest.add(F::kGIndex) = output_table_g.data[g as usize];
+        *dest.add(F::kBIndex) = output_table_b.data[b as usize];
+        if F::kAIndex != 0xff {
+            *dest.add(F::kAIndex) = alpha;
+        }
+        dest = dest.add(components);
+    }
+}
+/// `RGB` instantiation of `qcms_transform_data_template_lut_precache`,
+/// exported under an unmangled symbol; all arguments are forwarded as is.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_transform_data_rgb_out_lut_precache(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_template_lut_precache::<RGB>(transform, src, dest, length)
+}
+/// `RGBA` instantiation of `qcms_transform_data_template_lut_precache`,
+/// exported under an unmangled symbol; all arguments are forwarded as is.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_transform_data_rgba_out_lut_precache(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_template_lut_precache::<RGBA>(transform, src, dest, length)
+}
+/// `BGRA` instantiation of `qcms_transform_data_template_lut_precache`,
+/// exported under an unmangled symbol; all arguments are forwarded as is.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_transform_data_bgra_out_lut_precache(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_template_lut_precache::<BGRA>(transform, src, dest, length)
+}
+// Not used: the C implementation of trilinear CLUT interpolation, kept below for reference only.
+/*
+static void qcms_transform_data_clut(const qcms_transform *transform, const unsigned char *src, unsigned char *dest, size_t length) {
+    unsigned int i;
+    int xy_len = 1;
+    int x_len = transform->grid_size;
+    int len = x_len * x_len;
+    const float* r_table = transform->r_clut;
+    const float* g_table = transform->g_clut;
+    const float* b_table = transform->b_clut;
+
+    for (i = 0; i < length; i++) {
+        unsigned char in_r = *src++;
+        unsigned char in_g = *src++;
+        unsigned char in_b = *src++;
+        float linear_r = in_r/255.0f, linear_g=in_g/255.0f, linear_b = in_b/255.0f;
+
+        int x = floorf(linear_r * (transform->grid_size-1));
+        int y = floorf(linear_g * (transform->grid_size-1));
+        int z = floorf(linear_b * (transform->grid_size-1));
+        int x_n = ceilf(linear_r * (transform->grid_size-1));
+        int y_n = ceilf(linear_g * (transform->grid_size-1));
+        int z_n = ceilf(linear_b * (transform->grid_size-1));
+        float x_d = linear_r * (transform->grid_size-1) - x;
+        float y_d = linear_g * (transform->grid_size-1) - y;
+        float z_d = linear_b * (transform->grid_size-1) - z;
+
+        float r_x1 = lerp(CLU(r_table,x,y,z), CLU(r_table,x_n,y,z), x_d);
+        float r_x2 = lerp(CLU(r_table,x,y_n,z), CLU(r_table,x_n,y_n,z), x_d);
+        float r_y1 = lerp(r_x1, r_x2, y_d);
+        float r_x3 = lerp(CLU(r_table,x,y,z_n), CLU(r_table,x_n,y,z_n), x_d);
+        float r_x4 = lerp(CLU(r_table,x,y_n,z_n), CLU(r_table,x_n,y_n,z_n), x_d);
+        float r_y2 = lerp(r_x3, r_x4, y_d);
+        float clut_r = lerp(r_y1, r_y2, z_d);
+
+        float g_x1 = lerp(CLU(g_table,x,y,z), CLU(g_table,x_n,y,z), x_d);
+        float g_x2 = lerp(CLU(g_table,x,y_n,z), CLU(g_table,x_n,y_n,z), x_d);
+        float g_y1 = lerp(g_x1, g_x2, y_d);
+        float g_x3 = lerp(CLU(g_table,x,y,z_n), CLU(g_table,x_n,y,z_n), x_d);
+        float g_x4 = lerp(CLU(g_table,x,y_n,z_n), CLU(g_table,x_n,y_n,z_n), x_d);
+        float g_y2 = lerp(g_x3, g_x4, y_d);
+        float clut_g = lerp(g_y1, g_y2, z_d);
+
+        float b_x1 = lerp(CLU(b_table,x,y,z), CLU(b_table,x_n,y,z), x_d);
+        float b_x2 = lerp(CLU(b_table,x,y_n,z), CLU(b_table,x_n,y_n,z), x_d);
+        float b_y1 = lerp(b_x1, b_x2, y_d);
+        float b_x3 = lerp(CLU(b_table,x,y,z_n), CLU(b_table,x_n,y,z_n), x_d);
+        float b_x4 = lerp(CLU(b_table,x,y_n,z_n), CLU(b_table,x_n,y_n,z_n), x_d);
+        float b_y2 = lerp(b_x3, b_x4, y_d);
+        float clut_b = lerp(b_y1, b_y2, z_d);
+
+        *dest++ = clamp_u8(clut_r*255.0f);
+        *dest++ = clamp_u8(clut_g*255.0f);
+        *dest++ = clamp_u8(clut_b*255.0f);
+    }
+}
+*/
+/// Integer division of `value` by `div`, computed as `(value + div - 1) / div`.
+///
+/// For a non-negative `value` and a positive `div` this rounds the quotient up.
+fn int_div_ceil(value: i32, div: i32) -> i32 {
+    let biased = value + div - 1;
+    biased / div
+}
+// Using lcms' tetra interpolation algorithm.
+/// Transforms `length` pixels of layout `F` from `src` to `dest` by
+/// tetrahedral interpolation in the transform's color lookup table (CLUT).
+///
+/// The CLUT stores interleaved RGB `f32` triples; the node at grid position
+/// `(x, y, z)` starts at element `(x * grid_size^2 + y * grid_size + z) * 3`.
+/// For each pixel the lower (`x`, `y`, `z`) and upper (`x_n`, `y_n`, `z_n`)
+/// grid indices and the fractional offsets `rx`, `ry`, `rz` are computed from
+/// the 8-bit inputs. The ordering of the fractions selects one of six cases,
+/// each of which defines three node differences `c1`, `c2`, `c3`, and the
+/// output is `c0 + c1 * rx + c2 * ry + c3 * rz` per channel, scaled by 255 and
+/// clamped to a byte. Alpha, when `F` has one, is copied through unchanged.
+///
+/// # Safety
+/// `transform` must point to a valid `qcms_transform` whose `clut` holds at
+/// least `3 * grid_size^3` elements. `src` must be readable and `dest`
+/// writable for `length` pixels of 3 bytes each (4 when `F` has an alpha slot).
+///
+/// # Panics
+/// Panics if `transform.clut` is `None`.
+unsafe extern "C" fn qcms_transform_data_tetra_clut_template<F: Format>(
+    transform: *const qcms_transform,
+    mut src: *const u8,
+    mut dest: *mut u8,
+    length: usize,
+) {
+    let transform = &*transform;
+    // Pixel stride: an alpha index of 0xff marks a format without alpha.
+    let components: usize = if F::kAIndex == 0xff { 3 } else { 4 };
+
+    let xy_len: i32 = 1;
+    let x_len: i32 = transform.grid_size as i32;
+    let len: i32 = x_len * x_len;
+    let grid_max: i32 = x_len - 1;
+    let table: *const f32 = transform.clut.as_ref().unwrap().as_ptr();
+
+    // Reads the interleaved [r, g, b] triple stored at grid node (gx, gy, gz).
+    let node = |gx: i32, gy: i32, gz: i32| -> [f32; 3] {
+        let base = ((gx * len + gy * x_len + gz * xy_len) * 3) as isize;
+        [
+            *table.offset(base),
+            *table.offset(base + 1),
+            *table.offset(base + 2),
+        ]
+    };
+    // Per-channel difference `hi - lo` of two node values.
+    let delta = |hi: [f32; 3], lo: [f32; 3]| -> [f32; 3] {
+        [hi[0] - lo[0], hi[1] - lo[1], hi[2] - lo[2]]
+    };
+
+    // `length` is a `usize`, so iterate over a `usize` range; a narrower
+    // counter would overflow for buffers longer than `u32::MAX` pixels.
+    for _ in 0..length {
+        let in_r: u8 = *src.add(F::kRIndex);
+        let in_g: u8 = *src.add(F::kGIndex);
+        let in_b: u8 = *src.add(F::kBIndex);
+        let in_a: u8 = if F::kAIndex != 0xff {
+            *src.add(F::kAIndex)
+        } else {
+            0
+        };
+        src = src.add(components);
+
+        let linear_r: f32 = in_r as i32 as f32 / 255.0;
+        let linear_g: f32 = in_g as i32 as f32 / 255.0;
+        let linear_b: f32 = in_b as i32 as f32 / 255.0;
+        // Lower grid indices (integer division truncates).
+        let x: i32 = in_r as i32 * grid_max / 255;
+        let y: i32 = in_g as i32 * grid_max / 255;
+        let z: i32 = in_b as i32 * grid_max / 255;
+        // Upper grid indices.
+        let x_n: i32 = int_div_ceil(in_r as i32 * grid_max, 255);
+        let y_n: i32 = int_div_ceil(in_g as i32 * grid_max, 255);
+        let z_n: i32 = int_div_ceil(in_b as i32 * grid_max, 255);
+        // Fractional position relative to the lower grid node.
+        let rx: f32 = linear_r * grid_max as f32 - x as f32;
+        let ry: f32 = linear_g * grid_max as f32 - y as f32;
+        let rz: f32 = linear_b * grid_max as f32 - z as f32;
+
+        let c0 = node(x, y, z);
+        let (c1, c2, c3) = if rx >= ry {
+            if ry >= rz {
+                // rx >= ry && ry >= rz
+                (
+                    delta(node(x_n, y, z), c0),
+                    delta(node(x_n, y_n, z), node(x_n, y, z)),
+                    delta(node(x_n, y_n, z_n), node(x_n, y_n, z)),
+                )
+            } else if rx >= rz {
+                // rx >= rz && rz > ry
+                (
+                    delta(node(x_n, y, z), c0),
+                    delta(node(x_n, y_n, z_n), node(x_n, y, z_n)),
+                    delta(node(x_n, y, z_n), node(x_n, y, z)),
+                )
+            } else {
+                // rz > rx && rx >= ry
+                (
+                    delta(node(x_n, y, z_n), node(x, y, z_n)),
+                    delta(node(x_n, y_n, z_n), node(x_n, y, z_n)),
+                    delta(node(x, y, z_n), c0),
+                )
+            }
+        } else if rx >= rz {
+            // ry > rx && rx >= rz
+            (
+                delta(node(x_n, y_n, z), node(x, y_n, z)),
+                delta(node(x, y_n, z), c0),
+                delta(node(x_n, y_n, z_n), node(x_n, y_n, z)),
+            )
+        } else if ry >= rz {
+            // ry >= rz && rz > rx
+            (
+                delta(node(x_n, y_n, z_n), node(x, y_n, z_n)),
+                delta(node(x, y_n, z), c0),
+                delta(node(x, y_n, z_n), node(x, y_n, z)),
+            )
+        } else {
+            // rz > ry && ry > rx
+            (
+                delta(node(x_n, y_n, z_n), node(x, y_n, z_n)),
+                delta(node(x, y_n, z_n), node(x, y, z_n)),
+                delta(node(x, y, z_n), c0),
+            )
+        };
+
+        let clut_r: f32 = c0[0] + c1[0] * rx + c2[0] * ry + c3[0] * rz;
+        let clut_g: f32 = c0[1] + c1[1] * rx + c2[1] * ry + c3[1] * rz;
+        let clut_b: f32 = c0[2] + c1[2] * rx + c2[2] * ry + c3[2] * rz;
+        *dest.add(F::kRIndex) = clamp_u8(clut_r * 255.0);
+        *dest.add(F::kGIndex) = clamp_u8(clut_g * 255.0);
+        *dest.add(F::kBIndex) = clamp_u8(clut_b * 255.0);
+        if F::kAIndex != 0xff {
+            *dest.add(F::kAIndex) = in_a;
+        }
+        dest = dest.add(components);
+    }
+}
+/// `RGB` instantiation of `qcms_transform_data_tetra_clut_template`; all
+/// arguments are forwarded as is.
+unsafe extern "C" fn qcms_transform_data_tetra_clut_rgb(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_tetra_clut_template::<RGB>(transform, src, dest, length)
+}
+/// `RGBA` instantiation of `qcms_transform_data_tetra_clut_template`; all
+/// arguments are forwarded as is.
+unsafe extern "C" fn qcms_transform_data_tetra_clut_rgba(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_tetra_clut_template::<RGBA>(transform, src, dest, length)
+}
+/// `BGRA` instantiation of `qcms_transform_data_tetra_clut_template`; all
+/// arguments are forwarded as is.
+unsafe extern "C" fn qcms_transform_data_tetra_clut_bgra(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_tetra_clut_template::<BGRA>(transform, src, dest, length)
+}
+/// Transforms `length` pixels of layout `F` from `src` to `dest` using the
+/// input gamma tables, the transform matrix and the output gamma LUTs.
+///
+/// Per pixel: each channel is linearized through its input gamma table, the
+/// linear triple is combined with `transform.matrix`, each result is clamped
+/// with `clamp_float`, interpolated through the matching output gamma LUT,
+/// scaled by 255 and clamped to a byte. Alpha, when `F` has one, is copied
+/// through unchanged.
+///
+/// # Safety
+/// `src` must be readable and `dest` writable for `length` pixels of 3 bytes
+/// each (4 when `F` has an alpha slot).
+///
+/// # Panics
+/// Panics if an input gamma table is `None`, or if an output gamma LUT is
+/// `None` while `length > 0`.
+unsafe extern "C" fn qcms_transform_data_template_lut<F: Format>(
+    transform: &qcms_transform,
+    mut src: *const u8,
+    mut dest: *mut u8,
+    length: usize,
+) {
+    // Pixel stride: an alpha index of 0xff marks a format without alpha.
+    let components: usize = if F::kAIndex == 0xff { 3 } else { 4 };
+
+    let mat = &transform.matrix;
+    let input_gamma_table_r = transform.input_gamma_table_r.as_ref().unwrap().as_ptr();
+    let input_gamma_table_g = transform.input_gamma_table_g.as_ref().unwrap().as_ptr();
+    let input_gamma_table_b = transform.input_gamma_table_b.as_ref().unwrap().as_ptr();
+    // `length` is a `usize`, so iterate over a `usize` range; a narrower
+    // counter would overflow for buffers longer than `u32::MAX` pixels.
+    for _ in 0..length {
+        let device_r: u8 = *src.add(F::kRIndex);
+        let device_g: u8 = *src.add(F::kGIndex);
+        let device_b: u8 = *src.add(F::kBIndex);
+        let alpha: u8 = if F::kAIndex != 0xff {
+            *src.add(F::kAIndex)
+        } else {
+            0
+        };
+        src = src.add(components);
+
+        let linear_r: f32 = *input_gamma_table_r.add(device_r as usize);
+        let linear_g: f32 = *input_gamma_table_g.add(device_g as usize);
+        let linear_b: f32 = *input_gamma_table_b.add(device_b as usize);
+        // Output channel `c` combines matrix entries `mat[0][c]`, `mat[1][c]`, `mat[2][c]`.
+        let out_linear_r: f32 =
+            clamp_float(mat[0][0] * linear_r + mat[1][0] * linear_g + mat[2][0] * linear_b);
+        let out_linear_g: f32 =
+            clamp_float(mat[0][1] * linear_r + mat[1][1] * linear_g + mat[2][1] * linear_b);
+        let out_linear_b: f32 =
+            clamp_float(mat[0][2] * linear_r + mat[1][2] * linear_g + mat[2][2] * linear_b);
+
+        let out_device_r: f32 = lut_interp_linear(
+            out_linear_r as f64,
+            transform.output_gamma_lut_r.as_ref().unwrap(),
+        );
+        let out_device_g: f32 = lut_interp_linear(
+            out_linear_g as f64,
+            transform.output_gamma_lut_g.as_ref().unwrap(),
+        );
+        let out_device_b: f32 = lut_interp_linear(
+            out_linear_b as f64,
+            transform.output_gamma_lut_b.as_ref().unwrap(),
+        );
+        *dest.add(F::kRIndex) = clamp_u8(out_device_r * 255f32);
+        *dest.add(F::kGIndex) = clamp_u8(out_device_g * 255f32);
+        *dest.add(F::kBIndex) = clamp_u8(out_device_b * 255f32);
+        if F::kAIndex != 0xff {
+            *dest.add(F::kAIndex) = alpha;
+        }
+        dest = dest.add(components);
+    }
+}
+/// `RGB` instantiation of `qcms_transform_data_template_lut`, exported under
+/// an unmangled symbol; all arguments are forwarded as is.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_transform_data_rgb_out_lut(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_template_lut::<RGB>(transform, src, dest, length)
+}
+/// `RGBA` instantiation of `qcms_transform_data_template_lut`, exported under
+/// an unmangled symbol; all arguments are forwarded as is.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_transform_data_rgba_out_lut(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_template_lut::<RGBA>(transform, src, dest, length)
+}
+/// `BGRA` instantiation of `qcms_transform_data_template_lut`, exported under
+/// an unmangled symbol; all arguments are forwarded as is.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_transform_data_bgra_out_lut(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_template_lut::<BGRA>(transform, src, dest, length)
+}
+
+/// Allocates a default-initialized `PrecacheOuput` behind a fresh `Arc`.
+fn precache_create() -> Arc<PrecacheOuput> {
+    Arc::default()
+}
+
+/// Destroys a transform that was handed out to C as a raw pointer.
+///
+/// A null `t` is accepted and ignored, matching the usual convention for C
+/// release functions.
+///
+/// # Safety
+/// A non-null `t` must have been produced by `Box::into_raw` on a
+/// `Box<qcms_transform>` and must not be used or released again afterwards.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_transform_release(t: *mut qcms_transform) {
+    // `Box::from_raw` on a null pointer is undefined behavior, so bail out first.
+    if t.is_null() {
+        return;
+    }
+    drop(Box::from_raw(t));
+}
+
+/// Bradford matrix used by `compute_whitepoint_adaption` to build a
+/// chromatic adaptation transform; always flagged valid.
+const bradford_matrix: Matrix = Matrix {
+    m: [
+        [0.8951, 0.2664, -0.1614],
+        [-0.7502, 1.7135, 0.0367],
+        [0.0389, -0.0685, 1.0296],
+    ],
+    invalid: false,
+};
+
+/// Hard-coded counterpart of `bradford_matrix`, applied on the left in
+/// `compute_whitepoint_adaption`; always flagged valid.
+// NOTE(review): presumably the numerical inverse of `bradford_matrix` -- the
+// values are not derived in code, so keep the two constants in sync by hand.
+const bradford_matrix_inv: Matrix = Matrix {
+    m: [
+        [0.9869929, -0.1470543, 0.1599627],
+        [0.4323053, 0.5183603, 0.0492912],
+        [-0.0085287, 0.0400428, 0.9684867],
+    ],
+    invalid: false,
+};
+
+// See ICCv4 E.3
+/// Builds the matrix `bradford_matrix_inv * diag(p, y, b) * bradford_matrix`
+/// for the white point `(X, Y, Z)`.
+///
+/// Each diagonal gain is a ratio of two weighted sums over one column of
+/// `bradford_matrix`: the numerator uses the fixed weights
+/// `(0.96422, 1.000, 0.82521)` and the denominator uses `(X, Y, Z)`.
+// NOTE(review): the fixed weights are presumably the D50 white point -- confirm.
+fn compute_whitepoint_adaption(X: f32, Y: f32, Z: f32) -> Matrix {
+    // Gain for one column of the Bradford matrix.
+    let gain = |col: usize| -> f32 {
+        (0.96422 * bradford_matrix.m[0][col]
+            + 1.000 * bradford_matrix.m[1][col]
+            + 0.82521 * bradford_matrix.m[2][col])
+            / (X * bradford_matrix.m[0][col]
+                + Y * bradford_matrix.m[1][col]
+                + Z * bradford_matrix.m[2][col])
+    };
+    let p: f32 = gain(0);
+    let y: f32 = gain(1);
+    let b: f32 = gain(2);
+
+    let white_adaption = Matrix {
+        m: [[p, 0., 0.], [0., y, 0.], [0., 0., b]],
+        invalid: false,
+    };
+    let scaled = Matrix::multiply(white_adaption, bradford_matrix);
+    Matrix::multiply(bradford_matrix_inv, scaled)
+}
+/// Fills in the per-channel output precache tables of `profile` where they
+/// are still missing.
+///
+/// Nothing is done when the profile is not RGB, when ICCv4 support is enabled
+/// and the profile carries a `B2A0` or `mBA` LUT (that LUT is used instead),
+/// or when any of the three TRC curves is absent. A table is only stored when
+/// `compute_precache` reports success for its curve.
+#[no_mangle]
+pub extern "C" fn qcms_profile_precache_output_transform(profile: &mut Profile) {
+    /* we only support precaching on rgb profiles */
+    if profile.color_space != RGB_SIGNATURE {
+        return;
+    }
+    /* don't precache since we will use the B2A or mBA LUT */
+    let uses_lut = SUPPORTS_ICCV4.load(Ordering::Relaxed)
+        && (profile.B2A0.is_some() || profile.mBA.is_some());
+    if uses_lut {
+        return;
+    }
+    /* don't precache if we do not have the TRC curves */
+    let (red_trc, green_trc, blue_trc) = match (
+        profile.redTRC.as_deref(),
+        profile.greenTRC.as_deref(),
+        profile.blueTRC.as_deref(),
+    ) {
+        (Some(red), Some(green), Some(blue)) => (red, green, blue),
+        _ => return,
+    };
+
+    if profile.output_table_r.is_none() {
+        let mut table = precache_create();
+        // The Arc was just created, so it is uniquely owned and `get_mut` succeeds.
+        if compute_precache(red_trc, &mut Arc::get_mut(&mut table).unwrap().data) {
+            profile.output_table_r = Some(table);
+        }
+    }
+    if profile.output_table_g.is_none() {
+        let mut table = precache_create();
+        if compute_precache(green_trc, &mut Arc::get_mut(&mut table).unwrap().data) {
+            profile.output_table_g = Some(table);
+        }
+    }
+    if profile.output_table_b.is_none() {
+        let mut table = precache_create();
+        if compute_precache(blue_trc, &mut Arc::get_mut(&mut table).unwrap().data) {
+            profile.output_table_b = Some(table);
+        }
+    }
+}
+/* Swap the transform's implementation for a CLUT lookup: sample the
+ * input -> output chain on a `samples`^3 grid and install the result,
+ * together with the tetrahedral-interpolation function matching `in_type`.
+ * Returns None when the chain cannot be built. */
+fn transform_precacheLUT_float(
+    mut transform: Box<qcms_transform>,
+    input: &Profile,
+    output: &Profile,
+    samples: i32,
+    in_type: DataType,
+) -> Option<Box<qcms_transform>> {
+    /* three floats (one per channel) for every grid point */
+    let lut_len: u32 = (3 * samples * samples * samples) as u32;
+
+    let mut grid = Vec::with_capacity(lut_len as usize);
+    let dest = vec![0.; lut_len as usize];
+    /* Enumerate the grid points, each coordinate normalised to 0..=1 */
+    let last = (samples - 1) as f32;
+    for x in 0..samples {
+        for y in 0..samples {
+            for z in 0..samples {
+                grid.extend_from_slice(&[x as f32 / last, y as f32 / last, z as f32 / last]);
+            }
+        }
+    }
+    let lut = chain_transform(input, output, grid, dest, lut_len as usize)?;
+
+    transform.clut = Some(lut);
+    transform.grid_size = samples as u16;
+    match in_type {
+        RGBA8 => transform.transform_fn = Some(qcms_transform_data_tetra_clut_rgba),
+        BGRA8 => transform.transform_fn = Some(qcms_transform_data_tetra_clut_bgra),
+        RGB8 => transform.transform_fn = Some(qcms_transform_data_tetra_clut_rgb),
+        _ => {}
+    }
+    debug_assert!(transform.transform_fn.is_some());
+
+    Some(transform)
+}
+
+/// Build a transform that converts pixels of `in_type` in the `input`
+/// profile's colour space into pixels of `out_type` in the `output` profile's
+/// colour space.
+///
+/// Returns `None` when the pixel formats are incompatible, when a required
+/// curve or table is missing or cannot be built, when the output colorant
+/// matrix cannot be inverted, when the combined matrix contains NaN, or when
+/// the input colour space is neither RGB nor gray.
+///
+/// `_intent` is currently ignored.
+pub fn transform_create(
+    input: &Profile,
+    in_type: DataType,
+    output: &Profile,
+    out_type: DataType,
+    _intent: Intent,
+) -> Option<Box<qcms_transform>> {
+    // Ensure the requested input and output types make sense.
+    let matching_format = match (in_type, out_type) {
+        (RGB8, RGB8) => true,
+        (RGBA8, RGBA8) => true,
+        (BGRA8, BGRA8) => true,
+        (Gray8, out_type) => matches!(out_type, RGB8 | RGBA8 | BGRA8),
+        (GrayA8, out_type) => matches!(out_type, RGBA8 | BGRA8),
+        _ => false,
+    };
+    if !matching_format {
+        debug_assert!(false, "input/output type");
+        return None;
+    }
+    let mut transform: Box<qcms_transform> = Box::new(Default::default());
+    // The precached output tables are only usable when all three are present.
+    let precache = output.output_table_r.is_some()
+        && output.output_table_g.is_some()
+        && output.output_table_b.is_some();
+    // This precache assumes RGB_SIGNATURE (fails on GRAY_SIGNATURE, for instance)
+    //
+    // The LUTs relevant for the output side are B2A0 and mBA (these are the
+    // ones qcms_profile_precache_output_transform defers to), so the output
+    // profile is tested for `mBA`, not `mAB`.
+    if SUPPORTS_ICCV4.load(Ordering::Relaxed)
+        && (in_type == RGB8 || in_type == RGBA8 || in_type == BGRA8)
+        && (input.A2B0.is_some()
+            || output.B2A0.is_some()
+            || input.mAB.is_some()
+            || output.mBA.is_some())
+    {
+        // Precache the transformation to a CLUT 33x33x33 in size.
+        // 33 is used by many profiles and works well in practice.
+        // This evenly divides 256 into blocks of 8x8x8.
+        // TODO For transforming small data sets of about 200x200 or less
+        // precaching should be avoided.
+        let result = transform_precacheLUT_float(transform, input, output, 33, in_type);
+        debug_assert!(result.is_some(), "precacheLUT failed");
+        return result;
+    }
+    if precache {
+        // Share the profile's precached tables instead of copying them.
+        transform.output_table_r = output.output_table_r.clone();
+        transform.output_table_g = output.output_table_g.clone();
+        transform.output_table_b = output.output_table_b.clone();
+    } else {
+        // Without precached tables the output LUTs are built from the TRCs,
+        // so all three curves are required.
+        let (red_trc, green_trc, blue_trc) = match (
+            output.redTRC.as_deref(),
+            output.greenTRC.as_deref(),
+            output.blueTRC.as_deref(),
+        ) {
+            (Some(r), Some(g), Some(b)) => (r, g, b),
+            _ => return None,
+        };
+        transform.output_gamma_lut_r = Some(build_output_lut(red_trc));
+        transform.output_gamma_lut_g = Some(build_output_lut(green_trc));
+        transform.output_gamma_lut_b = Some(build_output_lut(blue_trc));
+    }
+    if input.color_space == RGB_SIGNATURE {
+        if precache {
+            #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+            if is_x86_feature_detected!("avx") {
+                if in_type == RGB8 {
+                    transform.transform_fn = Some(qcms_transform_data_rgb_out_lut_avx)
+                } else if in_type == RGBA8 {
+                    transform.transform_fn = Some(qcms_transform_data_rgba_out_lut_avx)
+                } else if in_type == BGRA8 {
+                    transform.transform_fn = Some(qcms_transform_data_bgra_out_lut_avx)
+                }
+            } else if cfg!(not(miri)) && is_x86_feature_detected!("sse2") {
+                if in_type == RGB8 {
+                    transform.transform_fn = Some(qcms_transform_data_rgb_out_lut_sse2)
+                } else if in_type == RGBA8 {
+                    transform.transform_fn = Some(qcms_transform_data_rgba_out_lut_sse2)
+                } else if in_type == BGRA8 {
+                    transform.transform_fn = Some(qcms_transform_data_bgra_out_lut_sse2)
+                }
+            }
+
+            #[cfg(target_arch = "arm")]
+            let neon_supported = is_arm_feature_detected!("neon");
+            #[cfg(target_arch = "aarch64")]
+            let neon_supported = is_aarch64_feature_detected!("neon");
+
+            #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+            if neon_supported {
+                if in_type == RGB8 {
+                    transform.transform_fn = Some(qcms_transform_data_rgb_out_lut_neon)
+                } else if in_type == RGBA8 {
+                    transform.transform_fn = Some(qcms_transform_data_rgba_out_lut_neon)
+                } else if in_type == BGRA8 {
+                    transform.transform_fn = Some(qcms_transform_data_bgra_out_lut_neon)
+                }
+            }
+
+            // No SIMD implementation was selected: use the scalar precache path.
+            if transform.transform_fn.is_none() {
+                if in_type == RGB8 {
+                    transform.transform_fn = Some(qcms_transform_data_rgb_out_lut_precache)
+                } else if in_type == RGBA8 {
+                    transform.transform_fn = Some(qcms_transform_data_rgba_out_lut_precache)
+                } else if in_type == BGRA8 {
+                    transform.transform_fn = Some(qcms_transform_data_bgra_out_lut_precache)
+                }
+            }
+        } else if in_type == RGB8 {
+            transform.transform_fn = Some(qcms_transform_data_rgb_out_lut)
+        } else if in_type == RGBA8 {
+            transform.transform_fn = Some(qcms_transform_data_rgba_out_lut)
+        } else if in_type == BGRA8 {
+            transform.transform_fn = Some(qcms_transform_data_bgra_out_lut)
+        }
+        //XXX: avoid duplicating tables if we can
+        transform.input_gamma_table_r = build_input_gamma_table(input.redTRC.as_deref());
+        transform.input_gamma_table_g = build_input_gamma_table(input.greenTRC.as_deref());
+        transform.input_gamma_table_b = build_input_gamma_table(input.blueTRC.as_deref());
+        if transform.input_gamma_table_r.is_none()
+            || transform.input_gamma_table_g.is_none()
+            || transform.input_gamma_table_b.is_none()
+        {
+            return None;
+        }
+        /* build combined colorant matrix */
+
+        let in_matrix: Matrix = build_colorant_matrix(input);
+        let out_matrix: Matrix = build_colorant_matrix(output).invert();
+        if out_matrix.invalid {
+            return None;
+        }
+        let result: Matrix = Matrix::multiply(out_matrix, in_matrix);
+        /* check for NaN values in the matrix and bail if we find any */
+        if result.m.iter().any(|row| row.iter().any(|v| v.is_nan())) {
+            return None;
+        }
+        /* store the results in column major mode
+         * this makes doing the multiplication with sse easier */
+        for row in 0..3 {
+            for col in 0..3 {
+                transform.matrix[col][row] = result.m[row][col];
+            }
+        }
+    } else if input.color_space == GRAY_SIGNATURE {
+        transform.input_gamma_table_gray = build_input_gamma_table(input.grayTRC.as_deref());
+        transform.input_gamma_table_gray.as_ref()?;
+        if precache {
+            if out_type == RGB8 {
+                transform.transform_fn = Some(qcms_transform_data_gray_out_precache)
+            } else if out_type == RGBA8 {
+                if in_type == Gray8 {
+                    transform.transform_fn = Some(qcms_transform_data_gray_rgba_out_precache)
+                } else {
+                    transform.transform_fn = Some(qcms_transform_data_graya_rgba_out_precache)
+                }
+            } else if out_type == BGRA8 {
+                if in_type == Gray8 {
+                    transform.transform_fn = Some(qcms_transform_data_gray_bgra_out_precache)
+                } else {
+                    transform.transform_fn = Some(qcms_transform_data_graya_bgra_out_precache)
+                }
+            }
+        } else if out_type == RGB8 {
+            transform.transform_fn = Some(qcms_transform_data_gray_out_lut)
+        } else if out_type == RGBA8 {
+            if in_type == Gray8 {
+                transform.transform_fn = Some(qcms_transform_data_gray_rgba_out_lut)
+            } else {
+                transform.transform_fn = Some(qcms_transform_data_graya_rgba_out_lut)
+            }
+        } else if out_type == BGRA8 {
+            if in_type == Gray8 {
+                transform.transform_fn = Some(qcms_transform_data_gray_bgra_out_lut)
+            } else {
+                transform.transform_fn = Some(qcms_transform_data_graya_bgra_out_lut)
+            }
+        }
+    } else {
+        debug_assert!(false, "unexpected colorspace");
+        return None;
+    }
+    debug_assert!(transform.transform_fn.is_some());
+    Some(transform)
+}
+/// A transform from an input profile to an output one.
+pub struct Transform {
+    // Pixel format the transform was created for; `apply` uses it to size pixels.
+    ty: DataType,
+    // The underlying transform produced by `transform_create`.
+    xfm: Box<qcms_transform>,
+}
+
+impl Transform {
+    /// Create a new transform from `input` to `output` for pixels of `DataType` `ty` with `intent`
+    ///
+    /// `ty` is used as both the input and the output pixel format. Returns
+    /// `None` when `transform_create` cannot build the transform.
+    pub fn new(
+        input: &Profile,
+        output: &Profile,
+        ty: DataType,
+        intent: Intent,
+    ) -> Option<Self> {
+        transform_create(input, ty, output, ty, intent).map(|xfm| Transform { ty, xfm })
+    }
+
+    /// Apply the color space transform to `data` in place.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `data.len()` is not a whole number of pixels for this
+    /// transform's `DataType`, or if the transform has no transform function.
+    pub fn apply(&self, data: &mut [u8]) {
+        let bytes_per_pixel = self.ty.bytes_per_pixel();
+        if data.len() % bytes_per_pixel != 0 {
+            panic!(
+                "incomplete pixels: should be a multiple of {} got {}",
+                bytes_per_pixel,
+                data.len()
+            )
+        }
+        let transform_fn = self.xfm.transform_fn.expect("non-null function pointer");
+        let pixel_count = data.len() / bytes_per_pixel;
+        // Derive the source and destination pointers from one mutable borrow.
+        // Calling `as_ptr()` and then `as_mut_ptr()` would create a second,
+        // unique reborrow that invalidates the first pointer before it is read.
+        let ptr = data.as_mut_ptr();
+        // SAFETY: `ptr` is valid for reads and writes of `data.len()` bytes,
+        // which is exactly `pixel_count` whole pixels of `self.ty`.
+        unsafe {
+            transform_fn(&*self.xfm, ptr as *const u8, ptr, pixel_count);
+        }
+    }
+}
+
+/// Turn on ICCv4 support by setting the global `SUPPORTS_ICCV4` flag.
+///
+/// The flag is read by `qcms_profile_precache_output_transform` and
+/// `transform_create` to decide whether the profile LUTs are used.
+#[no_mangle]
+pub extern "C" fn qcms_enable_iccv4() {
+    SUPPORTS_ICCV4.store(true, Ordering::Relaxed);
+}
diff --git a/gfx/qcms/src/transform_avx.rs b/gfx/qcms/src/transform_avx.rs
new file mode 100644
index 0000000000..d104e4426a
--- /dev/null
+++ b/gfx/qcms/src/transform_avx.rs
@@ -0,0 +1,230 @@
+use crate::transform::{qcms_transform, Format, BGRA, CLAMPMAXVAL, FLOATSCALE, RGB, RGBA};
+#[cfg(target_arch = "x86")]
+pub use std::arch::x86::{
+    __m128, __m128i, __m256, __m256i, _mm256_add_ps, _mm256_broadcast_ps, _mm256_castps128_ps256,
+    _mm256_castps256_ps128, _mm256_cvtps_epi32, _mm256_insertf128_ps, _mm256_max_ps, _mm256_min_ps,
+    _mm256_mul_ps, _mm256_set1_ps, _mm256_set_ps, _mm256_setzero_ps, _mm256_store_si256,
+    _mm_add_ps, _mm_broadcast_ss, _mm_cvtps_epi32, _mm_loadu_ps, _mm_max_ps, _mm_min_ps,
+    _mm_mul_ps, _mm_setzero_ps, _mm_store_si128,
+};
+#[cfg(target_arch = "x86_64")]
+pub use std::arch::x86_64::{
+    __m128, __m128i, __m256, __m256i, _mm256_add_ps, _mm256_broadcast_ps, _mm256_castps128_ps256,
+    _mm256_castps256_ps128, _mm256_cvtps_epi32, _mm256_insertf128_ps, _mm256_max_ps, _mm256_min_ps,
+    _mm256_mul_ps, _mm256_set1_ps, _mm256_set_ps, _mm256_setzero_ps, _mm256_store_si256,
+    _mm_add_ps, _mm_broadcast_ss, _mm_cvtps_epi32, _mm_loadu_ps, _mm_max_ps, _mm_min_ps,
+    _mm_mul_ps, _mm_setzero_ps, _mm_store_si128,
+};
+
+/// Scratch buffer of eight `u32` lanes, aligned to 32 bytes so it can be the
+/// target of the aligned 256-bit store (`_mm256_store_si256`).
+#[repr(align(32))]
+struct Output([u32; 8]);
+
+/// Convert `length` pixels from `src` into `dest` with AVX, handling two
+/// pixels per 256-bit register.
+///
+/// For every pixel each colour byte is looked up in the matching input gamma
+/// table, the three results are combined with the transform matrix, clamped
+/// to `[0, CLAMPMAXVAL]`, scaled by `FLOATSCALE`, converted to integers and
+/// used as indices into the precached output tables. When `F` has an alpha
+/// channel (`F::kAIndex != 0xff`) the alpha byte is copied through unchanged.
+///
+/// The `unwrap()` calls fail if any input gamma table or output table of
+/// `transform` is `None`.
+///
+/// # Safety
+///
+/// NOTE(review): `src` and `dest` are presumably required to be valid for
+/// `length` pixels of 3 or 4 bytes each (depending on `F`), and the CPU must
+/// support AVX — confirm against the callers.
+#[target_feature(enable = "avx")]
+unsafe extern "C" fn qcms_transform_data_template_lut_avx<F: Format>(
+    transform: &qcms_transform,
+    mut src: *const u8,
+    mut dest: *mut u8,
+    mut length: usize,
+) {
+    let mat: *const [f32; 4] = (*transform).matrix.as_ptr();
+    let mut input: Output = std::mem::zeroed();
+    /* share input and output locations to save having to keep the
+     * locations in separate registers */
+    let output: *const u32 = &mut input as *mut Output as *mut u32;
+    /* deref *transform now to avoid it in loop */
+    let igtbl_r: *const f32 = (*transform).input_gamma_table_r.as_ref().unwrap().as_ptr();
+    let igtbl_g: *const f32 = (*transform).input_gamma_table_g.as_ref().unwrap().as_ptr();
+    let igtbl_b: *const f32 = (*transform).input_gamma_table_b.as_ref().unwrap().as_ptr();
+    /* deref *transform now to avoid it in loop */
+    let otdata_r: *const u8 = (*transform)
+        .output_table_r
+        .as_deref()
+        .unwrap()
+        .data
+        .as_ptr();
+    let otdata_g: *const u8 = (*transform)
+        .output_table_g
+        .as_deref()
+        .unwrap()
+        .data
+        .as_ptr();
+    let otdata_b: *const u8 = (*transform)
+        .output_table_b
+        .as_deref()
+        .unwrap()
+        .data
+        .as_ptr();
+    /* input matrix values never change */
+    /* each matrix row is broadcast to both 128-bit halves, one half per pixel */
+    let mat0: __m256 = _mm256_broadcast_ps(&*((*mat.offset(0isize)).as_ptr() as *const __m128));
+    let mat1: __m256 = _mm256_broadcast_ps(&*((*mat.offset(1isize)).as_ptr() as *const __m128));
+    let mat2: __m256 = _mm256_broadcast_ps(&*((*mat.offset(2isize)).as_ptr() as *const __m128));
+    /* these values don't change, either */
+    let max: __m256 = _mm256_set1_ps(CLAMPMAXVAL);
+    let min: __m256 = _mm256_setzero_ps();
+    let scale: __m256 = _mm256_set1_ps(FLOATSCALE);
+    /* bytes per pixel: 3 without an alpha channel, 4 with one */
+    let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32;
+    /* working variables */
+    let mut vec_r: __m256 = _mm256_setzero_ps();
+    let mut vec_g: __m256 = _mm256_setzero_ps();
+    let mut vec_b: __m256 = _mm256_setzero_ps();
+    let mut result: __m256;
+    let mut vec_r0: __m128;
+    let mut vec_g0: __m128;
+    let mut vec_b0: __m128;
+    let mut vec_r1: __m128;
+    let mut vec_g1: __m128;
+    let mut vec_b1: __m128;
+    let mut alpha1: u8 = 0;
+    let mut alpha2: u8 = 0;
+    /* CYA */
+    if length == 0 {
+        return;
+    }
+    /* If there are at least 2 pixels, then we can load their components into
+    a single 256-bit register for processing. */
+    if length > 1 {
+        vec_r0 = _mm_broadcast_ss(&*igtbl_r.offset(*src.add(F::kRIndex) as isize));
+        vec_g0 = _mm_broadcast_ss(&*igtbl_g.offset(*src.add(F::kGIndex) as isize));
+        vec_b0 = _mm_broadcast_ss(&*igtbl_b.offset(*src.add(F::kBIndex) as isize));
+        vec_r1 =
+            _mm_broadcast_ss(&*igtbl_r.offset(*src.add(F::kRIndex + components as usize) as isize));
+        vec_g1 =
+            _mm_broadcast_ss(&*igtbl_g.offset(*src.add(F::kGIndex + components as usize) as isize));
+        vec_b1 =
+            _mm_broadcast_ss(&*igtbl_b.offset(*src.add(F::kBIndex + components as usize) as isize));
+        vec_r = _mm256_insertf128_ps(_mm256_castps128_ps256(vec_r0), vec_r1, 1);
+        vec_g = _mm256_insertf128_ps(_mm256_castps128_ps256(vec_g0), vec_g1, 1);
+        vec_b = _mm256_insertf128_ps(_mm256_castps128_ps256(vec_b0), vec_b1, 1);
+        if F::kAIndex != 0xff {
+            alpha1 = *src.add(F::kAIndex);
+            alpha2 = *src.add(F::kAIndex + components as usize)
+        }
+    }
+    /* If there are at least 4 pixels, then we can iterate and preload the
+    next 2 while we store the result of the current 2. */
+    while length > 3 {
+        /* Ensure we are pointing at the next 2 pixels for the next load. */
+        src = src.offset((2 * components) as isize);
+        /* gamma * matrix */
+        vec_r = _mm256_mul_ps(vec_r, mat0);
+        vec_g = _mm256_mul_ps(vec_g, mat1);
+        vec_b = _mm256_mul_ps(vec_b, mat2);
+        /* store alpha for these pixels; load alpha for next two */
+        if F::kAIndex != 0xff {
+            *dest.add(F::kAIndex) = alpha1;
+            *dest.add(F::kAIndex + components as usize) = alpha2;
+            alpha1 = *src.add(F::kAIndex);
+            alpha2 = *src.add(F::kAIndex + components as usize)
+        }
+        /* crunch, crunch, crunch */
+        vec_r = _mm256_add_ps(vec_r, _mm256_add_ps(vec_g, vec_b));
+        vec_r = _mm256_max_ps(min, vec_r);
+        vec_r = _mm256_min_ps(max, vec_r);
+        result = _mm256_mul_ps(vec_r, scale);
+        /* store calc'd output tables indices */
+        _mm256_store_si256(output as *mut __m256i, _mm256_cvtps_epi32(result));
+        /* load gamma values for next loop while store completes */
+        vec_r0 = _mm_broadcast_ss(&*igtbl_r.offset(*src.add(F::kRIndex) as isize));
+        vec_g0 = _mm_broadcast_ss(&*igtbl_g.offset(*src.add(F::kGIndex) as isize));
+        vec_b0 = _mm_broadcast_ss(&*igtbl_b.offset(*src.add(F::kBIndex) as isize));
+        vec_r1 =
+            _mm_broadcast_ss(&*igtbl_r.offset(*src.add(F::kRIndex + components as usize) as isize));
+        vec_g1 =
+            _mm_broadcast_ss(&*igtbl_g.offset(*src.add(F::kGIndex + components as usize) as isize));
+        vec_b1 =
+            _mm_broadcast_ss(&*igtbl_b.offset(*src.add(F::kBIndex + components as usize) as isize));
+        vec_r = _mm256_insertf128_ps(_mm256_castps128_ps256(vec_r0), vec_r1, 1);
+        vec_g = _mm256_insertf128_ps(_mm256_castps128_ps256(vec_g0), vec_g1, 1);
+        vec_b = _mm256_insertf128_ps(_mm256_castps128_ps256(vec_b0), vec_b1, 1);
+        /* use calc'd indices to output RGB values */
+        /* lanes 0-2 hold the first pixel's indices, lanes 4-6 the second's */
+        *dest.add(F::kRIndex) = *otdata_r.offset(*output.offset(0isize) as isize);
+        *dest.add(F::kGIndex) = *otdata_g.offset(*output.offset(1isize) as isize);
+        *dest.add(F::kBIndex) = *otdata_b.offset(*output.offset(2isize) as isize);
+        *dest.add(F::kRIndex + components as usize) =
+            *otdata_r.offset(*output.offset(4isize) as isize);
+        *dest.add(F::kGIndex + components as usize) =
+            *otdata_g.offset(*output.offset(5isize) as isize);
+        *dest.add(F::kBIndex + components as usize) =
+            *otdata_b.offset(*output.offset(6isize) as isize);
+        dest = dest.offset((2 * components) as isize);
+        length -= 2
+    }
+    /* There are 0-3 pixels remaining. If there are 2-3 remaining, then we know
+    we have already populated the necessary registers to start the transform. */
+    if length > 1 {
+        vec_r = _mm256_mul_ps(vec_r, mat0);
+        vec_g = _mm256_mul_ps(vec_g, mat1);
+        vec_b = _mm256_mul_ps(vec_b, mat2);
+        if F::kAIndex != 0xff {
+            *dest.add(F::kAIndex) = alpha1;
+            *dest.add(F::kAIndex + components as usize) = alpha2
+        }
+        vec_r = _mm256_add_ps(vec_r, _mm256_add_ps(vec_g, vec_b));
+        vec_r = _mm256_max_ps(min, vec_r);
+        vec_r = _mm256_min_ps(max, vec_r);
+        result = _mm256_mul_ps(vec_r, scale);
+        _mm256_store_si256(output as *mut __m256i, _mm256_cvtps_epi32(result));
+        *dest.add(F::kRIndex) = *otdata_r.offset(*output.offset(0isize) as isize);
+        *dest.add(F::kGIndex) = *otdata_g.offset(*output.offset(1isize) as isize);
+        *dest.add(F::kBIndex) = *otdata_b.offset(*output.offset(2isize) as isize);
+        *dest.add(F::kRIndex + components as usize) =
+            *otdata_r.offset(*output.offset(4isize) as isize);
+        *dest.add(F::kGIndex + components as usize) =
+            *otdata_g.offset(*output.offset(5isize) as isize);
+        *dest.add(F::kBIndex + components as usize) =
+            *otdata_b.offset(*output.offset(6isize) as isize);
+        src = src.offset((2 * components) as isize);
+        dest = dest.offset((2 * components) as isize);
+        length -= 2
+    }
+    /* There may be 0-1 pixels remaining. */
+    /* the last pixel is processed on its own with 128-bit operations */
+    if length == 1 {
+        vec_r0 = _mm_broadcast_ss(&*igtbl_r.offset(*src.add(F::kRIndex) as isize));
+        vec_g0 = _mm_broadcast_ss(&*igtbl_g.offset(*src.add(F::kGIndex) as isize));
+        vec_b0 = _mm_broadcast_ss(&*igtbl_b.offset(*src.add(F::kBIndex) as isize));
+        vec_r0 = _mm_mul_ps(vec_r0, _mm256_castps256_ps128(mat0));
+        vec_g0 = _mm_mul_ps(vec_g0, _mm256_castps256_ps128(mat1));
+        vec_b0 = _mm_mul_ps(vec_b0, _mm256_castps256_ps128(mat2));
+        if F::kAIndex != 0xff {
+            *dest.add(F::kAIndex) = *src.add(F::kAIndex)
+        }
+        vec_r0 = _mm_add_ps(vec_r0, _mm_add_ps(vec_g0, vec_b0));
+        vec_r0 = _mm_max_ps(_mm256_castps256_ps128(min), vec_r0);
+        vec_r0 = _mm_min_ps(_mm256_castps256_ps128(max), vec_r0);
+        vec_r0 = _mm_mul_ps(vec_r0, _mm256_castps256_ps128(scale));
+        _mm_store_si128(output as *mut __m128i, _mm_cvtps_epi32(vec_r0));
+        *dest.add(F::kRIndex) = *otdata_r.offset(*output.offset(0isize) as isize);
+        *dest.add(F::kGIndex) = *otdata_g.offset(*output.offset(1isize) as isize);
+        *dest.add(F::kBIndex) = *otdata_b.offset(*output.offset(2isize) as isize)
+    };
+}
+/// AVX transform for the `RGB` pixel layout; forwards to
+/// `qcms_transform_data_template_lut_avx::<RGB>`.
+///
+/// # Safety
+///
+/// Same requirements as the template function it forwards to.
+#[no_mangle]
+#[target_feature(enable = "avx")]
+pub unsafe extern "C" fn qcms_transform_data_rgb_out_lut_avx(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_template_lut_avx::<RGB>(transform, src, dest, length);
+}
+/// AVX transform for the `RGBA` pixel layout; forwards to
+/// `qcms_transform_data_template_lut_avx::<RGBA>`.
+///
+/// # Safety
+///
+/// Same requirements as the template function it forwards to.
+#[no_mangle]
+#[target_feature(enable = "avx")]
+pub unsafe extern "C" fn qcms_transform_data_rgba_out_lut_avx(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_template_lut_avx::<RGBA>(transform, src, dest, length);
+}
+/// AVX transform for the `BGRA` pixel layout; forwards to
+/// `qcms_transform_data_template_lut_avx::<BGRA>`.
+///
+/// # Safety
+///
+/// Same requirements as the template function it forwards to.
+#[no_mangle]
+#[target_feature(enable = "avx")]
+pub unsafe extern "C" fn qcms_transform_data_bgra_out_lut_avx(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_template_lut_avx::<BGRA>(transform, src, dest, length);
+}
diff --git a/gfx/qcms/src/transform_neon.rs b/gfx/qcms/src/transform_neon.rs
new file mode 100644
index 0000000000..64a56f7874
--- /dev/null
+++ b/gfx/qcms/src/transform_neon.rs
@@ -0,0 +1,255 @@
+use crate::transform::{qcms_transform, Format, BGRA, CLAMPMAXVAL, FLOATSCALE, RGB, RGBA};
+#[cfg(target_arch = "aarch64")]
+use core::arch::aarch64::{float32x4_t, int32x4_t, vaddq_f32};
+#[cfg(target_arch = "arm")]
+use core::arch::arm::{float32x4_t, int32x4_t, vaddq_f32};
+use std::mem::zeroed;
+
+// These are only ever read (their addresses are handed to `vld1q_dup_f32`),
+// so they do not need to be mutable globals.
+static floatScale: f32 = FLOATSCALE;
+static clampMaxValue: f32 = CLAMPMAXVAL;
+
+/// Convert `length` pixels from `src` into `dest` with NEON, one pixel per
+/// 128-bit register.
+///
+/// For every pixel each colour byte is looked up in the matching input gamma
+/// table, the three results are combined with the transform matrix, clamped
+/// to `[0, clampMaxValue]`, scaled by `floatScale`, converted to integers and
+/// used as indices into the precached output tables. When `F` has an alpha
+/// channel (`F::kAIndex != 0xff`) the alpha byte is copied through unchanged.
+///
+/// The `unwrap()` calls fail if any input gamma table or output table of
+/// `transform` is `None`.
+///
+/// # Safety
+///
+/// NOTE(review): `src` and `dest` are presumably required to be valid for
+/// `length` pixels of 3 or 4 bytes each (depending on `F`), and the CPU must
+/// support NEON — confirm against the callers.
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+unsafe extern "C" fn qcms_transform_data_template_lut_neon<F: Format>(
+    transform: &qcms_transform,
+    mut src: *const u8,
+    mut dest: *mut u8,
+    mut length: usize,
+) {
+    let mat: *const [f32; 4] = (*transform).matrix.as_ptr();
+    /* deref *transform now to avoid it in loop */
+    let igtbl_r: *const f32 = (*transform).input_gamma_table_r.as_ref().unwrap().as_ptr();
+    let igtbl_g: *const f32 = (*transform).input_gamma_table_g.as_ref().unwrap().as_ptr();
+    let igtbl_b: *const f32 = (*transform).input_gamma_table_b.as_ref().unwrap().as_ptr();
+    /* deref *transform now to avoid it in loop */
+    let otdata_r: *const u8 = (*transform)
+        .output_table_r
+        .as_deref()
+        .unwrap()
+        .data
+        .as_ptr();
+    let otdata_g: *const u8 = (*transform)
+        .output_table_g
+        .as_deref()
+        .unwrap()
+        .data
+        .as_ptr();
+    let otdata_b: *const u8 = (*transform)
+        .output_table_b
+        .as_deref()
+        .unwrap()
+        .data
+        .as_ptr();
+    /* input matrix values never change */
+    let mat0: float32x4_t = vld1q_f32((*mat.offset(0isize)).as_ptr());
+    let mat1: float32x4_t = vld1q_f32((*mat.offset(1isize)).as_ptr());
+    let mat2: float32x4_t = vld1q_f32((*mat.offset(2isize)).as_ptr());
+    /* these values don't change, either */
+    let max: float32x4_t = vld1q_dup_f32(&clampMaxValue);
+    let min: float32x4_t = zeroed();
+    let scale: float32x4_t = vld1q_dup_f32(&floatScale);
+    /* bytes per pixel: 3 without an alpha channel, 4 with one */
+    let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32;
+    /* working variables */
+    let mut vec_r: float32x4_t;
+    let mut vec_g: float32x4_t;
+    let mut vec_b: float32x4_t;
+    let mut result: int32x4_t;
+    let mut alpha: u8 = 0;
+    /* CYA */
+    if length == 0 {
+        return;
+    }
+    /* one pixel is handled outside of the loop */
+    length = length.wrapping_sub(1);
+    /* setup for transforming 1st pixel */
+    vec_r = vld1q_dup_f32(&*igtbl_r.offset(*src.offset(F::kRIndex as isize) as isize));
+    vec_g = vld1q_dup_f32(&*igtbl_g.offset(*src.offset(F::kGIndex as isize) as isize));
+    vec_b = vld1q_dup_f32(&*igtbl_b.offset(*src.offset(F::kBIndex as isize) as isize));
+    if F::kAIndex != 0xff {
+        alpha = *src.offset(F::kAIndex as isize)
+    }
+    src = src.offset(components as isize);
+    /* iterate with a usize range: a u32 counter would wrap around and never
+     * reach `length` once it exceeds u32::MAX */
+    for _ in 0..length {
+        /* gamma * matrix */
+        vec_r = vmulq_f32(vec_r, mat0);
+        vec_g = vmulq_f32(vec_g, mat1);
+        vec_b = vmulq_f32(vec_b, mat2);
+        /* store alpha for this pixel; load alpha for next */
+        if F::kAIndex != 0xff {
+            *dest.offset(F::kAIndex as isize) = alpha;
+            alpha = *src.offset(F::kAIndex as isize)
+        }
+        /* crunch, crunch, crunch */
+        vec_r = vaddq_f32(vec_r, vaddq_f32(vec_g, vec_b));
+        vec_r = vmaxq_f32(min, vec_r);
+        vec_r = vminq_f32(max, vec_r);
+        result = vcvtq_s32_f32(vmulq_f32(vec_r, scale));
+
+        /* use calc'd indices to output RGB values */
+        *dest.offset(F::kRIndex as isize) = *otdata_r.offset(vgetq_lane_s32(result, 0) as isize);
+        *dest.offset(F::kGIndex as isize) = *otdata_g.offset(vgetq_lane_s32(result, 1) as isize);
+        *dest.offset(F::kBIndex as isize) = *otdata_b.offset(vgetq_lane_s32(result, 2) as isize);
+
+        /* load gamma values for next loop while store completes */
+        vec_r = vld1q_dup_f32(&*igtbl_r.offset(*src.offset(F::kRIndex as isize) as isize));
+        vec_g = vld1q_dup_f32(&*igtbl_g.offset(*src.offset(F::kGIndex as isize) as isize));
+        vec_b = vld1q_dup_f32(&*igtbl_b.offset(*src.offset(F::kBIndex as isize) as isize));
+
+        dest = dest.offset(components as isize);
+        src = src.offset(components as isize);
+    }
+    /* handle final (maybe only) pixel */
+    vec_r = vmulq_f32(vec_r, mat0);
+    vec_g = vmulq_f32(vec_g, mat1);
+    vec_b = vmulq_f32(vec_b, mat2);
+    if F::kAIndex != 0xff {
+        *dest.offset(F::kAIndex as isize) = alpha
+    }
+    vec_r = vaddq_f32(vec_r, vaddq_f32(vec_g, vec_b));
+    vec_r = vmaxq_f32(min, vec_r);
+    vec_r = vminq_f32(max, vec_r);
+    result = vcvtq_s32_f32(vmulq_f32(vec_r, scale));
+
+    *dest.offset(F::kRIndex as isize) = *otdata_r.offset(vgetq_lane_s32(result, 0) as isize);
+    *dest.offset(F::kGIndex as isize) = *otdata_g.offset(vgetq_lane_s32(result, 1) as isize);
+    *dest.offset(F::kBIndex as isize) = *otdata_b.offset(vgetq_lane_s32(result, 2) as isize);
+}
+/// NEON transform for the `RGB` pixel layout; forwards to
+/// `qcms_transform_data_template_lut_neon::<RGB>`.
+///
+/// # Safety
+///
+/// Same requirements as the template function it forwards to.
+#[no_mangle]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+pub unsafe extern "C" fn qcms_transform_data_rgb_out_lut_neon(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_template_lut_neon::<RGB>(transform, src, dest, length);
+}
+/// NEON transform for the `RGBA` pixel layout; forwards to
+/// `qcms_transform_data_template_lut_neon::<RGBA>`.
+///
+/// # Safety
+///
+/// Same requirements as the template function it forwards to.
+#[no_mangle]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+pub unsafe extern "C" fn qcms_transform_data_rgba_out_lut_neon(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_template_lut_neon::<RGBA>(transform, src, dest, length);
+}
+
+/// NEON transform for the `BGRA` pixel layout; forwards to
+/// `qcms_transform_data_template_lut_neon::<BGRA>`.
+///
+/// # Safety
+///
+/// Same requirements as the template function it forwards to.
+#[no_mangle]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+pub unsafe extern "C" fn qcms_transform_data_bgra_out_lut_neon(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_template_lut_neon::<BGRA>(transform, src, dest, length);
+}
+
+use std::mem::transmute;
+
+/// Load four consecutive `f32` values starting at `addr` into one vector
+/// (aarch64 version).
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "aarch64")]
+pub unsafe fn vld1q_f32(addr: *const f32) -> float32x4_t {
+    let lanes: [f32; 4] = [*addr, *addr.add(1), *addr.add(2), *addr.add(3)];
+    transmute(lanes)
+}
+
+/// Load a `float32x4_t` from `addr` (32-bit arm version).
+///
+/// Forwards to the `vld1q_v4f32` intrinsic with an alignment argument of 4.
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon")]
+#[target_feature(enable = "v7")]
+pub unsafe fn vld1q_f32(addr: *const f32) -> float32x4_t {
+    vld1q_v4f32(addr as *const u8, 4)
+}
+
+// Binding for the LLVM intrinsic `llvm.arm.neon.vld1.v4f32.p0i8`, used by the
+// 32-bit arm `vld1q_f32`.
+#[cfg(target_arch = "arm")]
+#[allow(improper_ctypes)]
+extern "C" {
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f32.p0i8")]
+    fn vld1q_v4f32(addr: *const u8, align: u32) -> float32x4_t;
+}
+
+// Binding for the LLVM `fcvtzs` intrinsic named in `link_name`, used by the
+// aarch64 `vcvtq_s32_f32`.
+#[cfg(target_arch = "aarch64")]
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.aarch64.neon.fcvtzs.v4.v4f32"]
+    fn vcvtq_s32_f32_(a: float32x4_t) -> int32x4_t;
+}
+
+// Bindings for the LLVM vector max/min intrinsics; the `link_name` is chosen
+// per architecture (arm vs aarch64).
+#[allow(improper_ctypes)]
+extern "C" {
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")]
+    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v4f32")]
+    fn vmaxq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")]
+    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v4f32")]
+    fn vminq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+}
+
+/// Move vector element to general-purpose register
+///
+/// Returns lane `imm5` of `v`.
+///
+/// # Panics
+///
+/// Panics if `imm5` is not in `0..=3`.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+pub unsafe fn vgetq_lane_s32(v: int32x4_t, imm5: i32) -> i32 {
+    assert!((0..=3).contains(&imm5));
+    simd_extract(v, imm5 as u32)
+}
+
+/// Multiply
+///
+/// Returns `simd_mul(a, b)`.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+pub unsafe fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    simd_mul(a, b)
+}
+
+/// Floating-point minimum (vector).
+///
+/// Forwards to the `vminq_f32_` intrinsic binding.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+pub unsafe fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    vminq_f32_(a, b)
+}
+
+/// Floating-point maximum (vector).
+///
+/// Forwards to the `vmaxq_f32_` intrinsic binding.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+pub unsafe fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    vmaxq_f32_(a, b)
+}
+
+/// Convert a vector of four `f32` to four `i32` (aarch64 version).
+///
+/// Forwards to the `vcvtq_s32_f32_` intrinsic binding.
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
+    vcvtq_s32_f32_(a)
+}
+/// Floating-point Convert to Signed fixed-point, rounding toward Zero (vector)
+///
+/// 32-bit arm version, implemented with `simd_cast`.
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon")]
+#[target_feature(enable = "v7")]
+pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
+    simd_cast::<_, int32x4_t>(a)
+}
+
+/// Read the single `f32` at `addr` and return a vector with that value in
+/// all four lanes.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+pub unsafe fn vld1q_dup_f32(addr: *const f32) -> float32x4_t {
+    let lanes: [f32; 4] = [*addr; 4];
+    transmute(lanes)
+}
+
+// Compiler-provided generic SIMD intrinsics used by the NEON helpers in this
+// file.
+extern "platform-intrinsic" {
+    // Used by `vmulq_f32`.
+    pub fn simd_mul<T>(x: T, y: T) -> T;
+    // Used by `vgetq_lane_s32` to read one lane.
+    pub fn simd_extract<T, U>(x: T, idx: u32) -> U;
+    // Used by the 32-bit arm `vcvtq_s32_f32`.
+    pub fn simd_cast<T, U>(x: T) -> U;
+}
diff --git a/gfx/qcms/src/transform_sse2.rs b/gfx/qcms/src/transform_sse2.rs
new file mode 100644
index 0000000000..1ab975bc57
--- /dev/null
+++ b/gfx/qcms/src/transform_sse2.rs
@@ -0,0 +1,159 @@
+use crate::transform::{qcms_transform, Format, BGRA, CLAMPMAXVAL, FLOATSCALE, RGB, RGBA};
+#[cfg(target_arch = "x86")]
+pub use std::arch::x86::{
+    __m128, __m128i, _mm_add_ps, _mm_cvtps_epi32, _mm_load_ps, _mm_load_ss, _mm_max_ps, _mm_min_ps,
+    _mm_mul_ps, _mm_set1_ps, _mm_setzero_ps, _mm_shuffle_ps, _mm_store_si128,
+};
+#[cfg(target_arch = "x86_64")]
+pub use std::arch::x86_64::{
+    __m128, __m128i, _mm_add_ps, _mm_cvtps_epi32, _mm_load_ps, _mm_load_ss, _mm_max_ps, _mm_min_ps,
+    _mm_mul_ps, _mm_set1_ps, _mm_setzero_ps, _mm_shuffle_ps, _mm_store_si128,
+};
+
+/// Four `u32` slots with 16-byte alignment.
+///
+/// `qcms_transform_data_template_lut_sse2` stores a whole `__m128i` into a
+/// value of this type with `_mm_store_si128` and then reads the slots back as
+/// output-table indices, which is why the alignment is forced to 16.
+#[repr(align(16))]
+struct Output([u32; 4]);
+
+/// SSE2 implementation of the matrix + lookup-table pixel transform, generic
+/// over the pixel layout `F`.
+///
+/// For every pixel the three colour bytes are looked up in the per-channel
+/// input gamma tables, multiplied by the transform matrix, clamped to
+/// `0..=CLAMPMAXVAL`, scaled by `FLOATSCALE`, converted to integers and used
+/// as indices into the per-channel output tables. When `F` has an alpha
+/// channel (`F::kAIndex != 0xff`) the alpha byte is copied through unchanged.
+///
+/// The loop is software pipelined: the gamma values of the *next* pixel are
+/// loaded while the current one is being written, and the last pixel is
+/// finished after the loop.
+///
+/// # Panics
+///
+/// Panics if any of the input gamma tables or output tables of `transform`
+/// is `None`.
+///
+/// # Safety
+///
+/// * `src` must be readable and `dest` writable for `length` pixels of
+///   3 bytes (no alpha) or 4 bytes (alpha) each.
+/// * Each input gamma table is indexed with a source byte, so it must hold at
+///   least 256 entries.
+/// * The matrix rows are read with `_mm_load_ps`, which requires 16-byte
+///   aligned addresses.
+/// * The output tables must be large enough for every index produced by the
+///   clamp/scale step (presumably `CLAMPMAXVAL * FLOATSCALE`; verify against
+///   the definitions of those constants).
+unsafe extern "C" fn qcms_transform_data_template_lut_sse2<F: Format>(
+    transform: &qcms_transform,
+    mut src: *const u8,
+    mut dest: *mut u8,
+    mut length: usize,
+) {
+    let mat: *const [f32; 4] = (*transform).matrix.as_ptr();
+    let mut input: Output = std::mem::zeroed();
+    /* share input and output locations to save having to keep the
+     * locations in separate registers */
+    let output: *const u32 = &mut input as *mut Output as *mut u32;
+    /* deref *transform now to avoid it in loop */
+    let igtbl_r: *const f32 = (*transform).input_gamma_table_r.as_ref().unwrap().as_ptr();
+    let igtbl_g: *const f32 = (*transform).input_gamma_table_g.as_ref().unwrap().as_ptr();
+    let igtbl_b: *const f32 = (*transform).input_gamma_table_b.as_ref().unwrap().as_ptr();
+    /* deref *transform now to avoid it in loop */
+    let otdata_r: *const u8 = (*transform)
+        .output_table_r
+        .as_deref()
+        .unwrap()
+        .data
+        .as_ptr();
+    let otdata_g: *const u8 = (*transform)
+        .output_table_g
+        .as_deref()
+        .unwrap()
+        .data
+        .as_ptr();
+    let otdata_b: *const u8 = (*transform)
+        .output_table_b
+        .as_deref()
+        .unwrap()
+        .data
+        .as_ptr();
+    /* input matrix values never change */
+    let mat0: __m128 = _mm_load_ps((*mat.offset(0isize)).as_ptr());
+    let mat1: __m128 = _mm_load_ps((*mat.offset(1isize)).as_ptr());
+    let mat2: __m128 = _mm_load_ps((*mat.offset(2isize)).as_ptr());
+    /* these values don't change, either */
+    let max: __m128 = _mm_set1_ps(CLAMPMAXVAL);
+    let min: __m128 = _mm_setzero_ps();
+    let scale: __m128 = _mm_set1_ps(FLOATSCALE);
+    let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32;
+    /* working variables */
+    let mut vec_r: __m128;
+    let mut vec_g: __m128;
+    let mut vec_b: __m128;
+    let mut result: __m128;
+    let mut alpha: u8 = 0;
+    /* CYA */
+    if length == 0 {
+        return;
+    }
+    /* one pixel is handled outside of the loop */
+    length -= 1;
+    /* setup for transforming 1st pixel */
+    vec_r = _mm_load_ss(&*igtbl_r.offset(*src.add(F::kRIndex) as isize));
+    vec_g = _mm_load_ss(&*igtbl_g.offset(*src.add(F::kGIndex) as isize));
+    vec_b = _mm_load_ss(&*igtbl_b.offset(*src.add(F::kBIndex) as isize));
+    if F::kAIndex != 0xff {
+        alpha = *src.add(F::kAIndex)
+    }
+    src = src.offset(components as isize);
+    /* Iterate over the `usize` range directly: a separate `u32` counter would
+     * overflow before reaching `length` for very large buffers. */
+    for _ in 0..length {
+        /* position values from gamma tables */
+        vec_r = _mm_shuffle_ps(vec_r, vec_r, 0);
+        vec_g = _mm_shuffle_ps(vec_g, vec_g, 0);
+        vec_b = _mm_shuffle_ps(vec_b, vec_b, 0);
+        /* gamma * matrix */
+        vec_r = _mm_mul_ps(vec_r, mat0);
+        vec_g = _mm_mul_ps(vec_g, mat1);
+        vec_b = _mm_mul_ps(vec_b, mat2);
+        /* store alpha for this pixel; load alpha for next */
+        if F::kAIndex != 0xff {
+            *dest.add(F::kAIndex) = alpha;
+            alpha = *src.add(F::kAIndex)
+        }
+        /* crunch, crunch, crunch */
+        vec_r = _mm_add_ps(vec_r, _mm_add_ps(vec_g, vec_b));
+        vec_r = _mm_max_ps(min, vec_r);
+        vec_r = _mm_min_ps(max, vec_r);
+        result = _mm_mul_ps(vec_r, scale);
+        /* store calc'd output tables indices */
+        _mm_store_si128(output as *mut __m128i, _mm_cvtps_epi32(result));
+        /* load gamma values for next loop while store completes */
+        vec_r = _mm_load_ss(&*igtbl_r.offset(*src.add(F::kRIndex) as isize));
+        vec_g = _mm_load_ss(&*igtbl_g.offset(*src.add(F::kGIndex) as isize));
+        vec_b = _mm_load_ss(&*igtbl_b.offset(*src.add(F::kBIndex) as isize));
+        src = src.offset(components as isize);
+        /* use calc'd indices to output RGB values */
+        *dest.add(F::kRIndex) = *otdata_r.offset(*output.offset(0isize) as isize);
+        *dest.add(F::kGIndex) = *otdata_g.offset(*output.offset(1isize) as isize);
+        *dest.add(F::kBIndex) = *otdata_b.offset(*output.offset(2isize) as isize);
+        dest = dest.offset(components as isize);
+    }
+    /* handle final (maybe only) pixel */
+    vec_r = _mm_shuffle_ps(vec_r, vec_r, 0);
+    vec_g = _mm_shuffle_ps(vec_g, vec_g, 0);
+    vec_b = _mm_shuffle_ps(vec_b, vec_b, 0);
+    vec_r = _mm_mul_ps(vec_r, mat0);
+    vec_g = _mm_mul_ps(vec_g, mat1);
+    vec_b = _mm_mul_ps(vec_b, mat2);
+    if F::kAIndex != 0xff {
+        *dest.add(F::kAIndex) = alpha
+    }
+    vec_r = _mm_add_ps(vec_r, _mm_add_ps(vec_g, vec_b));
+    vec_r = _mm_max_ps(min, vec_r);
+    vec_r = _mm_min_ps(max, vec_r);
+    result = _mm_mul_ps(vec_r, scale);
+    _mm_store_si128(output as *mut __m128i, _mm_cvtps_epi32(result));
+    *dest.add(F::kRIndex) = *otdata_r.offset(*output.offset(0isize) as isize);
+    *dest.add(F::kGIndex) = *otdata_g.offset(*output.offset(1isize) as isize);
+    *dest.add(F::kBIndex) = *otdata_b.offset(*output.offset(2isize) as isize);
+}
+/// Unmangled C-ABI entry point that runs the SSE2 LUT transform template with
+/// the `RGB` pixel format.
+///
+/// # Safety
+///
+/// All arguments are forwarded unchanged, so the caller must satisfy the
+/// requirements of `qcms_transform_data_template_lut_sse2`.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_transform_data_rgb_out_lut_sse2(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_template_lut_sse2::<RGB>(transform, src, dest, length);
+}
+/// Unmangled C-ABI entry point that runs the SSE2 LUT transform template with
+/// the `RGBA` pixel format.
+///
+/// # Safety
+///
+/// All arguments are forwarded unchanged, so the caller must satisfy the
+/// requirements of `qcms_transform_data_template_lut_sse2`.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_transform_data_rgba_out_lut_sse2(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_template_lut_sse2::<RGBA>(transform, src, dest, length);
+}
+
+/// Unmangled C-ABI entry point that runs the SSE2 LUT transform template with
+/// the `BGRA` pixel format.
+///
+/// # Safety
+///
+/// All arguments are forwarded unchanged, so the caller must satisfy the
+/// requirements of `qcms_transform_data_template_lut_sse2`.
+#[no_mangle]
+pub unsafe extern "C" fn qcms_transform_data_bgra_out_lut_sse2(
+    transform: &qcms_transform,
+    src: *const u8,
+    dest: *mut u8,
+    length: usize,
+) {
+    qcms_transform_data_template_lut_sse2::<BGRA>(transform, src, dest, length);
+}
diff --git a/gfx/qcms/src/transform_util.rs b/gfx/qcms/src/transform_util.rs
new file mode 100644
index 0000000000..5cda0385e5
--- /dev/null
+++ b/gfx/qcms/src/transform_util.rs
@@ -0,0 +1,476 @@
+/* vim: set ts=8 sw=8 noexpandtab: */
+//  qcms
+//  Copyright (C) 2009 Mozilla Foundation
+//  Copyright (C) 1998-2007 Marti Maria
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files (the "Software"),
+// to deal in the Software without restriction, including without limitation
+// the rights to use, copy, modify, merge, publish, distribute, sublicense,
+// and/or sell copies of the Software, and to permit persons to whom the Software
+// is furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+use crate::{
+    iccread::{curveType, Profile},
+    s15Fixed16Number_to_float,
+};
+use crate::{matrix::Matrix, transform::PRECACHE_OUTPUT_MAX, transform::PRECACHE_OUTPUT_SIZE};
+
+// A `u16` used as a fraction: the 16-bit interpolation helpers in this file
+// describe such values as representing the range 0..1.
+//XXX: could use a better name
+pub type uint16_fract_t = u16;
+
+/// Converts an unsigned 8.8 fixed-point number to a float by dividing the
+/// raw value by 256.
+///
+/// Reference points:
+/// * `0x0000` is 0.
+/// * `0x0100` is 1.
+/// * `0xffff` is 255 + 255/256
+#[inline]
+fn u8Fixed8Number_to_float(x: u16) -> f32 {
+    let raw = f64::from(x);
+    (raw / 256.0) as f32
+}
+/// Clamps `a` to the closed interval `[0, 1]`, mapping NaN to 0.
+///
+/// The obvious "greater than 1 / less than 0 / otherwise `a`" chain would let
+/// NaN fall through to the final branch and be returned unchanged, which is
+/// undesirable for most consumers. NaN is therefore handled together with the
+/// negative values.
+#[inline]
+pub fn clamp_float(a: f32) -> f32 {
+    if a.is_nan() || a < 0. {
+        return 0.;
+    }
+    if a > 1. {
+        return 1.;
+    }
+    a
+}
+/// Linearly interpolates `table` at the normalised position `input_value`.
+///
+/// `input_value` must be between 0 and 1 (anything else indexes outside the
+/// table and panics, as does an empty table).
+//XXX: is the above a good restriction to have?
+/// The 16-bit table entries are scaled so that the output range of this
+/// function is 0..1.
+pub fn lut_interp_linear(input_value: f64, table: &[u16]) -> f32 {
+    let position = input_value * (table.len() - 1) as f64;
+
+    let upper: i32 = position.ceil() as i32;
+    let lower: i32 = position.floor() as i32;
+    // Distance from the upper node back to the requested position; it is the
+    // weight of the lower node.
+    let from_upper: f64 = upper as f64 - position;
+    let upper_part: f64 = f64::from(table[upper as usize]) * (1. - from_upper);
+    let lower_part: f64 = f64::from(table[lower as usize]) * from_upper;
+    let blended: f32 = (upper_part + lower_part) as f32;
+    /* scale the value */
+    blended * (1.0 / 65535.0)
+}
+/// Same as `lut_interp_linear` but takes and returns a `u16` value
+/// representing a range from 0..1.
+///
+/// The intermediate products are computed in `u64`: the largest possible
+/// `input_value * (table.len() - 1)` no longer fits in an `i32` once the
+/// table has more than 32769 entries.
+///
+/// # Panics
+///
+/// Panics if `table` is empty.
+#[no_mangle]
+pub fn lut_interp_linear16(input_value: u16, table: &[u16]) -> u16 {
+    /* Start scaling input_value to the length of the array: 65535*(length-1).
+     * We'll divide out the 65535 next */
+    let last_index = table.len() as u64 - 1;
+    let value: u64 = u64::from(input_value) * last_index;
+    /* equivalent to ceil(value/65535) */
+    let upper = ((value + 65534) / 65535) as usize;
+    /* equivalent to floor(value/65535) */
+    let lower = (value / 65535) as usize;
+    /* interp is the weight of the upper entry, scaled to 0..65535 */
+    let interp: u64 = value % 65535;
+    // The two weights add up to 65535, so the sum is at most 65535*65535.
+    let blended: u64 =
+        (u64::from(table[upper]) * interp + u64::from(table[lower]) * (65535 - interp)) / 65535;
+    blended as u16
+}
+/// Same as `lut_interp_linear16` but takes an `input_value` from
+/// 0..PRECACHE_OUTPUT_MAX and returns a `u8` value representing a range
+/// from 0..1.
+fn lut_interp_linear_precache_output(input_value: u32, table: &[u16]) -> u8 {
+    let max = PRECACHE_OUTPUT_MAX as u32;
+    /* Start scaling input_value to the length of the array: PRECACHE_OUTPUT_MAX*(length-1).
+     * We'll divide out the PRECACHE_OUTPUT_MAX next */
+    let scaled: u32 = input_value * (table.len() - 1) as u32;
+    /* equivalent to ceil(scaled/PRECACHE_OUTPUT_MAX) */
+    let upper = ((scaled + max - 1) / max) as usize;
+    /* equivalent to floor(scaled/PRECACHE_OUTPUT_MAX) */
+    let lower = (scaled / max) as usize;
+    /* interp is the weight of the upper entry, scaled to 0..PRECACHE_OUTPUT_MAX */
+    let interp: u32 = scaled % max;
+    /* the table values range from 0..65535, so the blend is in
+     * 0..(65535*PRECACHE_OUTPUT_MAX) */
+    let mut blended: u32 = table[upper] as u32 * interp + table[lower] as u32 * (max - interp);
+    /* round and scale to 0..255 */
+    blended += (PRECACHE_OUTPUT_MAX * 65535 / 255 / 2) as u32;
+    blended /= (PRECACHE_OUTPUT_MAX * 65535 / 255) as u32;
+    blended as u8
+}
+/// Linearly interpolates a table of floats at the normalised position `value`.
+///
+/// `value` must be between 0 and 1 (anything else indexes outside the table
+/// and panics, as does an empty table).
+//XXX: is the above a good restriction to have?
+pub fn lut_interp_linear_float(value: f32, table: &[f32]) -> f32 {
+    let position: f32 = value * (table.len() - 1) as f32;
+
+    let upper: i32 = position.ceil() as i32;
+    let lower: i32 = position.floor() as i32;
+    // Distance from the upper node back to the requested position; it is the
+    // weight of the lower node.
+    let from_upper: f32 = upper as f32 - position;
+    //XXX: can we be more performant here?
+    let upper_part: f64 = table[upper as usize] as f64 * (1.0f64 - from_upper as f64);
+    let lower_part: f64 = (table[lower as usize] * from_upper) as f64;
+    (upper_part + lower_part) as f32
+}
+/// Appends 256 samples of a pure power curve to `gamma_table`.
+///
+/// `gamma` is the exponent as an unsigned 8.8 fixed-point number; sample `i`
+/// is `(i / 255) ^ gamma`.
+fn compute_curve_gamma_table_type1(gamma_table: &mut Vec<f32>, gamma: u16) {
+    let exponent = u8Fixed8Number_to_float(gamma) as f64;
+    // 0..1^(0..255 + 255/256) will always be between 0 and 1
+    gamma_table.extend((0..256).map(|i: i32| (i as f64 / 255.0f64).powf(exponent) as f32));
+}
+/// Appends 256 samples of a sampled curve to `gamma_table`.
+///
+/// Sample `i` is `table` interpolated linearly at position `i / 255`.
+fn compute_curve_gamma_table_type2(gamma_table: &mut Vec<f32>, table: &[u16]) {
+    gamma_table.extend((0..256).map(|i: i32| lut_interp_linear(i as f64 / 255.0f64, table)));
+}
+/// Appends 256 samples of an ICC parametric curve to `gamma_table`.
+///
+/// `params[0]` is always the exponent; the number of parameters selects the
+/// function type:
+///
+/// | len | parameters          | X >= breakpoint       | X < breakpoint |
+/// |-----|---------------------|-----------------------|----------------|
+/// | 1   | g                   | X^g                   | (never)        |
+/// | 3   | g, a, b             | (aX + b)^g            | 0              |
+/// | 4   | g, a, b, c          | (aX + b)^g + c        | c              |
+/// | 5   | g, a, b, c, d       | (aX + b)^g            | cX             |
+/// | 7   | g, a, b, c, d, e, f | (aX + b)^g + e        | cX + f         |
+///
+/// The breakpoint is `-b/a` for 3 and 4 parameters and `d` for 5 and 7. It is
+/// expressed in normalised input units, so it is compared with `X / 255`
+/// rather than with the raw sample index. Every sample is clamped to `[0, 1]`.
+///
+/// Any other parameter count trips a `debug_assert!` and falls back to the
+/// single-parameter form.
+///
+/// # Panics
+///
+/// Panics if `params` is empty.
+fn compute_curve_gamma_table_type_parametric(gamma_table: &mut Vec<f32>, params: &[f32]) {
+    let y: f32 = params[0];
+    // (a, b, c, e, f, interval): `c + e` is the offset added to the power
+    // segment, `c` and `f` are slope and offset of the linear segment.
+    let (a, b, c, e, f, interval): (f32, f32, f32, f32, f32, f32) = match *params {
+        [_] => (1., 0., 0., 0., 0., -1.),
+        [_, a, b] => (a, b, 0., 0., 0., -1. * b / a),
+        [_, a, b, offset] => (a, b, 0., offset, offset, -1. * b / a),
+        [_, a, b, c, d] => (a, b, c, -c, 0., d),
+        [_, a, b, c, d, e, f] => (a, b, c, e - c, f, d),
+        _ => {
+            debug_assert!(false, "invalid parametric function type.");
+            (1., 0., 0., 0., 0., -1.)
+        }
+    };
+    for step in 0..256 {
+        // The breakpoint lives in the normalised 0..1 domain.
+        let sample = if step as f32 / 255. >= interval {
+            // XXX The equations are not exactly as defined in the spec but are
+            //     algebraically equivalent.
+            (((a * step as f32) as f64 / 255.0f64 + b as f64).powf(y as f64)
+                + c as f64
+                + e as f64) as f32
+        } else {
+            ((c * step as f32) as f64 / 255.0f64 + f as f64) as f32
+        };
+        gamma_table.push(clamp_float(sample));
+    }
+}
+
+/// Appends the 256-entry identity curve to `gamma_table`: sample `i` is
+/// `i / 255`.
+fn compute_curve_gamma_table_type0(gamma_table: &mut Vec<f32>) {
+    gamma_table.extend((0..256).map(|i: i32| (i as f64 / 255.0f64) as f32));
+}
+/// Builds the 256-entry input gamma table for a tone reproduction curve.
+///
+/// Returns `None` when no curve is given. Otherwise the table is filled by
+/// the helper matching the curve kind: parametric curves, or sampled curves
+/// with zero entries (identity), one entry (pure gamma) or more (interpolated
+/// lookup).
+pub(crate) fn build_input_gamma_table(TRC: Option<&curveType>) -> Option<Vec<f32>> {
+    let curve = TRC?;
+    let mut gamma_table = Vec::with_capacity(256);
+    match curve {
+        curveType::Parametric(params) => {
+            compute_curve_gamma_table_type_parametric(&mut gamma_table, params)
+        }
+        curveType::Curve(data) => {
+            let entries = data.len();
+            if entries == 0 {
+                compute_curve_gamma_table_type0(&mut gamma_table)
+            } else if entries == 1 {
+                compute_curve_gamma_table_type1(&mut gamma_table, data[0])
+            } else {
+                compute_curve_gamma_table_type2(&mut gamma_table, data)
+            }
+        }
+    }
+
+    Some(gamma_table)
+}
+/// Builds the 3x3 colorant matrix of a profile.
+///
+/// The columns are the red, green and blue colorants and the rows are their
+/// X, Y and Z components, each converted from s15Fixed16 to float. The result
+/// is always marked as valid.
+pub fn build_colorant_matrix(p: &Profile) -> Matrix {
+    let (red, green, blue) = (&p.redColorant, &p.greenColorant, &p.blueColorant);
+    let to_float = s15Fixed16Number_to_float;
+    Matrix {
+        m: [
+            [to_float(red.X), to_float(green.X), to_float(blue.X)],
+            [to_float(red.Y), to_float(green.Y), to_float(blue.Y)],
+            [to_float(red.Z), to_float(green.Z), to_float(blue.Z)],
+        ],
+        invalid: false,
+    }
+}
+/* The following code is copied nearly directly from lcms.
+ * I think it could be much better. For example, Argyll seems to have better code in
+ * icmTable_lookup_bwd and icmTable_setup_bwd. However, for now this is a quick way
+ * to a working solution and allows for easy comparing with lcms. */
+/// Looks for the 16-bit input that `lut_interp_linear16` maps to `Value`
+/// through `LutTable`, i.e. evaluates the inverse of the table.
+///
+/// The search is a binary search over the input range, optionally narrowed
+/// when the table starts with several zeros or ends with several `0xffff`
+/// entries. If no exact match is found the answer is refined by linear
+/// interpolation between the two table nodes surrounding the last probe.
+///
+/// Returns 0 when the table is judged not invertible for `Value`.
+/// Indexing `LutTable[0]` panics for an empty table.
+#[no_mangle]
+pub fn lut_inverse_interp16(Value: u16, LutTable: &[u16]) -> uint16_fract_t {
+    // Search bounds; probes are made at `x - 1`, hence the offset of one.
+    let mut l: i32 = 1; // 'int' Give spacing for negative values
+    let mut r: i32 = 0x10000;
+    let mut x: i32 = 0;
+    let mut res: i32;
+    let length = LutTable.len() as i32;
+
+    // Count the zero entries at the start of the table (at most length - 1).
+    let mut NumZeroes: i32 = 0;
+    while LutTable[NumZeroes as usize] as i32 == 0 && NumZeroes < length - 1 {
+        NumZeroes += 1
+    }
+    // There are no zeros at the beginning and we are trying to find a zero, so
+    // return anything. It seems zero would be the less destructive choice
+    /* I'm not sure that this makes sense, but oh well... */
+    if NumZeroes == 0 && Value as i32 == 0 {
+        return 0u16;
+    }
+    // Count the 0xffff entries at the end of the table (at most length - 1).
+    let mut NumPoles: i32 = 0;
+    while LutTable[(length - 1 - NumPoles) as usize] as i32 == 0xffff && NumPoles < length - 1 {
+        NumPoles += 1
+    }
+    // Does the curve belong to this case?
+    if NumZeroes > 1 || NumPoles > 1 {
+        let a_0: i32;
+        let b_0: i32;
+        // Identify if value fall downto 0 or FFFF zone
+        if Value as i32 == 0 {
+            return 0u16;
+        }
+        // if (Value == 0xFFFF) return 0xFFFF;
+        // else restrict to valid zone
+        if NumZeroes > 1 {
+            // Input position of the last leading zero, scaled to 0..0xffff.
+            a_0 = (NumZeroes - 1) * 0xffff / (length - 1);
+            l = a_0 - 1
+        }
+        if NumPoles > 1 {
+            // Input position of the first trailing 0xffff, scaled to 0..0xffff.
+            b_0 = (length - 1 - NumPoles) * 0xffff / (length - 1);
+            r = b_0 + 1
+        }
+    }
+    if r <= l {
+        // If this happens LutTable is not invertible
+        return 0u16;
+    }
+    // Seems not a degenerated case... apply binary search
+    while r > l {
+        x = (l + r) / 2;
+        // Forward-evaluate the table at the probe and compare with the target.
+        res = lut_interp_linear16((x - 1) as uint16_fract_t, LutTable) as i32;
+        if res == Value as i32 {
+            // Found exact match.
+            return (x - 1) as uint16_fract_t;
+        }
+        if res > Value as i32 {
+            r = x - 1
+        } else {
+            l = x + 1
+        }
+    }
+
+    // Not found, should we interpolate?
+
+    // Get surrounding nodes
+    debug_assert!(x >= 1);
+
+    // Position of the last probe in table-index units.
+    let val2: f64 = (length - 1) as f64 * ((x - 1) as f64 / 65535.0f64);
+    let cell0: i32 = val2.floor() as i32;
+    let cell1: i32 = val2.ceil() as i32;
+    if cell0 == cell1 {
+        // The probe sits exactly on a node, there is nothing to interpolate.
+        return x as uint16_fract_t;
+    }
+
+    // Fit the line y = a * x + b through the two surrounding nodes, with x in
+    // 0..65535 input units and y in table-value units.
+    let y0: f64 = LutTable[cell0 as usize] as f64;
+    let x0: f64 = 65535.0f64 * cell0 as f64 / (length - 1) as f64;
+    let y1: f64 = LutTable[cell1 as usize] as f64;
+    let x1: f64 = 65535.0f64 * cell1 as f64 / (length - 1) as f64;
+    let a: f64 = (y1 - y0) / (x1 - x0);
+    let b: f64 = y0 - a * x0;
+    if a.abs() < 0.01f64 {
+        // Nearly flat segment: solving for x would divide by almost zero.
+        return x as uint16_fract_t;
+    }
+    // Solve the line for the input that yields `Value`, then clamp and round.
+    let f: f64 = (Value as i32 as f64 - b) / a;
+    if f < 0.0f64 {
+        return 0u16;
+    }
+    if f >= 65535.0f64 {
+        return 0xffffu16;
+    }
+    (f + 0.5f64).floor() as uint16_fract_t
+}
+/*
+The number of entries needed to invert a lookup table should not
+necessarily be the same as the original number of entries.  This is
+especially true of lookup tables that have a small number of entries.
+
+For example:
+Using a table like:
+   {0, 3104, 14263, 34802, 65535}
+invert_lut will produce an inverse of:
+   {3, 34459, 47529, 56801, 65535}
+which has a maximum error of about 9855 (pixel difference of ~38.346)
+
+For now, we punt the decision of output size to the caller. */
+/// Builds an inverse of `table` with `out_length` entries.
+///
+/// Entry `i` is obtained by asking `lut_inverse_interp16` which input yields
+/// the evenly spaced target value `round(i * 65535 / (out_length - 1))`.
+fn invert_lut(table: &[u16], out_length: i32) -> Vec<u16> {
+    let mut inverse = Vec::with_capacity(out_length as usize);
+    let last = (out_length - 1) as f64;
+    inverse.extend((0..out_length).map(|i| {
+        let exact: f64 = i as f64 * 65535.0f64 / last;
+        let target: uint16_fract_t = (exact + 0.5f64).floor() as uint16_fract_t;
+        lut_inverse_interp16(target, table)
+    }));
+    inverse
+}
+/// Fills `output` with an 8-bit power curve: entry `v` is
+/// `255 * (v / PRECACHE_OUTPUT_MAX) ^ gamma`, truncated to `u8`.
+fn compute_precache_pow(output: &mut [u8; PRECACHE_OUTPUT_SIZE], gamma: f32) {
+    for (v, slot) in output.iter_mut().enumerate() {
+        //XXX: don't do integer/float conversion... and round?
+        let normalised = v as f32 / PRECACHE_OUTPUT_MAX as f32;
+        *slot = (255. * normalised.powf(gamma)) as u8;
+    }
+}
+/// Fills `output` by evaluating `table` at every precache position with
+/// `lut_interp_linear_precache_output`.
+pub fn compute_precache_lut(output: &mut [u8; PRECACHE_OUTPUT_SIZE], table: &[u16]) {
+    for (v, slot) in output.iter_mut().enumerate() {
+        *slot = lut_interp_linear_precache_output(v as u32, table);
+    }
+}
+/// Fills `output` with a linear ramp: entry `v` is `v` divided by
+/// `PRECACHE_OUTPUT_SIZE / 256`, truncated to `u8`.
+pub fn compute_precache_linear(output: &mut [u8; PRECACHE_OUTPUT_SIZE]) {
+    let entries_per_level = PRECACHE_OUTPUT_SIZE / 256;
+    for (v, slot) in output.iter_mut().enumerate() {
+        //XXX: round?
+        *slot = (v / entries_per_level) as u8;
+    }
+}
+/// Fills the output precache table for the tone curve `trc`.
+///
+/// * Parametric curves are sampled into 256 16-bit entries, inverted into a
+///   256-entry table and then resampled into `output`.
+/// * Sampled curves with no entries produce a linear ramp, with one entry a
+///   power curve using the reciprocal of the stored gamma, and otherwise the
+///   inverted table resampled into `output`.
+///
+/// Always returns `true`.
+pub(crate) fn compute_precache(trc: &curveType, output: &mut [u8; PRECACHE_OUTPUT_SIZE]) -> bool {
+    match trc {
+        curveType::Parametric(params) => {
+            let mut gamma_table = Vec::with_capacity(256);
+            compute_curve_gamma_table_type_parametric(&mut gamma_table, params);
+            let gamma_table_uint: Vec<u16> = gamma_table[..256]
+                .iter()
+                .map(|entry| (entry * 65535f32) as u16)
+                .collect();
+            //XXX: the choice of 256 here is not backed by any theory,
+            //     measurement or data, however it is what lcms uses.
+            //     the maximum number we would need is 65535 because that's the
+            //     accuracy used for computing the pre cache table
+            let inverted = invert_lut(&gamma_table_uint, 256);
+            compute_precache_lut(output, &inverted);
+        }
+        curveType::Curve(data) => {
+            let entries = data.len();
+            if entries == 0 {
+                compute_precache_linear(output)
+            } else if entries == 1 {
+                compute_precache_pow(output, 1. / u8Fixed8Number_to_float(data[0]))
+            } else {
+                //XXX: the choice of a minimum of 256 here is not backed by any theory,
+                //     measurement or data, however it is what lcms uses.
+                //     the maximum number we would need is 65535 because that's the
+                //     accuracy used for computing the pre cache table
+                let inverted_size = (entries as i32).max(256);
+                //XXX turn this conversion into a function
+                let inverted = invert_lut(data, inverted_size);
+                compute_precache_lut(output, &inverted);
+            }
+        }
+    }
+    true
+}
+/// Builds a `length`-entry identity table: entry `i` is
+/// `i * 65535 / (length - 1)` rounded to the nearest integer.
+fn build_linear_table(length: i32) -> Vec<u16> {
+    let mut table = Vec::with_capacity(length as usize);
+    let last = (length - 1) as f64;
+    table.extend((0..length).map(|i| {
+        let exact: f64 = i as f64 * 65535.0f64 / last;
+        (exact + 0.5f64).floor() as uint16_fract_t
+    }));
+    table
+}
+/// Builds a `length`-entry power-curve table: entry `i` is
+/// `(i / (length - 1)) ^ gamma`, scaled to 0..65535 and rounded.
+fn build_pow_table(gamma: f32, length: i32) -> Vec<u16> {
+    let mut table = Vec::with_capacity(length as usize);
+    let exponent = gamma as f64;
+    let last = (length - 1) as f64;
+    table.extend((0..length).map(|i| {
+        let curved: f64 = (i as f64 / last).powf(exponent);
+        (curved * 65535.0f64 + 0.5f64).floor() as uint16_fract_t
+    }));
+    table
+}
+
+/// Builds the 16-bit lookup table used on the output side for `trc`.
+///
+/// * Parametric curves: the 256-entry gamma table scaled to 0..65535.
+/// * Sampled curves with no entries: a 4096-entry linear table.
+/// * Sampled curves with one entry: a 4096-entry power table using the
+///   reciprocal of the stored gamma.
+/// * Other sampled curves: the inverse of the curve, with at least 256
+///   entries.
+pub(crate) fn build_output_lut(trc: &curveType) -> Vec<u16> {
+    match trc {
+        curveType::Parametric(params) => {
+            let mut gamma_table = Vec::with_capacity(256);
+            compute_curve_gamma_table_type_parametric(&mut gamma_table, params);
+            let mut output = Vec::with_capacity(256);
+            output.extend(
+                gamma_table[..256]
+                    .iter()
+                    .map(|entry| (entry * 65535f32) as u16),
+            );
+            output
+        }
+        curveType::Curve(data) => {
+            let entries = data.len();
+            if entries == 0 {
+                build_linear_table(4096)
+            } else if entries == 1 {
+                let gamma = 1. / u8Fixed8Number_to_float(data[0]);
+                build_pow_table(gamma, 4096)
+            } else {
+                //XXX: the choice of a minimum of 256 here is not backed by any theory,
+                //     measurement or data, however it is what lcms uses.
+                let output_gamma_lut_length = entries.max(256);
+                invert_lut(data, output_gamma_lut_length as i32)
+            }
+        }
+    }
+}