summaryrefslogtreecommitdiffstats
path: root/gfx/qcms/src/transform_neon.rs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:44:51 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:44:51 +0000
commit9e3c08db40b8916968b9f30096c7be3f00ce9647 (patch)
treea68f146d7fa01f0134297619fbe7e33db084e0aa /gfx/qcms/src/transform_neon.rs
parentInitial commit. (diff)
downloadthunderbird-9e3c08db40b8916968b9f30096c7be3f00ce9647.tar.xz
thunderbird-9e3c08db40b8916968b9f30096c7be3f00ce9647.zip
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'gfx/qcms/src/transform_neon.rs')
-rw-r--r--gfx/qcms/src/transform_neon.rs158
1 files changed, 158 insertions, 0 deletions
diff --git a/gfx/qcms/src/transform_neon.rs b/gfx/qcms/src/transform_neon.rs
new file mode 100644
index 0000000000..d3983ba18c
--- /dev/null
+++ b/gfx/qcms/src/transform_neon.rs
@@ -0,0 +1,158 @@
+use crate::transform::{qcms_transform, Format, BGRA, CLAMPMAXVAL, FLOATSCALE, RGB, RGBA};
+#[cfg(target_arch = "aarch64")]
+use core::arch::aarch64::{
+ float32x4_t, int32x4_t, vaddq_f32, vcvtq_s32_f32, vgetq_lane_s32, vld1q_dup_f32, vld1q_f32,
+ vmaxq_f32, vminq_f32, vmulq_f32,
+};
+#[cfg(target_arch = "arm")]
+use core::arch::arm::{
+ float32x4_t, int32x4_t, vaddq_f32, vcvtq_s32_f32, vgetq_lane_s32, vld1q_dup_f32, vld1q_f32,
+ vmaxq_f32, vminq_f32, vmulq_f32,
+};
+use std::mem::zeroed;
+
+static mut floatScale: f32 = FLOATSCALE;
+static mut clampMaxValue: f32 = CLAMPMAXVAL;
+
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+unsafe fn qcms_transform_data_template_lut_neon<F: Format>(
+ transform: &qcms_transform,
+ mut src: *const u8,
+ mut dest: *mut u8,
+ mut length: usize,
+) {
+ let mat: *const [f32; 4] = (*transform).matrix.as_ptr();
+ /* deref *transform now to avoid it in loop */
+ let igtbl_r: *const f32 = (*transform).input_gamma_table_r.as_ref().unwrap().as_ptr();
+ let igtbl_g: *const f32 = (*transform).input_gamma_table_g.as_ref().unwrap().as_ptr();
+ let igtbl_b: *const f32 = (*transform).input_gamma_table_b.as_ref().unwrap().as_ptr();
+ /* deref *transform now to avoid it in loop */
+ let otdata_r: *const u8 = (*transform)
+ .output_table_r
+ .as_deref()
+ .unwrap()
+ .data
+ .as_ptr();
+ let otdata_g: *const u8 = (*transform)
+ .output_table_g
+ .as_deref()
+ .unwrap()
+ .data
+ .as_ptr();
+ let otdata_b: *const u8 = (*transform)
+ .output_table_b
+ .as_deref()
+ .unwrap()
+ .data
+ .as_ptr();
+ /* input matrix values never change */
+ let mat0: float32x4_t = vld1q_f32((*mat.offset(0isize)).as_ptr());
+ let mat1: float32x4_t = vld1q_f32((*mat.offset(1isize)).as_ptr());
+ let mat2: float32x4_t = vld1q_f32((*mat.offset(2isize)).as_ptr());
+ /* these values don't change, either */
+ let max: float32x4_t = vld1q_dup_f32(&clampMaxValue);
+ let min: float32x4_t = zeroed();
+ let scale: float32x4_t = vld1q_dup_f32(&floatScale);
+ let components: u32 = if F::kAIndex == 0xff { 3 } else { 4 } as u32;
+ /* working variables */
+ let mut vec_r: float32x4_t;
+ let mut vec_g: float32x4_t;
+ let mut vec_b: float32x4_t;
+ let mut result: int32x4_t;
+ let mut alpha: u8 = 0;
+ /* CYA */
+ if length == 0 {
+ return;
+ }
+ /* one pixel is handled outside of the loop */
+ length = length.wrapping_sub(1);
+ /* setup for transforming 1st pixel */
+ vec_r = vld1q_dup_f32(&*igtbl_r.offset(*src.offset(F::kRIndex as isize) as isize));
+ vec_g = vld1q_dup_f32(&*igtbl_g.offset(*src.offset(F::kGIndex as isize) as isize));
+ vec_b = vld1q_dup_f32(&*igtbl_b.offset(*src.offset(F::kBIndex as isize) as isize));
+ if F::kAIndex != 0xff {
+ alpha = *src.offset(F::kAIndex as isize)
+ }
+ src = src.offset(components as isize);
+ let mut i: u32 = 0;
+ while (i as usize) < length {
+ /* gamma * matrix */
+ vec_r = vmulq_f32(vec_r, mat0);
+ vec_g = vmulq_f32(vec_g, mat1);
+ vec_b = vmulq_f32(vec_b, mat2);
+ /* store alpha for this pixel; load alpha for next */
+ if F::kAIndex != 0xff {
+ *dest.offset(F::kAIndex as isize) = alpha;
+ alpha = *src.offset(F::kAIndex as isize)
+ }
+ /* crunch, crunch, crunch */
+ vec_r = vaddq_f32(vec_r, vaddq_f32(vec_g, vec_b));
+ vec_r = vmaxq_f32(min, vec_r);
+ vec_r = vminq_f32(max, vec_r);
+ result = vcvtq_s32_f32(vmulq_f32(vec_r, scale));
+
+ /* use calc'd indices to output RGB values */
+ *dest.offset(F::kRIndex as isize) = *otdata_r.offset(vgetq_lane_s32(result, 0) as isize);
+ *dest.offset(F::kGIndex as isize) = *otdata_g.offset(vgetq_lane_s32(result, 1) as isize);
+ *dest.offset(F::kBIndex as isize) = *otdata_b.offset(vgetq_lane_s32(result, 2) as isize);
+
+ /* load gamma values for next loop while store completes */
+ vec_r = vld1q_dup_f32(&*igtbl_r.offset(*src.offset(F::kRIndex as isize) as isize));
+ vec_g = vld1q_dup_f32(&*igtbl_g.offset(*src.offset(F::kGIndex as isize) as isize));
+ vec_b = vld1q_dup_f32(&*igtbl_b.offset(*src.offset(F::kBIndex as isize) as isize));
+
+ dest = dest.offset(components as isize);
+ src = src.offset(components as isize);
+ i = i.wrapping_add(1)
+ }
+ /* handle final (maybe only) pixel */
+ vec_r = vmulq_f32(vec_r, mat0);
+ vec_g = vmulq_f32(vec_g, mat1);
+ vec_b = vmulq_f32(vec_b, mat2);
+ if F::kAIndex != 0xff {
+ *dest.offset(F::kAIndex as isize) = alpha
+ }
+ vec_r = vaddq_f32(vec_r, vaddq_f32(vec_g, vec_b));
+ vec_r = vmaxq_f32(min, vec_r);
+ vec_r = vminq_f32(max, vec_r);
+ result = vcvtq_s32_f32(vmulq_f32(vec_r, scale));
+
+ *dest.offset(F::kRIndex as isize) = *otdata_r.offset(vgetq_lane_s32(result, 0) as isize);
+ *dest.offset(F::kGIndex as isize) = *otdata_g.offset(vgetq_lane_s32(result, 1) as isize);
+ *dest.offset(F::kBIndex as isize) = *otdata_b.offset(vgetq_lane_s32(result, 2) as isize);
+}
+#[no_mangle]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+pub unsafe fn qcms_transform_data_rgb_out_lut_neon(
+ transform: &qcms_transform,
+ src: *const u8,
+ dest: *mut u8,
+ length: usize,
+) {
+ qcms_transform_data_template_lut_neon::<RGB>(transform, src, dest, length);
+}
+#[no_mangle]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+pub unsafe fn qcms_transform_data_rgba_out_lut_neon(
+ transform: &qcms_transform,
+ src: *const u8,
+ dest: *mut u8,
+ length: usize,
+) {
+ qcms_transform_data_template_lut_neon::<RGBA>(transform, src, dest, length);
+}
+
+#[no_mangle]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+pub unsafe fn qcms_transform_data_bgra_out_lut_neon(
+ transform: &qcms_transform,
+ src: *const u8,
+ dest: *mut u8,
+ length: usize,
+) {
+ qcms_transform_data_template_lut_neon::<BGRA>(transform, src, dest, length);
+}