diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
commit | 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch) | |
tree | 173a775858bd501c378080a10dca74132f05bc50 /library/stdarch/crates/core_arch/src/x86/f16c.rs | |
parent | Initial commit. (diff) | |
download | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip |
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | library/stdarch/crates/core_arch/src/x86/f16c.rs | 112 |
1 files changed, 112 insertions, 0 deletions
diff --git a/library/stdarch/crates/core_arch/src/x86/f16c.rs b/library/stdarch/crates/core_arch/src/x86/f16c.rs new file mode 100644 index 000000000..8b25fd65e --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86/f16c.rs @@ -0,0 +1,112 @@ +//! [F16C intrinsics]. +//! +//! [F16C intrinsics]: https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=fp16&expand=1769 + +use crate::{ + core_arch::{simd::*, x86::*}, + // hint::unreachable_unchecked, + mem::transmute, +}; + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +extern "unadjusted" { + #[link_name = "llvm.x86.vcvtph2ps.128"] + fn llvm_vcvtph2ps_128(a: i16x8) -> f32x4; + #[link_name = "llvm.x86.vcvtph2ps.256"] + fn llvm_vcvtph2ps_256(a: i16x8) -> f32x8; + #[link_name = "llvm.x86.vcvtps2ph.128"] + fn llvm_vcvtps2ph_128(a: f32x4, rounding: i32) -> i16x8; + #[link_name = "llvm.x86.vcvtps2ph.256"] + fn llvm_vcvtps2ph_256(a: f32x8, rounding: i32) -> i16x8; +} + +/// Converts the 4 x 16-bit half-precision float values in the lowest 64-bit of +/// the 128-bit vector `a` into 4 x 32-bit float values stored in a 128-bit wide +/// vector. +#[inline] +#[target_feature(enable = "f16c")] +#[cfg_attr(test, assert_instr("vcvtph2ps"))] +pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 { + transmute(llvm_vcvtph2ps_128(transmute(a))) +} + +/// Converts the 8 x 16-bit half-precision float values in the 128-bit vector +/// `a` into 8 x 32-bit float values stored in a 256-bit wide vector. +#[inline] +#[target_feature(enable = "f16c")] +#[cfg_attr(test, assert_instr("vcvtph2ps"))] +pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 { + transmute(llvm_vcvtph2ps_256(transmute(a))) +} + +/// Converts the 4 x 32-bit float values in the 128-bit vector `a` into 4 x +/// 16-bit half-precision float values stored in the lowest 64-bit of a 128-bit +/// vector. +/// +/// Rounding is done according to the `imm_rounding` parameter, which can be one of: +/// +/// * `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC`: round to nearest and suppress exceptions, +/// * `_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC`: round down and suppress exceptions, +/// * `_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC`: round up and suppress exceptions, +/// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions, +/// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]. +#[inline] +#[target_feature(enable = "f16c")] +#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm_cvtps_ph<const IMM_ROUNDING: i32>(a: __m128) -> __m128i { + static_assert_imm3!(IMM_ROUNDING); + let a = a.as_f32x4(); + let r = llvm_vcvtps2ph_128(a, IMM_ROUNDING); + transmute(r) +} + +/// Converts the 8 x 32-bit float values in the 256-bit vector `a` into 8 x +/// 16-bit half-precision float values stored in a 128-bit wide vector. +/// +/// Rounding is done according to the `imm_rounding` parameter, which can be one of: +/// +/// * `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC`: round to nearest and suppress exceptions, +/// * `_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC`: round down and suppress exceptions, +/// * `_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC`: round up and suppress exceptions, +/// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate and suppress exceptions, +/// * `_MM_FROUND_CUR_DIRECTION`: use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]. +#[inline] +#[target_feature(enable = "f16c")] +#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn _mm256_cvtps_ph<const IMM_ROUNDING: i32>(a: __m256) -> __m128i { + static_assert_imm3!(IMM_ROUNDING); + let a = a.as_f32x8(); + let r = llvm_vcvtps2ph_256(a, IMM_ROUNDING); + transmute(r) +} + +#[cfg(test)] +mod tests { + use crate::{core_arch::x86::*, mem::transmute}; + use stdarch_test::simd_test; + + #[simd_test(enable = "f16c")] + unsafe fn test_mm_cvtph_ps() { + let array = [1_f32, 2_f32, 3_f32, 4_f32]; + let float_vec: __m128 = transmute(array); + let halfs: __m128i = _mm_cvtps_ph::<0>(float_vec); + let floats: __m128 = _mm_cvtph_ps(halfs); + let result: [f32; 4] = transmute(floats); + assert_eq!(result, array); + } + + #[simd_test(enable = "f16c")] + unsafe fn test_mm256_cvtph_ps() { + let array = [1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32]; + let float_vec: __m256 = transmute(array); + let halfs: __m128i = _mm256_cvtps_ph::<0>(float_vec); + let floats: __m256 = _mm256_cvtph_ps(halfs); + let result: [f32; 8] = transmute(floats); + assert_eq!(result, array); + } +} |