diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:11:38 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:13:23 +0000 |
commit | 20431706a863f92cb37dc512fef6e48d192aaf2c (patch) | |
tree | 2867f13f5fd5437ba628c67d7f87309ccadcd286 /library/stdarch/crates/core_arch/src/riscv_shared | |
parent | Releasing progress-linux version 1.65.0+dfsg1-2~progress7.99u1. (diff) | |
download | rustc-20431706a863f92cb37dc512fef6e48d192aaf2c.tar.xz rustc-20431706a863f92cb37dc512fef6e48d192aaf2c.zip |
Merging upstream version 1.66.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | library/stdarch/crates/core_arch/src/riscv_shared/mod.rs | 83 | ||||
-rw-r--r-- | library/stdarch/crates/core_arch/src/riscv_shared/p.rs | 1061 |
2 files changed, 1097 insertions, 47 deletions
diff --git a/library/stdarch/crates/core_arch/src/riscv_shared/mod.rs b/library/stdarch/crates/core_arch/src/riscv_shared/mod.rs index 347735df1..0e35fe1f1 100644 --- a/library/stdarch/crates/core_arch/src/riscv_shared/mod.rs +++ b/library/stdarch/crates/core_arch/src/riscv_shared/mod.rs @@ -1,4 +1,7 @@ //! Shared RISC-V intrinsics +mod p; + +pub use p::*; use crate::arch::asm; @@ -469,6 +472,17 @@ pub unsafe fn hinval_gvma_vmid(vmid: usize) { asm!(".insn r 0x73, 0, 0x33, x0, x0, {}", in(reg) vmid, options(nostack)) } +/// Invalidate hypervisor translation cache for all virtual machines and guest physical addresses +/// +/// This instruction invalidates any address-translation cache entries that an +/// `HFENCE.GVMA` instruction with the same values of `gaddr` and `vmid` would invalidate. +/// +/// This fence specifies all guest physical addresses and all virtual machines. +#[inline] +pub unsafe fn hinval_gvma_all() { + asm!(".insn r 0x73, 0, 0x33, x0, x0, x0", options(nostack)) +} + /// Reads the floating-point control and status register `fcsr` /// /// Register `fcsr` is a 32-bit read/write register that selects the dynamic rounding mode @@ -574,17 +588,6 @@ pub fn fsflags(value: u32) -> u32 { original } -/// Invalidate hypervisor translation cache for all virtual machines and guest physical addresses -/// -/// This instruction invalidates any address-translation cache entries that an -/// `HFENCE.GVMA` instruction with the same values of `gaddr` and `vmid` would invalidate. -/// -/// This fence specifies all guest physical addresses and all virtual machines. -#[inline] -pub unsafe fn hinval_gvma_all() { - asm!(".insn r 0x73, 0, 0x33, x0, x0, x0", options(nostack)) -} - /// `P0` transformation function as is used in the SM3 hash algorithm /// /// This function is included in `Zksh` extension. 
It's defined as: @@ -602,12 +605,10 @@ pub unsafe fn hinval_gvma_all() { /// According to RISC-V Cryptography Extensions, Volume I, the execution latency of /// this instruction must always be independent from the data it operates on. #[inline] +#[target_feature(enable = "zksh")] pub fn sm3p0(x: u32) -> u32 { let ans: u32; - unsafe { - // asm!("sm3p0 {}, {}", out(reg) ans, in(reg) x, options(nomem, nostack)) - asm!(".insn i 0x13, 0x1, {}, {}, 0x108", out(reg) ans, in(reg) x, options(nomem, nostack)) - }; + unsafe { asm!("sm3p0 {}, {}", lateout(reg) ans, in(reg) x, options(pure, nomem, nostack)) }; ans } @@ -634,12 +635,10 @@ pub fn sm3p0(x: u32) -> u32 { /// According to RISC-V Cryptography Extensions, Volume I, the execution latency of /// this instruction must always be independent from the data it operates on. #[inline] +#[target_feature(enable = "zksh")] pub fn sm3p1(x: u32) -> u32 { let ans: u32; - unsafe { - // asm!("sm3p1 {}, {}", out(reg) ans, in(reg) x, options(nomem, nostack)) - asm!(".insn i 0x13, 0x1, {}, {}, 0x109", out(reg) ans, in(reg) x, options(nomem, nostack)) - }; + unsafe { asm!("sm3p1 {}, {}", lateout(reg) ans, in(reg) x, options(pure, nomem, nostack)) }; ans } @@ -674,33 +673,28 @@ pub fn sm3p1(x: u32) -> u32 { /// It can be implemented by `sm4ed` instruction like: /// /// ```no_run +/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +/// # fn round_function(x0: u32, x1: u32, x2: u32, x3: u32, rk: u32) -> u32 { +/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ed; +/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ed; /// let a = x1 ^ x2 ^ x3 ^ rk; /// let c0 = sm4ed::<0>(x0, a); /// let c1 = sm4ed::<1>(c0, a); // c1 represents c[0..=1], etc. 
/// let c2 = sm4ed::<2>(c1, a); /// let c3 = sm4ed::<3>(c2, a); /// return c3; // c3 represents c[0..=3] +/// # } /// ``` /// /// According to RISC-V Cryptography Extensions, Volume I, the execution latency of /// this instruction must always be independent from the data it operates on. +#[inline] +#[target_feature(enable = "zksed")] pub fn sm4ed<const BS: u8>(x: u32, a: u32) -> u32 { static_assert!(BS: u8 where BS <= 3); let ans: u32; - match BS { - 0 => unsafe { - asm!(".insn r 0x33, 0, 0x18, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) a, options(nomem, nostack)) - }, - 1 => unsafe { - asm!(".insn r 0x33, 0, 0x38, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) a, options(nomem, nostack)) - }, - 2 => unsafe { - asm!(".insn r 0x33, 0, 0x58, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) a, options(nomem, nostack)) - }, - 3 => unsafe { - asm!(".insn r 0x33, 0, 0x78, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) a, options(nomem, nostack)) - }, - _ => unreachable!(), + unsafe { + asm!("sm4ed {}, {}, {}, {}", lateout(reg) ans, in(reg) x, in(reg) a, const BS, options(pure, nomem, nostack)) }; ans } @@ -739,33 +733,28 @@ pub fn sm4ed<const BS: u8>(x: u32, a: u32) -> u32 { /// Hence, the key schedule operation can be implemented by `sm4ks` instruction like: /// /// ```no_run +/// # #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] +/// # fn key_schedule(k0: u32, k1: u32, k2: u32, k3: u32, ck_i: u32) -> u32 { +/// # #[cfg(target_arch = "riscv32")] use core::arch::riscv32::sm4ks; +/// # #[cfg(target_arch = "riscv64")] use core::arch::riscv64::sm4ks; /// let k = k1 ^ k2 ^ k3 ^ ck_i; /// let c0 = sm4ks::<0>(k0, k); /// let c1 = sm4ks::<1>(c0, k); // c1 represents c[0..=1], etc. /// let c2 = sm4ks::<2>(c1, k); /// let c3 = sm4ks::<3>(c2, k); /// return c3; // c3 represents c[0..=3] +/// # } /// ``` /// /// According to RISC-V Cryptography Extensions, Volume I, the execution latency of /// this instruction must always be independent from the data it operates on. 
+#[inline] +#[target_feature(enable = "zksed")] pub fn sm4ks<const BS: u8>(x: u32, k: u32) -> u32 { static_assert!(BS: u8 where BS <= 3); let ans: u32; - match BS { - 0 => unsafe { - asm!(".insn r 0x33, 0, 0x1A, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) k, options(nomem, nostack)) - }, - 1 => unsafe { - asm!(".insn r 0x33, 0, 0x3A, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) k, options(nomem, nostack)) - }, - 2 => unsafe { - asm!(".insn r 0x33, 0, 0x5A, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) k, options(nomem, nostack)) - }, - 3 => unsafe { - asm!(".insn r 0x33, 0, 0x7A, {}, {}, {}", out(reg) ans, in(reg) x, in(reg) k, options(nomem, nostack)) - }, - _ => unreachable!(), + unsafe { + asm!("sm4ks {}, {}, {}, {}", lateout(reg) ans, in(reg) x, in(reg) k, const BS, options(pure, nomem, nostack)) }; ans } diff --git a/library/stdarch/crates/core_arch/src/riscv_shared/p.rs b/library/stdarch/crates/core_arch/src/riscv_shared/p.rs new file mode 100644 index 000000000..a26044aee --- /dev/null +++ b/library/stdarch/crates/core_arch/src/riscv_shared/p.rs @@ -0,0 +1,1061 @@ +//! RISC-V Packed SIMD intrinsics; shared part. +//! +//! RV64 only part is placed in riscv64 folder. 
+use crate::arch::asm; + +/// Adds packed 16-bit signed numbers, discarding overflow bits +#[inline] +pub fn add16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x20, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Halves the sum of packed 16-bit signed numbers, dropping least bits +#[inline] +pub fn radd16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x00, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Halves the sum of packed 16-bit unsigned numbers, dropping least bits +#[inline] +pub fn uradd16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x10, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Adds packed 16-bit signed numbers, saturating at the numeric bounds +#[inline] +pub fn kadd16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x08, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Adds packed 16-bit unsigned numbers, saturating at the numeric bounds +#[inline] +pub fn ukadd16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x18, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Subtracts packed 16-bit signed numbers, discarding overflow bits +#[inline] +pub fn sub16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x21, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Halves the subtraction result of packed 16-bit signed numbers, dropping least bits +#[inline] +pub fn rsub16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 
0x0, 0x01, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Halves the subtraction result of packed 16-bit unsigned numbers, dropping least bits +#[inline] +pub fn ursub16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x11, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Subtracts packed 16-bit signed numbers, saturating at the numeric bounds +#[inline] +pub fn ksub16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x09, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Subtracts packed 16-bit unsigned numbers, saturating at the numeric bounds +#[inline] +pub fn uksub16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x19, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross adds and subtracts packed 16-bit signed numbers, discarding overflow bits +#[inline] +pub fn cras16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x22, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross halves of adds and subtracts packed 16-bit signed numbers, dropping least bits +#[inline] +pub fn rcras16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x02, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross halves of adds and subtracts packed 16-bit unsigned numbers, dropping least bits +#[inline] +pub fn urcras16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x12, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross adds and 
subtracts packed 16-bit signed numbers, saturating at the numeric bounds +#[inline] +pub fn kcras16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x0A, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross adds and subtracts packed 16-bit unsigned numbers, saturating at the numeric bounds +#[inline] +pub fn ukcras16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x1A, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross subtracts and adds packed 16-bit signed numbers, discarding overflow bits +#[inline] +pub fn crsa16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x23, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross halves of subtracts and adds packed 16-bit signed numbers, dropping least bits +#[inline] +pub fn rcrsa16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x03, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross halves of subtracts and adds packed 16-bit unsigned numbers, dropping least bits +#[inline] +pub fn urcrsa16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x13, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross subtracts and adds packed 16-bit signed numbers, saturating at the numeric bounds +#[inline] +pub fn kcrsa16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x0B, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Cross subtracts and adds packed 16-bit unsigned numbers, saturating at the numeric bounds +#[inline] +pub fn 
ukcrsa16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x1B, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Straight adds and subtracts packed 16-bit signed numbers, discarding overflow bits +#[inline] +pub fn stas16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x7A, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Straight halves of adds and subtracts packed 16-bit signed numbers, dropping least bits +#[inline] +pub fn rstas16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x5A, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Straight halves of adds and subtracts packed 16-bit unsigned numbers, dropping least bits +#[inline] +pub fn urstas16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x6A, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Straight adds and subtracts packed 16-bit signed numbers, saturating at the numeric bounds +#[inline] +pub fn kstas16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x62, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Straight adds and subtracts packed 16-bit unsigned numbers, saturating at the numeric bounds +#[inline] +pub fn ukstas16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x72, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Straight subtracts and adds packed 16-bit signed numbers, discarding overflow bits +#[inline] +pub fn stsa16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 
0x77, 0x0, 0x7B, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Straight halves of subtracts and adds packed 16-bit signed numbers, dropping least bits +#[inline] +pub fn rstsa16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x5B, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Straight halves of subtracts and adds packed 16-bit unsigned numbers, dropping least bits +#[inline] +pub fn urstsa16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x6B, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Straight subtracts and adds packed 16-bit signed numbers, saturating at the numeric bounds +#[inline] +pub fn kstsa16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x63, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Straight subtracts and adds packed 16-bit unsigned numbers, saturating at the numeric bounds +#[inline] +pub fn ukstsa16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x73, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Adds packed 8-bit signed numbers, discarding overflow bits +#[inline] +pub fn add8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x24, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Halves the sum of packed 8-bit signed numbers, dropping least bits +#[inline] +pub fn radd8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x04, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Halves 
the sum of packed 8-bit unsigned numbers, dropping least bits +#[inline] +pub fn uradd8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x14, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Adds packed 8-bit signed numbers, saturating at the numeric bounds +#[inline] +pub fn kadd8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x0C, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Adds packed 8-bit unsigned numbers, saturating at the numeric bounds +#[inline] +pub fn ukadd8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x1C, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Subtracts packed 8-bit signed numbers, discarding overflow bits +#[inline] +pub fn sub8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x25, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Halves the subtraction result of packed 8-bit signed numbers, dropping least bits +#[inline] +pub fn rsub8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x05, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Halves the subtraction result of packed 8-bit unsigned numbers, dropping least bits +#[inline] +pub fn ursub8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x15, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Subtracts packed 8-bit signed numbers, saturating at the numeric bounds +#[inline] +pub fn ksub8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x0D, {}, {}, 
{}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Subtracts packed 8-bit unsigned numbers, saturating at the numeric bounds +#[inline] +pub fn uksub8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x1D, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Arithmetic right shift packed 16-bit elements without rounding up +#[inline] +pub fn sra16(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x28, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Arithmetic right shift packed 16-bit elements with rounding up +#[inline] +pub fn sra16u(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x30, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical right shift packed 16-bit elements without rounding up +#[inline] +pub fn srl16(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x29, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical right shift packed 16-bit elements with rounding up +#[inline] +pub fn srl16u(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x31, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical left shift packed 16-bit elements, discarding overflow bits +#[inline] +pub fn sll16(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x2A, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical left shift packed 16-bit elements, saturating at the numeric bounds +#[inline] +pub fn ksll16(a: usize, b: u32) -> usize { + let value: 
usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x32, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical saturating left then arithmetic right shift packed 16-bit elements +#[inline] +pub fn kslra16(a: usize, b: i32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x2B, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical saturating left then arithmetic right shift packed 16-bit elements +#[inline] +pub fn kslra16u(a: usize, b: i32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x33, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Arithmetic right shift packed 8-bit elements without rounding up +#[inline] +pub fn sra8(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x2C, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Arithmetic right shift packed 8-bit elements with rounding up +#[inline] +pub fn sra8u(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x34, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical right shift packed 8-bit elements without rounding up +#[inline] +pub fn srl8(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x2D, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical right shift packed 8-bit elements with rounding up +#[inline] +pub fn srl8u(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x35, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical left shift packed 8-bit elements, discarding overflow bits +#[inline] +pub 
fn sll8(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x2E, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical left shift packed 8-bit elements, saturating at the numeric bounds +#[inline] +pub fn ksll8(a: usize, b: u32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x36, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical saturating left then arithmetic right shift packed 8-bit elements +#[inline] +pub fn kslra8(a: usize, b: i32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x2F, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Logical saturating left then arithmetic right shift packed 8-bit elements +#[inline] +pub fn kslra8u(a: usize, b: i32) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x37, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Compare equality for packed 16-bit elements +#[inline] +pub fn cmpeq16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x26, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Compare whether 16-bit packed signed integers are less than the others +#[inline] +pub fn scmplt16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x06, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Compare whether 16-bit packed signed integers are less than or equal to the others +#[inline] +pub fn scmple16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x0E, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + 
+/// Compare whether 16-bit packed unsigned integers are less than the others +#[inline] +pub fn ucmplt16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x16, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Compare whether 16-bit packed unsigned integers are less than or equal to the others +#[inline] +pub fn ucmple16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x1E, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Compare equality for packed 8-bit elements +#[inline] +pub fn cmpeq8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x27, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Compare whether 8-bit packed signed integers are less than the others +#[inline] +pub fn scmplt8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x07, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Compare whether 8-bit packed signed integers are less than or equal to the others +#[inline] +pub fn scmple8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x0F, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Compare whether 8-bit packed unsigned integers are less than the others +#[inline] +pub fn ucmplt8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x17, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Compare whether 8-bit packed unsigned integers are less than or equal to the others +#[inline] +pub fn ucmple8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 
0x77, 0x0, 0x1F, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Get minimum values from 16-bit packed signed integers +#[inline] +pub fn smin16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x40, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Get minimum values from 16-bit packed unsigned integers +#[inline] +pub fn umin16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x48, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Get maximum values from 16-bit packed signed integers +#[inline] +pub fn smax16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x41, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Get maximum values from 16-bit packed unsigned integers +#[inline] +pub fn umax16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x49, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/* todo: sclip16, uclip16 */ + +/// Compute the absolute value of packed 16-bit signed integers +#[inline] +pub fn kabs16(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, 0xAD1", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Count the number of redundant sign bits of the packed 16-bit elements +#[inline] +pub fn clrs16(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, 0xAE8", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Count the number of leading zero bits of the packed 16-bit elements +#[inline] +pub fn clz16(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 
0x0, {}, {}, 0xAE9", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Swap the 16-bit halfwords within each 32-bit word of a register +#[inline] +pub fn swap16(a: usize) -> usize { + let value: usize; + // this instruction is an alias for `pkbt rd, rs1, rs1`. + unsafe { + asm!(".insn r 0x77, 0x0, 0x0F, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Get minimum values from 8-bit packed signed integers +#[inline] +pub fn smin8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x44, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Get minimum values from 8-bit packed unsigned integers +#[inline] +pub fn umin8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x4C, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Get maximum values from 8-bit packed signed integers +#[inline] +pub fn smax8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x45, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Get maximum values from 8-bit packed unsigned integers +#[inline] +pub fn umax8(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x4D, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/* todo: sclip8, uclip8 */ + +/// Compute the absolute value of packed 8-bit signed integers +#[inline] +pub fn kabs8(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, 0xAD0", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Count the number of redundant sign bits of the packed 8-bit elements +#[inline] +pub fn clrs8(a: usize) -> usize { + let value: 
usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, 0xAE0", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Count the number of leading zero bits of the packed 8-bit elements +#[inline] +pub fn clz8(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, 0xAE1", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Swap the 8-bit bytes within each 16-bit halfword of a register. +#[inline] +pub fn swap8(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, 0xAD8", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack first and zeroth into two 16-bit signed halfwords in each 32-bit chunk +#[inline] +pub fn sunpkd810(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, 0xAC8", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack second and zeroth into two 16-bit signed halfwords in each 32-bit chunk +#[inline] +pub fn sunpkd820(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, 0xAC9", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack third and zeroth into two 16-bit signed halfwords in each 32-bit chunk +#[inline] +pub fn sunpkd830(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, 0xACA", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack third and first into two 16-bit signed halfwords in each 32-bit chunk +#[inline] +pub fn sunpkd831(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, 0xACB", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack third and second into two 16-bit signed halfwords in each 32-bit chunk +#[inline] +pub fn sunpkd832(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn 
i 0x77, 0x0, {}, {}, 0xAD3", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack first and zeroth into two 16-bit unsigned halfwords in each 32-bit chunk +#[inline] +pub fn zunpkd810(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, 0xACC", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack second and zeroth into two 16-bit unsigned halfwords in each 32-bit chunk +#[inline] +pub fn zunpkd820(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, 0xACD", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack third and zeroth into two 16-bit unsigned halfwords in each 32-bit chunk +#[inline] +pub fn zunpkd830(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, 0xACE", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack third and first into two 16-bit unsigned halfwords in each 32-bit chunk +#[inline] +pub fn zunpkd831(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, 0xACF", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Unpack third and second into two 16-bit unsigned halfwords in each 32-bit chunk +#[inline] +pub fn zunpkd832(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, 0xAD7", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +// todo: pkbb16, pktt16 + +/// Pack two 16-bit data from bottom and top half from 32-bit chunks +#[inline] +pub fn pkbt16(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x1, 0x0F, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Pack two 16-bit data from top and bottom half from 32-bit chunks +#[inline] +pub fn pktb16(a: usize, b: usize) -> usize { + 
let value: usize; + unsafe { + asm!(".insn r 0x77, 0x1, 0x1F, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Count the number of redundant sign bits of the packed 32-bit elements +#[inline] +pub fn clrs32(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, 0xAF8", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Count the number of leading zero bits of the packed 32-bit elements +#[inline] +pub fn clz32(a: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn i 0x77, 0x0, {}, {}, 0xAF9", lateout(reg) value, in(reg) a, options(pure, nomem, nostack)) + } + value +} + +/// Calculate the sum of absolute difference of unsigned 8-bit data elements +#[inline] +pub fn pbsad(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x7E, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Calculate and accumulate the sum of absolute difference of unsigned 8-bit data elements +#[inline] +pub fn pbsada(t: usize, a: usize, b: usize) -> usize { + let mut value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x7F, {}, {}, {}", inlateout(reg) t => value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Multiply signed 8-bit elements and add 16-bit elements on results for packed 32-bit chunks +#[inline] +pub fn smaqa(t: usize, a: usize, b: usize) -> usize { + let mut value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x64, {}, {}, {}", inlateout(reg) t => value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Multiply unsigned 8-bit elements and add 16-bit elements on results for packed 32-bit chunks +#[inline] +pub fn umaqa(t: usize, a: usize, b: usize) -> usize { + let mut value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x66, {}, {}, {}", inlateout(reg) t => value, in(reg) a, in(reg) b, options(pure, 
nomem, nostack)) + } + value +} + +/// Multiply signed by unsigned 8-bit elements and add 16-bit elements on results for packed 32-bit chunks +#[inline] +pub fn smaqasu(t: usize, a: usize, b: usize) -> usize { + let mut value: usize; + unsafe { + asm!(".insn r 0x77, 0x0, 0x65, {}, {}, {}", inlateout(reg) t => value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Adds signed lower 16-bit content of two registers with Q15 saturation +#[inline] +pub fn kaddh(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x1, 0x02, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Subtracts signed lower 16-bit content of two registers with Q15 saturation +#[inline] +pub fn ksubh(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x1, 0x03, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Adds unsigned lower 16-bit content of two registers with U16 saturation +#[inline] +pub fn ukaddh(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x1, 0x0A, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} + +/// Subtracts unsigned lower 16-bit content of two registers with U16 saturation +#[inline] +pub fn uksubh(a: usize, b: usize) -> usize { + let value: usize; + unsafe { + asm!(".insn r 0x77, 0x1, 0x0B, {}, {}, {}", lateout(reg) value, in(reg) a, in(reg) b, options(pure, nomem, nostack)) + } + value +} |