summary refs log tree commit diff stats
path: root/library/portable-simd
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-19 09:25:56 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-06-19 09:25:56 +0000
commit 018c4950b9406055dec02ef0fb52f132e2bb1e2c (patch)
tree a835ebdf2088ef88fa681f8fad45f09922c1ae9a /library/portable-simd
parent Adding debian version 1.75.0+dfsg1-5. (diff)
download rustc-018c4950b9406055dec02ef0fb52f132e2bb1e2c.tar.xz
rustc-018c4950b9406055dec02ef0fb52f132e2bb1e2c.zip
Merging upstream version 1.76.0+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'library/portable-simd')
-rw-r--r-- library/portable-simd/.github/workflows/ci.yml 71
-rw-r--r-- library/portable-simd/Cargo.lock 304
-rw-r--r-- library/portable-simd/crates/core_simd/Cargo.toml 1
-rw-r--r-- library/portable-simd/crates/core_simd/examples/dot_product.rs 2
-rw-r--r-- library/portable-simd/crates/core_simd/examples/matrix_inversion.rs 49
-rw-r--r-- library/portable-simd/crates/core_simd/examples/nbody.rs 3
-rw-r--r-- library/portable-simd/crates/core_simd/examples/spectral_norm.rs 2
-rw-r--r-- library/portable-simd/crates/core_simd/src/core_simd_docs.md 35
-rw-r--r-- library/portable-simd/crates/core_simd/src/fmt.rs 4
-rw-r--r-- library/portable-simd/crates/core_simd/src/intrinsics.rs 6
-rw-r--r-- library/portable-simd/crates/core_simd/src/iter.rs 16
-rw-r--r-- library/portable-simd/crates/core_simd/src/lane_count.rs 8
-rw-r--r-- library/portable-simd/crates/core_simd/src/lib.rs 4
-rw-r--r-- library/portable-simd/crates/core_simd/src/masks.rs 379
-rw-r--r-- library/portable-simd/crates/core_simd/src/masks/bitmask.rs 118
-rw-r--r-- library/portable-simd/crates/core_simd/src/masks/full_masks.rs 185
-rw-r--r-- library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs 97
-rw-r--r-- library/portable-simd/crates/core_simd/src/mod.rs 20
-rw-r--r-- library/portable-simd/crates/core_simd/src/ops.rs 11
-rw-r--r-- library/portable-simd/crates/core_simd/src/ops/assign.rs 26
-rw-r--r-- library/portable-simd/crates/core_simd/src/ops/deref.rs 46
-rw-r--r-- library/portable-simd/crates/core_simd/src/ops/shift_scalar.rs 62
-rw-r--r-- library/portable-simd/crates/core_simd/src/ops/unary.rs 46
-rw-r--r-- library/portable-simd/crates/core_simd/src/select.rs 22
-rw-r--r-- library/portable-simd/crates/core_simd/src/simd/cmp.rs 7
-rw-r--r-- library/portable-simd/crates/core_simd/src/simd/cmp/eq.rs (renamed from library/portable-simd/crates/core_simd/src/eq.rs) 30
-rw-r--r-- library/portable-simd/crates/core_simd/src/simd/cmp/ord.rs (renamed from library/portable-simd/crates/core_simd/src/ord.rs) 67
-rw-r--r-- library/portable-simd/crates/core_simd/src/simd/num.rs (renamed from library/portable-simd/crates/core_simd/src/elements.rs) 6
-rw-r--r-- library/portable-simd/crates/core_simd/src/simd/num/float.rs (renamed from library/portable-simd/crates/core_simd/src/elements/float.rs) 97
-rw-r--r-- library/portable-simd/crates/core_simd/src/simd/num/int.rs (renamed from library/portable-simd/crates/core_simd/src/elements/int.rs) 114
-rw-r--r-- library/portable-simd/crates/core_simd/src/simd/num/uint.rs (renamed from library/portable-simd/crates/core_simd/src/elements/uint.rs) 97
-rw-r--r-- library/portable-simd/crates/core_simd/src/simd/prelude.rs 6
-rw-r--r-- library/portable-simd/crates/core_simd/src/simd/ptr.rs 11
-rw-r--r-- library/portable-simd/crates/core_simd/src/simd/ptr/const_ptr.rs (renamed from library/portable-simd/crates/core_simd/src/elements/const_ptr.rs) 47
-rw-r--r-- library/portable-simd/crates/core_simd/src/simd/ptr/mut_ptr.rs (renamed from library/portable-simd/crates/core_simd/src/elements/mut_ptr.rs) 45
-rw-r--r-- library/portable-simd/crates/core_simd/src/swizzle.rs 408
-rw-r--r-- library/portable-simd/crates/core_simd/src/swizzle_dyn.rs 6
-rw-r--r-- library/portable-simd/crates/core_simd/src/to_bytes.rs 132
-rw-r--r-- library/portable-simd/crates/core_simd/src/vector.rs 43
-rw-r--r-- library/portable-simd/crates/core_simd/src/vendor.rs 2
-rw-r--r-- library/portable-simd/crates/core_simd/src/vendor/x86.rs 2
-rw-r--r-- library/portable-simd/crates/core_simd/tests/cast.rs 2
-rw-r--r-- library/portable-simd/crates/core_simd/tests/masks.rs 14
-rw-r--r-- library/portable-simd/crates/core_simd/tests/ops_macros.rs 135
-rw-r--r-- library/portable-simd/crates/core_simd/tests/pointers.rs 7
-rw-r--r-- library/portable-simd/crates/core_simd/tests/round.rs 4
-rw-r--r-- library/portable-simd/crates/core_simd/tests/swizzle.rs 28
-rw-r--r-- library/portable-simd/crates/core_simd/tests/swizzle_dyn.rs 1
-rw-r--r-- library/portable-simd/crates/core_simd/tests/to_bytes.rs 22
-rw-r--r-- library/portable-simd/crates/std_float/src/lib.rs 9
-rw-r--r-- library/portable-simd/crates/test_helpers/Cargo.toml 6
-rw-r--r-- library/portable-simd/crates/test_helpers/src/biteq.rs 32
-rw-r--r-- library/portable-simd/crates/test_helpers/src/lib.rs 128
-rw-r--r-- library/portable-simd/crates/test_helpers/src/subnormals.rs 91
54 files changed, 2109 insertions, 1007 deletions
diff --git a/library/portable-simd/.github/workflows/ci.yml b/library/portable-simd/.github/workflows/ci.yml
index 1ff377fce..90543044e 100644
--- a/library/portable-simd/.github/workflows/ci.yml
+++ b/library/portable-simd/.github/workflows/ci.yml
@@ -167,40 +167,33 @@ jobs:
RUSTFLAGS: ${{ matrix.rustflags }}
cross-tests:
- name: "${{ matrix.target }} (via cross)"
+ name: "${{ matrix.target_feature }} on ${{ matrix.target }} (via cross)"
runs-on: ubuntu-latest
strategy:
fail-fast: false
- # TODO: Sadly, we cant configure target-feature in a meaningful way
- # because `cross` doesn't tell qemu to enable any non-default cpu
- # features, nor does it give us a way to do so.
- #
- # Ultimately, we'd like to do something like [rust-lang/stdarch][stdarch].
- # This is a lot more complex... but in practice it's likely that we can just
- # snarf the docker config from around [here][1000-dockerfiles].
- #
- # [stdarch]: https://github.com/rust-lang/stdarch/blob/a5db4eaf/.github/workflows/main.yml#L67
- # [1000-dockerfiles]: https://github.com/rust-lang/stdarch/tree/a5db4eaf/ci/docker
matrix:
target:
- - i586-unknown-linux-gnu
- # 32-bit arm has a few idiosyncracies like having subnormal flushing
- # to zero on by default. Ideally we'd set
- armv7-unknown-linux-gnueabihf
- - aarch64-unknown-linux-gnu
- # Note: The issue above means neither of these mips targets will use
- # MSA (mips simd) but MIPS uses a nonstandard binary representation
- # for NaNs which makes it worth testing on despite that.
+ - thumbv7neon-unknown-linux-gnueabihf # includes neon by default
+ - aarch64-unknown-linux-gnu # includes neon by default
+ - powerpc-unknown-linux-gnu
+ - powerpc64le-unknown-linux-gnu # includes altivec by default
+ - riscv64gc-unknown-linux-gnu
+ # MIPS uses a nonstandard binary representation for NaNs which makes it worth testing
+ # non-nightly since https://github.com/rust-lang/rust/pull/113274
# - mips-unknown-linux-gnu
# - mips64-unknown-linux-gnuabi64
- - riscv64gc-unknown-linux-gnu
- # TODO this test works, but it appears to time out
- # - powerpc-unknown-linux-gnu
- # TODO this test is broken, but it appears to be a problem with QEMU, not us.
- # - powerpc64le-unknown-linux-gnu
- # TODO enable this once a new version of cross is released
+ # Lots of errors in QEMU and no real hardware to test on. Not clear if it's QEMU or bad codegen.
# - powerpc64-unknown-linux-gnu
+ target_feature: [default]
+ include:
+ - { target: powerpc64le-unknown-linux-gnu, target_feature: "+vsx" }
+ # Fails due to QEMU floating point errors, probably handling subnormals incorrectly.
+ # This target is somewhat redundant, since ppc64le has altivec as well.
+ # - { target: powerpc-unknown-linux-gnu, target_feature: "+altivec" }
+ # We should test this, but cross currently can't run it
+ # - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" }
steps:
- uses: actions/checkout@v2
@@ -217,11 +210,27 @@ jobs:
# being part of the tarball means we can't just use the download/latest
# URL :(
run: |
- CROSS_URL=https://github.com/rust-embedded/cross/releases/download/v0.2.1/cross-v0.2.1-x86_64-unknown-linux-gnu.tar.gz
+ CROSS_URL=https://github.com/cross-rs/cross/releases/download/v0.2.5/cross-x86_64-unknown-linux-gnu.tar.gz
mkdir -p "$HOME/.bin"
curl -sfSL --retry-delay 10 --retry 5 "${CROSS_URL}" | tar zxf - -C "$HOME/.bin"
echo "$HOME/.bin" >> $GITHUB_PATH
+ - name: Configure Emulated CPUs
+ run: |
+ echo "CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER=qemu-ppc -cpu e600" >> $GITHUB_ENV
+ # echo "CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_RUNNER=qemu-riscv64 -cpu rv64,zba=true,zbb=true,v=true,vlen=256,vext_spec=v1.0" >> $GITHUB_ENV
+
+ - name: Configure RUSTFLAGS
+ shell: bash
+ run: |
+ case "${{ matrix.target_feature }}" in
+ default)
+ echo "RUSTFLAGS=" >> $GITHUB_ENV;;
+ *)
+ echo "RUSTFLAGS=-Ctarget-feature=${{ matrix.target_feature }}" >> $GITHUB_ENV
+ ;;
+ esac
+
- name: Test (debug)
run: cross test --verbose --target=${{ matrix.target }}
@@ -229,7 +238,7 @@ jobs:
run: cross test --verbose --target=${{ matrix.target }} --release
features:
- name: "Check cargo features (${{ matrix.simd }} × ${{ matrix.features }})"
+ name: "Test cargo features (${{ matrix.simd }} × ${{ matrix.features }})"
runs-on: ubuntu-latest
strategy:
fail-fast: false
@@ -240,12 +249,8 @@ jobs:
features:
- ""
- "--features std"
- - "--features generic_const_exprs"
- - "--features std --features generic_const_exprs"
- "--features all_lane_counts"
- - "--features all_lane_counts --features std"
- - "--features all_lane_counts --features generic_const_exprs"
- - "--features all_lane_counts --features std --features generic_const_exprs"
+ - "--all-features"
steps:
- uses: actions/checkout@v2
@@ -257,9 +262,9 @@ jobs:
run: echo "CPU_FEATURE=$(lscpu | grep -o avx512[a-z]* | sed s/avx/+avx/ | tr '\n' ',' )" >> $GITHUB_ENV
- name: Check build
if: ${{ matrix.simd == '' }}
- run: RUSTFLAGS="-Dwarnings" cargo check --all-targets --no-default-features ${{ matrix.features }}
+ run: RUSTFLAGS="-Dwarnings" cargo test --all-targets --no-default-features ${{ matrix.features }}
- name: Check AVX
if: ${{ matrix.simd == 'avx512' && contains(env.CPU_FEATURE, 'avx512') }}
run: |
echo "Found AVX features: $CPU_FEATURE"
- RUSTFLAGS="-Dwarnings -Ctarget-feature=$CPU_FEATURE" cargo check --all-targets --no-default-features ${{ matrix.features }}
+ RUSTFLAGS="-Dwarnings -Ctarget-feature=$CPU_FEATURE" cargo test --all-targets --no-default-features ${{ matrix.features }}
diff --git a/library/portable-simd/Cargo.lock b/library/portable-simd/Cargo.lock
new file mode 100644
index 000000000..46312c096
--- /dev/null
+++ b/library/portable-simd/Cargo.lock
@@ -0,0 +1,304 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "autocfg"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "bumpalo"
+version = "3.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
+
+[[package]]
+name = "byteorder"
+version = "1.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "console_error_panic_hook"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc"
+dependencies = [
+ "cfg-if",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "core_simd"
+version = "0.1.0"
+dependencies = [
+ "proptest",
+ "std_float",
+ "test_helpers",
+ "wasm-bindgen",
+ "wasm-bindgen-test",
+]
+
+[[package]]
+name = "js-sys"
+version = "0.3.64"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a"
+dependencies = [
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "log"
+version = "0.4.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
+
+[[package]]
+name = "num-traits"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.66"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "proptest"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "12e6c80c1139113c28ee4670dc50cc42915228b51f56a9e407f0ec60f966646f"
+dependencies = [
+ "bitflags",
+ "byteorder",
+ "num-traits",
+ "rand",
+ "rand_chacha",
+ "rand_xorshift",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.33"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rand"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
+dependencies = [
+ "rand_chacha",
+ "rand_core",
+ "rand_hc",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
+
+[[package]]
+name = "rand_hc"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
+dependencies = [
+ "rand_core",
+]
+
+[[package]]
+name = "rand_xorshift"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77d416b86801d23dde1aa643023b775c3a462efc0ed96443add11546cdf1dca8"
+dependencies = [
+ "rand_core",
+]
+
+[[package]]
+name = "scoped-tls"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294"
+
+[[package]]
+name = "std_float"
+version = "0.1.0"
+dependencies = [
+ "core_simd",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "test_helpers"
+version = "0.1.0"
+dependencies = [
+ "proptest",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342"
+dependencies = [
+ "cfg-if",
+ "wasm-bindgen-macro",
+]
+
+[[package]]
+name = "wasm-bindgen-backend"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd"
+dependencies = [
+ "bumpalo",
+ "log",
+ "once_cell",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-futures"
+version = "0.4.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03"
+dependencies = [
+ "cfg-if",
+ "js-sys",
+ "wasm-bindgen",
+ "web-sys",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-backend",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"
+
+[[package]]
+name = "wasm-bindgen-test"
+version = "0.3.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e6e302a7ea94f83a6d09e78e7dc7d9ca7b186bc2829c24a22d0753efd680671"
+dependencies = [
+ "console_error_panic_hook",
+ "js-sys",
+ "scoped-tls",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "wasm-bindgen-test-macro",
+]
+
+[[package]]
+name = "wasm-bindgen-test-macro"
+version = "0.3.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ecb993dd8c836930ed130e020e77d9b2e65dd0fbab1b67c790b0f5d80b11a575"
+dependencies = [
+ "proc-macro2",
+ "quote",
+]
+
+[[package]]
+name = "web-sys"
+version = "0.3.64"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
diff --git a/library/portable-simd/crates/core_simd/Cargo.toml b/library/portable-simd/crates/core_simd/Cargo.toml
index d1a3a515a..b4a8fd70f 100644
--- a/library/portable-simd/crates/core_simd/Cargo.toml
+++ b/library/portable-simd/crates/core_simd/Cargo.toml
@@ -12,7 +12,6 @@ license = "MIT OR Apache-2.0"
default = ["as_crate"]
as_crate = []
std = []
-generic_const_exprs = []
all_lane_counts = []
[target.'cfg(target_arch = "wasm32")'.dev-dependencies]
diff --git a/library/portable-simd/crates/core_simd/examples/dot_product.rs b/library/portable-simd/crates/core_simd/examples/dot_product.rs
index a7973ec74..f047010a6 100644
--- a/library/portable-simd/crates/core_simd/examples/dot_product.rs
+++ b/library/portable-simd/crates/core_simd/examples/dot_product.rs
@@ -6,7 +6,7 @@
#![feature(slice_as_chunks)]
// Add these imports to use the stdsimd library
#![feature(portable_simd)]
-use core_simd::simd::*;
+use core_simd::simd::prelude::*;
// This is your barebones dot product implementation:
// Take 2 vectors, multiply them element wise and *then*
diff --git a/library/portable-simd/crates/core_simd/examples/matrix_inversion.rs b/library/portable-simd/crates/core_simd/examples/matrix_inversion.rs
index 39f530f68..bad864144 100644
--- a/library/portable-simd/crates/core_simd/examples/matrix_inversion.rs
+++ b/library/portable-simd/crates/core_simd/examples/matrix_inversion.rs
@@ -2,8 +2,7 @@
// Code ported from the `packed_simd` crate
// Run this code with `cargo test --example matrix_inversion`
#![feature(array_chunks, portable_simd)]
-use core_simd::simd::*;
-use Which::*;
+use core_simd::simd::prelude::*;
// Gotta define our own 4x4 matrix since Rust doesn't ship multidim arrays yet :^)
#[derive(Copy, Clone, Debug, PartialEq, PartialOrd)]
@@ -164,10 +163,10 @@ pub fn simd_inv4x4(m: Matrix4x4) -> Option<Matrix4x4> {
let m_2 = f32x4::from_array(m[2]);
let m_3 = f32x4::from_array(m[3]);
- const SHUFFLE01: [Which; 4] = [First(0), First(1), Second(0), Second(1)];
- const SHUFFLE02: [Which; 4] = [First(0), First(2), Second(0), Second(2)];
- const SHUFFLE13: [Which; 4] = [First(1), First(3), Second(1), Second(3)];
- const SHUFFLE23: [Which; 4] = [First(2), First(3), Second(2), Second(3)];
+ const SHUFFLE01: [usize; 4] = [0, 1, 4, 5];
+ const SHUFFLE02: [usize; 4] = [0, 2, 4, 6];
+ const SHUFFLE13: [usize; 4] = [1, 3, 5, 7];
+ const SHUFFLE23: [usize; 4] = [2, 3, 6, 7];
let tmp = simd_swizzle!(m_0, m_1, SHUFFLE01);
let row1 = simd_swizzle!(m_2, m_3, SHUFFLE01);
@@ -180,58 +179,58 @@ pub fn simd_inv4x4(m: Matrix4x4) -> Option<Matrix4x4> {
let row2 = simd_swizzle!(tmp, row3, SHUFFLE02);
let row3 = simd_swizzle!(row3, tmp, SHUFFLE13);
- let tmp = (row2 * row3).reverse().rotate_lanes_right::<2>();
+ let tmp = (row2 * row3).reverse().rotate_elements_right::<2>();
let minor0 = row1 * tmp;
let minor1 = row0 * tmp;
- let tmp = tmp.rotate_lanes_right::<2>();
+ let tmp = tmp.rotate_elements_right::<2>();
let minor0 = (row1 * tmp) - minor0;
let minor1 = (row0 * tmp) - minor1;
- let minor1 = minor1.rotate_lanes_right::<2>();
+ let minor1 = minor1.rotate_elements_right::<2>();
- let tmp = (row1 * row2).reverse().rotate_lanes_right::<2>();
+ let tmp = (row1 * row2).reverse().rotate_elements_right::<2>();
let minor0 = (row3 * tmp) + minor0;
let minor3 = row0 * tmp;
- let tmp = tmp.rotate_lanes_right::<2>();
+ let tmp = tmp.rotate_elements_right::<2>();
let minor0 = minor0 - row3 * tmp;
let minor3 = row0 * tmp - minor3;
- let minor3 = minor3.rotate_lanes_right::<2>();
+ let minor3 = minor3.rotate_elements_right::<2>();
- let tmp = (row3 * row1.rotate_lanes_right::<2>())
+ let tmp = (row3 * row1.rotate_elements_right::<2>())
.reverse()
- .rotate_lanes_right::<2>();
- let row2 = row2.rotate_lanes_right::<2>();
+ .rotate_elements_right::<2>();
+ let row2 = row2.rotate_elements_right::<2>();
let minor0 = row2 * tmp + minor0;
let minor2 = row0 * tmp;
- let tmp = tmp.rotate_lanes_right::<2>();
+ let tmp = tmp.rotate_elements_right::<2>();
let minor0 = minor0 - row2 * tmp;
let minor2 = row0 * tmp - minor2;
- let minor2 = minor2.rotate_lanes_right::<2>();
+ let minor2 = minor2.rotate_elements_right::<2>();
- let tmp = (row0 * row1).reverse().rotate_lanes_right::<2>();
+ let tmp = (row0 * row1).reverse().rotate_elements_right::<2>();
let minor2 = minor2 + row3 * tmp;
let minor3 = row2 * tmp - minor3;
- let tmp = tmp.rotate_lanes_right::<2>();
+ let tmp = tmp.rotate_elements_right::<2>();
let minor2 = row3 * tmp - minor2;
let minor3 = minor3 - row2 * tmp;
- let tmp = (row0 * row3).reverse().rotate_lanes_right::<2>();
+ let tmp = (row0 * row3).reverse().rotate_elements_right::<2>();
let minor1 = minor1 - row2 * tmp;
let minor2 = row1 * tmp + minor2;
- let tmp = tmp.rotate_lanes_right::<2>();
+ let tmp = tmp.rotate_elements_right::<2>();
let minor1 = row2 * tmp + minor1;
let minor2 = minor2 - row1 * tmp;
- let tmp = (row0 * row2).reverse().rotate_lanes_right::<2>();
+ let tmp = (row0 * row2).reverse().rotate_elements_right::<2>();
let minor1 = row3 * tmp + minor1;
let minor3 = minor3 - row1 * tmp;
- let tmp = tmp.rotate_lanes_right::<2>();
+ let tmp = tmp.rotate_elements_right::<2>();
let minor1 = minor1 - row3 * tmp;
let minor3 = row1 * tmp + minor3;
let det = row0 * minor0;
- let det = det.rotate_lanes_right::<2>() + det;
- let det = det.reverse().rotate_lanes_right::<2>() + det;
+ let det = det.rotate_elements_right::<2>() + det;
+ let det = det.reverse().rotate_elements_right::<2>() + det;
if det.reduce_sum() == 0. {
return None;
diff --git a/library/portable-simd/crates/core_simd/examples/nbody.rs b/library/portable-simd/crates/core_simd/examples/nbody.rs
index df38a0096..65820d134 100644
--- a/library/portable-simd/crates/core_simd/examples/nbody.rs
+++ b/library/portable-simd/crates/core_simd/examples/nbody.rs
@@ -1,11 +1,12 @@
#![feature(portable_simd)]
+#![allow(clippy::excessive_precision)]
extern crate std_float;
/// Benchmarks game nbody code
/// Taken from the `packed_simd` crate
/// Run this benchmark with `cargo test --example nbody`
mod nbody {
- use core_simd::simd::*;
+ use core_simd::simd::prelude::*;
#[allow(unused)] // False positive?
use std_float::StdFloat;
diff --git a/library/portable-simd/crates/core_simd/examples/spectral_norm.rs b/library/portable-simd/crates/core_simd/examples/spectral_norm.rs
index d576bd0cc..bc7934c25 100644
--- a/library/portable-simd/crates/core_simd/examples/spectral_norm.rs
+++ b/library/portable-simd/crates/core_simd/examples/spectral_norm.rs
@@ -1,6 +1,6 @@
#![feature(portable_simd)]
-use core_simd::simd::*;
+use core_simd::simd::prelude::*;
fn a(i: usize, j: usize) -> f64 {
((i + j) * (i + j + 1) / 2 + i + 1) as f64
diff --git a/library/portable-simd/crates/core_simd/src/core_simd_docs.md b/library/portable-simd/crates/core_simd/src/core_simd_docs.md
index 15e8ed025..bf412e035 100644
--- a/library/portable-simd/crates/core_simd/src/core_simd_docs.md
+++ b/library/portable-simd/crates/core_simd/src/core_simd_docs.md
@@ -2,3 +2,38 @@ Portable SIMD module.
This module offers a portable abstraction for SIMD operations
that is not bound to any particular hardware architecture.
+
+# What is "portable"?
+
+This module provides a SIMD implementation that is fast and predictable on any target.
+
+### Portable SIMD works on every target
+
+Unlike target-specific SIMD in `std::arch`, portable SIMD compiles for every target.
+In this regard, it is just like "regular" Rust.
+
+### Portable SIMD is consistent between targets
+
+A program using portable SIMD can expect identical behavior on any target.
+In most regards, [`Simd<T, N>`] can be thought of as a parallelized `[T; N]` and operates like a sequence of `T`.
+
+This has one notable exception: a handful of older architectures (e.g. `armv7` and `powerpc`) flush [subnormal](`f32::is_subnormal`) `f32` values to zero.
+On these architectures, subnormal `f32` input values are replaced with zeros, and any operation producing subnormal `f32` values produces zeros instead.
+This doesn't affect most architectures or programs.
+
+### Operations use the best instructions available
+
+Operations provided by this module compile to the best available SIMD instructions.
+
+Portable SIMD is not a low-level vendor library, and operations in portable SIMD _do not_ necessarily map to a single instruction.
+Instead, they map to a reasonable implementation of the operation for the target.
+
+Consistency between targets is not compromised to use faster or fewer instructions.
+In some cases, `std::arch` will provide a faster function that has slightly different behavior than the `std::simd` equivalent.
+For example, `_mm_min_ps`[^1] can be slightly faster than [`SimdFloat::simd_min`](`num::SimdFloat::simd_min`), but does not conform to the IEEE standard also used by [`f32::min`].
+When necessary, [`Simd<T, N>`] can be converted to the types provided by `std::arch` to make use of target-specific functions.
+
+Many targets simply don't have SIMD, or don't support SIMD for a particular element type.
+In those cases, regular scalar operations are generated instead.
+
+[^1]: `_mm_min_ps(x, y)` is equivalent to `x.simd_lt(y).select(x, y)`
diff --git a/library/portable-simd/crates/core_simd/src/fmt.rs b/library/portable-simd/crates/core_simd/src/fmt.rs
index b7317969c..3a540f5a0 100644
--- a/library/portable-simd/crates/core_simd/src/fmt.rs
+++ b/library/portable-simd/crates/core_simd/src/fmt.rs
@@ -1,9 +1,9 @@
use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
use core::fmt;
-impl<T, const LANES: usize> fmt::Debug for Simd<T, LANES>
+impl<T, const N: usize> fmt::Debug for Simd<T, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
T: SimdElement + fmt::Debug,
{
/// A `Simd<T, N>` has a debug format like the one for `[T]`:
diff --git a/library/portable-simd/crates/core_simd/src/intrinsics.rs b/library/portable-simd/crates/core_simd/src/intrinsics.rs
index dd6698e2b..b27893bc7 100644
--- a/library/portable-simd/crates/core_simd/src/intrinsics.rs
+++ b/library/portable-simd/crates/core_simd/src/intrinsics.rs
@@ -160,4 +160,10 @@ extern "platform-intrinsic" {
/// convert an exposed address back to a pointer
pub(crate) fn simd_from_exposed_addr<T, U>(addr: T) -> U;
+
+ // Integer operations
+ pub(crate) fn simd_bswap<T>(x: T) -> T;
+ pub(crate) fn simd_bitreverse<T>(x: T) -> T;
+ pub(crate) fn simd_ctlz<T>(x: T) -> T;
+ pub(crate) fn simd_cttz<T>(x: T) -> T;
}
diff --git a/library/portable-simd/crates/core_simd/src/iter.rs b/library/portable-simd/crates/core_simd/src/iter.rs
index 328c995b8..b3732fd74 100644
--- a/library/portable-simd/crates/core_simd/src/iter.rs
+++ b/library/portable-simd/crates/core_simd/src/iter.rs
@@ -6,9 +6,9 @@ use core::{
macro_rules! impl_traits {
{ $type:ty } => {
- impl<const LANES: usize> Sum<Self> for Simd<$type, LANES>
+ impl<const N: usize> Sum<Self> for Simd<$type, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
@@ -16,9 +16,9 @@ macro_rules! impl_traits {
}
}
- impl<const LANES: usize> Product<Self> for Simd<$type, LANES>
+ impl<const N: usize> Product<Self> for Simd<$type, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn product<I: Iterator<Item = Self>>(iter: I) -> Self {
@@ -26,9 +26,9 @@ macro_rules! impl_traits {
}
}
- impl<'a, const LANES: usize> Sum<&'a Self> for Simd<$type, LANES>
+ impl<'a, const N: usize> Sum<&'a Self> for Simd<$type, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn sum<I: Iterator<Item = &'a Self>>(iter: I) -> Self {
@@ -36,9 +36,9 @@ macro_rules! impl_traits {
}
}
- impl<'a, const LANES: usize> Product<&'a Self> for Simd<$type, LANES>
+ impl<'a, const N: usize> Product<&'a Self> for Simd<$type, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn product<I: Iterator<Item = &'a Self>>(iter: I) -> Self {
diff --git a/library/portable-simd/crates/core_simd/src/lane_count.rs b/library/portable-simd/crates/core_simd/src/lane_count.rs
index 2b91eb9e8..4cd7265ed 100644
--- a/library/portable-simd/crates/core_simd/src/lane_count.rs
+++ b/library/portable-simd/crates/core_simd/src/lane_count.rs
@@ -4,11 +4,11 @@ mod sealed {
use sealed::Sealed;
/// Specifies the number of lanes in a SIMD vector as a type.
-pub struct LaneCount<const LANES: usize>;
+pub struct LaneCount<const N: usize>;
-impl<const LANES: usize> LaneCount<LANES> {
+impl<const N: usize> LaneCount<N> {
/// The number of bytes in a bitmask with this many lanes.
- pub const BITMASK_LEN: usize = (LANES + 7) / 8;
+ pub const BITMASK_LEN: usize = (N + 7) / 8;
}
/// Statically guarantees that a lane count is marked as supported.
@@ -21,7 +21,7 @@ pub trait SupportedLaneCount: Sealed {
type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>;
}
-impl<const LANES: usize> Sealed for LaneCount<LANES> {}
+impl<const N: usize> Sealed for LaneCount<N> {}
macro_rules! supported_lane_count {
($($lanes:literal),+) => {
diff --git a/library/portable-simd/crates/core_simd/src/lib.rs b/library/portable-simd/crates/core_simd/src/lib.rs
index fde406bda..64ba9705e 100644
--- a/library/portable-simd/crates/core_simd/src/lib.rs
+++ b/library/portable-simd/crates/core_simd/src/lib.rs
@@ -5,6 +5,7 @@
const_mut_refs,
convert_float_to_int,
decl_macro,
+ inline_const,
intra_doc_pointers,
platform_intrinsics,
repr_simd,
@@ -14,10 +15,9 @@
strict_provenance,
ptr_metadata
)]
-#![cfg_attr(feature = "generic_const_exprs", feature(generic_const_exprs))]
-#![cfg_attr(feature = "generic_const_exprs", allow(incomplete_features))]
#![warn(missing_docs, clippy::missing_inline_in_public_items)] // basically all items, really
#![deny(unsafe_op_in_unsafe_fn, clippy::undocumented_unsafe_blocks)]
+#![allow(internal_features)]
#![unstable(feature = "portable_simd", issue = "86656")]
//! Portable SIMD module.
diff --git a/library/portable-simd/crates/core_simd/src/masks.rs b/library/portable-simd/crates/core_simd/src/masks.rs
index fea687bdc..0623d2bf3 100644
--- a/library/portable-simd/crates/core_simd/src/masks.rs
+++ b/library/portable-simd/crates/core_simd/src/masks.rs
@@ -1,4 +1,4 @@
-//! Types and traits associated with masking lanes of vectors.
+//! Types and traits associated with masking elements of vectors.
//! Types representing
#![allow(non_camel_case_types)]
@@ -12,13 +12,9 @@
)]
mod mask_impl;
-mod to_bitmask;
-pub use to_bitmask::ToBitMask;
-
-#[cfg(feature = "generic_const_exprs")]
-pub use to_bitmask::{bitmask_len, ToBitMaskArray};
-
-use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount};
+use crate::simd::{
+ cmp::SimdPartialEq, intrinsics, LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount,
+};
use core::cmp::Ordering;
use core::{fmt, mem};
@@ -32,13 +28,17 @@ mod sealed {
/// prevent us from ever removing that bound, or from implementing `MaskElement` on
/// non-`PartialEq` types in the future.
pub trait Sealed {
- fn valid<const LANES: usize>(values: Simd<Self, LANES>) -> bool
+ fn valid<const N: usize>(values: Simd<Self, N>) -> bool
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
Self: SimdElement;
fn eq(self, other: Self) -> bool;
+ fn as_usize(self) -> usize;
+
+ type Unsigned: SimdElement;
+
const TRUE: Self;
const FALSE: Self;
@@ -50,15 +50,15 @@ use sealed::Sealed;
///
/// # Safety
/// Type must be a signed integer.
-pub unsafe trait MaskElement: SimdElement + Sealed {}
+pub unsafe trait MaskElement: SimdElement<Mask = Self> + SimdCast + Sealed {}
macro_rules! impl_element {
- { $ty:ty } => {
+ { $ty:ty, $unsigned:ty } => {
impl Sealed for $ty {
#[inline]
- fn valid<const LANES: usize>(value: Simd<Self, LANES>) -> bool
+ fn valid<const N: usize>(value: Simd<Self, N>) -> bool
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
(value.simd_eq(Simd::splat(0 as _)) | value.simd_eq(Simd::splat(-1 as _))).all()
}
@@ -66,6 +66,13 @@ macro_rules! impl_element {
#[inline]
fn eq(self, other: Self) -> bool { self == other }
+ #[inline]
+ fn as_usize(self) -> usize {
+ self as usize
+ }
+
+ type Unsigned = $unsigned;
+
const TRUE: Self = -1;
const FALSE: Self = 0;
}
@@ -75,36 +82,36 @@ macro_rules! impl_element {
}
}
-impl_element! { i8 }
-impl_element! { i16 }
-impl_element! { i32 }
-impl_element! { i64 }
-impl_element! { isize }
+impl_element! { i8, u8 }
+impl_element! { i16, u16 }
+impl_element! { i32, u32 }
+impl_element! { i64, u64 }
+impl_element! { isize, usize }
-/// A SIMD vector mask for `LANES` elements of width specified by `Element`.
+/// A SIMD vector mask for `N` elements of width specified by `Element`.
///
-/// Masks represent boolean inclusion/exclusion on a per-lane basis.
+/// Masks represent boolean inclusion/exclusion on a per-element basis.
///
/// The layout of this type is unspecified, and may change between platforms
/// and/or Rust versions, and code should not assume that it is equivalent to
-/// `[T; LANES]`.
-#[cfg_attr(not(doc), repr(transparent))] // work around https://github.com/rust-lang/rust/issues/90435
-pub struct Mask<T, const LANES: usize>(mask_impl::Mask<T, LANES>)
+/// `[T; N]`.
+#[repr(transparent)]
+pub struct Mask<T, const N: usize>(mask_impl::Mask<T, N>)
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount;
+ LaneCount<N>: SupportedLaneCount;
-impl<T, const LANES: usize> Copy for Mask<T, LANES>
+impl<T, const N: usize> Copy for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
}
-impl<T, const LANES: usize> Clone for Mask<T, LANES>
+impl<T, const N: usize> Clone for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn clone(&self) -> Self {
@@ -112,12 +119,12 @@ where
}
}
-impl<T, const LANES: usize> Mask<T, LANES>
+impl<T, const N: usize> Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
- /// Construct a mask by setting all lanes to the given value.
+ /// Construct a mask by setting all elements to the given value.
#[inline]
pub fn splat(value: bool) -> Self {
Self(mask_impl::Mask::splat(value))
@@ -125,7 +132,7 @@ where
/// Converts an array of bools to a SIMD mask.
#[inline]
- pub fn from_array(array: [bool; LANES]) -> Self {
+ pub fn from_array(array: [bool; N]) -> Self {
// SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of
// true: 0b_0000_0001
// false: 0b_0000_0000
@@ -133,16 +140,15 @@ where
// This would be hypothetically valid as an "in-place" transmute,
// but these are "dependently-sized" types, so copy elision it is!
unsafe {
- let bytes: [u8; LANES] = mem::transmute_copy(&array);
- let bools: Simd<i8, LANES> =
- intrinsics::simd_ne(Simd::from_array(bytes), Simd::splat(0u8));
+ let bytes: [u8; N] = mem::transmute_copy(&array);
+ let bools: Simd<i8, N> = intrinsics::simd_ne(Simd::from_array(bytes), Simd::splat(0u8));
Mask::from_int_unchecked(intrinsics::simd_cast(bools))
}
}
/// Converts a SIMD mask to an array of bools.
#[inline]
- pub fn to_array(self) -> [bool; LANES] {
+ pub fn to_array(self) -> [bool; N] {
// This follows mostly the same logic as from_array.
// SAFETY: Rust's bool has a layout of 1 byte (u8) with a value of
// true: 0b_0000_0001
@@ -154,7 +160,7 @@ where
// This would be hypothetically valid as an "in-place" transmute,
// but these are "dependently-sized" types, so copy elision it is!
unsafe {
- let mut bytes: Simd<i8, LANES> = intrinsics::simd_cast(self.to_int());
+ let mut bytes: Simd<i8, N> = intrinsics::simd_cast(self.to_int());
bytes &= Simd::splat(1i8);
mem::transmute_copy(&bytes)
}
@@ -164,10 +170,10 @@ where
/// represents `true`.
///
/// # Safety
- /// All lanes must be either 0 or -1.
+ /// All elements must be either 0 or -1.
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
- pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
+ pub unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
// Safety: the caller must confirm this invariant
unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) }
}
@@ -176,11 +182,11 @@ where
/// represents `true`.
///
/// # Panics
- /// Panics if any lane is not 0 or -1.
+ /// Panics if any element is not 0 or -1.
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
#[track_caller]
- pub fn from_int(value: Simd<T, LANES>) -> Self {
+ pub fn from_int(value: Simd<T, N>) -> Self {
assert!(T::valid(value), "all values must be either 0 or -1",);
// Safety: the validity has been checked
unsafe { Self::from_int_unchecked(value) }
@@ -190,121 +196,244 @@ where
/// represents `true`.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original value"]
- pub fn to_int(self) -> Simd<T, LANES> {
+ pub fn to_int(self) -> Simd<T, N> {
self.0.to_int()
}
- /// Converts the mask to a mask of any other lane size.
+ /// Converts the mask to a mask of any other element size.
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
- pub fn cast<U: MaskElement>(self) -> Mask<U, LANES> {
+ pub fn cast<U: MaskElement>(self) -> Mask<U, N> {
Mask(self.0.convert())
}
- /// Tests the value of the specified lane.
+ /// Tests the value of the specified element.
///
/// # Safety
- /// `lane` must be less than `LANES`.
+ /// `index` must be less than `self.len()`.
#[inline]
#[must_use = "method returns a new bool and does not mutate the original value"]
- pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
+ pub unsafe fn test_unchecked(&self, index: usize) -> bool {
// Safety: the caller must confirm this invariant
- unsafe { self.0.test_unchecked(lane) }
+ unsafe { self.0.test_unchecked(index) }
}
- /// Tests the value of the specified lane.
+ /// Tests the value of the specified element.
///
/// # Panics
- /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
+ /// Panics if `index` is greater than or equal to the number of elements in the vector.
#[inline]
#[must_use = "method returns a new bool and does not mutate the original value"]
#[track_caller]
- pub fn test(&self, lane: usize) -> bool {
- assert!(lane < LANES, "lane index out of range");
- // Safety: the lane index has been checked
- unsafe { self.test_unchecked(lane) }
+ pub fn test(&self, index: usize) -> bool {
+ assert!(index < N, "element index out of range");
+ // Safety: the element index has been checked
+ unsafe { self.test_unchecked(index) }
}
- /// Sets the value of the specified lane.
+ /// Sets the value of the specified element.
///
/// # Safety
- /// `lane` must be less than `LANES`.
+ /// `index` must be less than `self.len()`.
#[inline]
- pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
+ pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) {
// Safety: the caller must confirm this invariant
unsafe {
- self.0.set_unchecked(lane, value);
+ self.0.set_unchecked(index, value);
}
}
- /// Sets the value of the specified lane.
+ /// Sets the value of the specified element.
///
/// # Panics
- /// Panics if `lane` is greater than or equal to the number of lanes in the vector.
+ /// Panics if `index` is greater than or equal to the number of elements in the vector.
#[inline]
#[track_caller]
- pub fn set(&mut self, lane: usize, value: bool) {
- assert!(lane < LANES, "lane index out of range");
- // Safety: the lane index has been checked
+ pub fn set(&mut self, index: usize, value: bool) {
+ assert!(index < N, "element index out of range");
+ // Safety: the element index has been checked
unsafe {
- self.set_unchecked(lane, value);
+ self.set_unchecked(index, value);
}
}
- /// Returns true if any lane is set, or false otherwise.
+ /// Returns true if any element is set, or false otherwise.
#[inline]
#[must_use = "method returns a new bool and does not mutate the original value"]
pub fn any(self) -> bool {
self.0.any()
}
- /// Returns true if all lanes are set, or false otherwise.
+ /// Returns true if all elements are set, or false otherwise.
#[inline]
#[must_use = "method returns a new bool and does not mutate the original value"]
pub fn all(self) -> bool {
self.0.all()
}
+
+ /// Create a bitmask from a mask.
+ ///
+ /// Each bit is set if the corresponding element in the mask is `true`.
+ /// If the mask contains more than 64 elements, the bitmask is truncated to the first 64.
+ #[inline]
+ #[must_use = "method returns a new integer and does not mutate the original value"]
+ pub fn to_bitmask(self) -> u64 {
+ self.0.to_bitmask_integer()
+ }
+
+ /// Create a mask from a bitmask.
+ ///
+ /// For each bit, if it is set, the corresponding element in the mask is set to `true`.
+ /// If the mask contains more than 64 elements, the remainder are set to `false`.
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn from_bitmask(bitmask: u64) -> Self {
+ Self(mask_impl::Mask::from_bitmask_integer(bitmask))
+ }
+
+ /// Create a bitmask vector from a mask.
+ ///
+ /// Each bit is set if the corresponding element in the mask is `true`.
+ /// The remaining bits are unset.
+ ///
+ /// The bits are packed into the first N bits of the vector:
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::mask32x8;
+ /// let mask = mask32x8::from_array([true, false, true, false, false, false, true, false]);
+ /// assert_eq!(mask.to_bitmask_vector()[0], 0b01000101);
+ /// ```
+ #[inline]
+ #[must_use = "method returns a new integer and does not mutate the original value"]
+ pub fn to_bitmask_vector(self) -> Simd<u8, N> {
+ self.0.to_bitmask_vector()
+ }
+
+ /// Create a mask from a bitmask vector.
+ ///
+ /// For each bit, if it is set, the corresponding element in the mask is set to `true`.
+ ///
+ /// The bits are packed into the first N bits of the vector:
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::{mask32x8, u8x8};
+ /// let bitmask = u8x8::from_array([0b01000101, 0, 0, 0, 0, 0, 0, 0]);
+ /// assert_eq!(
+ /// mask32x8::from_bitmask_vector(bitmask),
+ /// mask32x8::from_array([true, false, true, false, false, false, true, false]),
+ /// );
+ /// ```
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original value"]
+ pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self {
+ Self(mask_impl::Mask::from_bitmask_vector(bitmask))
+ }
+
+ /// Find the index of the first set element.
+ ///
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::mask32x8;
+ /// assert_eq!(mask32x8::splat(false).first_set(), None);
+ /// assert_eq!(mask32x8::splat(true).first_set(), Some(0));
+ ///
+ /// let mask = mask32x8::from_array([false, true, false, false, true, false, false, true]);
+ /// assert_eq!(mask.first_set(), Some(1));
+ /// ```
+ #[inline]
+ #[must_use = "method returns the index and does not mutate the original value"]
+ pub fn first_set(self) -> Option<usize> {
+ // If bitmasks are efficient, using them is better
+ if cfg!(target_feature = "sse") && N <= 64 {
+ let tz = self.to_bitmask().trailing_zeros();
+ return if tz == 64 { None } else { Some(tz as usize) };
+ }
+
+ // To find the first set index:
+ // * create a vector 0..N
+ // * replace unset mask elements in that vector with -1
+ // * perform _unsigned_ reduce-min
+ // * check if the result is -1 or an index
+
+ let index = Simd::from_array(
+ const {
+ let mut index = [0; N];
+ let mut i = 0;
+ while i < N {
+ index[i] = i;
+ i += 1;
+ }
+ index
+ },
+ );
+
+ // Safety: the input and output are integer vectors
+ let index: Simd<T, N> = unsafe { intrinsics::simd_cast(index) };
+
+ let masked_index = self.select(index, Self::splat(true).to_int());
+
+ // Safety: the input and output are integer vectors
+ let masked_index: Simd<T::Unsigned, N> = unsafe { intrinsics::simd_cast(masked_index) };
+
+ // Safety: the input is an integer vector
+ let min_index: T::Unsigned = unsafe { intrinsics::simd_reduce_min(masked_index) };
+
+ // Safety: the return value is the unsigned version of T
+ let min_index: T = unsafe { core::mem::transmute_copy(&min_index) };
+
+ if min_index.eq(T::TRUE) {
+ None
+ } else {
+ Some(min_index.as_usize())
+ }
+ }
}
// vector/array conversion
-impl<T, const LANES: usize> From<[bool; LANES]> for Mask<T, LANES>
+impl<T, const N: usize> From<[bool; N]> for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
- fn from(array: [bool; LANES]) -> Self {
+ fn from(array: [bool; N]) -> Self {
Self::from_array(array)
}
}
-impl<T, const LANES: usize> From<Mask<T, LANES>> for [bool; LANES]
+impl<T, const N: usize> From<Mask<T, N>> for [bool; N]
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
- fn from(vector: Mask<T, LANES>) -> Self {
+ fn from(vector: Mask<T, N>) -> Self {
vector.to_array()
}
}
-impl<T, const LANES: usize> Default for Mask<T, LANES>
+impl<T, const N: usize> Default for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
- #[must_use = "method returns a defaulted mask with all lanes set to false (0)"]
+ #[must_use = "method returns a defaulted mask with all elements set to false (0)"]
fn default() -> Self {
Self::splat(false)
}
}
-impl<T, const LANES: usize> PartialEq for Mask<T, LANES>
+impl<T, const N: usize> PartialEq for Mask<T, N>
where
T: MaskElement + PartialEq,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
#[must_use = "method returns a new bool and does not mutate the original value"]
@@ -313,10 +442,10 @@ where
}
}
-impl<T, const LANES: usize> PartialOrd for Mask<T, LANES>
+impl<T, const N: usize> PartialOrd for Mask<T, N>
where
T: MaskElement + PartialOrd,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
#[must_use = "method returns a new Ordering and does not mutate the original value"]
@@ -325,23 +454,23 @@ where
}
}
-impl<T, const LANES: usize> fmt::Debug for Mask<T, LANES>
+impl<T, const N: usize> fmt::Debug for Mask<T, N>
where
T: MaskElement + fmt::Debug,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_list()
- .entries((0..LANES).map(|lane| self.test(lane)))
+ .entries((0..N).map(|i| self.test(i)))
.finish()
}
}
-impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitAnd for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
@@ -351,10 +480,10 @@ where
}
}
-impl<T, const LANES: usize> core::ops::BitAnd<bool> for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitAnd<bool> for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
@@ -364,23 +493,23 @@ where
}
}
-impl<T, const LANES: usize> core::ops::BitAnd<Mask<T, LANES>> for bool
+impl<T, const N: usize> core::ops::BitAnd<Mask<T, N>> for bool
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
- type Output = Mask<T, LANES>;
+ type Output = Mask<T, N>;
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
- fn bitand(self, rhs: Mask<T, LANES>) -> Mask<T, LANES> {
+ fn bitand(self, rhs: Mask<T, N>) -> Mask<T, N> {
Mask::splat(self) & rhs
}
}
-impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitOr for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
@@ -390,10 +519,10 @@ where
}
}
-impl<T, const LANES: usize> core::ops::BitOr<bool> for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitOr<bool> for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
@@ -403,23 +532,23 @@ where
}
}
-impl<T, const LANES: usize> core::ops::BitOr<Mask<T, LANES>> for bool
+impl<T, const N: usize> core::ops::BitOr<Mask<T, N>> for bool
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
- type Output = Mask<T, LANES>;
+ type Output = Mask<T, N>;
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
- fn bitor(self, rhs: Mask<T, LANES>) -> Mask<T, LANES> {
+ fn bitor(self, rhs: Mask<T, N>) -> Mask<T, N> {
Mask::splat(self) | rhs
}
}
-impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitXor for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
@@ -429,10 +558,10 @@ where
}
}
-impl<T, const LANES: usize> core::ops::BitXor<bool> for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitXor<bool> for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
@@ -442,25 +571,25 @@ where
}
}
-impl<T, const LANES: usize> core::ops::BitXor<Mask<T, LANES>> for bool
+impl<T, const N: usize> core::ops::BitXor<Mask<T, N>> for bool
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
- type Output = Mask<T, LANES>;
+ type Output = Mask<T, N>;
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
- fn bitxor(self, rhs: Mask<T, LANES>) -> Self::Output {
+ fn bitxor(self, rhs: Mask<T, N>) -> Self::Output {
Mask::splat(self) ^ rhs
}
}
-impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES>
+impl<T, const N: usize> core::ops::Not for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
- type Output = Mask<T, LANES>;
+ type Output = Mask<T, N>;
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
fn not(self) -> Self::Output {
@@ -468,10 +597,10 @@ where
}
}
-impl<T, const LANES: usize> core::ops::BitAndAssign for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitAndAssign for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn bitand_assign(&mut self, rhs: Self) {
@@ -479,10 +608,10 @@ where
}
}
-impl<T, const LANES: usize> core::ops::BitAndAssign<bool> for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitAndAssign<bool> for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn bitand_assign(&mut self, rhs: bool) {
@@ -490,10 +619,10 @@ where
}
}
-impl<T, const LANES: usize> core::ops::BitOrAssign for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitOrAssign for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn bitor_assign(&mut self, rhs: Self) {
@@ -501,10 +630,10 @@ where
}
}
-impl<T, const LANES: usize> core::ops::BitOrAssign<bool> for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitOrAssign<bool> for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn bitor_assign(&mut self, rhs: bool) {
@@ -512,10 +641,10 @@ where
}
}
-impl<T, const LANES: usize> core::ops::BitXorAssign for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitXorAssign for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn bitxor_assign(&mut self, rhs: Self) {
@@ -523,10 +652,10 @@ where
}
}
-impl<T, const LANES: usize> core::ops::BitXorAssign<bool> for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitXorAssign<bool> for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn bitxor_assign(&mut self, rhs: bool) {
@@ -537,12 +666,12 @@ where
macro_rules! impl_from {
{ $from:ty => $($to:ty),* } => {
$(
- impl<const LANES: usize> From<Mask<$from, LANES>> for Mask<$to, LANES>
+ impl<const N: usize> From<Mask<$from, N>> for Mask<$to, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
- fn from(value: Mask<$from, LANES>) -> Self {
+ fn from(value: Mask<$from, N>) -> Self {
value.cast()
}
}
diff --git a/library/portable-simd/crates/core_simd/src/masks/bitmask.rs b/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
index 20465ba9b..6ddff07fe 100644
--- a/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
+++ b/library/portable-simd/crates/core_simd/src/masks/bitmask.rs
@@ -1,30 +1,30 @@
#![allow(unused_imports)]
use super::MaskElement;
use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
+use crate::simd::{LaneCount, Simd, SupportedLaneCount};
use core::marker::PhantomData;
/// A mask where each lane is represented by a single bit.
#[repr(transparent)]
-pub struct Mask<T, const LANES: usize>(
- <LaneCount<LANES> as SupportedLaneCount>::BitMask,
+pub struct Mask<T, const N: usize>(
+ <LaneCount<N> as SupportedLaneCount>::BitMask,
PhantomData<T>,
)
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount;
+ LaneCount<N>: SupportedLaneCount;
-impl<T, const LANES: usize> Copy for Mask<T, LANES>
+impl<T, const N: usize> Copy for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
}
-impl<T, const LANES: usize> Clone for Mask<T, LANES>
+impl<T, const N: usize> Clone for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn clone(&self) -> Self {
@@ -32,10 +32,10 @@ where
}
}
-impl<T, const LANES: usize> PartialEq for Mask<T, LANES>
+impl<T, const N: usize> PartialEq for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn eq(&self, other: &Self) -> bool {
@@ -43,10 +43,10 @@ where
}
}
-impl<T, const LANES: usize> PartialOrd for Mask<T, LANES>
+impl<T, const N: usize> PartialOrd for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
@@ -54,17 +54,17 @@ where
}
}
-impl<T, const LANES: usize> Eq for Mask<T, LANES>
+impl<T, const N: usize> Eq for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
}
-impl<T, const LANES: usize> Ord for Mask<T, LANES>
+impl<T, const N: usize> Ord for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn cmp(&self, other: &Self) -> core::cmp::Ordering {
@@ -72,22 +72,22 @@ where
}
}
-impl<T, const LANES: usize> Mask<T, LANES>
+impl<T, const N: usize> Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn splat(value: bool) -> Self {
- let mut mask = <LaneCount<LANES> as SupportedLaneCount>::BitMask::default();
+ let mut mask = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
if value {
mask.as_mut().fill(u8::MAX)
} else {
mask.as_mut().fill(u8::MIN)
}
- if LANES % 8 > 0 {
- *mask.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8);
+ if N % 8 > 0 {
+ *mask.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - N % 8);
}
Self(mask, PhantomData)
}
@@ -107,7 +107,7 @@ where
#[inline]
#[must_use = "method returns a new vector and does not mutate the original value"]
- pub fn to_int(self) -> Simd<T, LANES> {
+ pub fn to_int(self) -> Simd<T, N> {
unsafe {
intrinsics::simd_select_bitmask(self.0, Simd::splat(T::TRUE), Simd::splat(T::FALSE))
}
@@ -115,51 +115,47 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
- pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
+ pub unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) }
}
- #[cfg(feature = "generic_const_exprs")]
#[inline]
- #[must_use = "method returns a new array and does not mutate the original value"]
- pub fn to_bitmask_array<const N: usize>(self) -> [u8; N] {
- assert!(core::mem::size_of::<Self>() == N);
-
- // Safety: converting an integer to an array of bytes of the same size is safe
- unsafe { core::mem::transmute_copy(&self.0) }
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ pub fn to_bitmask_vector(self) -> Simd<u8, N> {
+ let mut bitmask = Simd::splat(0);
+ bitmask.as_mut_array()[..self.0.as_ref().len()].copy_from_slice(self.0.as_ref());
+ bitmask
}
- #[cfg(feature = "generic_const_exprs")]
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
- pub fn from_bitmask_array<const N: usize>(bitmask: [u8; N]) -> Self {
- assert!(core::mem::size_of::<Self>() == N);
-
- // Safety: converting an array of bytes to an integer of the same size is safe
- Self(unsafe { core::mem::transmute_copy(&bitmask) }, PhantomData)
+ pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self {
+ let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
+ let len = bytes.as_ref().len();
+ bytes.as_mut().copy_from_slice(&bitmask.as_array()[..len]);
+ Self(bytes, PhantomData)
}
#[inline]
- pub fn to_bitmask_integer<U>(self) -> U
- where
- super::Mask<T, LANES>: ToBitMask<BitMask = U>,
- {
- // Safety: these are the same types
- unsafe { core::mem::transmute_copy(&self.0) }
+ pub fn to_bitmask_integer(self) -> u64 {
+ let mut bitmask = [0u8; 8];
+ bitmask[..self.0.as_ref().len()].copy_from_slice(self.0.as_ref());
+ u64::from_ne_bytes(bitmask)
}
#[inline]
- pub fn from_bitmask_integer<U>(bitmask: U) -> Self
- where
- super::Mask<T, LANES>: ToBitMask<BitMask = U>,
- {
- // Safety: these are the same types
- unsafe { Self(core::mem::transmute_copy(&bitmask), PhantomData) }
+ pub fn from_bitmask_integer(bitmask: u64) -> Self {
+ let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
+ let len = bytes.as_mut().len();
+ bytes
+ .as_mut()
+ .copy_from_slice(&bitmask.to_ne_bytes()[..len]);
+ Self(bytes, PhantomData)
}
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
- pub fn convert<U>(self) -> Mask<U, LANES>
+ pub fn convert<U>(self) -> Mask<U, N>
where
U: MaskElement,
{
@@ -180,11 +176,11 @@ where
}
}
-impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitAnd for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
- <LaneCount<LANES> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
+ LaneCount<N>: SupportedLaneCount,
+ <LaneCount<N> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
{
type Output = Self;
#[inline]
@@ -197,11 +193,11 @@ where
}
}
-impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitOr for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
- <LaneCount<LANES> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
+ LaneCount<N>: SupportedLaneCount,
+ <LaneCount<N> as SupportedLaneCount>::BitMask: AsRef<[u8]> + AsMut<[u8]>,
{
type Output = Self;
#[inline]
@@ -214,10 +210,10 @@ where
}
}
-impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitXor for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
@@ -230,10 +226,10 @@ where
}
}
-impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES>
+impl<T, const N: usize> core::ops::Not for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
@@ -242,8 +238,8 @@ where
for x in self.0.as_mut() {
*x = !*x;
}
- if LANES % 8 > 0 {
- *self.0.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - LANES % 8);
+ if N % 8 > 0 {
+ *self.0.as_mut().last_mut().unwrap() &= u8::MAX >> (8 - N % 8);
}
self
}
diff --git a/library/portable-simd/crates/core_simd/src/masks/full_masks.rs b/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
index 1d13c45b8..63964f455 100644
--- a/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
+++ b/library/portable-simd/crates/core_simd/src/masks/full_masks.rs
@@ -1,29 +1,25 @@
//! Masks that take up full SIMD vector registers.
-use super::MaskElement;
use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
-
-#[cfg(feature = "generic_const_exprs")]
-use crate::simd::ToBitMaskArray;
+use crate::simd::{LaneCount, MaskElement, Simd, SupportedLaneCount};
#[repr(transparent)]
-pub struct Mask<T, const LANES: usize>(Simd<T, LANES>)
+pub struct Mask<T, const N: usize>(Simd<T, N>)
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount;
+ LaneCount<N>: SupportedLaneCount;
-impl<T, const LANES: usize> Copy for Mask<T, LANES>
+impl<T, const N: usize> Copy for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
}
-impl<T, const LANES: usize> Clone for Mask<T, LANES>
+impl<T, const N: usize> Clone for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
@@ -32,10 +28,10 @@ where
}
}
-impl<T, const LANES: usize> PartialEq for Mask<T, LANES>
+impl<T, const N: usize> PartialEq for Mask<T, N>
where
T: MaskElement + PartialEq,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn eq(&self, other: &Self) -> bool {
@@ -43,10 +39,10 @@ where
}
}
-impl<T, const LANES: usize> PartialOrd for Mask<T, LANES>
+impl<T, const N: usize> PartialOrd for Mask<T, N>
where
T: MaskElement + PartialOrd,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
@@ -54,17 +50,17 @@ where
}
}
-impl<T, const LANES: usize> Eq for Mask<T, LANES>
+impl<T, const N: usize> Eq for Mask<T, N>
where
T: MaskElement + Eq,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
}
-impl<T, const LANES: usize> Ord for Mask<T, LANES>
+impl<T, const N: usize> Ord for Mask<T, N>
where
T: MaskElement + Ord,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn cmp(&self, other: &Self) -> core::cmp::Ordering {
@@ -101,10 +97,10 @@ macro_rules! impl_reverse_bits {
impl_reverse_bits! { u8, u16, u32, u64 }
-impl<T, const LANES: usize> Mask<T, LANES>
+impl<T, const N: usize> Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
@@ -125,19 +121,19 @@ where
#[inline]
#[must_use = "method returns a new vector and does not mutate the original value"]
- pub fn to_int(self) -> Simd<T, LANES> {
+ pub fn to_int(self) -> Simd<T, N> {
self.0
}
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
- pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
+ pub unsafe fn from_int_unchecked(value: Simd<T, N>) -> Self {
Self(value)
}
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
- pub fn convert<U>(self) -> Mask<U, LANES>
+ pub fn convert<U>(self) -> Mask<U, N>
where
U: MaskElement,
{
@@ -145,62 +141,50 @@ where
unsafe { Mask(intrinsics::simd_cast(self.0)) }
}
- #[cfg(feature = "generic_const_exprs")]
#[inline]
- #[must_use = "method returns a new array and does not mutate the original value"]
- pub fn to_bitmask_array<const N: usize>(self) -> [u8; N]
- where
- super::Mask<T, LANES>: ToBitMaskArray,
- [(); <super::Mask<T, LANES> as ToBitMaskArray>::BYTES]: Sized,
- {
- assert_eq!(<super::Mask<T, LANES> as ToBitMaskArray>::BYTES, N);
+ #[must_use = "method returns a new vector and does not mutate the original value"]
+ pub fn to_bitmask_vector(self) -> Simd<u8, N> {
+ let mut bitmask = Simd::splat(0);
- // Safety: N is the correct bitmask size
+ // Safety: `bytes` is an array of the right size for the bitmask
unsafe {
// Compute the bitmask
- let bitmask: [u8; <super::Mask<T, LANES> as ToBitMaskArray>::BYTES] =
+ let mut bytes: <LaneCount<N> as SupportedLaneCount>::BitMask =
intrinsics::simd_bitmask(self.0);
- // Transmute to the return type, previously asserted to be the same size
- let mut bitmask: [u8; N] = core::mem::transmute_copy(&bitmask);
-
// LLVM assumes bit order should match endianness
if cfg!(target_endian = "big") {
- for x in bitmask.as_mut() {
- *x = x.reverse_bits();
+ for x in bytes.as_mut() {
+ *x = x.reverse_bits()
}
- };
+ }
- bitmask
+ bitmask.as_mut_array()[..bytes.as_ref().len()].copy_from_slice(bytes.as_ref());
}
+
+ bitmask
}
- #[cfg(feature = "generic_const_exprs")]
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
- pub fn from_bitmask_array<const N: usize>(mut bitmask: [u8; N]) -> Self
- where
- super::Mask<T, LANES>: ToBitMaskArray,
- [(); <super::Mask<T, LANES> as ToBitMaskArray>::BYTES]: Sized,
- {
- assert_eq!(<super::Mask<T, LANES> as ToBitMaskArray>::BYTES, N);
+ pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self {
+ let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
- // Safety: N is the correct bitmask size
+ // Safety: `bytes` is an array of the right size for the bitmask
unsafe {
+ let len = bytes.as_ref().len();
+ bytes.as_mut().copy_from_slice(&bitmask.as_array()[..len]);
+
// LLVM assumes bit order should match endianness
if cfg!(target_endian = "big") {
- for x in bitmask.as_mut() {
+ for x in bytes.as_mut() {
*x = x.reverse_bits();
}
}
- // Transmute to the bitmask type, previously asserted to be the same size
- let bitmask: [u8; <super::Mask<T, LANES> as ToBitMaskArray>::BYTES] =
- core::mem::transmute_copy(&bitmask);
-
// Compute the regular mask
Self::from_int_unchecked(intrinsics::simd_select_bitmask(
- bitmask,
+ bytes,
Self::splat(true).to_int(),
Self::splat(false).to_int(),
))
@@ -208,40 +192,81 @@ where
}
#[inline]
- pub(crate) fn to_bitmask_integer<U: ReverseBits>(self) -> U
+ unsafe fn to_bitmask_impl<U: ReverseBits, const M: usize>(self) -> U
where
- super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+ LaneCount<M>: SupportedLaneCount,
{
- // Safety: U is required to be the appropriate bitmask type
- let bitmask: U = unsafe { intrinsics::simd_bitmask(self.0) };
+ let resized = self.to_int().resize::<M>(T::FALSE);
+
+ // Safety: `resized` is an integer vector with length M, which must match the bit width of U
+ let bitmask: U = unsafe { intrinsics::simd_bitmask(resized) };
// LLVM assumes bit order should match endianness
if cfg!(target_endian = "big") {
- bitmask.reverse_bits(LANES)
+ bitmask.reverse_bits(M)
} else {
bitmask
}
}
#[inline]
- pub(crate) fn from_bitmask_integer<U: ReverseBits>(bitmask: U) -> Self
+ unsafe fn from_bitmask_impl<U: ReverseBits, const M: usize>(bitmask: U) -> Self
where
- super::Mask<T, LANES>: ToBitMask<BitMask = U>,
+ LaneCount<M>: SupportedLaneCount,
{
// LLVM assumes bit order should match endianness
let bitmask = if cfg!(target_endian = "big") {
- bitmask.reverse_bits(LANES)
+ bitmask.reverse_bits(M)
} else {
bitmask
};
- // Safety: U is required to be the appropriate bitmask type
- unsafe {
- Self::from_int_unchecked(intrinsics::simd_select_bitmask(
+ // SAFETY: the caller guarantees `bitmask` (of type U) is the correct bitmask type for a vector of length M
+ let mask: Simd<T, M> = unsafe {
+ intrinsics::simd_select_bitmask(
bitmask,
- Self::splat(true).to_int(),
- Self::splat(false).to_int(),
- ))
+ Simd::<T, M>::splat(T::TRUE),
+ Simd::<T, M>::splat(T::FALSE),
+ )
+ };
+
+ // SAFETY: `mask` only contains `T::TRUE` or `T::FALSE`
+ unsafe { Self::from_int_unchecked(mask.resize::<N>(T::FALSE)) }
+ }
+
+ #[inline]
+ pub(crate) fn to_bitmask_integer(self) -> u64 {
+ // TODO modify simd_bitmask to zero-extend output, making this unnecessary
+ if N <= 8 {
+ // Safety: bitmask matches length
+ unsafe { self.to_bitmask_impl::<u8, 8>() as u64 }
+ } else if N <= 16 {
+ // Safety: bitmask matches length
+ unsafe { self.to_bitmask_impl::<u16, 16>() as u64 }
+ } else if N <= 32 {
+ // Safety: bitmask matches length
+ unsafe { self.to_bitmask_impl::<u32, 32>() as u64 }
+ } else {
+ // Safety: bitmask matches length
+ unsafe { self.to_bitmask_impl::<u64, 64>() }
+ }
+ }
+
+ #[inline]
+ pub(crate) fn from_bitmask_integer(bitmask: u64) -> Self {
+ // TODO modify simd_select_bitmask to truncate input, making this unnecessary
+ if N <= 8 {
+ // Safety: bitmask matches length
+ unsafe { Self::from_bitmask_impl::<u8, 8>(bitmask as u8) }
+ } else if N <= 16 {
+ // Safety: bitmask matches length
+ unsafe { Self::from_bitmask_impl::<u16, 16>(bitmask as u16) }
+ } else if N <= 32 {
+ // Safety: bitmask matches length
+ unsafe { Self::from_bitmask_impl::<u32, 32>(bitmask as u32) }
+ } else {
+ // Safety: bitmask matches length
+ unsafe { Self::from_bitmask_impl::<u64, 64>(bitmask) }
}
}
@@ -260,21 +285,21 @@ where
}
}
-impl<T, const LANES: usize> From<Mask<T, LANES>> for Simd<T, LANES>
+impl<T, const N: usize> From<Mask<T, N>> for Simd<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
- fn from(value: Mask<T, LANES>) -> Self {
+ fn from(value: Mask<T, N>) -> Self {
value.0
}
}
-impl<T, const LANES: usize> core::ops::BitAnd for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitAnd for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
@@ -285,10 +310,10 @@ where
}
}
-impl<T, const LANES: usize> core::ops::BitOr for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitOr for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
@@ -299,10 +324,10 @@ where
}
}
-impl<T, const LANES: usize> core::ops::BitXor for Mask<T, LANES>
+impl<T, const N: usize> core::ops::BitXor for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
@@ -313,10 +338,10 @@ where
}
}
-impl<T, const LANES: usize> core::ops::Not for Mask<T, LANES>
+impl<T, const N: usize> core::ops::Not for Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
#[inline]
diff --git a/library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs b/library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs
deleted file mode 100644
index fc7d6b781..000000000
--- a/library/portable-simd/crates/core_simd/src/masks/to_bitmask.rs
+++ /dev/null
@@ -1,97 +0,0 @@
-use super::{mask_impl, Mask, MaskElement};
-use crate::simd::{LaneCount, SupportedLaneCount};
-
-mod sealed {
- pub trait Sealed {}
-}
-pub use sealed::Sealed;
-
-impl<T, const LANES: usize> Sealed for Mask<T, LANES>
-where
- T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
-{
-}
-
-/// Converts masks to and from integer bitmasks.
-///
-/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB.
-pub trait ToBitMask: Sealed {
- /// The integer bitmask type.
- type BitMask;
-
- /// Converts a mask to a bitmask.
- fn to_bitmask(self) -> Self::BitMask;
-
- /// Converts a bitmask to a mask.
- fn from_bitmask(bitmask: Self::BitMask) -> Self;
-}
-
-/// Converts masks to and from byte array bitmasks.
-///
-/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB of the first byte.
-#[cfg(feature = "generic_const_exprs")]
-pub trait ToBitMaskArray: Sealed {
- /// The length of the bitmask array.
- const BYTES: usize;
-
- /// Converts a mask to a bitmask.
- fn to_bitmask_array(self) -> [u8; Self::BYTES];
-
- /// Converts a bitmask to a mask.
- fn from_bitmask_array(bitmask: [u8; Self::BYTES]) -> Self;
-}
-
-macro_rules! impl_integer_intrinsic {
- { $(impl ToBitMask<BitMask=$int:ty> for Mask<_, $lanes:literal>)* } => {
- $(
- impl<T: MaskElement> ToBitMask for Mask<T, $lanes> {
- type BitMask = $int;
-
- #[inline]
- fn to_bitmask(self) -> $int {
- self.0.to_bitmask_integer()
- }
-
- #[inline]
- fn from_bitmask(bitmask: $int) -> Self {
- Self(mask_impl::Mask::from_bitmask_integer(bitmask))
- }
- }
- )*
- }
-}
-
-impl_integer_intrinsic! {
- impl ToBitMask<BitMask=u8> for Mask<_, 1>
- impl ToBitMask<BitMask=u8> for Mask<_, 2>
- impl ToBitMask<BitMask=u8> for Mask<_, 4>
- impl ToBitMask<BitMask=u8> for Mask<_, 8>
- impl ToBitMask<BitMask=u16> for Mask<_, 16>
- impl ToBitMask<BitMask=u32> for Mask<_, 32>
- impl ToBitMask<BitMask=u64> for Mask<_, 64>
-}
-
-/// Returns the minimum number of bytes in a bitmask with `lanes` lanes.
-#[cfg(feature = "generic_const_exprs")]
-pub const fn bitmask_len(lanes: usize) -> usize {
- (lanes + 7) / 8
-}
-
-#[cfg(feature = "generic_const_exprs")]
-impl<T: MaskElement, const LANES: usize> ToBitMaskArray for Mask<T, LANES>
-where
- LaneCount<LANES>: SupportedLaneCount,
-{
- const BYTES: usize = bitmask_len(LANES);
-
- #[inline]
- fn to_bitmask_array(self) -> [u8; Self::BYTES] {
- self.0.to_bitmask_array()
- }
-
- #[inline]
- fn from_bitmask_array(bitmask: [u8; Self::BYTES]) -> Self {
- Mask(mask_impl::Mask::from_bitmask_array(bitmask))
- }
-}
diff --git a/library/portable-simd/crates/core_simd/src/mod.rs b/library/portable-simd/crates/core_simd/src/mod.rs
index 194267698..fd016f1c6 100644
--- a/library/portable-simd/crates/core_simd/src/mod.rs
+++ b/library/portable-simd/crates/core_simd/src/mod.rs
@@ -3,37 +3,37 @@ mod swizzle;
pub(crate) mod intrinsics;
-#[cfg(feature = "generic_const_exprs")]
-mod to_bytes;
-
mod alias;
mod cast;
-mod elements;
-mod eq;
mod fmt;
mod iter;
mod lane_count;
mod masks;
mod ops;
-mod ord;
mod select;
mod swizzle_dyn;
+mod to_bytes;
mod vector;
mod vendor;
-#[doc = include_str!("core_simd_docs.md")]
pub mod simd {
+ #![doc = include_str!("core_simd_docs.md")]
+
pub mod prelude;
+ pub mod num;
+
+ pub mod ptr;
+
+ pub mod cmp;
+
pub(crate) use crate::core_simd::intrinsics;
pub use crate::core_simd::alias::*;
pub use crate::core_simd::cast::*;
- pub use crate::core_simd::elements::*;
- pub use crate::core_simd::eq::*;
pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount};
pub use crate::core_simd::masks::*;
- pub use crate::core_simd::ord::*;
pub use crate::core_simd::swizzle::*;
+ pub use crate::core_simd::to_bytes::ToBytes;
pub use crate::core_simd::vector::*;
}
diff --git a/library/portable-simd/crates/core_simd/src/ops.rs b/library/portable-simd/crates/core_simd/src/ops.rs
index b007456cf..8a1b083f0 100644
--- a/library/portable-simd/crates/core_simd/src/ops.rs
+++ b/library/portable-simd/crates/core_simd/src/ops.rs
@@ -1,4 +1,4 @@
-use crate::simd::{LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount};
+use crate::simd::{cmp::SimdPartialEq, LaneCount, Simd, SimdElement, SupportedLaneCount};
use core::ops::{Add, Mul};
use core::ops::{BitAnd, BitOr, BitXor};
use core::ops::{Div, Rem, Sub};
@@ -6,12 +6,13 @@ use core::ops::{Shl, Shr};
mod assign;
mod deref;
+mod shift_scalar;
mod unary;
-impl<I, T, const LANES: usize> core::ops::Index<I> for Simd<T, LANES>
+impl<I, T, const N: usize> core::ops::Index<I> for Simd<T, N>
where
T: SimdElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
I: core::slice::SliceIndex<[T]>,
{
type Output = I::Output;
@@ -21,10 +22,10 @@ where
}
}
-impl<I, T, const LANES: usize> core::ops::IndexMut<I> for Simd<T, LANES>
+impl<I, T, const N: usize> core::ops::IndexMut<I> for Simd<T, N>
where
T: SimdElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
I: core::slice::SliceIndex<[T]>,
{
#[inline]
diff --git a/library/portable-simd/crates/core_simd/src/ops/assign.rs b/library/portable-simd/crates/core_simd/src/ops/assign.rs
index d2b48614f..0e8778502 100644
--- a/library/portable-simd/crates/core_simd/src/ops/assign.rs
+++ b/library/portable-simd/crates/core_simd/src/ops/assign.rs
@@ -8,7 +8,7 @@ use core::ops::{ShlAssign, ShrAssign}; // non-commutative bit binary op-assignme
// Arithmetic
macro_rules! assign_ops {
- ($(impl<T, U, const LANES: usize> $assignTrait:ident<U> for Simd<T, LANES>
+ ($(impl<T, U, const N: usize> $assignTrait:ident<U> for Simd<T, N>
where
Self: $trait:ident,
{
@@ -16,11 +16,11 @@ macro_rules! assign_ops {
$call:ident
}
})*) => {
- $(impl<T, U, const LANES: usize> $assignTrait<U> for Simd<T, LANES>
+ $(impl<T, U, const N: usize> $assignTrait<U> for Simd<T, N>
where
Self: $trait<U, Output = Self>,
T: SimdElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn $assign_call(&mut self, rhs: U) {
@@ -32,7 +32,7 @@ macro_rules! assign_ops {
assign_ops! {
// Arithmetic
- impl<T, U, const LANES: usize> AddAssign<U> for Simd<T, LANES>
+ impl<T, U, const N: usize> AddAssign<U> for Simd<T, N>
where
Self: Add,
{
@@ -41,7 +41,7 @@ assign_ops! {
}
}
- impl<T, U, const LANES: usize> MulAssign<U> for Simd<T, LANES>
+ impl<T, U, const N: usize> MulAssign<U> for Simd<T, N>
where
Self: Mul,
{
@@ -50,7 +50,7 @@ assign_ops! {
}
}
- impl<T, U, const LANES: usize> SubAssign<U> for Simd<T, LANES>
+ impl<T, U, const N: usize> SubAssign<U> for Simd<T, N>
where
Self: Sub,
{
@@ -59,7 +59,7 @@ assign_ops! {
}
}
- impl<T, U, const LANES: usize> DivAssign<U> for Simd<T, LANES>
+ impl<T, U, const N: usize> DivAssign<U> for Simd<T, N>
where
Self: Div,
{
@@ -67,7 +67,7 @@ assign_ops! {
div
}
}
- impl<T, U, const LANES: usize> RemAssign<U> for Simd<T, LANES>
+ impl<T, U, const N: usize> RemAssign<U> for Simd<T, N>
where
Self: Rem,
{
@@ -77,7 +77,7 @@ assign_ops! {
}
// Bitops
- impl<T, U, const LANES: usize> BitAndAssign<U> for Simd<T, LANES>
+ impl<T, U, const N: usize> BitAndAssign<U> for Simd<T, N>
where
Self: BitAnd,
{
@@ -86,7 +86,7 @@ assign_ops! {
}
}
- impl<T, U, const LANES: usize> BitOrAssign<U> for Simd<T, LANES>
+ impl<T, U, const N: usize> BitOrAssign<U> for Simd<T, N>
where
Self: BitOr,
{
@@ -95,7 +95,7 @@ assign_ops! {
}
}
- impl<T, U, const LANES: usize> BitXorAssign<U> for Simd<T, LANES>
+ impl<T, U, const N: usize> BitXorAssign<U> for Simd<T, N>
where
Self: BitXor,
{
@@ -104,7 +104,7 @@ assign_ops! {
}
}
- impl<T, U, const LANES: usize> ShlAssign<U> for Simd<T, LANES>
+ impl<T, U, const N: usize> ShlAssign<U> for Simd<T, N>
where
Self: Shl,
{
@@ -113,7 +113,7 @@ assign_ops! {
}
}
- impl<T, U, const LANES: usize> ShrAssign<U> for Simd<T, LANES>
+ impl<T, U, const N: usize> ShrAssign<U> for Simd<T, N>
where
Self: Shr,
{
diff --git a/library/portable-simd/crates/core_simd/src/ops/deref.rs b/library/portable-simd/crates/core_simd/src/ops/deref.rs
index 302bf148b..89a60ba11 100644
--- a/library/portable-simd/crates/core_simd/src/ops/deref.rs
+++ b/library/portable-simd/crates/core_simd/src/ops/deref.rs
@@ -5,16 +5,16 @@
use super::*;
macro_rules! deref_lhs {
- (impl<T, const LANES: usize> $trait:ident for $simd:ty {
+ (impl<T, const N: usize> $trait:ident for $simd:ty {
fn $call:ident
}) => {
- impl<T, const LANES: usize> $trait<$simd> for &$simd
+ impl<T, const N: usize> $trait<$simd> for &$simd
where
T: SimdElement,
$simd: $trait<$simd, Output = $simd>,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
- type Output = Simd<T, LANES>;
+ type Output = Simd<T, N>;
#[inline]
#[must_use = "operator returns a new vector without mutating the inputs"]
@@ -26,16 +26,16 @@ macro_rules! deref_lhs {
}
macro_rules! deref_rhs {
- (impl<T, const LANES: usize> $trait:ident for $simd:ty {
+ (impl<T, const N: usize> $trait:ident for $simd:ty {
fn $call:ident
}) => {
- impl<T, const LANES: usize> $trait<&$simd> for $simd
+ impl<T, const N: usize> $trait<&$simd> for $simd
where
T: SimdElement,
$simd: $trait<$simd, Output = $simd>,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
- type Output = Simd<T, LANES>;
+ type Output = Simd<T, N>;
#[inline]
#[must_use = "operator returns a new vector without mutating the inputs"]
@@ -47,25 +47,25 @@ macro_rules! deref_rhs {
}
macro_rules! deref_ops {
- ($(impl<T, const LANES: usize> $trait:ident for $simd:ty {
+ ($(impl<T, const N: usize> $trait:ident for $simd:ty {
fn $call:ident
})*) => {
$(
deref_rhs! {
- impl<T, const LANES: usize> $trait for $simd {
+ impl<T, const N: usize> $trait for $simd {
fn $call
}
}
deref_lhs! {
- impl<T, const LANES: usize> $trait for $simd {
+ impl<T, const N: usize> $trait for $simd {
fn $call
}
}
- impl<'lhs, 'rhs, T, const LANES: usize> $trait<&'rhs $simd> for &'lhs $simd
+ impl<'lhs, 'rhs, T, const N: usize> $trait<&'rhs $simd> for &'lhs $simd
where
T: SimdElement,
$simd: $trait<$simd, Output = $simd>,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
type Output = $simd;
@@ -81,44 +81,44 @@ macro_rules! deref_ops {
deref_ops! {
// Arithmetic
- impl<T, const LANES: usize> Add for Simd<T, LANES> {
+ impl<T, const N: usize> Add for Simd<T, N> {
fn add
}
- impl<T, const LANES: usize> Mul for Simd<T, LANES> {
+ impl<T, const N: usize> Mul for Simd<T, N> {
fn mul
}
- impl<T, const LANES: usize> Sub for Simd<T, LANES> {
+ impl<T, const N: usize> Sub for Simd<T, N> {
fn sub
}
- impl<T, const LANES: usize> Div for Simd<T, LANES> {
+ impl<T, const N: usize> Div for Simd<T, N> {
fn div
}
- impl<T, const LANES: usize> Rem for Simd<T, LANES> {
+ impl<T, const N: usize> Rem for Simd<T, N> {
fn rem
}
// Bitops
- impl<T, const LANES: usize> BitAnd for Simd<T, LANES> {
+ impl<T, const N: usize> BitAnd for Simd<T, N> {
fn bitand
}
- impl<T, const LANES: usize> BitOr for Simd<T, LANES> {
+ impl<T, const N: usize> BitOr for Simd<T, N> {
fn bitor
}
- impl<T, const LANES: usize> BitXor for Simd<T, LANES> {
+ impl<T, const N: usize> BitXor for Simd<T, N> {
fn bitxor
}
- impl<T, const LANES: usize> Shl for Simd<T, LANES> {
+ impl<T, const N: usize> Shl for Simd<T, N> {
fn shl
}
- impl<T, const LANES: usize> Shr for Simd<T, LANES> {
+ impl<T, const N: usize> Shr for Simd<T, N> {
fn shr
}
}
diff --git a/library/portable-simd/crates/core_simd/src/ops/shift_scalar.rs b/library/portable-simd/crates/core_simd/src/ops/shift_scalar.rs
new file mode 100644
index 000000000..f5115a5a5
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/ops/shift_scalar.rs
@@ -0,0 +1,62 @@
+// Shift operations, unlike most other operators, typically have only a scalar on the right-hand side.
+// Here, we implement shifts for scalar RHS arguments.
+
+use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+
+macro_rules! impl_splatted_shifts {
+ { impl $trait:ident :: $trait_fn:ident for $ty:ty } => {
+ impl<const N: usize> core::ops::$trait<$ty> for Simd<$ty, N>
+ where
+ LaneCount<N>: SupportedLaneCount,
+ {
+ type Output = Self;
+ #[inline]
+ fn $trait_fn(self, rhs: $ty) -> Self::Output {
+ self.$trait_fn(Simd::splat(rhs))
+ }
+ }
+
+ impl<const N: usize> core::ops::$trait<&$ty> for Simd<$ty, N>
+ where
+ LaneCount<N>: SupportedLaneCount,
+ {
+ type Output = Self;
+ #[inline]
+ fn $trait_fn(self, rhs: &$ty) -> Self::Output {
+ self.$trait_fn(Simd::splat(*rhs))
+ }
+ }
+
+ impl<'lhs, const N: usize> core::ops::$trait<$ty> for &'lhs Simd<$ty, N>
+ where
+ LaneCount<N>: SupportedLaneCount,
+ {
+ type Output = Simd<$ty, N>;
+ #[inline]
+ fn $trait_fn(self, rhs: $ty) -> Self::Output {
+ self.$trait_fn(Simd::splat(rhs))
+ }
+ }
+
+ impl<'lhs, const N: usize> core::ops::$trait<&$ty> for &'lhs Simd<$ty, N>
+ where
+ LaneCount<N>: SupportedLaneCount,
+ {
+ type Output = Simd<$ty, N>;
+ #[inline]
+ fn $trait_fn(self, rhs: &$ty) -> Self::Output {
+ self.$trait_fn(Simd::splat(*rhs))
+ }
+ }
+ };
+ { $($ty:ty),* } => {
+ $(
+ impl_splatted_shifts! { impl Shl::shl for $ty }
+ impl_splatted_shifts! { impl Shr::shr for $ty }
+ )*
+ }
+}
+
+// In the past there were inference issues when generically splatting arguments.
+// Enumerate them instead.
+impl_splatted_shifts! { i8, i16, i32, i64, isize, u8, u16, u32, u64, usize }
diff --git a/library/portable-simd/crates/core_simd/src/ops/unary.rs b/library/portable-simd/crates/core_simd/src/ops/unary.rs
index 4ad022150..a651aa73e 100644
--- a/library/portable-simd/crates/core_simd/src/ops/unary.rs
+++ b/library/portable-simd/crates/core_simd/src/ops/unary.rs
@@ -3,11 +3,11 @@ use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
use core::ops::{Neg, Not}; // unary ops
macro_rules! neg {
- ($(impl<const LANES: usize> Neg for Simd<$scalar:ty, LANES>)*) => {
- $(impl<const LANES: usize> Neg for Simd<$scalar, LANES>
+ ($(impl<const N: usize> Neg for Simd<$scalar:ty, N>)*) => {
+ $(impl<const N: usize> Neg for Simd<$scalar, N>
where
$scalar: SimdElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
@@ -22,27 +22,27 @@ macro_rules! neg {
}
neg! {
- impl<const LANES: usize> Neg for Simd<f32, LANES>
+ impl<const N: usize> Neg for Simd<f32, N>
- impl<const LANES: usize> Neg for Simd<f64, LANES>
+ impl<const N: usize> Neg for Simd<f64, N>
- impl<const LANES: usize> Neg for Simd<i8, LANES>
+ impl<const N: usize> Neg for Simd<i8, N>
- impl<const LANES: usize> Neg for Simd<i16, LANES>
+ impl<const N: usize> Neg for Simd<i16, N>
- impl<const LANES: usize> Neg for Simd<i32, LANES>
+ impl<const N: usize> Neg for Simd<i32, N>
- impl<const LANES: usize> Neg for Simd<i64, LANES>
+ impl<const N: usize> Neg for Simd<i64, N>
- impl<const LANES: usize> Neg for Simd<isize, LANES>
+ impl<const N: usize> Neg for Simd<isize, N>
}
macro_rules! not {
- ($(impl<const LANES: usize> Not for Simd<$scalar:ty, LANES>)*) => {
- $(impl<const LANES: usize> Not for Simd<$scalar, LANES>
+ ($(impl<const N: usize> Not for Simd<$scalar:ty, N>)*) => {
+ $(impl<const N: usize> Not for Simd<$scalar, N>
where
$scalar: SimdElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
type Output = Self;
@@ -56,23 +56,23 @@ macro_rules! not {
}
not! {
- impl<const LANES: usize> Not for Simd<i8, LANES>
+ impl<const N: usize> Not for Simd<i8, N>
- impl<const LANES: usize> Not for Simd<i16, LANES>
+ impl<const N: usize> Not for Simd<i16, N>
- impl<const LANES: usize> Not for Simd<i32, LANES>
+ impl<const N: usize> Not for Simd<i32, N>
- impl<const LANES: usize> Not for Simd<i64, LANES>
+ impl<const N: usize> Not for Simd<i64, N>
- impl<const LANES: usize> Not for Simd<isize, LANES>
+ impl<const N: usize> Not for Simd<isize, N>
- impl<const LANES: usize> Not for Simd<u8, LANES>
+ impl<const N: usize> Not for Simd<u8, N>
- impl<const LANES: usize> Not for Simd<u16, LANES>
+ impl<const N: usize> Not for Simd<u16, N>
- impl<const LANES: usize> Not for Simd<u32, LANES>
+ impl<const N: usize> Not for Simd<u32, N>
- impl<const LANES: usize> Not for Simd<u64, LANES>
+ impl<const N: usize> Not for Simd<u64, N>
- impl<const LANES: usize> Not for Simd<usize, LANES>
+ impl<const N: usize> Not for Simd<usize, N>
}
diff --git a/library/portable-simd/crates/core_simd/src/select.rs b/library/portable-simd/crates/core_simd/src/select.rs
index 065c5987d..cdcf8eeec 100644
--- a/library/portable-simd/crates/core_simd/src/select.rs
+++ b/library/portable-simd/crates/core_simd/src/select.rs
@@ -1,15 +1,15 @@
use crate::simd::intrinsics;
use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount};
-impl<T, const LANES: usize> Mask<T, LANES>
+impl<T, const N: usize> Mask<T, N>
where
T: MaskElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
- /// Choose lanes from two vectors.
+ /// Choose elements from two vectors.
///
- /// For each lane in the mask, choose the corresponding lane from `true_values` if
- /// that lane mask is true, and `false_values` if that lane mask is false.
+ /// For each element in the mask, choose the corresponding element from `true_values` if
+ /// that element mask is true, and `false_values` if that element mask is false.
///
/// # Examples
/// ```
@@ -23,11 +23,7 @@ where
/// ```
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
- pub fn select<U>(
- self,
- true_values: Simd<U, LANES>,
- false_values: Simd<U, LANES>,
- ) -> Simd<U, LANES>
+ pub fn select<U>(self, true_values: Simd<U, N>, false_values: Simd<U, N>) -> Simd<U, N>
where
U: SimdElement<Mask = T>,
{
@@ -36,10 +32,10 @@ where
unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) }
}
- /// Choose lanes from two masks.
+ /// Choose elements from two masks.
///
- /// For each lane in the mask, choose the corresponding lane from `true_values` if
- /// that lane mask is true, and `false_values` if that lane mask is false.
+ /// For each element in the mask, choose the corresponding element from `true_values` if
+ /// that element mask is true, and `false_values` if that element mask is false.
///
/// # Examples
/// ```
diff --git a/library/portable-simd/crates/core_simd/src/simd/cmp.rs b/library/portable-simd/crates/core_simd/src/simd/cmp.rs
new file mode 100644
index 000000000..a8d81dbf2
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/simd/cmp.rs
@@ -0,0 +1,7 @@
+//! Traits for comparing and ordering vectors.
+
+mod eq;
+mod ord;
+
+pub use eq::*;
+pub use ord::*;
diff --git a/library/portable-simd/crates/core_simd/src/eq.rs b/library/portable-simd/crates/core_simd/src/simd/cmp/eq.rs
index 80763c072..f132fa2cc 100644
--- a/library/portable-simd/crates/core_simd/src/eq.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/cmp/eq.rs
@@ -1,5 +1,7 @@
use crate::simd::{
- intrinsics, LaneCount, Mask, Simd, SimdConstPtr, SimdElement, SimdMutPtr, SupportedLaneCount,
+ intrinsics,
+ ptr::{SimdConstPtr, SimdMutPtr},
+ LaneCount, Mask, Simd, SimdElement, SupportedLaneCount,
};
/// Parallel `PartialEq`.
@@ -7,11 +9,11 @@ pub trait SimdPartialEq {
/// The mask type returned by each comparison.
type Mask;
- /// Test if each lane is equal to the corresponding lane in `other`.
+ /// Test if each element is equal to the corresponding element in `other`.
#[must_use = "method returns a new mask and does not mutate the original value"]
fn simd_eq(self, other: Self) -> Self::Mask;
- /// Test if each lane is equal to the corresponding lane in `other`.
+ /// Test if each element is equal to the corresponding element in `other`.
#[must_use = "method returns a new mask and does not mutate the original value"]
fn simd_ne(self, other: Self) -> Self::Mask;
}
@@ -19,11 +21,11 @@ pub trait SimdPartialEq {
macro_rules! impl_number {
{ $($number:ty),* } => {
$(
- impl<const LANES: usize> SimdPartialEq for Simd<$number, LANES>
+ impl<const N: usize> SimdPartialEq for Simd<$number, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
- type Mask = Mask<<$number as SimdElement>::Mask, LANES>;
+ type Mask = Mask<<$number as SimdElement>::Mask, N>;
#[inline]
fn simd_eq(self, other: Self) -> Self::Mask {
@@ -48,9 +50,9 @@ impl_number! { f32, f64, u8, u16, u32, u64, usize, i8, i16, i32, i64, isize }
macro_rules! impl_mask {
{ $($integer:ty),* } => {
$(
- impl<const LANES: usize> SimdPartialEq for Mask<$integer, LANES>
+ impl<const N: usize> SimdPartialEq for Mask<$integer, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
type Mask = Self;
@@ -74,11 +76,11 @@ macro_rules! impl_mask {
impl_mask! { i8, i16, i32, i64, isize }
-impl<T, const LANES: usize> SimdPartialEq for Simd<*const T, LANES>
+impl<T, const N: usize> SimdPartialEq for Simd<*const T, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
- type Mask = Mask<isize, LANES>;
+ type Mask = Mask<isize, N>;
#[inline]
fn simd_eq(self, other: Self) -> Self::Mask {
@@ -91,11 +93,11 @@ where
}
}
-impl<T, const LANES: usize> SimdPartialEq for Simd<*mut T, LANES>
+impl<T, const N: usize> SimdPartialEq for Simd<*mut T, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
- type Mask = Mask<isize, LANES>;
+ type Mask = Mask<isize, N>;
#[inline]
fn simd_eq(self, other: Self) -> Self::Mask {
diff --git a/library/portable-simd/crates/core_simd/src/ord.rs b/library/portable-simd/crates/core_simd/src/simd/cmp/ord.rs
index b2455190e..4e9d49ea2 100644
--- a/library/portable-simd/crates/core_simd/src/ord.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/cmp/ord.rs
@@ -1,44 +1,47 @@
use crate::simd::{
- intrinsics, LaneCount, Mask, Simd, SimdConstPtr, SimdMutPtr, SimdPartialEq, SupportedLaneCount,
+ cmp::SimdPartialEq,
+ intrinsics,
+ ptr::{SimdConstPtr, SimdMutPtr},
+ LaneCount, Mask, Simd, SupportedLaneCount,
};
/// Parallel `PartialOrd`.
pub trait SimdPartialOrd: SimdPartialEq {
- /// Test if each lane is less than the corresponding lane in `other`.
+ /// Test if each element is less than the corresponding element in `other`.
#[must_use = "method returns a new mask and does not mutate the original value"]
fn simd_lt(self, other: Self) -> Self::Mask;
- /// Test if each lane is less than or equal to the corresponding lane in `other`.
+ /// Test if each element is less than or equal to the corresponding element in `other`.
#[must_use = "method returns a new mask and does not mutate the original value"]
fn simd_le(self, other: Self) -> Self::Mask;
- /// Test if each lane is greater than the corresponding lane in `other`.
+ /// Test if each element is greater than the corresponding element in `other`.
#[must_use = "method returns a new mask and does not mutate the original value"]
fn simd_gt(self, other: Self) -> Self::Mask;
- /// Test if each lane is greater than or equal to the corresponding lane in `other`.
+ /// Test if each element is greater than or equal to the corresponding element in `other`.
#[must_use = "method returns a new mask and does not mutate the original value"]
fn simd_ge(self, other: Self) -> Self::Mask;
}
/// Parallel `Ord`.
pub trait SimdOrd: SimdPartialOrd {
- /// Returns the lane-wise maximum with `other`.
+ /// Returns the element-wise maximum with `other`.
#[must_use = "method returns a new vector and does not mutate the original value"]
fn simd_max(self, other: Self) -> Self;
- /// Returns the lane-wise minimum with `other`.
+ /// Returns the element-wise minimum with `other`.
#[must_use = "method returns a new vector and does not mutate the original value"]
fn simd_min(self, other: Self) -> Self;
- /// Restrict each lane to a certain interval.
+ /// Restrict each element to a certain interval.
///
- /// For each lane, returns `max` if `self` is greater than `max`, and `min` if `self` is
+ /// For each element, returns `max` if `self` is greater than `max`, and `min` if `self` is
/// less than `min`. Otherwise returns `self`.
///
/// # Panics
///
- /// Panics if `min > max` on any lane.
+ /// Panics if `min > max` on any element.
#[must_use = "method returns a new vector and does not mutate the original value"]
fn simd_clamp(self, min: Self, max: Self) -> Self;
}
@@ -46,9 +49,9 @@ pub trait SimdOrd: SimdPartialOrd {
macro_rules! impl_integer {
{ $($integer:ty),* } => {
$(
- impl<const LANES: usize> SimdPartialOrd for Simd<$integer, LANES>
+ impl<const N: usize> SimdPartialOrd for Simd<$integer, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn simd_lt(self, other: Self) -> Self::Mask {
@@ -79,9 +82,9 @@ macro_rules! impl_integer {
}
}
- impl<const LANES: usize> SimdOrd for Simd<$integer, LANES>
+ impl<const N: usize> SimdOrd for Simd<$integer, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn simd_max(self, other: Self) -> Self {
@@ -98,7 +101,7 @@ macro_rules! impl_integer {
fn simd_clamp(self, min: Self, max: Self) -> Self {
assert!(
min.simd_le(max).all(),
- "each lane in `min` must be less than or equal to the corresponding lane in `max`",
+ "each element in `min` must be less than or equal to the corresponding element in `max`",
);
self.simd_max(min).simd_min(max)
}
@@ -112,9 +115,9 @@ impl_integer! { u8, u16, u32, u64, usize, i8, i16, i32, i64, isize }
macro_rules! impl_float {
{ $($float:ty),* } => {
$(
- impl<const LANES: usize> SimdPartialOrd for Simd<$float, LANES>
+ impl<const N: usize> SimdPartialOrd for Simd<$float, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn simd_lt(self, other: Self) -> Self::Mask {
@@ -153,9 +156,9 @@ impl_float! { f32, f64 }
macro_rules! impl_mask {
{ $($integer:ty),* } => {
$(
- impl<const LANES: usize> SimdPartialOrd for Mask<$integer, LANES>
+ impl<const N: usize> SimdPartialOrd for Mask<$integer, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn simd_lt(self, other: Self) -> Self::Mask {
@@ -186,9 +189,9 @@ macro_rules! impl_mask {
}
}
- impl<const LANES: usize> SimdOrd for Mask<$integer, LANES>
+ impl<const N: usize> SimdOrd for Mask<$integer, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn simd_max(self, other: Self) -> Self {
@@ -205,7 +208,7 @@ macro_rules! impl_mask {
fn simd_clamp(self, min: Self, max: Self) -> Self {
assert!(
min.simd_le(max).all(),
- "each lane in `min` must be less than or equal to the corresponding lane in `max`",
+ "each element in `min` must be less than or equal to the corresponding element in `max`",
);
self.simd_max(min).simd_min(max)
}
@@ -216,9 +219,9 @@ macro_rules! impl_mask {
impl_mask! { i8, i16, i32, i64, isize }
-impl<T, const LANES: usize> SimdPartialOrd for Simd<*const T, LANES>
+impl<T, const N: usize> SimdPartialOrd for Simd<*const T, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn simd_lt(self, other: Self) -> Self::Mask {
@@ -241,9 +244,9 @@ where
}
}
-impl<T, const LANES: usize> SimdOrd for Simd<*const T, LANES>
+impl<T, const N: usize> SimdOrd for Simd<*const T, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn simd_max(self, other: Self) -> Self {
@@ -260,15 +263,15 @@ where
fn simd_clamp(self, min: Self, max: Self) -> Self {
assert!(
min.simd_le(max).all(),
- "each lane in `min` must be less than or equal to the corresponding lane in `max`",
+ "each element in `min` must be less than or equal to the corresponding element in `max`",
);
self.simd_max(min).simd_min(max)
}
}
-impl<T, const LANES: usize> SimdPartialOrd for Simd<*mut T, LANES>
+impl<T, const N: usize> SimdPartialOrd for Simd<*mut T, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn simd_lt(self, other: Self) -> Self::Mask {
@@ -291,9 +294,9 @@ where
}
}
-impl<T, const LANES: usize> SimdOrd for Simd<*mut T, LANES>
+impl<T, const N: usize> SimdOrd for Simd<*mut T, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
#[inline]
fn simd_max(self, other: Self) -> Self {
@@ -310,7 +313,7 @@ where
fn simd_clamp(self, min: Self, max: Self) -> Self {
assert!(
min.simd_le(max).all(),
- "each lane in `min` must be less than or equal to the corresponding lane in `max`",
+ "each element in `min` must be less than or equal to the corresponding element in `max`",
);
self.simd_max(min).simd_min(max)
}
diff --git a/library/portable-simd/crates/core_simd/src/elements.rs b/library/portable-simd/crates/core_simd/src/simd/num.rs
index dc7f52a4d..22a4802ec 100644
--- a/library/portable-simd/crates/core_simd/src/elements.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/num.rs
@@ -1,15 +1,13 @@
-mod const_ptr;
+//! Traits for vectors with numeric elements.
+
mod float;
mod int;
-mod mut_ptr;
mod uint;
mod sealed {
pub trait Sealed {}
}
-pub use const_ptr::*;
pub use float::*;
pub use int::*;
-pub use mut_ptr::*;
pub use uint::*;
diff --git a/library/portable-simd/crates/core_simd/src/elements/float.rs b/library/portable-simd/crates/core_simd/src/simd/num/float.rs
index 501c1c5dd..fc0b99e87 100644
--- a/library/portable-simd/crates/core_simd/src/elements/float.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/num/float.rs
@@ -1,7 +1,7 @@
use super::sealed::Sealed;
use crate::simd::{
- intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SimdPartialEq, SimdPartialOrd,
- SupportedLaneCount,
+ cmp::{SimdPartialEq, SimdPartialOrd},
+ intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SupportedLaneCount,
};
/// Operations on SIMD vectors of floats.
@@ -28,7 +28,7 @@ pub trait SimdFloat: Copy + Sealed {
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{SimdFloat, SimdInt, Simd};
+ /// # use simd::prelude::*;
/// let floats: Simd<f32, 4> = Simd::from_array([1.9, -4.5, f32::INFINITY, f32::NAN]);
/// let ints = floats.cast::<i32>();
/// assert_eq!(ints, Simd::from_array([1, -4, i32::MAX, 0]));
@@ -63,64 +63,64 @@ pub trait SimdFloat: Copy + Sealed {
Self::Scalar: core::convert::FloatToInt<I>;
/// Raw transmutation to an unsigned integer vector type with the
- /// same size and number of lanes.
+ /// same size and number of elements.
#[must_use = "method returns a new vector and does not mutate the original value"]
fn to_bits(self) -> Self::Bits;
/// Raw transmutation from an unsigned integer vector type with the
- /// same size and number of lanes.
+ /// same size and number of elements.
#[must_use = "method returns a new vector and does not mutate the original value"]
fn from_bits(bits: Self::Bits) -> Self;
- /// Produces a vector where every lane has the absolute value of the
- /// equivalently-indexed lane in `self`.
+ /// Produces a vector where every element has the absolute value of the
+ /// equivalently-indexed element in `self`.
#[must_use = "method returns a new vector and does not mutate the original value"]
fn abs(self) -> Self;
- /// Takes the reciprocal (inverse) of each lane, `1/x`.
+ /// Takes the reciprocal (inverse) of each element, `1/x`.
#[must_use = "method returns a new vector and does not mutate the original value"]
fn recip(self) -> Self;
- /// Converts each lane from radians to degrees.
+ /// Converts each element from radians to degrees.
#[must_use = "method returns a new vector and does not mutate the original value"]
fn to_degrees(self) -> Self;
- /// Converts each lane from degrees to radians.
+ /// Converts each element from degrees to radians.
#[must_use = "method returns a new vector and does not mutate the original value"]
fn to_radians(self) -> Self;
- /// Returns true for each lane if it has a positive sign, including
+ /// Returns true for each element if it has a positive sign, including
/// `+0.0`, `NaN`s with positive sign bit and positive infinity.
#[must_use = "method returns a new mask and does not mutate the original value"]
fn is_sign_positive(self) -> Self::Mask;
- /// Returns true for each lane if it has a negative sign, including
+ /// Returns true for each element if it has a negative sign, including
/// `-0.0`, `NaN`s with negative sign bit and negative infinity.
#[must_use = "method returns a new mask and does not mutate the original value"]
fn is_sign_negative(self) -> Self::Mask;
- /// Returns true for each lane if its value is `NaN`.
+ /// Returns true for each element if its value is `NaN`.
#[must_use = "method returns a new mask and does not mutate the original value"]
fn is_nan(self) -> Self::Mask;
- /// Returns true for each lane if its value is positive infinity or negative infinity.
+ /// Returns true for each element if its value is positive infinity or negative infinity.
#[must_use = "method returns a new mask and does not mutate the original value"]
fn is_infinite(self) -> Self::Mask;
- /// Returns true for each lane if its value is neither infinite nor `NaN`.
+ /// Returns true for each element if its value is neither infinite nor `NaN`.
#[must_use = "method returns a new mask and does not mutate the original value"]
fn is_finite(self) -> Self::Mask;
- /// Returns true for each lane if its value is subnormal.
+ /// Returns true for each element if its value is subnormal.
#[must_use = "method returns a new mask and does not mutate the original value"]
fn is_subnormal(self) -> Self::Mask;
- /// Returns true for each lane if its value is neither zero, infinite,
+ /// Returns true for each element if its value is neither zero, infinite,
/// subnormal, nor `NaN`.
#[must_use = "method returns a new mask and does not mutate the original value"]
fn is_normal(self) -> Self::Mask;
- /// Replaces each lane with a number that represents its sign.
+ /// Replaces each element with a number that represents its sign.
///
/// * `1.0` if the number is positive, `+0.0`, or `INFINITY`
/// * `-1.0` if the number is negative, `-0.0`, or `NEG_INFINITY`
@@ -128,33 +128,33 @@ pub trait SimdFloat: Copy + Sealed {
#[must_use = "method returns a new vector and does not mutate the original value"]
fn signum(self) -> Self;
- /// Returns each lane with the magnitude of `self` and the sign of `sign`.
+ /// Returns each element with the magnitude of `self` and the sign of `sign`.
///
- /// For any lane containing a `NAN`, a `NAN` with the sign of `sign` is returned.
+ /// For any element containing a `NAN`, a `NAN` with the sign of `sign` is returned.
#[must_use = "method returns a new vector and does not mutate the original value"]
fn copysign(self, sign: Self) -> Self;
- /// Returns the minimum of each lane.
+ /// Returns the minimum of each element.
///
/// If one of the values is `NAN`, then the other value is returned.
#[must_use = "method returns a new vector and does not mutate the original value"]
fn simd_min(self, other: Self) -> Self;
- /// Returns the maximum of each lane.
+ /// Returns the maximum of each element.
///
/// If one of the values is `NAN`, then the other value is returned.
#[must_use = "method returns a new vector and does not mutate the original value"]
fn simd_max(self, other: Self) -> Self;
- /// Restrict each lane to a certain interval unless it is NaN.
+ /// Restrict each element to a certain interval unless it is NaN.
///
- /// For each lane in `self`, returns the corresponding lane in `max` if the lane is
- /// greater than `max`, and the corresponding lane in `min` if the lane is less
- /// than `min`. Otherwise returns the lane in `self`.
+ /// For each element in `self`, returns the corresponding element in `max` if the element is
+ /// greater than `max`, and the corresponding element in `min` if the element is less
+ /// than `min`. Otherwise returns the element in `self`.
#[must_use = "method returns a new vector and does not mutate the original value"]
fn simd_clamp(self, min: Self, max: Self) -> Self;
- /// Returns the sum of the lanes of the vector.
+ /// Returns the sum of the elements of the vector.
///
/// # Examples
///
@@ -162,13 +162,13 @@ pub trait SimdFloat: Copy + Sealed {
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{f32x2, SimdFloat};
+ /// # use simd::prelude::*;
/// let v = f32x2::from_array([1., 2.]);
/// assert_eq!(v.reduce_sum(), 3.);
/// ```
fn reduce_sum(self) -> Self::Scalar;
- /// Reducing multiply. Returns the product of the lanes of the vector.
+ /// Reducing multiply. Returns the product of the elements of the vector.
///
/// # Examples
///
@@ -176,18 +176,18 @@ pub trait SimdFloat: Copy + Sealed {
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{f32x2, SimdFloat};
+ /// # use simd::prelude::*;
/// let v = f32x2::from_array([3., 4.]);
/// assert_eq!(v.reduce_product(), 12.);
/// ```
fn reduce_product(self) -> Self::Scalar;
- /// Returns the maximum lane in the vector.
+ /// Returns the maximum element in the vector.
///
/// Returns values based on equality, so a vector containing both `0.` and `-0.` may
/// return either.
///
- /// This function will not return `NaN` unless all lanes are `NaN`.
+ /// This function will not return `NaN` unless all elements are `NaN`.
///
/// # Examples
///
@@ -195,7 +195,7 @@ pub trait SimdFloat: Copy + Sealed {
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{f32x2, SimdFloat};
+ /// # use simd::prelude::*;
/// let v = f32x2::from_array([1., 2.]);
/// assert_eq!(v.reduce_max(), 2.);
///
@@ -209,12 +209,12 @@ pub trait SimdFloat: Copy + Sealed {
/// ```
fn reduce_max(self) -> Self::Scalar;
- /// Returns the minimum lane in the vector.
+ /// Returns the minimum element in the vector.
///
/// Returns values based on equality, so a vector containing both `0.` and `-0.` may
/// return either.
///
- /// This function will not return `NaN` unless all lanes are `NaN`.
+ /// This function will not return `NaN` unless all elements are `NaN`.
///
/// # Examples
///
@@ -222,7 +222,7 @@ pub trait SimdFloat: Copy + Sealed {
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{f32x2, SimdFloat};
+ /// # use simd::prelude::*;
/// let v = f32x2::from_array([3., 7.]);
/// assert_eq!(v.reduce_min(), 3.);
///
@@ -240,20 +240,20 @@ pub trait SimdFloat: Copy + Sealed {
macro_rules! impl_trait {
{ $($ty:ty { bits: $bits_ty:ty, mask: $mask_ty:ty }),* } => {
$(
- impl<const LANES: usize> Sealed for Simd<$ty, LANES>
+ impl<const N: usize> Sealed for Simd<$ty, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
}
- impl<const LANES: usize> SimdFloat for Simd<$ty, LANES>
+ impl<const N: usize> SimdFloat for Simd<$ty, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
- type Mask = Mask<<$mask_ty as SimdElement>::Mask, LANES>;
+ type Mask = Mask<<$mask_ty as SimdElement>::Mask, N>;
type Scalar = $ty;
- type Bits = Simd<$bits_ty, LANES>;
- type Cast<T: SimdElement> = Simd<T, LANES>;
+ type Bits = Simd<$bits_ty, N>;
+ type Cast<T: SimdElement> = Simd<T, N>;
#[inline]
fn cast<T: SimdCast>(self) -> Self::Cast<T>
@@ -273,14 +273,14 @@ macro_rules! impl_trait {
}
#[inline]
- fn to_bits(self) -> Simd<$bits_ty, LANES> {
+ fn to_bits(self) -> Simd<$bits_ty, N> {
assert_eq!(core::mem::size_of::<Self>(), core::mem::size_of::<Self::Bits>());
// Safety: transmuting between vector types is safe
unsafe { core::mem::transmute_copy(&self) }
}
#[inline]
- fn from_bits(bits: Simd<$bits_ty, LANES>) -> Self {
+ fn from_bits(bits: Simd<$bits_ty, N>) -> Self {
assert_eq!(core::mem::size_of::<Self>(), core::mem::size_of::<Self::Bits>());
// Safety: transmuting between vector types is safe
unsafe { core::mem::transmute_copy(&bits) }
@@ -336,7 +336,10 @@ macro_rules! impl_trait {
#[inline]
fn is_subnormal(self) -> Self::Mask {
- self.abs().simd_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0))
+ // On some architectures (e.g. armv7 and some ppc) subnormals are flushed to zero,
+ // so this comparison must be done with integers.
+ let not_zero = self.abs().to_bits().simd_ne(Self::splat(0.0).to_bits());
+ not_zero & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0))
}
#[inline]
@@ -373,7 +376,7 @@ macro_rules! impl_trait {
fn simd_clamp(self, min: Self, max: Self) -> Self {
assert!(
min.simd_le(max).all(),
- "each lane in `min` must be less than or equal to the corresponding lane in `max`",
+ "each element in `min` must be less than or equal to the corresponding element in `max`",
);
let mut x = self;
x = x.simd_lt(min).select(min, x);
diff --git a/library/portable-simd/crates/core_simd/src/elements/int.rs b/library/portable-simd/crates/core_simd/src/simd/num/int.rs
index 6db89ff9a..1f1aa2727 100644
--- a/library/portable-simd/crates/core_simd/src/elements/int.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/num/int.rs
@@ -1,6 +1,7 @@
use super::sealed::Sealed;
use crate::simd::{
- intrinsics, LaneCount, Mask, Simd, SimdCast, SimdElement, SimdPartialOrd, SupportedLaneCount,
+ cmp::SimdPartialOrd, intrinsics, num::SimdUint, LaneCount, Mask, Simd, SimdCast, SimdElement,
+ SupportedLaneCount,
};
/// Operations on SIMD vectors of signed integers.
@@ -11,6 +12,9 @@ pub trait SimdInt: Copy + Sealed {
/// Scalar type contained by this SIMD vector type.
type Scalar;
+ /// A SIMD vector of unsigned integers with the same element size.
+ type Unsigned;
+
/// A SIMD vector with a different element type.
type Cast<T: SimdElement>;
@@ -28,7 +32,7 @@ pub trait SimdInt: Copy + Sealed {
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{Simd, SimdInt};
+ /// # use simd::prelude::*;
/// use core::i32::{MIN, MAX};
/// let x = Simd::from_array([MIN, 0, 1, MAX]);
/// let max = Simd::splat(MAX);
@@ -46,7 +50,7 @@ pub trait SimdInt: Copy + Sealed {
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{Simd, SimdInt};
+ /// # use simd::prelude::*;
/// use core::i32::{MIN, MAX};
/// let x = Simd::from_array([MIN, -2, -1, MAX]);
/// let max = Simd::splat(MAX);
@@ -57,14 +61,14 @@ pub trait SimdInt: Copy + Sealed {
fn saturating_sub(self, second: Self) -> Self;
/// Lanewise absolute value, implemented in Rust.
- /// Every lane becomes its absolute value.
+ /// Every element becomes its absolute value.
///
/// # Examples
/// ```
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{Simd, SimdInt};
+ /// # use simd::prelude::*;
/// use core::i32::{MIN, MAX};
/// let xs = Simd::from_array([MIN, MIN +1, -5, 0]);
/// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0]));
@@ -79,7 +83,7 @@ pub trait SimdInt: Copy + Sealed {
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{Simd, SimdInt};
+ /// # use simd::prelude::*;
/// use core::i32::{MIN, MAX};
/// let xs = Simd::from_array([MIN, -2, 0, 3]);
/// let unsat = xs.abs();
@@ -97,7 +101,7 @@ pub trait SimdInt: Copy + Sealed {
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{Simd, SimdInt};
+ /// # use simd::prelude::*;
/// use core::i32::{MIN, MAX};
/// let x = Simd::from_array([MIN, -2, 3, MAX]);
/// let unsat = -x;
@@ -107,19 +111,19 @@ pub trait SimdInt: Copy + Sealed {
/// ```
fn saturating_neg(self) -> Self;
- /// Returns true for each positive lane and false if it is zero or negative.
+ /// Returns true for each positive element and false if it is zero or negative.
fn is_positive(self) -> Self::Mask;
- /// Returns true for each negative lane and false if it is zero or positive.
+ /// Returns true for each negative element and false if it is zero or positive.
fn is_negative(self) -> Self::Mask;
- /// Returns numbers representing the sign of each lane.
+ /// Returns numbers representing the sign of each element.
/// * `0` if the number is zero
/// * `1` if the number is positive
/// * `-1` if the number is negative
fn signum(self) -> Self;
- /// Returns the sum of the lanes of the vector, with wrapping addition.
+ /// Returns the sum of the elements of the vector, with wrapping addition.
///
/// # Examples
///
@@ -127,7 +131,7 @@ pub trait SimdInt: Copy + Sealed {
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{i32x4, SimdInt};
+ /// # use simd::prelude::*;
/// let v = i32x4::from_array([1, 2, 3, 4]);
/// assert_eq!(v.reduce_sum(), 10);
///
@@ -137,7 +141,7 @@ pub trait SimdInt: Copy + Sealed {
/// ```
fn reduce_sum(self) -> Self::Scalar;
- /// Returns the product of the lanes of the vector, with wrapping multiplication.
+ /// Returns the product of the elements of the vector, with wrapping multiplication.
///
/// # Examples
///
@@ -145,7 +149,7 @@ pub trait SimdInt: Copy + Sealed {
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{i32x4, SimdInt};
+ /// # use simd::prelude::*;
/// let v = i32x4::from_array([1, 2, 3, 4]);
/// assert_eq!(v.reduce_product(), 24);
///
@@ -155,7 +159,7 @@ pub trait SimdInt: Copy + Sealed {
/// ```
fn reduce_product(self) -> Self::Scalar;
- /// Returns the maximum lane in the vector.
+ /// Returns the maximum element in the vector.
///
/// # Examples
///
@@ -163,13 +167,13 @@ pub trait SimdInt: Copy + Sealed {
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{i32x4, SimdInt};
+ /// # use simd::prelude::*;
/// let v = i32x4::from_array([1, 2, 3, 4]);
/// assert_eq!(v.reduce_max(), 4);
/// ```
fn reduce_max(self) -> Self::Scalar;
- /// Returns the minimum lane in the vector.
+ /// Returns the minimum element in the vector.
///
/// # Examples
///
@@ -177,38 +181,58 @@ pub trait SimdInt: Copy + Sealed {
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{i32x4, SimdInt};
+ /// # use simd::prelude::*;
/// let v = i32x4::from_array([1, 2, 3, 4]);
/// assert_eq!(v.reduce_min(), 1);
/// ```
fn reduce_min(self) -> Self::Scalar;
- /// Returns the cumulative bitwise "and" across the lanes of the vector.
+ /// Returns the cumulative bitwise "and" across the elements of the vector.
fn reduce_and(self) -> Self::Scalar;
- /// Returns the cumulative bitwise "or" across the lanes of the vector.
+ /// Returns the cumulative bitwise "or" across the elements of the vector.
fn reduce_or(self) -> Self::Scalar;
- /// Returns the cumulative bitwise "xor" across the lanes of the vector.
+ /// Returns the cumulative bitwise "xor" across the elements of the vector.
fn reduce_xor(self) -> Self::Scalar;
+
+ /// Reverses the byte order of each element.
+ fn swap_bytes(self) -> Self;
+
+ /// Reverses the order of bits in each element.
+ /// The least significant bit becomes the most significant bit, second least-significant bit becomes second most-significant bit, etc.
+ fn reverse_bits(self) -> Self;
+
+ /// Returns the number of leading zeros in the binary representation of each element.
+ fn leading_zeros(self) -> Self::Unsigned;
+
+ /// Returns the number of trailing zeros in the binary representation of each element.
+ fn trailing_zeros(self) -> Self::Unsigned;
+
+ /// Returns the number of leading ones in the binary representation of each element.
+ fn leading_ones(self) -> Self::Unsigned;
+
+ /// Returns the number of trailing ones in the binary representation of each element.
+ fn trailing_ones(self) -> Self::Unsigned;
}
macro_rules! impl_trait {
- { $($ty:ty),* } => {
+ { $($ty:ident ($unsigned:ident)),* } => {
$(
- impl<const LANES: usize> Sealed for Simd<$ty, LANES>
+ impl<const N: usize> Sealed for Simd<$ty, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
}
- impl<const LANES: usize> SimdInt for Simd<$ty, LANES>
+ impl<const N: usize> SimdInt for Simd<$ty, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
- type Mask = Mask<<$ty as SimdElement>::Mask, LANES>;
+ type Mask = Mask<<$ty as SimdElement>::Mask, N>;
type Scalar = $ty;
- type Cast<T: SimdElement> = Simd<T, LANES>;
+ type Unsigned = Simd<$unsigned, N>;
+ type Cast<T: SimdElement> = Simd<T, N>;
#[inline]
fn cast<T: SimdCast>(self) -> Self::Cast<T> {
@@ -307,9 +331,41 @@ macro_rules! impl_trait {
// Safety: `self` is an integer vector
unsafe { intrinsics::simd_reduce_xor(self) }
}
+
+ #[inline]
+ fn swap_bytes(self) -> Self {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_bswap(self) }
+ }
+
+ #[inline]
+ fn reverse_bits(self) -> Self {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_bitreverse(self) }
+ }
+
+ #[inline]
+ fn leading_zeros(self) -> Self::Unsigned {
+ self.cast::<$unsigned>().leading_zeros()
+ }
+
+ #[inline]
+ fn trailing_zeros(self) -> Self::Unsigned {
+ self.cast::<$unsigned>().trailing_zeros()
+ }
+
+ #[inline]
+ fn leading_ones(self) -> Self::Unsigned {
+ self.cast::<$unsigned>().leading_ones()
+ }
+
+ #[inline]
+ fn trailing_ones(self) -> Self::Unsigned {
+ self.cast::<$unsigned>().trailing_ones()
+ }
}
)*
}
}
-impl_trait! { i8, i16, i32, i64, isize }
+impl_trait! { i8 (u8), i16 (u16), i32 (u32), i64 (u64), isize (usize) }
diff --git a/library/portable-simd/crates/core_simd/src/elements/uint.rs b/library/portable-simd/crates/core_simd/src/simd/num/uint.rs
index 3926c395e..c955ee8fe 100644
--- a/library/portable-simd/crates/core_simd/src/elements/uint.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/num/uint.rs
@@ -16,6 +16,12 @@ pub trait SimdUint: Copy + Sealed {
#[must_use]
fn cast<T: SimdCast>(self) -> Self::Cast<T>;
+ /// Wrapping negation.
+ ///
+ /// Like [`u32::wrapping_neg`], all applications of this function will wrap, with the exception
+ /// of `-0`.
+ fn wrapping_neg(self) -> Self;
+
/// Lanewise saturating add.
///
/// # Examples
@@ -23,7 +29,7 @@ pub trait SimdUint: Copy + Sealed {
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{Simd, SimdUint};
+ /// # use simd::prelude::*;
/// use core::u32::MAX;
/// let x = Simd::from_array([2, 1, 0, MAX]);
/// let max = Simd::splat(MAX);
@@ -41,7 +47,7 @@ pub trait SimdUint: Copy + Sealed {
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{Simd, SimdUint};
+ /// # use simd::prelude::*;
/// use core::u32::MAX;
/// let x = Simd::from_array([2, 1, 0, MAX]);
/// let max = Simd::splat(MAX);
@@ -51,43 +57,62 @@ pub trait SimdUint: Copy + Sealed {
/// assert_eq!(sat, Simd::splat(0));
fn saturating_sub(self, second: Self) -> Self;
- /// Returns the sum of the lanes of the vector, with wrapping addition.
+ /// Returns the sum of the elements of the vector, with wrapping addition.
fn reduce_sum(self) -> Self::Scalar;
- /// Returns the product of the lanes of the vector, with wrapping multiplication.
+ /// Returns the product of the elements of the vector, with wrapping multiplication.
fn reduce_product(self) -> Self::Scalar;
- /// Returns the maximum lane in the vector.
+ /// Returns the maximum element in the vector.
fn reduce_max(self) -> Self::Scalar;
- /// Returns the minimum lane in the vector.
+ /// Returns the minimum element in the vector.
fn reduce_min(self) -> Self::Scalar;
- /// Returns the cumulative bitwise "and" across the lanes of the vector.
+ /// Returns the cumulative bitwise "and" across the elements of the vector.
fn reduce_and(self) -> Self::Scalar;
- /// Returns the cumulative bitwise "or" across the lanes of the vector.
+ /// Returns the cumulative bitwise "or" across the elements of the vector.
fn reduce_or(self) -> Self::Scalar;
- /// Returns the cumulative bitwise "xor" across the lanes of the vector.
+ /// Returns the cumulative bitwise "xor" across the elements of the vector.
fn reduce_xor(self) -> Self::Scalar;
+
+ /// Reverses the byte order of each element.
+ fn swap_bytes(self) -> Self;
+
+ /// Reverses the order of bits in each element.
+ /// The least significant bit becomes the most significant bit, second least-significant bit becomes second most-significant bit, etc.
+ fn reverse_bits(self) -> Self;
+
+ /// Returns the number of leading zeros in the binary representation of each element.
+ fn leading_zeros(self) -> Self;
+
+ /// Returns the number of trailing zeros in the binary representation of each element.
+ fn trailing_zeros(self) -> Self;
+
+ /// Returns the number of leading ones in the binary representation of each element.
+ fn leading_ones(self) -> Self;
+
+ /// Returns the number of trailing ones in the binary representation of each element.
+ fn trailing_ones(self) -> Self;
}
macro_rules! impl_trait {
- { $($ty:ty),* } => {
+ { $($ty:ident ($signed:ident)),* } => {
$(
- impl<const LANES: usize> Sealed for Simd<$ty, LANES>
+ impl<const N: usize> Sealed for Simd<$ty, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
}
- impl<const LANES: usize> SimdUint for Simd<$ty, LANES>
+ impl<const N: usize> SimdUint for Simd<$ty, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
type Scalar = $ty;
- type Cast<T: SimdElement> = Simd<T, LANES>;
+ type Cast<T: SimdElement> = Simd<T, N>;
#[inline]
fn cast<T: SimdCast>(self) -> Self::Cast<T> {
@@ -96,6 +121,12 @@ macro_rules! impl_trait {
}
#[inline]
+ fn wrapping_neg(self) -> Self {
+ use crate::simd::num::SimdInt;
+ (-self.cast::<$signed>()).cast()
+ }
+
+ #[inline]
fn saturating_add(self, second: Self) -> Self {
// Safety: `self` is a vector
unsafe { intrinsics::simd_saturating_add(self, second) }
@@ -148,9 +179,43 @@ macro_rules! impl_trait {
// Safety: `self` is an integer vector
unsafe { intrinsics::simd_reduce_xor(self) }
}
+
+ #[inline]
+ fn swap_bytes(self) -> Self {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_bswap(self) }
+ }
+
+ #[inline]
+ fn reverse_bits(self) -> Self {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_bitreverse(self) }
+ }
+
+ #[inline]
+ fn leading_zeros(self) -> Self {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_ctlz(self) }
+ }
+
+ #[inline]
+ fn trailing_zeros(self) -> Self {
+ // Safety: `self` is an integer vector
+ unsafe { intrinsics::simd_cttz(self) }
+ }
+
+ #[inline]
+ fn leading_ones(self) -> Self {
+ (!self).leading_zeros()
+ }
+
+ #[inline]
+ fn trailing_ones(self) -> Self {
+ (!self).trailing_zeros()
+ }
}
)*
}
}
-impl_trait! { u8, u16, u32, u64, usize }
+impl_trait! { u8 (i8), u16 (i16), u32 (i32), u64 (i64), usize (isize) }
diff --git a/library/portable-simd/crates/core_simd/src/simd/prelude.rs b/library/portable-simd/crates/core_simd/src/simd/prelude.rs
index e8fdc932d..4b7c744c0 100644
--- a/library/portable-simd/crates/core_simd/src/simd/prelude.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/prelude.rs
@@ -7,8 +7,10 @@
#[doc(no_inline)]
pub use super::{
- simd_swizzle, Mask, Simd, SimdConstPtr, SimdFloat, SimdInt, SimdMutPtr, SimdOrd, SimdPartialEq,
- SimdPartialOrd, SimdUint,
+ cmp::{SimdOrd, SimdPartialEq, SimdPartialOrd},
+ num::{SimdFloat, SimdInt, SimdUint},
+ ptr::{SimdConstPtr, SimdMutPtr},
+ simd_swizzle, Mask, Simd,
};
#[rustfmt::skip]
diff --git a/library/portable-simd/crates/core_simd/src/simd/ptr.rs b/library/portable-simd/crates/core_simd/src/simd/ptr.rs
new file mode 100644
index 000000000..3f8e66691
--- /dev/null
+++ b/library/portable-simd/crates/core_simd/src/simd/ptr.rs
@@ -0,0 +1,11 @@
+//! Traits for vectors of pointers.
+
+mod const_ptr;
+mod mut_ptr;
+
+mod sealed {
+ pub trait Sealed {}
+}
+
+pub use const_ptr::*;
+pub use mut_ptr::*;
diff --git a/library/portable-simd/crates/core_simd/src/elements/const_ptr.rs b/library/portable-simd/crates/core_simd/src/simd/ptr/const_ptr.rs
index f215f9a61..97fe3fb60 100644
--- a/library/portable-simd/crates/core_simd/src/elements/const_ptr.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/ptr/const_ptr.rs
@@ -1,15 +1,17 @@
use super::sealed::Sealed;
-use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SimdUint, SupportedLaneCount};
+use crate::simd::{
+ cmp::SimdPartialEq, intrinsics, num::SimdUint, LaneCount, Mask, Simd, SupportedLaneCount,
+};
/// Operations on SIMD vectors of constant pointers.
pub trait SimdConstPtr: Copy + Sealed {
- /// Vector of `usize` with the same number of lanes.
+ /// Vector of `usize` with the same number of elements.
type Usize;
- /// Vector of `isize` with the same number of lanes.
+ /// Vector of `isize` with the same number of elements.
type Isize;
- /// Vector of const pointers with the same number of lanes.
+ /// Vector of const pointers with the same number of elements.
type CastPtr<T>;
/// Vector of mutable pointers to the same type.
@@ -18,17 +20,17 @@ pub trait SimdConstPtr: Copy + Sealed {
/// Mask type used for manipulating this SIMD vector type.
type Mask;
- /// Returns `true` for each lane that is null.
+ /// Returns `true` for each element that is null.
fn is_null(self) -> Self::Mask;
/// Casts to a pointer of another type.
///
- /// Equivalent to calling [`pointer::cast`] on each lane.
+ /// Equivalent to calling [`pointer::cast`] on each element.
fn cast<T>(self) -> Self::CastPtr<T>;
/// Changes constness without changing the type.
///
- /// Equivalent to calling [`pointer::cast_mut`] on each lane.
+ /// Equivalent to calling [`pointer::cast_mut`] on each element.
fn cast_mut(self) -> Self::MutPtr;
/// Gets the "address" portion of the pointer.
@@ -39,7 +41,7 @@ pub trait SimdConstPtr: Copy + Sealed {
/// This method semantically discards *provenance* and
/// *address-space* information. To properly restore that information, use [`Self::with_addr`].
///
- /// Equivalent to calling [`pointer::addr`] on each lane.
+ /// Equivalent to calling [`pointer::addr`] on each element.
fn addr(self) -> Self::Usize;
/// Creates a new pointer with the given address.
@@ -47,7 +49,7 @@ pub trait SimdConstPtr: Copy + Sealed {
/// This performs the same operation as a cast, but copies the *address-space* and
/// *provenance* of `self` to the new pointer.
///
- /// Equivalent to calling [`pointer::with_addr`] on each lane.
+ /// Equivalent to calling [`pointer::with_addr`] on each element.
fn with_addr(self, addr: Self::Usize) -> Self;
/// Gets the "address" portion of the pointer, and "exposes" the provenance part for future use
@@ -56,39 +58,36 @@ pub trait SimdConstPtr: Copy + Sealed {
/// Convert an address back to a pointer, picking up a previously "exposed" provenance.
///
- /// Equivalent to calling [`core::ptr::from_exposed_addr`] on each lane.
+ /// Equivalent to calling [`core::ptr::from_exposed_addr`] on each element.
fn from_exposed_addr(addr: Self::Usize) -> Self;
/// Calculates the offset from a pointer using wrapping arithmetic.
///
- /// Equivalent to calling [`pointer::wrapping_offset`] on each lane.
+ /// Equivalent to calling [`pointer::wrapping_offset`] on each element.
fn wrapping_offset(self, offset: Self::Isize) -> Self;
/// Calculates the offset from a pointer using wrapping arithmetic.
///
- /// Equivalent to calling [`pointer::wrapping_add`] on each lane.
+ /// Equivalent to calling [`pointer::wrapping_add`] on each element.
fn wrapping_add(self, count: Self::Usize) -> Self;
/// Calculates the offset from a pointer using wrapping arithmetic.
///
- /// Equivalent to calling [`pointer::wrapping_sub`] on each lane.
+ /// Equivalent to calling [`pointer::wrapping_sub`] on each element.
fn wrapping_sub(self, count: Self::Usize) -> Self;
}
-impl<T, const LANES: usize> Sealed for Simd<*const T, LANES> where
- LaneCount<LANES>: SupportedLaneCount
-{
-}
+impl<T, const N: usize> Sealed for Simd<*const T, N> where LaneCount<N>: SupportedLaneCount {}
-impl<T, const LANES: usize> SimdConstPtr for Simd<*const T, LANES>
+impl<T, const N: usize> SimdConstPtr for Simd<*const T, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
- type Usize = Simd<usize, LANES>;
- type Isize = Simd<isize, LANES>;
- type CastPtr<U> = Simd<*const U, LANES>;
- type MutPtr = Simd<*mut T, LANES>;
- type Mask = Mask<isize, LANES>;
+ type Usize = Simd<usize, N>;
+ type Isize = Simd<isize, N>;
+ type CastPtr<U> = Simd<*const U, N>;
+ type MutPtr = Simd<*mut T, N>;
+ type Mask = Mask<isize, N>;
#[inline]
fn is_null(self) -> Self::Mask {
diff --git a/library/portable-simd/crates/core_simd/src/elements/mut_ptr.rs b/library/portable-simd/crates/core_simd/src/simd/ptr/mut_ptr.rs
index 4bdc6a14c..e35633d04 100644
--- a/library/portable-simd/crates/core_simd/src/elements/mut_ptr.rs
+++ b/library/portable-simd/crates/core_simd/src/simd/ptr/mut_ptr.rs
@@ -1,15 +1,17 @@
use super::sealed::Sealed;
-use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SimdUint, SupportedLaneCount};
+use crate::simd::{
+ cmp::SimdPartialEq, intrinsics, num::SimdUint, LaneCount, Mask, Simd, SupportedLaneCount,
+};
/// Operations on SIMD vectors of mutable pointers.
pub trait SimdMutPtr: Copy + Sealed {
- /// Vector of `usize` with the same number of lanes.
+ /// Vector of `usize` with the same number of elements.
type Usize;
- /// Vector of `isize` with the same number of lanes.
+ /// Vector of `isize` with the same number of elements.
type Isize;
- /// Vector of const pointers with the same number of lanes.
+ /// Vector of mutable pointers with the same number of elements.
type CastPtr<T>;
/// Vector of constant pointers to the same type.
@@ -18,17 +20,17 @@ pub trait SimdMutPtr: Copy + Sealed {
/// Mask type used for manipulating this SIMD vector type.
type Mask;
- /// Returns `true` for each lane that is null.
+ /// Returns `true` for each element that is null.
fn is_null(self) -> Self::Mask;
/// Casts to a pointer of another type.
///
- /// Equivalent to calling [`pointer::cast`] on each lane.
+ /// Equivalent to calling [`pointer::cast`] on each element.
fn cast<T>(self) -> Self::CastPtr<T>;
/// Changes constness without changing the type.
///
- /// Equivalent to calling [`pointer::cast_const`] on each lane.
+ /// Equivalent to calling [`pointer::cast_const`] on each element.
fn cast_const(self) -> Self::ConstPtr;
/// Gets the "address" portion of the pointer.
@@ -36,7 +38,7 @@ pub trait SimdMutPtr: Copy + Sealed {
/// This method discards pointer semantic metadata, so the result cannot be
/// directly cast into a valid pointer.
///
- /// Equivalent to calling [`pointer::addr`] on each lane.
+ /// Equivalent to calling [`pointer::addr`] on each element.
fn addr(self) -> Self::Usize;
/// Creates a new pointer with the given address.
@@ -44,7 +46,7 @@ pub trait SimdMutPtr: Copy + Sealed {
/// This performs the same operation as a cast, but copies the *address-space* and
/// *provenance* of `self` to the new pointer.
///
- /// Equivalent to calling [`pointer::with_addr`] on each lane.
+ /// Equivalent to calling [`pointer::with_addr`] on each element.
fn with_addr(self, addr: Self::Usize) -> Self;
/// Gets the "address" portion of the pointer, and "exposes" the provenance part for future use
@@ -53,37 +55,36 @@ pub trait SimdMutPtr: Copy + Sealed {
/// Convert an address back to a pointer, picking up a previously "exposed" provenance.
///
- /// Equivalent to calling [`core::ptr::from_exposed_addr_mut`] on each lane.
+ /// Equivalent to calling [`core::ptr::from_exposed_addr_mut`] on each element.
fn from_exposed_addr(addr: Self::Usize) -> Self;
/// Calculates the offset from a pointer using wrapping arithmetic.
///
- /// Equivalent to calling [`pointer::wrapping_offset`] on each lane.
+ /// Equivalent to calling [`pointer::wrapping_offset`] on each element.
fn wrapping_offset(self, offset: Self::Isize) -> Self;
/// Calculates the offset from a pointer using wrapping arithmetic.
///
- /// Equivalent to calling [`pointer::wrapping_add`] on each lane.
+ /// Equivalent to calling [`pointer::wrapping_add`] on each element.
fn wrapping_add(self, count: Self::Usize) -> Self;
/// Calculates the offset from a pointer using wrapping arithmetic.
///
- /// Equivalent to calling [`pointer::wrapping_sub`] on each lane.
+ /// Equivalent to calling [`pointer::wrapping_sub`] on each element.
fn wrapping_sub(self, count: Self::Usize) -> Self;
}
-impl<T, const LANES: usize> Sealed for Simd<*mut T, LANES> where LaneCount<LANES>: SupportedLaneCount
-{}
+impl<T, const N: usize> Sealed for Simd<*mut T, N> where LaneCount<N>: SupportedLaneCount {}
-impl<T, const LANES: usize> SimdMutPtr for Simd<*mut T, LANES>
+impl<T, const N: usize> SimdMutPtr for Simd<*mut T, N>
where
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
- type Usize = Simd<usize, LANES>;
- type Isize = Simd<isize, LANES>;
- type CastPtr<U> = Simd<*mut U, LANES>;
- type ConstPtr = Simd<*const T, LANES>;
- type Mask = Mask<isize, LANES>;
+ type Usize = Simd<usize, N>;
+ type Isize = Simd<isize, N>;
+ type CastPtr<U> = Simd<*mut U, N>;
+ type ConstPtr = Simd<*const T, N>;
+ type Mask = Mask<isize, N>;
#[inline]
fn is_null(self) -> Self::Mask {
diff --git a/library/portable-simd/crates/core_simd/src/swizzle.rs b/library/portable-simd/crates/core_simd/src/swizzle.rs
index 68f20516c..ec8548d55 100644
--- a/library/portable-simd/crates/core_simd/src/swizzle.rs
+++ b/library/portable-simd/crates/core_simd/src/swizzle.rs
@@ -1,17 +1,15 @@
use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+use crate::simd::{LaneCount, Mask, MaskElement, Simd, SimdElement, SupportedLaneCount};
-/// Constructs a new SIMD vector by copying elements from selected lanes in other vectors.
+/// Constructs a new SIMD vector by copying selected elements from other vectors.
///
-/// When swizzling one vector, lanes are selected by a `const` array of `usize`,
-/// like [`Swizzle`].
+/// When swizzling one vector, elements are selected like [`Swizzle::swizzle`].
///
-/// When swizzling two vectors, lanes are selected by a `const` array of [`Which`],
-/// like [`Swizzle2`].
+/// When swizzling two vectors, elements are selected like [`Swizzle::concat_swizzle`].
///
/// # Examples
///
-/// With a single SIMD vector, the const array specifies lane indices in that vector:
+/// With a single SIMD vector, the const array specifies element indices in that vector:
/// ```
/// # #![feature(portable_simd)]
/// # use core::simd::{u32x2, u32x4, simd_swizzle};
@@ -21,25 +19,27 @@ use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
/// let r: u32x4 = simd_swizzle!(v, [3, 0, 1, 2]);
/// assert_eq!(r.to_array(), [13, 10, 11, 12]);
///
-/// // Changing the number of lanes
+/// // Changing the number of elements
/// let r: u32x2 = simd_swizzle!(v, [3, 1]);
/// assert_eq!(r.to_array(), [13, 11]);
/// ```
///
-/// With two input SIMD vectors, the const array uses `Which` to specify the source of each index:
+/// With two input SIMD vectors, the const array specifies element indices in the concatenation of
+/// those vectors:
/// ```
/// # #![feature(portable_simd)]
-/// # use core::simd::{u32x2, u32x4, simd_swizzle, Which};
-/// use Which::{First, Second};
+/// # #[cfg(feature = "as_crate")] use core_simd::simd;
+/// # #[cfg(not(feature = "as_crate"))] use core::simd;
+/// # use simd::{u32x2, u32x4, simd_swizzle};
/// let a = u32x4::from_array([0, 1, 2, 3]);
/// let b = u32x4::from_array([4, 5, 6, 7]);
///
/// // Keeping the same size
-/// let r: u32x4 = simd_swizzle!(a, b, [First(0), First(1), Second(2), Second(3)]);
+/// let r: u32x4 = simd_swizzle!(a, b, [0, 1, 6, 7]);
/// assert_eq!(r.to_array(), [0, 1, 6, 7]);
///
-/// // Changing the number of lanes
-/// let r: u32x2 = simd_swizzle!(a, b, [First(0), Second(0)]);
+/// // Changing the number of elements
+/// let r: u32x2 = simd_swizzle!(a, b, [0, 4]);
/// assert_eq!(r.to_array(), [0, 4]);
/// ```
#[allow(unused_macros)]
@@ -50,7 +50,7 @@ pub macro simd_swizzle {
{
use $crate::simd::Swizzle;
struct Impl;
- impl<const LANES: usize> Swizzle<LANES, {$index.len()}> for Impl {
+ impl Swizzle<{$index.len()}> for Impl {
const INDEX: [usize; {$index.len()}] = $index;
}
Impl::swizzle($vector)
@@ -60,204 +60,194 @@ pub macro simd_swizzle {
$first:expr, $second:expr, $index:expr $(,)?
) => {
{
- use $crate::simd::{Which, Swizzle2};
+ use $crate::simd::Swizzle;
struct Impl;
- impl<const LANES: usize> Swizzle2<LANES, {$index.len()}> for Impl {
- const INDEX: [Which; {$index.len()}] = $index;
+ impl Swizzle<{$index.len()}> for Impl {
+ const INDEX: [usize; {$index.len()}] = $index;
}
- Impl::swizzle2($first, $second)
+ Impl::concat_swizzle($first, $second)
}
}
}
-/// Specifies a lane index into one of two SIMD vectors.
-///
-/// This is an input type for [Swizzle2] and helper macros like [simd_swizzle].
-#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub enum Which {
- /// Index of a lane in the first input SIMD vector.
- First(usize),
- /// Index of a lane in the second input SIMD vector.
- Second(usize),
-}
-
/// Create a vector from the elements of another vector.
-pub trait Swizzle<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
- /// Map from the lanes of the input vector to the output vector.
- const INDEX: [usize; OUTPUT_LANES];
+pub trait Swizzle<const N: usize> {
+ /// Map from the elements of the input vector to the output vector.
+ const INDEX: [usize; N];
- /// Create a new vector from the lanes of `vector`.
+ /// Create a new vector from the elements of `vector`.
///
/// Lane `i` of the output is `vector[Self::INDEX[i]]`.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
- fn swizzle<T>(vector: Simd<T, INPUT_LANES>) -> Simd<T, OUTPUT_LANES>
+ fn swizzle<T, const M: usize>(vector: Simd<T, M>) -> Simd<T, N>
where
T: SimdElement,
- LaneCount<INPUT_LANES>: SupportedLaneCount,
- LaneCount<OUTPUT_LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
+ LaneCount<M>: SupportedLaneCount,
{
- // Safety: `vector` is a vector, and `INDEX_IMPL` is a const array of u32.
- unsafe { intrinsics::simd_shuffle(vector, vector, Self::INDEX_IMPL) }
+ // Safety: `vector` is a vector, and the index is a const array of u32.
+ unsafe {
+ intrinsics::simd_shuffle(
+ vector,
+ vector,
+ const {
+ let mut output = [0; N];
+ let mut i = 0;
+ while i < N {
+ let index = Self::INDEX[i];
+ assert!(index as u32 as usize == index);
+ assert!(
+ index < M,
+ "source element index exceeds input vector length"
+ );
+ output[i] = index as u32;
+ i += 1;
+ }
+ output
+ },
+ )
+ }
}
-}
-
-/// Create a vector from the elements of two other vectors.
-pub trait Swizzle2<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
- /// Map from the lanes of the input vectors to the output vector
- const INDEX: [Which; OUTPUT_LANES];
- /// Create a new vector from the lanes of `first` and `second`.
+ /// Create a new vector from the elements of `first` and `second`.
///
- /// Lane `i` is `first[j]` when `Self::INDEX[i]` is `First(j)`, or `second[j]` when it is
- /// `Second(j)`.
+ /// Element `i` of the output is `concat[Self::INDEX[i]]`, where `concat` is the concatenation of
+ /// `first` and `second`.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
- fn swizzle2<T>(
- first: Simd<T, INPUT_LANES>,
- second: Simd<T, INPUT_LANES>,
- ) -> Simd<T, OUTPUT_LANES>
+ fn concat_swizzle<T, const M: usize>(first: Simd<T, M>, second: Simd<T, M>) -> Simd<T, N>
where
T: SimdElement,
- LaneCount<INPUT_LANES>: SupportedLaneCount,
- LaneCount<OUTPUT_LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
+ LaneCount<M>: SupportedLaneCount,
{
- // Safety: `first` and `second` are vectors, and `INDEX_IMPL` is a const array of u32.
- unsafe { intrinsics::simd_shuffle(first, second, Self::INDEX_IMPL) }
- }
-}
-
-/// The `simd_shuffle` intrinsic expects `u32`, so do error checking and conversion here.
-/// This trait hides `INDEX_IMPL` from the public API.
-trait SwizzleImpl<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
- const INDEX_IMPL: [u32; OUTPUT_LANES];
-}
-
-impl<T, const INPUT_LANES: usize, const OUTPUT_LANES: usize> SwizzleImpl<INPUT_LANES, OUTPUT_LANES>
- for T
-where
- T: Swizzle<INPUT_LANES, OUTPUT_LANES> + ?Sized,
-{
- const INDEX_IMPL: [u32; OUTPUT_LANES] = {
- let mut output = [0; OUTPUT_LANES];
- let mut i = 0;
- while i < OUTPUT_LANES {
- let index = Self::INDEX[i];
- assert!(index as u32 as usize == index);
- assert!(index < INPUT_LANES, "source lane exceeds input lane count",);
- output[i] = index as u32;
- i += 1;
+ // Safety: `first` and `second` are vectors, and the index is a const array of u32.
+ unsafe {
+ intrinsics::simd_shuffle(
+ first,
+ second,
+ const {
+ let mut output = [0; N];
+ let mut i = 0;
+ while i < N {
+ let index = Self::INDEX[i];
+ assert!(index as u32 as usize == index);
+ assert!(
+ index < 2 * M,
+ "source element index exceeds input vector length"
+ );
+ output[i] = index as u32;
+ i += 1;
+ }
+ output
+ },
+ )
}
- output
- };
-}
-
-/// The `simd_shuffle` intrinsic expects `u32`, so do error checking and conversion here.
-/// This trait hides `INDEX_IMPL` from the public API.
-trait Swizzle2Impl<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
- const INDEX_IMPL: [u32; OUTPUT_LANES];
-}
+ }
-impl<T, const INPUT_LANES: usize, const OUTPUT_LANES: usize> Swizzle2Impl<INPUT_LANES, OUTPUT_LANES>
- for T
-where
- T: Swizzle2<INPUT_LANES, OUTPUT_LANES> + ?Sized,
-{
- const INDEX_IMPL: [u32; OUTPUT_LANES] = {
- let mut output = [0; OUTPUT_LANES];
- let mut i = 0;
- while i < OUTPUT_LANES {
- let (offset, index) = match Self::INDEX[i] {
- Which::First(index) => (false, index),
- Which::Second(index) => (true, index),
- };
- assert!(index < INPUT_LANES, "source lane exceeds input lane count",);
+ /// Create a new mask from the elements of `mask`.
+ ///
+ /// Element `i` of the output is `mask[Self::INDEX[i]]`. Only the single input `mask` is
+ /// indexed; to select from two masks, use [`Swizzle::concat_swizzle_mask`].
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original inputs"]
+ fn swizzle_mask<T, const M: usize>(mask: Mask<T, M>) -> Mask<T, N>
+ where
+ T: MaskElement,
+ LaneCount<N>: SupportedLaneCount,
+ LaneCount<M>: SupportedLaneCount,
+ {
+ // SAFETY: all elements of this mask come from another mask
+ unsafe { Mask::from_int_unchecked(Self::swizzle(mask.to_int())) }
+ }
- // lanes are indexed by the first vector, then second vector
- let index = if offset { index + INPUT_LANES } else { index };
- assert!(index as u32 as usize == index);
- output[i] = index as u32;
- i += 1;
- }
- output
- };
+ /// Create a new mask from the elements of `first` and `second`.
+ ///
+ /// Element `i` of the output is `concat[Self::INDEX[i]]`, where `concat` is the concatenation of
+ /// `first` and `second`.
+ #[inline]
+ #[must_use = "method returns a new mask and does not mutate the original inputs"]
+ fn concat_swizzle_mask<T, const M: usize>(first: Mask<T, M>, second: Mask<T, M>) -> Mask<T, N>
+ where
+ T: MaskElement,
+ LaneCount<N>: SupportedLaneCount,
+ LaneCount<M>: SupportedLaneCount,
+ {
+ // SAFETY: all elements of this mask come from another mask
+ unsafe { Mask::from_int_unchecked(Self::concat_swizzle(first.to_int(), second.to_int())) }
+ }
}
-impl<T, const LANES: usize> Simd<T, LANES>
+impl<T, const N: usize> Simd<T, N>
where
T: SimdElement,
- LaneCount<LANES>: SupportedLaneCount,
+ LaneCount<N>: SupportedLaneCount,
{
- /// Reverse the order of the lanes in the vector.
+ /// Reverse the order of the elements in the vector.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn reverse(self) -> Self {
- const fn reverse_index<const LANES: usize>() -> [usize; LANES] {
- let mut index = [0; LANES];
- let mut i = 0;
- while i < LANES {
- index[i] = LANES - i - 1;
- i += 1;
- }
- index
- }
-
struct Reverse;
- impl<const LANES: usize> Swizzle<LANES, LANES> for Reverse {
- const INDEX: [usize; LANES] = reverse_index::<LANES>();
+ impl<const N: usize> Swizzle<N> for Reverse {
+ const INDEX: [usize; N] = const {
+ let mut index = [0; N];
+ let mut i = 0;
+ while i < N {
+ index[i] = N - i - 1;
+ i += 1;
+ }
+ index
+ };
}
Reverse::swizzle(self)
}
/// Rotates the vector such that the first `OFFSET` elements of the slice move to the end
- /// while the last `LANES - OFFSET` elements move to the front. After calling `rotate_lanes_left`,
- /// the element previously in lane `OFFSET` will become the first element in the slice.
+ /// while the last `self.len() - OFFSET` elements move to the front. After calling `rotate_elements_left`,
+ /// the element previously at index `OFFSET` will become the first element in the slice.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
- pub fn rotate_lanes_left<const OFFSET: usize>(self) -> Self {
- const fn rotate_index<const OFFSET: usize, const LANES: usize>() -> [usize; LANES] {
- let offset = OFFSET % LANES;
- let mut index = [0; LANES];
- let mut i = 0;
- while i < LANES {
- index[i] = (i + offset) % LANES;
- i += 1;
- }
- index
- }
-
+ pub fn rotate_elements_left<const OFFSET: usize>(self) -> Self {
struct Rotate<const OFFSET: usize>;
- impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for Rotate<OFFSET> {
- const INDEX: [usize; LANES] = rotate_index::<OFFSET, LANES>();
+ impl<const OFFSET: usize, const N: usize> Swizzle<N> for Rotate<OFFSET> {
+ const INDEX: [usize; N] = const {
+ let offset = OFFSET % N;
+ let mut index = [0; N];
+ let mut i = 0;
+ while i < N {
+ index[i] = (i + offset) % N;
+ i += 1;
+ }
+ index
+ };
}
Rotate::<OFFSET>::swizzle(self)
}
- /// Rotates the vector such that the first `LANES - OFFSET` elements of the vector move to
- /// the end while the last `OFFSET` elements move to the front. After calling `rotate_lanes_right`,
- /// the element previously at index `LANES - OFFSET` will become the first element in the slice.
+ /// Rotates the vector such that the first `self.len() - OFFSET` elements of the vector move to
+ /// the end while the last `OFFSET` elements move to the front. After calling `rotate_elements_right`,
+ /// the element previously at index `self.len() - OFFSET` will become the first element in the vector.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
- pub fn rotate_lanes_right<const OFFSET: usize>(self) -> Self {
- const fn rotate_index<const OFFSET: usize, const LANES: usize>() -> [usize; LANES] {
- let offset = LANES - OFFSET % LANES;
- let mut index = [0; LANES];
- let mut i = 0;
- while i < LANES {
- index[i] = (i + offset) % LANES;
- i += 1;
- }
- index
- }
-
+ pub fn rotate_elements_right<const OFFSET: usize>(self) -> Self {
struct Rotate<const OFFSET: usize>;
- impl<const OFFSET: usize, const LANES: usize> Swizzle<LANES, LANES> for Rotate<OFFSET> {
- const INDEX: [usize; LANES] = rotate_index::<OFFSET, LANES>();
+ impl<const OFFSET: usize, const N: usize> Swizzle<N> for Rotate<OFFSET> {
+ const INDEX: [usize; N] = const {
+ let offset = N - OFFSET % N;
+ let mut index = [0; N];
+ let mut i = 0;
+ while i < N {
+ index[i] = (i + offset) % N;
+ i += 1;
+ }
+ index
+ };
}
Rotate::<OFFSET>::swizzle(self)
@@ -265,7 +255,7 @@ where
/// Interleave two vectors.
///
- /// The resulting vectors contain lanes taken alternatively from `self` and `other`, first
+ /// The resulting vectors contain elements taken alternately from `self` and `other`, first
/// filling the first result, and then the second.
///
/// The reverse of this operation is [`Simd::deinterleave`].
@@ -282,18 +272,13 @@ where
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn interleave(self, other: Self) -> (Self, Self) {
- const fn interleave<const LANES: usize>(high: bool) -> [Which; LANES] {
- let mut idx = [Which::First(0); LANES];
+ const fn interleave<const N: usize>(high: bool) -> [usize; N] {
+ let mut idx = [0; N];
let mut i = 0;
- while i < LANES {
- // Treat the source as a concatenated vector
- let dst_index = if high { i + LANES } else { i };
- let src_index = dst_index / 2 + (dst_index % 2) * LANES;
- idx[i] = if src_index < LANES {
- Which::First(src_index)
- } else {
- Which::Second(src_index % LANES)
- };
+ while i < N {
+ let dst_index = if high { i + N } else { i };
+ let src_index = dst_index / 2 + (dst_index % 2) * N;
+ idx[i] = src_index;
i += 1;
}
idx
@@ -302,24 +287,27 @@ where
struct Lo;
struct Hi;
- impl<const LANES: usize> Swizzle2<LANES, LANES> for Lo {
- const INDEX: [Which; LANES] = interleave::<LANES>(false);
+ impl<const N: usize> Swizzle<N> for Lo {
+ const INDEX: [usize; N] = interleave::<N>(false);
}
- impl<const LANES: usize> Swizzle2<LANES, LANES> for Hi {
- const INDEX: [Which; LANES] = interleave::<LANES>(true);
+ impl<const N: usize> Swizzle<N> for Hi {
+ const INDEX: [usize; N] = interleave::<N>(true);
}
- (Lo::swizzle2(self, other), Hi::swizzle2(self, other))
+ (
+ Lo::concat_swizzle(self, other),
+ Hi::concat_swizzle(self, other),
+ )
}
/// Deinterleave two vectors.
///
- /// The first result takes every other lane of `self` and then `other`, starting with
- /// the first lane.
+ /// The first result takes every other element of `self` and then `other`, starting with
+ /// the first element.
///
- /// The second result takes every other lane of `self` and then `other`, starting with
- /// the second lane.
+ /// The second result takes every other element of `self` and then `other`, starting with
+ /// the second element.
///
/// The reverse of this operation is [`Simd::interleave`].
///
@@ -335,17 +323,11 @@ where
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn deinterleave(self, other: Self) -> (Self, Self) {
- const fn deinterleave<const LANES: usize>(second: bool) -> [Which; LANES] {
- let mut idx = [Which::First(0); LANES];
+ const fn deinterleave<const N: usize>(second: bool) -> [usize; N] {
+ let mut idx = [0; N];
let mut i = 0;
- while i < LANES {
- // Treat the source as a concatenated vector
- let src_index = i * 2 + second as usize;
- idx[i] = if src_index < LANES {
- Which::First(src_index)
- } else {
- Which::Second(src_index % LANES)
- };
+ while i < N {
+ idx[i] = i * 2 + second as usize;
i += 1;
}
idx
@@ -354,14 +336,52 @@ where
struct Even;
struct Odd;
- impl<const LANES: usize> Swizzle2<LANES, LANES> for Even {
- const INDEX: [Which; LANES] = deinterleave::<LANES>(false);
+ impl<const N: usize> Swizzle<N> for Even {
+ const INDEX: [usize; N] = deinterleave::<N>(false);
}
- impl<const LANES: usize> Swizzle2<LANES, LANES> for Odd {
- const INDEX: [Which; LANES] = deinterleave::<LANES>(true);
+ impl<const N: usize> Swizzle<N> for Odd {
+ const INDEX: [usize; N] = deinterleave::<N>(true);
}
- (Even::swizzle2(self, other), Odd::swizzle2(self, other))
+ (
+ Even::concat_swizzle(self, other),
+ Odd::concat_swizzle(self, other),
+ )
+ }
+
+ /// Resize a vector.
+ ///
+ /// If `M` > `N`, extends the length of a vector, setting the new elements to `value`.
+ /// If `M` < `N`, truncates the vector to the first `M` elements.
+ ///
+ /// ```
+ /// # #![feature(portable_simd)]
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::u32x4;
+ /// let x = u32x4::from_array([0, 1, 2, 3]);
+ /// assert_eq!(x.resize::<8>(9).to_array(), [0, 1, 2, 3, 9, 9, 9, 9]);
+ /// assert_eq!(x.resize::<2>(9).to_array(), [0, 1]);
+ /// ```
+ #[inline]
+ #[must_use = "method returns a new vector and does not mutate the original inputs"]
+ pub fn resize<const M: usize>(self, value: T) -> Simd<T, M>
+ where
+ LaneCount<M>: SupportedLaneCount,
+ {
+ struct Resize<const N: usize>;
+ impl<const N: usize, const M: usize> Swizzle<M> for Resize<N> {
+ const INDEX: [usize; M] = const {
+ let mut index = [0; M];
+ let mut i = 0;
+ while i < M {
+ index[i] = if i < N { i } else { N };
+ i += 1;
+ }
+ index
+ };
+ }
+ Resize::<N>::concat_swizzle(self, Simd::splat(value))
}
}
diff --git a/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs b/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs
index ce6217925..dac013cc9 100644
--- a/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs
+++ b/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs
@@ -55,7 +55,7 @@ where
16 => transize(vqtbl1q_u8, self, idxs),
#[cfg(all(target_feature = "avx2", not(target_feature = "avx512vbmi")))]
32 => transize_raw(avx2_pshufb, self, idxs),
- #[cfg(target_feature = "avx512vl,avx512vbmi")]
+ #[cfg(all(target_feature = "avx512vl", target_feature = "avx512vbmi"))]
32 => transize(x86::_mm256_permutexvar_epi8, self, idxs),
// Notable absence: avx512bw shuffle
// If avx512bw is available, odds of avx512vbmi are good
@@ -86,7 +86,7 @@ where
#[inline]
#[allow(clippy::let_and_return)]
unsafe fn avx2_pshufb(bytes: Simd<u8, 32>, idxs: Simd<u8, 32>) -> Simd<u8, 32> {
- use crate::simd::SimdPartialOrd;
+ use crate::simd::cmp::SimdPartialOrd;
#[cfg(target_arch = "x86")]
use core::arch::x86;
#[cfg(target_arch = "x86_64")]
@@ -149,7 +149,7 @@ where
// On x86, make sure the top bit is set.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
let idxs = {
- use crate::simd::SimdPartialOrd;
+ use crate::simd::cmp::SimdPartialOrd;
idxs.simd_lt(Simd::splat(N as u8))
.select(idxs, Simd::splat(u8::MAX))
};
diff --git a/library/portable-simd/crates/core_simd/src/to_bytes.rs b/library/portable-simd/crates/core_simd/src/to_bytes.rs
index b36b1a347..222526c4a 100644
--- a/library/portable-simd/crates/core_simd/src/to_bytes.rs
+++ b/library/portable-simd/crates/core_simd/src/to_bytes.rs
@@ -1,24 +1,125 @@
+use crate::simd::{
+ num::{SimdFloat, SimdInt, SimdUint},
+ LaneCount, Simd, SimdElement, SupportedLaneCount,
+};
+
+mod sealed {
+ use super::*;
+ pub trait Sealed {}
+ impl<T: SimdElement, const N: usize> Sealed for Simd<T, N> where LaneCount<N>: SupportedLaneCount {}
+}
+use sealed::Sealed;
+
+/// Convert SIMD vectors to vectors of bytes
+pub trait ToBytes: Sealed {
+ /// This type, reinterpreted as bytes.
+ type Bytes: Copy
+ + Unpin
+ + Send
+ + Sync
+ + AsRef<[u8]>
+ + AsMut<[u8]>
+ + SimdUint<Scalar = u8>
+ + 'static;
+
+ /// Return the memory representation of this integer as a byte array in native byte
+ /// order.
+ fn to_ne_bytes(self) -> Self::Bytes;
+
+ /// Return the memory representation of this integer as a byte array in big-endian
+ /// (network) byte order.
+ fn to_be_bytes(self) -> Self::Bytes;
+
+ /// Return the memory representation of this integer as a byte array in little-endian
+ /// byte order.
+ fn to_le_bytes(self) -> Self::Bytes;
+
+ /// Create a native endian integer value from its memory representation as a byte array
+ /// in native endianness.
+ fn from_ne_bytes(bytes: Self::Bytes) -> Self;
+
+ /// Create an integer value from its representation as a byte array in big endian.
+ fn from_be_bytes(bytes: Self::Bytes) -> Self;
+
+ /// Create an integer value from its representation as a byte array in little endian.
+ fn from_le_bytes(bytes: Self::Bytes) -> Self;
+}
+
+macro_rules! swap_bytes {
+ { f32, $x:expr } => { Simd::from_bits($x.to_bits().swap_bytes()) };
+ { f64, $x:expr } => { Simd::from_bits($x.to_bits().swap_bytes()) };
+ { $ty:ty, $x:expr } => { $x.swap_bytes() }
+}
+
macro_rules! impl_to_bytes {
- { $ty:ty, $size:literal } => {
- impl<const LANES: usize> crate::simd::Simd<$ty, LANES>
- where
- crate::simd::LaneCount<LANES>: crate::simd::SupportedLaneCount,
- crate::simd::LaneCount<{{ $size * LANES }}>: crate::simd::SupportedLaneCount,
- {
- /// Return the memory representation of this integer as a byte array in native byte
- /// order.
- pub fn to_ne_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
+ { $ty:tt, 1 } => { impl_to_bytes! { $ty, 1 * [1, 2, 4, 8, 16, 32, 64] } };
+ { $ty:tt, 2 } => { impl_to_bytes! { $ty, 2 * [1, 2, 4, 8, 16, 32] } };
+ { $ty:tt, 4 } => { impl_to_bytes! { $ty, 4 * [1, 2, 4, 8, 16] } };
+ { $ty:tt, 8 } => { impl_to_bytes! { $ty, 8 * [1, 2, 4, 8] } };
+ { $ty:tt, 16 } => { impl_to_bytes! { $ty, 16 * [1, 2, 4] } };
+ { $ty:tt, 32 } => { impl_to_bytes! { $ty, 32 * [1, 2] } };
+ { $ty:tt, 64 } => { impl_to_bytes! { $ty, 64 * [1] } };
+
+ { $ty:tt, $size:literal * [$($elems:literal),*] } => {
+ $(
+ impl ToBytes for Simd<$ty, $elems> {
+ type Bytes = Simd<u8, { $size * $elems }>;
+
+ #[inline]
+ fn to_ne_bytes(self) -> Self::Bytes {
// Safety: transmuting between vectors is safe
- unsafe { core::mem::transmute_copy(&self) }
+ unsafe {
+ #![allow(clippy::useless_transmute)]
+ core::mem::transmute(self)
+ }
+ }
+
+ #[inline]
+ fn to_be_bytes(mut self) -> Self::Bytes {
+ if !cfg!(target_endian = "big") {
+ self = swap_bytes!($ty, self);
+ }
+ self.to_ne_bytes()
+ }
+
+ #[inline]
+ fn to_le_bytes(mut self) -> Self::Bytes {
+ if !cfg!(target_endian = "little") {
+ self = swap_bytes!($ty, self);
+ }
+ self.to_ne_bytes()
}
- /// Create a native endian integer value from its memory representation as a byte array
- /// in native endianness.
- pub fn from_ne_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self {
+ #[inline]
+ fn from_ne_bytes(bytes: Self::Bytes) -> Self {
// Safety: transmuting between vectors is safe
- unsafe { core::mem::transmute_copy(&bytes) }
+ unsafe {
+ #![allow(clippy::useless_transmute)]
+ core::mem::transmute(bytes)
+ }
+ }
+
+ #[inline]
+ fn from_be_bytes(bytes: Self::Bytes) -> Self {
+ let ret = Self::from_ne_bytes(bytes);
+ if cfg!(target_endian = "big") {
+ ret
+ } else {
+ swap_bytes!($ty, ret)
+ }
+ }
+
+ #[inline]
+ fn from_le_bytes(bytes: Self::Bytes) -> Self {
+ let ret = Self::from_ne_bytes(bytes);
+ if cfg!(target_endian = "little") {
+ ret
+ } else {
+ swap_bytes!($ty, ret)
+ }
}
}
+ )*
}
}
@@ -39,3 +140,6 @@ impl_to_bytes! { i64, 8 }
impl_to_bytes! { isize, 4 }
#[cfg(target_pointer_width = "64")]
impl_to_bytes! { isize, 8 }
+
+impl_to_bytes! { f32, 4 }
+impl_to_bytes! { f64, 8 }
diff --git a/library/portable-simd/crates/core_simd/src/vector.rs b/library/portable-simd/crates/core_simd/src/vector.rs
index 9aa7bacfc..105c06741 100644
--- a/library/portable-simd/crates/core_simd/src/vector.rs
+++ b/library/portable-simd/crates/core_simd/src/vector.rs
@@ -1,6 +1,8 @@
use crate::simd::{
- intrinsics, LaneCount, Mask, MaskElement, SimdConstPtr, SimdMutPtr, SimdPartialOrd,
- SupportedLaneCount, Swizzle,
+ cmp::SimdPartialOrd,
+ intrinsics,
+ ptr::{SimdConstPtr, SimdMutPtr},
+ LaneCount, Mask, MaskElement, SupportedLaneCount, Swizzle,
};
use core::convert::{TryFrom, TryInto};
@@ -110,7 +112,7 @@ where
T: SimdElement,
{
/// Number of elements in this vector.
- pub const LANES: usize = N;
+ pub const LEN: usize = N;
/// Returns the number of elements in this SIMD vector.
///
@@ -118,13 +120,16 @@ where
///
/// ```
/// # #![feature(portable_simd)]
- /// # use core::simd::u32x4;
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::u32x4;
/// let v = u32x4::splat(0);
- /// assert_eq!(v.lanes(), 4);
+ /// assert_eq!(v.len(), 4);
/// ```
#[inline]
- pub const fn lanes(&self) -> usize {
- Self::LANES
+ #[allow(clippy::len_without_is_empty)]
+ pub const fn len(&self) -> usize {
+ Self::LEN
}
/// Constructs a new SIMD vector with all elements set to the given value.
@@ -133,7 +138,9 @@ where
///
/// ```
/// # #![feature(portable_simd)]
- /// # use core::simd::u32x4;
+ /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+ /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+ /// # use simd::u32x4;
/// let v = u32x4::splat(8);
/// assert_eq!(v.as_array(), &[8, 8, 8, 8]);
/// ```
@@ -142,10 +149,10 @@ where
// This is preferred over `[value; N]`, since it's explicitly a splat:
// https://github.com/rust-lang/rust/issues/97804
struct Splat;
- impl<const N: usize> Swizzle<1, N> for Splat {
+ impl<const N: usize> Swizzle<N> for Splat {
const INDEX: [usize; N] = [0; N];
}
- Splat::swizzle(Simd::<T, 1>::from([value]))
+ Splat::swizzle::<T, 1>(Simd::<T, 1>::from([value]))
}
/// Returns an array reference containing the entire SIMD vector.
@@ -271,7 +278,7 @@ where
#[track_caller]
pub const fn from_slice(slice: &[T]) -> Self {
assert!(
- slice.len() >= Self::LANES,
+ slice.len() >= Self::LEN,
"slice length must be at least the number of elements"
);
// SAFETY: We just checked that the slice contains
@@ -301,7 +308,7 @@ where
#[track_caller]
pub fn copy_to_slice(self, slice: &mut [T]) {
assert!(
- slice.len() >= Self::LANES,
+ slice.len() >= Self::LEN,
"slice length must be at least the number of elements"
);
// SAFETY: We just checked that the slice contains
@@ -394,7 +401,7 @@ where
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{Simd, SimdPartialOrd, Mask};
+ /// # use simd::{Simd, cmp::SimdPartialOrd, Mask};
/// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
/// let idxs = Simd::from_array([9, 3, 0, 5]); // Includes an out-of-bounds index
/// let alt = Simd::from_array([-5, -4, -3, -2]);
@@ -434,7 +441,7 @@ where
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{Simd, SimdConstPtr};
+ /// # use simd::prelude::*;
/// let values = [6, 2, 4, 9];
/// let offsets = Simd::from_array([1, 0, 0, 3]);
/// let source = Simd::splat(values.as_ptr()).wrapping_add(offsets);
@@ -467,7 +474,7 @@ where
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{Mask, Simd, SimdConstPtr};
+ /// # use simd::prelude::*;
/// let values = [6, 2, 4, 9];
/// let enable = Mask::from_array([true, true, false, true]);
/// let offsets = Simd::from_array([1, 0, 0, 3]);
@@ -550,7 +557,7 @@ where
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{Simd, SimdPartialOrd, Mask};
+ /// # use simd::{Simd, cmp::SimdPartialOrd, Mask};
/// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
/// let idxs = Simd::from_array([9, 3, 0, 0]);
/// let vals = Simd::from_array([-27, 82, -41, 124]);
@@ -604,7 +611,7 @@ where
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{Simd, SimdMutPtr};
+ /// # use simd::{Simd, ptr::SimdMutPtr};
/// let mut values = [0; 4];
/// let offset = Simd::from_array([3, 2, 1, 0]);
/// let ptrs = Simd::splat(values.as_mut_ptr()).wrapping_add(offset);
@@ -631,7 +638,7 @@ where
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
- /// # use simd::{Mask, Simd, SimdMutPtr};
+ /// # use simd::{Mask, Simd, ptr::SimdMutPtr};
/// let mut values = [0; 4];
/// let offset = Simd::from_array([3, 2, 1, 0]);
/// let ptrs = Simd::splat(values.as_mut_ptr()).wrapping_add(offset);
diff --git a/library/portable-simd/crates/core_simd/src/vendor.rs b/library/portable-simd/crates/core_simd/src/vendor.rs
index 9fb70218c..6223bedb4 100644
--- a/library/portable-simd/crates/core_simd/src/vendor.rs
+++ b/library/portable-simd/crates/core_simd/src/vendor.rs
@@ -21,7 +21,7 @@ macro_rules! from_transmute {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod x86;
-#[cfg(any(target_arch = "wasm32"))]
+#[cfg(target_arch = "wasm32")]
mod wasm32;
#[cfg(any(target_arch = "aarch64", target_arch = "arm",))]
diff --git a/library/portable-simd/crates/core_simd/src/vendor/x86.rs b/library/portable-simd/crates/core_simd/src/vendor/x86.rs
index 0dd47015e..66aaf90ee 100644
--- a/library/portable-simd/crates/core_simd/src/vendor/x86.rs
+++ b/library/portable-simd/crates/core_simd/src/vendor/x86.rs
@@ -1,6 +1,6 @@
use crate::simd::*;
-#[cfg(any(target_arch = "x86"))]
+#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
diff --git a/library/portable-simd/crates/core_simd/tests/cast.rs b/library/portable-simd/crates/core_simd/tests/cast.rs
index 00545936e..185e1945f 100644
--- a/library/portable-simd/crates/core_simd/tests/cast.rs
+++ b/library/portable-simd/crates/core_simd/tests/cast.rs
@@ -3,7 +3,7 @@ macro_rules! cast_types {
($start:ident, $($target:ident),*) => {
mod $start {
#[allow(unused)]
- use core_simd::simd::{Simd, SimdInt, SimdUint, SimdFloat};
+ use core_simd::simd::prelude::*;
type Vector<const N: usize> = Simd<$start, N>;
$(
mod $target {
diff --git a/library/portable-simd/crates/core_simd/tests/masks.rs b/library/portable-simd/crates/core_simd/tests/masks.rs
index 9f8bad1c3..00fc2a24e 100644
--- a/library/portable-simd/crates/core_simd/tests/masks.rs
+++ b/library/portable-simd/crates/core_simd/tests/masks.rs
@@ -72,7 +72,6 @@ macro_rules! test_mask_api {
#[test]
fn roundtrip_bitmask_conversion() {
- use core_simd::simd::ToBitMask;
let values = [
true, false, false, true, false, false, true, false,
true, true, false, false, false, false, false, true,
@@ -85,8 +84,6 @@ macro_rules! test_mask_api {
#[test]
fn roundtrip_bitmask_conversion_short() {
- use core_simd::simd::ToBitMask;
-
let values = [
false, false, false, true,
];
@@ -125,18 +122,17 @@ macro_rules! test_mask_api {
cast_impl::<isize>();
}
- #[cfg(feature = "generic_const_exprs")]
#[test]
- fn roundtrip_bitmask_array_conversion() {
- use core_simd::simd::ToBitMaskArray;
+ fn roundtrip_bitmask_vector_conversion() {
+ use core_simd::simd::ToBytes;
let values = [
true, false, false, true, false, false, true, false,
true, true, false, false, false, false, false, true,
];
let mask = Mask::<$type, 16>::from_array(values);
- let bitmask = mask.to_bitmask_array();
- assert_eq!(bitmask, [0b01001001, 0b10000011]);
- assert_eq!(Mask::<$type, 16>::from_bitmask_array(bitmask), mask);
+ let bitmask = mask.to_bitmask_vector();
+ assert_eq!(bitmask.resize::<2>(0).to_ne_bytes()[..2], [0b01001001, 0b10000011]);
+ assert_eq!(Mask::<$type, 16>::from_bitmask_vector(bitmask), mask);
}
}
}
diff --git a/library/portable-simd/crates/core_simd/tests/ops_macros.rs b/library/portable-simd/crates/core_simd/tests/ops_macros.rs
index 3a02f3f01..aa565a137 100644
--- a/library/portable-simd/crates/core_simd/tests/ops_macros.rs
+++ b/library/portable-simd/crates/core_simd/tests/ops_macros.rs
@@ -6,7 +6,7 @@ macro_rules! impl_unary_op_test {
{ $scalar:ty, $trait:ident :: $fn:ident, $scalar_fn:expr } => {
test_helpers::test_lanes! {
fn $fn<const LANES: usize>() {
- test_helpers::test_unary_elementwise(
+ test_helpers::test_unary_elementwise_flush_subnormals(
&<core_simd::simd::Simd<$scalar, LANES> as core::ops::$trait>::$fn,
&$scalar_fn,
&|_| true,
@@ -31,7 +31,7 @@ macro_rules! impl_binary_op_test {
test_helpers::test_lanes! {
fn normal<const LANES: usize>() {
- test_helpers::test_binary_elementwise(
+ test_helpers::test_binary_elementwise_flush_subnormals(
&<Simd<$scalar, LANES> as core::ops::$trait>::$fn,
&$scalar_fn,
&|_, _| true,
@@ -39,7 +39,7 @@ macro_rules! impl_binary_op_test {
}
fn assign<const LANES: usize>() {
- test_helpers::test_binary_elementwise(
+ test_helpers::test_binary_elementwise_flush_subnormals(
&|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
&$scalar_fn,
&|_, _| true,
@@ -68,6 +68,7 @@ macro_rules! impl_binary_checked_op_test {
test_helpers::test_lanes! {
fn normal<const LANES: usize>() {
+ #![allow(clippy::redundant_closure_call)]
test_helpers::test_binary_elementwise(
&<Simd<$scalar, LANES> as core::ops::$trait>::$fn,
&$scalar_fn,
@@ -76,6 +77,7 @@ macro_rules! impl_binary_checked_op_test {
}
fn assign<const LANES: usize>() {
+ #![allow(clippy::redundant_closure_call)]
test_helpers::test_binary_elementwise(
&|mut a, b| { <Simd<$scalar, LANES> as core::ops::$trait_assign>::$fn_assign(&mut a, b); a },
&$scalar_fn,
@@ -94,11 +96,43 @@ macro_rules! impl_binary_checked_op_test {
macro_rules! impl_common_integer_tests {
{ $vector:ident, $scalar:ident } => {
test_helpers::test_lanes! {
+ fn shr<const LANES: usize>() {
+ use core::ops::Shr;
+ let shr = |x: $scalar, y: $scalar| x.wrapping_shr(y as _);
+ test_helpers::test_binary_elementwise(
+ &<$vector::<LANES> as Shr<$vector::<LANES>>>::shr,
+ &shr,
+ &|_, _| true,
+ );
+ test_helpers::test_binary_scalar_rhs_elementwise(
+ &<$vector::<LANES> as Shr<$scalar>>::shr,
+ &shr,
+ &|_, _| true,
+ );
+ }
+
+ fn shl<const LANES: usize>() {
+ use core::ops::Shl;
+ let shl = |x: $scalar, y: $scalar| x.wrapping_shl(y as _);
+ test_helpers::test_binary_elementwise(
+ &<$vector::<LANES> as Shl<$vector::<LANES>>>::shl,
+ &shl,
+ &|_, _| true,
+ );
+ test_helpers::test_binary_scalar_rhs_elementwise(
+ &<$vector::<LANES> as Shl<$scalar>>::shl,
+ &shl,
+ &|_, _| true,
+ );
+ }
+
fn reduce_sum<const LANES: usize>() {
test_helpers::test_1(&|x| {
+ use test_helpers::subnormals::{flush, flush_in};
test_helpers::prop_assert_biteq! (
$vector::<LANES>::from_array(x).reduce_sum(),
x.iter().copied().fold(0 as $scalar, $scalar::wrapping_add),
+ flush(x.iter().copied().map(flush_in).fold(0 as $scalar, $scalar::wrapping_add)),
);
Ok(())
});
@@ -106,9 +140,11 @@ macro_rules! impl_common_integer_tests {
fn reduce_product<const LANES: usize>() {
test_helpers::test_1(&|x| {
+ use test_helpers::subnormals::{flush, flush_in};
test_helpers::prop_assert_biteq! (
$vector::<LANES>::from_array(x).reduce_product(),
x.iter().copied().fold(1 as $scalar, $scalar::wrapping_mul),
+ flush(x.iter().copied().map(flush_in).fold(1 as $scalar, $scalar::wrapping_mul)),
);
Ok(())
});
@@ -163,6 +199,54 @@ macro_rules! impl_common_integer_tests {
Ok(())
});
}
+
+ fn swap_bytes<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &$vector::<LANES>::swap_bytes,
+ &$scalar::swap_bytes,
+ &|_| true,
+ )
+ }
+
+ fn reverse_bits<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &$vector::<LANES>::reverse_bits,
+ &$scalar::reverse_bits,
+ &|_| true,
+ )
+ }
+
+ fn leading_zeros<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &$vector::<LANES>::leading_zeros,
+ &|x| x.leading_zeros() as _,
+ &|_| true,
+ )
+ }
+
+ fn trailing_zeros<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &$vector::<LANES>::trailing_zeros,
+ &|x| x.trailing_zeros() as _,
+ &|_| true,
+ )
+ }
+
+ fn leading_ones<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &$vector::<LANES>::leading_ones,
+ &|x| x.leading_ones() as _,
+ &|_| true,
+ )
+ }
+
+ fn trailing_ones<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &$vector::<LANES>::trailing_ones,
+ &|x| x.trailing_ones() as _,
+ &|_| true,
+ )
+ }
}
}
}
@@ -172,7 +256,7 @@ macro_rules! impl_common_integer_tests {
macro_rules! impl_signed_tests {
{ $scalar:tt } => {
mod $scalar {
- use core_simd::simd::SimdInt;
+ use core_simd::simd::num::SimdInt;
type Vector<const LANES: usize> = core_simd::simd::Simd<Scalar, LANES>;
type Scalar = $scalar;
@@ -224,7 +308,7 @@ macro_rules! impl_signed_tests {
}
fn simd_min<const LANES: usize>() {
- use core_simd::simd::SimdOrd;
+ use core_simd::simd::cmp::SimdOrd;
let a = Vector::<LANES>::splat(Scalar::MIN);
let b = Vector::<LANES>::splat(0);
assert_eq!(a.simd_min(b), a);
@@ -234,7 +318,7 @@ macro_rules! impl_signed_tests {
}
fn simd_max<const LANES: usize>() {
- use core_simd::simd::SimdOrd;
+ use core_simd::simd::cmp::SimdOrd;
let a = Vector::<LANES>::splat(Scalar::MIN);
let b = Vector::<LANES>::splat(0);
assert_eq!(a.simd_max(b), b);
@@ -244,7 +328,7 @@ macro_rules! impl_signed_tests {
}
fn simd_clamp<const LANES: usize>() {
- use core_simd::simd::SimdOrd;
+ use core_simd::simd::cmp::SimdOrd;
let min = Vector::<LANES>::splat(Scalar::MIN);
let max = Vector::<LANES>::splat(Scalar::MAX);
let zero = Vector::<LANES>::splat(0);
@@ -313,7 +397,7 @@ macro_rules! impl_signed_tests {
macro_rules! impl_unsigned_tests {
{ $scalar:tt } => {
mod $scalar {
- use core_simd::simd::SimdUint;
+ use core_simd::simd::num::SimdUint;
type Vector<const LANES: usize> = core_simd::simd::Simd<Scalar, LANES>;
type Scalar = $scalar;
@@ -327,6 +411,16 @@ macro_rules! impl_unsigned_tests {
}
}
+ test_helpers::test_lanes! {
+ fn wrapping_neg<const LANES: usize>() {
+ test_helpers::test_unary_elementwise(
+ &Vector::<LANES>::wrapping_neg,
+ &Scalar::wrapping_neg,
+ &|_| true,
+ );
+ }
+ }
+
impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign, Scalar::wrapping_add);
impl_binary_op_test!(Scalar, Sub::sub, SubAssign::sub_assign, Scalar::wrapping_sub);
impl_binary_op_test!(Scalar, Mul::mul, MulAssign::mul_assign, Scalar::wrapping_mul);
@@ -348,7 +442,7 @@ macro_rules! impl_unsigned_tests {
macro_rules! impl_float_tests {
{ $scalar:tt, $int_scalar:tt } => {
mod $scalar {
- use core_simd::simd::SimdFloat;
+ use core_simd::simd::num::SimdFloat;
type Vector<const LANES: usize> = core_simd::simd::Simd<Scalar, LANES>;
type Scalar = $scalar;
@@ -433,7 +527,7 @@ macro_rules! impl_float_tests {
}
fn to_degrees<const LANES: usize>() {
- test_helpers::test_unary_elementwise(
+ test_helpers::test_unary_elementwise_flush_subnormals(
&Vector::<LANES>::to_degrees,
&Scalar::to_degrees,
&|_| true,
@@ -441,7 +535,7 @@ macro_rules! impl_float_tests {
}
fn to_radians<const LANES: usize>() {
- test_helpers::test_unary_elementwise(
+ test_helpers::test_unary_elementwise_flush_subnormals(
&Vector::<LANES>::to_radians,
&Scalar::to_radians,
&|_| true,
@@ -511,7 +605,12 @@ macro_rules! impl_float_tests {
}
fn simd_clamp<const LANES: usize>() {
+ if cfg!(all(target_arch = "powerpc64", target_feature = "vsx")) {
+ // https://gitlab.com/qemu-project/qemu/-/issues/1780
+ return;
+ }
test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| {
+ use test_helpers::subnormals::flush_in;
for (min, max) in min.iter_mut().zip(max.iter_mut()) {
if max < min {
core::mem::swap(min, max);
@@ -528,8 +627,20 @@ macro_rules! impl_float_tests {
for i in 0..LANES {
result_scalar[i] = value[i].clamp(min[i], max[i]);
}
+ let mut result_scalar_flush = [Scalar::default(); LANES];
+ for i in 0..LANES {
+ // Comparisons flush-to-zero, but return value selection is _not_ flushed.
+ let mut value = value[i];
+ if flush_in(value) < flush_in(min[i]) {
+ value = min[i];
+ }
+ if flush_in(value) > flush_in(max[i]) {
+ value = max[i];
+ }
+ result_scalar_flush[i] = value
+ }
let result_vector = Vector::from_array(value).simd_clamp(min.into(), max.into()).to_array();
- test_helpers::prop_assert_biteq!(result_scalar, result_vector);
+ test_helpers::prop_assert_biteq!(result_vector, result_scalar, result_scalar_flush);
Ok(())
})
}
diff --git a/library/portable-simd/crates/core_simd/tests/pointers.rs b/library/portable-simd/crates/core_simd/tests/pointers.rs
index 0ae8f83b8..b9f32d16e 100644
--- a/library/portable-simd/crates/core_simd/tests/pointers.rs
+++ b/library/portable-simd/crates/core_simd/tests/pointers.rs
@@ -1,6 +1,9 @@
-#![feature(portable_simd, strict_provenance)]
+#![feature(portable_simd, strict_provenance, exposed_provenance)]
-use core_simd::simd::{Simd, SimdConstPtr, SimdMutPtr};
+use core_simd::simd::{
+ ptr::{SimdConstPtr, SimdMutPtr},
+ Simd,
+};
macro_rules! common_tests {
{ $constness:ident } => {
diff --git a/library/portable-simd/crates/core_simd/tests/round.rs b/library/portable-simd/crates/core_simd/tests/round.rs
index aacf7bd3b..847766ec4 100644
--- a/library/portable-simd/crates/core_simd/tests/round.rs
+++ b/library/portable-simd/crates/core_simd/tests/round.rs
@@ -43,7 +43,7 @@ macro_rules! float_rounding_test {
}
fn fract<const LANES: usize>() {
- test_helpers::test_unary_elementwise(
+ test_helpers::test_unary_elementwise_flush_subnormals(
&Vector::<LANES>::fract,
&Scalar::fract,
&|_| true,
@@ -53,7 +53,7 @@ macro_rules! float_rounding_test {
test_helpers::test_lanes! {
fn to_int_unchecked<const LANES: usize>() {
- use core_simd::simd::SimdFloat;
+ use core_simd::simd::num::SimdFloat;
// The maximum integer that can be represented by the equivalently sized float has
// all of the mantissa digits set to 1, pushed up to the MSB.
const ALL_MANTISSA_BITS: IntScalar = ((1 << <Scalar>::MANTISSA_DIGITS) - 1);
diff --git a/library/portable-simd/crates/core_simd/tests/swizzle.rs b/library/portable-simd/crates/core_simd/tests/swizzle.rs
index 8cd7c33e8..522d71439 100644
--- a/library/portable-simd/crates/core_simd/tests/swizzle.rs
+++ b/library/portable-simd/crates/core_simd/tests/swizzle.rs
@@ -11,10 +11,10 @@ wasm_bindgen_test_configure!(run_in_browser);
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn swizzle() {
struct Index;
- impl Swizzle<4, 4> for Index {
+ impl Swizzle<4> for Index {
const INDEX: [usize; 4] = [2, 1, 3, 0];
}
- impl Swizzle<4, 2> for Index {
+ impl Swizzle<2> for Index {
const INDEX: [usize; 2] = [1, 1];
}
@@ -34,18 +34,18 @@ fn reverse() {
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn rotate() {
let a = Simd::from_array([1, 2, 3, 4]);
- assert_eq!(a.rotate_lanes_left::<0>().to_array(), [1, 2, 3, 4]);
- assert_eq!(a.rotate_lanes_left::<1>().to_array(), [2, 3, 4, 1]);
- assert_eq!(a.rotate_lanes_left::<2>().to_array(), [3, 4, 1, 2]);
- assert_eq!(a.rotate_lanes_left::<3>().to_array(), [4, 1, 2, 3]);
- assert_eq!(a.rotate_lanes_left::<4>().to_array(), [1, 2, 3, 4]);
- assert_eq!(a.rotate_lanes_left::<5>().to_array(), [2, 3, 4, 1]);
- assert_eq!(a.rotate_lanes_right::<0>().to_array(), [1, 2, 3, 4]);
- assert_eq!(a.rotate_lanes_right::<1>().to_array(), [4, 1, 2, 3]);
- assert_eq!(a.rotate_lanes_right::<2>().to_array(), [3, 4, 1, 2]);
- assert_eq!(a.rotate_lanes_right::<3>().to_array(), [2, 3, 4, 1]);
- assert_eq!(a.rotate_lanes_right::<4>().to_array(), [1, 2, 3, 4]);
- assert_eq!(a.rotate_lanes_right::<5>().to_array(), [4, 1, 2, 3]);
+ assert_eq!(a.rotate_elements_left::<0>().to_array(), [1, 2, 3, 4]);
+ assert_eq!(a.rotate_elements_left::<1>().to_array(), [2, 3, 4, 1]);
+ assert_eq!(a.rotate_elements_left::<2>().to_array(), [3, 4, 1, 2]);
+ assert_eq!(a.rotate_elements_left::<3>().to_array(), [4, 1, 2, 3]);
+ assert_eq!(a.rotate_elements_left::<4>().to_array(), [1, 2, 3, 4]);
+ assert_eq!(a.rotate_elements_left::<5>().to_array(), [2, 3, 4, 1]);
+ assert_eq!(a.rotate_elements_right::<0>().to_array(), [1, 2, 3, 4]);
+ assert_eq!(a.rotate_elements_right::<1>().to_array(), [4, 1, 2, 3]);
+ assert_eq!(a.rotate_elements_right::<2>().to_array(), [3, 4, 1, 2]);
+ assert_eq!(a.rotate_elements_right::<3>().to_array(), [2, 3, 4, 1]);
+ assert_eq!(a.rotate_elements_right::<4>().to_array(), [1, 2, 3, 4]);
+ assert_eq!(a.rotate_elements_right::<5>().to_array(), [4, 1, 2, 3]);
}
#[test]
diff --git a/library/portable-simd/crates/core_simd/tests/swizzle_dyn.rs b/library/portable-simd/crates/core_simd/tests/swizzle_dyn.rs
index 646cd5f33..f21a937f0 100644
--- a/library/portable-simd/crates/core_simd/tests/swizzle_dyn.rs
+++ b/library/portable-simd/crates/core_simd/tests/swizzle_dyn.rs
@@ -1,6 +1,5 @@
#![feature(portable_simd)]
use core::{fmt, ops::RangeInclusive};
-use proptest;
use test_helpers::{self, biteq, make_runner, prop_assert_biteq};
fn swizzle_dyn_scalar_ver<const N: usize>(values: [u8; N], idxs: [u8; N]) -> [u8; N] {
diff --git a/library/portable-simd/crates/core_simd/tests/to_bytes.rs b/library/portable-simd/crates/core_simd/tests/to_bytes.rs
index be0ee4349..66a7981cd 100644
--- a/library/portable-simd/crates/core_simd/tests/to_bytes.rs
+++ b/library/portable-simd/crates/core_simd/tests/to_bytes.rs
@@ -1,14 +1,20 @@
-#![feature(portable_simd, generic_const_exprs, adt_const_params)]
-#![allow(incomplete_features)]
-#![cfg(feature = "generic_const_exprs")]
+#![feature(portable_simd)]
-use core_simd::simd::Simd;
+use core_simd::simd::{Simd, ToBytes};
#[test]
fn byte_convert() {
let int = Simd::<u32, 2>::from_array([0xdeadbeef, 0x8badf00d]);
- let bytes = int.to_ne_bytes();
- assert_eq!(int[0].to_ne_bytes(), bytes[..4]);
- assert_eq!(int[1].to_ne_bytes(), bytes[4..]);
- assert_eq!(Simd::<u32, 2>::from_ne_bytes(bytes), int);
+ let ne_bytes = int.to_ne_bytes();
+ let be_bytes = int.to_be_bytes();
+ let le_bytes = int.to_le_bytes();
+ assert_eq!(int[0].to_ne_bytes(), ne_bytes[..4]);
+ assert_eq!(int[1].to_ne_bytes(), ne_bytes[4..]);
+ assert_eq!(int[0].to_be_bytes(), be_bytes[..4]);
+ assert_eq!(int[1].to_be_bytes(), be_bytes[4..]);
+ assert_eq!(int[0].to_le_bytes(), le_bytes[..4]);
+ assert_eq!(int[1].to_le_bytes(), le_bytes[4..]);
+ assert_eq!(Simd::<u32, 2>::from_ne_bytes(ne_bytes), int);
+ assert_eq!(Simd::<u32, 2>::from_be_bytes(be_bytes), int);
+ assert_eq!(Simd::<u32, 2>::from_le_bytes(le_bytes), int);
}
diff --git a/library/portable-simd/crates/std_float/src/lib.rs b/library/portable-simd/crates/std_float/src/lib.rs
index 4ac60b10c..1fef17242 100644
--- a/library/portable-simd/crates/std_float/src/lib.rs
+++ b/library/portable-simd/crates/std_float/src/lib.rs
@@ -1,5 +1,10 @@
#![cfg_attr(feature = "as_crate", no_std)] // We are std!
-#![cfg_attr(feature = "as_crate", feature(platform_intrinsics), feature(portable_simd))]
+#![cfg_attr(
+ feature = "as_crate",
+ feature(platform_intrinsics),
+ feature(portable_simd),
+ allow(internal_features)
+)]
#[cfg(not(feature = "as_crate"))]
use core::simd;
#[cfg(feature = "as_crate")]
@@ -144,7 +149,7 @@ where
#[cfg(test)]
mod tests {
use super::*;
- use simd::*;
+ use simd::prelude::*;
#[test]
fn everything_works() {
diff --git a/library/portable-simd/crates/test_helpers/Cargo.toml b/library/portable-simd/crates/test_helpers/Cargo.toml
index 1d2bc8b51..23dae7c93 100644
--- a/library/portable-simd/crates/test_helpers/Cargo.toml
+++ b/library/portable-simd/crates/test_helpers/Cargo.toml
@@ -4,10 +4,8 @@ version = "0.1.0"
edition = "2021"
publish = false
-[dependencies.proptest]
-version = "0.10"
-default-features = false
-features = ["alloc"]
+[dependencies]
+proptest = { version = "0.10", default-features = false, features = ["alloc"] }
[features]
all_lane_counts = []
diff --git a/library/portable-simd/crates/test_helpers/src/biteq.rs b/library/portable-simd/crates/test_helpers/src/biteq.rs
index 7d91260d8..cbc20cda0 100644
--- a/library/portable-simd/crates/test_helpers/src/biteq.rs
+++ b/library/portable-simd/crates/test_helpers/src/biteq.rs
@@ -113,6 +113,27 @@ impl<T: BitEq> core::fmt::Debug for BitEqWrapper<'_, T> {
}
}
+#[doc(hidden)]
+pub struct BitEqEitherWrapper<'a, T>(pub &'a T, pub &'a T);
+
+impl<T: BitEq> PartialEq<BitEqEitherWrapper<'_, T>> for BitEqWrapper<'_, T> {
+ fn eq(&self, other: &BitEqEitherWrapper<'_, T>) -> bool {
+ self.0.biteq(other.0) || self.0.biteq(other.1)
+ }
+}
+
+impl<T: BitEq> core::fmt::Debug for BitEqEitherWrapper<'_, T> {
+ fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+ if self.0.biteq(self.1) {
+ self.0.fmt(f)
+ } else {
+ self.0.fmt(f)?;
+ write!(f, " or ")?;
+ self.1.fmt(f)
+ }
+ }
+}
+
#[macro_export]
macro_rules! prop_assert_biteq {
{ $a:expr, $b:expr $(,)? } => {
@@ -122,5 +143,14 @@ macro_rules! prop_assert_biteq {
let b = $b;
proptest::prop_assert_eq!(BitEqWrapper(&a), BitEqWrapper(&b));
}
- }
+ };
+ { $a:expr, $b:expr, $c:expr $(,)? } => {
+ {
+ use $crate::biteq::{BitEqWrapper, BitEqEitherWrapper};
+ let a = $a;
+ let b = $b;
+ let c = $c;
+ proptest::prop_assert_eq!(BitEqWrapper(&a), BitEqEitherWrapper(&b, &c));
+ }
+ };
}
diff --git a/library/portable-simd/crates/test_helpers/src/lib.rs b/library/portable-simd/crates/test_helpers/src/lib.rs
index b26cdc311..b80c745aa 100644
--- a/library/portable-simd/crates/test_helpers/src/lib.rs
+++ b/library/portable-simd/crates/test_helpers/src/lib.rs
@@ -1,3 +1,5 @@
+#![feature(stdsimd, powerpc_target_feature)]
+
pub mod array;
#[cfg(target_arch = "wasm32")]
@@ -6,6 +8,9 @@ pub mod wasm;
#[macro_use]
pub mod biteq;
+pub mod subnormals;
+use subnormals::FlushSubnormals;
+
/// Specifies the default strategy for testing a type.
///
/// This strategy should be what "makes sense" to test.
@@ -151,7 +156,6 @@ pub fn test_3<
}
/// Test a unary vector function against a unary scalar function, applied elementwise.
-#[inline(never)]
pub fn test_unary_elementwise<Scalar, ScalarResult, Vector, VectorResult, const LANES: usize>(
fv: &dyn Fn(Vector) -> VectorResult,
fs: &dyn Fn(Scalar) -> ScalarResult,
@@ -178,6 +182,48 @@ pub fn test_unary_elementwise<Scalar, ScalarResult, Vector, VectorResult, const
}
/// Test a unary vector function against a unary scalar function, applied elementwise.
+///
+/// Where subnormals are flushed, accept either the exact scalar result or the subnormal-flushed scalar result.
+pub fn test_unary_elementwise_flush_subnormals<
+ Scalar,
+ ScalarResult,
+ Vector,
+ VectorResult,
+ const LANES: usize,
+>(
+ fv: &dyn Fn(Vector) -> VectorResult,
+ fs: &dyn Fn(Scalar) -> ScalarResult,
+ check: &dyn Fn([Scalar; LANES]) -> bool,
+) where
+ Scalar: Copy + core::fmt::Debug + DefaultStrategy + FlushSubnormals,
+ ScalarResult: Copy + biteq::BitEq + core::fmt::Debug + DefaultStrategy + FlushSubnormals,
+ Vector: Into<[Scalar; LANES]> + From<[Scalar; LANES]> + Copy,
+ VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy,
+{
+ let flush = |x: Scalar| subnormals::flush(fs(subnormals::flush_in(x)));
+ test_1(&|x: [Scalar; LANES]| {
+ proptest::prop_assume!(check(x));
+ let result_v: [ScalarResult; LANES] = fv(x.into()).into();
+ let result_s: [ScalarResult; LANES] = x
+ .iter()
+ .copied()
+ .map(fs)
+ .collect::<Vec<_>>()
+ .try_into()
+ .unwrap();
+ let result_sf: [ScalarResult; LANES] = x
+ .iter()
+ .copied()
+ .map(flush)
+ .collect::<Vec<_>>()
+ .try_into()
+ .unwrap();
+ crate::prop_assert_biteq!(result_v, result_s, result_sf);
+ Ok(())
+ });
+}
+
+/// Test a unary vector function against a unary scalar function, applied elementwise.
#[inline(never)]
pub fn test_unary_mask_elementwise<Scalar, Vector, Mask, const LANES: usize>(
fv: &dyn Fn(Vector) -> Mask,
@@ -204,7 +250,6 @@ pub fn test_unary_mask_elementwise<Scalar, Vector, Mask, const LANES: usize>(
}
/// Test a binary vector function against a binary scalar function, applied elementwise.
-#[inline(never)]
pub fn test_binary_elementwise<
Scalar1,
Scalar2,
@@ -241,6 +286,85 @@ pub fn test_binary_elementwise<
});
}
+/// Test a binary vector function against a binary scalar function, applied elementwise.
+///
+/// Where subnormals are flushed, accept either the exact scalar result or the subnormal-flushed scalar result.
+pub fn test_binary_elementwise_flush_subnormals<
+ Scalar1,
+ Scalar2,
+ ScalarResult,
+ Vector1,
+ Vector2,
+ VectorResult,
+ const LANES: usize,
+>(
+ fv: &dyn Fn(Vector1, Vector2) -> VectorResult,
+ fs: &dyn Fn(Scalar1, Scalar2) -> ScalarResult,
+ check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES]) -> bool,
+) where
+ Scalar1: Copy + core::fmt::Debug + DefaultStrategy + FlushSubnormals,
+ Scalar2: Copy + core::fmt::Debug + DefaultStrategy + FlushSubnormals,
+ ScalarResult: Copy + biteq::BitEq + core::fmt::Debug + DefaultStrategy + FlushSubnormals,
+ Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy,
+ Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy,
+ VectorResult: Into<[ScalarResult; LANES]> + From<[ScalarResult; LANES]> + Copy,
+{
+ let flush = |x: Scalar1, y: Scalar2| {
+ subnormals::flush(fs(subnormals::flush_in(x), subnormals::flush_in(y)))
+ };
+ test_2(&|x: [Scalar1; LANES], y: [Scalar2; LANES]| {
+ proptest::prop_assume!(check(x, y));
+ let result_v: [ScalarResult; LANES] = fv(x.into(), y.into()).into();
+ let result_s: [ScalarResult; LANES] = x
+ .iter()
+ .copied()
+ .zip(y.iter().copied())
+ .map(|(x, y)| fs(x, y))
+ .collect::<Vec<_>>()
+ .try_into()
+ .unwrap();
+ let result_sf: [ScalarResult; LANES] = x
+ .iter()
+ .copied()
+ .zip(y.iter().copied())
+ .map(|(x, y)| flush(x, y))
+ .collect::<Vec<_>>()
+ .try_into()
+ .unwrap();
+ crate::prop_assert_biteq!(result_v, result_s, result_sf);
+ Ok(())
+ });
+}
+
+/// Test a binary vector function returning a mask against a binary scalar predicate, applied elementwise.
+#[inline(never)]
+pub fn test_binary_mask_elementwise<Scalar1, Scalar2, Vector1, Vector2, Mask, const LANES: usize>(
+ fv: &dyn Fn(Vector1, Vector2) -> Mask,
+ fs: &dyn Fn(Scalar1, Scalar2) -> bool,
+ check: &dyn Fn([Scalar1; LANES], [Scalar2; LANES]) -> bool,
+) where
+ Scalar1: Copy + core::fmt::Debug + DefaultStrategy,
+ Scalar2: Copy + core::fmt::Debug + DefaultStrategy,
+ Vector1: Into<[Scalar1; LANES]> + From<[Scalar1; LANES]> + Copy,
+ Vector2: Into<[Scalar2; LANES]> + From<[Scalar2; LANES]> + Copy,
+ Mask: Into<[bool; LANES]> + From<[bool; LANES]> + Copy,
+{
+ test_2(&|x: [Scalar1; LANES], y: [Scalar2; LANES]| {
+ proptest::prop_assume!(check(x, y));
+ let result_v: [bool; LANES] = fv(x.into(), y.into()).into();
+ let result_s: [bool; LANES] = x
+ .iter()
+ .copied()
+ .zip(y.iter().copied())
+ .map(|(x, y)| fs(x, y))
+ .collect::<Vec<_>>()
+ .try_into()
+ .unwrap();
+ crate::prop_assert_biteq!(result_v, result_s);
+ Ok(())
+ });
+}
+
/// Test a binary vector-scalar function against a binary scalar function, applied elementwise.
#[inline(never)]
pub fn test_binary_scalar_rhs_elementwise<
diff --git a/library/portable-simd/crates/test_helpers/src/subnormals.rs b/library/portable-simd/crates/test_helpers/src/subnormals.rs
new file mode 100644
index 000000000..ec0f1fb24
--- /dev/null
+++ b/library/portable-simd/crates/test_helpers/src/subnormals.rs
@@ -0,0 +1,91 @@
+pub trait FlushSubnormals: Sized {
+ fn flush(self) -> Self {
+ self
+ }
+}
+
+impl<T> FlushSubnormals for *const T {}
+impl<T> FlushSubnormals for *mut T {}
+
+macro_rules! impl_float {
+ { $($ty:ty),* } => {
+ $(
+ impl FlushSubnormals for $ty {
+ fn flush(self) -> Self {
+ let is_f32 = core::mem::size_of::<Self>() == 4;
+ let ppc_flush = is_f32 && cfg!(all(
+ any(target_arch = "powerpc", all(target_arch = "powerpc64", target_endian = "big")),
+ target_feature = "altivec",
+ not(target_feature = "vsx"),
+ ));
+ let arm_flush = is_f32 && cfg!(all(target_arch = "arm", target_feature = "neon"));
+ let flush = ppc_flush || arm_flush;
+ if flush && self.is_subnormal() {
+ <$ty>::copysign(0., self)
+ } else {
+ self
+ }
+ }
+ }
+ )*
+ }
+}
+
+macro_rules! impl_else {
+ { $($ty:ty),* } => {
+ $(
+ impl FlushSubnormals for $ty {}
+ )*
+ }
+}
+
+impl_float! { f32, f64 }
+impl_else! { i8, i16, i32, i64, isize, u8, u16, u32, u64, usize }
+
+/// AltiVec should flush subnormal inputs to zero, but QEMU seems to only flush outputs.
+/// <https://gitlab.com/qemu-project/qemu/-/issues/1779>
+#[cfg(all(
+ any(target_arch = "powerpc", target_arch = "powerpc64"),
+ target_feature = "altivec"
+))]
+fn in_buggy_qemu() -> bool {
+ use std::sync::OnceLock;
+ static BUGGY: OnceLock<bool> = OnceLock::new();
+
+ fn add(x: f32, y: f32) -> f32 {
+ #[cfg(target_arch = "powerpc")]
+ use core::arch::powerpc::*;
+ #[cfg(target_arch = "powerpc64")]
+ use core::arch::powerpc64::*;
+
+ let array: [f32; 4] =
+ unsafe { core::mem::transmute(vec_add(vec_splats(x), vec_splats(y))) };
+ array[0]
+ }
+
+ *BUGGY.get_or_init(|| add(-1.0857398e-38, 0.).is_sign_negative())
+}
+
+#[cfg(all(
+ any(target_arch = "powerpc", target_arch = "powerpc64"),
+ target_feature = "altivec"
+))]
+pub fn flush_in<T: FlushSubnormals>(x: T) -> T {
+ if in_buggy_qemu() {
+ x
+ } else {
+ x.flush()
+ }
+}
+
+#[cfg(not(all(
+ any(target_arch = "powerpc", target_arch = "powerpc64"),
+ target_feature = "altivec"
+)))]
+pub fn flush_in<T: FlushSubnormals>(x: T) -> T {
+ x.flush()
+}
+
+pub fn flush<T: FlushSubnormals>(x: T) -> T {
+ x.flush()
+}