summaryrefslogtreecommitdiffstats
path: root/third_party/rust/packed_simd
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/packed_simd')
-rw-r--r--third_party/rust/packed_simd/.appveyor.yml59
-rw-r--r--third_party/rust/packed_simd/.cargo-checksum.json1
-rw-r--r--third_party/rust/packed_simd/.github/workflows/benchmarks.yml31
-rw-r--r--third_party/rust/packed_simd/.github/workflows/ci.yml218
-rw-r--r--third_party/rust/packed_simd/.github/workflows/docs.yml13
-rw-r--r--third_party/rust/packed_simd/.github/workflows/run-ci-script.yml86
-rw-r--r--third_party/rust/packed_simd/.travis.yml212
-rw-r--r--third_party/rust/packed_simd/Cargo.toml80
-rw-r--r--third_party/rust/packed_simd/LICENSE-APACHE201
-rw-r--r--third_party/rust/packed_simd/LICENSE-MIT25
-rw-r--r--third_party/rust/packed_simd/README.md144
-rw-r--r--third_party/rust/packed_simd/bors.toml3
-rw-r--r--third_party/rust/packed_simd/build.rs6
-rwxr-xr-xthird_party/rust/packed_simd/ci/all.sh71
-rw-r--r--third_party/rust/packed_simd/ci/android-install-ndk.sh21
-rw-r--r--third_party/rust/packed_simd/ci/android-install-sdk.sh60
-rw-r--r--third_party/rust/packed_simd/ci/android-sysimage.sh56
-rwxr-xr-xthird_party/rust/packed_simd/ci/benchmark.sh32
-rw-r--r--third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs176
-rw-r--r--third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile47
-rw-r--r--third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile14
-rw-r--r--third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile15
-rw-r--r--third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile13
-rw-r--r--third_party/rust/packed_simd/ci/docker/armv7-linux-androideabi/Dockerfile47
-rw-r--r--third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile13
-rw-r--r--third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile7
-rw-r--r--third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile7
-rw-r--r--third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile13
-rw-r--r--third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile10
-rw-r--r--third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile10
-rw-r--r--third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile25
-rw-r--r--third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile13
-rw-r--r--third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile17
-rw-r--r--third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile11
-rw-r--r--third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile20
-rw-r--r--third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile18
-rw-r--r--third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile47
-rw-r--r--third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile13
-rw-r--r--third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile39
-rw-r--r--third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile31
-rw-r--r--third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile16
-rw-r--r--third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile10
-rwxr-xr-xthird_party/rust/packed_simd/ci/dox.sh27
-rw-r--r--third_party/rust/packed_simd/ci/linux-s390x.sh18
-rw-r--r--third_party/rust/packed_simd/ci/linux-sparc64.sh17
-rw-r--r--third_party/rust/packed_simd/ci/lld-shim.rs11
-rwxr-xr-xthird_party/rust/packed_simd/ci/max_line_width.sh17
-rwxr-xr-xthird_party/rust/packed_simd/ci/run-docker.sh38
-rwxr-xr-xthird_party/rust/packed_simd/ci/run.sh99
-rw-r--r--third_party/rust/packed_simd/ci/run_examples.sh51
-rw-r--r--third_party/rust/packed_simd/ci/runtest-android.rs45
-rwxr-xr-xthird_party/rust/packed_simd/ci/setup_benchmarks.sh7
-rwxr-xr-xthird_party/rust/packed_simd/ci/test-runner-linux24
-rw-r--r--third_party/rust/packed_simd/contributing.md67
-rw-r--r--third_party/rust/packed_simd/perf-guide/.gitignore1
-rw-r--r--third_party/rust/packed_simd/perf-guide/book.toml12
-rw-r--r--third_party/rust/packed_simd/perf-guide/src/SUMMARY.md21
-rw-r--r--third_party/rust/packed_simd/perf-guide/src/ascii.css4
-rw-r--r--third_party/rust/packed_simd/perf-guide/src/bound_checks.md22
-rw-r--r--third_party/rust/packed_simd/perf-guide/src/float-math/approx.md8
-rw-r--r--third_party/rust/packed_simd/perf-guide/src/float-math/fma.md6
-rw-r--r--third_party/rust/packed_simd/perf-guide/src/float-math/fp.md3
-rw-r--r--third_party/rust/packed_simd/perf-guide/src/float-math/svml.md7
-rw-r--r--third_party/rust/packed_simd/perf-guide/src/introduction.md26
-rw-r--r--third_party/rust/packed_simd/perf-guide/src/prof/linux.md107
-rw-r--r--third_party/rust/packed_simd/perf-guide/src/prof/mca.md100
-rw-r--r--third_party/rust/packed_simd/perf-guide/src/prof/profiling.md14
-rw-r--r--third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md5
-rw-r--r--third_party/rust/packed_simd/perf-guide/src/target-feature/features.md13
-rw-r--r--third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md5
-rw-r--r--third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md31
-rw-r--r--third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md5
-rw-r--r--third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md77
-rw-r--r--third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md76
-rw-r--r--third_party/rust/packed_simd/rustfmt.toml5
-rw-r--r--third_party/rust/packed_simd/src/api.rs308
-rw-r--r--third_party/rust/packed_simd/src/api/bit_manip.rs129
-rw-r--r--third_party/rust/packed_simd/src/api/bitmask.rs79
-rw-r--r--third_party/rust/packed_simd/src/api/cast.rs108
-rw-r--r--third_party/rust/packed_simd/src/api/cast/macros.rs82
-rw-r--r--third_party/rust/packed_simd/src/api/cast/v128.rs302
-rw-r--r--third_party/rust/packed_simd/src/api/cast/v16.rs68
-rw-r--r--third_party/rust/packed_simd/src/api/cast/v256.rs298
-rw-r--r--third_party/rust/packed_simd/src/api/cast/v32.rs132
-rw-r--r--third_party/rust/packed_simd/src/api/cast/v512.rs209
-rw-r--r--third_party/rust/packed_simd/src/api/cast/v64.rs208
-rw-r--r--third_party/rust/packed_simd/src/api/cmp.rs16
-rw-r--r--third_party/rust/packed_simd/src/api/cmp/eq.rs27
-rw-r--r--third_party/rust/packed_simd/src/api/cmp/ord.rs43
-rw-r--r--third_party/rust/packed_simd/src/api/cmp/partial_eq.rs65
-rw-r--r--third_party/rust/packed_simd/src/api/cmp/partial_ord.rs230
-rw-r--r--third_party/rust/packed_simd/src/api/cmp/vertical.rs114
-rw-r--r--third_party/rust/packed_simd/src/api/default.rs30
-rw-r--r--third_party/rust/packed_simd/src/api/fmt.rs12
-rw-r--r--third_party/rust/packed_simd/src/api/fmt/binary.rs54
-rw-r--r--third_party/rust/packed_simd/src/api/fmt/debug.rs60
-rw-r--r--third_party/rust/packed_simd/src/api/fmt/lower_hex.rs54
-rw-r--r--third_party/rust/packed_simd/src/api/fmt/octal.rs54
-rw-r--r--third_party/rust/packed_simd/src/api/fmt/upper_hex.rs54
-rw-r--r--third_party/rust/packed_simd/src/api/from.rs7
-rw-r--r--third_party/rust/packed_simd/src/api/from/from_array.rs124
-rw-r--r--third_party/rust/packed_simd/src/api/from/from_vector.rs67
-rw-r--r--third_party/rust/packed_simd/src/api/hash.rs49
-rw-r--r--third_party/rust/packed_simd/src/api/into_bits.rs59
-rw-r--r--third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs345
-rw-r--r--third_party/rust/packed_simd/src/api/into_bits/macros.rs74
-rw-r--r--third_party/rust/packed_simd/src/api/into_bits/v128.rs232
-rw-r--r--third_party/rust/packed_simd/src/api/into_bits/v16.rs9
-rw-r--r--third_party/rust/packed_simd/src/api/into_bits/v256.rs232
-rw-r--r--third_party/rust/packed_simd/src/api/into_bits/v32.rs13
-rw-r--r--third_party/rust/packed_simd/src/api/into_bits/v512.rs232
-rw-r--r--third_party/rust/packed_simd/src/api/into_bits/v64.rs18
-rw-r--r--third_party/rust/packed_simd/src/api/math.rs4
-rw-r--r--third_party/rust/packed_simd/src/api/math/float.rs61
-rw-r--r--third_party/rust/packed_simd/src/api/math/float/abs.rs31
-rw-r--r--third_party/rust/packed_simd/src/api/math/float/consts.rs74
-rw-r--r--third_party/rust/packed_simd/src/api/math/float/cos.rs44
-rw-r--r--third_party/rust/packed_simd/src/api/math/float/exp.rs33
-rw-r--r--third_party/rust/packed_simd/src/api/math/float/ln.rs33
-rw-r--r--third_party/rust/packed_simd/src/api/math/float/mul_add.rs44
-rw-r--r--third_party/rust/packed_simd/src/api/math/float/mul_adde.rs48
-rw-r--r--third_party/rust/packed_simd/src/api/math/float/powf.rs36
-rw-r--r--third_party/rust/packed_simd/src/api/math/float/recpre.rs36
-rw-r--r--third_party/rust/packed_simd/src/api/math/float/rsqrte.rs40
-rw-r--r--third_party/rust/packed_simd/src/api/math/float/sin.rs50
-rw-r--r--third_party/rust/packed_simd/src/api/math/float/sqrt.rs35
-rw-r--r--third_party/rust/packed_simd/src/api/math/float/sqrte.rs44
-rw-r--r--third_party/rust/packed_simd/src/api/minimal.rs6
-rw-r--r--third_party/rust/packed_simd/src/api/minimal/iuf.rs169
-rw-r--r--third_party/rust/packed_simd/src/api/minimal/mask.rs176
-rw-r--r--third_party/rust/packed_simd/src/api/minimal/ptr.rs1373
-rw-r--r--third_party/rust/packed_simd/src/api/ops.rs32
-rw-r--r--third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs203
-rw-r--r--third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs162
-rw-r--r--third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs140
-rw-r--r--third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs106
-rw-r--r--third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs148
-rw-r--r--third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs129
-rw-r--r--third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs74
-rw-r--r--third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs57
-rw-r--r--third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs116
-rw-r--r--third_party/rust/packed_simd/src/api/ops/vector_neg.rs43
-rw-r--r--third_party/rust/packed_simd/src/api/ops/vector_rotates.rs92
-rw-r--r--third_party/rust/packed_simd/src/api/ops/vector_shifts.rs106
-rw-r--r--third_party/rust/packed_simd/src/api/ptr.rs4
-rw-r--r--third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs216
-rw-r--r--third_party/rust/packed_simd/src/api/reductions.rs12
-rw-r--r--third_party/rust/packed_simd/src/api/reductions/bitwise.rs151
-rw-r--r--third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs313
-rw-r--r--third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs193
-rw-r--r--third_party/rust/packed_simd/src/api/reductions/mask.rs89
-rw-r--r--third_party/rust/packed_simd/src/api/reductions/min_max.rs360
-rw-r--r--third_party/rust/packed_simd/src/api/select.rs73
-rw-r--r--third_party/rust/packed_simd/src/api/shuffle.rs184
-rw-r--r--third_party/rust/packed_simd/src/api/shuffle1_dyn.rs159
-rw-r--r--third_party/rust/packed_simd/src/api/slice.rs7
-rw-r--r--third_party/rust/packed_simd/src/api/slice/from_slice.rs202
-rw-r--r--third_party/rust/packed_simd/src/api/slice/write_to_slice.rs196
-rw-r--r--third_party/rust/packed_simd/src/api/swap_bytes.rs192
-rw-r--r--third_party/rust/packed_simd/src/codegen.rs62
-rw-r--r--third_party/rust/packed_simd/src/codegen/bit_manip.rs347
-rw-r--r--third_party/rust/packed_simd/src/codegen/llvm.rs122
-rw-r--r--third_party/rust/packed_simd/src/codegen/math.rs3
-rw-r--r--third_party/rust/packed_simd/src/codegen/math/float.rs18
-rw-r--r--third_party/rust/packed_simd/src/codegen/math/float/abs.rs103
-rw-r--r--third_party/rust/packed_simd/src/codegen/math/float/cos.rs103
-rw-r--r--third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs87
-rw-r--r--third_party/rust/packed_simd/src/codegen/math/float/exp.rs112
-rw-r--r--third_party/rust/packed_simd/src/codegen/math/float/ln.rs112
-rw-r--r--third_party/rust/packed_simd/src/codegen/math/float/macros.rs470
-rw-r--r--third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs109
-rw-r--r--third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs60
-rw-r--r--third_party/rust/packed_simd/src/codegen/math/float/powf.rs112
-rw-r--r--third_party/rust/packed_simd/src/codegen/math/float/sin.rs103
-rw-r--r--third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs188
-rw-r--r--third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs87
-rw-r--r--third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs103
-rw-r--r--third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs67
-rw-r--r--third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs28
-rw-r--r--third_party/rust/packed_simd/src/codegen/reductions.rs1
-rw-r--r--third_party/rust/packed_simd/src/codegen/reductions/mask.rs69
-rw-r--r--third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs81
-rw-r--r--third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs56
-rw-r--r--third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs8
-rw-r--r--third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs237
-rw-r--r--third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs216
-rw-r--r--third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs95
-rw-r--r--third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs35
-rw-r--r--third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs35
-rw-r--r--third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs68
-rw-r--r--third_party/rust/packed_simd/src/codegen/shuffle.rs150
-rw-r--r--third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs408
-rw-r--r--third_party/rust/packed_simd/src/codegen/swap_bytes.rs149
-rw-r--r--third_party/rust/packed_simd/src/codegen/v128.rs46
-rw-r--r--third_party/rust/packed_simd/src/codegen/v16.rs7
-rw-r--r--third_party/rust/packed_simd/src/codegen/v256.rs78
-rw-r--r--third_party/rust/packed_simd/src/codegen/v32.rs11
-rw-r--r--third_party/rust/packed_simd/src/codegen/v512.rs145
-rw-r--r--third_party/rust/packed_simd/src/codegen/v64.rs21
-rw-r--r--third_party/rust/packed_simd/src/codegen/vPtr.rs35
-rw-r--r--third_party/rust/packed_simd/src/codegen/vSize.rs16
-rw-r--r--third_party/rust/packed_simd/src/lib.rs348
-rw-r--r--third_party/rust/packed_simd/src/masks.rs126
-rw-r--r--third_party/rust/packed_simd/src/sealed.rs42
-rw-r--r--third_party/rust/packed_simd/src/testing.rs8
-rw-r--r--third_party/rust/packed_simd/src/testing/macros.rs44
-rw-r--r--third_party/rust/packed_simd/src/testing/utils.rs130
-rw-r--r--third_party/rust/packed_simd/src/v128.rs80
-rw-r--r--third_party/rust/packed_simd/src/v16.rs16
-rw-r--r--third_party/rust/packed_simd/src/v256.rs86
-rw-r--r--third_party/rust/packed_simd/src/v32.rs29
-rw-r--r--third_party/rust/packed_simd/src/v512.rs99
-rw-r--r--third_party/rust/packed_simd/src/v64.rs66
-rw-r--r--third_party/rust/packed_simd/src/vPtr.rs34
-rw-r--r--third_party/rust/packed_simd/src/vSize.rs53
-rw-r--r--third_party/rust/packed_simd/tests/endianness.rs268
216 files changed, 19005 insertions, 0 deletions
diff --git a/third_party/rust/packed_simd/.appveyor.yml b/third_party/rust/packed_simd/.appveyor.yml
new file mode 100644
index 0000000000..0388cee0a0
--- /dev/null
+++ b/third_party/rust/packed_simd/.appveyor.yml
@@ -0,0 +1,59 @@
+matrix:
+ allow_failures:
+ # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/72
+ - TARGET: i686-pc-windows-msvc
+ - TARGET: i686-pc-windows-gnu
+ - TARGET: x86_64-pc-windows-gnu
+ fast_finish: true
+
+environment:
+ matrix:
+ - TARGET: x86_64-pc-windows-msvc
+ MSYSTEM: MINGW64
+ NOVERIFY: "1"
+ - TARGET: x86_64-pc-windows-msvc
+ MSYSTEM: MINGW64
+ RUSTFLAGS: "-C target-feature=+sse4.2"
+ NOVERIFY: "1"
+ - TARGET: x86_64-pc-windows-msvc
+ MSYSTEM: MINGW64
+ RUSTFLAGS: "-C target-feature=+avx"
+ NOVERIFY: "1"
+ - TARGET: x86_64-pc-windows-msvc
+ MSYSTEM: MINGW64
+ RUSTFLAGS: "-C target-feature=+avx2"
+ NOVERIFY: "1"
+
+ - TARGET: i686-pc-windows-msvc
+ MSYSTEM: MINGW32
+ NOVERIFY: "1"
+ - TARGET: i686-pc-windows-msvc
+ MSYSTEM: MINGW32
+ RUSTFLAGS: "-C target-feature=+sse4.2"
+ NOVERIFY: "1"
+ - TARGET: i686-pc-windows-msvc
+ MSYSTEM: MINGW32
+ RUSTFLAGS: "-C target-feature=+avx"
+ NOVERIFY: "1"
+ - TARGET: i686-pc-windows-msvc
+ MSYSTEM: MINGW32
+ RUSTFLAGS: "-C target-feature=+avx2"
+ NOVERIFY: "1"
+
+ - TARGET: x86_64-pc-windows-gnu
+ MSYSTEM: MINGW64
+
+ - TARGET: i686-pc-windows-gnu
+ MSYSTEM: MINGW32
+ - TARGET: x86_64-pc-windows-gnu
+ MSYSTEM: MINGW64
+install:
+ - ps: if (ls -r . -fi "*.rs" | sls "`t") { throw "Found tab character" }
+ - ps: Start-FileDownload "https://static.rust-lang.org/dist/rust-nightly-${env:TARGET}.exe" -FileName "rust-install.exe"
+ - ps: .\rust-install.exe /VERYSILENT /NORESTART /DIR="C:\rust" | Out-Null
+ - ps: $env:PATH="$env:PATH;C:\rust\bin"
+ - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH%
+ - rustc -vV
+ - cargo -vV
+build: false
+test_script: bash -c "ci/run.sh"
diff --git a/third_party/rust/packed_simd/.cargo-checksum.json b/third_party/rust/packed_simd/.cargo-checksum.json
new file mode 100644
index 0000000000..92964c642b
--- /dev/null
+++ b/third_party/rust/packed_simd/.cargo-checksum.json
@@ -0,0 +1 @@
+{"files":{".appveyor.yml":"f1ed01850e0d725f9498f52a1a63ddf40702ad6e0bf5b2d7c4c04d76e96794a3",".github/workflows/benchmarks.yml":"d049f016dc53830a3fcc735833eca168df2e491f33b6b44ce7e4d5d1dd453854",".github/workflows/ci.yml":"170a2074add57ddc8f88b780149e4930162c6ee4718add4369dca8e3fd6c022a",".github/workflows/docs.yml":"86f7eb652c900624e4deb76320cd92175db1fa7295a76f28f30336ff0fad1604",".github/workflows/run-ci-script.yml":"d847be293f9ec17038e450652b1eb7bdc20b805c18c5dbab34d1d9ff7a9c8940",".travis.yml":"30a61a5ec53355fc1f3585e1690280308c2b7961701abc11e8389b235b647178","Cargo.toml":"02151c415f267a6aeb8651158472744d71e1d9da8883dd20be25731a42e520f4","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","README.md":"c4ac7027a9ab7d7858aa8957d7454dbfcdbb81e605b6a171f05310cc3cad3762","bors.toml":"dee881dc69b9b7834e4eba5d95c3ed5a416d4628815a167d6a22d4cb4fb064b8","build.rs":"019ed29c43989782d8eec3a961654cfc172d7a7898da4eca8f654700af7e1988","ci/all.sh":"2ae6b2445b4db83833e40b37efd0016c6b9879ee988b9b3ef94db5439a3e1606","ci/android-install-ndk.sh":"bdcf93ba9043ac1184e2c504a3d40c47c6c1601d882e0f0a27a8eb56fbabcb5f","ci/android-install-sdk.sh":"3490432022c5c8f5a115c084f7a9aca1626f96c0c87ffb62019228c4346b47e4","ci/android-sysimage.sh":"ebf4e5daa1f0fe1b2092b79f0f3f161c4c4275cb744e52352c4d81ab451e4c5a","ci/benchmark.sh":"b61d19ef6b90deba8fb79dee74c8b062d94844676293da346da87bb78a9a49a4","ci/deploy_and_run_on_ios_simulator.rs":"ec8ecf82d92072676aa47f0d1a3d021b60a7ae3531153ef12d2ff4541fc294dc","ci/docker/aarch64-linux-android/Dockerfile":"ace2e7d33c87bc0f6d3962a4a3408c04557646f7f51ab99cfbf574906796b016","ci/docker/aarch64-unknown-linux-gnu/Dockerfile":"da88c0d50f16dc08448c7fdf1fa5ed2cbe576acf9e7dd85b5b818621b2a8c702","ci/docker/arm-unknown-linux-gnueabi/Dockerfile":"bb5f8ae890707c128652290ffc544447643bf12037ddd73c6ad6989f848cb380","ci/docker/arm-unknown-linux-gnueabihf/Dockerfile":"1afaefcbc05b740859acd4e067bc92439be6bcbe8f2e9678474fb434bcd398d9","ci/docker/armv7-linux-androideabi/Dockerfile":"370e55d3330a413a3ccf677b3afb3e0ef9018a5fab263faa97ae8ac017fc2286","ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile":"8282ea707a94109beed47a57574755e2d58401735904a03f85fb64c578c53b4f","ci/docker/i586-unknown-linux-gnu/Dockerfile":"49792922269f371bd29da4727e9085101b27be67a6b97755d0196c63317f7abb","ci/docker/i686-unknown-linux-gnu/Dockerfile":"49792922269f371bd29da4727e9085101b27be67a6b97755d0196c63317f7abb","ci/docker/mips-unknown-linux-gnu/Dockerfile":"b2ebc25797612c4f8395fe9d407725156044955bfbcf442036b7f55b43a5f9da","ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile":"b0c1692ac65bc56dd30494b1993d8e929c48cc9c4b92029b7c7592af6d4f9220","ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile":"4e9249c179300138141d0b2b7401b11897f64aed69f541f078c1db4594df2827","ci/docker/mipsel-unknown-linux-musl/Dockerfile":"3164c52b0dcbb01afa78292b15b5c43503ccf0491cf6eb801ec2bf22ae274e52","ci/docker/powerpc-unknown-linux-gnu/Dockerfile":"ae8274309928620a5dd232a46264e05399bb746288ebee3843a71c4162208cc3","ci/docker/powerpc64-unknown-linux-gnu/Dockerfile":"ba5fbc4bf3bb91cd50b407248da31225681efc8f2be7618f4a0ab1219b389508","ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile":"53f97f8b9b5aca7534b9bf9ea48f35175052cd2a560a107e01ad270731c032fc","ci/docker/s390x-unknown-linux-gnu/Dockerfile":"89f5421cf06d817ae94092987e914472ef384ad2d1fff2735be3d8786ba11214","ci/docker/sparc64-unknown-linux-gnu/Dockerfile":"83eba19576486f9d10d7c037d669d72b31a65565a479f30b22aab36aaa2ff8dc","ci/docker/thumbv7neon-linux-androideabi/Dockerfile":"c2decd5591bd7a09378901bef629cd944acf052eb55e4f35b79eb9cb4d62246a","ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile":"51955a8bf3c4d440f47382af6f5426ebff94ab01a04da36175babda9a057740f","ci/docker/wasm32-unknown-unknown/Dockerfile":"b982b421c70db476900df5b60e19ef8815e6c7dae22687225002780cab7b0a76","ci/docker/x86_64-linux-android/Dockerfile":"a17ebdb186ce2dd6b62100b5a439e05a1ab9adab113e2508843e121aaea52992","ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile":"44b6203d9290bfdc53d81219f0937e1110847a23dd982ec8c4de388354f01536","ci/docker/x86_64-unknown-linux-gnu/Dockerfile":"7f4e3ca5fa288ea70edb4d1f75309708cd30b192e2e4444e61c4d5b3b58f89cf","ci/dox.sh":"434e9611c52e389312d2b03564adf09429f10cc76fe66a8644adb104903b87b7","ci/linux-s390x.sh":"d6b732d7795b4ba131326aff893bca6228a7d2eb0e9402f135705413dbbe0dce","ci/linux-sparc64.sh":"c92966838b1ab7ad3b7a344833ee726aba6b647cf5952e56f0ad1ba420b13325","ci/lld-shim.rs":"3d7f71ec23a49e2b67f694a0168786f9a954dda15f5a138815d966643fd3fcc3","ci/max_line_width.sh":"0a1518bba4c9ecaa55694cb2e9930d0e19c265baabf73143f17f9cf285aaa5bb","ci/run-docker.sh":"92e036390ad9b0d16f109579df1b5ced2e72e9afea40c7d011400ebd3a2a90de","ci/run.sh":"9afabc961e0ee83b87201f3fd554c19e5b0c36f3a95d013595e276c9882dd0a4","ci/run_examples.sh":"d1a23c6c35374a0678ba5114b9b8fefd8be0a79e774872a8bf0898d1baca18d0","ci/runtest-android.rs":"145a8e9799a5223975061fe7e586ade5669ee4877a7d7a4cf6b4ab48e8e36c7c","ci/setup_benchmarks.sh":"fae3960023f6f3d1388cd2ad22fdbab4b075f1f29dd4292d7994a20783beb6cf","ci/test-runner-linux":"c8aa6025cff5306f4f31d0c61dc5f9d4dd5a1d189ab613ef8d4c367c694d9ccd","contributing.md":"2d2629310ad4d464c482bdbb5819f0d6ce223c576aeef2cdce6a1f6857085ea5","perf-guide/.gitignore":"fe82c7da551079d832cf74200b0b359b4df9828cb4a0416fa7384f07a2ae6a13","perf-guide/book.toml":"115a98284126c6b180178b44713314cc494f08a71662ee2ce15cf67f17a51064","perf-guide/src/SUMMARY.md":"3e03bffc991fdc2050f3d51842d72d9d21ea6abab56a3baf3b2d5973a78b89e1","perf-guide/src/ascii.css":"29afb08833b2fe2250f0412e1fa1161a2432a0820a14953c87124407417c741a","perf-guide/src/bound_checks.md":"5e4991ff58a183ef0cd9fdc1feb4cd12d083b44bdf87393bbb0927808ef3ce7d","perf-guide/src/float-math/approx.md":"8c09032fa2d795a0c5db1775826c850d28eb2627846d0965c60ee72de63735ad","perf-guide/src/float-math/fma.md":"311076ba4b741d604a82e74b83a8d7e8c318fcbd7f64c4392d1cf5af95c60243","perf-guide/src/float-math/fp.md":"04153e775ab6e4f0d7837bcc515230d327b04edfa34c84ce9c9e10ebaeef2be8","perf-guide/src/float-math/svml.md":"0798873b8eedaeda5fed62dc91645b57c20775a02d3cd74d8bd06958f1516506","perf-guide/src/introduction.md":"9f5a19e9e6751f25d2daad39891a0cc600974527ec4c8305843f9618910671bd","perf-guide/src/prof/linux.md":"447731eb5de7d69166728fdbc5ecb0c0c9db678ea493b45a592d67dd002184c0","perf-guide/src/prof/mca.md":"f56d54f3d20e7aa4d32052186e8237b03d65971eb5d112802b442570ff11d344","perf-guide/src/prof/profiling.md":"8a650c0fd6ede0964789bb6577557eeef1d8226a896788602ce61528e260e43c","perf-guide/src/target-feature/attribute.md":"615f88dca0a707b6c416fa605435dd6e1fb5361cc639429cbf68cd87624bd78b","perf-guide/src/target-feature/features.md":"17077760ff24c006b606dd21889c53d87228f4311f3ba3a574f9afdeacd86165","perf-guide/src/target-feature/inlining.md":"7ed1d7068d8173a00d84c16cfe5871cd68b9f04f8d0cca2d01ebc84957ebf2f6","perf-guide/src/target-feature/practice.md":"c4b371842e0086df178488fec97f20def8f0c62ee588bcd25fd948b9b1fa227e","perf-guide/src/target-feature/runtime.md":"835425f5ee597fb3e51d36e725a81ebee29f4561231d19563cd4da81dbb1cfcb","perf-guide/src/target-feature/rustflags.md":"01197acf6f0adec8db32b8591811f69cecb6555a2b05dc5d5ec27d0e3f7b065e","perf-guide/src/vert-hor-ops.md":"c6211c0ee91e60552ec592d89d9d957eedc21dee3cbd89e1ad6765ea06a27471","rustfmt.toml":"d99a43f3f8ef9e425cf01c333fba9f0051f888f5d87ab4e8f63c2f7d0fe6620f","src/api.rs":"bb1795e9657a8298d37d2349b45443f08e9e455399ad4b727018600728478c10","src/api/bit_manip.rs":"27f3097fc0a11e3c4107049d9779e680dcd67407a066704008a6b9c4fd529e05","src/api/bitmask.rs":"058ebc38a2e0363f07a441d3e9a4775aaec57ccb170a0e5d5efa5dc4743ab07b","src/api/cast.rs":"03b94a3d316ac7b7be7068810044911e965e889a0ace7bae762749ca74a92747","src/api/cast/macros.rs":"b0a14d0c83ad2ebb7a275180f6d9e3f2bc312ba57a7d3d6c39fad4e0f20f9408","src/api/cast/v128.rs":"edd0994efac4379dff26e178423a52dbb3ffeb38b1fc97cae975d744c00b4fb6","src/api/cast/v16.rs":"96bd98c2d21b0663abe6c0ab33005b1fa693f3db7ee6795351391343863484da","src/api/cast/v256.rs":"8c31fe91f5e78ef737dfba6979cc1240210cb094a89d284fe459bf8a991ca24b","src/api/cast/v32.rs":"a99a79dd84d2a5e6adf9db98705675915bd03fd1287d489c7fe38e84d7e4a086","src/api/cast/v512.rs":"c0dd526f41ed7b8a71c3743d91267554ec0a0c75834ccc2e3ecb0ef3004af642","src/api/cast/v64.rs":"6572fdba2a1241a6cd666d3f0cce3306cd2cb7e5e236172e59d5d4351c8a88af","src/api/cmp.rs":"357c3a2a09c6d4611c32dd7fa95be2fae933d513e229026ec9b44451a77b884e","src/api/cmp/eq.rs":"60f70f355bae4cb5b17db53204cacc3890f70670611c17df638d4c04f7cc8075","src/api/cmp/ord.rs":"589f7234761c294fa5df8f525bc4acd5a47cdb602207d524a0d4e19804cd9695","src/api/cmp/partial_eq.rs":"902ccb8aa01fd5738b30ba0b712669c21d4801958907e03bad23432c7dba0198","src/api/cmp/partial_ord.rs":"9db0c37d7434cdfc62d8d66912e972fa3d8c115ab2af051a6f45e414bd3e4f1c","src/api/cmp/vertical.rs":"de3d62f38eba817299aa16f1e1939954c9a447e316509397465c2830852ba053","src/api/default.rs":"67bf21c134127d12a7028c8b88a57f0ceee8ccbd74976da8ca74eb9f16a174d5","src/api/fmt.rs":"67fb804bb86b6cd77cf8cd492b5733ce437071b66fe3297278b8a6552c325dda","src/api/fmt/binary.rs":"02b2b287f7404f8a983813cf70c87108c8da3835578b63ab303116885f609413","src/api/fmt/debug.rs":"56e1c3bdc092747344fffaafff9da7163ee7827857f6fb7cb1c9923eca4f6fa0","src/api/fmt/lower_hex.rs":"558fd592f7f485712fb051509cecc7174a21e6bf62e5ce64766e75afc97bb8e1","src/api/fmt/octal.rs":"3b2e70877a4f368c7704f8e254236c014c365c74d93371c1feb5f030e6c66422","src/api/fmt/upper_hex.rs":"2a442f666bc80e22d41f903f881238fe114dd49344c3ed69849250e853cafc5d","src/api/from.rs":"2e599d8329cb05eaf06224cc441355c4b7b51254fc19256619333be8c149d444","src/api/from/from_array.rs":"5d2cc700568376bf6ee1fe5e406da3bc2d488ff155644bf73d06a1349b73fc53","src/api/from/from_vector.rs":"9764371aa9e6005aace74dea14f59e5611a095b7cf42707940924749282c52f0","src/api/hash.rs":"5076ece87969592c876486f5b1ea8affbeaec379d1a14a30859e0aa5592019de","src/api/into_bits.rs":"8f8011627250e23e66b5c0ca641afb079d8232674bb1354140b536bdbea63e55","src/api/into_bits/arch_specific.rs":"e7445021f3908326bfee758835e5fc5ad56aa1baa77fc1c58abe4350c66c670a","src/api/into_bits/macros.rs":"bb4fe99be2af6a21d805efab44c8e4e61a7b2adb42a65504a0cf26d13efdadcd","src/api/into_bits/v128.rs":"145a44922b09a5ca5b62d88a461d327d399a997a15db4b11d7b17e554a9fa4c0","src/api/into_bits/v16.rs":"f4f4f61ba88aa51b158ec56ca3dce234349aea0daf2b3029a14ab5125d1e41e5","src/api/into_bits/v256.rs":"8cea9c5d9809f11323cb7cdc53b83df593fd17caf926251e412ae9777bed547f","src/api/into_bits/v32.rs":"905ba683d342fa32f4202b80bb46530807bd0a5b588f6c2e8c9f475223c47775","src/api/into_bits/v512.rs":"e25afa1fbf088a5d58e7d75d197b6cd4c56637ea28542ba18e46a451f29d04e7","src/api/into_bits/v64.rs":"d6238022ccff7b92e55b3f6017fc269acb6f36330a6d7e8fb389853a0f1b6478","src/api/math.rs":"8b2a2fc651917a850539f993aa0b9e5bf4da67b11685285b8de8cdca311719ec","src/api/math/float.rs":"61d2794d68262a1090ae473bd30793b5f65cf732f32a6694a3af2ce5d9225616","src/api/math/float/abs.rs":"5b6b2701e2e11135b7ce58a05052ea8120e10e4702c95d046b9d21b827b26bf8","src/api/math/float/consts.rs":"6302c9261da4291d144d5bb53493cdd073498feb40955fb6860ea3c4d06c978a","src/api/math/float/cos.rs":"4c2dd7173728ef189314f1576c9486e03be21b7da98843b2f9011282a7979e31","src/api/math/float/exp.rs":"7c6d5f1e304f498a01cfa23b92380c815d7da0ad94eae3483783bc377d287eef","src/api/math/float/ln.rs":"54c7583f3df793b39ff57534fade27b41bb992439e5dc178252f5ca3190a3e54","src/api/math/float/mul_add.rs":"62cac77660d20159276d4c9ef066eb90c81cbddb808e8e157182c607625ad2eb","src/api/math/float/mul_adde.rs":"bae056ee9f3a70df39ec3c3b2f6437c65303888a7b843ef1a5bcf1f5aca0e602","src/api/math/float/powf.rs":"9ddb938984b36d39d82a82f862f80df8f7fb013f1d222d45698d41d88472f568","src/api/math/float/recpre.rs":"589225794ff1dbf31158dff660e6d4509ecc8befbb57c633900dea5ac0b840d6","src/api/math/float/rsqrte.rs":"a32abdcc318d7ccc8448231f54d75b884b7cbeb03a7d595713ab6243036f4dbf","src/api/math/float/sin.rs":"cbd3622b7df74f19691743001c8cf747a201f8977ad90542fee915f37dcd1e49","src/api/math/float/sqrt.rs":"0c66d5d63fb08e4d99c6b82a8828e41173aff1ac9fa1a2764a11fac217ccf2ac","src/api/math/float/sqrte.rs":"731e1c9f321b662accdd27dacb3aac2e8043b7aecb2f2161dde733bd9f025362","src/api/minimal.rs":"1f22bcc528555444e76de569ec0ae2029b9ae9d04805efeafa93369c8098036b","src/api/minimal/iuf.rs":"819cff26d3e196f807645bcc1d79eb27d9f175edb89910f2274d52a1e913cd11","src/api/minimal/mask.rs":"0cae10ae1fc65f5070e686c0c79bfba27b86b33d6c399367bd4848fb367dcec4","src/api/minimal/ptr.rs":"f74d7a4925d7209faebc26ea8315259cb2c08ec65789a70869e595649a9bc39a","src/api/ops.rs":"3e273b277a0f3019d42c3c59ca94a5afd4885d5ae6d2182e5089bbeec9de42ee","src/api/ops/scalar_arithmetic.rs":"d2d5ad897a59dd0787544f927e0e7ca4072c3e58b0f4a2324083312b0d5a21d7","src/api/ops/scalar_bitwise.rs":"482204e459ca6be79568e1c9f70adbe2d2151412ddf122fb2161be8ebb51c40c","src/api/ops/scalar_mask_bitwise.rs":"c250f52042e37b22d57256c80d4604104cfd2fbe2a2e127c676267270ca5d350","src/api/ops/scalar_shifts.rs":"c4773d435c3f9da4454327e6fbb2b5b41a1c0ebb1cca7372e69dc7a344a1b6e4","src/api/ops/vector_arithmetic.rs":"ddca15d09ddeef502c2ed66117a62300ca65d87e959e8b622d767bdf1c307910","src/api/ops/vector_bitwise.rs":"b3968f7005b649edcc22a54e2379b14d5ee19045f2e784029805781ae043b5ee","src/api/ops/vector_float_min_max.rs":"76bf8cb607e2c442923c1da1061a6b80d742d607408033c2a3761161114cf2a0","src/api/ops/vector_int_min_max.rs":"a378789c6ff9b32a51fbd0a97ffd36ed102cd1fe6a067d2b02017c1df342def6","src/api/ops/vector_mask_bitwise.rs":"5052d18517d765415d40327e6e8e55a312daaca0a5e2aec959bfa54b1675f9c8","src/api/ops/vector_neg.rs":"5c62f6b0221983cdbd23cd0a3af3672e6ba1255f0dfe8b19aae6fbd6503e231b","src/api/ops/vector_rotates.rs":"6c3f761d9d551f6365a8a95539ceace4b1a02e0b12d144f34ed68db94e88cff4","src/api/ops/vector_shifts.rs":"e510be14127c0ffd58a2573a39701da3557d66bedec09837ac8bbd44d579da00","src/api/ptr.rs":"8a793251bed6130dcfb2f1519ceaa18b751bbb15875928d0fb6deb5a5e07523a","src/api/ptr/gather_scatter.rs":"3d614f9d5b4ca201a9f7e46af4405e1d2c28ecee1620297c23b52e37b92cc0ea","src/api/reductions.rs":"ae5baca81352ecd44526d6c30c0a1feeda475ec73ddd3c3ec6b14e944e5448ee","src/api/reductions/bitwise.rs":"8bf910ae226188bd15fc7e125f058cd2566b6186fcd0cd8fd020f352c39ce139","src/api/reductions/float_arithmetic.rs":"47a5679896db2cbb56c31372fe42143da015b6beae7db5d2f3a0309ddf427ae1","src/api/reductions/integer_arithmetic.rs":"c2df3cf7493cca4174f2c65aea422a3d20d8a23af03f8d57cef72c19fee8f20d","src/api/reductions/mask.rs":"db83327a950e33a317f37fd33ca4e20c347fb415975ec024f3e23da8509425af","src/api/reductions/min_max.rs":"6af8c9aa45c69961b1b6fc205395f4767d4421869fb105fb3d563c5605fc13cd","src/api/select.rs":"6b07e7e8026df561f7307221a896f0fbb272536f41b9109040ac094c24c69331","src/api/shuffle.rs":"be7faff9b59654926df12897b2f98a4baa7d6acf2af1aaf93d388ba6e96f83ec","src/api/shuffle1_dyn.rs":"bfea5a91905b31444e9ef7ca6eddb7a9606b7e22d3f71bb842eb2795a0346620","src/api/slice.rs":"ee87484e8af329547b9a5d4f2a69e8bed6ea10bbd96270d706083843d4eea2ac","src/api/slice/from_slice.rs":"3735363000737104a8fc5f394ad8c31ec14e885952bd57647dd2a84001aee0a6","src/api/slice/write_to_slice.rs":"79d09c64d00724783c77c42e4583eeec97b18db94cf2ae146b564c3f85cfefd6","src/api/swap_bytes.rs":"05b4262eaade2f63e6cd3b780c19a03aecd2459d4cc4360051fc088887179a6e","src/codegen.rs":"db4f232fb9f5728db310b87dc8c4733be48afacab1053798c06106bef9a42b05","src/codegen/bit_manip.rs":"525ea6ff7ad1e043b6f6136992166f1803ed5563b7f6fc292c1c40257d20e264","src/codegen/llvm.rs":"12e748b4928c3be6cc12b4165c3041a3d0efccf6195338ecd3d88b8fdb0bbcc7","src/codegen/math.rs":"dfcf02ad34e2fdfe22c3f1cc2822001cc895e65031b4d06e585e5047839febb7","src/codegen/math/float.rs":"2c1cbce155bc527ce34d472c0fef6bc3dadb79cd7a357dd7aa5b1ebeb1d77a13","src/codegen/math/float/abs.rs":"d5aaadcf540bdb9b4264dca6471a255fd7bf509e763bef0239c0144a68466fea","src/codegen/math/float/cos.rs":"17f28d2900c852dca221fa9c92a9cd5fe7fd2df8d427bbc60216c749b2be013d","src/codegen/math/float/cos_pi.rs":"dbaf9f443f9846a491d4ec52210a7b5835dd593b03366e3135b05c37d70f9d6c","src/codegen/math/float/exp.rs":"d300058a4bcc7ae7976f216f81902cd73a9e603ad63880dff3bbc866c27a9f37","src/codegen/math/float/ln.rs":"c851e211e43f8256093ba75b03ae0c307c9962ee66d94f09b4dd80068190cbdf","src/codegen/math/float/macros.rs":"fc9924869ed85e4795983af228cacf23158f4f35919adce16c920ad4a3f0a009","src/codegen/math/float/mul_add.rs":"041a5b69d5991d93ef795351b17560c10faf80b78fd26ad7df42a239b32cf9de","src/codegen/math/float/mul_adde.rs":"d71d5f0f3333b62a7439b823cb7adf5340ea1555ce820fb4a3f4cb922f73f5f5","src/codegen/math/float/powf.rs":"9742c3877f1a5509ca5c9492a40884b6579ba6dd11c26b7112e63f70666b395d","src/codegen/math/float/sin.rs":"0e9868d35531566509f3a01d85d5253045eb4afa8525d8407dcc1f5f33c56036","src/codegen/math/float/sin_cos_pi.rs":"8e6b6142d7dd240cdb36669722e82ab9810a2261e86e659f7d97a942ad8b1258","src/codegen/math/float/sin_pi.rs":"bb6d39db8f921e03a301fc5206ac1a61a97def8a2cb83b87ccf189f3fc48d548","src/codegen/math/float/sqrt.rs":"e6ebb0c5f428efad1f672b9a8fe4e58534dbf1ea5a8fe092ce5ce76b52fe89cb","src/codegen/math/float/sqrte.rs":"23acfaea38d0e081a6d9021c1094e813d0cfd12c58c1eca9662aade5e625d51c","src/codegen/pointer_sized_int.rs":"6ca13c214b6cf7e0929dbe18e96a16fc0bb7d8799608df29c4c8115490f99e01","src/codegen/reductions.rs":"8eb18ebac76985d2aa30262a2edd8cb004230b511a765d657525f677a585c12c","src/codegen/reductions/mask.rs":"e67f35a1f4d156a4894a2d6ea5a935b4d898cf70eefb2715f5c1cc165e776c11","src/codegen/reductions/mask/aarch64.rs":"84b101c17cad1ede4eb6d38cada0ac7da239dba8cea3badd3829b967e558431f","src/codegen/reductions/mask/arm.rs":"aaa07129bd078ae7e677cf8b8e67ec9f30536606a0c7ed1baaa18fd1793bb218","src/codegen/reductions/mask/fallback.rs":"3eb9319d2c7cf19216b607b8459612c4e027b643cf11b036937d36896bf76786","src/codegen/reductions/mask/fallback_impl.rs":"76547f396e55ef403327c77c314cf8db8c7a5c9b9819bfb925abeacf130249e5","src/codegen/reductions/mask/x86.rs":"36dcd8af4ab99730a078ed113d3955f74eb1a2876e2e6d9f224e0ff462c216d1","src/codegen/reductions/mask/x86/avx.rs":"3a40868b38c86e35aefb96d7578de6322efe89d8135e0366359b54ddd06f861a","src/codegen/reductions/mask/x86/avx2.rs":"677aed3f056285285daa3adff8bc65e739630b4424defa6d9665e160f027507e","src/codegen/reductions/mask/x86/sse.rs":"8522f6ed03f6c32dd577d4298df477c08aeaaa38563706f29096e1911ed731f2","src/codegen/reductions/mask/x86/sse2.rs":"54ec56e49b0c6841eccb719e4f310d65fe767c04136b2ec20bd8b9d7d9897b9e","src/codegen/shuffle.rs":"1ec2930f4e1acc43ac30b518af298d466a79e9e75734a51c380b7810efd1a27f","src/codegen/shuffle1_dyn.rs":"3f13ca1597378758d05106bf5ff3715eee531f3cb6d88f48b9182bd6c9386b51","src/codegen/swap_bytes.rs":"c67c86e91ca3fc77539e0efcea081a3c62548cccf503963ae408f2e86f4e6a21","src/codegen/v128.rs":"94226b31ec403d18d9d2fe06713f147c9c79e9b5f9105089088266313f843185","src/codegen/v16.rs":"ddec4ffb66b6f7aaffb9a1780c5ddba82557abd74f45073d335047e04cf74924","src/codegen/v256.rs":"6b63917f0444118d6b1595bff2045e59b97c4d24012bd575f69f1f0efc5a0241","src/codegen/v32.rs":"3477b3c5540aed86e61e2f5807dd31db947413cec9181c587d93ed6ec74f0eba","src/codegen/v512.rs":"5854f99d3aabc4cd42b28a20d9ce447756dc2ba024a409a69b6a8ae1f1842fc5","src/codegen/v64.rs":"e9e89caebfe63d10c0cbca61e4dfdba3b7e02ee0989170f80beed23237ddd950","src/codegen/vPtr.rs":"f0753b405cdc865bdf8e82c6505f299ea1f96136239ebbaf7f9ce93d310764b8","src/codegen/vSize.rs":"c89f5fdeb28ac4c8272ed1816fce03d9d95308cc32bb2533bd8b20cd5ac102ac","src/lib.rs":"05048c6a85ec65cf902d9dd8f757a3f76392b703a6794ea71f0d41500a89f78f","src/masks.rs":"70fc0abe4c2907ce2a491c574e1cfb9f3423385da2e1a923a48c9c13f8ba6ed8","src/sealed.rs":"ae7fdeaf5d84cd7710ed730ca72ca7eaba93df6cb0acb183e5c0a7327acf197f","src/testing.rs":"896669c08d8c801448a4d2fadc9d633eda0fbe879d229997e2a182e31278e469","src/testing/macros.rs":"403bbc5ecb7c786fe36156df302d0c07a8122408dbb15f7474d7682224ba1106","src/testing/utils.rs":"41912a92266dfe884647fc035e4242fd746100df8e839808ae0397af3759a3c8","src/v128.rs":"16cf9a8e7156b899ee9b9cd3f2dba9d13ec63289bea8c3ee9ae2e43ad9510288","src/v16.rs":"cb6465cf1e00bf530183af1819b9fe3d7eec978f8765d5e85d9b58a39a4b4045","src/v256.rs":"fe235017da18c7f3c361831c60e3173ad304d8ea1e95d64ebebc79da2d708511","src/v32.rs":"145d347855bac59b2de6508f9e594654e6c330423af9edc0e2ac8f4d1abdf45e","src/v512.rs":"f372f277f3e62eb5c945bb1c460333fdb17b6974fcc876633788ff53bded9599","src/v64.rs":"0b8079881b71575e3414be0b7f8f7eaba65281ba6732f2b2f61f73e95b6f48f7","src/vPtr.rs":"8b3e433d487180bb4304ff71245ecad90f0010f43e139a72027b672abe58facc","src/vSize.rs":"eda5aa020706cbf94d15bada41a0c2a35fc8f3f37cb7c2cd6f34d201399a495e","tests/endianness.rs":"5147f86d224c4c540b772033da2f994cad9bc9c035f38ec21e23bc4e55f8a759"},"package":null} \ No newline at end of file
diff --git a/third_party/rust/packed_simd/.github/workflows/benchmarks.yml b/third_party/rust/packed_simd/.github/workflows/benchmarks.yml
new file mode 100644
index 0000000000..2102661b29
--- /dev/null
+++ b/third_party/rust/packed_simd/.github/workflows/benchmarks.yml
@@ -0,0 +1,31 @@
+name: benchmarks
+
+on:
+ push:
+ branches:
+ - master
+ pull_request:
+ workflow_dispatch:
+
+jobs:
+ x86_64-unknown-linux-gnu:
+ uses: ./.github/workflows/run-ci-script.yml
+ with:
+ target: x86_64-unknown-linux-gnu
+ setup_script: ci/setup_benchmarks.sh
+ script: ci/benchmark.sh
+ norun: 1
+ verify: 1
+ # FIXME: figure out how to add downloaded ispc to PATH
+ # features: ispc
+ x86_64-apple-darwin:
+ uses: ./.github/workflows/run-ci-script.yml
+ with:
+ target: x86_64-apple-darwin
+ runner: macos-latest
+ setup_script: ci/setup_benchmarks.sh
+ script: ci/benchmark.sh
+ norun: 1
+ verify: 1
+ # FIXME: figure out how to add downloaded ispc to PATH
+ # features: ispc
diff --git a/third_party/rust/packed_simd/.github/workflows/ci.yml b/third_party/rust/packed_simd/.github/workflows/ci.yml
new file mode 100644
index 0000000000..15f096370c
--- /dev/null
+++ b/third_party/rust/packed_simd/.github/workflows/ci.yml
@@ -0,0 +1,218 @@
+name: ci
+
+# trigger for all PRs and changes to master
+on:
+ push:
+ branches:
+ - master
+ pull_request:
+
+jobs:
+ rustfmt:
+ uses: ./.github/workflows/run-ci-script.yml
+ with:
+ script: ci/all.sh check_fmt || true
+ x86_64-unknown-linux-android:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: true
+ with:
+ target: x86_64-linux-android
+ armv7-linux-androideabi:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: true
+ with:
+ target: armv7-linux-androideabi
+ aarch64-unknown-linux-android-NEON:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: true
+ with:
+ target: aarch64-linux-android
+ rustflags: -Ctarget-feature=+neon
+ thumbv7neon-linux-androideabi:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: false
+ with:
+ target: thumbv7neon-linux-androideabi
+ i586-unknown-linux-gnu:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: false
+ with:
+ target: i586-unknown-linux-gnu
+ rustflags: -Crelocation-model=static
+ i586-unknown-linux-gnu-SSE:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: false
+ with:
+ target: i586-unknown-linux-gnu
+ rustflags: -Crelocation-model=static -Ctarget-feature=+sse
+ i586-unknown-linux-gnu-SSE2:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: false
+ with:
+ target: i586-unknown-linux-gnu
+ rustflags: -Crelocation-model=static -Ctarget-feature=+sse2
+ i686-unknown-linux-gnu:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: false
+ with:
+ target: i686-unknown-linux-gnu
+ rustflags: -Crelocation-model=static
+ i686-unknown-linux-gnu-SSE4_2:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: false
+ with:
+ target: i686-unknown-linux-gnu
+ rustflags: -Crelocation-model=static -Ctarget-feature=+sse4.2
+ i686-unknown-linux-gnu-AVX2:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: false
+ with:
+ target: i686-unknown-linux-gnu
+ rustflags: -Crelocation-model=static -Ctarget-feature=+avx2
+ x86_64-unknown-linux-gnu:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: true
+ with:
+ target: x86_64-unknown-linux-gnu
+ x86_64-unknown-linux-gnu-SSE4_2:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: true
+ with:
+ target: x86_64-unknown-linux-gnu
+ rustflags: -Ctarget-feature=+sse4.2
+ x86_64-unknown-linux-gnu-AVX2:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: true
+ with:
+ target: x86_64-unknown-linux-gnu
+ rustflags: -Ctarget-feature=+avx2
+ arm-unknown-linux-gnueabihf:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: true
+ with:
+ target: arm-unknown-linux-gnueabihf
+ armv7-unknown-linux-gnueabihf:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: true
+ with:
+ target: armv7-unknown-linux-gnueabihf
+ armv7-unknown-linux-gnueabihf-NEON:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: true
+ with:
+ target: armv7-unknown-linux-gnueabihf
+ rustflags: -Ctarget-feature=+neon
+ thumbv7neon-unknown-linux-gnueabihf:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: false
+ with:
+ target: thumbv7neon-unknown-linux-gnueabihf
+ aarch64-unknown-linux-gnu-NEON:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: true
+ with:
+ target: aarch64-unknown-linux-gnu
+ rustflags: -Ctarget-feature=+neon
+ powerpc-unknown-linux-gnu:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: false
+ with:
+ target: powerpc-unknown-linux-gnu
+ powerpc64-unknown-linux-gnu:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: false
+ with:
+ target: powerpc64-unknown-linux-gnu
+ powerpc64le-unknown-linux-gnu:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: true
+ with:
+ target: powerpc64le-unknown-linux-gnu
+ powerpc64le-unknown-linux-gnu-ALTIVEC:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: true
+ with:
+ target: powerpc64le-unknown-linux-gnu
+ rustflags: -Ctarget-feature=+altivec
+ powerpc64le-unknown-linux-gnu-VSX:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: true
+ with:
+ target: powerpc64le-unknown-linux-gnu
+ rustflags: -Ctarget-feature=+vsx
+ s390x-unknown-linux-gnu:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: false
+ with:
+ target: s390x-unknown-linux-gnu
+ sparc64-unknown-linux-gnu:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: false
+ with:
+ target: sparc64-unknown-linux-gnu
+ wasm32-unknown-unknown:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: false
+ with:
+ target: wasm32-unknown-unknown
+ x86_64-apple-darwin-SSE4_2:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: true
+ with:
+ runner: macos-latest
+ script: ci/run.sh
+ target: x86_64-apple-darwin
+ rustflags: -Ctarget-feature=+sse4.2
+ x86_64-apple-darwin-AVX:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: true
+ with:
+ runner: macos-latest
+ script: ci/run.sh
+ target: x86_64-apple-darwin
+ rustflags: -Ctarget-feature=+avx
+ x86_64-apple-ios:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: true
+ with:
+ runner: macos-latest
+ script: ci/run.sh
+ target: x86_64-apple-ios
+ aarch64-apple-ios:
+ uses: ./.github/workflows/run-ci-script.yml
+ strategy:
+ fail-fast: true
+ with:
+ runner: macos-latest
+ script: ci/run.sh
+ target: aarch64-apple-ios
+ rustflags: -Ctarget-feature=+neon
diff --git a/third_party/rust/packed_simd/.github/workflows/docs.yml b/third_party/rust/packed_simd/.github/workflows/docs.yml
new file mode 100644
index 0000000000..54bdc240aa
--- /dev/null
+++ b/third_party/rust/packed_simd/.github/workflows/docs.yml
@@ -0,0 +1,13 @@
+name: docs
+
+on:
+ push:
+ branches:
+ - master
+
+jobs:
+ docs:
+ uses: ./.github/workflows/run-ci-script.yml
+ with:
+ setup_script: cargo install mdbook
+ script: ci/dox.sh
diff --git a/third_party/rust/packed_simd/.github/workflows/run-ci-script.yml b/third_party/rust/packed_simd/.github/workflows/run-ci-script.yml
new file mode 100644
index 0000000000..733c539659
--- /dev/null
+++ b/third_party/rust/packed_simd/.github/workflows/run-ci-script.yml
@@ -0,0 +1,86 @@
+name: run-ci-script
+
+on:
+ workflow_call:
+ inputs:
+ runner:
+ required: false
+ type: string
+ default: ubuntu-latest
+ target:
+ required: false
+ type: string
+ default: ''
+ rustflags:
+ required: false
+ type: string
+ default: ''
+ script:
+ required: false
+ type: string
+ default: ci/run-docker.sh
+ setup_script:
+ required: false
+ type: string
+ norun:
+ required: false
+ type: string
+ default: ''
+ verify:
+ required: false
+ type: string
+ default: ''
+ features:
+ required: false
+ type: string
+ default: ''
+
+jobs:
+ run-ci-script:
+ runs-on: ${{ inputs.runner }}
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v2
+ - name: Init Rustup Cache
+ uses: actions/cache@v2
+ with:
+ path: |
+ ~/.rustup/toolchains
+ key: ${{ runner.os }}-cargo-${{ hashFiles('**/rust-toolchain') }}
+ - name: Install Toolchain
+ uses: dtolnay/rust-toolchain@nightly
+ with:
+ # FIXME: change to nightly once https://github.com/rust-lang/packed_simd/pull/350 is merged
+ # needs to be kept in sync with the toolchain files
+ targets: ${{ inputs.target }}
+ components: rustfmt
+ - name: Generate Lockfile
+ run: cargo generate-lockfile
+ - name: Init Cargo Cache
+ uses: actions/cache@v2
+ with:
+ path: |
+ ~/.cargo/bin/
+ ~/.cargo/registry/index/
+ ~/.cargo/registry/cache/
+ ~/.cargo/git/db/
+ target/
+ key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
+ - name: Setup
+ if: ${{ inputs.setup_script != '' }}
+ run: ${{ inputs.setup_script }}
+ env:
+ TARGET: ${{ inputs.target }}
+ RUSTFLAGS: ${{ inputs.rustflags }}
+ NORUN: ${{ inputs.norun }}
+ VERIFY: ${{ inputs.verify }}
+ FEATURES: ${{ inputs.features }}
+ - name: Run CI Script
+ timeout-minutes: 30
+ run: ${{ inputs.script }}
+ env:
+ TARGET: ${{ inputs.target }}
+ RUSTFLAGS: ${{ inputs.rustflags }}
+ NORUN: ${{ inputs.norun }}
+ VERIFY: ${{ inputs.verify }}
+ FEATURES: ${{ inputs.features }}
diff --git a/third_party/rust/packed_simd/.travis.yml b/third_party/rust/packed_simd/.travis.yml
new file mode 100644
index 0000000000..0ffc06afdd
--- /dev/null
+++ b/third_party/rust/packed_simd/.travis.yml
@@ -0,0 +1,212 @@
+language: rust
+rust: nightly
+os: linux
+dist: focal
+
+stages:
+ - tools
+ - build-test-verify # Passes full test suite, permit no regressions (unless it's rustup :/)
+ - 32bit-tier1
+ - 64bit-tier2
+ - 32bit-tier2
+
+jobs:
+ fast_finish: true
+ include:
+ # Android:
+ - env: TARGET=x86_64-linux-android
+ name: "x86_64-unknown-linux-android + SSE2"
+ stage: build-test-verify
+ - env: TARGET=arm-linux-androideabi
+ name: "arm-linux-androideabi"
+ stage: build-test-verify
+ - name: "aarch64-unknown-linux-android + NEON"
+ env: TARGET=aarch64-linux-android RUSTFLAGS="-C target-feature=+neon"
+ stage: build-test-verify
+ - env: TARGET="thumbv7neon-linux-androideabi"
+ name: "thumbv7neon-linux-androideabi"
+ stage: 32bit-tier2
+ # Linux:
+ - env: TARGET=i586-unknown-linux-gnu
+ name: "i586-unknown-linux-gnu"
+ stage: 32bit-tier2
+ - env: TARGET=i586-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse"
+ name: "i586-unknown-linux-gnu + SSE"
+ stage: 32bit-tier2
+ - env: TARGET=i586-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse2"
+ name: "i586-unknown-linux-gnu + SSE2"
+ stage: 32bit-tier2
+ - env: TARGET=i686-unknown-linux-gnu
+ name: "i686-unknown-linux-gnu + SSE2"
+ stage: 32bit-tier1
+ - env: TARGET=i686-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse4.2"
+ name: "i686-unknown-linux-gnu + SSE4.2"
+ stage: 32bit-tier1
+ - env: TARGET=i686-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx2"
+ name: "i686-unknown-linux-gnu + AVX2"
+ stage: 32bit-tier1
+ - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse4.2"
+ name: "x86_64-unknown-linux-gnu + SSE4.2"
+ stage: build-test-verify
+ - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx2"
+ name: "x86_64-unknown-linux-gnu + AVX2"
+ stage: build-test-verify
+ - env: TARGET=arm-unknown-linux-gnueabihf
+ name: "arm-unknown-linux-gnueabihf"
+ stage: build-test-verify
+ - env: TARGET=armv7-unknown-linux-gnueabihf
+ name: "armv7-unknown-linux-gnueabihf"
+ stage: build-test-verify
+ - env: TARGET=armv7-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+neon"
+ name: "armv7-unknown-linux-gnueabihf + NEON"
+ stage: build-test-verify
+ - env: TARGET="thumbv7neon-unknown-linux-gnueabihf"
+ name: "thumbv7neon-unknown-linux-gnueabihf"
+ stage: 32bit-tier2
+ - name: "aarch64-unknown-linux-gnu + NEON"
+ env: TARGET=aarch64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+neon"
+ stage: build-test-verify
+ - env: TARGET=mips-unknown-linux-gnu
+ name: "mips-unknown-linux-gnu"
+ stage: 32bit-tier2
+ - env: TARGET=mipsel-unknown-linux-musl
+ name: "mipsel-unknown-linux-musl"
+ stage: 32bit-tier2
+ - env: TARGET=mips64-unknown-linux-gnuabi64
+ name: "mips64-unknown-linux-gnuabi64"
+ stage: 64bit-tier2
+ - env: TARGET=mips64el-unknown-linux-gnuabi64
+ name: "mips64el-unknown-linux-gnuabi64"
+ stage: 64bit-tier2
+ # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/18
+ # env: TARGET=mips64el-unknown-linux-gnuabi64 RUSTFLAGS="-C target-feature=+msa -C target-cpu=mips64r6"
+ - env: TARGET=powerpc-unknown-linux-gnu
+ name: "powerpc-unknown-linux-gnu"
+ stage: 32bit-tier2
+ - env: TARGET=powerpc64-unknown-linux-gnu
+ name: "powerpc64-unknown-linux-gnu"
+ stage: 64bit-tier2
+ - name: "powerpc64le-unknown-linux-gnu"
+ env: TARGET=powerpc64le-unknown-linux-gnu
+ stage: build-test-verify
+ - name: "powerpc64le-unknown-linux-gnu + ALTIVEC"
+ env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+altivec"
+ stage: build-test-verify
+ - name: "powerpc64le-unknown-linux-gnu + VSX"
+ env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+vsx"
+ stage: build-test-verify
+ - name: "s390x-unknown-linux-gnu"
+ env: TARGET=s390x-unknown-linux-gnu
+ stage: 64bit-tier2
+ - env: TARGET=sparc64-unknown-linux-gnu
+ name: "sparc64-unknown-linux-gnu"
+ stage: 64bit-tier2
+ # WebAssembly:
+ - env: TARGET=wasm32-unknown-unknown
+ name: "wasm32-unknown-unknown"
+ stage: 32bit-tier2
+ # MacOSX:
+ - os: osx
+ env: TARGET=x86_64-apple-darwin RUSTFLAGS="-C target-feature=+sse4.2"
+ name: "x86_64-apple-darwin + SSE4.2"
+ install: true
+ script: ci/run.sh
+ osx_image: xcode10
+ stage: build-test-verify
+ # Travis-CI OSX build bots do not support AVX2:
+ - os: osx
+ env: TARGET=x86_64-apple-darwin RUSTFLAGS="-C target-feature=+avx"
+ name: "x86_64-apple-darwin + AVX"
+ install: true
+ script: ci/run.sh
+ osx_image: xcode10
+ stage: build-test-verify
+ # *BSDs:
+ #- env: TARGET=i686-unknown-freebsd NORUN=1
+ # script: ci/run.sh
+ #- env: TARGET=x86_64-unknown-freebsd NORUN=1
+ # script: ci/run.sh
+ #- env: TARGET=x86_64-unknown-netbsd NORUN=1
+ # script: ci/run.sh
+ # Solaris:
+ #- env: TARGET=x86_64-sun-solaris NORUN=1
+ # script: ci/run.sh
+ # iOS:
+ - os: osx
+ env: TARGET=x86_64-apple-ios
+ name: "x86_64-apple-ios + SSE2"
+ script: ci/run.sh
+ osx_image: xcode9.4
+ stage: 64bit-tier2
+ - name: "aarch64-apple-ios + NEON"
+ env: TARGET=aarch64-apple-ios RUSTFLAGS="-C target-feature=+neon"
+ os: osx
+ osx_image: xcode9.4
+ script: ci/run.sh
+ stage: 64bit-tier2
+ # BENCHMARKS:
+ - name: "Benchmarks - x86_64-unknown-linux-gnu"
+ install: TARGET=x86_64-unknown-linux-gnu ./ci/setup_benchmarks.sh
+ # FIXME: Use `core_arch,sleef-sys` features once they works again
+ script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=ispc ci/benchmark.sh
+ stage: tools
+ - name: "Benchmarks - x86_64-apple-darwin"
+ install: TARGET=x86_64-apple-darwin ./ci/setup_benchmarks.sh
+ # FIXME: Use `core_arch,sleef-sys` features once they works again
+ script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=ispc ci/benchmark.sh
+ os: osx
+ osx_image: xcode9.4
+ stage: tools
+ # TOOLS:
+ - name: "Documentation"
+ before_install:
+ - sudo add-apt-repository -y ppa:deadsnakes/ppa
+ - sudo apt-get update -y
+ - sudo apt-get install -y python3.9
+ install:
+ - cargo install mdbook
+ script: ci/dox.sh
+ stage: tools
+ - name: "rustfmt"
+ install: true
+ script: |
+ rustup toolchain install nightly -c rustfmt --allow-downgrade
+ ci/all.sh check_fmt || true
+ stage: tools
+
+ allow_failures:
+ # FIXME: ISPC cannot be found?
+ - name: "Benchmarks - x86_64-apple-darwin"
+ # FIXME: i686 fails in inlining, apparently
+ - stage: 32bit-tier1
+ #- env: TARGET=i686-unknown-freebsd NORUN=1
+ #- env: TARGET=x86_64-unknown-freebsd NORUN=1
+ #- env: TARGET=x86_64-unknown-netbsd NORUN=1
+ #- env: TARGET=x86_64-sun-solaris NORUN=1
+
+ # FIXME: TBD
+ - stage: 64bit-tier2
+ - stage: 32bit-tier2
+
+ # FIXME: iOS
+ # https://github.com/rust-lang-nursery/packed_simd/issues/26
+ - env: TARGET=x86_64-apple-ios
+ # Is this related to the above? Mysterious test failure
+ - name: "aarch64-apple-ios + NEON"
+
+install: travis_retry rustup target add $TARGET
+before_script: cargo generate-lockfile
+script: travis_wait 50 ci/run-docker.sh
+after_script: sleep 5
+
+env:
+ global:
+ secure: "lPHv7s6+AxQYNaFncycVFQt++Y1asQmMhOikQU1ztlP8CK7+hn2m98cg/euOJyzIOb2iJ3ZX4cGZkzw4lc59MQBByb1GtDbazQoUOzVDbVfe9BDD2f8JVoIFh1CMfjPKQ7Gg/rJqWlwrUlSd5GNxPCutKjY7qZhJuR6SQbJjlWaGN2Vd4fVCzKXz8fHRXgMEZS+d+CR4Nsrkb83J3Z4s5kSdJmhYxJ61AWjuzJVwUh4l3/HEYlSL5XXpuh5R2i7W16h1PlNdaTUgkZli1lHzO8+6Q8LzX9+XiLIEVX9lw3A2NdIKGz8E/+7Qs5oYOkwYhjROsDQxIK7xkSM30bQuN7cwMBybAVIyOPJkqXQ1dQyp83KSdsOj7JMyDDRvcEDLI6ehRlm5EcdH7YrReuboN81iUo0Sa7VsuUmgj5hjERCt9r30f9aWuitABai7vKRtjglg7Sp5CrEVPA4PQs6PqKCCRogoggbXJ/Z5Dyw/RZaXPeNR9+qIKN1Vjm9Gew1sRN2JK/3+vXTKtyJXH/uBxgJt4jQlbuShOJuF+BSfTF88sMe67a/357SSOIb4JkaCyd0flDCWYE8576kaHPlVVMT2peXee0LeRXm1e13nG3Na0t3LS/orJLPHOShNQGoDj7qAP5aEKggRya896JGwtvlaBHHTmSQh65G7cyNErZo="
+branches:
+ only:
+ - staging # bors r+
+ - trying # bors try
+ - master
+notifications:
+ email:
+ on_success: never
diff --git a/third_party/rust/packed_simd/Cargo.toml b/third_party/rust/packed_simd/Cargo.toml
new file mode 100644
index 0000000000..a8d2f971e6
--- /dev/null
+++ b/third_party/rust/packed_simd/Cargo.toml
@@ -0,0 +1,80 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2018"
+name = "packed_simd"
+version = "0.3.9"
+build = "build.rs"
+description = "Portable Packed SIMD vectors"
+homepage = "https://github.com/rust-lang/packed_simd"
+documentation = "https://docs.rs/crate/packed_simd/"
+readme = "README.md"
+keywords = [
+ "simd",
+ "vector",
+ "portability",
+]
+categories = [
+ "hardware-support",
+ "concurrency",
+ "no-std",
+ "data-structures",
+]
+license = "MIT OR Apache-2.0"
+repository = "https://github.com/rust-lang/packed_simd"
+
+[package.metadata.docs.rs]
+features = ["into_bits"]
+rustdoc-args = [
+ "--cfg",
+ "doc_cfg",
+]
+
+[dependencies]
+cfg-if = "1.0.0"
+
+[dependencies.core_arch]
+version = "0.1.5"
+optional = true
+
+[dependencies.num-traits]
+version = "0.2.14"
+default-features = false
+
+[dev-dependencies]
+paste = "^1"
+
+[dev-dependencies.arrayvec]
+version = "^0.5"
+default-features = false
+
+[features]
+default = []
+into_bits = []
+libcore_neon = []
+
+[target."cfg(target_arch = \"x86_64\")".dependencies.sleef-sys]
+version = "0.1.2"
+optional = true
+
+[target.wasm32-unknown-unknown.dev-dependencies]
+wasm-bindgen = "=0.2.87"
+wasm-bindgen-test = "=0.3.37"
+
+[badges.is-it-maintained-issue-resolution]
+repository = "rust-lang/packed_simd"
+
+[badges.is-it-maintained-open-issues]
+repository = "rust-lang/packed_simd"
+
+[badges.maintenance]
+status = "experimental"
diff --git a/third_party/rust/packed_simd/LICENSE-APACHE b/third_party/rust/packed_simd/LICENSE-APACHE
new file mode 100644
index 0000000000..16fe87b06e
--- /dev/null
+++ b/third_party/rust/packed_simd/LICENSE-APACHE
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/third_party/rust/packed_simd/LICENSE-MIT b/third_party/rust/packed_simd/LICENSE-MIT
new file mode 100644
index 0000000000..39d4bdb5ac
--- /dev/null
+++ b/third_party/rust/packed_simd/LICENSE-MIT
@@ -0,0 +1,25 @@
+Copyright (c) 2014 The Rust Project Developers
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
diff --git a/third_party/rust/packed_simd/README.md b/third_party/rust/packed_simd/README.md
new file mode 100644
index 0000000000..59db13fe4f
--- /dev/null
+++ b/third_party/rust/packed_simd/README.md
@@ -0,0 +1,144 @@
+# `Simd<[T; N]>`
+
+## Implementation of [Rust RFC #2366: `std::simd`][rfc2366]
+
+[![Latest Version]][crates.io] [![docs]][master_docs]
+
+**WARNING**: this crate only supports the most recent nightly Rust toolchain
+and will be superseded by [`#![feature(portable_simd)]`](https://github.com/rust-lang/portable-simd).
+
+## Documentation
+
+* [API docs (`master` branch)][master_docs]
+* [Performance guide][perf_guide]
+* [API docs (`docs.rs`)][docs.rs]
+* [RFC2366 `std::simd`][rfc2366]: - contains motivation, design rationale,
+ discussion, etc.
+
+## Examples
+
+Most of the examples come with both a scalar and a vectorized implementation.
+
+* [`aobench`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/aobench)
+* [`fannkuch_redux`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/fannkuch_redux)
+* [`matrix inverse`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/matrix_inverse)
+* [`mandelbrot`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/mandelbrot)
+* [`n-body`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/nbody)
+* [`options_pricing`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/options_pricing)
+* [`spectral_norm`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/spectral_norm)
+* [`triangle transform`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/triangle_xform)
+* [`stencil`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/stencil)
+* [`vector dot product`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/dot_product)
+
+## Cargo features
+
+* `into_bits` (default: disabled): enables `FromBits`/`IntoBits` trait
+ implementations for the vector types. These allow reinterpreting the bits of a
+ vector type as those of another vector type safely by just using the
+ `.into_bits()` method.
+
+## Performance
+
+The following [ISPC] examples are also part of `packed_simd`'s
+[`examples/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/)
+directory, where `packed_simd`+[`rayon`][rayon] are used to emulate [ISPC]'s
+Single-Program-Multiple-Data (SPMD) programming model. The performance results
+on different hardware is shown in the `readme.md` of each example. The following
+table summarizes the performance ranges, where `+` means speed-up and `-`
+slowdown:
+
+* `aobench`: `[-1.02x, +1.53x]`,
+* `stencil`: `[+1.06x, +1.72x]`,
+* `mandelbrot`: `[-1.74x, +1.2x]`,
+* `options_pricing`:
+ * `black_scholes`: `+1.0x`
+ * `binomial_put`: `+1.4x`
+
+ While SPMD is not the intended use case for `packed_simd`, it is possible to
+ combine the library with [`rayon`][rayon] to poorly emulate [ISPC]'s SPMD programming
+ model in Rust. Writing performant code is not as straightforward as with
+ [ISPC], but with some care (e.g. see the [Performance Guide][perf_guide]) one
+ can easily match and often out-perform [ISPC]'s "default performance".
+
+## Platform support
+
+The following table describes the supported platforms: `build` shows whether
+the library compiles without issues for a given target, while `run` shows
+whether the test suite passes for a given target.
+
+| **Linux** | **build** | **run** |
+|---------------------------------------|-----------|---------|
+| `i586-unknown-linux-gnu` | ✓ | ✗ |
+| `i686-unknown-linux-gnu` | ✓ | ✗ |
+| `x86_64-unknown-linux-gnu` | ✓ | ✓ |
+| `arm-unknown-linux-gnueabihf` | ✓ | ✓ |
+| `armv7-unknown-linux-gnueabi` | ✓ | ✓ |
+| `aarch64-unknown-linux-gnu` | ✓ | ✓ |
+| `powerpc-unknown-linux-gnu` | ✓ | ✗ |
+| `powerpc64-unknown-linux-gnu` | ✓ | ✗ |
+| `powerpc64le-unknown-linux-gnu` | ✓ | ✓ |
+| `s390x-unknown-linux-gnu` | ✓ | ✗ |
+| `sparc64-unknown-linux-gnu` | ✓ | ✗ |
+| `thumbv7neon-unknown-linux-gnueabihf` | ✓ | ✓ |
+| **MacOSX** | **build** | **run** |
+| `x86_64-apple-darwin` | ✓ | ✓ |
+| **Android** | **build** | **run** |
+| `x86_64-linux-android` | ✓ | ✓ |
+| `armv7-linux-androideabi` | ✓ | ✗ |
+| `aarch64-linux-android` | ✓ | ✗ |
+| `thumbv7neon-linux-androideabi` | ✓ | ✗ |
+| **iOS** | **build** | **run** |
+| `x86_64-apple-ios` | ✗ | ✗ |
+| `aarch64-apple-ios` | ✗ | ✗ |
+
+
+## Machine code verification
+
+The
+[`verify/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/verify)
+crate tests disassembles the portable packed vector APIs at run-time and
+compares the generated machine code against the desired one to make sure that
+this crate remains efficient.
+
+## License
+
+This project is licensed under either of
+
+* [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
+ ([LICENSE-APACHE](LICENSE-APACHE))
+
+* [MIT License](http://opensource.org/licenses/MIT)
+ ([LICENSE-MIT](LICENSE-MIT))
+
+at your option.
+
+## Contributing
+
+We welcome all people who want to contribute.
+Please see the [contributing instructions] for more information.
+
+Contributions in any form (issues, pull requests, etc.) to this project
+must adhere to Rust's [Code of Conduct].
+
+Unless you explicitly state otherwise, any contribution intentionally submitted
+for inclusion in `packed_simd` by you, as defined in the Apache-2.0 license, shall be
+dual licensed as above, without any additional terms or conditions.
+
+[travis]: https://travis-ci.com/rust-lang/packed_simd
+[Travis-CI Status]: https://travis-ci.com/rust-lang/packed_simd.svg?branch=master
+[appveyor]: https://ci.appveyor.com/project/gnzlbg/packed-simd
+[Appveyor Status]: https://ci.appveyor.com/api/projects/status/hd7v9dvr442hgdix?svg=true
+[Latest Version]: https://img.shields.io/crates/v/packed_simd.svg
+[crates.io]: https://crates.io/crates/packed_simd
+[docs]: https://docs.rs/packed_simd/badge.svg
+[docs.rs]: https://docs.rs/packed_simd
+[master_docs]: https://rust-lang-nursery.github.io/packed_simd/packed_simd/
+[perf_guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/
+[rfc2366]: https://github.com/rust-lang/rfcs/pull/2366
+[ISPC]: https://ispc.github.io/
+[rayon]: https://crates.io/crates/rayon
+[boost_license]: https://www.boost.org/LICENSE_1_0.txt
+[SLEEF]: https://sleef.org/
+[sleef_sys]: https://crates.io/crates/sleef-sys
+[contributing instructions]: contributing.md
+[Code of Conduct]: https://www.rust-lang.org/en-US/conduct.html
diff --git a/third_party/rust/packed_simd/bors.toml b/third_party/rust/packed_simd/bors.toml
new file mode 100644
index 0000000000..6d302dc85c
--- /dev/null
+++ b/third_party/rust/packed_simd/bors.toml
@@ -0,0 +1,3 @@
+status = [
+ "continuous-integration/travis-ci/push"
+] \ No newline at end of file
diff --git a/third_party/rust/packed_simd/build.rs b/third_party/rust/packed_simd/build.rs
new file mode 100644
index 0000000000..e87298a2de
--- /dev/null
+++ b/third_party/rust/packed_simd/build.rs
@@ -0,0 +1,6 @@
+fn main() {
+ let target = std::env::var("TARGET").expect("TARGET environment variable not defined");
+ if target.contains("neon") {
+ println!("cargo:rustc-cfg=libcore_neon");
+ }
+}
diff --git a/third_party/rust/packed_simd/ci/all.sh b/third_party/rust/packed_simd/ci/all.sh
new file mode 100755
index 0000000000..55a1fa2efe
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/all.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+#
+# Performs an operation on all targets
+
+set -ex
+
+: "${1?The all.sh script requires one argument.}"
+
+op=$1
+
+cargo_clean() {
+ cargo clean
+}
+
+cargo_check_fmt() {
+ cargo fmt --all -- --check
+}
+
+cargo_fmt() {
+ cargo fmt --all
+}
+
+cargo_clippy() {
+ cargo clippy --all -- -D clippy::perf
+}
+
+CMD="-1"
+
+case $op in
+ clean*)
+ CMD=cargo_clean
+ ;;
+ check_fmt*)
+ CMD=cargo_check_fmt
+ ;;
+ fmt*)
+ CMD=cargo_fmt
+ ;;
+ clippy)
+ CMD=cargo_clippy
+ ;;
+ *)
+ echo "Unknown operation: \"${op}\""
+ exit 1
+ ;;
+esac
+
+echo "Operation is: ${CMD}"
+
+# On src/
+$CMD
+
+# Check examples/
+for dir in examples/*/
+do
+ dir=${dir%*/}
+ (
+ cd "${dir%*/}"
+ $CMD
+ )
+done
+
+(
+ cd verify/verify
+ $CMD
+)
+
+(
+ cd micro_benchmarks
+ $CMD
+)
diff --git a/third_party/rust/packed_simd/ci/android-install-ndk.sh b/third_party/rust/packed_simd/ci/android-install-ndk.sh
new file mode 100644
index 0000000000..5370853937
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/android-install-ndk.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env sh
+# Copyright 2016 The Rust Project Developers. See the COPYRIGHT
+# file at the top-level directory of this distribution and at
+# http://rust-lang.org/COPYRIGHT.
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+set -ex
+
+ANDROID_NDK_URL=https://dl.google.com/android/repository
+ANDROID_NDK_ARCHIVE=android-ndk-r25b-linux.zip
+
+curl -fO "$ANDROID_NDK_URL/$ANDROID_NDK_ARCHIVE"
+unzip -q $ANDROID_NDK_ARCHIVE
+rm $ANDROID_NDK_ARCHIVE
+mv android-ndk-* ndk
+rm -rf android-ndk-*
diff --git a/third_party/rust/packed_simd/ci/android-install-sdk.sh b/third_party/rust/packed_simd/ci/android-install-sdk.sh
new file mode 100644
index 0000000000..6b5ac09ab0
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/android-install-sdk.sh
@@ -0,0 +1,60 @@
+#!/usr/bin/env sh
+# Copyright 2016 The Rust Project Developers. See the COPYRIGHT
+# file at the top-level directory of this distribution and at
+# http://rust-lang.org/COPYRIGHT.
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+set -ex
+
+# Prep the SDK and emulator
+#
+# Note that the update process requires that we accept a bunch of licenses, and
+# we can't just pipe `yes` into it for some reason, so we take the same strategy
+# located in https://github.com/appunite/docker by just wrapping it in a script
+# which apparently magically accepts the licenses.
+
+mkdir sdk
+curl --retry 5 https://dl.google.com/android/repository/sdk-tools-linux-3859397.zip -O
+unzip -d sdk sdk-tools-linux-3859397.zip
+
+case "$1" in
+ arm | armv7)
+ abi=armeabi-v7a
+ ;;
+
+ aarch64)
+ abi=arm64-v8a
+ ;;
+
+ i686)
+ abi=x86
+ ;;
+
+ x86_64)
+ abi=x86_64
+ ;;
+
+ *)
+ echo "invalid arch: $1"
+ exit 1
+ ;;
+esac;
+
+# --no_https avoids
+ # javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: No trusted certificate found
+yes | ./sdk/tools/bin/sdkmanager --licenses --no_https
+yes | ./sdk/tools/bin/sdkmanager --no_https \
+ "emulator" \
+ "platform-tools" \
+ "platforms;android-24" \
+ "system-images;android-24;default;$abi"
+
+echo "no" |
+ ./sdk/tools/bin/avdmanager create avd \
+ --name "${1}" \
+ --package "system-images;android-24;default;$abi"
diff --git a/third_party/rust/packed_simd/ci/android-sysimage.sh b/third_party/rust/packed_simd/ci/android-sysimage.sh
new file mode 100644
index 0000000000..9eabd7c8d9
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/android-sysimage.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+
+# Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+# file at the top-level directory of this distribution and at
+# http://rust-lang.org/COPYRIGHT.
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+set -ex
+
+URL=https://dl.google.com/android/repository/sys-img/android
+
+main() {
+ local arch="${1}"
+ local name="${2}"
+ local dest=/system
+ local td
+ td="$(mktemp -d)"
+
+ apt-get install --no-install-recommends e2tools
+
+ pushd "${td}"
+ curl --retry 5 -O "${URL}/${name}"
+ unzip -q "${name}"
+
+ local system
+ system="$(find . -name system.img)"
+ mkdir -p ${dest}/{bin,lib,lib64}
+
+ # Extract android linker and libraries to /system
+ # This allows android executables to be run directly (or with qemu)
+ if [ "${arch}" = "x86_64" ] || [ "${arch}" = "arm64" ]; then
+ e2cp -p "${system}:/bin/linker64" "${dest}/bin/"
+ e2cp -p "${system}:/lib64/libdl.so" "${dest}/lib64/"
+ e2cp -p "${system}:/lib64/libc.so" "${dest}/lib64/"
+ e2cp -p "${system}:/lib64/libm.so" "${dest}/lib64/"
+ else
+ e2cp -p "${system}:/bin/linker" "${dest}/bin/"
+ e2cp -p "${system}:/lib/libdl.so" "${dest}/lib/"
+ e2cp -p "${system}:/lib/libc.so" "${dest}/lib/"
+ e2cp -p "${system}:/lib/libm.so" "${dest}/lib/"
+ fi
+
+ # clean up
+ apt-get purge --auto-remove -y e2tools
+
+ popd
+
+ rm -rf "${td}"
+}
+
+main "${@}"
diff --git a/third_party/rust/packed_simd/ci/benchmark.sh b/third_party/rust/packed_simd/ci/benchmark.sh
new file mode 100755
index 0000000000..3635b9e371
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/benchmark.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+#
+# Runs all benchmarks. Controlled by the following environment variables:
+#
+# FEATURES={} - cargo features to pass to all benchmarks (e.g. core_arch,sleef-sys,ispc)
+# NORUN={1} - only builds the benchmarks
+
+set -ex
+
+if [[ ${NORUN} != 1 ]]; then
+ # Most benchmarks require hyperfine; require it upfront.
+ hash hyperfine 2>/dev/null || { echo >&2 "hyperfine is not in PATH."; exit 1; }
+fi
+
+
+# If the ispc benchmark feature is enabled, ispc must be in the path of the
+# benchmarks.
+if echo "$FEATURES" | grep -q "ispc"; then
+ hash ispc 2>/dev/null || { echo >&2 "ispc is not in PATH."; exit 1; }
+fi
+
+# An example with a benchmark.sh is a benchmark:
+for dir in examples/*/
+do
+ dir=${dir%*/}
+ cd ${dir%*/}
+ if [ -f "benchmark.sh" ]; then
+ ./benchmark.sh
+ fi
+ cd -
+done
+
diff --git a/third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs b/third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs
new file mode 100644
index 0000000000..c0fe52c356
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/deploy_and_run_on_ios_simulator.rs
@@ -0,0 +1,176 @@
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// This is a script to deploy and execute a binary on an iOS simulator.
+// The primary use of this is to be able to run unit tests on the simulator and
+// retrieve the results.
+//
+// To do this through Cargo instead, use Dinghy
+// (https://github.com/snipsco/dinghy): cargo dinghy install, then cargo dinghy
+// test.
+
+use std::env;
+use std::fs::{self, File};
+use std::io::Write;
+use std::path::Path;
+use std::process;
+use std::process::Command;
+
+macro_rules! t {
+ ($e:expr) => (match $e {
+ Ok(e) => e,
+ Err(e) => panic!("{} failed with: {}", stringify!($e), e),
+ })
+}
+
+// Step one: Wrap as an app
+fn package_as_simulator_app(crate_name: &str, test_binary_path: &Path) {
+ println!("Packaging simulator app");
+ drop(fs::remove_dir_all("ios_simulator_app"));
+ t!(fs::create_dir("ios_simulator_app"));
+ t!(fs::copy(test_binary_path,
+ Path::new("ios_simulator_app").join(crate_name)));
+
+ let mut f = t!(File::create("ios_simulator_app/Info.plist"));
+ t!(f.write_all(format!(r#"
+ <?xml version="1.0" encoding="UTF-8"?>
+ <!DOCTYPE plist PUBLIC
+ "-//Apple//DTD PLIST 1.0//EN"
+ "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+ <plist version="1.0">
+ <dict>
+ <key>CFBundleExecutable</key>
+ <string>{}</string>
+ <key>CFBundleIdentifier</key>
+ <string>com.rust.unittests</string>
+ </dict>
+ </plist>
+ "#, crate_name).as_bytes()));
+}
+
+// Step two: Start the iOS simulator
+fn start_simulator() {
+ println!("Looking for iOS simulator");
+ let output = t!(Command::new("xcrun").arg("simctl").arg("list").output());
+ assert!(output.status.success());
+ let mut simulator_exists = false;
+ let mut simulator_booted = false;
+ let mut found_rust_sim = false;
+ let stdout = t!(String::from_utf8(output.stdout));
+ for line in stdout.lines() {
+ if line.contains("rust_ios") {
+ if found_rust_sim {
+ panic!("Duplicate rust_ios simulators found. Please \
+ double-check xcrun simctl list.");
+ }
+ simulator_exists = true;
+ simulator_booted = line.contains("(Booted)");
+ found_rust_sim = true;
+ }
+ }
+
+ if simulator_exists == false {
+ println!("Creating iOS simulator");
+ Command::new("xcrun")
+ .arg("simctl")
+ .arg("create")
+ .arg("rust_ios")
+ .arg("com.apple.CoreSimulator.SimDeviceType.iPhone-SE")
+ .arg("com.apple.CoreSimulator.SimRuntime.iOS-10-2")
+ .check_status();
+ } else if simulator_booted == true {
+ println!("Shutting down already-booted simulator");
+ Command::new("xcrun")
+ .arg("simctl")
+ .arg("shutdown")
+ .arg("rust_ios")
+ .check_status();
+ }
+
+ println!("Starting iOS simulator");
+ // We can't uninstall the app (if present) as that will hang if the
+ // simulator isn't completely booted; just erase the simulator instead.
+ Command::new("xcrun").arg("simctl").arg("erase").arg("rust_ios").check_status();
+ Command::new("xcrun").arg("simctl").arg("boot").arg("rust_ios").check_status();
+}
+
+// Step three: Install the app
+fn install_app_to_simulator() {
+ println!("Installing app to simulator");
+ Command::new("xcrun")
+ .arg("simctl")
+ .arg("install")
+ .arg("booted")
+ .arg("ios_simulator_app/")
+ .check_status();
+}
+
+// Step four: Run the app
+fn run_app_on_simulator() {
+ println!("Running app");
+ let output = t!(Command::new("xcrun")
+ .arg("simctl")
+ .arg("launch")
+ .arg("--console")
+ .arg("booted")
+ .arg("com.rust.unittests")
+ .output());
+
+ println!("stdout --\n{}\n", String::from_utf8_lossy(&output.stdout));
+ println!("stderr --\n{}\n", String::from_utf8_lossy(&output.stderr));
+
+ let stdout = String::from_utf8_lossy(&output.stdout);
+ let failed = stdout.lines()
+ .find(|l| l.contains("FAILED"))
+ .map(|l| l.contains("FAILED"))
+ .unwrap_or(false);
+
+ let passed = stdout.lines()
+ .find(|l| l.contains("test result: ok"))
+ .map(|l| l.contains("test result: ok"))
+ .unwrap_or(false);
+
+ println!("Shutting down simulator");
+ Command::new("xcrun")
+ .arg("simctl")
+ .arg("shutdown")
+ .arg("rust_ios")
+ .check_status();
+ if !(passed && !failed) {
+ panic!("tests didn't pass");
+ }
+}
+
+trait CheckStatus {
+ fn check_status(&mut self);
+}
+
+impl CheckStatus for Command {
+ fn check_status(&mut self) {
+ println!("\trunning: {:?}", self);
+ assert!(t!(self.status()).success());
+ }
+}
+
+fn main() {
+ let args: Vec<String> = env::args().collect();
+ if args.len() != 2 {
+ println!("Usage: {} <executable>", args[0]);
+ process::exit(-1);
+ }
+
+ let test_binary_path = Path::new(&args[1]);
+ let crate_name = test_binary_path.file_name().unwrap();
+
+ package_as_simulator_app(crate_name.to_str().unwrap(), test_binary_path);
+ start_simulator();
+ install_app_to_simulator();
+ run_app_on_simulator();
+}
diff --git a/third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile b/third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile
new file mode 100644
index 0000000000..27bde89c5a
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/aarch64-linux-android/Dockerfile
@@ -0,0 +1,47 @@
+FROM ubuntu:16.04
+
+RUN dpkg --add-architecture i386 && \
+ apt-get update && \
+ apt-get install -y --no-install-recommends \
+ file \
+ make \
+ curl \
+ ca-certificates \
+ python \
+ unzip \
+ expect \
+ openjdk-9-jre \
+ libstdc++6:i386 \
+ libpulse0 \
+ gcc \
+ libc6-dev
+
+WORKDIR /android/
+COPY android* /android/
+
+ENV ANDROID_ARCH=aarch64
+ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools
+
+RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
+RUN sh /android/android-install-sdk.sh $ANDROID_ARCH
+RUN mv /root/.android /tmp
+RUN chmod 777 -R /tmp/.android
+RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/*
+
+ENV PATH=$PATH:/rust/bin \
+ CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER=aarch64-linux-android-gcc \
+ CARGO_TARGET_AARCH64_LINUX_ANDROID_RUNNER=/tmp/runtest \
+ OBJDUMP=aarch64-linux-android-objdump \
+ HOME=/tmp
+
+ADD runtest-android.rs /tmp/runtest.rs
+ENTRYPOINT [ \
+ "bash", \
+ "-c", \
+ # set SHELL so android can detect a 64bits system, see
+ # http://stackoverflow.com/a/41789144
+ "SHELL=/bin/dash /android/sdk/emulator/emulator @aarch64 -no-window & \
+ rustc /tmp/runtest.rs -o /tmp/runtest && \
+ exec \"$@\"", \
+ "--" \
+]
diff --git a/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
new file mode 100644
index 0000000000..41ff4729ac
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
@@ -0,0 +1,14 @@
+FROM ubuntu:18.04
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ gcc \
+ ca-certificates \
+ libc6-dev \
+ gcc-aarch64-linux-gnu \
+ libc6-dev-arm64-cross \
+ qemu-user \
+ make \
+ file
+
+ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
+ CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -L /usr/aarch64-linux-gnu" \
+ OBJDUMP=aarch64-linux-gnu-objdump
diff --git a/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile
new file mode 100644
index 0000000000..e1c591dd97
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabi/Dockerfile
@@ -0,0 +1,15 @@
+FROM ubuntu:18.04
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ gcc \
+ ca-certificates \
+ libc6-dev \
+ libc6-armel-cross \
+ libc6-dev-armel-cross \
+ binutils-arm-linux-gnueabi \
+ gcc-arm-linux-gnueabi \
+ qemu-user \
+ make \
+ file
+ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_LINKER=arm-linux-gnueabi-gcc \
+ CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABI_RUNNER="qemu-arm -L /usr/arm-linux-gnueabi" \
+ OBJDUMP=arm-linux-gnueabi-objdump
diff --git a/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
new file mode 100644
index 0000000000..757b79e7ec
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
@@ -0,0 +1,13 @@
+FROM ubuntu:18.04
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ gcc \
+ ca-certificates \
+ libc6-dev \
+ gcc-arm-linux-gnueabihf \
+ libc6-dev-armhf-cross \
+ qemu-user \
+ make \
+ file
+ENV CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
+ CARGO_TARGET_ARM_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
+ OBJDUMP=arm-linux-gnueabihf-objdump
diff --git a/third_party/rust/packed_simd/ci/docker/armv7-linux-androideabi/Dockerfile b/third_party/rust/packed_simd/ci/docker/armv7-linux-androideabi/Dockerfile
new file mode 100644
index 0000000000..995a9e30e6
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/armv7-linux-androideabi/Dockerfile
@@ -0,0 +1,47 @@
+FROM ubuntu:16.04
+
+RUN dpkg --add-architecture i386 && \
+ apt-get update && \
+ apt-get install -y --no-install-recommends \
+ file \
+ make \
+ curl \
+ ca-certificates \
+ python \
+ unzip \
+ expect \
+ openjdk-9-jre \
+ libstdc++6:i386 \
+ libpulse0 \
+ gcc \
+ libc6-dev
+
+WORKDIR /android/
+COPY android* /android/
+
+ENV ANDROID_ARCH=arm
+ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools
+
+RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
+RUN sh /android/android-install-sdk.sh $ANDROID_ARCH
+RUN mv /root/.android /tmp
+RUN chmod 777 -R /tmp/.android
+RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/*
+
+ENV PATH=$PATH:/rust/bin \
+ CARGO_TARGET_ARM_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \
+ CARGO_TARGET_ARM_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \
+ OBJDUMP=arm-linux-androideabi-objdump \
+ HOME=/tmp
+
+ADD runtest-android.rs /tmp/runtest.rs
+ENTRYPOINT [ \
+ "bash", \
+ "-c", \
+ # set SHELL so android can detect a 64bits system, see
+ # http://stackoverflow.com/a/41789144
+ "SHELL=/bin/dash /android/sdk/emulator/emulator @arm -no-window & \
+ rustc /tmp/runtest.rs -o /tmp/runtest && \
+ exec \"$@\"", \
+ "--" \
+]
diff --git a/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
new file mode 100644
index 0000000000..2539062933
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
@@ -0,0 +1,13 @@
+FROM ubuntu:18.04
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ gcc \
+ ca-certificates \
+ libc6-dev \
+ gcc-arm-linux-gnueabihf \
+ libc6-dev-armhf-cross \
+ qemu-user \
+ make \
+ file
+ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
+ CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
+ OBJDUMP=arm-linux-gnueabihf-objdump
diff --git a/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile
new file mode 100644
index 0000000000..01093698f6
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/i586-unknown-linux-gnu/Dockerfile
@@ -0,0 +1,7 @@
+FROM ubuntu:18.04
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ gcc-multilib \
+ libc6-dev \
+ file \
+ make \
+ ca-certificates
diff --git a/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile
new file mode 100644
index 0000000000..01093698f6
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/i686-unknown-linux-gnu/Dockerfile
@@ -0,0 +1,7 @@
+FROM ubuntu:18.04
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ gcc-multilib \
+ libc6-dev \
+ file \
+ make \
+ ca-certificates
diff --git a/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile
new file mode 100644
index 0000000000..3bd471e87d
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/mips-unknown-linux-gnu/Dockerfile
@@ -0,0 +1,13 @@
+FROM ubuntu:18.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ gcc libc6-dev qemu-user ca-certificates \
+ gcc-mips-linux-gnu libc6-dev-mips-cross \
+ qemu-system-mips \
+ qemu-user \
+ make \
+ file
+
+ENV CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_LINKER=mips-linux-gnu-gcc \
+ CARGO_TARGET_MIPS_UNKNOWN_LINUX_GNU_RUNNER="qemu-mips -L /usr/mips-linux-gnu" \
+ OBJDUMP=mips-linux-gnu-objdump \ No newline at end of file
diff --git a/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
new file mode 100644
index 0000000000..f26f1f38eb
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
@@ -0,0 +1,10 @@
+FROM ubuntu:18.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ gcc libc6-dev qemu-user ca-certificates \
+ gcc-mips64-linux-gnuabi64 libc6-dev-mips64-cross \
+ qemu-system-mips64 qemu-user
+
+ENV CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_LINKER=mips64-linux-gnuabi64-gcc \
+ CARGO_TARGET_MIPS64_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64 -L /usr/mips64-linux-gnuabi64" \
+ OBJDUMP=mips64-linux-gnuabi64-objdump \ No newline at end of file
diff --git a/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
new file mode 100644
index 0000000000..7d9f0bd992
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
@@ -0,0 +1,10 @@
+FROM ubuntu:18.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ gcc libc6-dev qemu-user ca-certificates \
+ gcc-mips64el-linux-gnuabi64 libc6-dev-mips64el-cross \
+ qemu-system-mips64el
+
+ENV CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_LINKER=mips64el-linux-gnuabi64-gcc \
+ CARGO_TARGET_MIPS64EL_UNKNOWN_LINUX_GNUABI64_RUNNER="qemu-mips64el -L /usr/mips64el-linux-gnuabi64" \
+ OBJDUMP=mips64el-linux-gnuabi64-objdump \ No newline at end of file
diff --git a/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile b/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile
new file mode 100644
index 0000000000..7488662ef2
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/mipsel-unknown-linux-musl/Dockerfile
@@ -0,0 +1,25 @@
+FROM ubuntu:18.10
+
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends \
+ ca-certificates \
+ gcc \
+ libc6-dev \
+ make \
+ qemu-user \
+ qemu-system-mips \
+ bzip2 \
+ curl \
+ file
+
+RUN mkdir /toolchain
+
+# Note that this originally came from:
+# https://downloads.openwrt.org/snapshots/trunk/malta/generic/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2
+RUN curl -L https://ci-mirrors.rust-lang.org/libc/OpenWrt-Toolchain-malta-le_gcc-5.3.0_musl-1.1.15.Linux-x86_64.tar.bz2 | \
+ tar xjf - -C /toolchain --strip-components=2
+
+ENV PATH=$PATH:/rust/bin:/toolchain/bin \
+ CC_mipsel_unknown_linux_musl=mipsel-openwrt-linux-gcc \
+ CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_LINKER=mipsel-openwrt-linux-gcc \
+ CARGO_TARGET_MIPSEL_UNKNOWN_LINUX_MUSL_RUNNER="qemu-mipsel -L /toolchain"
diff --git a/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
new file mode 100644
index 0000000000..15ba58e60c
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
@@ -0,0 +1,13 @@
+FROM ubuntu:22.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ gcc libc6-dev qemu-user ca-certificates \
+ gcc-powerpc-linux-gnu libc6-dev-powerpc-cross \
+ qemu-system-ppc \
+ make \
+ file
+
+ENV CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_LINKER=powerpc-linux-gnu-gcc \
+ CARGO_TARGET_POWERPC_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc -cpu Vger -L /usr/powerpc-linux-gnu" \
+ CC=powerpc-linux-gnu-gcc \
+ OBJDUMP=powerpc-linux-gnu-objdump
diff --git a/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
new file mode 100644
index 0000000000..21c296dc44
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
@@ -0,0 +1,17 @@
+FROM ubuntu:22.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ gcc \
+ ca-certificates \
+ libc6-dev \
+ gcc-powerpc64-linux-gnu \
+ libc6-dev-ppc64-cross \
+ qemu-user \
+ qemu-system-ppc \
+ make \
+ file
+
+ENV CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_LINKER=powerpc64-linux-gnu-gcc \
+ CARGO_TARGET_POWERPC64_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64 -L /usr/powerpc64-linux-gnu" \
+ CC=powerpc64-linux-gnu-gcc \
+ OBJDUMP=powerpc64-linux-gnu-objdump
diff --git a/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
new file mode 100644
index 0000000000..8034145fc0
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
@@ -0,0 +1,11 @@
+FROM ubuntu:22.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ gcc libc6-dev qemu-user ca-certificates \
+ gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross \
+ qemu-system-ppc file make
+
+ENV CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_LINKER=powerpc64le-linux-gnu-gcc \
+ CARGO_TARGET_POWERPC64LE_UNKNOWN_LINUX_GNU_RUNNER="qemu-ppc64le -L /usr/powerpc64le-linux-gnu" \
+ CC=powerpc64le-linux-gnu-gcc \
+ OBJDUMP=powerpc64le-linux-gnu-objdump
diff --git a/third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile
new file mode 100644
index 0000000000..e785ca370c
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/s390x-unknown-linux-gnu/Dockerfile
@@ -0,0 +1,20 @@
+FROM ubuntu:22.04
+
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends \
+ ca-certificates \
+ curl \
+ cmake \
+ gcc \
+ libc6-dev \
+ g++-s390x-linux-gnu \
+ libc6-dev-s390x-cross \
+ qemu-user \
+ make \
+ file
+
+ENV CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_LINKER=s390x-linux-gnu-gcc \
+ CARGO_TARGET_S390X_UNKNOWN_LINUX_GNU_RUNNER="qemu-s390x -L /usr/s390x-linux-gnu" \
+ CC_s390x_unknown_linux_gnu=s390x-linux-gnu-gcc \
+ CXX_s390x_unknown_linux_gnu=s390x-linux-gnu-g++ \
+ OBJDUMP=s390x-linux-gnu-objdump
diff --git a/third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile
new file mode 100644
index 0000000000..c35f4d8f31
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/sparc64-unknown-linux-gnu/Dockerfile
@@ -0,0 +1,18 @@
+FROM debian:bookworm
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ curl ca-certificates \
+ gcc libc6-dev \
+ gcc-sparc64-linux-gnu libc6-dev-sparc64-cross \
+ qemu-system-sparc64 openbios-sparc seabios ipxe-qemu \
+ p7zip-full cpio
+
+COPY linux-sparc64.sh /
+RUN bash /linux-sparc64.sh
+
+COPY test-runner-linux /
+
+ENV CARGO_TARGET_SPARC64_UNKNOWN_LINUX_GNU_LINKER=sparc64-linux-gnu-gcc \
+ CARGO_TARGET_SPARC64_UNKNOWN_LINUX_GNU_RUNNER="/test-runner-linux sparc64" \
+ CC_sparc64_unknown_linux_gnu=sparc64-linux-gnu-gcc \
+ PATH=$PATH:/rust/bin
diff --git a/third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile b/third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile
new file mode 100644
index 0000000000..c1da77109c
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/thumbv7neon-linux-androideabi/Dockerfile
@@ -0,0 +1,47 @@
+FROM ubuntu:16.04
+
+RUN dpkg --add-architecture i386 && \
+ apt-get update && \
+ apt-get install -y --no-install-recommends \
+ file \
+ make \
+ curl \
+ ca-certificates \
+ python \
+ unzip \
+ expect \
+ openjdk-9-jre \
+ libstdc++6:i386 \
+ libpulse0 \
+ gcc \
+ libc6-dev
+
+WORKDIR /android/
+COPY android* /android/
+
+ENV ANDROID_ARCH=arm
+ENV PATH=$PATH:/android/ndk-$ANDROID_ARCH/bin:/android/sdk/tools:/android/sdk/platform-tools
+
+RUN sh /android/android-install-ndk.sh $ANDROID_ARCH
+RUN sh /android/android-install-sdk.sh $ANDROID_ARCH
+RUN mv /root/.android /tmp
+RUN chmod 777 -R /tmp/.android
+RUN chmod 755 /android/sdk/tools/* /android/sdk/emulator/qemu/linux-x86_64/*
+
+ENV PATH=$PATH:/rust/bin \
+ CARGO_TARGET_THUMBV7NEON_LINUX_ANDROIDEABI_LINKER=arm-linux-androideabi-gcc \
+ CARGO_TARGET_THUMBV7NEON_LINUX_ANDROIDEABI_RUNNER=/tmp/runtest \
+ OBJDUMP=arm-linux-androideabi-objdump \
+ HOME=/tmp
+
+ADD runtest-android.rs /tmp/runtest.rs
+ENTRYPOINT [ \
+ "bash", \
+ "-c", \
+ # set SHELL so android can detect a 64bits system, see
+ # http://stackoverflow.com/a/41789144
+ "SHELL=/bin/dash /android/sdk/emulator/emulator @arm -no-window & \
+ rustc /tmp/runtest.rs -o /tmp/runtest && \
+ exec \"$@\"", \
+ "--" \
+]
diff --git a/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile b/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile
new file mode 100644
index 0000000000..588d23c65a
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/thumbv7neon-unknown-linux-gnueabihf/Dockerfile
@@ -0,0 +1,13 @@
+FROM ubuntu:18.04
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ gcc \
+ ca-certificates \
+ libc6-dev \
+ gcc-arm-linux-gnueabihf \
+ libc6-dev-armhf-cross \
+ qemu-user \
+ make \
+ file
+ENV CARGO_TARGET_THUMBV7NEON_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
+ CARGO_TARGET_THUMBV7NEON_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -L /usr/arm-linux-gnueabihf" \
+ OBJDUMP=arm-linux-gnueabihf-objdump
diff --git a/third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile b/third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile
new file mode 100644
index 0000000000..51ee13e6c9
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/wasm32-unknown-unknown/Dockerfile
@@ -0,0 +1,39 @@
+FROM ubuntu:22.04
+
+RUN apt-get update -y && apt-get install -y --no-install-recommends \
+ ca-certificates \
+ clang \
+ cmake \
+ curl \
+ git \
+ libc6-dev \
+ make \
+ ninja-build \
+ python-is-python3 \
+ xz-utils
+
+# Install `wasm2wat`
+RUN git clone --recursive https://github.com/WebAssembly/wabt
+RUN make -C wabt -j$(nproc)
+ENV PATH=$PATH:/wabt/bin
+
+# Install `wasm-bindgen-test-runner`
+RUN curl -L https://github.com/rustwasm/wasm-bindgen/releases/download/0.2.87/wasm-bindgen-0.2.87-x86_64-unknown-linux-musl.tar.gz \
+ | tar xzf -
+# Keep in sync with the version on Cargo.toml.
+ENV PATH=$PATH:/wasm-bindgen-0.2.87-x86_64-unknown-linux-musl
+ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_RUNNER=wasm-bindgen-test-runner
+
+# Install `node`
+RUN curl https://nodejs.org/dist/v14.16.0/node-v14.16.0-linux-x64.tar.xz | tar xJf -
+ENV PATH=$PATH:/node-v14.16.0-linux-x64/bin
+
+# We use a shim linker that removes `--strip-debug` when passed to LLD. While
+# this typically results in invalid debug information in release mode it doesn't
+# result in an invalid names section which is what we're interested in.
+COPY lld-shim.rs /
+ENV CARGO_TARGET_WASM32_UNKNOWN_UNKNOWN_LINKER=/tmp/lld-shim
+
+# Rustc isn't available until this container starts, so defer compilation of the
+# shim.
+ENTRYPOINT /rust/bin/rustc /lld-shim.rs -o /tmp/lld-shim && exec bash "$@"
diff --git a/third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile b/third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile
new file mode 100644
index 0000000000..785936d347
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/x86_64-linux-android/Dockerfile
@@ -0,0 +1,31 @@
+FROM ubuntu:20.04
+
+RUN apt-get update && \
+ apt-get install -y --no-install-recommends \
+ ca-certificates \
+ curl \
+ gcc \
+ libc-dev \
+ python \
+ unzip \
+ file \
+ make
+
+WORKDIR /android/
+ENV ANDROID_ARCH=x86_64
+COPY android-install-ndk.sh /android/
+RUN sh /android/android-install-ndk.sh
+
+ENV STDARCH_ASSERT_INSTR_LIMIT=30
+
+# We do not run x86_64-linux-android tests on an android emulator.
+# See ci/android-sysimage.sh for informations about how tests are run.
+COPY android-sysimage.sh /android/
+RUN bash /android/android-sysimage.sh x86_64 x86_64-24_r07.zip
+
+ENV PATH=$PATH:/rust/bin:/android/ndk/toolchains/llvm/prebuilt/linux-x86_64/bin \
+ CARGO_TARGET_X86_64_LINUX_ANDROID_LINKER=x86_64-linux-android21-clang \
+ CC_x86_64_linux_android=x86_64-linux-android21-clang \
+ CXX_x86_64_linux_android=x86_64-linux-android21-clang++ \
+ OBJDUMP=llvm-objdump \
+ HOME=/tmp
diff --git a/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile
new file mode 100644
index 0000000000..a6bbe66539
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile
@@ -0,0 +1,16 @@
+FROM ubuntu:18.04
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ gcc \
+ libc6-dev \
+ file \
+ make \
+ ca-certificates \
+ wget \
+ bzip2 \
+ cmake \
+ libclang-dev \
+ clang
+
+RUN wget https://github.com/gnzlbg/intel_sde/raw/master/sde-external-8.16.0-2018-01-30-lin.tar.bz2
+RUN tar -xjf sde-external-8.16.0-2018-01-30-lin.tar.bz2
+ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-8.16.0-2018-01-30-lin/sde64 --"
diff --git a/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
new file mode 100644
index 0000000000..ce5bb88e62
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
@@ -0,0 +1,10 @@
+FROM ubuntu:18.04
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ gcc \
+ libc6-dev \
+ file \
+ make \
+ ca-certificates \
+ cmake \
+ libclang-dev \
+ clang
diff --git a/third_party/rust/packed_simd/ci/dox.sh b/third_party/rust/packed_simd/ci/dox.sh
new file mode 100755
index 0000000000..560eaadcc8
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/dox.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+set -ex
+
+rm -rf target/doc
+mkdir -p target/doc
+
+# Build API documentation
+cargo doc --features=into_bits
+
+# Build Performance Guide
+# FIXME: https://github.com/rust-lang-nursery/mdBook/issues/780
+# mdbook build perf-guide -d target/doc/perf-guide
+cd perf-guide
+mdbook build
+cd -
+cp -r perf-guide/book target/doc/perf-guide
+
+# If we're on travis, not a PR, and on the right branch, publish!
+if [ "$TRAVIS_PULL_REQUEST" = "false" ] && [ "$TRAVIS_BRANCH" = "master" ]; then
+ python3 -vV
+ pip -vV
+ python3.9 -vV
+ pip install ghp_import --user
+ ghp-import -n target/doc
+ git push -qf https://${GH_PAGES}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages
+fi
diff --git a/third_party/rust/packed_simd/ci/linux-s390x.sh b/third_party/rust/packed_simd/ci/linux-s390x.sh
new file mode 100644
index 0000000000..972abeec56
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/linux-s390x.sh
@@ -0,0 +1,18 @@
+set -ex
+
+mkdir -m 777 /qemu
+cd /qemu
+
+curl -LO https://github.com/qemu/qemu/raw/master/pc-bios/s390-ccw.img
+curl -LO http://ftp.debian.org/debian/dists/testing/main/installer-s390x/20170828/images/generic/kernel.debian
+curl -LO http://ftp.debian.org/debian/dists/testing/main/installer-s390x/20170828/images/generic/initrd.debian
+
+mv kernel.debian kernel
+mv initrd.debian initrd.gz
+
+mkdir init
+cd init
+gunzip -c ../initrd.gz | cpio -id
+rm ../initrd.gz
+cp /usr/s390x-linux-gnu/lib/libgcc_s.so.1 usr/lib/
+chmod a+w .
diff --git a/third_party/rust/packed_simd/ci/linux-sparc64.sh b/third_party/rust/packed_simd/ci/linux-sparc64.sh
new file mode 100644
index 0000000000..4452b120e1
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/linux-sparc64.sh
@@ -0,0 +1,17 @@
+set -ex
+
+mkdir -m 777 /qemu
+cd /qemu
+
+curl -LO https://cdimage.debian.org/cdimage/ports/9.0/sparc64/iso-cd/debian-9.0-sparc64-NETINST-1.iso
+7z e debian-9.0-sparc64-NETINST-1.iso boot/initrd.gz
+7z e debian-9.0-sparc64-NETINST-1.iso boot/sparc64
+mv sparc64 kernel
+rm debian-9.0-sparc64-NETINST-1.iso
+
+mkdir init
+cd init
+gunzip -c ../initrd.gz | cpio -id
+rm ../initrd.gz
+cp /usr/sparc64-linux-gnu/lib/libgcc_s.so.1 usr/lib/
+chmod a+w .
diff --git a/third_party/rust/packed_simd/ci/lld-shim.rs b/third_party/rust/packed_simd/ci/lld-shim.rs
new file mode 100644
index 0000000000..10263869e8
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/lld-shim.rs
@@ -0,0 +1,11 @@
+use std::os::unix::prelude::*;
+use std::process::Command;
+use std::env;
+
+fn main() {
+ let args = env::args()
+ .skip(1)
+ .filter(|s| s != "--strip-debug")
+ .collect::<Vec<_>>();
+ panic!("failed to exec: {}", Command::new("rust-lld").args(&args).exec());
+}
diff --git a/third_party/rust/packed_simd/ci/max_line_width.sh b/third_party/rust/packed_simd/ci/max_line_width.sh
new file mode 100755
index 0000000000..f70639b6f8
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/max_line_width.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env sh
+
+set -x
+
+export success=true
+
+find . -iname '*.rs' | while read -r file; do
+ result=$(grep '.\{79\}' "${file}" | grep --invert 'http')
+ if [ "${result}" = "" ]
+ then
+ :
+ else
+ echo "file \"${file}\": $result"
+ exit 1
+ fi
+done
+
diff --git a/third_party/rust/packed_simd/ci/run-docker.sh b/third_party/rust/packed_simd/ci/run-docker.sh
new file mode 100755
index 0000000000..abdd6852fc
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/run-docker.sh
@@ -0,0 +1,38 @@
+# Small script to run tests for a target (or all targets) inside all the
+# respective docker images.
+
+set -ex
+
+run() {
+ echo "Building docker container for TARGET=${TARGET} RUSTFLAGS=${RUSTFLAGS}"
+ docker build -t packed_simd -f ci/docker/${TARGET}/Dockerfile ci/
+ mkdir -p target
+ target=$(echo "${TARGET}" | sed 's/-emulated//')
+ echo "Running docker"
+ docker run \
+ --user `id -u`:`id -g` \
+ --rm \
+ --init \
+ --volume $HOME/.cargo:/cargo \
+ --env CARGO_HOME=/cargo \
+ --volume `rustc --print sysroot`:/rust:ro \
+ --env TARGET=$target \
+ --env NORUN \
+ --env NOVERIFY \
+ --env RUSTFLAGS \
+ --volume `pwd`:/checkout:ro \
+ --volume `pwd`/target:/checkout/target \
+ --workdir /checkout \
+ --privileged \
+ packed_simd \
+ bash \
+ -c 'PATH=$PATH:/rust/bin exec ci/run.sh'
+}
+
+if [ -z "${TARGET}" ]; then
+ for d in `ls ci/docker/`; do
+ run $d
+ done
+else
+ run ${TARGET}
+fi
diff --git a/third_party/rust/packed_simd/ci/run.sh b/third_party/rust/packed_simd/ci/run.sh
new file mode 100755
index 0000000000..b1b22ebd7c
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/run.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+
+set -ex
+
+: ${TARGET?"The TARGET environment variable must be set."}
+
+# Tests are all super fast anyway, and they fault often enough on travis that
+# having only one thread increases debuggability to be worth it.
+#export RUST_TEST_THREADS=1
+#export RUST_BACKTRACE=full
+#export RUST_TEST_NOCAPTURE=1
+
+# Some appveyor builds run out-of-memory; this attempts to mitigate that:
+# https://github.com/rust-lang-nursery/packed_simd/issues/39
+# export RUSTFLAGS="${RUSTFLAGS} -C codegen-units=1"
+# export CARGO_BUILD_JOBS=1
+
+export CARGO_SUBCMD=test
+if [[ "${NORUN}" == "1" ]]; then
+ export CARGO_SUBCMD=build
+fi
+
+if [[ ${TARGET} == "x86_64-apple-ios" ]] || [[ ${TARGET} == "i386-apple-ios" ]]; then
+ export RUSTFLAGS="${RUSTFLAGS} -Clink-arg=-mios-simulator-version-min=7.0"
+ rustc ./ci/deploy_and_run_on_ios_simulator.rs -o $HOME/runtest
+ export CARGO_TARGET_X86_64_APPLE_IOS_RUNNER=$HOME/runtest
+ export CARGO_TARGET_I386_APPLE_IOS_RUNNER=$HOME/runtest
+fi
+
+# The source directory is read-only. Need to copy internal crates to the target
+# directory for their Cargo.lock to be properly written.
+mkdir target || true
+
+rustc --version
+cargo --version
+echo "TARGET=${TARGET}"
+echo "HOST=${HOST}"
+echo "RUSTFLAGS=${RUSTFLAGS}"
+echo "NORUN=${NORUN}"
+echo "NOVERIFY=${NOVERIFY}"
+echo "CARGO_SUBCMD=${CARGO_SUBCMD}"
+echo "CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS}"
+echo "CARGO_INCREMENTAL=${CARGO_INCREMENTAL}"
+echo "RUST_TEST_THREADS=${RUST_TEST_THREADS}"
+echo "RUST_BACKTRACE=${RUST_BACKTRACE}"
+echo "RUST_TEST_NOCAPTURE=${RUST_TEST_NOCAPTURE}"
+
+cargo_test() {
+ cmd="cargo ${CARGO_SUBCMD} --verbose --target=${TARGET} ${@}"
+ if [ "${NORUN}" != "1" ]
+ then
+ if [ "$TARGET" != "wasm32-unknown-unknown" ]
+ then
+ cmd="$cmd -- --quiet"
+ fi
+ fi
+ mkdir target || true
+ ${cmd} 2>&1 | tee > target/output
+ if [[ ${PIPESTATUS[0]} != 0 ]]; then
+ cat target/output
+ return 1
+ fi
+}
+
+cargo_test_impl() {
+ ORIGINAL_RUSTFLAGS=${RUSTFLAGS}
+ RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v16 --cfg test_v32 --cfg test_v64" cargo_test ${@}
+ RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v128 --cfg test_v256" cargo_test ${@}
+ RUSTFLAGS="${ORIGINAL_RUSTFLAGS} --cfg test_v512" cargo_test ${@}
+ RUSTFLAGS=${ORIGINAL_RUSTFLAGS}
+}
+
+# Debug run:
+if [[ "${TARGET}" != "wasm32-unknown-unknown" ]]; then
+ # Run wasm32-unknown-unknown in release mode only
+ cargo_test_impl
+fi
+
+if [[ "${TARGET}" == "x86_64-unknown-linux-gnu" ]] || [[ "${TARGET}" == "x86_64-pc-windows-msvc" ]]; then
+ # use sleef on linux and windows x86_64 builds
+ # FIXME: Use `core_arch,sleef-sys` features once they works again
+ cargo_test_impl --release --features=into_bits
+else
+ # FIXME: Use `core_arch` feature once it works again
+ cargo_test_impl --release --features=into_bits
+fi
+
+# Verify code generation
+if [[ "${NOVERIFY}" != "1" ]]; then
+ cp -r verify/verify target/verify
+ export STDSIMD_ASSERT_INSTR_LIMIT=30
+ if [[ "${TARGET}" == "i586-unknown-linux-gnu" ]]; then
+ export STDSIMD_ASSERT_INSTR_LIMIT=50
+ fi
+ cargo_test --release --manifest-path=target/verify/Cargo.toml
+fi
+
+# FIXME: Figure out which examples take too long to run and ignore or adjust those
+#. ci/run_examples.sh
diff --git a/third_party/rust/packed_simd/ci/run_examples.sh b/third_party/rust/packed_simd/ci/run_examples.sh
new file mode 100644
index 0000000000..5b26b18afb
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/run_examples.sh
@@ -0,0 +1,51 @@
+# Runs all examples.
+
+# FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/55
+# All examples fail to build for `armv7-apple-ios`.
+if [[ ${TARGET} == "armv7-apple-ios" ]]; then
+ exit 0
+fi
+
+# FIXME: travis exceeds 50 minutes on these targets
+# Skipping the examples is an attempt at preventing travis from timing-out
+if [[ ${TARGET} == "arm-linux-androidabi" ]] || [[ ${TARGET} == "aarch64-linux-androidabi" ]] \
+ || [[ ${TARGET} == "sparc64-unknown-linux-gnu" ]]; then
+ exit 0
+fi
+
+if [[ ${TARGET} == "wasm32-unknown-unknown" ]]; then
+ exit 0
+fi
+
+cp -r examples/aobench target/aobench
+cargo_test --manifest-path=target/aobench/Cargo.toml --release --no-default-features
+cargo_test --manifest-path=target/aobench/Cargo.toml --release --features=256bit
+
+cp -r examples/dot_product target/dot_product
+cargo_test --manifest-path=target/dot_product/Cargo.toml --release
+
+cp -r examples/fannkuch_redux target/fannkuch_redux
+cargo_test --manifest-path=target/fannkuch_redux/Cargo.toml --release
+
+# FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/56
+if [[ ${TARGET} != "i586-unknown-linux-gnu" ]]; then
+ cp -r examples/mandelbrot target/mandelbrot
+ cargo_test --manifest-path=target/mandelbrot/Cargo.toml --release
+fi
+
+cp -r examples/matrix_inverse target/matrix_inverse
+cargo_test --manifest-path=target/matrix_inverse/Cargo.toml --release
+
+cp -r examples/nbody target/nbody
+cargo_test --manifest-path=target/nbody/Cargo.toml --release
+
+cp -r examples/spectral_norm target/spectral_norm
+cargo_test --manifest-path=target/spectral_norm/Cargo.toml --release
+
+if [[ ${TARGET} != "i586-unknown-linux-gnu" ]]; then
+ cp -r examples/stencil target/stencil
+ cargo_test --manifest-path=target/stencil/Cargo.toml --release
+fi
+
+cp -r examples/triangle_xform target/triangle_xform
+cargo_test --manifest-path=target/triangle_xform/Cargo.toml --release
diff --git a/third_party/rust/packed_simd/ci/runtest-android.rs b/third_party/rust/packed_simd/ci/runtest-android.rs
new file mode 100644
index 0000000000..ed1cd80c83
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/runtest-android.rs
@@ -0,0 +1,45 @@
+use std::env;
+use std::process::Command;
+use std::path::{Path, PathBuf};
+
+fn main() {
+ let args = env::args_os()
+ .skip(1)
+ .filter(|arg| arg != "--quiet")
+ .collect::<Vec<_>>();
+ assert_eq!(args.len(), 1);
+ let test = PathBuf::from(&args[0]);
+ let dst = Path::new("/data/local/tmp").join(test.file_name().unwrap());
+
+ let status = Command::new("adb")
+ .arg("wait-for-device")
+ .status()
+ .expect("failed to run: adb wait-for-device");
+ assert!(status.success());
+
+ let status = Command::new("adb")
+ .arg("push")
+ .arg(&test)
+ .arg(&dst)
+ .status()
+ .expect("failed to run: adb pushr");
+ assert!(status.success());
+
+ let output = Command::new("adb")
+ .arg("shell")
+ .arg(&dst)
+ .output()
+ .expect("failed to run: adb shell");
+ assert!(status.success());
+
+ println!("status: {}\nstdout ---\n{}\nstderr ---\n{}",
+ output.status,
+ String::from_utf8_lossy(&output.stdout),
+ String::from_utf8_lossy(&output.stderr));
+
+ let stdout = String::from_utf8_lossy(&output.stdout);
+ let mut lines = stdout.lines().filter(|l| l.starts_with("test result"));
+ if !lines.all(|l| l.contains("test result: ok") && l.contains("0 failed")) {
+ panic!("failed to find successful test run");
+ }
+}
diff --git a/third_party/rust/packed_simd/ci/setup_benchmarks.sh b/third_party/rust/packed_simd/ci/setup_benchmarks.sh
new file mode 100755
index 0000000000..cd41a78513
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/setup_benchmarks.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+set -ex
+
+# Get latest ISPC binary for the target and put it in the path
+git clone https://github.com/gnzlbg/ispc-binaries
+cp ispc-binaries/ispc-${TARGET} ispc
diff --git a/third_party/rust/packed_simd/ci/test-runner-linux b/third_party/rust/packed_simd/ci/test-runner-linux
new file mode 100755
index 0000000000..0654f63bfd
--- /dev/null
+++ b/third_party/rust/packed_simd/ci/test-runner-linux
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+set -e
+
+arch=$1
+prog=$2
+
+cd /qemu/init
+cp -f $2 prog
+find . | cpio --create --format='newc' --quiet | gzip > ../initrd.gz
+cd ..
+
+timeout 30s qemu-system-$arch \
+ -m 1024 \
+ -nographic \
+ -kernel kernel \
+ -initrd initrd.gz \
+ -append init=/prog > output || true
+
+# remove kernel messages
+tr -d '\r' < output | egrep -v '^\['
+
+# if the output contains a failure, return error
+! grep FAILED output > /dev/null
diff --git a/third_party/rust/packed_simd/contributing.md b/third_party/rust/packed_simd/contributing.md
new file mode 100644
index 0000000000..79af8c199e
--- /dev/null
+++ b/third_party/rust/packed_simd/contributing.md
@@ -0,0 +1,67 @@
+# Contributing to `packed_simd`
+
+Welcome! If you are reading this document, it means you are interested in contributing
+to the `packed_simd` crate.
+
+## Reporting issues
+
+All issues with this crate are tracked using GitHub's [Issue Tracker].
+
+You can use issues to bring bugs to the attention of the maintainers, to discuss
+certain problems encountered with the crate, or to request new features (although
+feature requests should be limited to things mentioned in the [RFC]).
+
+One thing to keep in mind is to always use the **latest** nightly toolchain when
+working on this crate. Due to the nature of this project, we use a lot of unstable
+features, meaning breakage happens often.
+
+[Issue Tracker]: https://github.com/rust-lang-nursery/packed_simd/issues
+[RFC]: https://github.com/rust-lang/rfcs/pull/2366
+
+### LLVM issues
+
+The Rust compiler relies on [LLVM](https://llvm.org/) for machine code generation,
+and quite a few LLVM bugs have been discovered during the development of this project.
+
+If you encounter issues with incorrect/suboptimal codegen, which you do not encounter
+when using the [SIMD vendor intrinsics](https://doc.rust-lang.org/nightly/std/arch/),
+it is likely the issue is with LLVM, or this crate's interaction with it.
+
+You should first open an issue **in this repo** to help us track the problem, and we
+will help determine what is the exact cause of the problem.
+If LLVM is indeed the cause, the issue will be reported upstream to the
+[LLVM bugtracker](https://bugs.llvm.org/).
+
+## Submitting Pull Requests
+
+New code is submitted to the crate using GitHub's [pull request] mechanism.
+You should first fork this repository, make your changes (preferably in a new
+branch), then use GitHub's web UI to create a new PR.
+
+[pull request]: https://help.github.com/articles/about-pull-requests/
+
+### Examples
+
+The `examples` directory contains code showcasing SIMD code written with this crate,
+usually in comparison to scalar or ISPC code. If you have a project / idea which
+uses SIMD, we'd love to add it to the examples list.
+
+Every example should include a small `README`, describing the example code's purpose.
+If your example could potentially work as a benchmark, then add a `benchmark.sh`
+script to allow running the example benchmark code in CI. See an existing example's
+[`benchmark.sh`](examples/aobench/benchmark.sh) for a sample.
+
+Don't forget to update the crate's top-level `README` with a link to your example.
+
+### Perf guide
+
+The objective of the [performance guide][perf-guide] is to be a comprehensive
+resource detailing the process of optimizing Rust code with SIMD support.
+
+If you believe a certain section could be reworded, or if you have any tips & tricks
+related to SIMD which you'd like to share, please open a PR.
+
+[mdBook] is used to manage the formatting of the guide as a book.
+
+[perf-guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/
+[mdBook]: https://github.com/rust-lang-nursery/mdBook
diff --git a/third_party/rust/packed_simd/perf-guide/.gitignore b/third_party/rust/packed_simd/perf-guide/.gitignore
new file mode 100644
index 0000000000..5a0bf0317d
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/.gitignore
@@ -0,0 +1 @@
+/book
diff --git a/third_party/rust/packed_simd/perf-guide/book.toml b/third_party/rust/packed_simd/perf-guide/book.toml
new file mode 100644
index 0000000000..69ba3053ca
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/book.toml
@@ -0,0 +1,12 @@
+[book]
+authors = ["Gonzalo Brito Gadeschi", "Gabriel Majeri"]
+multilingual = false
+src = "src"
+title = "Rust SIMD Performance Guide"
+description = "This book describes how to write performant SIMD code in Rust."
+
+[build]
+create-missing = false
+
+[output.html]
+additional-css = ["./src/ascii.css"]
diff --git a/third_party/rust/packed_simd/perf-guide/src/SUMMARY.md b/third_party/rust/packed_simd/perf-guide/src/SUMMARY.md
new file mode 100644
index 0000000000..1e76898865
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/src/SUMMARY.md
@@ -0,0 +1,21 @@
+# Summary
+
+[Introduction](./introduction.md)
+
+- [Floating-point Math](./float-math/fp.md)
+ - [Short-vector Math Library](./float-math/svml.md)
+ - [Approximate functions](./float-math/approx.md)
+ - [Fused multiply-accumulate](./float-math/fma.md)
+
+- [Target features](./target-feature/features.md)
+ - [Using `RUSTFLAGS`](./target-feature/rustflags.md)
+ - [Using the `target_feature` attribute](./target-feature/attribute.md)
+ - [Interaction with inlining](./target-feature/inlining.md)
+ - [Detecting features at runtime](./target-feature/runtime.md)
+
+- [Bounds checking](./bound_checks.md)
+- [Vertical and horizontal operations](./vert-hor-ops.md)
+
+- [Performance profiling](./prof/profiling.md)
+ - [Profiling on Linux](./prof/linux.md)
+ - [Using machine code analyzers](./prof/mca.md)
diff --git a/third_party/rust/packed_simd/perf-guide/src/ascii.css b/third_party/rust/packed_simd/perf-guide/src/ascii.css
new file mode 100644
index 0000000000..4c02651195
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/src/ascii.css
@@ -0,0 +1,4 @@
+code {
+ /* "Source Code Pro" breaks ASCII art */
+ font-family: Consolas, "Ubuntu Mono", Menlo, "DejaVu Sans Mono", monospace;
+}
diff --git a/third_party/rust/packed_simd/perf-guide/src/bound_checks.md b/third_party/rust/packed_simd/perf-guide/src/bound_checks.md
new file mode 100644
index 0000000000..2eeedb5ac8
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/src/bound_checks.md
@@ -0,0 +1,22 @@
+# Bounds checking
+
+Reading and writing packed vectors to/from slices is checked by default.
+Independently of the configuration options used, the safe functions:
+
+* `Simd<[T; N]>::from_slice_aligned(& s[..])`
+* `Simd<[T; N]>::write_to_slice_aligned(&mut s[..])`
+
+always check that:
+
+* the slice is big enough to hold the vector
+* the slice is suitably aligned to perform an aligned load/store for a `Simd<[T;
+ N]>` (this alignment is often much larger than that of `T`).
+
+There are `_unaligned` versions that use unaligned load and stores, as well as
+`unsafe` `_unchecked` that do not perform any checks iff `debug-assertions =
+false` / `debug = false`. That is, the `_unchecked` methods do still assert size
+and alignment in debug builds and could also do so in release builds depending
+on the configuration options.
+
+These assertions do often significantly impact performance and you should be
+aware of them.
diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/approx.md b/third_party/rust/packed_simd/perf-guide/src/float-math/approx.md
new file mode 100644
index 0000000000..2237c67ec4
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/src/float-math/approx.md
@@ -0,0 +1,8 @@
+# Approximate functions
+
+<!-- TODO:
+
+Explain that they exists, that they are often _much_ faster, how to use them,
+that people should check whether the error is good enough for their
+applications. Explain that this error is currently unstable and might change.
+-->
diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/fma.md b/third_party/rust/packed_simd/perf-guide/src/float-math/fma.md
new file mode 100644
index 0000000000..357748383d
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/src/float-math/fma.md
@@ -0,0 +1,6 @@
+# Fused Multiply Add
+
+<!-- TODO:
+Explain that this is a compound operation, infinite precision, difference
+between `mul_add` and `mul_adde`, that LLVM cannot do this by itself, etc.
+-->
diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/fp.md b/third_party/rust/packed_simd/perf-guide/src/float-math/fp.md
new file mode 100644
index 0000000000..711fcc4fd5
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/src/float-math/fp.md
@@ -0,0 +1,3 @@
+# Floating-point math
+
+This chapter contains information pertaining to working with floating-point numbers.
diff --git a/third_party/rust/packed_simd/perf-guide/src/float-math/svml.md b/third_party/rust/packed_simd/perf-guide/src/float-math/svml.md
new file mode 100644
index 0000000000..266c2531cc
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/src/float-math/svml.md
@@ -0,0 +1,7 @@
+# Short Vector Math Library
+
+<!-- TODO:
+Explain how is short-vector math performed by default (just scalarized libm calls).
+
+Explain how to enable `sleef`, etc.
+-->
diff --git a/third_party/rust/packed_simd/perf-guide/src/introduction.md b/third_party/rust/packed_simd/perf-guide/src/introduction.md
new file mode 100644
index 0000000000..7243e19c8a
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/src/introduction.md
@@ -0,0 +1,26 @@
+# Introduction
+
+## What is SIMD
+
+<!-- TODO:
+describe what SIMD is, which algorithms can benefit from it,
+give usage examples
+-->
+
+## History of SIMD in Rust
+
+<!-- TODO:
+discuss history of unstable std::simd,
+stabilization of std::arch, etc.
+-->
+
+## Discover packed_simd
+
+<!-- TODO: describe scope of this project -->
+
+Writing fast and portable SIMD algorithms using `packed_simd` is, unfortunately,
+not trivial. There are many pitfals that one should be aware of, and some idioms
+that help avoid those pitfalls.
+
+This book attempts to document these best practices and provides practical examples
+on how to apply the tips to _your_ code.
diff --git a/third_party/rust/packed_simd/perf-guide/src/prof/linux.md b/third_party/rust/packed_simd/perf-guide/src/prof/linux.md
new file mode 100644
index 0000000000..96c7d67bc4
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/src/prof/linux.md
@@ -0,0 +1,107 @@
+# Performance profiling on Linux
+
+## Using `perf`
+
+[perf](https://perf.wiki.kernel.org/) is the most powerful performance profiler
+for Linux, featuring support for various hardware Performance Monitoring Units,
+as well as integration with the kernel's performance events framework.
+
+We will only look at how can the `perf` command can be used to profile SIMD code.
+Full system profiling is outside of the scope of this book.
+
+### Recording
+
+The first step is to record a program's execution during an average workload.
+It helps if you can isolate the parts of your program which have performance
+issues, and set up a benchmark which can be easily (re)run.
+
+Build the benchmark binary in release mode, after having enabled debug info:
+
+```sh
+$ cargo build --release
+Finished release [optimized + debuginfo] target(s) in 0.02s
+```
+
+Then use the `perf record` subcommand:
+
+```sh
+$ perf record --call-graph=dwarf ./target/release/my-program
+[ perf record: Woken up 10 times to write data ]
+[ perf record: Captured and wrote 2,356 MB perf.data (292 samples) ]
+```
+
+Instead of using `--call-graph=dwarf`, which can become pretty slow, you can use
+`--call-graph=lbr` if you have a processor with support for Last Branch Record
+(i.e. Intel Haswell and newer).
+
+`perf` will, by default, record the count of CPU cycles it takes to execute
+various parts of your program. You can use the `-e` command line option
+to enable other performance events, such as `cache-misses`. Use `perf list`
+to get a list of all hardware counters supported by your CPU.
+
+### Viewing the report
+
+The next step is getting a bird's eye view of the program's execution.
+`perf` provides a `ncurses`-based interface which will get you started.
+
+Use `perf report` to open a visualization of your program's performance:
+
+```sh
+perf report --hierarchy -M intel
+```
+
+`--hierarchy` will display a tree-like structure of where your program spent
+most of its time. `-M intel` enables disassembly output with Intel syntax, which
+is subjectively more readable than the default AT&T syntax.
+
+Here is the output from profiling the `nbody` benchmark:
+
+```
+- 100,00% nbody
+ - 94,18% nbody
+ + 93,48% [.] nbody_lib::simd::advance
+ + 0,70% [.] nbody_lib::run
+ + 5,06% libc-2.28.so
+```
+
+If you move with the arrow keys to any node in the tree, you can the press `a`
+to have `perf` _annotate_ that node. This means it will:
+
+- disassemble the function
+
+- associate every instruction with the percentage of time which was spent executing it
+
+- interleaves the disassembly with the source code,
+ assuming it found the debug symbols
+ (you can use `s` to toggle this behaviour)
+
+`perf` will, by default, open the instruction which it identified as being the
+hottest spot in the function:
+
+```
+0,76 │ movapd xmm2,xmm0
+0,38 │ movhlps xmm2,xmm0
+ │ addpd xmm2,xmm0
+ │ unpcklpd xmm1,xmm2
+12,50 │ sqrtpd xmm0,xmm1
+1,52 │ mulpd xmm0,xmm1
+```
+
+In this case, `sqrtpd` will be highlighted in red, since that's the instruction
+which the CPU spends most of its time executing.
+
+## Using Valgrind
+
+Valgrind is a set of tools which initially helped C/C++ programmers find unsafe
+memory accesses in their code. Nowadays the project also has
+
+- a heap profiler called `massif`
+
+- a cache utilization profiler called `cachegrind`
+
+- a call-graph performance profiler called `callgrind`
+
+<!--
+TODO: explain valgrind's dynamic binary translation, warn about massive
+slowdown, talk about `kcachegrind` for a GUI
+-->
diff --git a/third_party/rust/packed_simd/perf-guide/src/prof/mca.md b/third_party/rust/packed_simd/perf-guide/src/prof/mca.md
new file mode 100644
index 0000000000..65ddf1a4eb
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/src/prof/mca.md
@@ -0,0 +1,100 @@
+# Machine code analysis tools
+
+## The microarchitecture of modern CPUs
+
+While you might have heard of Instruction Set Architectures, such as `x86` or
+`arm` or `mips`, the term _microarchitecture_ (also written here as _µ-arch_),
+refers to the internal details of an actual family of CPUs, such as Intel's
+_Haswell_ or AMD's _Jaguar_.
+
+Replacing scalar code with SIMD code will improve performance on all CPUs
+supporting the required vector extensions.
+However, due to microarchitectural differences, the actual speed-up at
+runtime might vary.
+
+**Example**: a simple example arises when optimizing for AMD K8 CPUs.
+The assembly generated for an empty function should look like this:
+
+```asm
+nop
+ret
+```
+
+The `nop` is used to align the `ret` instruction for better performance.
+However, the compiler will actually generated the following code:
+
+```asm
+repz ret
+```
+
+The `repz` instruction will repeat the following instruction until a certain
+condition. Of course, in this situation, the function will simply immediately
+return, and the `ret` instruction is still aligned.
+However, AMD K8's branch predictor performs better with the latter code.
+
+For those looking to absolutely maximize performance for a certain target µ-arch,
+you will have to read some CPU manuals, or ask the compiler to do it for you
+with `-C target-cpu`.
+
+### Summary of CPU internals
+
+Modern processors are able to execute instructions out-of-order for better performance,
+by utilizing tricks such as [branch prediction], [instruction pipelining],
+or [superscalar execution].
+
+[branch prediction]: https://en.wikipedia.org/wiki/Branch_predictor
+[instruction pipelining]: https://en.wikipedia.org/wiki/Instruction_pipelining
+[superscalar execution]: https://en.wikipedia.org/wiki/Superscalar_processor
+
+SIMD instructions are also subject to these optimizations, meaning it can get pretty
+difficult to determine where the slowdown happens.
+For example, if the profiler reports a store operation is slow, one of two things
+could be happening:
+
+- the store is limited by the CPU's memory bandwidth, which is actually an ideal
+ scenario, all things considered;
+
+- memory bandwidth is nowhere near its peak, but the value to be stored is at the
+ end of a long chain of operations, and this store is where the profiler
+ encountered the pipeline stall;
+
+Since most profilers are simple tools which don't understand the subtleties of
+instruction scheduling, you
+
+## Analyzing the machine code
+
+Certain tools have knowledge of internal CPU microarchitecture, i.e. they know
+
+- how many physical [register files] a CPU actually has
+
+- what is the latency / throughtput of an instruction
+
+- what [µ-ops] are generated for a set of instructions
+
+and many other architectural details.
+
+[register files]: https://en.wikipedia.org/wiki/Register_file
+[µ-ops]: https://en.wikipedia.org/wiki/Micro-operation
+
+These tools are therefore able to provide accurate information as to why some
+instructions are inefficient, and where the bottleneck is.
+
+The disadvantage is that the output of these tools requires advanced knowledge
+of the target architecture to understand, i.e. they **cannot** point out what
+the cause of the issue is explicitly.
+
+## Intel's Architecture Code Analyzer (IACA)
+
+[IACA] is a free tool offered by Intel for analyzing the performance of various
+computational kernels.
+
+Being a proprietary, closed source tool, it _only_ supports Intel's µ-arches.
+
+[IACA]: https://software.intel.com/en-us/articles/intel-architecture-code-analyzer
+
+## llvm-mca
+
+<!--
+TODO: once LLVM 7 gets released, write a chapter on using llvm-mca
+with SIMD disassembly.
+-->
diff --git a/third_party/rust/packed_simd/perf-guide/src/prof/profiling.md b/third_party/rust/packed_simd/perf-guide/src/prof/profiling.md
new file mode 100644
index 0000000000..02ba78d2f2
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/src/prof/profiling.md
@@ -0,0 +1,14 @@
+# Performance profiling
+
+While the rest of the book provides practical advice on how to improve the performance
+of SIMD code, this chapter is dedicated to [**performance profiling**][profiling].
+Profiling consists of recording a program's execution in order to identify program
+hotspots.
+
+**Important**: most profilers require debug information in order to accurately
+link the program hotspots back to the corresponding source code lines. Rust will
+disable debug info generation by default for optimized builds, but you can change
+that [in your `Cargo.toml`][cargo-ref].
+
+[profiling]: https://en.wikipedia.org/wiki/Profiling_(computer_programming)
+[cargo-ref]: https://doc.rust-lang.org/cargo/reference/manifest.html#the-profile-sections
diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md
new file mode 100644
index 0000000000..ee670fea5b
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/attribute.md
@@ -0,0 +1,5 @@
+# The `target_feature` attribute
+
+<!-- TODO:
+Explain the `#[target_feature]` attribute
+-->
diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/features.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/features.md
new file mode 100644
index 0000000000..b93030ca67
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/features.md
@@ -0,0 +1,13 @@
+# Enabling target features
+
+Not all processors of a certain architecture will have SIMD processing units,
+and using a SIMD instruction which is not supported will trigger undefined behavior.
+
+To allow building safe, portable programs, the Rust compiler will **not**, by default,
+generate any sort of vector instructions, unless it can statically determine
+they are supported. For example, on AMD64, SSE2 support is architecturally guaranteed.
+The `x86_64-apple-darwin` target enables up to SSSE3. The get a defintive list of
+which features are enabled by default on various platforms, refer to the target
+specifications [in the compiler's source code][targets].
+
+[targets]: https://github.com/rust-lang/rust/tree/master/src/librustc_target/spec
diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md
new file mode 100644
index 0000000000..86705102a7
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/inlining.md
@@ -0,0 +1,5 @@
+# Inlining
+
+<!-- TODO:
+Explain how the `#[target_feature]` attribute interacts with inlining
+-->
diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md
new file mode 100644
index 0000000000..5b55c61c26
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/practice.md
@@ -0,0 +1,31 @@
+# Target features in practice
+
+Using `RUSTFLAGS` will allow the crate being compiled, as well as all its
+transitive dependencies to use certain target features.
+
+A tehnique used to avoid undefined behavior at runtime is to compile and
+ship multiple binaries, each compiled with a certain set of features.
+This might not be feasible in some cases, and can quickly get out of hand
+as more and more vector extensions are added to an architecture.
+
+Rust can be more flexible: you can build a single binary/library which automatically
+picks the best supported vector instructions depending on the host machine.
+The trick consists of monomorphizing parts of the code during building, and then
+using run-time feature detection to select the right code path when running.
+
+<!-- TODO
+Explain how to create efficient functions that dispatch to different
+implementations at run-time without issues (e.g. using `#[inline(always)]` for
+the impls, wrapping in `#[target_feature]`, and the wrapping those in a function
+that does run-time feature detection).
+-->
+
+**NOTE** (x86 specific): because the AVX (256-bit) registers extend the existing
+SSE (128-bit) registers, mixing SSE and AVX instructions in a program can cause
+performance issues.
+
+The solution is to compile all code, even the code written with 128-bit vectors,
+with the AVX target feature enabled. This will cause the compiler to prefix the
+generated instructions with the [VEX] prefix.
+
+[VEX]: https://en.wikipedia.org/wiki/VEX_prefix
diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md
new file mode 100644
index 0000000000..47ddcc8660
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/runtime.md
@@ -0,0 +1,5 @@
+# Detecting host features at runtime
+
+<!-- TODO:
+Explain cost (how it works).
+-->
diff --git a/third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md b/third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md
new file mode 100644
index 0000000000..f4c1d1304a
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/src/target-feature/rustflags.md
@@ -0,0 +1,77 @@
+# Using RUSTFLAGS
+
+One of the easiest ways to benefit from SIMD is to allow the compiler
+to generate code using certain vector instruction extensions.
+
+The environment variable `RUSTFLAGS` can be used to pass options for code
+generation to the Rust compiler. These flags will affect **all** compiled crates.
+
+There are two flags which can be used to enable specific vector extensions:
+
+## target-feature
+
+- Syntax: `-C target-feature=<features>`
+
+- Provides the compiler with a comma-separated set of instruction extensions
+ to enable.
+
+ **Example**: Use `-C target-feature=+sse3,+avx` to enable generating instructions
+ for [Streaming SIMD Extensions 3](https://en.wikipedia.org/wiki/SSE3) and
+ [Advanced Vector Extensions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions).
+
+- To list target triples for all targets supported by Rust, use:
+
+ ```sh
+ rustc --print target-list
+ ```
+
+- To list all support target features for a certain target triple, use:
+
+ ```sh
+ rustc --target=${TRIPLE} --print target-features
+ ```
+
+- Note that all CPU features are independent, and will have to be enabled individually.
+
+ **Example**: Setting `-C target-feature=+avx2` will _not_ enable `fma`, even though
+ all CPUs which support AVX2 also support FMA. To enable both, one has to use
+ `-C target-feature=+avx2,+fma`
+
+- Some features also depend on other features, which need to be enabled for the
+ target instructions to be generated.
+
+ **Example**: Unless `v7` is specified as the target CPU (see below), to enable
+ NEON on ARM it is necessary to use `-C target-feature=+v7,+neon`.
+
+## target-cpu
+
+- Syntax: `-C target-cpu=<cpu>`
+
+- Sets the identifier of a CPU family / model for which to build and optimize the code.
+
+ **Example**: `RUSTFLAGS='-C target-cpu=cortex-a75'`
+
+- To list all supported target CPUs for a certain target triple, use:
+
+ ```sh
+ rustc --target=${TRIPLE} --print target-cpus
+ ```
+
+ **Example**:
+
+ ```sh
+ rustc --target=i686-pc-windows-msvc --print target-cpus
+ ```
+
+- The compiler will translate this into a list of target features. Therefore,
+ individual feature checks (`#[cfg(target_feature = "...")]`) will still
+ work properly.
+
+- It will cause the code generator to optimize the generated code for that
+ specific CPU model.
+
+- Using `native` as the CPU model will cause Rust to generate and optimize code
+ for the CPU running the compiler. It is useful when building programs which you
+ plan to only use locally. This should never be used when the generated programs
+ are meant to be run on other computers, such as when packaging for distribution
+ or cross-compiling.
diff --git a/third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md b/third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md
new file mode 100644
index 0000000000..d0dd1be12a
--- /dev/null
+++ b/third_party/rust/packed_simd/perf-guide/src/vert-hor-ops.md
@@ -0,0 +1,76 @@
+# Vertical and horizontal operations
+
+In SIMD terminology, each vector has a certain "width" (number of lanes).
+A vector processor is able to perform two kinds of operations on a vector:
+
+- Vertical operations:
+ operate on two vectors of the same width, result has same width
+
+**Example**: vertical addition of two `f32x4` vectors
+
+ %0 == | 2 | -3.5 | 0 | 7 |
+ + + + +
+ %1 == | 4 | 1.5 | -1 | 0 |
+ = = = =
+ %0 + %1 == | 6 | -2 | -1 | 7 |
+
+- Horizontal operations:
+ reduce the elements of two vectors in some way,
+ the result's elements combine information from the two original ones
+
+**Example**: horizontal addition of two `u64x2` vectors
+
+ %0 == | 1 | 3 |
+ └─+───┘
+ └───────┐
+ │
+ %1 == | 4 | -1 | │
+ └─+──┘ │
+ └───┐ │
+ │ │
+ ┌─────│───┘
+ ▼ ▼
+ %0 + %1 == | 4 | 3 |
+
+## Performance consideration of horizontal operations
+
+The result of vertical operations, like vector negation: `-a`, for a given lane,
+does not depend on the result of the operation for the other lanes. The result
+of horizontal operations, like the vector `sum` reduction: `a.sum()`, depends on
+the value of all vector lanes.
+
+In virtually all architectures vertical operations are fast, while horizontal
+operations are, by comparison, very slow.
+
+Consider the following two functions for computing the sum of all `f32` values
+in a slice:
+
+```rust
+fn fast_sum(x: &[f32]) -> f32 {
+ assert!(x.len() % 4 == 0);
+ let mut sum = f32x4::splat(0.); // [0., 0., 0., 0.]
+ for i in (0..x.len()).step_by(4) {
+ sum += f32x4::from_slice_unaligned(&x[i..]);
+ }
+ sum.sum()
+}
+
+fn slow_sum(x: &[f32]) -> f32 {
+ assert!(x.len() % 4 == 0);
+ let mut sum: f32 = 0.;
+ for i in (0..x.len()).step_by(4) {
+ sum += f32x4::from_slice_unaligned(&x[i..]).sum();
+ }
+ sum
+}
+```
+
+The inner loop over the slice is where the bulk of the work actually happens.
+There, the `fast_sum` function perform vertical operations into a vector, doing
+a single horizontal reduction at the end, while the `slow_sum` function performs
+horizontal vector operations inside of the loop.
+
+On all widely-used architectures, `fast_sum` is a large constant factor faster
+than `slow_sum`. You can run the [slice_sum]() example and see for yourself. On
+the particular machine tested there the algorithm using the horizontal vector
+addition is 2.7x slower than the one using vertical vector operations!
diff --git a/third_party/rust/packed_simd/rustfmt.toml b/third_party/rust/packed_simd/rustfmt.toml
new file mode 100644
index 0000000000..7316518b99
--- /dev/null
+++ b/third_party/rust/packed_simd/rustfmt.toml
@@ -0,0 +1,5 @@
+max_width = 110
+use_small_heuristics = "Max"
+wrap_comments = true
+edition = "2018"
+error_on_line_overflow = true \ No newline at end of file
diff --git a/third_party/rust/packed_simd/src/api.rs b/third_party/rust/packed_simd/src/api.rs
new file mode 100644
index 0000000000..aa1403e6e2
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api.rs
@@ -0,0 +1,308 @@
+//! Implements the Simd<[T; N]> APIs
+
+#[macro_use]
+mod bitmask;
+pub(crate) mod cast;
+#[macro_use]
+mod cmp;
+#[macro_use]
+mod default;
+#[macro_use]
+mod fmt;
+#[macro_use]
+mod from;
+#[macro_use]
+mod hash;
+#[macro_use]
+mod math;
+#[macro_use]
+mod minimal;
+#[macro_use]
+mod ops;
+#[macro_use]
+mod ptr;
+#[macro_use]
+mod reductions;
+#[macro_use]
+mod select;
+#[macro_use]
+mod shuffle;
+#[macro_use]
+mod shuffle1_dyn;
+#[macro_use]
+mod slice;
+#[macro_use]
+mod swap_bytes;
+#[macro_use]
+mod bit_manip;
+
+#[cfg(feature = "into_bits")]
+pub(crate) mod into_bits;
+
+macro_rules! impl_i {
+ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident
+ | $ielem_ty:ident, $ibitmask_ty:ident | $test_tt:tt | $($elem_ids:ident),*
+ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
+ impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+ | $($elem_ids),* | $(#[$doc])*);
+ impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_ops_vector_bitwise!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
+ );
+ impl_ops_scalar_bitwise!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
+ );
+ impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_ops_vector_int_min_max!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt
+ );
+ impl_reduction_integer_arithmetic!(
+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+ );
+ impl_reduction_min_max!(
+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+ );
+ impl_reduction_bitwise!(
+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+ | (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0)
+ );
+ impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_fmt_lower_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1));
+ impl_from_vectors!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
+ );
+ impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_cmp_partial_eq!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)
+ );
+ impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
+ impl_cmp_vertical!(
+ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt
+ );
+ impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
+ impl_bitmask!($tuple_id | $ibitmask_ty | (-1, 0) | $test_tt);
+
+ test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt);
+ test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ }
+}
+
+macro_rules! impl_u {
+ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident
+ | $ielem_ty:ident, $ibitmask_ty:ident | $test_tt:tt | $($elem_ids:ident),*
+ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
+ impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+ | $($elem_ids),* | $(#[$doc])*);
+ impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_ops_vector_bitwise!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
+ );
+ impl_ops_scalar_bitwise!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0)
+ );
+ impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_ops_vector_int_min_max!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt
+ );
+ impl_reduction_integer_arithmetic!(
+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+ );
+ impl_reduction_min_max!(
+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+ );
+ impl_reduction_bitwise!(
+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+ | (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0)
+ );
+ impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_fmt_lower_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1));
+ impl_from_vectors!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
+ );
+ impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_cmp_partial_eq!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 0)
+ );
+ impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
+ impl_cmp_vertical!(
+ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt
+ );
+ impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1));
+ impl_bitmask!($tuple_id | $ibitmask_ty | ($ielem_ty::max_value(), 0) |
+ $test_tt);
+
+ test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt);
+ test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ }
+}
+
+macro_rules! impl_f {
+ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident
+ | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),*
+ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
+ impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+ | $($elem_ids),* | $(#[$doc])*);
+ impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_ops_vector_float_min_max!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt
+ );
+ impl_reduction_float_arithmetic!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_reduction_min_max!(
+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+ );
+ impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 1.));
+ impl_from_vectors!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
+ );
+ impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_cmp_partial_eq!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 0.)
+ );
+ impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+
+ impl_float_consts!([$elem_ty; $elem_n]: $tuple_id);
+ impl_float_category!([$elem_ty; $elem_n]: $tuple_id, $mask_ty);
+
+ // floating-point math
+ impl_math_float_abs!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_math_float_cos!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_math_float_exp!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_math_float_ln!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_math_float_mul_add!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_math_float_mul_adde!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_math_float_powf!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_math_float_recpre!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_math_float_rsqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_math_float_sin!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_math_float_sqrt!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_math_float_sqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_cmp_vertical!(
+ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1., 0.)
+ | $test_tt
+ );
+
+ test_select!($elem_ty, $mask_ty, $tuple_id, (1., 2.) | $test_tt);
+ test_reduction_float_min_max!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt
+ );
+ test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ }
+}
+
+macro_rules! impl_m {
+ ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident
+ | $ielem_ty:ident, $ibitmask_ty:ident
+ | $test_tt:tt | $($elem_ids:ident),* | From: $($from_vec_ty:ident),*
+ | $(#[$doc:meta])*) => {
+ impl_minimal_mask!(
+ [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+ | $($elem_ids),* | $(#[$doc])*
+ );
+ impl_ops_vector_mask_bitwise!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
+ );
+ impl_ops_scalar_mask_bitwise!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
+ );
+ impl_reduction_bitwise!(
+ [bool; $elem_n]: $tuple_id | $ielem_ty | $test_tt
+ | (|x|{ x != 0 }) | (true, false)
+ );
+ impl_reduction_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_fmt_debug!([bool; $elem_n]: $tuple_id | $test_tt);
+ impl_from_array!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt
+ | (crate::$elem_ty::new(true), true)
+ );
+ impl_from_vectors!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),*
+ );
+ impl_default!([bool; $elem_n]: $tuple_id | $test_tt);
+ impl_cmp_partial_eq!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
+ );
+ impl_cmp_eq!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false)
+ );
+ impl_cmp_vertical!(
+ [$elem_ty; $elem_n]: $tuple_id, $tuple_id, true, (true, false)
+ | $test_tt
+ );
+ impl_select!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_cmp_ord!(
+ [$elem_ty; $elem_n]: $tuple_id | $test_tt | (false, true)
+ );
+ impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ impl_bitmask!($tuple_id | $ibitmask_ty | (true, false) | $test_tt);
+
+ test_cmp_partial_ord_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ test_shuffle1_dyn_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
+ }
+}
+
+macro_rules! impl_const_p {
+ ([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident,
+ $usize_ty:ident, $isize_ty:ident
+ | $test_tt:tt | $($elem_ids:ident),*
+ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
+ impl_minimal_p!(
+ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty
+ | ref_ | $test_tt | $($elem_ids),*
+ | (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])*
+ );
+ impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt);
+ }
+}
+
+macro_rules! impl_mut_p {
+ ([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident,
+ $usize_ty:ident, $isize_ty:ident
+ | $test_tt:tt | $($elem_ids:ident),*
+ | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => {
+ impl_minimal_p!(
+ [$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty
+ | ref_mut_ | $test_tt | $($elem_ids),*
+ | (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])*
+ );
+ impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt);
+ impl_ptr_write!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt);
+ }
+}
diff --git a/third_party/rust/packed_simd/src/api/bit_manip.rs b/third_party/rust/packed_simd/src/api/bit_manip.rs
new file mode 100644
index 0000000000..c1e90bb0fb
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/bit_manip.rs
@@ -0,0 +1,129 @@
+//! Bit manipulations.
+
+macro_rules! impl_bit_manip {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Returns the number of ones in the binary representation of
+ /// the lanes of `self`.
+ #[inline]
+ pub fn count_ones(self) -> Self {
+ super::codegen::bit_manip::BitManip::ctpop(self)
+ }
+
+ /// Returns the number of zeros in the binary representation of
+ /// the lanes of `self`.
+ #[inline]
+ pub fn count_zeros(self) -> Self {
+ super::codegen::bit_manip::BitManip::ctpop(!self)
+ }
+
+ /// Returns the number of leading zeros in the binary
+ /// representation of the lanes of `self`.
+ #[inline]
+ pub fn leading_zeros(self) -> Self {
+ super::codegen::bit_manip::BitManip::ctlz(self)
+ }
+
+ /// Returns the number of trailing zeros in the binary
+ /// representation of the lanes of `self`.
+ #[inline]
+ pub fn trailing_zeros(self) -> Self {
+ super::codegen::bit_manip::BitManip::cttz(self)
+ }
+ }
+
+ test_if! {
+ $test_tt:
+ paste::item! {
+ #[allow(overflowing_literals)]
+ pub mod [<$id _bit_manip>] {
+ #![allow(const_item_mutation)]
+ use super::*;
+
+ const LANE_WIDTH: usize = mem::size_of::<$elem_ty>() * 8;
+
+ macro_rules! test_func {
+ ($x:expr, $func:ident) => {{
+ let mut actual = $x;
+ for i in 0..$id::lanes() {
+ actual = actual.replace(
+ i,
+ $x.extract(i).$func() as $elem_ty
+ );
+ }
+ let expected = $x.$func();
+ assert_eq!(actual, expected);
+ }};
+ }
+
+ const BYTES: [u8; 64] = [
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63,
+ ];
+
+ fn load_bytes() -> $id {
+ let elems: &mut [$elem_ty] = unsafe {
+ slice::from_raw_parts_mut(
+ BYTES.as_mut_ptr() as *mut $elem_ty,
+ $id::lanes(),
+ )
+ };
+ $id::from_slice_unaligned(elems)
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn count_ones() {
+ test_func!($id::splat(0), count_ones);
+ test_func!($id::splat(!0), count_ones);
+ test_func!(load_bytes(), count_ones);
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn count_zeros() {
+ test_func!($id::splat(0), count_zeros);
+ test_func!($id::splat(!0), count_zeros);
+ test_func!(load_bytes(), count_zeros);
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn leading_zeros() {
+ test_func!($id::splat(0), leading_zeros);
+ test_func!($id::splat(1), leading_zeros);
+ // some implementations use `pshufb` which has unique
+ // behavior when the 8th bit is set.
+ test_func!($id::splat(0b1000_0010), leading_zeros);
+ test_func!($id::splat(!0), leading_zeros);
+ test_func!(
+ $id::splat(1 << (LANE_WIDTH - 1)),
+ leading_zeros
+ );
+ test_func!(load_bytes(), leading_zeros);
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn trailing_zeros() {
+ test_func!($id::splat(0), trailing_zeros);
+ test_func!($id::splat(1), trailing_zeros);
+ test_func!($id::splat(0b1000_0010), trailing_zeros);
+ test_func!($id::splat(!0), trailing_zeros);
+ test_func!(
+ $id::splat(1 << (LANE_WIDTH - 1)),
+ trailing_zeros
+ );
+ test_func!(load_bytes(), trailing_zeros);
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/bitmask.rs b/third_party/rust/packed_simd/src/api/bitmask.rs
new file mode 100644
index 0000000000..8f4868f328
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/bitmask.rs
@@ -0,0 +1,79 @@
+//! Bitmask API
+
+macro_rules! impl_bitmask {
+ ($id:ident | $ibitmask_ty:ident | ($set:expr, $clear:expr)
+ | $test_tt:tt) => {
+ impl $id {
+ /// Creates a bitmask with the MSB of each vector lane.
+ ///
+ /// If the vector has less than 8 lanes, the bits that do not
+ /// correspond to any vector lanes are cleared.
+ #[inline]
+ pub fn bitmask(self) -> $ibitmask_ty {
+ unsafe { codegen::llvm::simd_bitmask(self.0) }
+ }
+ }
+
+ test_if! {
+ $test_tt:
+ paste::item! {
+ #[cfg(not(
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/210
+ target_endian = "big"
+ ))]
+ pub mod [<$id _bitmask>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn bitmask() {
+ // clear all lanes
+ let vec = $id::splat($clear as _);
+ let bitmask: $ibitmask_ty = 0;
+ assert_eq!(vec.bitmask(), bitmask);
+
+ // set even lanes
+ let mut vec = $id::splat($clear as _);
+ for i in 0..$id::lanes() {
+ if i % 2 == 0 {
+ vec = vec.replace(i, $set as _);
+ }
+ }
+ // create bitmask with even lanes set:
+ let mut bitmask: $ibitmask_ty = 0;
+ for i in 0..$id::lanes() {
+ if i % 2 == 0 {
+ bitmask |= 1 << i;
+ }
+ }
+ assert_eq!(vec.bitmask(), bitmask);
+
+
+ // set odd lanes
+ let mut vec = $id::splat($clear as _);
+ for i in 0..$id::lanes() {
+ if i % 2 != 0 {
+ vec = vec.replace(i, $set as _);
+ }
+ }
+ // create bitmask with odd lanes set:
+ let mut bitmask: $ibitmask_ty = 0;
+ for i in 0..$id::lanes() {
+ if i % 2 != 0 {
+ bitmask |= 1 << i;
+ }
+ }
+ assert_eq!(vec.bitmask(), bitmask);
+
+ // set all lanes
+ let vec = $id::splat($set as _);
+ let mut bitmask: $ibitmask_ty = 0;
+ for i in 0..$id::lanes() {
+ bitmask |= 1 << i;
+ }
+ assert_eq!(vec.bitmask(), bitmask);
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/cast.rs b/third_party/rust/packed_simd/src/api/cast.rs
new file mode 100644
index 0000000000..f1c32ca1a3
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/cast.rs
@@ -0,0 +1,108 @@
+//! Implementation of `FromCast` and `IntoCast`.
+#![allow(clippy::module_name_repetitions)]
+
+/// Numeric cast from `T` to `Self`.
+///
+/// > Note: This is a temporary workaround until the conversion traits
+/// specified > in [RFC2484] are implemented.
+///
+/// Numeric cast between vectors with the same number of lanes, such that:
+///
+/// * casting integer vectors whose lane types have the same size (e.g. `i32xN`
+/// -> `u32xN`) is a **no-op**,
+///
+/// * casting from a larger integer to a smaller integer (e.g. `u32xN` ->
+/// `u8xN`) will **truncate**,
+///
+/// * casting from a smaller integer to a larger integer (e.g. `u8xN` ->
+/// `u32xN`) will:
+/// * **zero-extend** if the source is unsigned, or
+/// * **sign-extend** if the source is signed,
+///
+/// * casting from a float to an integer will **round the float towards zero**,
+///
+/// * casting from an integer to float will produce the floating point
+/// representation of the integer, **rounding to nearest, ties to even**,
+///
+/// * casting from an `f32` to an `f64` is perfect and lossless,
+///
+/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**.
+///
+/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484
+pub trait FromCast<T>: crate::marker::Sized {
+ /// Numeric cast from `T` to `Self`.
+ fn from_cast(_: T) -> Self;
+}
+
+/// Numeric cast from `Self` to `T`.
+///
+/// > Note: This is a temporary workaround until the conversion traits
+/// specified > in [RFC2484] are implemented.
+///
+/// Numeric cast between vectors with the same number of lanes, such that:
+///
+/// * casting integer vectors whose lane types have the same size (e.g. `i32xN`
+/// -> `u32xN`) is a **no-op**,
+///
+/// * casting from a larger integer to a smaller integer (e.g. `u32xN` ->
+/// `u8xN`) will **truncate**,
+///
+/// * casting from a smaller integer to a larger integer (e.g. `u8xN` ->
+/// `u32xN`) will:
+/// * **zero-extend** if the source is unsigned, or
+/// * **sign-extend** if the source is signed,
+///
+/// * casting from a float to an integer will **round the float towards zero**,
+///
+/// * casting from an integer to float will produce the floating point
+/// representation of the integer, **rounding to nearest, ties to even**,
+///
+/// * casting from an `f32` to an `f64` is perfect and lossless,
+///
+/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**.
+///
+/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484
+pub trait Cast<T>: crate::marker::Sized {
+ /// Numeric cast from `self` to `T`.
+ fn cast(self) -> T;
+}
+
+/// `FromCast` implies `Cast`.
+impl<T, U> Cast<U> for T
+where
+ U: FromCast<T>,
+{
+ #[inline]
+ fn cast(self) -> U {
+ U::from_cast(self)
+ }
+}
+
+/// `FromCast` and `Cast` are reflexive
+impl<T> FromCast<T> for T {
+ #[inline]
+ fn from_cast(t: Self) -> Self {
+ t
+ }
+}
+
+#[macro_use]
+mod macros;
+
+mod v16;
+pub use self::v16::*;
+
+mod v32;
+pub use self::v32::*;
+
+mod v64;
+pub use self::v64::*;
+
+mod v128;
+pub use self::v128::*;
+
+mod v256;
+pub use self::v256::*;
+
+mod v512;
+pub use self::v512::*;
diff --git a/third_party/rust/packed_simd/src/api/cast/macros.rs b/third_party/rust/packed_simd/src/api/cast/macros.rs
new file mode 100644
index 0000000000..3bb29f0b80
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/cast/macros.rs
@@ -0,0 +1,82 @@
+//! Macros implementing `FromCast`
+
+macro_rules! impl_from_cast_ {
+ ($id:ident[$test_tt:tt]: $from_ty:ident) => {
+ impl crate::api::cast::FromCast<$from_ty> for $id {
+ #[inline]
+ fn from_cast(x: $from_ty) -> Self {
+ use crate::llvm::simd_cast;
+ debug_assert_eq!($from_ty::lanes(), $id::lanes());
+ Simd(unsafe { simd_cast(x.0) })
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _from_cast_ $from_ty>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn test() {
+ assert_eq!($id::lanes(), $from_ty::lanes());
+ }
+ }
+ }
+ }
+ };
+}
+
+macro_rules! impl_from_cast {
+ ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
+ $(
+ impl_from_cast_!($id[$test_tt]: $from_ty);
+ )*
+ }
+}
+
+macro_rules! impl_from_cast_mask_ {
+ ($id:ident[$test_tt:tt]: $from_ty:ident) => {
+ impl crate::api::cast::FromCast<$from_ty> for $id {
+ #[inline]
+ fn from_cast(x: $from_ty) -> Self {
+ debug_assert_eq!($from_ty::lanes(), $id::lanes());
+ x.ne($from_ty::default())
+ .select($id::splat(true), $id::splat(false))
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _from_cast_ $from_ty>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn test() {
+ assert_eq!($id::lanes(), $from_ty::lanes());
+
+ let x = $from_ty::default();
+ let m: $id = x.cast();
+ assert!(m.none());
+ }
+ }
+ }
+ }
+ };
+}
+
+macro_rules! impl_from_cast_mask {
+ ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
+ $(
+ impl_from_cast_mask_!($id[$test_tt]: $from_ty);
+ )*
+ }
+}
+
+#[allow(unused)]
+macro_rules! impl_into_cast {
+ ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
+ $(
+ impl_from_cast_!($from_ty[$test_tt]: $id);
+ )*
+ }
+}
diff --git a/third_party/rust/packed_simd/src/api/cast/v128.rs b/third_party/rust/packed_simd/src/api/cast/v128.rs
new file mode 100644
index 0000000000..2e10b97b77
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/cast/v128.rs
@@ -0,0 +1,302 @@
+//! `FromCast` and `IntoCast` implementations for portable 128-bit wide vectors
+#[rustfmt::skip]
+
+use crate::*;
+
+impl_from_cast!(i8x16[test_v128]: u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16);
+impl_from_cast!(u8x16[test_v128]: i8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16);
+impl_from_cast_mask!(m8x16[test_v128]: i8x16, u8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16);
+
+impl_from_cast!(
+ i16x8[test_v128]: i8x8,
+ u8x8,
+ m8x8,
+ u16x8,
+ m16x8,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ isizex8,
+ usizex8,
+ msizex8
+);
+impl_from_cast!(
+ u16x8[test_v128]: i8x8,
+ u8x8,
+ m8x8,
+ i16x8,
+ m16x8,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ isizex8,
+ usizex8,
+ msizex8
+);
+impl_from_cast_mask!(
+ m16x8[test_v128]: i8x8,
+ u8x8,
+ m8x8,
+ i16x8,
+ u16x8,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ isizex8,
+ usizex8,
+ msizex8
+);
+
+impl_from_cast!(
+ i32x4[test_v128]: i8x4,
+ u8x4,
+ m8x4,
+ i16x4,
+ u16x4,
+ m16x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x4,
+ u128x4,
+ m128x4,
+ isizex4,
+ usizex4,
+ msizex4
+);
+impl_from_cast!(
+ u32x4[test_v128]: i8x4,
+ u8x4,
+ m8x4,
+ i16x4,
+ u16x4,
+ m16x4,
+ i32x4,
+ f32x4,
+ m32x4,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x4,
+ u128x4,
+ m128x4,
+ isizex4,
+ usizex4,
+ msizex4
+);
+impl_from_cast!(
+ f32x4[test_v128]: i8x4,
+ u8x4,
+ m8x4,
+ i16x4,
+ u16x4,
+ m16x4,
+ i32x4,
+ u32x4,
+ m32x4,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x4,
+ u128x4,
+ m128x4,
+ isizex4,
+ usizex4,
+ msizex4
+);
+impl_from_cast_mask!(
+ m32x4[test_v128]: i8x4,
+ u8x4,
+ m8x4,
+ i16x4,
+ u16x4,
+ m16x4,
+ i32x4,
+ u32x4,
+ f32x4,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x4,
+ u128x4,
+ m128x4,
+ isizex4,
+ usizex4,
+ msizex4
+);
+
+impl_from_cast!(
+ i64x2[test_v128]: i8x2,
+ u8x2,
+ m8x2,
+ i16x2,
+ u16x2,
+ m16x2,
+ i32x2,
+ u32x2,
+ f32x2,
+ m32x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x2,
+ u128x2,
+ m128x2,
+ isizex2,
+ usizex2,
+ msizex2
+);
+impl_from_cast!(
+ u64x2[test_v128]: i8x2,
+ u8x2,
+ m8x2,
+ i16x2,
+ u16x2,
+ m16x2,
+ i32x2,
+ u32x2,
+ f32x2,
+ m32x2,
+ i64x2,
+ f64x2,
+ m64x2,
+ i128x2,
+ u128x2,
+ m128x2,
+ isizex2,
+ usizex2,
+ msizex2
+);
+impl_from_cast!(
+ f64x2[test_v128]: i8x2,
+ u8x2,
+ m8x2,
+ i16x2,
+ u16x2,
+ m16x2,
+ i32x2,
+ u32x2,
+ f32x2,
+ m32x2,
+ i64x2,
+ u64x2,
+ m64x2,
+ i128x2,
+ u128x2,
+ m128x2,
+ isizex2,
+ usizex2,
+ msizex2
+);
+impl_from_cast_mask!(
+ m64x2[test_v128]: i8x2,
+ u8x2,
+ m8x2,
+ i16x2,
+ u16x2,
+ m16x2,
+ i32x2,
+ u32x2,
+ f32x2,
+ m32x2,
+ i64x2,
+ u64x2,
+ f64x2,
+ i128x2,
+ u128x2,
+ m128x2,
+ isizex2,
+ usizex2,
+ msizex2
+);
+
+impl_from_cast!(
+ isizex2[test_v128]: i8x2,
+ u8x2,
+ m8x2,
+ i16x2,
+ u16x2,
+ m16x2,
+ i32x2,
+ u32x2,
+ f32x2,
+ m32x2,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x2,
+ u128x2,
+ m128x2,
+ usizex2,
+ msizex2
+);
+impl_from_cast!(
+ usizex2[test_v128]: i8x2,
+ u8x2,
+ m8x2,
+ i16x2,
+ u16x2,
+ m16x2,
+ i32x2,
+ u32x2,
+ f32x2,
+ m32x2,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x2,
+ u128x2,
+ m128x2,
+ isizex2,
+ msizex2
+);
+impl_from_cast_mask!(
+ msizex2[test_v128]: i8x2,
+ u8x2,
+ m8x2,
+ i16x2,
+ u16x2,
+ m16x2,
+ i32x2,
+ u32x2,
+ f32x2,
+ m32x2,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x2,
+ u128x2,
+ m128x2,
+ isizex2,
+ usizex2
+);
+
+// FIXME[test_v128]: 64-bit single element vectors into_cast impls
+impl_from_cast!(i128x1[test_v128]: u128x1, m128x1);
+impl_from_cast!(u128x1[test_v128]: i128x1, m128x1);
+impl_from_cast!(m128x1[test_v128]: i128x1, u128x1);
diff --git a/third_party/rust/packed_simd/src/api/cast/v16.rs b/third_party/rust/packed_simd/src/api/cast/v16.rs
new file mode 100644
index 0000000000..896febacbb
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/cast/v16.rs
@@ -0,0 +1,68 @@
+//! `FromCast` and `IntoCast` implementations for portable 16-bit wide vectors
+#[rustfmt::skip]
+
+use crate::*;
+
+impl_from_cast!(
+ i8x2[test_v16]: u8x2,
+ m8x2,
+ i16x2,
+ u16x2,
+ m16x2,
+ i32x2,
+ u32x2,
+ f32x2,
+ m32x2,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x2,
+ u128x2,
+ m128x2,
+ isizex2,
+ usizex2,
+ msizex2
+);
+impl_from_cast!(
+ u8x2[test_v16]: i8x2,
+ m8x2,
+ i16x2,
+ u16x2,
+ m16x2,
+ i32x2,
+ u32x2,
+ f32x2,
+ m32x2,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x2,
+ u128x2,
+ m128x2,
+ isizex2,
+ usizex2,
+ msizex2
+);
+impl_from_cast_mask!(
+ m8x2[test_v16]: i8x2,
+ u8x2,
+ i16x2,
+ u16x2,
+ m16x2,
+ i32x2,
+ u32x2,
+ f32x2,
+ m32x2,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x2,
+ u128x2,
+ m128x2,
+ isizex2,
+ usizex2,
+ msizex2
+);
diff --git a/third_party/rust/packed_simd/src/api/cast/v256.rs b/third_party/rust/packed_simd/src/api/cast/v256.rs
new file mode 100644
index 0000000000..fe0c835e3c
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/cast/v256.rs
@@ -0,0 +1,298 @@
+//! `FromCast` and `IntoCast` implementations for portable 256-bit wide vectors
+#[rustfmt::skip]
+
+use crate::*;
+
+impl_from_cast!(i8x32[test_v256]: u8x32, m8x32, i16x32, u16x32, m16x32);
+impl_from_cast!(u8x32[test_v256]: i8x32, m8x32, i16x32, u16x32, m16x32);
+impl_from_cast_mask!(m8x32[test_v256]: i8x32, u8x32, i16x32, u16x32, m16x32);
+
+impl_from_cast!(i16x16[test_v256]: i8x16, u8x16, m8x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16);
+impl_from_cast!(u16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, m16x16, i32x16, u32x16, f32x16, m32x16);
+impl_from_cast_mask!(m16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, u16x16, i32x16, u32x16, f32x16, m32x16);
+
+impl_from_cast!(
+ i32x8[test_v256]: i8x8,
+ u8x8,
+ m8x8,
+ i16x8,
+ u16x8,
+ m16x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ isizex8,
+ usizex8,
+ msizex8
+);
+impl_from_cast!(
+ u32x8[test_v256]: i8x8,
+ u8x8,
+ m8x8,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x8,
+ f32x8,
+ m32x8,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ isizex8,
+ usizex8,
+ msizex8
+);
+impl_from_cast!(
+ f32x8[test_v256]: i8x8,
+ u8x8,
+ m8x8,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x8,
+ u32x8,
+ m32x8,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ isizex8,
+ usizex8,
+ msizex8
+);
+impl_from_cast_mask!(
+ m32x8[test_v256]: i8x8,
+ u8x8,
+ m8x8,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x8,
+ u32x8,
+ f32x8,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ isizex8,
+ usizex8,
+ msizex8
+);
+
+impl_from_cast!(
+ i64x4[test_v256]: i8x4,
+ u8x4,
+ m8x4,
+ i16x4,
+ u16x4,
+ m16x4,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x4,
+ u128x4,
+ m128x4,
+ isizex4,
+ usizex4,
+ msizex4
+);
+impl_from_cast!(
+ u64x4[test_v256]: i8x4,
+ u8x4,
+ m8x4,
+ i16x4,
+ u16x4,
+ m16x4,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x4,
+ f64x4,
+ m64x4,
+ i128x4,
+ u128x4,
+ m128x4,
+ isizex4,
+ usizex4,
+ msizex4
+);
+impl_from_cast!(
+ f64x4[test_v256]: i8x4,
+ u8x4,
+ m8x4,
+ i16x4,
+ u16x4,
+ m16x4,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x4,
+ u64x4,
+ m64x4,
+ i128x4,
+ u128x4,
+ m128x4,
+ isizex4,
+ usizex4,
+ msizex4
+);
+impl_from_cast_mask!(
+ m64x4[test_v256]: i8x4,
+ u8x4,
+ m8x4,
+ i16x4,
+ u16x4,
+ m16x4,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x4,
+ u64x4,
+ f64x4,
+ i128x4,
+ u128x4,
+ m128x4,
+ isizex4,
+ usizex4,
+ msizex4
+);
+
+impl_from_cast!(
+ i128x2[test_v256]: i8x2,
+ u8x2,
+ m8x2,
+ i16x2,
+ u16x2,
+ m16x2,
+ i32x2,
+ u32x2,
+ f32x2,
+ m32x2,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ u128x2,
+ m128x2,
+ isizex2,
+ usizex2,
+ msizex2
+);
+impl_from_cast!(
+ u128x2[test_v256]: i8x2,
+ u8x2,
+ m8x2,
+ i16x2,
+ u16x2,
+ m16x2,
+ i32x2,
+ u32x2,
+ f32x2,
+ m32x2,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x2,
+ m128x2,
+ isizex2,
+ usizex2,
+ msizex2
+);
+impl_from_cast_mask!(
+ m128x2[test_v256]: i8x2,
+ u8x2,
+ m8x2,
+ i16x2,
+ u16x2,
+ m16x2,
+ i32x2,
+ u32x2,
+ f32x2,
+ m32x2,
+ i64x2,
+ u64x2,
+ m64x2,
+ f64x2,
+ i128x2,
+ u128x2,
+ isizex2,
+ usizex2,
+ msizex2
+);
+
+impl_from_cast!(
+ isizex4[test_v256]: i8x4,
+ u8x4,
+ m8x4,
+ i16x4,
+ u16x4,
+ m16x4,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x4,
+ u128x4,
+ m128x4,
+ usizex4,
+ msizex4
+);
+impl_from_cast!(
+ usizex4[test_v256]: i8x4,
+ u8x4,
+ m8x4,
+ i16x4,
+ u16x4,
+ m16x4,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x4,
+ u128x4,
+ m128x4,
+ isizex4,
+ msizex4
+);
+impl_from_cast_mask!(
+ msizex4[test_v256]: i8x4,
+ u8x4,
+ m8x4,
+ i16x4,
+ u16x4,
+ m16x4,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x4,
+ u128x4,
+ m128x4,
+ isizex4,
+ usizex4
+);
diff --git a/third_party/rust/packed_simd/src/api/cast/v32.rs b/third_party/rust/packed_simd/src/api/cast/v32.rs
new file mode 100644
index 0000000000..4ad1cbf74d
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/cast/v32.rs
@@ -0,0 +1,132 @@
+//! `FromCast` and `IntoCast` implementations for portable 32-bit wide vectors
+#[rustfmt::skip]
+
+use crate::*;
+
+impl_from_cast!(
+ i8x4[test_v32]: u8x4,
+ m8x4,
+ i16x4,
+ u16x4,
+ m16x4,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x4,
+ u128x4,
+ m128x4,
+ isizex4,
+ usizex4,
+ msizex4
+);
+impl_from_cast!(
+ u8x4[test_v32]: i8x4,
+ m8x4,
+ i16x4,
+ u16x4,
+ m16x4,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x4,
+ u128x4,
+ m128x4,
+ isizex4,
+ usizex4,
+ msizex4
+);
+impl_from_cast_mask!(
+ m8x4[test_v32]: i8x4,
+ u8x4,
+ i16x4,
+ u16x4,
+ m16x4,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x4,
+ u128x4,
+ m128x4,
+ isizex4,
+ usizex4,
+ msizex4
+);
+
+impl_from_cast!(
+ i16x2[test_v32]: i8x2,
+ u8x2,
+ m8x2,
+ u16x2,
+ m16x2,
+ i32x2,
+ u32x2,
+ f32x2,
+ m32x2,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x2,
+ u128x2,
+ m128x2,
+ isizex2,
+ usizex2,
+ msizex2
+);
+impl_from_cast!(
+ u16x2[test_v32]: i8x2,
+ u8x2,
+ m8x2,
+ i16x2,
+ m16x2,
+ i32x2,
+ u32x2,
+ f32x2,
+ m32x2,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x2,
+ u128x2,
+ m128x2,
+ isizex2,
+ usizex2,
+ msizex2
+);
+impl_from_cast_mask!(
+ m16x2[test_v32]: i8x2,
+ u8x2,
+ m8x2,
+ i16x2,
+ u16x2,
+ i32x2,
+ u32x2,
+ f32x2,
+ m32x2,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x2,
+ u128x2,
+ m128x2,
+ isizex2,
+ usizex2,
+ msizex2
+);
diff --git a/third_party/rust/packed_simd/src/api/cast/v512.rs b/third_party/rust/packed_simd/src/api/cast/v512.rs
new file mode 100644
index 0000000000..b64605045e
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/cast/v512.rs
@@ -0,0 +1,209 @@
+//! `FromCast` and `IntoCast` implementations for portable 512-bit wide vectors
+#[rustfmt::skip]
+
+use crate::*;
+
+impl_from_cast!(i8x64[test_v512]: u8x64, m8x64);
+impl_from_cast!(u8x64[test_v512]: i8x64, m8x64);
+impl_from_cast_mask!(m8x64[test_v512]: i8x64, u8x64);
+
+impl_from_cast!(i16x32[test_v512]: i8x32, u8x32, m8x32, u16x32, m16x32);
+impl_from_cast!(u16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, m16x32);
+impl_from_cast_mask!(m16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, u16x32);
+
+impl_from_cast!(i32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, u32x16, f32x16, m32x16);
+impl_from_cast!(u32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, f32x16, m32x16);
+impl_from_cast!(f32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, m32x16);
+impl_from_cast_mask!(m32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16);
+
+impl_from_cast!(
+ i64x8[test_v512]: i8x8,
+ u8x8,
+ m8x8,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ isizex8,
+ usizex8,
+ msizex8
+);
+impl_from_cast!(
+ u64x8[test_v512]: i8x8,
+ u8x8,
+ m8x8,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x8,
+ f64x8,
+ m64x8,
+ isizex8,
+ usizex8,
+ msizex8
+);
+impl_from_cast!(
+ f64x8[test_v512]: i8x8,
+ u8x8,
+ m8x8,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x8,
+ u64x8,
+ m64x8,
+ isizex8,
+ usizex8,
+ msizex8
+);
+impl_from_cast_mask!(
+ m64x8[test_v512]: i8x8,
+ u8x8,
+ m8x8,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x8,
+ u64x8,
+ f64x8,
+ isizex8,
+ usizex8,
+ msizex8
+);
+
+impl_from_cast!(
+ i128x4[test_v512]: i8x4,
+ u8x4,
+ m8x4,
+ i16x4,
+ u16x4,
+ m16x4,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ u128x4,
+ m128x4,
+ isizex4,
+ usizex4,
+ msizex4
+);
+impl_from_cast!(
+ u128x4[test_v512]: i8x4,
+ u8x4,
+ m8x4,
+ i16x4,
+ u16x4,
+ m16x4,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x4,
+ m128x4,
+ isizex4,
+ usizex4,
+ msizex4
+);
+impl_from_cast_mask!(
+ m128x4[test_v512]: i8x4,
+ u8x4,
+ m8x4,
+ i16x4,
+ u16x4,
+ m16x4,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x4,
+ u64x4,
+ m64x4,
+ f64x4,
+ i128x4,
+ u128x4,
+ isizex4,
+ usizex4,
+ msizex4
+);
+
+impl_from_cast!(
+ isizex8[test_v512]: i8x8,
+ u8x8,
+ m8x8,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ usizex8,
+ msizex8
+);
+impl_from_cast!(
+ usizex8[test_v512]: i8x8,
+ u8x8,
+ m8x8,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ isizex8,
+ msizex8
+);
+impl_from_cast_mask!(
+ msizex8[test_v512]: i8x8,
+ u8x8,
+ m8x8,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ isizex8,
+ usizex8
+);
diff --git a/third_party/rust/packed_simd/src/api/cast/v64.rs b/third_party/rust/packed_simd/src/api/cast/v64.rs
new file mode 100644
index 0000000000..b23d1a4917
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/cast/v64.rs
@@ -0,0 +1,208 @@
+//! `FromCast` and `IntoCast` implementations for portable 64-bit wide vectors
+#[rustfmt::skip]
+
+use crate::*;
+
+impl_from_cast!(
+ i8x8[test_v64]: u8x8,
+ m8x8,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ isizex8,
+ usizex8,
+ msizex8
+);
+impl_from_cast!(
+ u8x8[test_v64]: i8x8,
+ m8x8,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ isizex8,
+ usizex8,
+ msizex8
+);
+impl_from_cast_mask!(
+ m8x8[test_v64]: i8x8,
+ u8x8,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ isizex8,
+ usizex8,
+ msizex8
+);
+
+impl_from_cast!(
+ i16x4[test_v64]: i8x4,
+ u8x4,
+ m8x4,
+ u16x4,
+ m16x4,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x4,
+ u128x4,
+ m128x4,
+ isizex4,
+ usizex4,
+ msizex4
+);
+impl_from_cast!(
+ u16x4[test_v64]: i8x4,
+ u8x4,
+ m8x4,
+ i16x4,
+ m16x4,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x4,
+ u128x4,
+ m128x4,
+ isizex4,
+ usizex4,
+ msizex4
+);
+impl_from_cast_mask!(
+ m16x4[test_v64]: i8x4,
+ u8x4,
+ m8x4,
+ i16x4,
+ u16x4,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x4,
+ u128x4,
+ m128x4,
+ isizex4,
+ usizex4,
+ msizex4
+);
+
+impl_from_cast!(
+ i32x2[test_v64]: i8x2,
+ u8x2,
+ m8x2,
+ i16x2,
+ u16x2,
+ m16x2,
+ u32x2,
+ f32x2,
+ m32x2,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x2,
+ u128x2,
+ m128x2,
+ isizex2,
+ usizex2,
+ msizex2
+);
+impl_from_cast!(
+ u32x2[test_v64]: i8x2,
+ u8x2,
+ m8x2,
+ i16x2,
+ u16x2,
+ m16x2,
+ i32x2,
+ f32x2,
+ m32x2,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x2,
+ u128x2,
+ m128x2,
+ isizex2,
+ usizex2,
+ msizex2
+);
+impl_from_cast!(
+ f32x2[test_v64]: i8x2,
+ u8x2,
+ m8x2,
+ i16x2,
+ u16x2,
+ m16x2,
+ i32x2,
+ u32x2,
+ m32x2,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x2,
+ u128x2,
+ m128x2,
+ isizex2,
+ usizex2,
+ msizex2
+);
+impl_from_cast_mask!(
+ m32x2[test_v64]: i8x2,
+ u8x2,
+ m8x2,
+ i16x2,
+ u16x2,
+ m16x2,
+ i32x2,
+ u32x2,
+ f32x2,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x2,
+ u128x2,
+ m128x2,
+ isizex2,
+ usizex2,
+ msizex2
+);
diff --git a/third_party/rust/packed_simd/src/api/cmp.rs b/third_party/rust/packed_simd/src/api/cmp.rs
new file mode 100644
index 0000000000..6d5301dddd
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/cmp.rs
@@ -0,0 +1,16 @@
+//! Implement cmp traits for vector types
+
+#[macro_use]
+mod partial_eq;
+
+#[macro_use]
+mod eq;
+
+#[macro_use]
+mod partial_ord;
+
+#[macro_use]
+mod ord;
+
+#[macro_use]
+mod vertical;
diff --git a/third_party/rust/packed_simd/src/api/cmp/eq.rs b/third_party/rust/packed_simd/src/api/cmp/eq.rs
new file mode 100644
index 0000000000..3c55d0dce5
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/cmp/eq.rs
@@ -0,0 +1,27 @@
+//! Implements `Eq` for vector types.
+
+macro_rules! impl_cmp_eq {
+ (
+ [$elem_ty:ident; $elem_count:expr]:
+ $id:ident | $test_tt:tt |
+ ($true:expr, $false:expr)
+ ) => {
+ impl crate::cmp::Eq for $id {}
+ impl crate::cmp::Eq for LexicographicallyOrdered<$id> {}
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _cmp_eq>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn eq() {
+ fn foo<E: crate::cmp::Eq>(_: E) {}
+ let a = $id::splat($false);
+ foo(a);
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/cmp/ord.rs b/third_party/rust/packed_simd/src/api/cmp/ord.rs
new file mode 100644
index 0000000000..e54ba3bfde
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/cmp/ord.rs
@@ -0,0 +1,43 @@
+//! Implements `Ord` for vector types.
+
+macro_rules! impl_cmp_ord {
+ (
+ [$elem_ty:ident; $elem_count:expr]:
+ $id:ident | $test_tt:tt |
+ ($true:expr, $false:expr)
+ ) => {
+ impl $id {
+ /// Returns a wrapper that implements `Ord`.
+ #[inline]
+ pub fn lex_ord(&self) -> LexicographicallyOrdered<$id> {
+ LexicographicallyOrdered(*self)
+ }
+ }
+
+ impl crate::cmp::Ord for LexicographicallyOrdered<$id> {
+ #[inline]
+ fn cmp(&self, other: &Self) -> crate::cmp::Ordering {
+ match self.partial_cmp(other) {
+ Some(x) => x,
+ None => unsafe { crate::hint::unreachable_unchecked() },
+ }
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _cmp_ord>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn eq() {
+ fn foo<E: crate::cmp::Ord>(_: E) {}
+ let a = $id::splat($false);
+ foo(a.partial_lex_ord());
+ foo(a.lex_ord());
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs b/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs
new file mode 100644
index 0000000000..d69dd47425
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs
@@ -0,0 +1,65 @@
+//! Implements `PartialEq` for vector types.
+
+macro_rules! impl_cmp_partial_eq {
+ (
+ [$elem_ty:ident; $elem_count:expr]:
+ $id:ident | $test_tt:tt |
+ ($true:expr, $false:expr)
+ ) => {
+ // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892
+ #[allow(clippy::partialeq_ne_impl)]
+ impl crate::cmp::PartialEq<$id> for $id {
+ #[inline]
+ fn eq(&self, other: &Self) -> bool {
+ $id::eq(*self, *other).all()
+ }
+ #[inline]
+ fn ne(&self, other: &Self) -> bool {
+ $id::ne(*self, *other).any()
+ }
+ }
+
+ // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892
+ #[allow(clippy::partialeq_ne_impl)]
+ impl crate::cmp::PartialEq<LexicographicallyOrdered<$id>> for LexicographicallyOrdered<$id> {
+ #[inline]
+ fn eq(&self, other: &Self) -> bool {
+ self.0 == other.0
+ }
+ #[inline]
+ fn ne(&self, other: &Self) -> bool {
+ self.0 != other.0
+ }
+ }
+
+ test_if! {
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _cmp_PartialEq>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn partial_eq() {
+ let a = $id::splat($false);
+ let b = $id::splat($true);
+
+ assert!(a != b);
+ assert!(!(a == b));
+ assert!(a == a);
+ assert!(!(a != a));
+
+ if $id::lanes() > 1 {
+ let a = $id::splat($false).replace(0, $true);
+ let b = $id::splat($true);
+
+ assert!(a != b);
+ assert!(!(a == b));
+ assert!(a == a);
+ assert!(!(a != a));
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs b/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs
new file mode 100644
index 0000000000..76ed9ebe4e
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs
@@ -0,0 +1,230 @@
+//! Implements `PartialOrd` for vector types.
+//!
+//! This implements a lexicographical order.
+
+macro_rules! impl_cmp_partial_ord {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Returns a wrapper that implements `PartialOrd`.
+ #[inline]
+ pub fn partial_lex_ord(&self) -> LexicographicallyOrdered<$id> {
+ LexicographicallyOrdered(*self)
+ }
+ }
+
+ impl crate::cmp::PartialOrd<LexicographicallyOrdered<$id>> for LexicographicallyOrdered<$id> {
+ #[inline]
+ fn partial_cmp(&self, other: &Self) -> Option<crate::cmp::Ordering> {
+ if PartialEq::eq(self, other) {
+ Some(crate::cmp::Ordering::Equal)
+ } else if PartialOrd::lt(self, other) {
+ Some(crate::cmp::Ordering::Less)
+ } else if PartialOrd::gt(self, other) {
+ Some(crate::cmp::Ordering::Greater)
+ } else {
+ None
+ }
+ }
+ #[inline]
+ fn lt(&self, other: &Self) -> bool {
+ let m_lt = self.0.lt(other.0);
+ let m_eq = self.0.eq(other.0);
+ for i in 0..$id::lanes() {
+ if m_eq.extract(i) {
+ continue;
+ }
+ return m_lt.extract(i);
+ }
+ false
+ }
+ #[inline]
+ fn le(&self, other: &Self) -> bool {
+ self.lt(other) | PartialEq::eq(self, other)
+ }
+ #[inline]
+ fn ge(&self, other: &Self) -> bool {
+ self.gt(other) | PartialEq::eq(self, other)
+ }
+ #[inline]
+ fn gt(&self, other: &Self) -> bool {
+ let m_gt = self.0.gt(other.0);
+ let m_eq = self.0.eq(other.0);
+ for i in 0..$id::lanes() {
+ if m_eq.extract(i) {
+ continue;
+ }
+ return m_gt.extract(i);
+ }
+ false
+ }
+ }
+ };
+}
+
+macro_rules! test_cmp_partial_ord_int {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _cmp_PartialOrd>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn partial_lex_ord() {
+ use crate::testing::utils::{test_cmp};
+ // constant values
+ let a = $id::splat(0);
+ let b = $id::splat(1);
+
+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Less));
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Greater));
+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Equal));
+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Equal));
+
+ // variable values: a = [0, 1, 2, 3]; b = [3, 2, 1, 0]
+ let mut a = $id::splat(0);
+ let mut b = $id::splat(0);
+ for i in 0..$id::lanes() {
+ a = a.replace(i, i as $elem_ty);
+ b = b.replace(i, ($id::lanes() - i) as $elem_ty);
+ }
+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Less));
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Greater));
+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Equal));
+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Equal));
+
+ // variable values: a = [0, 1, 2, 3]; b = [0, 1, 2, 4]
+ let mut b = a;
+ b = b.replace(
+ $id::lanes() - 1,
+ a.extract($id::lanes() - 1) + 1 as $elem_ty
+ );
+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Less));
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Greater));
+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Equal));
+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Equal));
+
+ if $id::lanes() > 2 {
+ // variable values a = [0, 1, 0, 0]; b = [0, 1, 2, 3]
+ let b = a;
+ let mut a = $id::splat(0);
+ a = a.replace(1, 1 as $elem_ty);
+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Less));
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Greater));
+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Equal));
+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Equal));
+
+ // variable values: a = [0, 1, 2, 3]; b = [0, 1, 3, 2]
+ let mut b = a;
+ b = b.replace(
+ 2, a.extract($id::lanes() - 1) + 1 as $elem_ty
+ );
+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Less));
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Greater));
+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Equal));
+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
+ Some(crate::cmp::Ordering::Equal));
+ }
+ }
+ }
+ }
+ }
+ };
+}
+
+macro_rules! test_cmp_partial_ord_mask {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _cmp_PartialOrd>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn partial_lex_ord() {
+ use crate::testing::utils::{test_cmp};
+ use crate::cmp::Ordering;
+
+ // constant values
+ let a = $id::splat(false);
+ let b = $id::splat(true);
+
+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
+ Some(Ordering::Less));
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
+ Some(Ordering::Greater));
+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
+ Some(Ordering::Equal));
+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
+ Some(Ordering::Equal));
+
+ // variable values:
+ // a = [false, false, false, false];
+ // b = [false, false, false, true]
+ let a = $id::splat(false);
+ let mut b = $id::splat(false);
+ b = b.replace($id::lanes() - 1, true);
+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
+ Some(Ordering::Less));
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
+ Some(Ordering::Greater));
+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
+ Some(Ordering::Equal));
+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
+ Some(Ordering::Equal));
+
+ // variable values:
+ // a = [true, true, true, false];
+ // b = [true, true, true, true]
+ let mut a = $id::splat(true);
+ let b = $id::splat(true);
+ a = a.replace($id::lanes() - 1, false);
+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
+ Some(Ordering::Less));
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
+ Some(Ordering::Greater));
+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
+ Some(Ordering::Equal));
+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
+ Some(Ordering::Equal));
+
+ if $id::lanes() > 2 {
+ // variable values
+ // a = [false, true, false, false];
+ // b = [false, true, true, true]
+ let mut a = $id::splat(false);
+ let mut b = $id::splat(true);
+ a = a.replace(1, true);
+ b = b.replace(0, false);
+ test_cmp(a.partial_lex_ord(), b.partial_lex_ord(),
+ Some(Ordering::Less));
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(),
+ Some(Ordering::Greater));
+ test_cmp(a.partial_lex_ord(), a.partial_lex_ord(),
+ Some(Ordering::Equal));
+ test_cmp(b.partial_lex_ord(), b.partial_lex_ord(),
+ Some(Ordering::Equal));
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/cmp/vertical.rs b/third_party/rust/packed_simd/src/api/cmp/vertical.rs
new file mode 100644
index 0000000000..ea4a0d1a34
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/cmp/vertical.rs
@@ -0,0 +1,114 @@
+//! Vertical (lane-wise) vector comparisons returning vector masks.
+
+macro_rules! impl_cmp_vertical {
+ (
+ [$elem_ty:ident; $elem_count:expr]:
+ $id:ident,
+ $mask_ty:ident,
+ $is_mask:expr,($true:expr, $false:expr) | $test_tt:tt
+ ) => {
+ impl $id {
+ /// Lane-wise equality comparison.
+ #[inline]
+ pub fn eq(self, other: Self) -> $mask_ty {
+ use crate::llvm::simd_eq;
+ Simd(unsafe { simd_eq(self.0, other.0) })
+ }
+
+ /// Lane-wise inequality comparison.
+ #[inline]
+ pub fn ne(self, other: Self) -> $mask_ty {
+ use crate::llvm::simd_ne;
+ Simd(unsafe { simd_ne(self.0, other.0) })
+ }
+
+ /// Lane-wise less-than comparison.
+ #[inline]
+ pub fn lt(self, other: Self) -> $mask_ty {
+ use crate::llvm::{simd_gt, simd_lt};
+ if $is_mask {
+ Simd(unsafe { simd_gt(self.0, other.0) })
+ } else {
+ Simd(unsafe { simd_lt(self.0, other.0) })
+ }
+ }
+
+ /// Lane-wise less-than-or-equals comparison.
+ #[inline]
+ pub fn le(self, other: Self) -> $mask_ty {
+ use crate::llvm::{simd_ge, simd_le};
+ if $is_mask {
+ Simd(unsafe { simd_ge(self.0, other.0) })
+ } else {
+ Simd(unsafe { simd_le(self.0, other.0) })
+ }
+ }
+
+ /// Lane-wise greater-than comparison.
+ #[inline]
+ pub fn gt(self, other: Self) -> $mask_ty {
+ use crate::llvm::{simd_gt, simd_lt};
+ if $is_mask {
+ Simd(unsafe { simd_lt(self.0, other.0) })
+ } else {
+ Simd(unsafe { simd_gt(self.0, other.0) })
+ }
+ }
+
+ /// Lane-wise greater-than-or-equals comparison.
+ #[inline]
+ pub fn ge(self, other: Self) -> $mask_ty {
+ use crate::llvm::{simd_ge, simd_le};
+ if $is_mask {
+ Simd(unsafe { simd_le(self.0, other.0) })
+ } else {
+ Simd(unsafe { simd_ge(self.0, other.0) })
+ }
+ }
+ }
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _cmp_vertical>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn cmp() {
+ let a = $id::splat($false);
+ let b = $id::splat($true);
+
+ let r = a.lt(b);
+ let e = $mask_ty::splat(true);
+ assert!(r == e);
+ let r = a.le(b);
+ assert!(r == e);
+
+ let e = $mask_ty::splat(false);
+ let r = a.gt(b);
+ assert!(r == e);
+ let r = a.ge(b);
+ assert!(r == e);
+ let r = a.eq(b);
+ assert!(r == e);
+
+ let mut a = a;
+ let mut b = b;
+ let mut e = e;
+ for i in 0..$id::lanes() {
+ if i % 2 == 0 {
+ a = a.replace(i, $false);
+ b = b.replace(i, $true);
+ e = e.replace(i, true);
+ } else {
+ a = a.replace(i, $true);
+ b = b.replace(i, $false);
+ e = e.replace(i, false);
+ }
+ }
+ let r = a.lt(b);
+ assert!(r == e);
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/default.rs b/third_party/rust/packed_simd/src/api/default.rs
new file mode 100644
index 0000000000..7af55ea77a
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/default.rs
@@ -0,0 +1,30 @@
+//! Implements `Default` for vector types.
+
+macro_rules! impl_default {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl Default for $id {
+ #[inline]
+ fn default() -> Self {
+ Self::splat($elem_ty::default())
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ // Comparisons use integer casts within mantissa^1 range.
+ #[allow(clippy::float_cmp)]
+ pub mod [<$id _default>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn default() {
+ let a = $id::default();
+ for i in 0..$id::lanes() {
+ assert_eq!(a.extract(i), $elem_ty::default());
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/fmt.rs b/third_party/rust/packed_simd/src/api/fmt.rs
new file mode 100644
index 0000000000..f3f55c4015
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/fmt.rs
@@ -0,0 +1,12 @@
+//! Implements formatting APIs
+
+#[macro_use]
+mod debug;
+#[macro_use]
+mod lower_hex;
+#[macro_use]
+mod upper_hex;
+#[macro_use]
+mod octal;
+#[macro_use]
+mod binary;
diff --git a/third_party/rust/packed_simd/src/api/fmt/binary.rs b/third_party/rust/packed_simd/src/api/fmt/binary.rs
new file mode 100644
index 0000000000..91c0825559
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/fmt/binary.rs
@@ -0,0 +1,54 @@
+//! Implement Octal formatting
+
+macro_rules! impl_fmt_binary {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl crate::fmt::Binary for $id {
+ #[allow(clippy::missing_inline_in_public_items)]
+ fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result {
+ write!(f, "{}(", stringify!($id))?;
+ for i in 0..$elem_count {
+ if i > 0 {
+ write!(f, ", ")?;
+ }
+ self.extract(i).fmt(f)?;
+ }
+ write!(f, ")")
+ }
+ }
+ test_if! {
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _fmt_binary>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn binary() {
+ use arrayvec::{ArrayString,ArrayVec};
+ type TinyString = ArrayString<[u8; 512]>;
+
+ use crate::fmt::Write;
+ let v = $id::splat($elem_ty::default());
+ let mut s = TinyString::new();
+ write!(&mut s, "{:#b}", v).unwrap();
+
+ let mut beg = TinyString::new();
+ write!(&mut beg, "{}(", stringify!($id)).unwrap();
+ assert!(s.starts_with(beg.as_str()));
+ assert!(s.ends_with(")"));
+ let s: ArrayVec<[TinyString; 64]>
+ = s.replace(beg.as_str(), "")
+ .replace(")", "").split(",")
+ .map(|v| TinyString::from(v.trim()).unwrap())
+ .collect();
+ assert_eq!(s.len(), $id::lanes());
+ for (index, ss) in s.into_iter().enumerate() {
+ let mut e = TinyString::new();
+ write!(&mut e, "{:#b}", v.extract(index)).unwrap();
+ assert_eq!(ss, e);
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/fmt/debug.rs b/third_party/rust/packed_simd/src/api/fmt/debug.rs
new file mode 100644
index 0000000000..1e209b3bff
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/fmt/debug.rs
@@ -0,0 +1,60 @@
+//! Implement debug formatting
+
+macro_rules! impl_fmt_debug_tests {
+ ([$elem_ty:ty; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ test_if! {
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _fmt_debug>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn debug() {
+ use arrayvec::{ArrayString,ArrayVec};
+ type TinyString = ArrayString<[u8; 512]>;
+
+ use crate::fmt::Write;
+ let v = $id::default();
+ let mut s = TinyString::new();
+ write!(&mut s, "{:?}", v).unwrap();
+
+ let mut beg = TinyString::new();
+ write!(&mut beg, "{}(", stringify!($id)).unwrap();
+ assert!(s.starts_with(beg.as_str()));
+ assert!(s.ends_with(")"));
+ let s: ArrayVec<[TinyString; 64]>
+ = s.replace(beg.as_str(), "")
+ .replace(")", "").split(",")
+ .map(|v| TinyString::from(v.trim()).unwrap())
+ .collect();
+ assert_eq!(s.len(), $id::lanes());
+ for (index, ss) in s.into_iter().enumerate() {
+ let mut e = TinyString::new();
+ write!(&mut e, "{:?}", v.extract(index)).unwrap();
+ assert_eq!(ss, e);
+ }
+ }
+ }
+ }
+ }
+ };
+}
+
+macro_rules! impl_fmt_debug {
+ ([$elem_ty:ty; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl crate::fmt::Debug for $id {
+ #[allow(clippy::missing_inline_in_public_items)]
+ fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result {
+ write!(f, "{}(", stringify!($id))?;
+ for i in 0..$elem_count {
+ if i > 0 {
+ write!(f, ", ")?;
+ }
+ self.extract(i).fmt(f)?;
+ }
+ write!(f, ")")
+ }
+ }
+ impl_fmt_debug_tests!([$elem_ty; $elem_count]: $id | $test_tt);
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs b/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs
new file mode 100644
index 0000000000..8f11d3119b
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs
@@ -0,0 +1,54 @@
+//! Implement `LowerHex` formatting
+
+macro_rules! impl_fmt_lower_hex {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl crate::fmt::LowerHex for $id {
+ #[allow(clippy::missing_inline_in_public_items)]
+ fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result {
+ write!(f, "{}(", stringify!($id))?;
+ for i in 0..$elem_count {
+ if i > 0 {
+ write!(f, ", ")?;
+ }
+ self.extract(i).fmt(f)?;
+ }
+ write!(f, ")")
+ }
+ }
+ test_if! {
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _fmt_lower_hex>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn lower_hex() {
+ use arrayvec::{ArrayString,ArrayVec};
+ type TinyString = ArrayString<[u8; 512]>;
+
+ use crate::fmt::Write;
+ let v = $id::splat($elem_ty::default());
+ let mut s = TinyString::new();
+ write!(&mut s, "{:#x}", v).unwrap();
+
+ let mut beg = TinyString::new();
+ write!(&mut beg, "{}(", stringify!($id)).unwrap();
+ assert!(s.starts_with(beg.as_str()));
+ assert!(s.ends_with(")"));
+ let s: ArrayVec<[TinyString; 64]>
+ = s.replace(beg.as_str(), "").replace(")", "")
+ .split(",")
+ .map(|v| TinyString::from(v.trim()).unwrap())
+ .collect();
+ assert_eq!(s.len(), $id::lanes());
+ for (index, ss) in s.into_iter().enumerate() {
+ let mut e = TinyString::new();
+ write!(&mut e, "{:#x}", v.extract(index)).unwrap();
+ assert_eq!(ss, e);
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/fmt/octal.rs b/third_party/rust/packed_simd/src/api/fmt/octal.rs
new file mode 100644
index 0000000000..e708e094ce
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/fmt/octal.rs
@@ -0,0 +1,54 @@
+//! Implement Octal formatting
+
+macro_rules! impl_fmt_octal {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl crate::fmt::Octal for $id {
+ #[allow(clippy::missing_inline_in_public_items)]
+ fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result {
+ write!(f, "{}(", stringify!($id))?;
+ for i in 0..$elem_count {
+ if i > 0 {
+ write!(f, ", ")?;
+ }
+ self.extract(i).fmt(f)?;
+ }
+ write!(f, ")")
+ }
+ }
+ test_if! {
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _fmt_octal>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn octal_hex() {
+ use arrayvec::{ArrayString,ArrayVec};
+ type TinyString = ArrayString<[u8; 512]>;
+
+ use crate::fmt::Write;
+ let v = $id::splat($elem_ty::default());
+ let mut s = TinyString::new();
+ write!(&mut s, "{:#o}", v).unwrap();
+
+ let mut beg = TinyString::new();
+ write!(&mut beg, "{}(", stringify!($id)).unwrap();
+ assert!(s.starts_with(beg.as_str()));
+ assert!(s.ends_with(")"));
+ let s: ArrayVec<[TinyString; 64]>
+ = s.replace(beg.as_str(), "").replace(")", "")
+ .split(",")
+ .map(|v| TinyString::from(v.trim()).unwrap())
+ .collect();
+ assert_eq!(s.len(), $id::lanes());
+ for (index, ss) in s.into_iter().enumerate() {
+ let mut e = TinyString::new();
+ write!(&mut e, "{:#o}", v.extract(index)).unwrap();
+ assert_eq!(ss, e);
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs b/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs
new file mode 100644
index 0000000000..5ad455706b
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs
@@ -0,0 +1,54 @@
+//! Implement `UpperHex` formatting
+
+macro_rules! impl_fmt_upper_hex {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl crate::fmt::UpperHex for $id {
+ #[allow(clippy::missing_inline_in_public_items)]
+ fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result {
+ write!(f, "{}(", stringify!($id))?;
+ for i in 0..$elem_count {
+ if i > 0 {
+ write!(f, ", ")?;
+ }
+ self.extract(i).fmt(f)?;
+ }
+ write!(f, ")")
+ }
+ }
+ test_if! {
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _fmt_upper_hex>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn upper_hex() {
+ use arrayvec::{ArrayString,ArrayVec};
+ type TinyString = ArrayString<[u8; 512]>;
+
+ use crate::fmt::Write;
+ let v = $id::splat($elem_ty::default());
+ let mut s = TinyString::new();
+ write!(&mut s, "{:#X}", v).unwrap();
+
+ let mut beg = TinyString::new();
+ write!(&mut beg, "{}(", stringify!($id)).unwrap();
+ assert!(s.starts_with(beg.as_str()));
+ assert!(s.ends_with(")"));
+ let s: ArrayVec<[TinyString; 64]>
+ = s.replace(beg.as_str(), "").replace(")", "")
+ .split(",")
+ .map(|v| TinyString::from(v.trim()).unwrap())
+ .collect();
+ assert_eq!(s.len(), $id::lanes());
+ for (index, ss) in s.into_iter().enumerate() {
+ let mut e = TinyString::new();
+ write!(&mut e, "{:#X}", v.extract(index)).unwrap();
+ assert_eq!(ss, e);
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/from.rs b/third_party/rust/packed_simd/src/api/from.rs
new file mode 100644
index 0000000000..c30c4d6e21
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/from.rs
@@ -0,0 +1,7 @@
+//! Implementations of the `From` and `Into` traits
+
+#[macro_use]
+mod from_array;
+
+#[macro_use]
+mod from_vector;
diff --git a/third_party/rust/packed_simd/src/api/from/from_array.rs b/third_party/rust/packed_simd/src/api/from/from_array.rs
new file mode 100644
index 0000000000..5c7801ddaf
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/from/from_array.rs
@@ -0,0 +1,124 @@
+//! Implements `From<[T; N]>` and `Into<[T; N]>` for vector types.
+
+macro_rules! impl_from_array {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt
+ | ($non_default_array:expr, $non_default_vec:expr)) => {
+ impl From<[$elem_ty; $elem_count]> for $id {
+ #[inline]
+ fn from(array: [$elem_ty; $elem_count]) -> Self {
+ union U {
+ array: [$elem_ty; $elem_count],
+ vec: $id,
+ }
+ unsafe { U { array }.vec }
+ }
+ }
+
+ impl From<$id> for [$elem_ty; $elem_count] {
+ #[inline]
+ fn from(vec: $id) -> Self {
+ union U {
+ array: [$elem_ty; $elem_count],
+ vec: $id,
+ }
+ unsafe { U { vec }.array }
+ }
+ }
+
+ // FIXME: `Into::into` is not inline, but due to
+ // the blanket impl in `std`, which is not
+ // marked `default`, we cannot override it here with
+ // specialization.
+ /*
+ impl Into<[$elem_ty; $elem_count]> for $id {
+ #[inline]
+ fn into(self) -> [$elem_ty; $elem_count] {
+ union U {
+ array: [$elem_ty; $elem_count],
+ vec: $id,
+ }
+ unsafe { U { vec: self }.array }
+ }
+ }
+
+ impl Into<$id> for [$elem_ty; $elem_count] {
+ #[inline]
+ fn into(self) -> $id {
+ union U {
+ array: [$elem_ty; $elem_count],
+ vec: $id,
+ }
+ unsafe { U { array: self }.vec }
+ }
+ }
+ */
+
+ test_if! {
+ $test_tt:
+ paste::item! {
+ // Comparisons use integer casts within mantissa^1 range.
+ #[allow(clippy::float_cmp)]
+ mod [<$id _from>] {
+ use super::*;
+ #[test]
+ #[cfg_attr(miri, ignore)]
+ fn array() {
+ let vec: $id = Default::default();
+
+ // FIXME: Workaround for arrays with more than 32
+ // elements.
+ //
+ // Safe because we never take a reference to any
+ // uninitialized element.
+ union W {
+ array: [$elem_ty; $elem_count],
+ other: ()
+ }
+ let mut array = W { other: () };
+ for i in 0..$elem_count {
+ let default: $elem_ty = Default::default();
+ // note: array.other is the active member and
+ // initialized so we can take a reference to it:
+ let p = unsafe {
+ &mut array.other as *mut () as *mut $elem_ty
+ };
+ // note: default is a valid bit-pattern for
+ // $elem_ty:
+ unsafe {
+ crate::ptr::write(p.wrapping_add(i), default)
+ };
+ }
+ // note: the array variant of the union is properly
+ // initialized:
+ let mut array = unsafe {
+ array.array
+ };
+
+ array[0] = $non_default_array;
+ let vec = vec.replace(0, $non_default_vec);
+
+ let vec_from_array = $id::from(array);
+ assert_eq!(vec_from_array, vec);
+ let array_from_vec
+ = <[$elem_ty; $elem_count]>::from(vec);
+ // FIXME: Workaround for arrays with more than 32
+ // elements.
+ for i in 0..$elem_count {
+ assert_eq!(array_from_vec[i], array[i]);
+ }
+
+ let vec_from_into_array: $id = array.into();
+ assert_eq!(vec_from_into_array, vec);
+ let array_from_into_vec: [$elem_ty; $elem_count]
+ = vec.into();
+ // FIXME: Workaround for arrays with more than 32
+ // elements.
+ for i in 0..$elem_count {
+ assert_eq!(array_from_into_vec[i], array[i]);
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/from/from_vector.rs b/third_party/rust/packed_simd/src/api/from/from_vector.rs
new file mode 100644
index 0000000000..55f70016d5
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/from/from_vector.rs
@@ -0,0 +1,67 @@
+//! Implements `From` and `Into` for vector types.
+
+macro_rules! impl_from_vector {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt
+ | $source:ident) => {
+ impl From<$source> for $id {
+ #[inline]
+ fn from(source: $source) -> Self {
+ fn static_assert_same_number_of_lanes<T, U>()
+ where
+ T: crate::sealed::Simd,
+ U: crate::sealed::Simd<LanesType = T::LanesType>,
+ {
+ }
+ use crate::llvm::simd_cast;
+ static_assert_same_number_of_lanes::<$id, $source>();
+ Simd(unsafe { simd_cast(source.0) })
+ }
+ }
+
+ // FIXME: `Into::into` is not inline, but due to the blanket impl in
+ // `std`, which is not marked `default`, we cannot override it here
+ // with specialization.
+
+ /*
+ impl Into<$id> for $source {
+ #[inline]
+ fn into(self) -> $id {
+ unsafe { simd_cast(self) }
+ }
+ }
+ */
+
+ test_if! {
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _from_ $source>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn from() {
+ assert_eq!($id::lanes(), $source::lanes());
+ let source: $source = Default::default();
+ let vec: $id = Default::default();
+
+ let e = $id::from(source);
+ assert_eq!(e, vec);
+
+ let e: $id = source.into();
+ assert_eq!(e, vec);
+ }
+ }
+ }
+ }
+ };
+}
+
+macro_rules! impl_from_vectors {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt
+ | $($source:ident),*) => {
+ $(
+ impl_from_vector!(
+ [$elem_ty; $elem_count]: $id | $test_tt | $source
+ );
+ )*
+ }
+}
diff --git a/third_party/rust/packed_simd/src/api/hash.rs b/third_party/rust/packed_simd/src/api/hash.rs
new file mode 100644
index 0000000000..ee80eff939
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/hash.rs
@@ -0,0 +1,49 @@
+//! Implements `Hash` for vector types.
+
+macro_rules! impl_hash {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl crate::hash::Hash for $id {
+ #[inline]
+ fn hash<H: crate::hash::Hasher>(&self, state: &mut H) {
+ unsafe {
+ union A {
+ data: [$elem_ty; $id::lanes()],
+ vec: $id,
+ }
+ A { vec: *self }.data.hash(state)
+ }
+ }
+ }
+
+ test_if! {
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _hash>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn hash() {
+ use crate::hash::{Hash, Hasher};
+ #[allow(deprecated)]
+ use crate::hash::{SipHasher13};
+ type A = [$elem_ty; $id::lanes()];
+ let a: A = [42 as $elem_ty; $id::lanes()];
+ assert_eq!(
+ crate::mem::size_of::<A>(),
+ crate::mem::size_of::<$id>()
+ );
+ #[allow(deprecated)]
+ let mut a_hash = SipHasher13::new();
+ let mut v_hash = a_hash.clone();
+ a.hash(&mut a_hash);
+
+ // Integer within mantissa^1 range.
+ #[allow(clippy::float_cmp)]
+ let v = $id::splat(42 as $elem_ty);
+ v.hash(&mut v_hash);
+ assert_eq!(a_hash.finish(), v_hash.finish());
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/into_bits.rs b/third_party/rust/packed_simd/src/api/into_bits.rs
new file mode 100644
index 0000000000..03fbe4bff7
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/into_bits.rs
@@ -0,0 +1,59 @@
+//! Implementation of `FromBits` and `IntoBits`.
+
+/// Safe lossless bitwise conversion from `T` to `Self`.
+#[cfg_attr(doc_cfg, doc(cfg(feature = "into_bits")))]
+pub trait FromBits<T>: crate::marker::Sized {
+ /// Safe lossless bitwise transmute from `T` to `Self`.
+ fn from_bits(t: T) -> Self;
+}
+
+/// Safe lossless bitwise conversion from `Self` to `T`.
+#[cfg_attr(doc_cfg, doc(cfg(feature = "into_bits")))]
+pub trait IntoBits<T>: crate::marker::Sized {
+ /// Safe lossless bitwise transmute from `self` to `T`.
+ fn into_bits(self) -> T;
+}
+
+/// `FromBits` implies `IntoBits`.
+impl<T, U> IntoBits<U> for T
+where
+ U: FromBits<T>,
+{
+ #[inline]
+ fn into_bits(self) -> U {
+ debug_assert!(crate::mem::size_of::<Self>() == crate::mem::size_of::<U>());
+ U::from_bits(self)
+ }
+}
+
+/// `FromBits` and `IntoBits` are reflexive
+impl<T> FromBits<T> for T {
+ #[inline]
+ fn from_bits(t: Self) -> Self {
+ t
+ }
+}
+
+#[macro_use]
+mod macros;
+
+mod v16;
+pub use self::v16::*;
+
+mod v32;
+pub use self::v32::*;
+
+mod v64;
+pub use self::v64::*;
+
+mod v128;
+pub use self::v128::*;
+
+mod v256;
+pub use self::v256::*;
+
+mod v512;
+pub use self::v512::*;
+
+mod arch_specific;
+pub use self::arch_specific::*;
diff --git a/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs b/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs
new file mode 100644
index 0000000000..bfac915576
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs
@@ -0,0 +1,345 @@
+//! `FromBits` and `IntoBits` between portable vector types and the
+//! architecture-specific vector types.
+#[rustfmt::skip]
+
+// FIXME: MIPS FromBits/IntoBits
+
+#[allow(unused)]
+use crate::*;
+
+/// This macro implements FromBits for the portable and the architecture
+/// specific vector types.
+///
+/// The "leaf" case is at the bottom, and the most generic case is at the top.
+/// The generic case is split into smaller cases recursively.
+macro_rules! impl_arch {
+ ([$arch_head_i:ident[$arch_head_tt:tt]: $($arch_head_ty:ident),*],
+ $([$arch_tail_i:ident[$arch_tail_tt:tt]: $($arch_tail_ty:ident),*]),* |
+ from: $($from_ty:ident),* | into: $($into_ty:ident),* |
+ test: $test_tt:tt) => {
+ impl_arch!(
+ [$arch_head_i[$arch_head_tt]: $($arch_head_ty),*] |
+ from: $($from_ty),* |
+ into: $($into_ty),* |
+ test: $test_tt
+ );
+ impl_arch!(
+ $([$arch_tail_i[$arch_tail_tt]: $($arch_tail_ty),*]),* |
+ from: $($from_ty),* |
+ into: $($into_ty),* |
+ test: $test_tt
+ );
+ };
+ ([$arch:ident[$arch_tt:tt]: $($arch_ty:ident),*] |
+ from: $($from_ty:ident),* | into: $($into_ty:ident),* |
+ test: $test_tt:tt) => {
+ // note: if target is "arm", "+v7,+neon" must be enabled
+ // and the std library must be recompiled with them
+ #[cfg(any(
+ not(target_arch = "arm"),
+ all(target_feature = "v7", target_feature = "neon",
+ any(feature = "core_arch", libcore_neon)))
+ )]
+ // note: if target is "powerpc", "altivec" must be enabled
+ // and the std library must be recompiled with it
+ #[cfg(any(
+ not(target_arch = "powerpc"),
+ all(target_feature = "altivec", feature = "core_arch"),
+ ))]
+ #[cfg(target_arch = $arch_tt)]
+ use crate::arch::$arch::{
+ $($arch_ty),*
+ };
+
+ #[cfg(any(
+ not(target_arch = "arm"),
+ all(target_feature = "v7", target_feature = "neon",
+ any(feature = "core_arch", libcore_neon)))
+ )]
+ #[cfg(any(
+ not(target_arch = "powerpc"),
+ all(target_feature = "altivec", feature = "core_arch"),
+ ))]
+ #[cfg(target_arch = $arch_tt)]
+ impl_arch!($($arch_ty),* | $($from_ty),* | $($into_ty),* |
+ test: $test_tt);
+ };
+ ($arch_head:ident, $($arch_tail:ident),* | $($from_ty:ident),*
+ | $($into_ty:ident),* | test: $test_tt:tt) => {
+ impl_arch!($arch_head | $($from_ty),* | $($into_ty),* |
+ test: $test_tt);
+ impl_arch!($($arch_tail),* | $($from_ty),* | $($into_ty),* |
+ test: $test_tt);
+ };
+ ($arch_head:ident | $($from_ty:ident),* | $($into_ty:ident),* |
+ test: $test_tt:tt) => {
+ impl_from_bits!($arch_head[$test_tt]: $($from_ty),*);
+ impl_into_bits!($arch_head[$test_tt]: $($into_ty),*);
+ };
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Implementations for the 64-bit wide vector types:
+
+// FIXME: 64-bit single element types
+// FIXME: arm/aarch float16x4_t missing
+impl_arch!(
+ [
+ arm["arm"]: int8x8_t,
+ uint8x8_t,
+ poly8x8_t,
+ int16x4_t,
+ uint16x4_t,
+ poly16x4_t,
+ int32x2_t,
+ uint32x2_t,
+ float32x2_t,
+ int64x1_t,
+ uint64x1_t
+ ],
+ [
+ aarch64["aarch64"]: int8x8_t,
+ uint8x8_t,
+ poly8x8_t,
+ int16x4_t,
+ uint16x4_t,
+ poly16x4_t,
+ int32x2_t,
+ uint32x2_t,
+ float32x2_t,
+ int64x1_t,
+ uint64x1_t,
+ float64x1_t
+ ] | from: i8x8,
+ u8x8,
+ m8x8,
+ i16x4,
+ u16x4,
+ m16x4,
+ i32x2,
+ u32x2,
+ f32x2,
+ m32x2 | into: i8x8,
+ u8x8,
+ i16x4,
+ u16x4,
+ i32x2,
+ u32x2,
+ f32x2 | test: test_v64
+);
+
+////////////////////////////////////////////////////////////////////////////////
+// Implementations for the 128-bit wide vector types:
+
+// FIXME: arm/aarch float16x8_t missing
+// FIXME: ppc vector_pixel missing
+// FIXME: ppc64 vector_Float16 missing
+// FIXME: ppc64 vector_signed_long_long missing
+// FIXME: ppc64 vector_unsigned_long_long missing
+// FIXME: ppc64 vector_bool_long_long missing
+// FIXME: ppc64 vector_signed___int128 missing
+// FIXME: ppc64 vector_unsigned___int128 missing
+impl_arch!(
+ [x86["x86"]: __m128, __m128i, __m128d],
+ [x86_64["x86_64"]: __m128, __m128i, __m128d],
+ [
+ arm["arm"]: int8x16_t,
+ uint8x16_t,
+ poly8x16_t,
+ int16x8_t,
+ uint16x8_t,
+ poly16x8_t,
+ int32x4_t,
+ uint32x4_t,
+ float32x4_t,
+ int64x2_t,
+ uint64x2_t
+ ],
+ [
+ aarch64["aarch64"]: int8x16_t,
+ uint8x16_t,
+ poly8x16_t,
+ int16x8_t,
+ uint16x8_t,
+ poly16x8_t,
+ int32x4_t,
+ uint32x4_t,
+ float32x4_t,
+ int64x2_t,
+ uint64x2_t,
+ float64x2_t
+ ],
+ [
+ powerpc["powerpc"]: vector_signed_char,
+ vector_unsigned_char,
+ vector_signed_short,
+ vector_unsigned_short,
+ vector_signed_int,
+ vector_unsigned_int,
+ vector_float
+ ],
+ [
+ powerpc64["powerpc64"]: vector_signed_char,
+ vector_unsigned_char,
+ vector_signed_short,
+ vector_unsigned_short,
+ vector_signed_int,
+ vector_unsigned_int,
+ vector_float,
+ vector_signed_long,
+ vector_unsigned_long,
+ vector_double
+ ] | from: i8x16,
+ u8x16,
+ m8x16,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x1,
+ u128x1,
+ m128x1 | into: i8x16,
+ u8x16,
+ i16x8,
+ u16x8,
+ i32x4,
+ u32x4,
+ f32x4,
+ i64x2,
+ u64x2,
+ f64x2,
+ i128x1,
+ u128x1 | test: test_v128
+);
+
+impl_arch!(
+ [powerpc["powerpc"]: vector_bool_char],
+ [powerpc64["powerpc64"]: vector_bool_char] | from: m8x16,
+ m16x8,
+ m32x4,
+ m64x2,
+ m128x1 | into: i8x16,
+ u8x16,
+ i16x8,
+ u16x8,
+ i32x4,
+ u32x4,
+ f32x4,
+ i64x2,
+ u64x2,
+ f64x2,
+ i128x1,
+ u128x1,
+ // Masks:
+ m8x16 | test: test_v128
+);
+
+impl_arch!(
+ [powerpc["powerpc"]: vector_bool_short],
+ [powerpc64["powerpc64"]: vector_bool_short] | from: m16x8,
+ m32x4,
+ m64x2,
+ m128x1 | into: i8x16,
+ u8x16,
+ i16x8,
+ u16x8,
+ i32x4,
+ u32x4,
+ f32x4,
+ i64x2,
+ u64x2,
+ f64x2,
+ i128x1,
+ u128x1,
+ // Masks:
+ m8x16,
+ m16x8 | test: test_v128
+);
+
+impl_arch!(
+ [powerpc["powerpc"]: vector_bool_int],
+ [powerpc64["powerpc64"]: vector_bool_int] | from: m32x4,
+ m64x2,
+ m128x1 | into: i8x16,
+ u8x16,
+ i16x8,
+ u16x8,
+ i32x4,
+ u32x4,
+ f32x4,
+ i64x2,
+ u64x2,
+ f64x2,
+ i128x1,
+ u128x1,
+ // Masks:
+ m8x16,
+ m16x8,
+ m32x4 | test: test_v128
+);
+
+impl_arch!(
+ [powerpc64["powerpc64"]: vector_bool_long] | from: m64x2,
+ m128x1 | into: i8x16,
+ u8x16,
+ i16x8,
+ u16x8,
+ i32x4,
+ u32x4,
+ f32x4,
+ i64x2,
+ u64x2,
+ f64x2,
+ i128x1,
+ u128x1,
+ // Masks:
+ m8x16,
+ m16x8,
+ m32x4,
+ m64x2 | test: test_v128
+);
+
+////////////////////////////////////////////////////////////////////////////////
+// Implementations for the 256-bit wide vector types
+
+impl_arch!(
+ [x86["x86"]: __m256, __m256i, __m256d],
+ [x86_64["x86_64"]: __m256, __m256i, __m256d] | from: i8x32,
+ u8x32,
+ m8x32,
+ i16x16,
+ u16x16,
+ m16x16,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x2,
+ u128x2,
+ m128x2 | into: i8x32,
+ u8x32,
+ i16x16,
+ u16x16,
+ i32x8,
+ u32x8,
+ f32x8,
+ i64x4,
+ u64x4,
+ f64x4,
+ i128x2,
+ u128x2 | test: test_v256
+);
+
+////////////////////////////////////////////////////////////////////////////////
+// FIXME: Implementations for the 512-bit wide vector types
diff --git a/third_party/rust/packed_simd/src/api/into_bits/macros.rs b/third_party/rust/packed_simd/src/api/into_bits/macros.rs
new file mode 100644
index 0000000000..265ab34ae0
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/into_bits/macros.rs
@@ -0,0 +1,74 @@
+//! Macros implementing `FromBits`
+
+macro_rules! impl_from_bits_ {
+ ($id:ident[$test_tt:tt]: $from_ty:ident) => {
+ impl crate::api::into_bits::FromBits<$from_ty> for $id {
+ #[inline]
+ fn from_bits(x: $from_ty) -> Self {
+ unsafe { crate::mem::transmute(x) }
+ }
+ }
+
+ test_if! {
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _from_bits_ $from_ty>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn test() {
+ use crate::{
+ ptr::{read_unaligned},
+ mem::{size_of, zeroed}
+ };
+ use crate::IntoBits;
+ assert_eq!(size_of::<$id>(),
+ size_of::<$from_ty>());
+ // This is safe because we never create a reference to
+ // uninitialized memory:
+ let a: $from_ty = unsafe { zeroed() };
+
+ let b_0: $id = crate::FromBits::from_bits(a);
+ let b_1: $id = a.into_bits();
+
+ // Check that these are byte-wise equal, that is,
+ // that the bit patterns are identical:
+ for i in 0..size_of::<$id>() {
+ // This is safe because we only read initialized
+ // memory in bounds. Also, taking a reference to
+ // `b_i` is ok because the fields are initialized.
+ unsafe {
+ let b_0_v: u8 = read_unaligned(
+ (&b_0 as *const $id as *const u8)
+ .wrapping_add(i)
+ );
+ let b_1_v: u8 = read_unaligned(
+ (&b_1 as *const $id as *const u8)
+ .wrapping_add(i)
+ );
+ assert_eq!(b_0_v, b_1_v);
+ }
+ }
+ }
+ }
+ }
+ }
+ };
+}
+
+macro_rules! impl_from_bits {
+ ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
+ $(
+ impl_from_bits_!($id[$test_tt]: $from_ty);
+ )*
+ }
+}
+
+#[allow(unused)]
+macro_rules! impl_into_bits {
+ ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => {
+ $(
+ impl_from_bits_!($from_ty[$test_tt]: $id);
+ )*
+ }
+}
diff --git a/third_party/rust/packed_simd/src/api/into_bits/v128.rs b/third_party/rust/packed_simd/src/api/into_bits/v128.rs
new file mode 100644
index 0000000000..639c09c2c4
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/into_bits/v128.rs
@@ -0,0 +1,232 @@
+//! `FromBits` and `IntoBits` implementations for portable 128-bit wide vectors
+#[rustfmt::skip]
+
+#[allow(unused)] // wasm_bindgen_test
+use crate::*;
+
+impl_from_bits!(
+ i8x16[test_v128]: u8x16,
+ m8x16,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x1,
+ u128x1,
+ m128x1
+);
+impl_from_bits!(
+ u8x16[test_v128]: i8x16,
+ m8x16,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x1,
+ u128x1,
+ m128x1
+);
+impl_from_bits!(m8x16[test_v128]: m16x8, m32x4, m64x2, m128x1);
+
+impl_from_bits!(
+ i16x8[test_v128]: i8x16,
+ u8x16,
+ m8x16,
+ u16x8,
+ m16x8,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x1,
+ u128x1,
+ m128x1
+);
+impl_from_bits!(
+ u16x8[test_v128]: i8x16,
+ u8x16,
+ m8x16,
+ i16x8,
+ m16x8,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x1,
+ u128x1,
+ m128x1
+);
+impl_from_bits!(m16x8[test_v128]: m32x4, m64x2, m128x1);
+
+impl_from_bits!(
+ i32x4[test_v128]: i8x16,
+ u8x16,
+ m8x16,
+ i16x8,
+ u16x8,
+ m16x8,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x1,
+ u128x1,
+ m128x1
+);
+impl_from_bits!(
+ u32x4[test_v128]: i8x16,
+ u8x16,
+ m8x16,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x4,
+ f32x4,
+ m32x4,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x1,
+ u128x1,
+ m128x1
+);
+impl_from_bits!(
+ f32x4[test_v128]: i8x16,
+ u8x16,
+ m8x16,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x4,
+ u32x4,
+ m32x4,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x1,
+ u128x1,
+ m128x1
+);
+impl_from_bits!(m32x4[test_v128]: m64x2, m128x1);
+
+impl_from_bits!(
+ i64x2[test_v128]: i8x16,
+ u8x16,
+ m8x16,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x1,
+ u128x1,
+ m128x1
+);
+impl_from_bits!(
+ u64x2[test_v128]: i8x16,
+ u8x16,
+ m8x16,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x2,
+ f64x2,
+ m64x2,
+ i128x1,
+ u128x1,
+ m128x1
+);
+impl_from_bits!(
+ f64x2[test_v128]: i8x16,
+ u8x16,
+ m8x16,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x2,
+ u64x2,
+ m64x2,
+ i128x1,
+ u128x1,
+ m128x1
+);
+impl_from_bits!(m64x2[test_v128]: m128x1);
+
+impl_from_bits!(
+ i128x1[test_v128]: i8x16,
+ u8x16,
+ m8x16,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ u128x1,
+ m128x1
+);
+impl_from_bits!(
+ u128x1[test_v128]: i8x16,
+ u8x16,
+ m8x16,
+ i16x8,
+ u16x8,
+ m16x8,
+ i32x4,
+ u32x4,
+ f32x4,
+ m32x4,
+ i64x2,
+ u64x2,
+ f64x2,
+ m64x2,
+ i128x1,
+ m128x1
+);
+// note: m128x1 cannot be constructed from all the other masks bit patterns in
+// here
diff --git a/third_party/rust/packed_simd/src/api/into_bits/v16.rs b/third_party/rust/packed_simd/src/api/into_bits/v16.rs
new file mode 100644
index 0000000000..e44d0e7f9a
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/into_bits/v16.rs
@@ -0,0 +1,9 @@
+//! `FromBits` and `IntoBits` implementations for portable 16-bit wide vectors
+#[rustfmt::skip]
+
+#[allow(unused)] // wasm_bindgen_test
+use crate::*;
+
+impl_from_bits!(i8x2[test_v16]: u8x2, m8x2);
+impl_from_bits!(u8x2[test_v16]: i8x2, m8x2);
+// note: m8x2 cannot be constructed from all i8x2 or u8x2 bit patterns
diff --git a/third_party/rust/packed_simd/src/api/into_bits/v256.rs b/third_party/rust/packed_simd/src/api/into_bits/v256.rs
new file mode 100644
index 0000000000..e432bbbc9f
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/into_bits/v256.rs
@@ -0,0 +1,232 @@
+//! `FromBits` and `IntoBits` implementations for portable 256-bit wide vectors
+#[rustfmt::skip]
+
+#[allow(unused)] // wasm_bindgen_test
+use crate::*;
+
+impl_from_bits!(
+ i8x32[test_v256]: u8x32,
+ m8x32,
+ i16x16,
+ u16x16,
+ m16x16,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x2,
+ u128x2,
+ m128x2
+);
+impl_from_bits!(
+ u8x32[test_v256]: i8x32,
+ m8x32,
+ i16x16,
+ u16x16,
+ m16x16,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x2,
+ u128x2,
+ m128x2
+);
+impl_from_bits!(m8x32[test_v256]: m16x16, m32x8, m64x4, m128x2);
+
+impl_from_bits!(
+ i16x16[test_v256]: i8x32,
+ u8x32,
+ m8x32,
+ u16x16,
+ m16x16,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x2,
+ u128x2,
+ m128x2
+);
+impl_from_bits!(
+ u16x16[test_v256]: i8x32,
+ u8x32,
+ m8x32,
+ i16x16,
+ m16x16,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x2,
+ u128x2,
+ m128x2
+);
+impl_from_bits!(m16x16[test_v256]: m32x8, m64x4, m128x2);
+
+impl_from_bits!(
+ i32x8[test_v256]: i8x32,
+ u8x32,
+ m8x32,
+ i16x16,
+ u16x16,
+ m16x16,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x2,
+ u128x2,
+ m128x2
+);
+impl_from_bits!(
+ u32x8[test_v256]: i8x32,
+ u8x32,
+ m8x32,
+ i16x16,
+ u16x16,
+ m16x16,
+ i32x8,
+ f32x8,
+ m32x8,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x2,
+ u128x2,
+ m128x2
+);
+impl_from_bits!(
+ f32x8[test_v256]: i8x32,
+ u8x32,
+ m8x32,
+ i16x16,
+ u16x16,
+ m16x16,
+ i32x8,
+ u32x8,
+ m32x8,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x2,
+ u128x2,
+ m128x2
+);
+impl_from_bits!(m32x8[test_v256]: m64x4, m128x2);
+
+impl_from_bits!(
+ i64x4[test_v256]: i8x32,
+ u8x32,
+ m8x32,
+ i16x16,
+ u16x16,
+ m16x16,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x2,
+ u128x2,
+ m128x2
+);
+impl_from_bits!(
+ u64x4[test_v256]: i8x32,
+ u8x32,
+ m8x32,
+ i16x16,
+ u16x16,
+ m16x16,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x4,
+ f64x4,
+ m64x4,
+ i128x2,
+ u128x2,
+ m128x2
+);
+impl_from_bits!(
+ f64x4[test_v256]: i8x32,
+ u8x32,
+ m8x32,
+ i16x16,
+ u16x16,
+ m16x16,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x4,
+ u64x4,
+ m64x4,
+ i128x2,
+ u128x2,
+ m128x2
+);
+impl_from_bits!(m64x4[test_v256]: m128x2);
+
+impl_from_bits!(
+ i128x2[test_v256]: i8x32,
+ u8x32,
+ m8x32,
+ i16x16,
+ u16x16,
+ m16x16,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ u128x2,
+ m128x2
+);
+impl_from_bits!(
+ u128x2[test_v256]: i8x32,
+ u8x32,
+ m8x32,
+ i16x16,
+ u16x16,
+ m16x16,
+ i32x8,
+ u32x8,
+ f32x8,
+ m32x8,
+ i64x4,
+ u64x4,
+ f64x4,
+ m64x4,
+ i128x2,
+ m128x2
+);
+// note: m128x2 cannot be constructed from all the other masks bit patterns in
+// here
diff --git a/third_party/rust/packed_simd/src/api/into_bits/v32.rs b/third_party/rust/packed_simd/src/api/into_bits/v32.rs
new file mode 100644
index 0000000000..5dba38a179
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/into_bits/v32.rs
@@ -0,0 +1,13 @@
+//! `FromBits` and `IntoBits` implementations for portable 32-bit wide vectors
+#[rustfmt::skip]
+
+#[allow(unused)] // wasm_bindgen_test
+use crate::*;
+
+impl_from_bits!(i8x4[test_v32]: u8x4, m8x4, i16x2, u16x2, m16x2);
+impl_from_bits!(u8x4[test_v32]: i8x4, m8x4, i16x2, u16x2, m16x2);
+impl_from_bits!(m8x4[test_v32]: m16x2);
+
+impl_from_bits!(i16x2[test_v32]: i8x4, u8x4, m8x4, u16x2, m16x2);
+impl_from_bits!(u16x2[test_v32]: i8x4, u8x4, m8x4, i16x2, m16x2);
+// note: m16x2 cannot be constructed from all m8x4 bit patterns
diff --git a/third_party/rust/packed_simd/src/api/into_bits/v512.rs b/third_party/rust/packed_simd/src/api/into_bits/v512.rs
new file mode 100644
index 0000000000..f6e9bb8bf7
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/into_bits/v512.rs
@@ -0,0 +1,232 @@
+//! `FromBits` and `IntoBits` implementations for portable 512-bit wide vectors
+#[rustfmt::skip]
+
+#[allow(unused)] // wasm_bindgen_test
+use crate::*;
+
+impl_from_bits!(
+ i8x64[test_v512]: u8x64,
+ m8x64,
+ i16x32,
+ u16x32,
+ m16x32,
+ i32x16,
+ u32x16,
+ f32x16,
+ m32x16,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ i128x4,
+ u128x4,
+ m128x4
+);
+impl_from_bits!(
+ u8x64[test_v512]: i8x64,
+ m8x64,
+ i16x32,
+ u16x32,
+ m16x32,
+ i32x16,
+ u32x16,
+ f32x16,
+ m32x16,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ i128x4,
+ u128x4,
+ m128x4
+);
+impl_from_bits!(m8x64[test_v512]: m16x32, m32x16, m64x8, m128x4);
+
+impl_from_bits!(
+ i16x32[test_v512]: i8x64,
+ u8x64,
+ m8x64,
+ u16x32,
+ m16x32,
+ i32x16,
+ u32x16,
+ f32x16,
+ m32x16,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ i128x4,
+ u128x4,
+ m128x4
+);
+impl_from_bits!(
+ u16x32[test_v512]: i8x64,
+ u8x64,
+ m8x64,
+ i16x32,
+ m16x32,
+ i32x16,
+ u32x16,
+ f32x16,
+ m32x16,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ i128x4,
+ u128x4,
+ m128x4
+);
+impl_from_bits!(m16x32[test_v512]: m32x16, m64x8, m128x4);
+
+impl_from_bits!(
+ i32x16[test_v512]: i8x64,
+ u8x64,
+ m8x64,
+ i16x32,
+ u16x32,
+ m16x32,
+ u32x16,
+ f32x16,
+ m32x16,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ i128x4,
+ u128x4,
+ m128x4
+);
+impl_from_bits!(
+ u32x16[test_v512]: i8x64,
+ u8x64,
+ m8x64,
+ i16x32,
+ u16x32,
+ m16x32,
+ i32x16,
+ f32x16,
+ m32x16,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ i128x4,
+ u128x4,
+ m128x4
+);
+impl_from_bits!(
+ f32x16[test_v512]: i8x64,
+ u8x64,
+ m8x64,
+ i16x32,
+ u16x32,
+ m16x32,
+ i32x16,
+ u32x16,
+ m32x16,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ i128x4,
+ u128x4,
+ m128x4
+);
+impl_from_bits!(m32x16[test_v512]: m64x8, m128x4);
+
+impl_from_bits!(
+ i64x8[test_v512]: i8x64,
+ u8x64,
+ m8x64,
+ i16x32,
+ u16x32,
+ m16x32,
+ i32x16,
+ u32x16,
+ f32x16,
+ m32x16,
+ u64x8,
+ f64x8,
+ m64x8,
+ i128x4,
+ u128x4,
+ m128x4
+);
+impl_from_bits!(
+ u64x8[test_v512]: i8x64,
+ u8x64,
+ m8x64,
+ i16x32,
+ u16x32,
+ m16x32,
+ i32x16,
+ u32x16,
+ f32x16,
+ m32x16,
+ i64x8,
+ f64x8,
+ m64x8,
+ i128x4,
+ u128x4,
+ m128x4
+);
+impl_from_bits!(
+ f64x8[test_v512]: i8x64,
+ u8x64,
+ m8x64,
+ i16x32,
+ u16x32,
+ m16x32,
+ i32x16,
+ u32x16,
+ f32x16,
+ m32x16,
+ i64x8,
+ u64x8,
+ m64x8,
+ i128x4,
+ u128x4,
+ m128x4
+);
+impl_from_bits!(m64x8[test_v512]: m128x4);
+
+impl_from_bits!(
+ i128x4[test_v512]: i8x64,
+ u8x64,
+ m8x64,
+ i16x32,
+ u16x32,
+ m16x32,
+ i32x16,
+ u32x16,
+ f32x16,
+ m32x16,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ u128x4,
+ m128x4
+);
+impl_from_bits!(
+ u128x4[test_v512]: i8x64,
+ u8x64,
+ m8x64,
+ i16x32,
+ u16x32,
+ m16x32,
+ i32x16,
+ u32x16,
+ f32x16,
+ m32x16,
+ i64x8,
+ u64x8,
+ f64x8,
+ m64x8,
+ i128x4,
+ m128x4
+);
+// note: m128x4 cannot be constructed from all the other masks bit patterns in
+// here
diff --git a/third_party/rust/packed_simd/src/api/into_bits/v64.rs b/third_party/rust/packed_simd/src/api/into_bits/v64.rs
new file mode 100644
index 0000000000..5b065f1bd5
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/into_bits/v64.rs
@@ -0,0 +1,18 @@
+//! `FromBits` and `IntoBits` implementations for portable 64-bit wide vectors
+#[rustfmt::skip]
+
+#[allow(unused)] // wasm_bindgen_test
+use crate::*;
+
+impl_from_bits!(i8x8[test_v64]: u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2);
+impl_from_bits!(u8x8[test_v64]: i8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2);
+impl_from_bits!(m8x8[test_v64]: m16x4, m32x2);
+
+impl_from_bits!(i16x4[test_v64]: i8x8, u8x8, m8x8, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2);
+impl_from_bits!(u16x4[test_v64]: i8x8, u8x8, m8x8, i16x4, m16x4, i32x2, u32x2, f32x2, m32x2);
+impl_from_bits!(m16x4[test_v64]: m32x2);
+
+impl_from_bits!(i32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, u32x2, f32x2, m32x2);
+impl_from_bits!(u32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, f32x2, m32x2);
+impl_from_bits!(f32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, m32x2);
+// note: m32x2 cannot be constructed from all m16x4 or m8x8 bit patterns
diff --git a/third_party/rust/packed_simd/src/api/math.rs b/third_party/rust/packed_simd/src/api/math.rs
new file mode 100644
index 0000000000..e7a8d256ba
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/math.rs
@@ -0,0 +1,4 @@
+//! Implements vertical math operations
+
+#[macro_use]
+mod float;
diff --git a/third_party/rust/packed_simd/src/api/math/float.rs b/third_party/rust/packed_simd/src/api/math/float.rs
new file mode 100644
index 0000000000..c0ec46e917
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/math/float.rs
@@ -0,0 +1,61 @@
+//! Implements vertical floating-point math operations.
+
+#[macro_use]
+mod abs;
+
+#[macro_use]
+mod consts;
+
+#[macro_use]
+mod cos;
+
+#[macro_use]
+mod exp;
+
+#[macro_use]
+mod powf;
+
+#[macro_use]
+mod ln;
+
+#[macro_use]
+mod mul_add;
+
+#[macro_use]
+mod mul_adde;
+
+#[macro_use]
+mod recpre;
+
+#[macro_use]
+mod rsqrte;
+
+#[macro_use]
+mod sin;
+
+#[macro_use]
+mod sqrt;
+
+#[macro_use]
+mod sqrte;
+
+macro_rules! impl_float_category {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident, $mask_ty:ident) => {
+ impl $id {
+ #[inline]
+ pub fn is_nan(self) -> $mask_ty {
+ self.ne(self)
+ }
+
+ #[inline]
+ pub fn is_infinite(self) -> $mask_ty {
+ self.eq(Self::INFINITY) | self.eq(Self::NEG_INFINITY)
+ }
+
+ #[inline]
+ pub fn is_finite(self) -> $mask_ty {
+ !(self.is_nan() | self.is_infinite())
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/math/float/abs.rs b/third_party/rust/packed_simd/src/api/math/float/abs.rs
new file mode 100644
index 0000000000..1865bdb68e
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/math/float/abs.rs
@@ -0,0 +1,31 @@
+//! Implements vertical (lane-wise) floating-point `abs`.
+
+macro_rules! impl_math_float_abs {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Absolute value.
+ #[inline]
+ pub fn abs(self) -> Self {
+ use crate::codegen::math::float::abs::Abs;
+ Abs::abs(self)
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _math_abs>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn abs() {
+ let o = $id::splat(1 as $elem_ty);
+ assert_eq!(o, o.abs());
+
+ let mo = $id::splat(-1 as $elem_ty);
+ assert_eq!(o, mo.abs());
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/math/float/consts.rs b/third_party/rust/packed_simd/src/api/math/float/consts.rs
new file mode 100644
index 0000000000..7f41acbf1b
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/math/float/consts.rs
@@ -0,0 +1,74 @@
+macro_rules! impl_float_consts {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident) => {
+ impl $id {
+ /// Machine epsilon value.
+ pub const EPSILON: $id = $id::splat(core::$elem_ty::EPSILON);
+
+ /// Smallest finite value.
+ pub const MIN: $id = $id::splat(core::$elem_ty::MIN);
+
+ /// Smallest positive normal value.
+ pub const MIN_POSITIVE: $id = $id::splat(core::$elem_ty::MIN_POSITIVE);
+
+ /// Largest finite value.
+ pub const MAX: $id = $id::splat(core::$elem_ty::MAX);
+
+ /// Not a Number (NaN).
+ pub const NAN: $id = $id::splat(core::$elem_ty::NAN);
+
+ /// Infinity (∞).
+ pub const INFINITY: $id = $id::splat(core::$elem_ty::INFINITY);
+
+ /// Negative infinity (-∞).
+ pub const NEG_INFINITY: $id = $id::splat(core::$elem_ty::NEG_INFINITY);
+
+ /// Archimedes' constant (π)
+ pub const PI: $id = $id::splat(core::$elem_ty::consts::PI);
+
+ /// π/2
+ pub const FRAC_PI_2: $id = $id::splat(core::$elem_ty::consts::FRAC_PI_2);
+
+ /// π/3
+ pub const FRAC_PI_3: $id = $id::splat(core::$elem_ty::consts::FRAC_PI_3);
+
+ /// π/4
+ pub const FRAC_PI_4: $id = $id::splat(core::$elem_ty::consts::FRAC_PI_4);
+
+ /// π/6
+ pub const FRAC_PI_6: $id = $id::splat(core::$elem_ty::consts::FRAC_PI_6);
+
+ /// π/8
+ pub const FRAC_PI_8: $id = $id::splat(core::$elem_ty::consts::FRAC_PI_8);
+
+ /// 1/π
+ pub const FRAC_1_PI: $id = $id::splat(core::$elem_ty::consts::FRAC_1_PI);
+
+ /// 2/π
+ pub const FRAC_2_PI: $id = $id::splat(core::$elem_ty::consts::FRAC_2_PI);
+
+ /// 2/sqrt(π)
+ pub const FRAC_2_SQRT_PI: $id = $id::splat(core::$elem_ty::consts::FRAC_2_SQRT_PI);
+
+ /// sqrt(2)
+ pub const SQRT_2: $id = $id::splat(core::$elem_ty::consts::SQRT_2);
+
+ /// 1/sqrt(2)
+ pub const FRAC_1_SQRT_2: $id = $id::splat(core::$elem_ty::consts::FRAC_1_SQRT_2);
+
+ /// Euler's number (e)
+ pub const E: $id = $id::splat(core::$elem_ty::consts::E);
+
+ /// log<sub>2</sub>(e)
+ pub const LOG2_E: $id = $id::splat(core::$elem_ty::consts::LOG2_E);
+
+ /// log<sub>10</sub>(e)
+ pub const LOG10_E: $id = $id::splat(core::$elem_ty::consts::LOG10_E);
+
+ /// ln(2)
+ pub const LN_2: $id = $id::splat(core::$elem_ty::consts::LN_2);
+
+ /// ln(10)
+ pub const LN_10: $id = $id::splat(core::$elem_ty::consts::LN_10);
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/math/float/cos.rs b/third_party/rust/packed_simd/src/api/math/float/cos.rs
new file mode 100644
index 0000000000..e5b8f46036
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/math/float/cos.rs
@@ -0,0 +1,44 @@
+//! Implements vertical (lane-wise) floating-point `cos`.
+
+macro_rules! impl_math_float_cos {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Cosine.
+ #[inline]
+ pub fn cos(self) -> Self {
+ use crate::codegen::math::float::cos::Cos;
+ Cos::cos(self)
+ }
+
+ /// Cosine of `self * PI`.
+ #[inline]
+ pub fn cos_pi(self) -> Self {
+ use crate::codegen::math::float::cos_pi::CosPi;
+ CosPi::cos_pi(self)
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _math_cos>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn cos() {
+ use crate::$elem_ty::consts::PI;
+ let z = $id::splat(0 as $elem_ty);
+ let o = $id::splat(1 as $elem_ty);
+ let p = $id::splat(PI as $elem_ty);
+ let ph = $id::splat(PI as $elem_ty / 2.);
+ let z_r = $id::splat((PI as $elem_ty / 2.).cos());
+ let o_r = $id::splat((PI as $elem_ty).cos());
+
+ assert_eq!(o, z.cos());
+ assert_eq!(z_r, ph.cos());
+ assert_eq!(o_r, p.cos());
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/math/float/exp.rs b/third_party/rust/packed_simd/src/api/math/float/exp.rs
new file mode 100644
index 0000000000..e3356d853a
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/math/float/exp.rs
@@ -0,0 +1,33 @@
+//! Implements vertical (lane-wise) floating-point `exp`.
+
+macro_rules! impl_math_float_exp {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Returns the exponential function of `self`: `e^(self)`.
+ #[inline]
+ pub fn exp(self) -> Self {
+ use crate::codegen::math::float::exp::Exp;
+ Exp::exp(self)
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _math_exp>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn exp() {
+ let z = $id::splat(0 as $elem_ty);
+ let o = $id::splat(1 as $elem_ty);
+ assert_eq!(o, z.exp());
+
+ let e = $id::splat(crate::f64::consts::E as $elem_ty);
+ let tol = $id::splat(2.4e-4 as $elem_ty);
+ assert!((e - o.exp()).abs().le(tol).all());
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/math/float/ln.rs b/third_party/rust/packed_simd/src/api/math/float/ln.rs
new file mode 100644
index 0000000000..5ceb9173ae
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/math/float/ln.rs
@@ -0,0 +1,33 @@
+//! Implements vertical (lane-wise) floating-point `ln`.
+
+macro_rules! impl_math_float_ln {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Returns the natural logarithm of `self`.
+ #[inline]
+ pub fn ln(self) -> Self {
+ use crate::codegen::math::float::ln::Ln;
+ Ln::ln(self)
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _math_ln>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn ln() {
+ let z = $id::splat(0 as $elem_ty);
+ let o = $id::splat(1 as $elem_ty);
+ assert_eq!(z, o.ln());
+
+ let e = $id::splat(crate::f64::consts::E as $elem_ty);
+ let tol = $id::splat(2.4e-4 as $elem_ty);
+ assert!((o - e.ln()).abs().le(tol).all());
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/math/float/mul_add.rs b/third_party/rust/packed_simd/src/api/math/float/mul_add.rs
new file mode 100644
index 0000000000..4b170ee2b7
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/math/float/mul_add.rs
@@ -0,0 +1,44 @@
+//! Implements vertical (lane-wise) floating-point `mul_add`.
+
+macro_rules! impl_math_float_mul_add {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Fused multiply add: `self * y + z`
+ #[inline]
+ pub fn mul_add(self, y: Self, z: Self) -> Self {
+ use crate::codegen::math::float::mul_add::MulAdd;
+ MulAdd::mul_add(self, y, z)
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _math_mul_add>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn mul_add() {
+ let z = $id::splat(0 as $elem_ty);
+ let o = $id::splat(1 as $elem_ty);
+ let t = $id::splat(2 as $elem_ty);
+ let t3 = $id::splat(3 as $elem_ty);
+ let f = $id::splat(4 as $elem_ty);
+
+ assert_eq!(z, z.mul_add(z, z));
+ assert_eq!(o, o.mul_add(o, z));
+ assert_eq!(o, o.mul_add(z, o));
+ assert_eq!(o, z.mul_add(o, o));
+
+ assert_eq!(t, o.mul_add(o, o));
+ assert_eq!(t, o.mul_add(t, z));
+ assert_eq!(t, t.mul_add(o, z));
+
+ assert_eq!(f, t.mul_add(t, z));
+ assert_eq!(f, t.mul_add(o, t));
+ assert_eq!(t3, t.mul_add(o, o));
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs b/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs
new file mode 100644
index 0000000000..c5b27110f2
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs
@@ -0,0 +1,48 @@
+//! Implements vertical (lane-wise) floating-point `mul_adde`.
+
+macro_rules! impl_math_float_mul_adde {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Fused multiply add estimate: ~= `self * y + z`
+ ///
+ /// While fused multiply-add (`fma`) has infinite precision,
+ /// `mul_adde` has _at worst_ the same precision of a multiply followed by an add.
+ /// This might be more efficient on architectures that do not have an `fma` instruction.
+ #[inline]
+ pub fn mul_adde(self, y: Self, z: Self) -> Self {
+ use crate::codegen::math::float::mul_adde::MulAddE;
+ MulAddE::mul_adde(self, y, z)
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _math_mul_adde>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn mul_adde() {
+ let z = $id::splat(0 as $elem_ty);
+ let o = $id::splat(1 as $elem_ty);
+ let t = $id::splat(2 as $elem_ty);
+ let t3 = $id::splat(3 as $elem_ty);
+ let f = $id::splat(4 as $elem_ty);
+
+ assert_eq!(z, z.mul_adde(z, z));
+ assert_eq!(o, o.mul_adde(o, z));
+ assert_eq!(o, o.mul_adde(z, o));
+ assert_eq!(o, z.mul_adde(o, o));
+
+ assert_eq!(t, o.mul_adde(o, o));
+ assert_eq!(t, o.mul_adde(t, z));
+ assert_eq!(t, t.mul_adde(o, z));
+
+ assert_eq!(f, t.mul_adde(t, z));
+ assert_eq!(f, t.mul_adde(o, t));
+ assert_eq!(t3, t.mul_adde(o, o));
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/math/float/powf.rs b/third_party/rust/packed_simd/src/api/math/float/powf.rs
new file mode 100644
index 0000000000..83dc9ff9c0
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/math/float/powf.rs
@@ -0,0 +1,36 @@
+//! Implements vertical (lane-wise) floating-point `powf`.
+
+macro_rules! impl_math_float_powf {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Raises `self` number to the floating point power of `x`.
+ #[inline]
+ pub fn powf(self, x: Self) -> Self {
+ use crate::codegen::math::float::powf::Powf;
+ Powf::powf(self, x)
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _math_powf>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn powf() {
+ let z = $id::splat(0 as $elem_ty);
+ let o = $id::splat(1 as $elem_ty);
+ let t = $id::splat(2 as $elem_ty);
+ assert_eq!(o, o.powf(z));
+ assert_eq!(o, t.powf(z));
+ assert_eq!(o, o.powf(o));
+ assert_eq!(t, t.powf(o));
+
+ let f = $id::splat(4 as $elem_ty);
+ assert_eq!(f, t.powf(t));
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/math/float/recpre.rs b/third_party/rust/packed_simd/src/api/math/float/recpre.rs
new file mode 100644
index 0000000000..127f0b2ff6
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/math/float/recpre.rs
@@ -0,0 +1,36 @@
+//! Implements vertical (lane-wise) floating-point `recpre`.
+
+macro_rules! impl_math_float_recpre {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Reciprocal estimate: `~= 1. / self`.
+ ///
+ /// FIXME: The precision of the estimate is currently unspecified.
+ #[inline]
+ pub fn recpre(self) -> Self {
+ $id::splat(1.) / self
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _math_recpre>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn recpre() {
+ let tol = $id::splat(2.4e-4 as $elem_ty);
+ let o = $id::splat(1 as $elem_ty);
+ let error = (o - o.recpre()).abs();
+ assert!(error.le(tol).all());
+
+ let t = $id::splat(2 as $elem_ty);
+ let e = 0.5;
+ let error = (e - t.recpre()).abs();
+ assert!(error.le(tol).all());
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs b/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs
new file mode 100644
index 0000000000..c77977f7b1
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs
@@ -0,0 +1,40 @@
+//! Implements vertical (lane-wise) floating-point `rsqrte`.
+
+macro_rules! impl_math_float_rsqrte {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Reciprocal square-root estimate: `~= 1. / self.sqrt()`.
+ ///
+ /// FIXME: The precision of the estimate is currently unspecified.
+ #[inline]
+ pub fn rsqrte(self) -> Self {
+ unsafe {
+ use crate::llvm::simd_fsqrt;
+ $id::splat(1.) / Simd(simd_fsqrt(self.0))
+ }
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _math_rsqrte>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn rsqrte() {
+ use crate::$elem_ty::consts::SQRT_2;
+ let tol = $id::splat(2.4e-4 as $elem_ty);
+ let o = $id::splat(1 as $elem_ty);
+ let error = (o - o.rsqrte()).abs();
+ assert!(error.le(tol).all());
+
+ let t = $id::splat(2 as $elem_ty);
+ let e = 1. / SQRT_2;
+ let error = (e - t.rsqrte()).abs();
+ assert!(error.le(tol).all());
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/math/float/sin.rs b/third_party/rust/packed_simd/src/api/math/float/sin.rs
new file mode 100644
index 0000000000..49908319b1
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/math/float/sin.rs
@@ -0,0 +1,50 @@
+//! Implements vertical (lane-wise) floating-point `sin`.
+
+macro_rules! impl_math_float_sin {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Sine.
+ #[inline]
+ pub fn sin(self) -> Self {
+ use crate::codegen::math::float::sin::Sin;
+ Sin::sin(self)
+ }
+
+ /// Sine of `self * PI`.
+ #[inline]
+ pub fn sin_pi(self) -> Self {
+ use crate::codegen::math::float::sin_pi::SinPi;
+ SinPi::sin_pi(self)
+ }
+
+ /// Sine and cosine of `self * PI`.
+ #[inline]
+ pub fn sin_cos_pi(self) -> (Self, Self) {
+ use crate::codegen::math::float::sin_cos_pi::SinCosPi;
+ SinCosPi::sin_cos_pi(self)
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _math_sin>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn sin() {
+ use crate::$elem_ty::consts::PI;
+ let z = $id::splat(0 as $elem_ty);
+ let p = $id::splat(PI as $elem_ty);
+ let ph = $id::splat(PI as $elem_ty / 2.);
+ let o_r = $id::splat((PI as $elem_ty / 2.).sin());
+ let z_r = $id::splat((PI as $elem_ty).sin());
+
+ assert_eq!(z, z.sin());
+ assert_eq!(o_r, ph.sin());
+ assert_eq!(z_r, p.sin());
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/math/float/sqrt.rs b/third_party/rust/packed_simd/src/api/math/float/sqrt.rs
new file mode 100644
index 0000000000..ae624122d0
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/math/float/sqrt.rs
@@ -0,0 +1,35 @@
+//! Implements vertical (lane-wise) floating-point `sqrt`.
+
+macro_rules! impl_math_float_sqrt {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ #[inline]
+ pub fn sqrt(self) -> Self {
+ use crate::codegen::math::float::sqrt::Sqrt;
+ Sqrt::sqrt(self)
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _math_sqrt>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn sqrt() {
+ use crate::$elem_ty::consts::SQRT_2;
+ let z = $id::splat(0 as $elem_ty);
+ let o = $id::splat(1 as $elem_ty);
+ assert_eq!(z, z.sqrt());
+ assert_eq!(o, o.sqrt());
+
+ let t = $id::splat(2 as $elem_ty);
+ let e = $id::splat(SQRT_2);
+ assert_eq!(e, t.sqrt());
+
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/math/float/sqrte.rs b/third_party/rust/packed_simd/src/api/math/float/sqrte.rs
new file mode 100644
index 0000000000..f7ffad748d
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/math/float/sqrte.rs
@@ -0,0 +1,44 @@
+//! Implements vertical (lane-wise) floating-point `sqrte`.
+
+macro_rules! impl_math_float_sqrte {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Square-root estimate.
+ ///
+ /// FIXME: The precision of the estimate is currently unspecified.
+ #[inline]
+ pub fn sqrte(self) -> Self {
+ use crate::codegen::math::float::sqrte::Sqrte;
+ Sqrte::sqrte(self)
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _math_sqrte>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn sqrte() {
+ use crate::$elem_ty::consts::SQRT_2;
+ let tol = $id::splat(2.4e-4 as $elem_ty);
+
+ let z = $id::splat(0 as $elem_ty);
+ let error = (z - z.sqrte()).abs();
+ assert!(error.le(tol).all());
+
+ let o = $id::splat(1 as $elem_ty);
+ let error = (o - o.sqrte()).abs();
+ assert!(error.le(tol).all());
+
+ let t = $id::splat(2 as $elem_ty);
+ let e = $id::splat(SQRT_2 as $elem_ty);
+ let error = (e - t.sqrte()).abs();
+
+ assert!(error.le(tol).all());
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/minimal.rs b/third_party/rust/packed_simd/src/api/minimal.rs
new file mode 100644
index 0000000000..840d9e3258
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/minimal.rs
@@ -0,0 +1,6 @@
+#[macro_use]
+mod iuf;
+#[macro_use]
+mod mask;
+#[macro_use]
+mod ptr;
diff --git a/third_party/rust/packed_simd/src/api/minimal/iuf.rs b/third_party/rust/packed_simd/src/api/minimal/iuf.rs
new file mode 100644
index 0000000000..a155ac178a
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/minimal/iuf.rs
@@ -0,0 +1,169 @@
+//! Minimal API of signed integer, unsigned integer, and floating-point
+//! vectors.
+
+macro_rules! impl_minimal_iuf {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident |
+ $test_tt:tt | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
+
+ $(#[$doc])*
+ pub type $id = Simd<[$elem_ty; $elem_count]>;
+
+ impl sealed::Simd for $id {
+ type Element = $elem_ty;
+ const LANES: usize = $elem_count;
+ type LanesType = [u32; $elem_count];
+ }
+
+ impl $id {
+ /// Creates a new instance with each vector elements initialized
+ /// with the provided values.
+ #[inline]
+ #[allow(clippy::too_many_arguments)]
+ pub const fn new($($elem_name: $elem_ty),*) -> Self {
+ Simd(codegen::$id($($elem_name as $ielem_ty),*))
+ }
+
+ /// Returns the number of vector lanes.
+ #[inline]
+ pub const fn lanes() -> usize {
+ $elem_count
+ }
+
+ /// Constructs a new instance with each element initialized to
+ /// `value`.
+ #[inline]
+ pub const fn splat(value: $elem_ty) -> Self {
+ Simd(codegen::$id($({
+ #[allow(non_camel_case_types, dead_code)]
+ struct $elem_name;
+ value as $ielem_ty
+ }),*))
+ }
+
+ /// Extracts the value at `index`.
+ ///
+ /// # Panics
+ ///
+ /// If `index >= Self::lanes()`.
+ #[inline]
+ pub fn extract(self, index: usize) -> $elem_ty {
+ assert!(index < $elem_count);
+ unsafe { self.extract_unchecked(index) }
+ }
+
+ /// Extracts the value at `index`.
+ ///
+ /// # Safety
+ ///
+ /// If `index >= Self::lanes()` the behavior is undefined.
+ #[inline]
+ pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty {
+ use crate::llvm::simd_extract;
+ let e: $ielem_ty = simd_extract(self.0, index as u32);
+ e as $elem_ty
+ }
+
+ /// Returns a new vector where the value at `index` is replaced by `new_value`.
+ ///
+ /// # Panics
+ ///
+ /// If `index >= Self::lanes()`.
+ #[inline]
+ #[must_use = "replace does not modify the original value - \
+ it returns a new vector with the value at `index` \
+ replaced by `new_value`d"
+ ]
+ pub fn replace(self, index: usize, new_value: $elem_ty) -> Self {
+ assert!(index < $elem_count);
+ unsafe { self.replace_unchecked(index, new_value) }
+ }
+
+ /// Returns a new vector where the value at `index` is replaced by `new_value`.
+ ///
+ /// # Safety
+ ///
+ /// If `index >= Self::lanes()` the behavior is undefined.
+ #[inline]
+ #[must_use = "replace_unchecked does not modify the original value - \
+ it returns a new vector with the value at `index` \
+ replaced by `new_value`d"
+ ]
+ pub unsafe fn replace_unchecked(
+ self,
+ index: usize,
+ new_value: $elem_ty,
+ ) -> Self {
+ use crate::llvm::simd_insert;
+ Simd(simd_insert(self.0, index as u32, new_value as $ielem_ty))
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ // Comparisons use integer casts within mantissa^1 range.
+ #[allow(clippy::float_cmp)]
+ pub mod [<$id _minimal>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn minimal() {
+ // lanes:
+ assert_eq!($elem_count, $id::lanes());
+
+ // splat and extract / extract_unchecked:
+ const VAL: $elem_ty = 7 as $elem_ty;
+ const VEC: $id = $id::splat(VAL);
+ for i in 0..$id::lanes() {
+ assert_eq!(VAL, VEC.extract(i));
+ assert_eq!(
+ VAL, unsafe { VEC.extract_unchecked(i) }
+ );
+ }
+
+ // replace / replace_unchecked
+ let new_vec = VEC.replace(0, 42 as $elem_ty);
+ for i in 0..$id::lanes() {
+ if i == 0 {
+ assert_eq!(42 as $elem_ty, new_vec.extract(i));
+ } else {
+ assert_eq!(VAL, new_vec.extract(i));
+ }
+ }
+ let new_vec = unsafe {
+ VEC.replace_unchecked(0, 42 as $elem_ty)
+ };
+ for i in 0..$id::lanes() {
+ if i == 0 {
+ assert_eq!(42 as $elem_ty, new_vec.extract(i));
+ } else {
+ assert_eq!(VAL, new_vec.extract(i));
+ }
+ }
+ }
+
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
+ // #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg(not(target_arch = "wasm32"))]
+ #[test]
+ #[should_panic]
+ fn extract_panic_oob() {
+ const VAL: $elem_ty = 7 as $elem_ty;
+ const VEC: $id = $id::splat(VAL);
+ let _ = VEC.extract($id::lanes());
+ }
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
+ // #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg(not(target_arch = "wasm32"))]
+ #[test]
+ #[should_panic]
+ fn replace_panic_oob() {
+ const VAL: $elem_ty = 7 as $elem_ty;
+ const VEC: $id = $id::splat(VAL);
+ let _ = VEC.replace($id::lanes(), 42 as $elem_ty);
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/third_party/rust/packed_simd/src/api/minimal/mask.rs b/third_party/rust/packed_simd/src/api/minimal/mask.rs
new file mode 100644
index 0000000000..a420060b42
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/minimal/mask.rs
@@ -0,0 +1,176 @@
+//! Minimal API of mask vectors.
+
+macro_rules! impl_minimal_mask {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident
+ | $test_tt:tt | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
+ $(#[$doc])*
+ pub type $id = Simd<[$elem_ty; $elem_count]>;
+
+ impl sealed::Simd for $id {
+ type Element = $elem_ty;
+ const LANES: usize = $elem_count;
+ type LanesType = [u32; $elem_count];
+ }
+
+ impl $id {
+ /// Creates a new instance with each vector elements initialized
+ /// with the provided values.
+ #[inline]
+ #[allow(clippy::too_many_arguments)]
+ pub const fn new($($elem_name: bool),*) -> Self {
+ Simd(codegen::$id($(Self::bool_to_internal($elem_name)),*))
+ }
+
+ /// Converts a boolean type into the type of the vector lanes.
+ #[inline]
+ #[allow(clippy::indexing_slicing)]
+ const fn bool_to_internal(x: bool) -> $ielem_ty {
+ [0 as $ielem_ty, !(0 as $ielem_ty)][x as usize]
+ }
+
+ /// Returns the number of vector lanes.
+ #[inline]
+ pub const fn lanes() -> usize {
+ $elem_count
+ }
+
+ /// Constructs a new instance with each element initialized to
+ /// `value`.
+ #[inline]
+ pub const fn splat(value: bool) -> Self {
+ Simd(codegen::$id($({
+ #[allow(non_camel_case_types, dead_code)]
+ struct $elem_name;
+ Self::bool_to_internal(value)
+ }),*))
+ }
+
+ /// Extracts the value at `index`.
+ ///
+ /// # Panics
+ ///
+ /// If `index >= Self::lanes()`.
+ #[inline]
+ pub fn extract(self, index: usize) -> bool {
+ assert!(index < $elem_count);
+ unsafe { self.extract_unchecked(index) }
+ }
+
+ /// Extracts the value at `index`.
+ ///
+ /// # Safety
+ ///
+ /// If `index >= Self::lanes()` the behavior is undefined.
+ #[inline]
+ pub unsafe fn extract_unchecked(self, index: usize) -> bool {
+ use crate::llvm::simd_extract;
+ let x: $ielem_ty = simd_extract(self.0, index as u32);
+ x != 0
+ }
+
+ /// Returns a new vector where the value at `index` is replaced by
+ /// `new_value`.
+ ///
+ /// # Panics
+ ///
+ /// If `index >= Self::lanes()`.
+ #[inline]
+ #[must_use = "replace does not modify the original value - \
+ it returns a new vector with the value at `index` \
+ replaced by `new_value`d"
+ ]
+ pub fn replace(self, index: usize, new_value: bool) -> Self {
+ assert!(index < $elem_count);
+ unsafe { self.replace_unchecked(index, new_value) }
+ }
+
+ /// Returns a new vector where the value at `index` is replaced by
+ /// `new_value`.
+ ///
+ /// # Safety
+ ///
+ /// If `index >= Self::lanes()` the behavior is undefined.
+ #[inline]
+ #[must_use = "replace_unchecked does not modify the original value - \
+ it returns a new vector with the value at `index` \
+ replaced by `new_value`d"
+ ]
+ pub unsafe fn replace_unchecked(
+ self,
+ index: usize,
+ new_value: bool,
+ ) -> Self {
+ use crate::llvm::simd_insert;
+ Simd(simd_insert(self.0, index as u32,
+ Self::bool_to_internal(new_value)))
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _minimal>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn minimal() {
+ // TODO: test new
+
+ // lanes:
+ assert_eq!($elem_count, $id::lanes());
+
+ // splat and extract / extract_unchecked:
+ let vec = $id::splat(true);
+ for i in 0..$id::lanes() {
+ assert_eq!(true, vec.extract(i));
+ assert_eq!(true,
+ unsafe { vec.extract_unchecked(i) }
+ );
+ }
+
+ // replace / replace_unchecked
+ let new_vec = vec.replace(0, false);
+ for i in 0..$id::lanes() {
+ if i == 0 {
+ assert_eq!(false, new_vec.extract(i));
+ } else {
+ assert_eq!(true, new_vec.extract(i));
+ }
+ }
+ let new_vec = unsafe {
+ vec.replace_unchecked(0, false)
+ };
+ for i in 0..$id::lanes() {
+ if i == 0 {
+ assert_eq!(false, new_vec.extract(i));
+ } else {
+ assert_eq!(true, new_vec.extract(i));
+ }
+ }
+ }
+
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg(not(target_arch = "wasm32"))]
+ #[test]
+ #[should_panic]
+ fn extract_panic_oob() {
+ let vec = $id::splat(false);
+ let _ = vec.extract($id::lanes());
+ }
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg(not(target_arch = "wasm32"))]
+ #[test]
+ #[should_panic]
+ fn replace_panic_oob() {
+ let vec = $id::splat(false);
+ let _ = vec.replace($id::lanes(), true);
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/third_party/rust/packed_simd/src/api/minimal/ptr.rs b/third_party/rust/packed_simd/src/api/minimal/ptr.rs
new file mode 100644
index 0000000000..d9e47c9ccb
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/minimal/ptr.rs
@@ -0,0 +1,1373 @@
+//! Minimal API of pointer vectors.
+
+macro_rules! impl_minimal_p {
+ ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident,
+ $usize_ty:ident, $isize_ty:ident | $ref:ident | $test_tt:tt
+ | $($elem_name:ident),+ | ($true:expr, $false:expr) |
+ $(#[$doc:meta])*) => {
+
+ $(#[$doc])*
+ pub type $id<T> = Simd<[$elem_ty; $elem_count]>;
+
+ impl<T> sealed::Simd for $id<T> {
+ type Element = $elem_ty;
+ const LANES: usize = $elem_count;
+ type LanesType = [u32; $elem_count];
+ }
+
+ impl<T> $id<T> {
+ /// Creates a new instance with each vector elements initialized
+ /// with the provided values.
+ #[inline]
+ #[allow(clippy::too_many_arguments)]
+ pub const fn new($($elem_name: $elem_ty),*) -> Self {
+ Simd(codegen::$id($($elem_name),*))
+ }
+
+ /// Returns the number of vector lanes.
+ #[inline]
+ pub const fn lanes() -> usize {
+ $elem_count
+ }
+
+ /// Constructs a new instance with each element initialized to
+ /// `value`.
+ #[inline]
+ pub const fn splat(value: $elem_ty) -> Self {
+ Simd(codegen::$id($({
+ #[allow(non_camel_case_types, dead_code)]
+ struct $elem_name;
+ value
+ }),*))
+ }
+
+ /// Constructs a new instance with each element initialized to
+ /// `null`.
+ #[inline]
+ pub const fn null() -> Self {
+ Self::splat(crate::ptr::null_mut() as $elem_ty)
+ }
+
+ /// Returns a mask that selects those lanes that contain `null`
+ /// pointers.
+ #[inline]
+ pub fn is_null(self) -> $mask_ty {
+ self.eq(Self::null())
+ }
+
+ /// Extracts the value at `index`.
+ ///
+ /// # Panics
+ ///
+ /// If `index >= Self::lanes()`.
+ #[inline]
+ pub fn extract(self, index: usize) -> $elem_ty {
+ assert!(index < $elem_count);
+ unsafe { self.extract_unchecked(index) }
+ }
+
+ /// Extracts the value at `index`.
+ ///
+ /// # Safety
+ ///
+ /// If `index >= Self::lanes()` the behavior is undefined.
+ #[inline]
+ pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty {
+ use crate::llvm::simd_extract;
+ simd_extract(self.0, index as u32)
+ }
+
+ /// Returns a new vector where the value at `index` is replaced by
+ /// `new_value`.
+ ///
+ /// # Panics
+ ///
+ /// If `index >= Self::lanes()`.
+ #[inline]
+ #[must_use = "replace does not modify the original value - \
+ it returns a new vector with the value at `index` \
+ replaced by `new_value`d"
+ ]
+ #[allow(clippy::not_unsafe_ptr_arg_deref)]
+ pub fn replace(self, index: usize, new_value: $elem_ty) -> Self {
+ assert!(index < $elem_count);
+ unsafe { self.replace_unchecked(index, new_value) }
+ }
+
+ /// Returns a new vector where the value at `index` is replaced by `new_value`.
+ ///
+ /// # Safety
+ ///
+ /// If `index >= Self::lanes()` the behavior is undefined.
+ #[inline]
+ #[must_use = "replace_unchecked does not modify the original value - \
+ it returns a new vector with the value at `index` \
+ replaced by `new_value`d"
+ ]
+ pub unsafe fn replace_unchecked(
+ self,
+ index: usize,
+ new_value: $elem_ty,
+ ) -> Self {
+ use crate::llvm::simd_insert;
+ Simd(simd_insert(self.0, index as u32, new_value))
+ }
+ }
+
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _minimal>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn minimal() {
+ // lanes:
+ assert_eq!($elem_count, $id::<i32>::lanes());
+
+ // splat and extract / extract_unchecked:
+ let VAL7: <$id<i32> as sealed::Simd>::Element
+ = $ref!(7);
+ let VAL42: <$id<i32> as sealed::Simd>::Element
+ = $ref!(42);
+ let VEC: $id<i32> = $id::splat(VAL7);
+ for i in 0..$id::<i32>::lanes() {
+ assert_eq!(VAL7, VEC.extract(i));
+ assert_eq!(
+ VAL7, unsafe { VEC.extract_unchecked(i) }
+ );
+ }
+
+ // replace / replace_unchecked
+ let new_vec = VEC.replace(0, VAL42);
+ for i in 0..$id::<i32>::lanes() {
+ if i == 0 {
+ assert_eq!(VAL42, new_vec.extract(i));
+ } else {
+ assert_eq!(VAL7, new_vec.extract(i));
+ }
+ }
+ let new_vec = unsafe {
+ VEC.replace_unchecked(0, VAL42)
+ };
+ for i in 0..$id::<i32>::lanes() {
+ if i == 0 {
+ assert_eq!(VAL42, new_vec.extract(i));
+ } else {
+ assert_eq!(VAL7, new_vec.extract(i));
+ }
+ }
+
+ let mut n = $id::<i32>::null();
+ assert_eq!(
+ n,
+ $id::<i32>::splat(unsafe { crate::mem::zeroed() })
+ );
+ assert!(n.is_null().all());
+ n = n.replace(
+ 0, unsafe { crate::mem::transmute(1_isize) }
+ );
+ assert!(!n.is_null().all());
+ if $id::<i32>::lanes() > 1 {
+ assert!(n.is_null().any());
+ } else {
+ assert!(!n.is_null().any());
+ }
+ }
+
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg(not(target_arch = "wasm32"))]
+ #[test]
+ #[should_panic]
+ fn extract_panic_oob() {
+ let VAL: <$id<i32> as sealed::Simd>::Element
+ = $ref!(7);
+ let VEC: $id<i32> = $id::splat(VAL);
+ let _ = VEC.extract($id::<i32>::lanes());
+ }
+
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg(not(target_arch = "wasm32"))]
+ #[test]
+ #[should_panic]
+ fn replace_panic_oob() {
+ let VAL: <$id<i32> as sealed::Simd>::Element
+ = $ref!(7);
+ let VAL42: <$id<i32> as sealed::Simd>::Element
+ = $ref!(42);
+ let VEC: $id<i32> = $id::splat(VAL);
+ let _ = VEC.replace($id::<i32>::lanes(), VAL42);
+ }
+ }
+ }
+ }
+
+ impl<T> crate::fmt::Debug for $id<T> {
+ #[allow(clippy::missing_inline_in_public_items)]
+ fn fmt(&self, f: &mut crate::fmt::Formatter<'_>)
+ -> crate::fmt::Result {
+ write!(
+ f,
+ "{}<{}>(",
+ stringify!($id),
+ crate::intrinsics::type_name::<T>()
+ )?;
+ for i in 0..$elem_count {
+ if i > 0 {
+ write!(f, ", ")?;
+ }
+ self.extract(i).fmt(f)?;
+ }
+ write!(f, ")")
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _fmt_debug>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn debug() {
+ use arrayvec::{ArrayString,ArrayVec};
+ type TinyString = ArrayString<[u8; 512]>;
+
+ use crate::fmt::Write;
+ let v = $id::<i32>::default();
+ let mut s = TinyString::new();
+ write!(&mut s, "{:?}", v).unwrap();
+
+ let mut beg = TinyString::new();
+ write!(&mut beg, "{}<i32>(", stringify!($id)).unwrap();
+ assert!(
+ s.starts_with(beg.as_str()),
+ "s = {} (should start with = {})", s, beg
+ );
+ assert!(s.ends_with(")"));
+ let s: ArrayVec<[TinyString; 64]>
+ = s.replace(beg.as_str(), "")
+ .replace(")", "").split(",")
+ .map(|v| TinyString::from(v.trim()).unwrap())
+ .collect();
+ assert_eq!(s.len(), $id::<i32>::lanes());
+ for (index, ss) in s.into_iter().enumerate() {
+ let mut e = TinyString::new();
+ write!(&mut e, "{:?}", v.extract(index)).unwrap();
+ assert_eq!(ss, e);
+ }
+ }
+ }
+ }
+ }
+
+ impl<T> Default for $id<T> {
+ #[inline]
+ fn default() -> Self {
+ // FIXME: ptrs do not implement default
+ Self::null()
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _default>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn default() {
+ let a = $id::<i32>::default();
+ for i in 0..$id::<i32>::lanes() {
+ assert_eq!(
+ a.extract(i), unsafe { crate::mem::zeroed() }
+ );
+ }
+ }
+ }
+ }
+ }
+
+ impl<T> $id<T> {
+ /// Lane-wise equality comparison.
+ #[inline]
+ pub fn eq(self, other: Self) -> $mask_ty {
+ unsafe {
+ use crate::llvm::simd_eq;
+ let a: $usize_ty = crate::mem::transmute(self);
+ let b: $usize_ty = crate::mem::transmute(other);
+ Simd(simd_eq(a.0, b.0))
+ }
+ }
+
+ /// Lane-wise inequality comparison.
+ #[inline]
+ pub fn ne(self, other: Self) -> $mask_ty {
+ unsafe {
+ use crate::llvm::simd_ne;
+ let a: $usize_ty = crate::mem::transmute(self);
+ let b: $usize_ty = crate::mem::transmute(other);
+ Simd(simd_ne(a.0, b.0))
+ }
+ }
+
+ /// Lane-wise less-than comparison.
+ #[inline]
+ pub fn lt(self, other: Self) -> $mask_ty {
+ unsafe {
+ use crate::llvm::simd_lt;
+ let a: $usize_ty = crate::mem::transmute(self);
+ let b: $usize_ty = crate::mem::transmute(other);
+ Simd(simd_lt(a.0, b.0))
+ }
+ }
+
+ /// Lane-wise less-than-or-equals comparison.
+ #[inline]
+ pub fn le(self, other: Self) -> $mask_ty {
+ unsafe {
+ use crate::llvm::simd_le;
+ let a: $usize_ty = crate::mem::transmute(self);
+ let b: $usize_ty = crate::mem::transmute(other);
+ Simd(simd_le(a.0, b.0))
+ }
+ }
+
+ /// Lane-wise greater-than comparison.
+ #[inline]
+ pub fn gt(self, other: Self) -> $mask_ty {
+ unsafe {
+ use crate::llvm::simd_gt;
+ let a: $usize_ty = crate::mem::transmute(self);
+ let b: $usize_ty = crate::mem::transmute(other);
+ Simd(simd_gt(a.0, b.0))
+ }
+ }
+
+ /// Lane-wise greater-than-or-equals comparison.
+ #[inline]
+ pub fn ge(self, other: Self) -> $mask_ty {
+ unsafe {
+ use crate::llvm::simd_ge;
+ let a: $usize_ty = crate::mem::transmute(self);
+ let b: $usize_ty = crate::mem::transmute(other);
+ Simd(simd_ge(a.0, b.0))
+ }
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _cmp_vertical>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn cmp() {
+ let a = $id::<i32>::null();
+ let b = $id::<i32>::splat(unsafe {
+ crate::mem::transmute(1_isize)
+ });
+
+ let r = a.lt(b);
+ let e = $mask_ty::splat(true);
+ assert!(r == e);
+ let r = a.le(b);
+ assert!(r == e);
+
+ let e = $mask_ty::splat(false);
+ let r = a.gt(b);
+ assert!(r == e);
+ let r = a.ge(b);
+ assert!(r == e);
+ let r = a.eq(b);
+ assert!(r == e);
+
+ let mut a = a;
+ let mut b = b;
+ let mut e = e;
+ for i in 0..$id::<i32>::lanes() {
+ if i % 2 == 0 {
+ a = a.replace(
+ i,
+ unsafe { crate::mem::transmute(0_isize) }
+ );
+ b = b.replace(
+ i,
+ unsafe { crate::mem::transmute(1_isize) }
+ );
+ e = e.replace(i, true);
+ } else {
+ a = a.replace(
+ i,
+ unsafe { crate::mem::transmute(1_isize) }
+ );
+ b = b.replace(
+ i,
+ unsafe { crate::mem::transmute(0_isize) }
+ );
+ e = e.replace(i, false);
+ }
+ }
+ let r = a.lt(b);
+ assert!(r == e);
+ }
+ }
+ }
+ }
+
+ #[allow(clippy::partialeq_ne_impl)]
+ impl<T> crate::cmp::PartialEq<$id<T>> for $id<T> {
+ #[inline]
+ fn eq(&self, other: &Self) -> bool {
+ $id::<T>::eq(*self, *other).all()
+ }
+ #[inline]
+ fn ne(&self, other: &Self) -> bool {
+ $id::<T>::ne(*self, *other).any()
+ }
+ }
+
+ // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892
+ #[allow(clippy::partialeq_ne_impl)]
+ impl<T> crate::cmp::PartialEq<LexicographicallyOrdered<$id<T>>>
+ for LexicographicallyOrdered<$id<T>>
+ {
+ #[inline]
+ fn eq(&self, other: &Self) -> bool {
+ self.0 == other.0
+ }
+ #[inline]
+ fn ne(&self, other: &Self) -> bool {
+ self.0 != other.0
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _cmp_PartialEq>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn partial_eq() {
+ let a = $id::<i32>::null();
+ let b = $id::<i32>::splat(unsafe {
+ crate::mem::transmute(1_isize)
+ });
+
+ assert!(a != b);
+ assert!(!(a == b));
+ assert!(a == a);
+ assert!(!(a != a));
+
+ if $id::<i32>::lanes() > 1 {
+ let a = $id::<i32>::null().replace(0, unsafe {
+ crate::mem::transmute(1_isize)
+ });
+ let b = $id::<i32>::splat(unsafe {
+ crate::mem::transmute(1_isize)
+ });
+
+ assert!(a != b);
+ assert!(!(a == b));
+ assert!(a == a);
+ assert!(!(a != a));
+ }
+ }
+ }
+ }
+ }
+
+ impl<T> crate::cmp::Eq for $id<T> {}
+ impl<T> crate::cmp::Eq for LexicographicallyOrdered<$id<T>> {}
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _cmp_eq>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn eq() {
+ fn foo<E: crate::cmp::Eq>(_: E) {}
+ let a = $id::<i32>::null();
+ foo(a);
+ }
+ }
+ }
+ }
+
+ impl<T> From<[$elem_ty; $elem_count]> for $id<T> {
+ #[inline]
+ fn from(array: [$elem_ty; $elem_count]) -> Self {
+ unsafe {
+ // FIXME: unnecessary zeroing; better than UB.
+ let mut u: Self = crate::mem::zeroed();
+ crate::ptr::copy_nonoverlapping(
+ &array as *const [$elem_ty; $elem_count] as *const u8,
+ &mut u as *mut Self as *mut u8,
+ crate::mem::size_of::<Self>()
+ );
+ u
+ }
+ }
+ }
+ impl<T> Into<[$elem_ty; $elem_count]> for $id<T> {
+ #[inline]
+ fn into(self) -> [$elem_ty; $elem_count] {
+ unsafe {
+ // FIXME: unnecessary zeroing; better than UB.
+ let mut u: [$elem_ty; $elem_count] = crate::mem::zeroed();
+ crate::ptr::copy_nonoverlapping(
+ &self as *const $id<T> as *const u8,
+ &mut u as *mut [$elem_ty; $elem_count] as *mut u8,
+ crate::mem::size_of::<Self>()
+ );
+ u
+ }
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _from>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn array() {
+ let values = [1_i32; $elem_count];
+
+ let mut vec: $id<i32> = Default::default();
+ let mut array = [
+ $id::<i32>::null().extract(0); $elem_count
+ ];
+
+ for i in 0..$elem_count {
+ let ptr = &values[i] as *const i32 as *mut i32;
+ vec = vec.replace(i, ptr);
+ array[i] = ptr;
+ }
+
+ // FIXME: there is no impl of From<$id<T>> for [$elem_ty; N]
+ // let a0 = From::from(vec);
+ // assert_eq!(a0, array);
+ #[allow(unused_assignments)]
+ let mut a1 = array;
+ a1 = vec.into();
+ assert_eq!(a1, array);
+
+ let v0: $id<i32> = From::from(array);
+ assert_eq!(v0, vec);
+ let v1: $id<i32> = array.into();
+ assert_eq!(v1, vec);
+ }
+ }
+ }
+ }
+
+ impl<T> $id<T> {
+ /// Instantiates a new vector with the values of the `slice`.
+ ///
+ /// # Panics
+ ///
+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned
+ /// to an `align_of::<Self>()` boundary.
+ #[inline]
+ pub fn from_slice_aligned(slice: &[$elem_ty]) -> Self {
+ unsafe {
+ assert!(slice.len() >= $elem_count);
+ let target_ptr = slice.as_ptr();
+ assert!(
+ target_ptr.align_offset(crate::mem::align_of::<Self>())
+ == 0
+ );
+ Self::from_slice_aligned_unchecked(slice)
+ }
+ }
+
+ /// Instantiates a new vector with the values of the `slice`.
+ ///
+ /// # Panics
+ ///
+ /// If `slice.len() < Self::lanes()`.
+ #[inline]
+ pub fn from_slice_unaligned(slice: &[$elem_ty]) -> Self {
+ unsafe {
+ assert!(slice.len() >= $elem_count);
+ Self::from_slice_unaligned_unchecked(slice)
+ }
+ }
+
+ /// Instantiates a new vector with the values of the `slice`.
+ ///
+ /// # Safety
+ ///
+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned
+ /// to an `align_of::<Self>()` boundary, the behavior is undefined.
+ #[inline]
+ pub unsafe fn from_slice_aligned_unchecked(slice: &[$elem_ty])
+ -> Self {
+ #[allow(clippy::cast_ptr_alignment)]
+ *(slice.as_ptr().cast())
+ }
+
+ /// Instantiates a new vector with the values of the `slice`.
+ ///
+ /// # Safety
+ ///
+ /// If `slice.len() < Self::lanes()` the behavior is undefined.
+ #[inline]
+ pub unsafe fn from_slice_unaligned_unchecked(
+ slice: &[$elem_ty],
+ ) -> Self {
+ use crate::mem::size_of;
+ let target_ptr = slice.as_ptr().cast();
+ let mut x = Self::splat(crate::ptr::null_mut() as $elem_ty);
+ let self_ptr = &mut x as *mut Self as *mut u8;
+ crate::ptr::copy_nonoverlapping(
+ target_ptr,
+ self_ptr,
+ size_of::<Self>(),
+ );
+ x
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _slice_from_slice>] {
+ use super::*;
+ use crate::iter::Iterator;
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn from_slice_unaligned() {
+ let (null, non_null) = ptr_vals!($id<i32>);
+
+ let mut unaligned = [
+ non_null; $id::<i32>::lanes() + 1
+ ];
+ unaligned[0] = null;
+ let vec = $id::<i32>::from_slice_unaligned(
+ &unaligned[1..]
+ );
+ for (index, &b) in unaligned.iter().enumerate() {
+ if index == 0 {
+ assert_eq!(b, null);
+ } else {
+ assert_eq!(b, non_null);
+ assert_eq!(b, vec.extract(index - 1));
+ }
+ }
+ }
+
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg(not(target_arch = "wasm32"))]
+ #[test]
+ #[should_panic]
+ fn from_slice_unaligned_fail() {
+ let (_null, non_null) = ptr_vals!($id<i32>);
+ let unaligned = [non_null; $id::<i32>::lanes() + 1];
+ // the slice is not large enough => panic
+ let _vec = $id::<i32>::from_slice_unaligned(
+ &unaligned[2..]
+ );
+ }
+
+ union A {
+ data: [<$id<i32> as sealed::Simd>::Element;
+ 2 * $id::<i32>::lanes()],
+ _vec: $id<i32>,
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn from_slice_aligned() {
+ let (null, non_null) = ptr_vals!($id<i32>);
+ let mut aligned = A {
+ data: [null; 2 * $id::<i32>::lanes()],
+ };
+ for i in
+ $id::<i32>::lanes()..(2 * $id::<i32>::lanes()) {
+ unsafe {
+ aligned.data[i] = non_null;
+ }
+ }
+
+ let vec = unsafe {
+ $id::<i32>::from_slice_aligned(
+ &aligned.data[$id::<i32>::lanes()..]
+ )
+ };
+ for (index, &b) in unsafe {
+ aligned.data.iter().enumerate()
+ } {
+ if index < $id::<i32>::lanes() {
+ assert_eq!(b, null);
+ } else {
+ assert_eq!(b, non_null);
+ assert_eq!(
+ b, vec.extract(index - $id::<i32>::lanes())
+ );
+ }
+ }
+ }
+
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg(not(target_arch = "wasm32"))]
+ #[test]
+ #[should_panic]
+ fn from_slice_aligned_fail_lanes() {
+ let (_null, non_null) = ptr_vals!($id<i32>);
+ let aligned = A {
+ data: [non_null; 2 * $id::<i32>::lanes()],
+ };
+ // the slice is not large enough => panic
+ let _vec = unsafe {
+ $id::<i32>::from_slice_aligned(
+ &aligned.data[2 * $id::<i32>::lanes()..]
+ )
+ };
+ }
+
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg(not(target_arch = "wasm32"))]
+ #[test]
+ #[should_panic]
+ fn from_slice_aligned_fail_align() {
+ unsafe {
+ let (null, _non_null) = ptr_vals!($id<i32>);
+ let aligned = A {
+ data: [null; 2 * $id::<i32>::lanes()],
+ };
+
+ // get a pointer to the front of data
+ let ptr = aligned.data.as_ptr();
+ // offset pointer by one element
+ let ptr = ptr.wrapping_add(1);
+
+ if ptr.align_offset(
+ crate::mem::align_of::<$id<i32>>()
+ ) == 0 {
+ // the pointer is properly aligned, so
+ // from_slice_aligned won't fail here (e.g. this
+ // can happen for i128x1). So we panic to make
+ // the "should_fail" test pass:
+ panic!("ok");
+ }
+
+ // create a slice - this is safe, because the
+ // elements of the slice exist, are properly
+ // initialized, and properly aligned:
+ let s = slice::from_raw_parts(
+ ptr, $id::<i32>::lanes()
+ );
+ // this should always panic because the slice
+ // alignment does not match the alignment
+ // requirements for the vector type:
+ let _vec = $id::<i32>::from_slice_aligned(s);
+ }
+ }
+ }
+ }
+ }
+
+ impl<T> $id<T> {
+ /// Writes the values of the vector to the `slice`.
+ ///
+ /// # Panics
+ ///
+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not
+ /// aligned to an `align_of::<Self>()` boundary.
+ #[inline]
+ pub fn write_to_slice_aligned(self, slice: &mut [$elem_ty]) {
+ unsafe {
+ assert!(slice.len() >= $elem_count);
+ let target_ptr = slice.as_mut_ptr();
+ assert!(
+ target_ptr.align_offset(crate::mem::align_of::<Self>())
+ == 0
+ );
+ self.write_to_slice_aligned_unchecked(slice);
+ }
+ }
+
+ /// Writes the values of the vector to the `slice`.
+ ///
+ /// # Panics
+ ///
+ /// If `slice.len() < Self::lanes()`.
+ #[inline]
+ pub fn write_to_slice_unaligned(self, slice: &mut [$elem_ty]) {
+ unsafe {
+ assert!(slice.len() >= $elem_count);
+ self.write_to_slice_unaligned_unchecked(slice);
+ }
+ }
+
+ /// Writes the values of the vector to the `slice`.
+ ///
+ /// # Safety
+ ///
+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not
+ /// aligned to an `align_of::<Self>()` boundary, the behavior is
+ /// undefined.
+ #[inline]
+ pub unsafe fn write_to_slice_aligned_unchecked(
+ self, slice: &mut [$elem_ty],
+ ) {
+ #[allow(clippy::cast_ptr_alignment)]
+ *(slice.as_mut_ptr().cast()) = self;
+ }
+
+ /// Writes the values of the vector to the `slice`.
+ ///
+ /// # Safety
+ ///
+ /// If `slice.len() < Self::lanes()` the behavior is undefined.
+ #[inline]
+ pub unsafe fn write_to_slice_unaligned_unchecked(
+ self, slice: &mut [$elem_ty],
+ ) {
+ let target_ptr = slice.as_mut_ptr().cast();
+ let self_ptr = &self as *const Self as *const u8;
+ crate::ptr::copy_nonoverlapping(
+ self_ptr,
+ target_ptr,
+ crate::mem::size_of::<Self>(),
+ );
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _slice_write_to_slice>] {
+ use super::*;
+ use crate::iter::Iterator;
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn write_to_slice_unaligned() {
+ let (null, non_null) = ptr_vals!($id<i32>);
+ let mut unaligned = [null; $id::<i32>::lanes() + 1];
+ let vec = $id::<i32>::splat(non_null);
+ vec.write_to_slice_unaligned(&mut unaligned[1..]);
+ for (index, &b) in unaligned.iter().enumerate() {
+ if index == 0 {
+ assert_eq!(b, null);
+ } else {
+ assert_eq!(b, non_null);
+ assert_eq!(b, vec.extract(index - 1));
+ }
+ }
+ }
+
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg(not(target_arch = "wasm32"))]
+ #[test]
+ #[should_panic]
+ fn write_to_slice_unaligned_fail() {
+ let (null, non_null) = ptr_vals!($id<i32>);
+ let mut unaligned = [null; $id::<i32>::lanes() + 1];
+ let vec = $id::<i32>::splat(non_null);
+ // the slice is not large enough => panic
+ vec.write_to_slice_unaligned(&mut unaligned[2..]);
+ }
+
+ union A {
+ data: [<$id<i32> as sealed::Simd>::Element;
+ 2 * $id::<i32>::lanes()],
+ _vec: $id<i32>,
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn write_to_slice_aligned() {
+ let (null, non_null) = ptr_vals!($id<i32>);
+ let mut aligned = A {
+ data: [null; 2 * $id::<i32>::lanes()],
+ };
+ let vec = $id::<i32>::splat(non_null);
+ unsafe {
+ vec.write_to_slice_aligned(
+ &mut aligned.data[$id::<i32>::lanes()..]
+ )
+ };
+ for (index, &b) in
+ unsafe { aligned.data.iter().enumerate() } {
+ if index < $id::<i32>::lanes() {
+ assert_eq!(b, null);
+ } else {
+ assert_eq!(b, non_null);
+ assert_eq!(
+ b, vec.extract(index - $id::<i32>::lanes())
+ );
+ }
+ }
+ }
+
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg(not(target_arch = "wasm32"))]
+ #[test]
+ #[should_panic]
+ fn write_to_slice_aligned_fail_lanes() {
+ let (null, non_null) = ptr_vals!($id<i32>);
+ let mut aligned = A {
+ data: [null; 2 * $id::<i32>::lanes()],
+ };
+ let vec = $id::<i32>::splat(non_null);
+ // the slice is not large enough => panic
+ unsafe {
+ vec.write_to_slice_aligned(
+ &mut aligned.data[2 * $id::<i32>::lanes()..]
+ )
+ };
+ }
+
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg(not(target_arch = "wasm32"))]
+ #[test]
+ #[should_panic]
+ fn write_to_slice_aligned_fail_align() {
+ let (null, non_null) = ptr_vals!($id<i32>);
+ unsafe {
+ let mut aligned = A {
+ data: [null; 2 * $id::<i32>::lanes()],
+ };
+
+ // get a pointer to the front of data
+ let ptr = aligned.data.as_mut_ptr();
+ // offset pointer by one element
+ let ptr = ptr.wrapping_add(1);
+
+ if ptr.align_offset(
+ crate::mem::align_of::<$id<i32>>()
+ ) == 0 {
+ // the pointer is properly aligned, so
+ // write_to_slice_aligned won't fail here (e.g.
+ // this can happen for i128x1). So we panic to
+ // make the "should_fail" test pass:
+ panic!("ok");
+ }
+
+ // create a slice - this is safe, because the
+ // elements of the slice exist, are properly
+ // initialized, and properly aligned:
+ let s = slice::from_raw_parts_mut(
+ ptr, $id::<i32>::lanes()
+ );
+ // this should always panic because the slice
+ // alignment does not match the alignment
+ // requirements for the vector type:
+ let vec = $id::<i32>::splat(non_null);
+ vec.write_to_slice_aligned(s);
+ }
+ }
+ }
+ }
+ }
+
+ impl<T> crate::hash::Hash for $id<T> {
+ #[inline]
+ fn hash<H: crate::hash::Hasher>(&self, state: &mut H) {
+ let s: $usize_ty = unsafe { crate::mem::transmute(*self) };
+ s.hash(state)
+ }
+ }
+
+ test_if! {
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _hash>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn hash() {
+ use crate::hash::{Hash, Hasher};
+ #[allow(deprecated)]
+ use crate::hash::{SipHasher13};
+
+ let values = [1_i32; $elem_count];
+
+ let mut vec: $id<i32> = Default::default();
+ let mut array = [
+ $id::<i32>::null().extract(0);
+ $elem_count
+ ];
+
+ for i in 0..$elem_count {
+ let ptr = &values[i] as *const i32 as *mut i32;
+ vec = vec.replace(i, ptr);
+ array[i] = ptr;
+ }
+
+ #[allow(deprecated)]
+ let mut a_hash = SipHasher13::new();
+ let mut v_hash = a_hash.clone();
+ array.hash(&mut a_hash);
+ vec.hash(&mut v_hash);
+ assert_eq!(a_hash.finish(), v_hash.finish());
+ }
+ }
+ }
+ }
+
+ impl<T> $id<T> {
+ /// Calculates the offset from a pointer.
+ ///
+ /// `count` is in units of `T`; e.g. a count of `3` represents a
+ /// pointer offset of `3 * size_of::<T>()` bytes.
+ ///
+ /// # Safety
+ ///
+ /// If any of the following conditions are violated, the result is
+ /// Undefined Behavior:
+ ///
+ /// * Both the starting and resulting pointer must be either in
+ /// bounds or one byte past the end of an allocated object.
+ ///
+ /// * The computed offset, in bytes, cannot overflow an `isize`.
+ ///
+ /// * The offset being in bounds cannot rely on "wrapping around"
+ /// the address space. That is, the infinite-precision sum, in bytes
+ /// must fit in a `usize`.
+ ///
+ /// The compiler and standard library generally tries to ensure
+ /// allocations never reach a size where an offset is a concern. For
+ /// instance, `Vec` and `Box` ensure they never allocate more than
+ /// `isize::MAX` bytes, so `vec.as_ptr().offset(vec.len() as isize)`
+ /// is always safe.
+ ///
+ /// Most platforms fundamentally can't even construct such an
+ /// allocation. For instance, no known 64-bit platform can ever
+ /// serve a request for 263 bytes due to page-table limitations or
+ /// splitting the address space. However, some 32-bit and 16-bit
+ /// platforms may successfully serve a request for more than
+ /// `isize::MAX` bytes with things like Physical Address Extension.
+ /// As such, memory acquired directly from allocators or memory
+ /// mapped files may be too large to handle with this function.
+ ///
+ /// Consider using `wrapping_offset` instead if these constraints
+ /// are difficult to satisfy. The only advantage of this method is
+ /// that it enables more aggressive compiler optimizations.
+ #[inline]
+ pub unsafe fn offset(self, count: $isize_ty) -> Self {
+ // FIXME: should use LLVM's `add nsw nuw`
+ self.wrapping_offset(count)
+ }
+
+ /// Calculates the offset from a pointer using wrapping arithmetic.
+ ///
+ /// `count` is in units of `T`; e.g. a count of `3` represents a
+ /// pointer offset of `3 * size_of::<T>()` bytes.
+ ///
+ /// # Safety
+ ///
+ /// The resulting pointer does not need to be in bounds, but it is
+ /// potentially hazardous to dereference (which requires unsafe).
+ ///
+ /// Always use `.offset(count)` instead when possible, because
+ /// offset allows the compiler to optimize better.
+ #[inline]
+ pub fn wrapping_offset(self, count: $isize_ty) -> Self {
+ unsafe {
+ let x: $isize_ty = crate::mem::transmute(self);
+ // note: {+,*} currently performs a `wrapping_{add, mul}`
+ crate::mem::transmute(
+ x + (count * crate::mem::size_of::<T>() as isize)
+ )
+ }
+ }
+
+ /// Calculates the distance between two pointers.
+ ///
+ /// The returned value is in units of `T`: the distance in bytes is
+ /// divided by `mem::size_of::<T>()`.
+ ///
+ /// This function is the inverse of offset.
+ ///
+ /// # Safety
+ ///
+ /// If any of the following conditions are violated, the result is
+ /// Undefined Behavior:
+ ///
+ /// * Both the starting and other pointer must be either in bounds
+ /// or one byte past the end of the same allocated object.
+ ///
+ /// * The distance between the pointers, in bytes, cannot overflow
+ /// an `isize`.
+ ///
+ /// * The distance between the pointers, in bytes, must be an exact
+ /// multiple of the size of `T`.
+ ///
+ /// * The distance being in bounds cannot rely on "wrapping around"
+ /// the address space.
+ ///
+ /// The compiler and standard library generally try to ensure
+ /// allocations never reach a size where an offset is a concern. For
+ /// instance, `Vec` and `Box` ensure they never allocate more than
+ /// `isize::MAX` bytes, so `ptr_into_vec.offset_from(vec.as_ptr())`
+ /// is always safe.
+ ///
+ /// Most platforms fundamentally can't even construct such an
+ /// allocation. For instance, no known 64-bit platform can ever
+ /// serve a request for 263 bytes due to page-table limitations or
+ /// splitting the address space. However, some 32-bit and 16-bit
+ /// platforms may successfully serve a request for more than
+ /// `isize::MAX` bytes with things like Physical Address Extension.
+ /// As such, memory acquired directly from allocators or memory
+ /// mapped files may be too large to handle with this function.
+ ///
+ /// Consider using `wrapping_offset_from` instead if these constraints
+ /// are difficult to satisfy. The only advantage of this method is
+ /// that it enables more aggressive compiler optimizations.
+ #[inline]
+ pub unsafe fn offset_from(self, origin: Self) -> $isize_ty {
+ // FIXME: should use LLVM's `sub nsw nuw`.
+ self.wrapping_offset_from(origin)
+ }
+
+ /// Calculates the distance between two pointers.
+ ///
+ /// The returned value is in units of `T`: the distance in bytes is
+ /// divided by `mem::size_of::<T>()`.
+ ///
+ /// If the address different between the two pointers is not a
+ /// multiple of `mem::size_of::<T>()` then the result of the
+ /// division is rounded towards zero.
+ ///
+ /// Though this method is safe for any two pointers, note that its
+ /// result will be mostly useless if the two pointers aren't into
+ /// the same allocated object, for example if they point to two
+ /// different local variables.
+ #[inline]
+ pub fn wrapping_offset_from(self, origin: Self) -> $isize_ty {
+ let x: $isize_ty = unsafe { crate::mem::transmute(self) };
+ let y: $isize_ty = unsafe { crate::mem::transmute(origin) };
+ // note: {-,/} currently perform wrapping_{sub, div}
+ (y - x) / (crate::mem::size_of::<T>() as isize)
+ }
+
+ /// Calculates the offset from a pointer (convenience for
+ /// `.offset(count as isize)`).
+ ///
+ /// `count` is in units of `T`; e.g. a count of 3 represents a
+ /// pointer offset of `3 * size_of::<T>()` bytes.
+ ///
+ /// # Safety
+ ///
+ /// If any of the following conditions are violated, the result is
+ /// Undefined Behavior:
+ ///
+ /// * Both the starting and resulting pointer must be either in
+ /// bounds or one byte past the end of an allocated object.
+ ///
+ /// * The computed offset, in bytes, cannot overflow an `isize`.
+ ///
+ /// * The offset being in bounds cannot rely on "wrapping around"
+ /// the address space. That is, the infinite-precision sum must fit
+ /// in a `usize`.
+ ///
+ /// The compiler and standard library generally tries to ensure
+ /// allocations never reach a size where an offset is a concern. For
+ /// instance, `Vec` and `Box` ensure they never allocate more than
+ /// `isize::MAX` bytes, so `vec.as_ptr().add(vec.len())` is always
+ /// safe.
+ ///
+ /// Most platforms fundamentally can't even construct such an
+ /// allocation. For instance, no known 64-bit platform can ever
+ /// serve a request for 263 bytes due to page-table limitations or
+ /// splitting the address space. However, some 32-bit and 16-bit
+ /// platforms may successfully serve a request for more than
+ /// `isize::MAX` bytes with things like Physical Address Extension.
+ /// As such, memory acquired directly from allocators or memory
+ /// mapped files may be too large to handle with this function.
+ ///
+ /// Consider using `wrapping_offset` instead if these constraints
+ /// are difficult to satisfy. The only advantage of this method is
+ /// that it enables more aggressive compiler optimizations.
+ #[inline]
+ #[allow(clippy::should_implement_trait)]
+ pub unsafe fn add(self, count: $usize_ty) -> Self {
+ self.offset(count.cast())
+ }
+
+ /// Calculates the offset from a pointer (convenience for
+ /// `.offset((count as isize).wrapping_neg())`).
+ ///
+ /// `count` is in units of T; e.g. a `count` of 3 represents a
+ /// pointer offset of `3 * size_of::<T>()` bytes.
+ ///
+ /// # Safety
+ ///
+ /// If any of the following conditions are violated, the result is
+ /// Undefined Behavior:
+ ///
+ /// * Both the starting and resulting pointer must be either in
+ /// bounds or one byte past the end of an allocated object.
+ ///
+ /// * The computed offset cannot exceed `isize::MAX` **bytes**.
+ ///
+ /// * The offset being in bounds cannot rely on "wrapping around"
+ /// the address space. That is, the infinite-precision sum must fit
+ /// in a usize.
+ ///
+ /// The compiler and standard library generally tries to ensure
+ /// allocations never reach a size where an offset is a concern. For
+ /// instance, `Vec` and `Box` ensure they never allocate more than
+ /// `isize::MAX` bytes, so
+ /// `vec.as_ptr().add(vec.len()).sub(vec.len())` is always safe.
+ ///
+ /// Most platforms fundamentally can't even construct such an
+ /// allocation. For instance, no known 64-bit platform can ever
+ /// serve a request for 2<sup>63</sup> bytes due to page-table
+ /// limitations or splitting the address space. However, some 32-bit
+ /// and 16-bit platforms may successfully serve a request for more
+ /// than `isize::MAX` bytes with things like Physical Address
+ /// Extension. As such, memory acquired directly from allocators or
+ /// memory mapped files *may* be too large to handle with this
+ /// function.
+ ///
+ /// Consider using `wrapping_offset` instead if these constraints
+ /// are difficult to satisfy. The only advantage of this method is
+ /// that it enables more aggressive compiler optimizations.
+ #[inline]
+ #[allow(clippy::should_implement_trait)]
+ pub unsafe fn sub(self, count: $usize_ty) -> Self {
+ let x: $isize_ty = count.cast();
+ // note: - is currently wrapping_neg
+ self.offset(-x)
+ }
+
+ /// Calculates the offset from a pointer using wrapping arithmetic.
+ /// (convenience for `.wrapping_offset(count as isize)`)
+ ///
+ /// `count` is in units of T; e.g. a `count` of 3 represents a
+ /// pointer offset of `3 * size_of::<T>()` bytes.
+ ///
+ /// # Safety
+ ///
+ /// The resulting pointer does not need to be in bounds, but it is
+ /// potentially hazardous to dereference (which requires `unsafe`).
+ ///
+ /// Always use `.add(count)` instead when possible, because `add`
+ /// allows the compiler to optimize better.
+ #[inline]
+ pub fn wrapping_add(self, count: $usize_ty) -> Self {
+ self.wrapping_offset(count.cast())
+ }
+
+ /// Calculates the offset from a pointer using wrapping arithmetic.
+ /// (convenience for `.wrapping_offset((count as
+ /// isize).wrapping_sub())`)
+ ///
+ /// `count` is in units of T; e.g. a `count` of 3 represents a
+ /// pointer offset of `3 * size_of::<T>()` bytes.
+ ///
+ /// # Safety
+ ///
+ /// The resulting pointer does not need to be in bounds, but it is
+ /// potentially hazardous to dereference (which requires `unsafe`).
+ ///
+ /// Always use `.sub(count)` instead when possible, because `sub`
+ /// allows the compiler to optimize better.
+ #[inline]
+ pub fn wrapping_sub(self, count: $usize_ty) -> Self {
+ let x: $isize_ty = count.cast();
+ self.wrapping_offset(-1 * x)
+ }
+ }
+
+ impl<T> $id<T> {
+ /// Shuffle vector elements according to `indices`.
+ #[inline]
+ pub fn shuffle1_dyn<I>(self, indices: I) -> Self
+ where
+ Self: codegen::shuffle1_dyn::Shuffle1Dyn<Indices = I>,
+ {
+ codegen::shuffle1_dyn::Shuffle1Dyn::shuffle1_dyn(self, indices)
+ }
+ }
+
+ test_if! {
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _shuffle1_dyn>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn shuffle1_dyn() {
+ let (null, non_null) = ptr_vals!($id<i32>);
+
+ // alternating = [non_null, null, non_null, null, ...]
+ let mut alternating = $id::<i32>::splat(null);
+ for i in 0..$id::<i32>::lanes() {
+ if i % 2 == 0 {
+ alternating = alternating.replace(i, non_null);
+ }
+ }
+
+ type Indices = <$id<i32>
+ as codegen::shuffle1_dyn::Shuffle1Dyn>::Indices;
+ // even = [0, 0, 2, 2, 4, 4, ..]
+ let even = {
+ let mut v = Indices::splat(0);
+ for i in 0..$id::<i32>::lanes() {
+ if i % 2 == 0 {
+ v = v.replace(i, (i as u8).into());
+ } else {
+ v = v.replace(i, (i as u8 - 1).into());
+ }
+ }
+ v
+ };
+ // odd = [1, 1, 3, 3, 5, 5, ...]
+ let odd = {
+ let mut v = Indices::splat(0);
+ for i in 0..$id::<i32>::lanes() {
+ if i % 2 != 0 {
+ v = v.replace(i, (i as u8).into());
+ } else {
+ v = v.replace(i, (i as u8 + 1).into());
+ }
+ }
+ v
+ };
+
+ assert_eq!(
+ alternating.shuffle1_dyn(even),
+ $id::<i32>::splat(non_null)
+ );
+ if $id::<i32>::lanes() > 1 {
+ assert_eq!(
+ alternating.shuffle1_dyn(odd),
+ $id::<i32>::splat(null)
+ );
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/ops.rs b/third_party/rust/packed_simd/src/api/ops.rs
new file mode 100644
index 0000000000..f71c98795d
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/ops.rs
@@ -0,0 +1,32 @@
+//! Implementation of the `ops` traits
+#[macro_use]
+mod vector_mask_bitwise;
+#[macro_use]
+mod scalar_mask_bitwise;
+
+#[macro_use]
+mod vector_arithmetic;
+#[macro_use]
+mod scalar_arithmetic;
+
+#[macro_use]
+mod vector_bitwise;
+#[macro_use]
+mod scalar_bitwise;
+
+#[macro_use]
+mod vector_shifts;
+#[macro_use]
+mod scalar_shifts;
+
+#[macro_use]
+mod vector_rotates;
+
+#[macro_use]
+mod vector_neg;
+
+#[macro_use]
+mod vector_int_min_max;
+
+#[macro_use]
+mod vector_float_min_max;
diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs b/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs
new file mode 100644
index 0000000000..da1a2037ea
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs
@@ -0,0 +1,203 @@
+//! Vertical (lane-wise) vector-scalar / scalar-vector arithmetic operations.
+
+macro_rules! impl_ops_scalar_arithmetic {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl crate::ops::Add<$elem_ty> for $id {
+ type Output = Self;
+ #[inline]
+ fn add(self, other: $elem_ty) -> Self {
+ self + $id::splat(other)
+ }
+ }
+ impl crate::ops::Add<$id> for $elem_ty {
+ type Output = $id;
+ #[inline]
+ fn add(self, other: $id) -> $id {
+ $id::splat(self) + other
+ }
+ }
+
+ impl crate::ops::Sub<$elem_ty> for $id {
+ type Output = Self;
+ #[inline]
+ fn sub(self, other: $elem_ty) -> Self {
+ self - $id::splat(other)
+ }
+ }
+ impl crate::ops::Sub<$id> for $elem_ty {
+ type Output = $id;
+ #[inline]
+ fn sub(self, other: $id) -> $id {
+ $id::splat(self) - other
+ }
+ }
+
+ impl crate::ops::Mul<$elem_ty> for $id {
+ type Output = Self;
+ #[inline]
+ fn mul(self, other: $elem_ty) -> Self {
+ self * $id::splat(other)
+ }
+ }
+ impl crate::ops::Mul<$id> for $elem_ty {
+ type Output = $id;
+ #[inline]
+ fn mul(self, other: $id) -> $id {
+ $id::splat(self) * other
+ }
+ }
+
+ impl crate::ops::Div<$elem_ty> for $id {
+ type Output = Self;
+ #[inline]
+ fn div(self, other: $elem_ty) -> Self {
+ self / $id::splat(other)
+ }
+ }
+ impl crate::ops::Div<$id> for $elem_ty {
+ type Output = $id;
+ #[inline]
+ fn div(self, other: $id) -> $id {
+ $id::splat(self) / other
+ }
+ }
+
+ impl crate::ops::Rem<$elem_ty> for $id {
+ type Output = Self;
+ #[inline]
+ fn rem(self, other: $elem_ty) -> Self {
+ self % $id::splat(other)
+ }
+ }
+ impl crate::ops::Rem<$id> for $elem_ty {
+ type Output = $id;
+ #[inline]
+ fn rem(self, other: $id) -> $id {
+ $id::splat(self) % other
+ }
+ }
+
+ impl crate::ops::AddAssign<$elem_ty> for $id {
+ #[inline]
+ fn add_assign(&mut self, other: $elem_ty) {
+ *self = *self + other;
+ }
+ }
+
+ impl crate::ops::SubAssign<$elem_ty> for $id {
+ #[inline]
+ fn sub_assign(&mut self, other: $elem_ty) {
+ *self = *self - other;
+ }
+ }
+
+ impl crate::ops::MulAssign<$elem_ty> for $id {
+ #[inline]
+ fn mul_assign(&mut self, other: $elem_ty) {
+ *self = *self * other;
+ }
+ }
+
+ impl crate::ops::DivAssign<$elem_ty> for $id {
+ #[inline]
+ fn div_assign(&mut self, other: $elem_ty) {
+ *self = *self / other;
+ }
+ }
+
+ impl crate::ops::RemAssign<$elem_ty> for $id {
+ #[inline]
+ fn rem_assign(&mut self, other: $elem_ty) {
+ *self = *self % other;
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _ops_scalar_arith>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn ops_scalar_arithmetic() {
+ let zi = 0 as $elem_ty;
+ let oi = 1 as $elem_ty;
+ let ti = 2 as $elem_ty;
+ let fi = 4 as $elem_ty;
+ let z = $id::splat(zi);
+ let o = $id::splat(oi);
+ let t = $id::splat(ti);
+ let f = $id::splat(fi);
+
+ // add
+ assert_eq!(zi + z, z);
+ assert_eq!(z + zi, z);
+ assert_eq!(oi + z, o);
+ assert_eq!(o + zi, o);
+ assert_eq!(ti + z, t);
+ assert_eq!(t + zi, t);
+ assert_eq!(ti + t, f);
+ assert_eq!(t + ti, f);
+ // sub
+ assert_eq!(zi - z, z);
+ assert_eq!(z - zi, z);
+ assert_eq!(oi - z, o);
+ assert_eq!(o - zi, o);
+ assert_eq!(ti - z, t);
+ assert_eq!(t - zi, t);
+ assert_eq!(fi - t, t);
+ assert_eq!(f - ti, t);
+ assert_eq!(f - o - o, t);
+ assert_eq!(f - oi - oi, t);
+ // mul
+ assert_eq!(zi * z, z);
+ assert_eq!(z * zi, z);
+ assert_eq!(zi * o, z);
+ assert_eq!(z * oi, z);
+ assert_eq!(zi * t, z);
+ assert_eq!(z * ti, z);
+ assert_eq!(oi * t, t);
+ assert_eq!(o * ti, t);
+ assert_eq!(ti * t, f);
+ assert_eq!(t * ti, f);
+ // div
+ assert_eq!(zi / o, z);
+ assert_eq!(z / oi, z);
+ assert_eq!(ti / o, t);
+ assert_eq!(t / oi, t);
+ assert_eq!(fi / o, f);
+ assert_eq!(f / oi, f);
+ assert_eq!(ti / t, o);
+ assert_eq!(t / ti, o);
+ assert_eq!(fi / t, t);
+ assert_eq!(f / ti, t);
+ // rem
+ assert_eq!(oi % o, z);
+ assert_eq!(o % oi, z);
+ assert_eq!(fi % t, z);
+ assert_eq!(f % ti, z);
+
+ {
+ let mut v = z;
+ assert_eq!(v, z);
+ v += oi; // add_assign
+ assert_eq!(v, o);
+ v -= oi; // sub_assign
+ assert_eq!(v, z);
+ v = t;
+ v *= oi; // mul_assign
+ assert_eq!(v, t);
+ v *= ti;
+ assert_eq!(v, f);
+ v /= oi; // div_assign
+ assert_eq!(v, f);
+ v /= ti;
+ assert_eq!(v, t);
+ v %= ti; // rem_assign
+ assert_eq!(v, z);
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs
new file mode 100644
index 0000000000..88216769ae
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs
@@ -0,0 +1,162 @@
+//! Vertical (lane-wise) vector-scalar / scalar-vector bitwise operations.
+
+macro_rules! impl_ops_scalar_bitwise {
+ (
+ [$elem_ty:ident; $elem_count:expr]:
+ $id:ident | $test_tt:tt |
+ ($true:expr, $false:expr)
+ ) => {
+ impl crate::ops::BitXor<$elem_ty> for $id {
+ type Output = Self;
+ #[inline]
+ fn bitxor(self, other: $elem_ty) -> Self {
+ self ^ $id::splat(other)
+ }
+ }
+ impl crate::ops::BitXor<$id> for $elem_ty {
+ type Output = $id;
+ #[inline]
+ fn bitxor(self, other: $id) -> $id {
+ $id::splat(self) ^ other
+ }
+ }
+
+ impl crate::ops::BitAnd<$elem_ty> for $id {
+ type Output = Self;
+ #[inline]
+ fn bitand(self, other: $elem_ty) -> Self {
+ self & $id::splat(other)
+ }
+ }
+ impl crate::ops::BitAnd<$id> for $elem_ty {
+ type Output = $id;
+ #[inline]
+ fn bitand(self, other: $id) -> $id {
+ $id::splat(self) & other
+ }
+ }
+
+ impl crate::ops::BitOr<$elem_ty> for $id {
+ type Output = Self;
+ #[inline]
+ fn bitor(self, other: $elem_ty) -> Self {
+ self | $id::splat(other)
+ }
+ }
+ impl crate::ops::BitOr<$id> for $elem_ty {
+ type Output = $id;
+ #[inline]
+ fn bitor(self, other: $id) -> $id {
+ $id::splat(self) | other
+ }
+ }
+
+ impl crate::ops::BitAndAssign<$elem_ty> for $id {
+ #[inline]
+ fn bitand_assign(&mut self, other: $elem_ty) {
+ *self = *self & other;
+ }
+ }
+ impl crate::ops::BitOrAssign<$elem_ty> for $id {
+ #[inline]
+ fn bitor_assign(&mut self, other: $elem_ty) {
+ *self = *self | other;
+ }
+ }
+ impl crate::ops::BitXorAssign<$elem_ty> for $id {
+ #[inline]
+ fn bitxor_assign(&mut self, other: $elem_ty) {
+ *self = *self ^ other;
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _ops_scalar_bitwise>] {
+ use super::*;
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn ops_scalar_bitwise() {
+ let zi = 0 as $elem_ty;
+ let oi = 1 as $elem_ty;
+ let ti = 2 as $elem_ty;
+ let z = $id::splat(zi);
+ let o = $id::splat(oi);
+ let t = $id::splat(ti);
+
+ // BitAnd:
+ assert_eq!(oi & o, o);
+ assert_eq!(o & oi, o);
+ assert_eq!(oi & z, z);
+ assert_eq!(o & zi, z);
+ assert_eq!(zi & o, z);
+ assert_eq!(z & oi, z);
+ assert_eq!(zi & z, z);
+ assert_eq!(z & zi, z);
+
+ assert_eq!(ti & t, t);
+ assert_eq!(t & ti, t);
+ assert_eq!(ti & o, z);
+ assert_eq!(t & oi, z);
+ assert_eq!(oi & t, z);
+ assert_eq!(o & ti, z);
+
+ // BitOr:
+ assert_eq!(oi | o, o);
+ assert_eq!(o | oi, o);
+ assert_eq!(oi | z, o);
+ assert_eq!(o | zi, o);
+ assert_eq!(zi | o, o);
+ assert_eq!(z | oi, o);
+ assert_eq!(zi | z, z);
+ assert_eq!(z | zi, z);
+
+ assert_eq!(ti | t, t);
+ assert_eq!(t | ti, t);
+ assert_eq!(zi | t, t);
+ assert_eq!(z | ti, t);
+ assert_eq!(ti | z, t);
+ assert_eq!(t | zi, t);
+
+ // BitXOR:
+ assert_eq!(oi ^ o, z);
+ assert_eq!(o ^ oi, z);
+ assert_eq!(zi ^ z, z);
+ assert_eq!(z ^ zi, z);
+ assert_eq!(zi ^ o, o);
+ assert_eq!(z ^ oi, o);
+ assert_eq!(oi ^ z, o);
+ assert_eq!(o ^ zi, o);
+
+ assert_eq!(ti ^ t, z);
+ assert_eq!(t ^ ti, z);
+ assert_eq!(ti ^ z, t);
+ assert_eq!(t ^ zi, t);
+ assert_eq!(zi ^ t, t);
+ assert_eq!(z ^ ti, t);
+
+ {
+ // AndAssign:
+ let mut v = o;
+ v &= ti;
+ assert_eq!(v, z);
+ }
+ {
+ // OrAssign:
+ let mut v = z;
+ v |= oi;
+ assert_eq!(v, o);
+ }
+ {
+ // XORAssign:
+ let mut v = z;
+ v ^= oi;
+ assert_eq!(v, o);
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs
new file mode 100644
index 0000000000..523a85207b
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs
@@ -0,0 +1,140 @@
+//! Vertical (lane-wise) vector-vector bitwise operations.
+
+macro_rules! impl_ops_scalar_mask_bitwise {
+ (
+ [$elem_ty:ident; $elem_count:expr]:
+ $id:ident | $test_tt:tt |
+ ($true:expr, $false:expr)
+ ) => {
+ impl crate::ops::BitXor<bool> for $id {
+ type Output = Self;
+ #[inline]
+ fn bitxor(self, other: bool) -> Self {
+ self ^ $id::splat(other)
+ }
+ }
+ impl crate::ops::BitXor<$id> for bool {
+ type Output = $id;
+ #[inline]
+ fn bitxor(self, other: $id) -> $id {
+ $id::splat(self) ^ other
+ }
+ }
+
+ impl crate::ops::BitAnd<bool> for $id {
+ type Output = Self;
+ #[inline]
+ fn bitand(self, other: bool) -> Self {
+ self & $id::splat(other)
+ }
+ }
+ impl crate::ops::BitAnd<$id> for bool {
+ type Output = $id;
+ #[inline]
+ fn bitand(self, other: $id) -> $id {
+ $id::splat(self) & other
+ }
+ }
+
+ impl crate::ops::BitOr<bool> for $id {
+ type Output = Self;
+ #[inline]
+ fn bitor(self, other: bool) -> Self {
+ self | $id::splat(other)
+ }
+ }
+ impl crate::ops::BitOr<$id> for bool {
+ type Output = $id;
+ #[inline]
+ fn bitor(self, other: $id) -> $id {
+ $id::splat(self) | other
+ }
+ }
+
+ impl crate::ops::BitAndAssign<bool> for $id {
+ #[inline]
+ fn bitand_assign(&mut self, other: bool) {
+ *self = *self & other;
+ }
+ }
+ impl crate::ops::BitOrAssign<bool> for $id {
+ #[inline]
+ fn bitor_assign(&mut self, other: bool) {
+ *self = *self | other;
+ }
+ }
+ impl crate::ops::BitXorAssign<bool> for $id {
+ #[inline]
+ fn bitxor_assign(&mut self, other: bool) {
+ *self = *self ^ other;
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _ops_scalar_mask_bitwise>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn ops_scalar_mask_bitwise() {
+ let ti = true;
+ let fi = false;
+ let t = $id::splat(ti);
+ let f = $id::splat(fi);
+ assert!(t != f);
+ assert!(!(t == f));
+
+ // BitAnd:
+ assert_eq!(ti & f, f);
+ assert_eq!(t & fi, f);
+ assert_eq!(fi & t, f);
+ assert_eq!(f & ti, f);
+ assert_eq!(ti & t, t);
+ assert_eq!(t & ti, t);
+ assert_eq!(fi & f, f);
+ assert_eq!(f & fi, f);
+
+ // BitOr:
+ assert_eq!(ti | f, t);
+ assert_eq!(t | fi, t);
+ assert_eq!(fi | t, t);
+ assert_eq!(f | ti, t);
+ assert_eq!(ti | t, t);
+ assert_eq!(t | ti, t);
+ assert_eq!(fi | f, f);
+ assert_eq!(f | fi, f);
+
+ // BitXOR:
+ assert_eq!(ti ^ f, t);
+ assert_eq!(t ^ fi, t);
+ assert_eq!(fi ^ t, t);
+ assert_eq!(f ^ ti, t);
+ assert_eq!(ti ^ t, f);
+ assert_eq!(t ^ ti, f);
+ assert_eq!(fi ^ f, f);
+ assert_eq!(f ^ fi, f);
+
+ {
+ // AndAssign:
+ let mut v = f;
+ v &= ti;
+ assert_eq!(v, f);
+ }
+ {
+ // OrAssign:
+ let mut v = f;
+ v |= ti;
+ assert_eq!(v, t);
+ }
+ {
+ // XORAssign:
+ let mut v = f;
+ v ^= ti;
+ assert_eq!(v, t);
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs b/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs
new file mode 100644
index 0000000000..4a7a096263
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs
@@ -0,0 +1,106 @@
+//! Vertical (lane-wise) vector-scalar shifts operations.
+
+macro_rules! impl_ops_scalar_shifts {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl crate::ops::Shl<u32> for $id {
+ type Output = Self;
+ #[inline]
+ fn shl(self, other: u32) -> Self {
+ self << $id::splat(other as $elem_ty)
+ }
+ }
+ impl crate::ops::Shr<u32> for $id {
+ type Output = Self;
+ #[inline]
+ fn shr(self, other: u32) -> Self {
+ self >> $id::splat(other as $elem_ty)
+ }
+ }
+
+ impl crate::ops::ShlAssign<u32> for $id {
+ #[inline]
+ fn shl_assign(&mut self, other: u32) {
+ *self = *self << other;
+ }
+ }
+ impl crate::ops::ShrAssign<u32> for $id {
+ #[inline]
+ fn shr_assign(&mut self, other: u32) {
+ *self = *self >> other;
+ }
+ }
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _ops_scalar_shifts>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg_attr(any(target_arch = "s390x", target_arch = "sparc64"),
+ allow(unreachable_code, unused_variables)
+ )]
+ #[cfg(not(target_arch = "aarch64"))]
+ //~^ FIXME: https://github.com/rust-lang/packed_simd/issues/317
+ fn ops_scalar_shifts() {
+ let z = $id::splat(0 as $elem_ty);
+ let o = $id::splat(1 as $elem_ty);
+ let t = $id::splat(2 as $elem_ty);
+ let f = $id::splat(4 as $elem_ty);
+
+ {
+ let zi = 0 as u32;
+ let oi = 1 as u32;
+ let ti = 2 as u32;
+ let maxi
+ = (mem::size_of::<$elem_ty>() * 8 - 1) as u32;
+
+ // shr
+ assert_eq!(z >> zi, z);
+ assert_eq!(z >> oi, z);
+ assert_eq!(z >> ti, z);
+ assert_eq!(z >> ti, z);
+
+ #[cfg(any(target_arch = "s390x", target_arch = "sparc64"))] {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/13
+ return;
+ }
+
+ assert_eq!(o >> zi, o);
+ assert_eq!(t >> zi, t);
+ assert_eq!(f >> zi, f);
+ assert_eq!(f >> maxi, z);
+
+ assert_eq!(o >> oi, z);
+ assert_eq!(t >> oi, o);
+ assert_eq!(t >> ti, z);
+ assert_eq!(f >> oi, t);
+ assert_eq!(f >> ti, o);
+ assert_eq!(f >> maxi, z);
+
+ // shl
+ assert_eq!(z << zi, z);
+ assert_eq!(o << zi, o);
+ assert_eq!(t << zi, t);
+ assert_eq!(f << zi, f);
+ assert_eq!(f << maxi, z);
+
+ assert_eq!(o << oi, t);
+ assert_eq!(o << ti, f);
+ assert_eq!(t << oi, f);
+
+ { // shr_assign
+ let mut v = o;
+ v >>= oi;
+ assert_eq!(v, z);
+ }
+ { // shl_assign
+ let mut v = o;
+ v <<= oi;
+ assert_eq!(v, t);
+ }
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs b/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs
new file mode 100644
index 0000000000..7057f52d03
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs
@@ -0,0 +1,148 @@
+//! Vertical (lane-wise) vector-vector arithmetic operations.
+
+macro_rules! impl_ops_vector_arithmetic {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl crate::ops::Add for $id {
+ type Output = Self;
+ #[inline]
+ fn add(self, other: Self) -> Self {
+ use crate::llvm::simd_add;
+ unsafe { Simd(simd_add(self.0, other.0)) }
+ }
+ }
+
+ impl crate::ops::Sub for $id {
+ type Output = Self;
+ #[inline]
+ fn sub(self, other: Self) -> Self {
+ use crate::llvm::simd_sub;
+ unsafe { Simd(simd_sub(self.0, other.0)) }
+ }
+ }
+
+ impl crate::ops::Mul for $id {
+ type Output = Self;
+ #[inline]
+ fn mul(self, other: Self) -> Self {
+ use crate::llvm::simd_mul;
+ unsafe { Simd(simd_mul(self.0, other.0)) }
+ }
+ }
+
+ impl crate::ops::Div for $id {
+ type Output = Self;
+ #[inline]
+ fn div(self, other: Self) -> Self {
+ use crate::llvm::simd_div;
+ unsafe { Simd(simd_div(self.0, other.0)) }
+ }
+ }
+
+ impl crate::ops::Rem for $id {
+ type Output = Self;
+ #[inline]
+ fn rem(self, other: Self) -> Self {
+ use crate::llvm::simd_rem;
+ unsafe { Simd(simd_rem(self.0, other.0)) }
+ }
+ }
+
+ impl crate::ops::AddAssign for $id {
+ #[inline]
+ fn add_assign(&mut self, other: Self) {
+ *self = *self + other;
+ }
+ }
+
+ impl crate::ops::SubAssign for $id {
+ #[inline]
+ fn sub_assign(&mut self, other: Self) {
+ *self = *self - other;
+ }
+ }
+
+ impl crate::ops::MulAssign for $id {
+ #[inline]
+ fn mul_assign(&mut self, other: Self) {
+ *self = *self * other;
+ }
+ }
+
+ impl crate::ops::DivAssign for $id {
+ #[inline]
+ fn div_assign(&mut self, other: Self) {
+ *self = *self / other;
+ }
+ }
+
+ impl crate::ops::RemAssign for $id {
+ #[inline]
+ fn rem_assign(&mut self, other: Self) {
+ *self = *self % other;
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _ops_vector_arith>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn ops_vector_arithmetic() {
+ let z = $id::splat(0 as $elem_ty);
+ let o = $id::splat(1 as $elem_ty);
+ let t = $id::splat(2 as $elem_ty);
+ let f = $id::splat(4 as $elem_ty);
+
+ // add
+ assert_eq!(z + z, z);
+ assert_eq!(o + z, o);
+ assert_eq!(t + z, t);
+ assert_eq!(t + t, f);
+ // sub
+ assert_eq!(z - z, z);
+ assert_eq!(o - z, o);
+ assert_eq!(t - z, t);
+ assert_eq!(f - t, t);
+ assert_eq!(f - o - o, t);
+ // mul
+ assert_eq!(z * z, z);
+ assert_eq!(z * o, z);
+ assert_eq!(z * t, z);
+ assert_eq!(o * t, t);
+ assert_eq!(t * t, f);
+ // div
+ assert_eq!(z / o, z);
+ assert_eq!(t / o, t);
+ assert_eq!(f / o, f);
+ assert_eq!(t / t, o);
+ assert_eq!(f / t, t);
+ // rem
+ assert_eq!(o % o, z);
+ assert_eq!(f % t, z);
+
+ {
+ let mut v = z;
+ assert_eq!(v, z);
+ v += o; // add_assign
+ assert_eq!(v, o);
+ v -= o; // sub_assign
+ assert_eq!(v, z);
+ v = t;
+ v *= o; // mul_assign
+ assert_eq!(v, t);
+ v *= t;
+ assert_eq!(v, f);
+ v /= o; // div_assign
+ assert_eq!(v, f);
+ v /= t;
+ assert_eq!(v, t);
+ v %= t; // rem_assign
+ assert_eq!(v, z);
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs
new file mode 100644
index 0000000000..7be9603fa2
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs
@@ -0,0 +1,129 @@
+//! Vertical (lane-wise) vector-vector bitwise operations.
+
+macro_rules! impl_ops_vector_bitwise {
+ (
+ [$elem_ty:ident; $elem_count:expr]:
+ $id:ident | $test_tt:tt |
+ ($true:expr, $false:expr)
+ ) => {
+ impl crate::ops::Not for $id {
+ type Output = Self;
+ #[inline]
+ fn not(self) -> Self {
+ Self::splat($true) ^ self
+ }
+ }
+ impl crate::ops::BitXor for $id {
+ type Output = Self;
+ #[inline]
+ fn bitxor(self, other: Self) -> Self {
+ use crate::llvm::simd_xor;
+ unsafe { Simd(simd_xor(self.0, other.0)) }
+ }
+ }
+ impl crate::ops::BitAnd for $id {
+ type Output = Self;
+ #[inline]
+ fn bitand(self, other: Self) -> Self {
+ use crate::llvm::simd_and;
+ unsafe { Simd(simd_and(self.0, other.0)) }
+ }
+ }
+ impl crate::ops::BitOr for $id {
+ type Output = Self;
+ #[inline]
+ fn bitor(self, other: Self) -> Self {
+ use crate::llvm::simd_or;
+ unsafe { Simd(simd_or(self.0, other.0)) }
+ }
+ }
+ impl crate::ops::BitAndAssign for $id {
+ #[inline]
+ fn bitand_assign(&mut self, other: Self) {
+ *self = *self & other;
+ }
+ }
+ impl crate::ops::BitOrAssign for $id {
+ #[inline]
+ fn bitor_assign(&mut self, other: Self) {
+ *self = *self | other;
+ }
+ }
+ impl crate::ops::BitXorAssign for $id {
+ #[inline]
+ fn bitxor_assign(&mut self, other: Self) {
+ *self = *self ^ other;
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _ops_vector_bitwise>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn ops_vector_bitwise() {
+
+ let z = $id::splat(0 as $elem_ty);
+ let o = $id::splat(1 as $elem_ty);
+ let t = $id::splat(2 as $elem_ty);
+ let m = $id::splat(!z.extract(0));
+
+ // Not:
+ assert_eq!(!z, m);
+ assert_eq!(!m, z);
+
+ // BitAnd:
+ assert_eq!(o & o, o);
+ assert_eq!(o & z, z);
+ assert_eq!(z & o, z);
+ assert_eq!(z & z, z);
+
+ assert_eq!(t & t, t);
+ assert_eq!(t & o, z);
+ assert_eq!(o & t, z);
+
+ // BitOr:
+ assert_eq!(o | o, o);
+ assert_eq!(o | z, o);
+ assert_eq!(z | o, o);
+ assert_eq!(z | z, z);
+
+ assert_eq!(t | t, t);
+ assert_eq!(z | t, t);
+ assert_eq!(t | z, t);
+
+ // BitXOR:
+ assert_eq!(o ^ o, z);
+ assert_eq!(z ^ z, z);
+ assert_eq!(z ^ o, o);
+ assert_eq!(o ^ z, o);
+
+ assert_eq!(t ^ t, z);
+ assert_eq!(t ^ z, t);
+ assert_eq!(z ^ t, t);
+
+ {
+ // AndAssign:
+ let mut v = o;
+ v &= t;
+ assert_eq!(v, z);
+ }
+ {
+ // OrAssign:
+ let mut v = z;
+ v |= o;
+ assert_eq!(v, o);
+ }
+ {
+ // XORAssign:
+ let mut v = z;
+ v ^= o;
+ assert_eq!(v, o);
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs b/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs
new file mode 100644
index 0000000000..8310667b7a
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs
@@ -0,0 +1,74 @@
+//! Vertical (lane-wise) vector `min` and `max` for floating-point vectors.
+
+macro_rules! impl_ops_vector_float_min_max {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Minimum of two vectors.
+ ///
+ /// Returns a new vector containing the minimum value of each of
+ /// the input vector lanes.
+ #[inline]
+ pub fn min(self, x: Self) -> Self {
+ use crate::llvm::simd_fmin;
+ unsafe { Simd(simd_fmin(self.0, x.0)) }
+ }
+
+ /// Maximum of two vectors.
+ ///
+ /// Returns a new vector containing the maximum value of each of
+ /// the input vector lanes.
+ #[inline]
+ pub fn max(self, x: Self) -> Self {
+ use crate::llvm::simd_fmax;
+ unsafe { Simd(simd_fmax(self.0, x.0)) }
+ }
+ }
+ test_if!{
+ $test_tt:
+ paste::item! {
+ #[cfg(not(any(
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/223
+ all(target_arch = "mips", target_endian = "big"),
+ target_arch = "mips64",
+ )))]
+ pub mod [<$id _ops_vector_min_max>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn min_max() {
+ let n = crate::$elem_ty::NAN;
+ let o = $id::splat(1. as $elem_ty);
+ let t = $id::splat(2. as $elem_ty);
+
+ let mut m = o; // [1., 2., 1., 2., ...]
+ let mut on = o;
+ for i in 0..$id::lanes() {
+ if i % 2 == 0 {
+ m = m.replace(i, 2. as $elem_ty);
+ on = on.replace(i, n);
+ }
+ }
+
+ assert_eq!(o.min(t), o);
+ assert_eq!(t.min(o), o);
+ assert_eq!(m.min(o), o);
+ assert_eq!(o.min(m), o);
+ assert_eq!(m.min(t), m);
+ assert_eq!(t.min(m), m);
+
+ assert_eq!(o.max(t), t);
+ assert_eq!(t.max(o), t);
+ assert_eq!(m.max(o), m);
+ assert_eq!(o.max(m), m);
+ assert_eq!(m.max(t), t);
+ assert_eq!(t.max(m), t);
+
+ assert_eq!(on.min(o), o);
+ assert_eq!(o.min(on), o);
+ assert_eq!(on.max(o), o);
+ assert_eq!(o.max(on), o);
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs b/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs
new file mode 100644
index 0000000000..36ea98e6bf
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs
@@ -0,0 +1,57 @@
+//! Vertical (lane-wise) vector `min` and `max` for integer vectors.
+
+macro_rules! impl_ops_vector_int_min_max {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Minimum of two vectors.
+ ///
+ /// Returns a new vector containing the minimum value of each of
+ /// the input vector lanes.
+ #[inline]
+ pub fn min(self, x: Self) -> Self {
+ self.lt(x).select(self, x)
+ }
+
+ /// Maximum of two vectors.
+ ///
+ /// Returns a new vector containing the maximum value of each of
+ /// the input vector lanes.
+ #[inline]
+ pub fn max(self, x: Self) -> Self {
+ self.gt(x).select(self, x)
+ }
+ }
+ test_if!{$test_tt:
+ paste::item! {
+ pub mod [<$id _ops_vector_min_max>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn min_max() {
+ let o = $id::splat(1 as $elem_ty);
+ let t = $id::splat(2 as $elem_ty);
+
+ let mut m = o;
+ for i in 0..$id::lanes() {
+ if i % 2 == 0 {
+ m = m.replace(i, 2 as $elem_ty);
+ }
+ }
+ assert_eq!(o.min(t), o);
+ assert_eq!(t.min(o), o);
+ assert_eq!(m.min(o), o);
+ assert_eq!(o.min(m), o);
+ assert_eq!(m.min(t), m);
+ assert_eq!(t.min(m), m);
+
+ assert_eq!(o.max(t), t);
+ assert_eq!(t.max(o), t);
+ assert_eq!(m.max(o), m);
+ assert_eq!(o.max(m), m);
+ assert_eq!(m.max(t), t);
+ assert_eq!(t.max(m), t);
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs
new file mode 100644
index 0000000000..295fc1ca81
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs
@@ -0,0 +1,116 @@
+//! Vertical (lane-wise) vector-vector bitwise operations.
+
+macro_rules! impl_ops_vector_mask_bitwise {
+ (
+ [$elem_ty:ident; $elem_count:expr]:
+ $id:ident | $test_tt:tt |
+ ($true:expr, $false:expr)
+ ) => {
+ impl crate::ops::Not for $id {
+ type Output = Self;
+ #[inline]
+ fn not(self) -> Self {
+ Self::splat($true) ^ self
+ }
+ }
+ impl crate::ops::BitXor for $id {
+ type Output = Self;
+ #[inline]
+ fn bitxor(self, other: Self) -> Self {
+ use crate::llvm::simd_xor;
+ unsafe { Simd(simd_xor(self.0, other.0)) }
+ }
+ }
+ impl crate::ops::BitAnd for $id {
+ type Output = Self;
+ #[inline]
+ fn bitand(self, other: Self) -> Self {
+ use crate::llvm::simd_and;
+ unsafe { Simd(simd_and(self.0, other.0)) }
+ }
+ }
+ impl crate::ops::BitOr for $id {
+ type Output = Self;
+ #[inline]
+ fn bitor(self, other: Self) -> Self {
+ use crate::llvm::simd_or;
+ unsafe { Simd(simd_or(self.0, other.0)) }
+ }
+ }
+ impl crate::ops::BitAndAssign for $id {
+ #[inline]
+ fn bitand_assign(&mut self, other: Self) {
+ *self = *self & other;
+ }
+ }
+ impl crate::ops::BitOrAssign for $id {
+ #[inline]
+ fn bitor_assign(&mut self, other: Self) {
+ *self = *self | other;
+ }
+ }
+ impl crate::ops::BitXorAssign for $id {
+ #[inline]
+ fn bitxor_assign(&mut self, other: Self) {
+ *self = *self ^ other;
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _ops_vector_mask_bitwise>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn ops_vector_mask_bitwise() {
+ let t = $id::splat(true);
+ let f = $id::splat(false);
+ assert!(t != f);
+ assert!(!(t == f));
+
+ // Not:
+ assert_eq!(!t, f);
+ assert_eq!(t, !f);
+
+ // BitAnd:
+ assert_eq!(t & f, f);
+ assert_eq!(f & t, f);
+ assert_eq!(t & t, t);
+ assert_eq!(f & f, f);
+
+ // BitOr:
+ assert_eq!(t | f, t);
+ assert_eq!(f | t, t);
+ assert_eq!(t | t, t);
+ assert_eq!(f | f, f);
+
+ // BitXOR:
+ assert_eq!(t ^ f, t);
+ assert_eq!(f ^ t, t);
+ assert_eq!(t ^ t, f);
+ assert_eq!(f ^ f, f);
+
+ {
+ // AndAssign:
+ let mut v = f;
+ v &= t;
+ assert_eq!(v, f);
+ }
+ {
+ // OrAssign:
+ let mut v = f;
+ v |= t;
+ assert_eq!(v, t);
+ }
+ {
+ // XORAssign:
+ let mut v = f;
+ v ^= t;
+ assert_eq!(v, t);
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/ops/vector_neg.rs b/third_party/rust/packed_simd/src/api/ops/vector_neg.rs
new file mode 100644
index 0000000000..e2d91fd2fe
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/ops/vector_neg.rs
@@ -0,0 +1,43 @@
+//! Vertical (lane-wise) vector `Neg`.
+
+macro_rules! impl_ops_vector_neg {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl crate::ops::Neg for $id {
+ type Output = Self;
+ #[inline]
+ fn neg(self) -> Self {
+ Self::splat(-1 as $elem_ty) * self
+ }
+ }
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _ops_vector_neg>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn neg() {
+ let z = $id::splat(0 as $elem_ty);
+ let o = $id::splat(1 as $elem_ty);
+ let t = $id::splat(2 as $elem_ty);
+ let f = $id::splat(4 as $elem_ty);
+
+ let nz = $id::splat(-(0 as $elem_ty));
+ let no = $id::splat(-(1 as $elem_ty));
+ let nt = $id::splat(-(2 as $elem_ty));
+ let nf = $id::splat(-(4 as $elem_ty));
+
+ assert_eq!(-z, nz);
+ assert_eq!(-o, no);
+ assert_eq!(-t, nt);
+ assert_eq!(-f, nf);
+
+ assert_eq!(z, -nz);
+ assert_eq!(o, -no);
+ assert_eq!(t, -nt);
+ assert_eq!(f, -nf);
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs b/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs
new file mode 100644
index 0000000000..6c4bed72a2
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs
@@ -0,0 +1,92 @@
+//! Vertical (lane-wise) vector rotates operations.
+#![allow(unused)]
+
+macro_rules! impl_ops_vector_rotates {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Shifts the bits of each lane to the left by the specified
+ /// amount in the corresponding lane of `n`, wrapping the
+ /// truncated bits to the end of the resulting integer.
+ ///
+ /// Note: this is neither the same operation as `<<` nor equivalent
+ /// to `slice::rotate_left`.
+ #[inline]
+ pub fn rotate_left(self, n: $id) -> $id {
+ const LANE_WIDTH: $elem_ty =
+ crate::mem::size_of::<$elem_ty>() as $elem_ty * 8;
+ // Protect against undefined behavior for over-long bit shifts
+ let n = n % LANE_WIDTH;
+ (self << n) | (self >> ((LANE_WIDTH - n) % LANE_WIDTH))
+ }
+
+ /// Shifts the bits of each lane to the right by the specified
+ /// amount in the corresponding lane of `n`, wrapping the
+ /// truncated bits to the beginning of the resulting integer.
+ ///
+ /// Note: this is neither the same operation as `>>` nor equivalent
+ /// to `slice::rotate_right`.
+ #[inline]
+ pub fn rotate_right(self, n: $id) -> $id {
+ const LANE_WIDTH: $elem_ty =
+ crate::mem::size_of::<$elem_ty>() as $elem_ty * 8;
+ // Protect against undefined behavior for over-long bit shifts
+ let n = n % LANE_WIDTH;
+ (self >> n) | (self << ((LANE_WIDTH - n) % LANE_WIDTH))
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ // FIXME:
+ // https://github.com/rust-lang-nursery/packed_simd/issues/75
+ #[cfg(not(any(
+ target_arch = "s390x",
+ target_arch = "sparc64",
+ )))]
+ pub mod [<$id _ops_vector_rotate>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg(not(target_arch = "aarch64"))]
+ //~^ FIXME: https://github.com/rust-lang/packed_simd/issues/317
+ fn rotate_ops() {
+ let z = $id::splat(0 as $elem_ty);
+ let o = $id::splat(1 as $elem_ty);
+ let t = $id::splat(2 as $elem_ty);
+ let f = $id::splat(4 as $elem_ty);
+
+ let max = $id::splat(
+ (mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty);
+
+ // rotate_right
+ assert_eq!(z.rotate_right(z), z);
+ assert_eq!(z.rotate_right(o), z);
+ assert_eq!(z.rotate_right(t), z);
+
+ assert_eq!(o.rotate_right(z), o);
+ assert_eq!(t.rotate_right(z), t);
+ assert_eq!(f.rotate_right(z), f);
+ assert_eq!(f.rotate_right(max), f << 1);
+
+ assert_eq!(o.rotate_right(o), o << max);
+ assert_eq!(t.rotate_right(o), o);
+ assert_eq!(t.rotate_right(t), o << max);
+ assert_eq!(f.rotate_right(o), t);
+ assert_eq!(f.rotate_right(t), o);
+
+ // rotate_left
+ assert_eq!(z.rotate_left(z), z);
+ assert_eq!(o.rotate_left(z), o);
+ assert_eq!(t.rotate_left(z), t);
+ assert_eq!(f.rotate_left(z), f);
+ assert_eq!(f.rotate_left(max), t);
+
+ assert_eq!(o.rotate_left(o), t);
+ assert_eq!(o.rotate_left(t), f);
+ assert_eq!(t.rotate_left(o), f);
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs b/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs
new file mode 100644
index 0000000000..8bb5ac2fc0
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs
@@ -0,0 +1,106 @@
+//! Vertical (lane-wise) vector-vector shifts operations.
+
+macro_rules! impl_ops_vector_shifts {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl crate::ops::Shl<$id> for $id {
+ type Output = Self;
+ #[inline]
+ fn shl(self, other: Self) -> Self {
+ use crate::llvm::simd_shl;
+ unsafe { Simd(simd_shl(self.0, other.0)) }
+ }
+ }
+ impl crate::ops::Shr<$id> for $id {
+ type Output = Self;
+ #[inline]
+ fn shr(self, other: Self) -> Self {
+ use crate::llvm::simd_shr;
+ unsafe { Simd(simd_shr(self.0, other.0)) }
+ }
+ }
+ impl crate::ops::ShlAssign<$id> for $id {
+ #[inline]
+ fn shl_assign(&mut self, other: Self) {
+ *self = *self << other;
+ }
+ }
+ impl crate::ops::ShrAssign<$id> for $id {
+ #[inline]
+ fn shr_assign(&mut self, other: Self) {
+ *self = *self >> other;
+ }
+ }
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _ops_vector_shifts>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg_attr(any(target_arch = "s390x", target_arch = "sparc64"),
+ allow(unreachable_code, unused_variables)
+ )]
+ #[cfg(not(target_arch = "aarch64"))]
+ //~^ FIXME: https://github.com/rust-lang/packed_simd/issues/317
+ fn ops_vector_shifts() {
+ let z = $id::splat(0 as $elem_ty);
+ let o = $id::splat(1 as $elem_ty);
+ let t = $id::splat(2 as $elem_ty);
+ let f = $id::splat(4 as $elem_ty);
+
+ let max =$id::splat(
+ (mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty
+ );
+
+ // shr
+ assert_eq!(z >> z, z);
+ assert_eq!(z >> o, z);
+ assert_eq!(z >> t, z);
+ assert_eq!(z >> t, z);
+
+ #[cfg(any(target_arch = "s390x", target_arch = "sparc64"))] {
+ // FIXME: rust produces bad codegen for shifts:
+ // https://github.com/rust-lang-nursery/packed_simd/issues/13
+ return;
+ }
+
+ assert_eq!(o >> z, o);
+ assert_eq!(t >> z, t);
+ assert_eq!(f >> z, f);
+ assert_eq!(f >> max, z);
+
+ assert_eq!(o >> o, z);
+ assert_eq!(t >> o, o);
+ assert_eq!(t >> t, z);
+ assert_eq!(f >> o, t);
+ assert_eq!(f >> t, o);
+ assert_eq!(f >> max, z);
+
+ // shl
+ assert_eq!(z << z, z);
+ assert_eq!(o << z, o);
+ assert_eq!(t << z, t);
+ assert_eq!(f << z, f);
+ assert_eq!(f << max, z);
+
+ assert_eq!(o << o, t);
+ assert_eq!(o << t, f);
+ assert_eq!(t << o, f);
+
+ {
+ // shr_assign
+ let mut v = o;
+ v >>= o;
+ assert_eq!(v, z);
+ }
+ {
+ // shl_assign
+ let mut v = o;
+ v <<= o;
+ assert_eq!(v, t);
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/ptr.rs b/third_party/rust/packed_simd/src/api/ptr.rs
new file mode 100644
index 0000000000..d2e523a49f
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/ptr.rs
@@ -0,0 +1,4 @@
+//! Vector of pointers
+
+#[macro_use]
+mod gather_scatter;
diff --git a/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs b/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs
new file mode 100644
index 0000000000..374482ac31
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs
@@ -0,0 +1,216 @@
+//! Implements masked gather and scatters for vectors of pointers
+
+macro_rules! impl_ptr_read {
+ ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident
+ | $test_tt:tt) => {
+ impl<T> $id<T>
+ where
+ [T; $elem_count]: sealed::SimdArray,
+ {
+ /// Reads selected vector elements from memory.
+ ///
+ /// Instantiates a new vector by reading the values from `self` for
+ /// those lanes whose `mask` is `true`, and using the elements of
+ /// `value` otherwise.
+ ///
+ /// No memory is accessed for those lanes of `self` whose `mask` is
+ /// `false`.
+ ///
+ /// # Safety
+ ///
+ /// This method is unsafe because it dereferences raw pointers. The
+ /// pointers must be aligned to `mem::align_of::<T>()`.
+ #[inline]
+ pub unsafe fn read<M>(
+ self,
+ mask: Simd<[M; $elem_count]>,
+ value: Simd<[T; $elem_count]>,
+ ) -> Simd<[T; $elem_count]>
+ where
+ M: sealed::Mask,
+ [M; $elem_count]: sealed::SimdArray,
+ {
+ use crate::llvm::simd_gather;
+ Simd(simd_gather(value.0, self.0, mask.0))
+ }
+ }
+
+ test_if! {
+ $test_tt:
+ paste::item! {
+ mod [<$id _read>] {
+ use super::*;
+ #[test]
+ fn read() {
+ let mut v = [0_i32; $elem_count];
+ for i in 0..$elem_count {
+ v[i] = i as i32;
+ }
+
+ let mut ptr = $id::<i32>::null();
+
+ for i in 0..$elem_count {
+ ptr = ptr.replace(i,
+ &v[i] as *const i32 as *mut i32
+ );
+ }
+
+ // all mask elements are true:
+ let mask = $mask_ty::splat(true);
+ let def = Simd::<[i32; $elem_count]>::splat(42_i32);
+ let r: Simd<[i32; $elem_count]> = unsafe {
+ ptr.read(mask, def)
+ };
+ assert_eq!(
+ r,
+ Simd::<[i32; $elem_count]>::from_slice_unaligned(
+ &v
+ )
+ );
+
+ let mut mask = mask;
+ for i in 0..$elem_count {
+ if i % 2 != 0 {
+ mask = mask.replace(i, false);
+ }
+ }
+
+ // even mask elements are true, odd ones are false:
+ let r: Simd<[i32; $elem_count]> = unsafe {
+ ptr.read(mask, def)
+ };
+ let mut e = v;
+ for i in 0..$elem_count {
+ if i % 2 != 0 {
+ e[i] = 42;
+ }
+ }
+ assert_eq!(
+ r,
+ Simd::<[i32; $elem_count]>::from_slice_unaligned(
+ &e
+ )
+ );
+
+ // all mask elements are false:
+ let mask = $mask_ty::splat(false);
+ let def = Simd::<[i32; $elem_count]>::splat(42_i32);
+ let r: Simd<[i32; $elem_count]> = unsafe {
+ ptr.read(mask, def) }
+ ;
+ assert_eq!(r, def);
+ }
+ }
+ }
+ }
+ };
+}
+
+macro_rules! impl_ptr_write {
+ ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident
+ | $test_tt:tt) => {
+ impl<T> $id<T>
+ where
+ [T; $elem_count]: sealed::SimdArray,
+ {
+ /// Writes selected vector elements to memory.
+ ///
+ /// Writes the lanes of `values` for which the mask is `true` to
+ /// their corresponding memory addresses in `self`.
+ ///
+ /// No memory is accessed for those lanes of `self` whose `mask` is
+ /// `false`.
+ ///
+ /// Overlapping memory addresses of `self` are written to in order
+ /// from the lest-significant to the most-significant element.
+ ///
+ /// # Safety
+ ///
+ /// This method is unsafe because it dereferences raw pointers. The
+ /// pointers must be aligned to `mem::align_of::<T>()`.
+ #[inline]
+ pub unsafe fn write<M>(self, mask: Simd<[M; $elem_count]>, value: Simd<[T; $elem_count]>)
+ where
+ M: sealed::Mask,
+ [M; $elem_count]: sealed::SimdArray,
+ {
+ use crate::llvm::simd_scatter;
+ simd_scatter(value.0, self.0, mask.0)
+ }
+ }
+
+ test_if! {
+ $test_tt:
+ paste::item! {
+ mod [<$id _write>] {
+ use super::*;
+ #[test]
+ fn write() {
+ // forty_two = [42, 42, 42, ...]
+ let forty_two
+ = Simd::<[i32; $elem_count]>::splat(42_i32);
+
+ // This test will write to this array
+ let mut arr = [0_i32; $elem_count];
+ for i in 0..$elem_count {
+ arr[i] = i as i32;
+ }
+ // arr = [0, 1, 2, ...]
+
+ let mut ptr = $id::<i32>::null();
+ for i in 0..$elem_count {
+ ptr = ptr.replace(i, unsafe {
+ arr.as_ptr().add(i) as *mut i32
+ });
+ }
+ // ptr = [&arr[0], &arr[1], ...]
+
+ // write `forty_two` to all elements of `v`
+ {
+ let backup = arr;
+ unsafe {
+ ptr.write($mask_ty::splat(true), forty_two)
+ };
+ assert_eq!(arr, [42_i32; $elem_count]);
+ arr = backup; // arr = [0, 1, 2, ...]
+ }
+
+ // write 42 to even elements of arr:
+ {
+ // set odd elements of the mask to false
+ let mut mask = $mask_ty::splat(true);
+ for i in 0..$elem_count {
+ if i % 2 != 0 {
+ mask = mask.replace(i, false);
+ }
+ }
+ // mask = [true, false, true, false, ...]
+
+ // expected result r = [42, 1, 42, 3, 42, 5, ...]
+ let mut r = arr;
+ for i in 0..$elem_count {
+ if i % 2 == 0 {
+ r[i] = 42;
+ }
+ }
+
+ let backup = arr;
+ unsafe { ptr.write(mask, forty_two) };
+ assert_eq!(arr, r);
+ arr = backup; // arr = [0, 1, 2, 3, ...]
+ }
+
+ // write 42 to no elements of arr
+ {
+ let backup = arr;
+ unsafe {
+ ptr.write($mask_ty::splat(false), forty_two)
+ };
+ assert_eq!(arr, backup);
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/reductions.rs b/third_party/rust/packed_simd/src/api/reductions.rs
new file mode 100644
index 0000000000..54d2f0cc7f
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/reductions.rs
@@ -0,0 +1,12 @@
+//! Reductions
+
+#[macro_use]
+mod float_arithmetic;
+#[macro_use]
+mod integer_arithmetic;
+#[macro_use]
+mod bitwise;
+#[macro_use]
+mod mask;
+#[macro_use]
+mod min_max;
diff --git a/third_party/rust/packed_simd/src/api/reductions/bitwise.rs b/third_party/rust/packed_simd/src/api/reductions/bitwise.rs
new file mode 100644
index 0000000000..5bad4f474b
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/reductions/bitwise.rs
@@ -0,0 +1,151 @@
+//! Implements portable horizontal bitwise vector reductions.
+#![allow(unused)]
+
+macro_rules! impl_reduction_bitwise {
+ (
+ [$elem_ty:ident; $elem_count:expr]:
+ $id:ident | $ielem_ty:ident | $test_tt:tt |
+ ($convert:expr) |
+ ($true:expr, $false:expr)
+ ) => {
+ impl $id {
+ /// Lane-wise bitwise `and` of the vector elements.
+ ///
+ /// Note: if the vector has one lane, the first element of the
+ /// vector is returned.
+ #[inline]
+ pub fn and(self) -> $elem_ty {
+ #[cfg(not(target_arch = "aarch64"))]
+ {
+ use crate::llvm::simd_reduce_and;
+ let r: $ielem_ty = unsafe { simd_reduce_and(self.0) };
+ $convert(r)
+ }
+ #[cfg(target_arch = "aarch64")]
+ {
+ // FIXME: broken on aarch64
+ // https://github.com/rust-lang-nursery/packed_simd/issues/15
+ let mut x = self.extract(0) as $elem_ty;
+ for i in 1..$id::lanes() {
+ x &= self.extract(i) as $elem_ty;
+ }
+ x
+ }
+ }
+
+ /// Lane-wise bitwise `or` of the vector elements.
+ ///
+ /// Note: if the vector has one lane, the first element of the
+ /// vector is returned.
+ #[inline]
+ pub fn or(self) -> $elem_ty {
+ #[cfg(not(target_arch = "aarch64"))]
+ {
+ use crate::llvm::simd_reduce_or;
+ let r: $ielem_ty = unsafe { simd_reduce_or(self.0) };
+ $convert(r)
+ }
+ #[cfg(target_arch = "aarch64")]
+ {
+ // FIXME: broken on aarch64
+ // https://github.com/rust-lang-nursery/packed_simd/issues/15
+ let mut x = self.extract(0) as $elem_ty;
+ for i in 1..$id::lanes() {
+ x |= self.extract(i) as $elem_ty;
+ }
+ x
+ }
+ }
+
+ /// Lane-wise bitwise `xor` of the vector elements.
+ ///
+ /// Note: if the vector has one lane, the first element of the
+ /// vector is returned.
+ #[inline]
+ pub fn xor(self) -> $elem_ty {
+ #[cfg(not(target_arch = "aarch64"))]
+ {
+ use crate::llvm::simd_reduce_xor;
+ let r: $ielem_ty = unsafe { simd_reduce_xor(self.0) };
+ $convert(r)
+ }
+ #[cfg(target_arch = "aarch64")]
+ {
+ // FIXME: broken on aarch64
+ // https://github.com/rust-lang-nursery/packed_simd/issues/15
+ let mut x = self.extract(0) as $elem_ty;
+ for i in 1..$id::lanes() {
+ x ^= self.extract(i) as $elem_ty;
+ }
+ x
+ }
+ }
+ }
+
+ test_if!{
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _reduction_bitwise>] {
+ use super::*;
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn and() {
+ let v = $id::splat($false);
+ assert_eq!(v.and(), $false);
+ let v = $id::splat($true);
+ assert_eq!(v.and(), $true);
+ let v = $id::splat($false);
+ let v = v.replace(0, $true);
+ if $id::lanes() > 1 {
+ assert_eq!(v.and(), $false);
+ } else {
+ assert_eq!(v.and(), $true);
+ }
+ let v = $id::splat($true);
+ let v = v.replace(0, $false);
+ assert_eq!(v.and(), $false);
+
+ }
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn or() {
+ let v = $id::splat($false);
+ assert_eq!(v.or(), $false);
+ let v = $id::splat($true);
+ assert_eq!(v.or(), $true);
+ let v = $id::splat($false);
+ let v = v.replace(0, $true);
+ assert_eq!(v.or(), $true);
+ let v = $id::splat($true);
+ let v = v.replace(0, $false);
+ if $id::lanes() > 1 {
+ assert_eq!(v.or(), $true);
+ } else {
+ assert_eq!(v.or(), $false);
+ }
+ }
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn xor() {
+ let v = $id::splat($false);
+ assert_eq!(v.xor(), $false);
+ let v = $id::splat($true);
+ if $id::lanes() > 1 {
+ assert_eq!(v.xor(), $false);
+ } else {
+ assert_eq!(v.xor(), $true);
+ }
+ let v = $id::splat($false);
+ let v = v.replace(0, $true);
+ assert_eq!(v.xor(), $true);
+ let v = $id::splat($true);
+ let v = v.replace(0, $false);
+ if $id::lanes() > 1 {
+ assert_eq!(v.xor(), $true);
+ } else {
+ assert_eq!(v.xor(), $false);
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs b/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs
new file mode 100644
index 0000000000..9dc8783dbb
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs
@@ -0,0 +1,313 @@
+//! Implements portable horizontal float vector arithmetic reductions.
+
+macro_rules! impl_reduction_float_arithmetic {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Horizontal sum of the vector elements.
+ ///
+ /// The intrinsic performs a tree-reduction of the vector elements.
+ /// That is, for an 8 element vector:
+ ///
+ /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
+ ///
+ /// If one of the vector element is `NaN` the reduction returns
+ /// `NaN`. The resulting `NaN` is not required to be equal to any
+ /// of the `NaN`s in the vector.
+ #[inline]
+ pub fn sum(self) -> $elem_ty {
+ #[cfg(not(target_arch = "aarch64"))]
+ {
+ use crate::llvm::simd_reduce_add_ordered;
+ unsafe { simd_reduce_add_ordered(self.0, 0 as $elem_ty) }
+ }
+ #[cfg(target_arch = "aarch64")]
+ {
+ // FIXME: broken on AArch64
+ // https://github.com/rust-lang-nursery/packed_simd/issues/15
+ let mut x = self.extract(0) as $elem_ty;
+ for i in 1..$id::lanes() {
+ x += self.extract(i) as $elem_ty;
+ }
+ x
+ }
+ }
+
+ /// Horizontal product of the vector elements.
+ ///
+ /// The intrinsic performs a tree-reduction of the vector elements.
+ /// That is, for an 8 element vector:
+ ///
+ /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
+ ///
+ /// If one of the vector element is `NaN` the reduction returns
+ /// `NaN`. The resulting `NaN` is not required to be equal to any
+ /// of the `NaN`s in the vector.
+ #[inline]
+ pub fn product(self) -> $elem_ty {
+ #[cfg(not(target_arch = "aarch64"))]
+ {
+ use crate::llvm::simd_reduce_mul_ordered;
+ unsafe { simd_reduce_mul_ordered(self.0, 1 as $elem_ty) }
+ }
+ #[cfg(target_arch = "aarch64")]
+ {
+ // FIXME: broken on AArch64
+ // https://github.com/rust-lang-nursery/packed_simd/issues/15
+ let mut x = self.extract(0) as $elem_ty;
+ for i in 1..$id::lanes() {
+ x *= self.extract(i) as $elem_ty;
+ }
+ x
+ }
+ }
+ }
+
+ impl crate::iter::Sum for $id {
+ #[inline]
+ fn sum<I: Iterator<Item = $id>>(iter: I) -> $id {
+ iter.fold($id::splat(0.), crate::ops::Add::add)
+ }
+ }
+
+ impl crate::iter::Product for $id {
+ #[inline]
+ fn product<I: Iterator<Item = $id>>(iter: I) -> $id {
+ iter.fold($id::splat(1.), crate::ops::Mul::mul)
+ }
+ }
+
+ impl<'a> crate::iter::Sum<&'a $id> for $id {
+ #[inline]
+ fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
+ iter.fold($id::splat(0.), |a, b| crate::ops::Add::add(a, *b))
+ }
+ }
+
+ impl<'a> crate::iter::Product<&'a $id> for $id {
+ #[inline]
+ fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
+ iter.fold($id::splat(1.), |a, b| crate::ops::Mul::mul(a, *b))
+ }
+ }
+
+ test_if! {
+ $test_tt:
+ paste::item! {
+ // Comparisons use integer casts within mantissa^1 range.
+ #[allow(clippy::float_cmp)]
+ pub mod [<$id _reduction_float_arith>] {
+ use super::*;
+ fn alternating(x: usize) -> $id {
+ let mut v = $id::splat(1 as $elem_ty);
+ for i in 0..$id::lanes() {
+ if i % x == 0 {
+ v = v.replace(i, 2 as $elem_ty);
+ }
+ }
+ v
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn sum() {
+ let v = $id::splat(0 as $elem_ty);
+ assert_eq!(v.sum(), 0 as $elem_ty);
+ let v = $id::splat(1 as $elem_ty);
+ assert_eq!(v.sum(), $id::lanes() as $elem_ty);
+ let v = alternating(2);
+ assert_eq!(
+ v.sum(),
+ ($id::lanes() / 2 + $id::lanes()) as $elem_ty
+ );
+ }
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn product() {
+ let v = $id::splat(0 as $elem_ty);
+ assert_eq!(v.product(), 0 as $elem_ty);
+ let v = $id::splat(1 as $elem_ty);
+ assert_eq!(v.product(), 1 as $elem_ty);
+ let f = match $id::lanes() {
+ 64 => 16,
+ 32 => 8,
+ 16 => 4,
+ _ => 2,
+ };
+ let v = alternating(f);
+ assert_eq!(
+ v.product(),
+ (2_usize.pow(($id::lanes() / f) as u32)
+ as $elem_ty)
+ );
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[allow(unreachable_code)]
+ fn sum_nan() {
+ // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732
+ // https://github.com/rust-lang-nursery/packed_simd/issues/6
+ return;
+
+ let n0 = crate::$elem_ty::NAN;
+ let v0 = $id::splat(-3.0);
+ for i in 0..$id::lanes() {
+ let mut v = v0.replace(i, n0);
+ // If the vector contains a NaN the result is NaN:
+ assert!(
+ v.sum().is_nan(),
+ "nan at {} => {} | {:?}",
+ i,
+ v.sum(),
+ v
+ );
+ for j in 0..i {
+ v = v.replace(j, n0);
+ assert!(v.sum().is_nan());
+ }
+ }
+ let v = $id::splat(n0);
+ assert!(v.sum().is_nan(), "all nans | {:?}", v);
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[allow(unreachable_code)]
+ fn product_nan() {
+ // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732
+ // https://github.com/rust-lang-nursery/packed_simd/issues/6
+ return;
+
+ let n0 = crate::$elem_ty::NAN;
+ let v0 = $id::splat(-3.0);
+ for i in 0..$id::lanes() {
+ let mut v = v0.replace(i, n0);
+ // If the vector contains a NaN the result is NaN:
+ assert!(
+ v.product().is_nan(),
+ "nan at {} => {} | {:?}",
+ i,
+ v.product(),
+ v
+ );
+ for j in 0..i {
+ v = v.replace(j, n0);
+ assert!(v.product().is_nan());
+ }
+ }
+ let v = $id::splat(n0);
+ assert!(v.product().is_nan(), "all nans | {:?}", v);
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[allow(unused, dead_code)]
+ fn sum_roundoff() {
+ // Performs a tree-reduction
+ fn tree_reduce_sum(a: &[$elem_ty]) -> $elem_ty {
+ assert!(!a.is_empty());
+ if a.len() == 1 {
+ a[0]
+ } else if a.len() == 2 {
+ a[0] + a[1]
+ } else {
+ let mid = a.len() / 2;
+ let (left, right) = a.split_at(mid);
+ tree_reduce_sum(left) + tree_reduce_sum(right)
+ }
+ }
+
+ let mut start = crate::$elem_ty::EPSILON;
+ let mut scalar_reduction = 0. as $elem_ty;
+
+ let mut v = $id::splat(0. as $elem_ty);
+ for i in 0..$id::lanes() {
+ let c = if i % 2 == 0 { 1e3 } else { -1. };
+ start *= ::core::$elem_ty::consts::PI * c;
+ scalar_reduction += start;
+ v = v.replace(i, start);
+ }
+ let simd_reduction = v.sum();
+
+ let mut a = [0. as $elem_ty; $id::lanes()];
+ v.write_to_slice_unaligned(&mut a);
+ let tree_reduction = tree_reduce_sum(&a);
+
+ // tolerate 1 ULP difference:
+ let red_bits = simd_reduction.to_bits();
+ let tree_bits = tree_reduction.to_bits();
+ assert!(
+ if red_bits > tree_bits {
+ red_bits - tree_bits
+ } else {
+ tree_bits - red_bits
+ } < 2,
+ "vector: {:?} | simd_reduction: {:?} | \
+tree_reduction: {} | scalar_reduction: {}",
+ v,
+ simd_reduction,
+ tree_reduction,
+ scalar_reduction
+ );
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[allow(unused, dead_code)]
+ fn product_roundoff() {
+ use ::core::convert::TryInto;
+ // Performs a tree-reduction
+ fn tree_reduce_product(a: &[$elem_ty]) -> $elem_ty {
+ assert!(!a.is_empty());
+ if a.len() == 1 {
+ a[0]
+ } else if a.len() == 2 {
+ a[0] * a[1]
+ } else {
+ let mid = a.len() / 2;
+ let (left, right) = a.split_at(mid);
+ tree_reduce_product(left)
+ * tree_reduce_product(right)
+ }
+ }
+
+ let mut start = crate::$elem_ty::EPSILON;
+ let mut scalar_reduction = 1. as $elem_ty;
+
+ let mut v = $id::splat(0. as $elem_ty);
+ for i in 0..$id::lanes() {
+ let c = if i % 2 == 0 { 1e3 } else { -1. };
+ start *= ::core::$elem_ty::consts::PI * c;
+ scalar_reduction *= start;
+ v = v.replace(i, start);
+ }
+ let simd_reduction = v.product();
+
+ let mut a = [0. as $elem_ty; $id::lanes()];
+ v.write_to_slice_unaligned(&mut a);
+ let tree_reduction = tree_reduce_product(&a);
+
+ // FIXME: Too imprecise, even only for product(f32x8).
+ // Figure out how to narrow this down.
+ let ulp_limit = $id::lanes() / 2;
+ let red_bits = simd_reduction.to_bits();
+ let tree_bits = tree_reduction.to_bits();
+ assert!(
+ if red_bits > tree_bits {
+ red_bits - tree_bits
+ } else {
+ tree_bits - red_bits
+ } < ulp_limit.try_into().unwrap(),
+ "vector: {:?} | simd_reduction: {:?} | \
+tree_reduction: {} | scalar_reduction: {}",
+ v,
+ simd_reduction,
+ tree_reduction,
+ scalar_reduction
+ );
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs b/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs
new file mode 100644
index 0000000000..e99e6cb5d7
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs
@@ -0,0 +1,193 @@
+//! Implements portable horizontal integer vector arithmetic reductions.
+
+macro_rules! impl_reduction_integer_arithmetic {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident
+ | $test_tt:tt) => {
+ impl $id {
+ /// Horizontal wrapping sum of the vector elements.
+ ///
+ /// The intrinsic performs a tree-reduction of the vector elements.
+ /// That is, for an 8 element vector:
+ ///
+ /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
+ ///
+ /// If an operation overflows it returns the mathematical result
+ /// modulo `2^n` where `n` is the number of times it overflows.
+ #[inline]
+ pub fn wrapping_sum(self) -> $elem_ty {
+ #[cfg(not(target_arch = "aarch64"))]
+ {
+ use crate::llvm::simd_reduce_add_ordered;
+ let v: $ielem_ty = unsafe { simd_reduce_add_ordered(self.0, 0 as $ielem_ty) };
+ v as $elem_ty
+ }
+ #[cfg(target_arch = "aarch64")]
+ {
+ // FIXME: broken on AArch64
+ // https://github.com/rust-lang-nursery/packed_simd/issues/15
+ let mut x = self.extract(0) as $elem_ty;
+ for i in 1..$id::lanes() {
+ x = x.wrapping_add(self.extract(i) as $elem_ty);
+ }
+ x
+ }
+ }
+
+ /// Horizontal wrapping product of the vector elements.
+ ///
+ /// The intrinsic performs a tree-reduction of the vector elements.
+ /// That is, for an 8 element vector:
+ ///
+ /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
+ ///
+ /// If an operation overflows it returns the mathematical result
+ /// modulo `2^n` where `n` is the number of times it overflows.
+ #[inline]
+ pub fn wrapping_product(self) -> $elem_ty {
+ #[cfg(not(target_arch = "aarch64"))]
+ {
+ use crate::llvm::simd_reduce_mul_ordered;
+ let v: $ielem_ty = unsafe { simd_reduce_mul_ordered(self.0, 1 as $ielem_ty) };
+ v as $elem_ty
+ }
+ #[cfg(target_arch = "aarch64")]
+ {
+ // FIXME: broken on AArch64
+ // https://github.com/rust-lang-nursery/packed_simd/issues/15
+ let mut x = self.extract(0) as $elem_ty;
+ for i in 1..$id::lanes() {
+ x = x.wrapping_mul(self.extract(i) as $elem_ty);
+ }
+ x
+ }
+ }
+ }
+
+ impl crate::iter::Sum for $id {
+ #[inline]
+ fn sum<I: Iterator<Item = $id>>(iter: I) -> $id {
+ iter.fold($id::splat(0), crate::ops::Add::add)
+ }
+ }
+
+ impl crate::iter::Product for $id {
+ #[inline]
+ fn product<I: Iterator<Item = $id>>(iter: I) -> $id {
+ iter.fold($id::splat(1), crate::ops::Mul::mul)
+ }
+ }
+
+ impl<'a> crate::iter::Sum<&'a $id> for $id {
+ #[inline]
+ fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
+ iter.fold($id::splat(0), |a, b| crate::ops::Add::add(a, *b))
+ }
+ }
+
+ impl<'a> crate::iter::Product<&'a $id> for $id {
+ #[inline]
+ fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
+ iter.fold($id::splat(1), |a, b| crate::ops::Mul::mul(a, *b))
+ }
+ }
+
+ test_if! {
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _reduction_int_arith>] {
+ use super::*;
+
+ fn alternating(x: usize) -> $id {
+ let mut v = $id::splat(1 as $elem_ty);
+ for i in 0..$id::lanes() {
+ if i % x == 0 {
+ v = v.replace(i, 2 as $elem_ty);
+ }
+ }
+ v
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn wrapping_sum() {
+ let v = $id::splat(0 as $elem_ty);
+ assert_eq!(v.wrapping_sum(), 0 as $elem_ty);
+ let v = $id::splat(1 as $elem_ty);
+ assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty);
+ let v = alternating(2);
+ if $id::lanes() > 1 {
+ assert_eq!(
+ v.wrapping_sum(),
+ ($id::lanes() / 2 + $id::lanes()) as $elem_ty
+ );
+ } else {
+ assert_eq!(
+ v.wrapping_sum(),
+ 2 as $elem_ty
+ );
+ }
+ }
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn wrapping_sum_overflow() {
+ let start = $elem_ty::max_value()
+ - ($id::lanes() as $elem_ty / 2);
+
+ let v = $id::splat(start as $elem_ty);
+ let vwrapping_sum = v.wrapping_sum();
+
+ let mut wrapping_sum = start;
+ for _ in 1..$id::lanes() {
+ wrapping_sum = wrapping_sum.wrapping_add(start);
+ }
+ assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v);
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn wrapping_product() {
+ let v = $id::splat(0 as $elem_ty);
+ assert_eq!(v.wrapping_product(), 0 as $elem_ty);
+ let v = $id::splat(1 as $elem_ty);
+ assert_eq!(v.wrapping_product(), 1 as $elem_ty);
+ let f = match $id::lanes() {
+ 64 => 16,
+ 32 => 8,
+ 16 => 4,
+ _ => 2,
+ };
+ let v = alternating(f);
+ if $id::lanes() > 1 {
+ assert_eq!(
+ v.wrapping_product(),
+ (2_usize.pow(($id::lanes() / f) as u32)
+ as $elem_ty)
+ );
+ } else {
+ assert_eq!(
+ v.wrapping_product(),
+ 2 as $elem_ty
+ );
+ }
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn wrapping_product_overflow() {
+ let start = $elem_ty::max_value()
+ - ($id::lanes() as $elem_ty / 2);
+
+ let v = $id::splat(start as $elem_ty);
+ let vmul = v.wrapping_product();
+
+ let mut mul = start;
+ for _ in 1..$id::lanes() {
+ mul = mul.wrapping_mul(start);
+ }
+ assert_eq!(mul, vmul, "v = {:?}", v);
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/reductions/mask.rs b/third_party/rust/packed_simd/src/api/reductions/mask.rs
new file mode 100644
index 0000000000..0dd6a84e7e
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/reductions/mask.rs
@@ -0,0 +1,89 @@
+//! Implements portable horizontal mask reductions.
+
+macro_rules! impl_reduction_mask {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Are `all` vector lanes `true`?
+ #[inline]
+ pub fn all(self) -> bool {
+ unsafe { crate::codegen::reductions::mask::All::all(self) }
+ }
+ /// Is `any` vector lane `true`?
+ #[inline]
+ pub fn any(self) -> bool {
+ unsafe { crate::codegen::reductions::mask::Any::any(self) }
+ }
+ /// Are `all` vector lanes `false`?
+ #[inline]
+ pub fn none(self) -> bool {
+ !self.any()
+ }
+ }
+
+ test_if! {
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _reduction>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn all() {
+ let a = $id::splat(true);
+ assert!(a.all());
+ let a = $id::splat(false);
+ assert!(!a.all());
+
+ if $id::lanes() > 1 {
+ for i in 0..$id::lanes() {
+ let mut a = $id::splat(true);
+ a = a.replace(i, false);
+ assert!(!a.all());
+ let mut a = $id::splat(false);
+ a = a.replace(i, true);
+ assert!(!a.all());
+ }
+ }
+ }
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn any() {
+ let a = $id::splat(true);
+ assert!(a.any());
+ let a = $id::splat(false);
+ assert!(!a.any());
+
+ if $id::lanes() > 1 {
+ for i in 0..$id::lanes() {
+ let mut a = $id::splat(true);
+ a = a.replace(i, false);
+ assert!(a.any());
+ let mut a = $id::splat(false);
+ a = a.replace(i, true);
+ assert!(a.any());
+ }
+ }
+ }
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn none() {
+ let a = $id::splat(true);
+ assert!(!a.none());
+ let a = $id::splat(false);
+ assert!(a.none());
+
+ if $id::lanes() > 1 {
+ for i in 0..$id::lanes() {
+ let mut a = $id::splat(true);
+ a = a.replace(i, false);
+ assert!(!a.none());
+ let mut a = $id::splat(false);
+ a = a.replace(i, true);
+ assert!(!a.none());
+ }
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/reductions/min_max.rs b/third_party/rust/packed_simd/src/api/reductions/min_max.rs
new file mode 100644
index 0000000000..a3ce13a451
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/reductions/min_max.rs
@@ -0,0 +1,360 @@
+//! Implements portable horizontal vector min/max reductions.
+
+macro_rules! impl_reduction_min_max {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident
+ | $ielem_ty:ident | $test_tt:tt) => {
+ impl $id {
+ /// Largest vector element value.
+ #[inline]
+ pub fn max_element(self) -> $elem_ty {
+ #[cfg(not(any(
+ target_arch = "aarch64",
+ target_arch = "arm",
+ target_arch = "powerpc64",
+ target_arch = "wasm32",
+ )))]
+ {
+ use crate::llvm::simd_reduce_max;
+ let v: $ielem_ty = unsafe { simd_reduce_max(self.0) };
+ v as $elem_ty
+ }
+ #[cfg(any(
+ target_arch = "aarch64",
+ target_arch = "arm",
+ target_arch = "powerpc64",
+ target_arch = "wasm32",
+ ))]
+ {
+ // FIXME: broken on AArch64
+ // https://github.com/rust-lang-nursery/packed_simd/issues/15
+ // FIXME: broken on WASM32
+ // https://github.com/rust-lang-nursery/packed_simd/issues/91
+ let mut x = self.extract(0);
+ for i in 1..$id::lanes() {
+ x = x.max(self.extract(i));
+ }
+ x
+ }
+ }
+
+ /// Smallest vector element value.
+ #[inline]
+ pub fn min_element(self) -> $elem_ty {
+ #[cfg(not(any(
+ target_arch = "aarch64",
+ target_arch = "arm",
+ all(target_arch = "x86", not(target_feature = "sse2")),
+ target_arch = "powerpc64",
+ target_arch = "wasm32",
+ ),))]
+ {
+ use crate::llvm::simd_reduce_min;
+ let v: $ielem_ty = unsafe { simd_reduce_min(self.0) };
+ v as $elem_ty
+ }
+ #[cfg(any(
+ target_arch = "aarch64",
+ target_arch = "arm",
+ all(target_arch = "x86", not(target_feature = "sse2")),
+ target_arch = "powerpc64",
+ target_arch = "wasm32",
+ ))]
+ {
+ // FIXME: broken on AArch64
+ // https://github.com/rust-lang-nursery/packed_simd/issues/15
+ // FIXME: broken on i586-unknown-linux-gnu
+ // https://github.com/rust-lang-nursery/packed_simd/issues/22
+ // FIXME: broken on WASM32
+ // https://github.com/rust-lang-nursery/packed_simd/issues/91
+ let mut x = self.extract(0);
+ for i in 1..$id::lanes() {
+ x = x.min(self.extract(i));
+ }
+ x
+ }
+ }
+ }
+ test_if! {$test_tt:
+ paste::item! {
+ // Comparisons use integer casts within mantissa^1 range.
+ #[allow(clippy::float_cmp)]
+ pub mod [<$id _reduction_min_max>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ pub fn max_element() {
+ let v = $id::splat(0 as $elem_ty);
+ assert_eq!(v.max_element(), 0 as $elem_ty);
+ if $id::lanes() > 1 {
+ let v = v.replace(1, 1 as $elem_ty);
+ assert_eq!(v.max_element(), 1 as $elem_ty);
+ }
+ let v = v.replace(0, 2 as $elem_ty);
+ assert_eq!(v.max_element(), 2 as $elem_ty);
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ pub fn min_element() {
+ let v = $id::splat(0 as $elem_ty);
+ assert_eq!(v.min_element(), 0 as $elem_ty);
+ if $id::lanes() > 1 {
+ let v = v.replace(1, 1 as $elem_ty);
+ assert_eq!(v.min_element(), 0 as $elem_ty);
+ }
+ let v = $id::splat(1 as $elem_ty);
+ let v = v.replace(0, 2 as $elem_ty);
+ if $id::lanes() > 1 {
+ assert_eq!(v.min_element(), 1 as $elem_ty);
+ } else {
+ assert_eq!(v.min_element(), 2 as $elem_ty);
+ }
+ if $id::lanes() > 1 {
+ let v = $id::splat(2 as $elem_ty);
+ let v = v.replace(1, 1 as $elem_ty);
+ assert_eq!(v.min_element(), 1 as $elem_ty);
+ }
+ }
+ }
+ }
+ }
+ };
+}
+
+macro_rules! test_reduction_float_min_max {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ test_if! {
+ $test_tt:
+ paste::item! {
+ // Comparisons use integer casts within mantissa^1 range.
+ #[allow(clippy::float_cmp)]
+ pub mod [<$id _reduction_min_max_nan>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn min_element_test() {
+ let n = crate::$elem_ty::NAN;
+
+ assert_eq!(n.min(-3.), -3.);
+ assert_eq!((-3. as $elem_ty).min(n), -3.);
+
+ let v0 = $id::splat(-3.);
+
+ let target_with_broken_last_lane_nan = !cfg!(any(
+ target_arch = "arm", target_arch = "aarch64",
+ all(target_arch = "x86",
+ not(target_feature = "sse2")
+ ),
+ target_arch = "powerpc64",
+ target_arch = "wasm32",
+ ));
+
+ // The vector is initialized to `-3.`s: [-3, -3, -3, -3]
+ for i in 0..$id::lanes() {
+ // We replace the i-th element of the vector with
+ // `NaN`: [-3, -3, -3, NaN]
+ let mut v = v0.replace(i, n);
+
+ // If the NaN is in the last place, the LLVM
+ // implementation of these methods is broken on some
+ // targets:
+ if i == $id::lanes() - 1 &&
+ target_with_broken_last_lane_nan {
+ assert_eq!(v.min_element(), -3.,
+ "[A]: nan at {} => {} | {:?}",
+ i, v.min_element(), v);
+
+ // If we replace all the elements in the vector
+ // up-to the `i-th` lane with `NaN`s, the result
+ // is still always `-3.` unless all elements of
+ // the vector are `NaN`s:
+ for j in 0..i {
+ v = v.replace(j, n);
+ if j == i-1 {
+ assert!(v.min_element().is_nan(),
+ "[B]: nan at {} => {} | {:?}",
+ i, v.min_element(), v);
+ } else {
+ assert_eq!(v.min_element(), -3.,
+ "[B]: nan at {} => {} | {:?}",
+ i, v.min_element(), v);
+ }
+ }
+
+ // We are done here, since we were in the last
+ // lane which is the last iteration of the loop.
+ break
+ }
+
+ // We are not in the last lane, and there is only
+ // one `NaN` in the vector.
+
+ // If the vector has one lane, the result is `NaN`:
+ if $id::lanes() == 1 {
+ assert!(v.min_element().is_nan(),
+ "[C]: all nans | v={:?} | min={} | \
+is_nan: {}",
+ v, v.min_element(),
+ v.min_element().is_nan()
+ );
+
+ // And we are done, since the vector only has
+ // one lane anyways.
+ break;
+ }
+
+ // The vector has more than one lane, since there is
+ // only one `NaN` in the vector, the result is
+ // always `-3`.
+ assert_eq!(v.min_element(), -3.,
+ "[D]: nan at {} => {} | {:?}",
+ i, v.min_element(), v);
+
+ // If we replace all the elements in the vector
+ // up-to the `i-th` lane with `NaN`s, the result is
+ // still always `-3.` unless all elements of the
+ // vector are `NaN`s:
+ for j in 0..i {
+ v = v.replace(j, n);
+
+ if i == $id::lanes() - 1 && j == i - 1 {
+ // All elements of the vector are `NaN`s,
+ // therefore the result is NaN as well.
+ //
+ // Note: the #lanes of the vector is > 1, so
+ // "i - 1" does not overflow.
+ assert!(v.min_element().is_nan(),
+ "[E]: all nans | v={:?} | min={} | \
+is_nan: {}",
+ v, v.min_element(),
+ v.min_element().is_nan());
+ } else {
+ // There are non-`NaN` elements in the
+ // vector, therefore the result is `-3.`:
+ assert_eq!(v.min_element(), -3.,
+ "[F]: nan at {} => {} | {:?}",
+ i, v.min_element(), v);
+ }
+ }
+ }
+
+ // If the vector contains all NaNs the result is NaN:
+ assert!($id::splat(n).min_element().is_nan(),
+ "all nans | v={:?} | min={} | is_nan: {}",
+ $id::splat(n), $id::splat(n).min_element(),
+ $id::splat(n).min_element().is_nan());
+ }
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn max_element_test() {
+ let n = crate::$elem_ty::NAN;
+
+ assert_eq!(n.max(-3.), -3.);
+ assert_eq!((-3. as $elem_ty).max(n), -3.);
+
+ let v0 = $id::splat(-3.);
+
+ let target_with_broken_last_lane_nan = !cfg!(any(
+ target_arch = "arm", target_arch = "aarch64",
+ target_arch = "powerpc64", target_arch = "wasm32",
+ ));
+
+ // The vector is initialized to `-3.`s: [-3, -3, -3, -3]
+ for i in 0..$id::lanes() {
+ // We replace the i-th element of the vector with
+ // `NaN`: [-3, -3, -3, NaN]
+ let mut v = v0.replace(i, n);
+
+ // If the NaN is in the last place, the LLVM
+ // implementation of these methods is broken on some
+ // targets:
+ if i == $id::lanes() - 1 &&
+ target_with_broken_last_lane_nan {
+ assert_eq!(v.max_element(), -3.,
+ "[A]: nan at {} => {} | {:?}",
+ i, v.max_element(), v);
+
+ // If we replace all the elements in the vector
+ // up-to the `i-th` lane with `NaN`s, the result
+ // is still always `-3.` unless all elements of
+ // the vector are `NaN`s:
+ for j in 0..i {
+ v = v.replace(j, n);
+ if j == i-1 {
+ assert!(v.min_element().is_nan(),
+ "[B]: nan at {} => {} | {:?}",
+ i, v.min_element(), v);
+ } else {
+ assert_eq!(v.max_element(), -3.,
+ "[B]: nan at {} => {} | {:?}",
+ i, v.max_element(), v);
+ }
+ }
+
+ // We are done here, since we were in the last
+ // lane which is the last iteration of the loop.
+ break
+ }
+
+ // We are not in the last lane, and there is only
+ // one `NaN` in the vector.
+
+ // If the vector has one lane, the result is `NaN`:
+ if $id::lanes() == 1 {
+ assert!(v.max_element().is_nan(),
+ "[C]: all nans | v={:?} | min={} | \
+is_nan: {}",
+ v, v.max_element(),
+ v.max_element().is_nan());
+
+ // And we are done, since the vector only has
+ // one lane anyways.
+ break;
+ }
+
+ // The vector has more than one lane, since there is
+ // only one `NaN` in the vector, the result is
+ // always `-3`.
+ assert_eq!(v.max_element(), -3.,
+ "[D]: nan at {} => {} | {:?}",
+ i, v.max_element(), v);
+
+ // If we replace all the elements in the vector
+ // up-to the `i-th` lane with `NaN`s, the result is
+ // still always `-3.` unless all elements of the
+ // vector are `NaN`s:
+ for j in 0..i {
+ v = v.replace(j, n);
+
+ if i == $id::lanes() - 1 && j == i - 1 {
+ // All elements of the vector are `NaN`s,
+ // therefore the result is NaN as well.
+ //
+ // Note: the #lanes of the vector is > 1, so
+ // "i - 1" does not overflow.
+ assert!(v.max_element().is_nan(),
+ "[E]: all nans | v={:?} | max={} | \
+is_nan: {}",
+ v, v.max_element(),
+ v.max_element().is_nan());
+ } else {
+ // There are non-`NaN` elements in the
+ // vector, therefore the result is `-3.`:
+ assert_eq!(v.max_element(), -3.,
+ "[F]: nan at {} => {} | {:?}",
+ i, v.max_element(), v);
+ }
+ }
+ }
+
+ // If the vector contains all NaNs the result is NaN:
+ assert!($id::splat(n).max_element().is_nan(),
+ "all nans | v={:?} | max={} | is_nan: {}",
+ $id::splat(n), $id::splat(n).max_element(),
+ $id::splat(n).max_element().is_nan());
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/select.rs b/third_party/rust/packed_simd/src/api/select.rs
new file mode 100644
index 0000000000..daf6294721
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/select.rs
@@ -0,0 +1,73 @@
+//! Implements mask's `select`.
+
+/// Implements mask select method
+macro_rules! impl_select {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Selects elements of `a` and `b` using mask.
+ ///
+ /// The lanes of the result for which the mask is `true` contain
+ /// the values of `a`. The remaining lanes contain the values of
+ /// `b`.
+ #[inline]
+ pub fn select<T>(self, a: Simd<T>, b: Simd<T>) -> Simd<T>
+ where
+ T: sealed::SimdArray<NT = <[$elem_ty; $elem_count] as sealed::SimdArray>::NT>,
+ {
+ use crate::llvm::simd_select;
+ Simd(unsafe { simd_select(self.0, a.0, b.0) })
+ }
+ }
+
+ test_select!(bool, $id, $id, (false, true) | $test_tt);
+ };
+}
+
+macro_rules! test_select {
+ (
+ $elem_ty:ident,
+ $mask_ty:ident,
+ $vec_ty:ident,($small:expr, $large:expr) |
+ $test_tt:tt
+ ) => {
+ test_if! {
+ $test_tt:
+ paste::item! {
+ pub mod [<$vec_ty _select>] {
+ use super::*;
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn select() {
+ let o = $small as $elem_ty;
+ let t = $large as $elem_ty;
+
+ let a = $vec_ty::splat(o);
+ let b = $vec_ty::splat(t);
+ let m = a.lt(b);
+ assert_eq!(m.select(a, b), a);
+
+ let m = b.lt(a);
+ assert_eq!(m.select(b, a), a);
+
+ let mut c = a;
+ let mut d = b;
+ let mut m_e = $mask_ty::splat(false);
+ for i in 0..$vec_ty::lanes() {
+ if i % 2 == 0 {
+ let c_tmp = c.extract(i);
+ c = c.replace(i, d.extract(i));
+ d = d.replace(i, c_tmp);
+ } else {
+ m_e = m_e.replace(i, true);
+ }
+ }
+
+ let m = c.lt(d);
+ assert_eq!(m_e, m);
+ assert_eq!(m.select(c, d), a);
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/shuffle.rs b/third_party/rust/packed_simd/src/api/shuffle.rs
new file mode 100644
index 0000000000..1c17bd766e
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/shuffle.rs
@@ -0,0 +1,184 @@
+//! Implements portable vector shuffles with immediate indices.
+
+// FIXME: comprehensive tests
+// https://github.com/rust-lang-nursery/packed_simd/issues/20
+
+/// Shuffles vector elements.
+///
+/// This macro returns a new vector that contains a shuffle of the elements in
+/// one (`shuffle!(vec, [indices...])`) or two (`shuffle!(vec0, vec1,
+/// [indices...])`) input vectors.
+///
+/// The type of `vec0` and `vec1` must be equal, and the element type of the
+/// resulting vector is the element type of the input vector.
+///
+/// The number of `indices` must be a power-of-two in range `[0, 64)`, since
+/// currently, the largest vector supported by the library has 64 lanes. The
+/// length of the resulting vector equals the number of indices provided.
+///
+/// The indices must be in range `[0, M * N)` where `M` is the number of input
+/// vectors (`1` or `2`) and `N` is the number of lanes of the input vectors.
+/// The indices `i` in range `[0, N)` refer to the `i`-th element of `vec0`,
+/// while the indices in range `[N, 2*N)` refer to the `i - N`-th element of
+/// `vec1`.
+///
+/// # Examples
+///
+/// Shuffling elements of two vectors:
+///
+/// ```
+/// # use packed_simd::*;
+/// # fn main() {
+/// // Shuffle allows reordering the elements:
+/// let x = i32x4::new(1, 2, 3, 4);
+/// let y = i32x4::new(5, 6, 7, 8);
+/// let r = shuffle!(x, y, [4, 0, 5, 1]);
+/// assert_eq!(r, i32x4::new(5, 1, 6, 2));
+///
+/// // The resulting vector can als be smaller than the input:
+/// let r = shuffle!(x, y, [1, 6]);
+/// assert_eq!(r, i32x2::new(2, 7));
+///
+/// // Or larger:
+/// let r = shuffle!(x, y, [1, 3, 4, 2, 1, 7, 2, 2]);
+/// assert_eq!(r, i32x8::new(2, 4, 5, 3, 2, 8, 3, 3));
+/// // At most 2 * the number of lanes in the input vector.
+/// # }
+/// ```
+///
+/// Shuffling elements of one vector:
+///
+/// ```
+/// # use packed_simd::*;
+/// # fn main() {
+/// // Shuffle allows reordering the elements of a vector:
+/// let x = i32x4::new(1, 2, 3, 4);
+/// let r = shuffle!(x, [2, 1, 3, 0]);
+/// assert_eq!(r, i32x4::new(3, 2, 4, 1));
+///
+/// // The resulting vector can be smaller than the input:
+/// let r = shuffle!(x, [1, 3]);
+/// assert_eq!(r, i32x2::new(2, 4));
+///
+/// // Equal:
+/// let r = shuffle!(x, [1, 3, 2, 0]);
+/// assert_eq!(r, i32x4::new(2, 4, 3, 1));
+///
+/// // Or larger:
+/// let r = shuffle!(x, [1, 3, 2, 2, 1, 3, 2, 2]);
+/// assert_eq!(r, i32x8::new(2, 4, 3, 3, 2, 4, 3, 3));
+/// // At most 2 * the number of lanes in the input vector.
+/// # }
+/// ```
+#[macro_export]
+macro_rules! shuffle {
+ ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr]) => {{
+ #[allow(unused_unsafe)]
+ unsafe {
+ $crate::Simd($crate::__shuffle_vector2::<{[$l0, $l1]}, _, _>(
+ $vec0.0,
+ $vec1.0,
+ ))
+ }
+ }};
+ ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr, $l2:expr, $l3:expr]) => {{
+ #[allow(unused_unsafe)]
+ unsafe {
+ $crate::Simd($crate::__shuffle_vector4::<{[$l0, $l1, $l2, $l3]}, _, _>(
+ $vec0.0,
+ $vec1.0,
+ ))
+ }
+ }};
+ ($vec0:expr, $vec1:expr,
+ [$l0:expr, $l1:expr, $l2:expr, $l3:expr,
+ $l4:expr, $l5:expr, $l6:expr, $l7:expr]) => {{
+ #[allow(unused_unsafe)]
+ unsafe {
+ $crate::Simd($crate::__shuffle_vector8::<{[$l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7]}, _, _>(
+ $vec0.0,
+ $vec1.0,
+ ))
+ }
+ }};
+ ($vec0:expr, $vec1:expr,
+ [$l0:expr, $l1:expr, $l2:expr, $l3:expr,
+ $l4:expr, $l5:expr, $l6:expr, $l7:expr,
+ $l8:expr, $l9:expr, $l10:expr, $l11:expr,
+ $l12:expr, $l13:expr, $l14:expr, $l15:expr]) => {{
+ #[allow(unused_unsafe)]
+ unsafe {
+ $crate::Simd($crate::__shuffle_vector16::<{
+ [
+ $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10,
+ $l11, $l12, $l13, $l14, $l15,
+ ]
+ }, _, _>(
+ $vec0.0,
+ $vec1.0,
+ ))
+ }
+ }};
+ ($vec0:expr, $vec1:expr,
+ [$l0:expr, $l1:expr, $l2:expr, $l3:expr,
+ $l4:expr, $l5:expr, $l6:expr, $l7:expr,
+ $l8:expr, $l9:expr, $l10:expr, $l11:expr,
+ $l12:expr, $l13:expr, $l14:expr, $l15:expr,
+ $l16:expr, $l17:expr, $l18:expr, $l19:expr,
+ $l20:expr, $l21:expr, $l22:expr, $l23:expr,
+ $l24:expr, $l25:expr, $l26:expr, $l27:expr,
+ $l28:expr, $l29:expr, $l30:expr, $l31:expr]) => {{
+ #[allow(unused_unsafe)]
+ unsafe {
+ $crate::Simd($crate::__shuffle_vector32::<{
+ [
+ $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10,
+ $l11, $l12, $l13, $l14, $l15, $l16, $l17, $l18, $l19,
+ $l20, $l21, $l22, $l23, $l24, $l25, $l26, $l27, $l28,
+ $l29, $l30, $l31,
+ ]
+ }, _, _>(
+ $vec0.0,
+ $vec1.0,
+ ))
+ }
+ }};
+ ($vec0:expr, $vec1:expr,
+ [$l0:expr, $l1:expr, $l2:expr, $l3:expr,
+ $l4:expr, $l5:expr, $l6:expr, $l7:expr,
+ $l8:expr, $l9:expr, $l10:expr, $l11:expr,
+ $l12:expr, $l13:expr, $l14:expr, $l15:expr,
+ $l16:expr, $l17:expr, $l18:expr, $l19:expr,
+ $l20:expr, $l21:expr, $l22:expr, $l23:expr,
+ $l24:expr, $l25:expr, $l26:expr, $l27:expr,
+ $l28:expr, $l29:expr, $l30:expr, $l31:expr,
+ $l32:expr, $l33:expr, $l34:expr, $l35:expr,
+ $l36:expr, $l37:expr, $l38:expr, $l39:expr,
+ $l40:expr, $l41:expr, $l42:expr, $l43:expr,
+ $l44:expr, $l45:expr, $l46:expr, $l47:expr,
+ $l48:expr, $l49:expr, $l50:expr, $l51:expr,
+ $l52:expr, $l53:expr, $l54:expr, $l55:expr,
+ $l56:expr, $l57:expr, $l58:expr, $l59:expr,
+ $l60:expr, $l61:expr, $l62:expr, $l63:expr]) => {{
+ #[allow(unused_unsafe)]
+ unsafe {
+ $crate::Simd($crate::__shuffle_vector64::<{[
+ $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10,
+ $l11, $l12, $l13, $l14, $l15, $l16, $l17, $l18, $l19,
+ $l20, $l21, $l22, $l23, $l24, $l25, $l26, $l27, $l28,
+ $l29, $l30, $l31, $l32, $l33, $l34, $l35, $l36, $l37,
+ $l38, $l39, $l40, $l41, $l42, $l43, $l44, $l45, $l46,
+ $l47, $l48, $l49, $l50, $l51, $l52, $l53, $l54, $l55,
+ $l56, $l57, $l58, $l59, $l60, $l61, $l62, $l63,
+ ]}, _, _>(
+ $vec0.0,
+ $vec1.0,
+ ))
+ }
+ }};
+ ($vec:expr, [$($l:expr),*]) => {
+ match $vec {
+ v => shuffle!(v, v, [$($l),*])
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs b/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs
new file mode 100644
index 0000000000..64536be6cb
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs
@@ -0,0 +1,159 @@
+//! Shuffle vector elements according to a dynamic vector of indices.
+
+macro_rules! impl_shuffle1_dyn {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Shuffle vector elements according to `indices`.
+ #[inline]
+ pub fn shuffle1_dyn<I>(self, indices: I) -> Self
+ where
+ Self: codegen::shuffle1_dyn::Shuffle1Dyn<Indices = I>,
+ {
+ codegen::shuffle1_dyn::Shuffle1Dyn::shuffle1_dyn(self, indices)
+ }
+ }
+ };
+}
+
+macro_rules! test_shuffle1_dyn {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ test_if! {
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _shuffle1_dyn>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn shuffle1_dyn() {
+ let increasing = {
+ let mut v = $id::splat(0 as $elem_ty);
+ for i in 0..$id::lanes() {
+ v = v.replace(i, i as $elem_ty);
+ }
+ v
+ };
+ let decreasing = {
+ let mut v = $id::splat(0 as $elem_ty);
+ for i in 0..$id::lanes() {
+ v = v.replace(
+ i,
+ ($id::lanes() - 1 - i) as $elem_ty
+ );
+ }
+ v
+ };
+
+ type Indices = <
+ $id as codegen::shuffle1_dyn::Shuffle1Dyn
+ >::Indices;
+ let increasing_ids: Indices = increasing.cast();
+ let decreasing_ids: Indices = decreasing.cast();
+
+ assert_eq!(
+ increasing.shuffle1_dyn(increasing_ids),
+ increasing,
+ "(i,i)=>i"
+ );
+ assert_eq!(
+ decreasing.shuffle1_dyn(increasing_ids),
+ decreasing,
+ "(d,i)=>d"
+ );
+ assert_eq!(
+ increasing.shuffle1_dyn(decreasing_ids),
+ decreasing,
+ "(i,d)=>d"
+ );
+ assert_eq!(
+ decreasing.shuffle1_dyn(decreasing_ids),
+ increasing,
+ "(d,d)=>i"
+ );
+
+ for i in 0..$id::lanes() {
+ let v_ids: Indices
+ = $id::splat(i as $elem_ty).cast();
+ assert_eq!(increasing.shuffle1_dyn(v_ids),
+ $id::splat(increasing.extract(i))
+ );
+ assert_eq!(decreasing.shuffle1_dyn(v_ids),
+ $id::splat(decreasing.extract(i))
+ );
+ assert_eq!(
+ $id::splat(i as $elem_ty)
+ .shuffle1_dyn(increasing_ids),
+ $id::splat(i as $elem_ty)
+ );
+ assert_eq!(
+ $id::splat(i as $elem_ty)
+ .shuffle1_dyn(decreasing_ids),
+ $id::splat(i as $elem_ty)
+ );
+ }
+ }
+ }
+ }
+ }
+ };
+}
+
+macro_rules! test_shuffle1_dyn_mask {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ test_if! {
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _shuffle1_dyn>] {
+ use super::*;
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn shuffle1_dyn() {
+ // alternating = [true, false, true, false, ...]
+ let mut alternating = $id::splat(false);
+ for i in 0..$id::lanes() {
+ if i % 2 == 0 {
+ alternating = alternating.replace(i, true);
+ }
+ }
+
+ type Indices = <
+ $id as codegen::shuffle1_dyn::Shuffle1Dyn
+ >::Indices;
+ // even = [0, 0, 2, 2, 4, 4, ..]
+ let even = {
+ let mut v = Indices::splat(0);
+ for i in 0..$id::lanes() {
+ if i % 2 == 0 {
+ v = v.replace(i, (i as u8).into());
+ } else {
+ v = v.replace(i, (i as u8 - 1).into());
+ }
+ }
+ v
+ };
+ // odd = [1, 1, 3, 3, 5, 5, ...]
+ let odd = {
+ let mut v = Indices::splat(0);
+ for i in 0..$id::lanes() {
+ if i % 2 != 0 {
+ v = v.replace(i, (i as u8).into());
+ } else {
+ v = v.replace(i, (i as u8 + 1).into());
+ }
+ }
+ v
+ };
+
+ assert_eq!(
+ alternating.shuffle1_dyn(even),
+ $id::splat(true)
+ );
+ if $id::lanes() > 1 {
+ assert_eq!(
+ alternating.shuffle1_dyn(odd),
+ $id::splat(false)
+ );
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/slice.rs b/third_party/rust/packed_simd/src/api/slice.rs
new file mode 100644
index 0000000000..526b848b5c
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/slice.rs
@@ -0,0 +1,7 @@
+//! Slice from/to methods
+
+#[macro_use]
+mod from_slice;
+
+#[macro_use]
+mod write_to_slice;
diff --git a/third_party/rust/packed_simd/src/api/slice/from_slice.rs b/third_party/rust/packed_simd/src/api/slice/from_slice.rs
new file mode 100644
index 0000000000..cafd6f8213
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/slice/from_slice.rs
@@ -0,0 +1,202 @@
+//! Implements methods to read a vector type from a slice.
+
+macro_rules! impl_slice_from_slice {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Instantiates a new vector with the values of the `slice`.
+ ///
+ /// # Panics
+ ///
+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned
+ /// to an `align_of::<Self>()` boundary.
+ #[inline]
+ pub fn from_slice_aligned(slice: &[$elem_ty]) -> Self {
+ unsafe {
+ assert!(slice.len() >= $elem_count);
+ let target_ptr = slice.as_ptr();
+ assert_eq!(target_ptr.align_offset(crate::mem::align_of::<Self>()), 0);
+ Self::from_slice_aligned_unchecked(slice)
+ }
+ }
+
+ /// Instantiates a new vector with the values of the `slice`.
+ ///
+ /// # Panics
+ ///
+ /// If `slice.len() < Self::lanes()`.
+ #[inline]
+ pub fn from_slice_unaligned(slice: &[$elem_ty]) -> Self {
+ unsafe {
+ assert!(slice.len() >= $elem_count);
+ Self::from_slice_unaligned_unchecked(slice)
+ }
+ }
+
+ /// Instantiates a new vector with the values of the `slice`.
+ ///
+ /// # Safety
+ ///
+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned
+ /// to an `align_of::<Self>()` boundary, the behavior is undefined.
+ #[inline]
+ pub unsafe fn from_slice_aligned_unchecked(slice: &[$elem_ty]) -> Self {
+ debug_assert!(slice.len() >= $elem_count);
+ let target_ptr = slice.as_ptr();
+ debug_assert_eq!(target_ptr.align_offset(crate::mem::align_of::<Self>()), 0);
+
+ #[allow(clippy::cast_ptr_alignment)]
+ *(target_ptr as *const Self)
+ }
+
+ /// Instantiates a new vector with the values of the `slice`.
+ ///
+ /// # Safety
+ ///
+ /// If `slice.len() < Self::lanes()` the behavior is undefined.
+ #[inline]
+ pub unsafe fn from_slice_unaligned_unchecked(slice: &[$elem_ty]) -> Self {
+ use crate::mem::size_of;
+ debug_assert!(slice.len() >= $elem_count);
+ let target_ptr = slice.as_ptr().cast();
+ let mut x = Self::splat(0 as $elem_ty);
+ let self_ptr = &mut x as *mut Self as *mut u8;
+ crate::ptr::copy_nonoverlapping(target_ptr, self_ptr, size_of::<Self>());
+ x
+ }
+ }
+
+ test_if! {
+ $test_tt:
+ paste::item! {
+ // Comparisons use integer casts within mantissa^1 range.
+ #[allow(clippy::float_cmp)]
+ pub mod [<$id _slice_from_slice>] {
+ use super::*;
+ use crate::iter::Iterator;
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn from_slice_unaligned() {
+ let mut unaligned = [42 as $elem_ty; $id::lanes() + 1];
+ unaligned[0] = 0 as $elem_ty;
+ let vec = $id::from_slice_unaligned(&unaligned[1..]);
+ for (index, &b) in unaligned.iter().enumerate() {
+ if index == 0 {
+ assert_eq!(b, 0 as $elem_ty);
+ } else {
+ assert_eq!(b, 42 as $elem_ty);
+ assert_eq!(b, vec.extract(index - 1));
+ }
+ }
+ }
+
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg(not(target_arch = "wasm32"))]
+ #[test]
+ #[should_panic]
+ fn from_slice_unaligned_fail() {
+ let mut unaligned = [42 as $elem_ty; $id::lanes() + 1];
+ unaligned[0] = 0 as $elem_ty;
+ // the slice is not large enough => panic
+ let _vec = $id::from_slice_unaligned(&unaligned[2..]);
+ }
+
+ union A {
+ data: [$elem_ty; 2 * $id::lanes()],
+ _vec: $id,
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn from_slice_aligned() {
+ let mut aligned = A {
+ data: [0 as $elem_ty; 2 * $id::lanes()],
+ };
+ for i in $id::lanes()..(2 * $id::lanes()) {
+ unsafe {
+ aligned.data[i] = 42 as $elem_ty;
+ }
+ }
+
+ let vec = unsafe {
+ $id::from_slice_aligned(
+ &aligned.data[$id::lanes()..]
+ )
+ };
+ for (index, &b) in
+ unsafe { aligned.data.iter().enumerate() } {
+ if index < $id::lanes() {
+ assert_eq!(b, 0 as $elem_ty);
+ } else {
+ assert_eq!(b, 42 as $elem_ty);
+ assert_eq!(
+ b, vec.extract(index - $id::lanes())
+ );
+ }
+ }
+ }
+
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg(not(target_arch = "wasm32"))]
+ #[test]
+ #[should_panic]
+ fn from_slice_aligned_fail_lanes() {
+ let aligned = A {
+ data: [0 as $elem_ty; 2 * $id::lanes()],
+ };
+ let _vec = unsafe {
+ $id::from_slice_aligned(
+ &aligned.data[2 * $id::lanes()..]
+ )
+ };
+ }
+
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg(not(target_arch = "wasm32"))]
+ #[test]
+ #[should_panic]
+ fn from_slice_aligned_fail_align() {
+ unsafe {
+ let aligned = A {
+ data: [0 as $elem_ty; 2 * $id::lanes()],
+ };
+
+ // get a pointer to the front of data
+ let ptr: *const $elem_ty = aligned.data.as_ptr()
+ as *const $elem_ty;
+ // offset pointer by one element
+ let ptr = ptr.wrapping_add(1);
+
+ if ptr.align_offset(
+ crate::mem::align_of::<$id>()
+ ) == 0 {
+ // the pointer is properly aligned, so
+ // from_slice_aligned won't fail here (e.g. this
+ // can happen for i128x1). So we panic to make
+ // the "should_fail" test pass:
+ panic!("ok");
+ }
+
+ // create a slice - this is safe, because the
+ // elements of the slice exist, are properly
+ // initialized, and properly aligned:
+ let s: &[$elem_ty] = slice::from_raw_parts(
+ ptr, $id::lanes()
+ );
+ // this should always panic because the slice
+ // alignment does not match the alignment
+ // requirements for the vector type:
+ let _vec = $id::from_slice_aligned(s);
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs b/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs
new file mode 100644
index 0000000000..5abd4916e0
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs
@@ -0,0 +1,196 @@
+//! Implements methods to write a vector type to a slice.
+
+macro_rules! impl_slice_write_to_slice {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Writes the values of the vector to the `slice`.
+ ///
+ /// # Panics
+ ///
+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not
+ /// aligned to an `align_of::<Self>()` boundary.
+ #[inline]
+ pub fn write_to_slice_aligned(self, slice: &mut [$elem_ty]) {
+ unsafe {
+ assert!(slice.len() >= $elem_count);
+ let target_ptr = slice.as_mut_ptr();
+ assert_eq!(target_ptr.align_offset(crate::mem::align_of::<Self>()), 0);
+ self.write_to_slice_aligned_unchecked(slice);
+ }
+ }
+
+ /// Writes the values of the vector to the `slice`.
+ ///
+ /// # Panics
+ ///
+ /// If `slice.len() < Self::lanes()`.
+ #[inline]
+ pub fn write_to_slice_unaligned(self, slice: &mut [$elem_ty]) {
+ unsafe {
+ assert!(slice.len() >= $elem_count);
+ self.write_to_slice_unaligned_unchecked(slice);
+ }
+ }
+
+ /// Writes the values of the vector to the `slice`.
+ ///
+ /// # Safety
+ ///
+ /// If `slice.len() < Self::lanes()` or `&slice[0]` is not
+ /// aligned to an `align_of::<Self>()` boundary, the behavior is
+ /// undefined.
+ #[inline]
+ pub unsafe fn write_to_slice_aligned_unchecked(self, slice: &mut [$elem_ty]) {
+ debug_assert!(slice.len() >= $elem_count);
+ let target_ptr = slice.as_mut_ptr();
+ debug_assert_eq!(target_ptr.align_offset(crate::mem::align_of::<Self>()), 0);
+
+ #[allow(clippy::cast_ptr_alignment)]
+ #[allow(clippy::cast_ptr_alignment)]
+ #[allow(clippy::cast_ptr_alignment)]
+ #[allow(clippy::cast_ptr_alignment)]
+ *(target_ptr as *mut Self) = self;
+ }
+
+ /// Writes the values of the vector to the `slice`.
+ ///
+ /// # Safety
+ ///
+ /// If `slice.len() < Self::lanes()` the behavior is undefined.
+ #[inline]
+ pub unsafe fn write_to_slice_unaligned_unchecked(self, slice: &mut [$elem_ty]) {
+ debug_assert!(slice.len() >= $elem_count);
+ let target_ptr = slice.as_mut_ptr().cast();
+ let self_ptr = &self as *const Self as *const u8;
+ crate::ptr::copy_nonoverlapping(self_ptr, target_ptr, crate::mem::size_of::<Self>());
+ }
+ }
+
+ test_if! {
+ $test_tt:
+ paste::item! {
+ // Comparisons use integer casts within mantissa^1 range.
+ #[allow(clippy::float_cmp)]
+ pub mod [<$id _slice_write_to_slice>] {
+ use super::*;
+ use crate::iter::Iterator;
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn write_to_slice_unaligned() {
+ let mut unaligned = [0 as $elem_ty; $id::lanes() + 1];
+ let vec = $id::splat(42 as $elem_ty);
+ vec.write_to_slice_unaligned(&mut unaligned[1..]);
+ for (index, &b) in unaligned.iter().enumerate() {
+ if index == 0 {
+ assert_eq!(b, 0 as $elem_ty);
+ } else {
+ assert_eq!(b, 42 as $elem_ty);
+ assert_eq!(b, vec.extract(index - 1));
+ }
+ }
+ }
+
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg(not(target_arch = "wasm32"))]
+ #[test]
+ #[should_panic]
+ fn write_to_slice_unaligned_fail() {
+ let mut unaligned = [0 as $elem_ty; $id::lanes() + 1];
+ let vec = $id::splat(42 as $elem_ty);
+ vec.write_to_slice_unaligned(&mut unaligned[2..]);
+ }
+
+ union A {
+ data: [$elem_ty; 2 * $id::lanes()],
+ _vec: $id,
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)]
+ #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn write_to_slice_aligned() {
+ let mut aligned = A {
+ data: [0 as $elem_ty; 2 * $id::lanes()],
+ };
+ let vec = $id::splat(42 as $elem_ty);
+ unsafe {
+ vec.write_to_slice_aligned(
+ &mut aligned.data[$id::lanes()..]
+ );
+ for (idx, &b) in aligned.data.iter().enumerate() {
+ if idx < $id::lanes() {
+ assert_eq!(b, 0 as $elem_ty);
+ } else {
+ assert_eq!(b, 42 as $elem_ty);
+ assert_eq!(
+ b, vec.extract(idx - $id::lanes())
+ );
+ }
+ }
+ }
+ }
+
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg(not(target_arch = "wasm32"))]
+ #[test]
+ #[should_panic]
+ fn write_to_slice_aligned_fail_lanes() {
+ let mut aligned = A {
+ data: [0 as $elem_ty; 2 * $id::lanes()],
+ };
+ let vec = $id::splat(42 as $elem_ty);
+ unsafe {
+ vec.write_to_slice_aligned(
+ &mut aligned.data[2 * $id::lanes()..]
+ )
+ };
+ }
+
+ // FIXME: wasm-bindgen-test does not support #[should_panic]
+ // #[cfg_attr(not(target_arch = "wasm32"), test)]
+ // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ #[cfg(not(target_arch = "wasm32"))]
+ #[test]
+ #[should_panic]
+ fn write_to_slice_aligned_fail_align() {
+ unsafe {
+ let mut aligned = A {
+ data: [0 as $elem_ty; 2 * $id::lanes()],
+ };
+
+ // get a pointer to the front of data
+ let ptr: *mut $elem_ty
+ = aligned.data.as_mut_ptr() as *mut $elem_ty;
+ // offset pointer by one element
+ let ptr = ptr.wrapping_add(1);
+
+ if ptr.align_offset(crate::mem::align_of::<$id>())
+ == 0 {
+ // the pointer is properly aligned, so
+ // write_to_slice_aligned won't fail here (e.g.
+ // this can happen for i128x1). So we panic to
+ // make the "should_fail" test pass:
+ panic!("ok");
+ }
+
+ // create a slice - this is safe, because the
+ // elements of the slice exist, are properly
+ // initialized, and properly aligned:
+ let s: &mut [$elem_ty]
+ = slice::from_raw_parts_mut(ptr, $id::lanes());
+ // this should always panic because the slice
+ // alignment does not match the alignment
+ // requirements for the vector type:
+ let vec = $id::splat(42 as $elem_ty);
+ vec.write_to_slice_aligned(s);
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/api/swap_bytes.rs b/third_party/rust/packed_simd/src/api/swap_bytes.rs
new file mode 100644
index 0000000000..4649ed679b
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/swap_bytes.rs
@@ -0,0 +1,192 @@
+//! Horizontal swap bytes
+
+macro_rules! impl_swap_bytes {
+ ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+ impl $id {
+ /// Reverses the byte order of the vector.
+ #[inline]
+ pub fn swap_bytes(self) -> Self {
+ super::codegen::swap_bytes::SwapBytes::swap_bytes(self)
+ }
+
+ /// Converts self to little endian from the target's endianness.
+ ///
+ /// On little endian this is a no-op. On big endian the bytes are
+ /// swapped.
+ #[inline]
+ pub fn to_le(self) -> Self {
+ #[cfg(target_endian = "little")]
+ {
+ self
+ }
+ #[cfg(not(target_endian = "little"))]
+ {
+ self.swap_bytes()
+ }
+ }
+
+ /// Converts self to big endian from the target's endianness.
+ ///
+ /// On big endian this is a no-op. On little endian the bytes are
+ /// swapped.
+ #[inline]
+ pub fn to_be(self) -> Self {
+ #[cfg(target_endian = "big")]
+ {
+ self
+ }
+ #[cfg(not(target_endian = "big"))]
+ {
+ self.swap_bytes()
+ }
+ }
+
+ /// Converts a vector from little endian to the target's endianness.
+ ///
+ /// On little endian this is a no-op. On big endian the bytes are
+ /// swapped.
+ #[inline]
+ pub fn from_le(x: Self) -> Self {
+ #[cfg(target_endian = "little")]
+ {
+ x
+ }
+ #[cfg(not(target_endian = "little"))]
+ {
+ x.swap_bytes()
+ }
+ }
+
+ /// Converts a vector from big endian to the target's endianness.
+ ///
+ /// On big endian this is a no-op. On little endian the bytes are
+ /// swapped.
+ #[inline]
+ pub fn from_be(x: Self) -> Self {
+ #[cfg(target_endian = "big")]
+ {
+ x
+ }
+ #[cfg(not(target_endian = "big"))]
+ {
+ x.swap_bytes()
+ }
+ }
+ }
+
+ test_if! {
+ $test_tt:
+ paste::item! {
+ pub mod [<$id _swap_bytes>] {
+ use super::*;
+
+ const BYTES: [u8; 64] = [
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63,
+ ];
+
+ macro_rules! swap {
+ ($func: ident) => {{
+ // catch possible future >512 vectors
+ assert!(mem::size_of::<$id>() <= 64);
+
+ let mut actual = BYTES;
+ let elems: &mut [$elem_ty] = unsafe {
+ slice::from_raw_parts_mut(
+ actual.as_mut_ptr() as *mut $elem_ty,
+ $id::lanes(),
+ )
+ };
+
+ let vec = $id::from_slice_unaligned(elems);
+ $id::$func(vec).write_to_slice_unaligned(elems);
+
+ actual
+ }};
+ }
+
+ macro_rules! test_swap {
+ ($func: ident) => {{
+ let actual = swap!($func);
+ let expected =
+ BYTES.iter().rev()
+ .skip(64 - crate::mem::size_of::<$id>());
+ assert!(actual.iter().zip(expected)
+ .all(|(x, y)| x == y));
+ }};
+ }
+
+ macro_rules! test_no_swap {
+ ($func: ident) => {{
+ let actual = swap!($func);
+ let expected = BYTES.iter()
+ .take(mem::size_of::<$id>());
+
+ assert!(actual.iter().zip(expected)
+ .all(|(x, y)| x == y));
+ }};
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn swap_bytes() {
+ test_swap!(swap_bytes);
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn to_le() {
+ #[cfg(target_endian = "little")]
+ {
+ test_no_swap!(to_le);
+ }
+ #[cfg(not(target_endian = "little"))]
+ {
+ test_swap!(to_le);
+ }
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn to_be() {
+ #[cfg(target_endian = "big")]
+ {
+ test_no_swap!(to_be);
+ }
+ #[cfg(not(target_endian = "big"))]
+ {
+ test_swap!(to_be);
+ }
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn from_le() {
+ #[cfg(target_endian = "little")]
+ {
+ test_no_swap!(from_le);
+ }
+ #[cfg(not(target_endian = "little"))]
+ {
+ test_swap!(from_le);
+ }
+ }
+
+ #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+ fn from_be() {
+ #[cfg(target_endian = "big")]
+ {
+ test_no_swap!(from_be);
+ }
+ #[cfg(not(target_endian = "big"))]
+ {
+ test_swap!(from_be);
+ }
+ }
+ }
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/codegen.rs b/third_party/rust/packed_simd/src/codegen.rs
new file mode 100644
index 0000000000..8a9e971486
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen.rs
@@ -0,0 +1,62 @@
+//! Code-generation utilities
+
+pub(crate) mod bit_manip;
+pub(crate) mod llvm;
+pub(crate) mod math;
+pub(crate) mod reductions;
+pub(crate) mod shuffle;
+pub(crate) mod shuffle1_dyn;
+pub(crate) mod swap_bytes;
+
+macro_rules! impl_simd_array {
+ ([$elem_ty:ident; $elem_count:expr]:
+ $tuple_id:ident | $($elem_tys:ident),*) => {
+ #[derive(Copy, Clone)]
+ #[repr(simd)]
+ pub struct $tuple_id($(pub(crate) $elem_tys),*);
+ //^^^^^^^ leaked through SimdArray
+
+ impl crate::sealed::Seal for [$elem_ty; $elem_count] {}
+
+ impl crate::sealed::SimdArray for [$elem_ty; $elem_count] {
+ type Tuple = $tuple_id;
+ type T = $elem_ty;
+ const N: usize = $elem_count;
+ type NT = [u32; $elem_count];
+ }
+
+ impl crate::sealed::Seal for $tuple_id {}
+ impl crate::sealed::Simd for $tuple_id {
+ type Element = $elem_ty;
+ const LANES: usize = $elem_count;
+ type LanesType = [u32; $elem_count];
+ }
+
+ }
+}
+
+pub(crate) mod pointer_sized_int;
+
+pub(crate) mod v16;
+pub(crate) use self::v16::*;
+
+pub(crate) mod v32;
+pub(crate) use self::v32::*;
+
+pub(crate) mod v64;
+pub(crate) use self::v64::*;
+
+pub(crate) mod v128;
+pub(crate) use self::v128::*;
+
+pub(crate) mod v256;
+pub(crate) use self::v256::*;
+
+pub(crate) mod v512;
+pub(crate) use self::v512::*;
+
+pub(crate) mod vSize;
+pub(crate) use self::vSize::*;
+
+pub(crate) mod vPtr;
+pub(crate) use self::vPtr::*;
diff --git a/third_party/rust/packed_simd/src/codegen/bit_manip.rs b/third_party/rust/packed_simd/src/codegen/bit_manip.rs
new file mode 100644
index 0000000000..32d8d717a0
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/bit_manip.rs
@@ -0,0 +1,347 @@
+//! LLVM bit manipulation intrinsics.
+#[rustfmt::skip]
+
+pub(crate) use crate::*;
+
+#[allow(improper_ctypes, dead_code)]
+extern "C" {
+ #[link_name = "llvm.ctlz.v2i8"]
+ fn ctlz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2;
+ #[link_name = "llvm.ctlz.v4i8"]
+ fn ctlz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4;
+ #[link_name = "llvm.ctlz.v8i8"]
+ fn ctlz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8;
+ #[link_name = "llvm.ctlz.v16i8"]
+ fn ctlz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16;
+ #[link_name = "llvm.ctlz.v32i8"]
+ fn ctlz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32;
+ #[link_name = "llvm.ctlz.v64i8"]
+ fn ctlz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64;
+
+ #[link_name = "llvm.ctlz.v2i16"]
+ fn ctlz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2;
+ #[link_name = "llvm.ctlz.v4i16"]
+ fn ctlz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4;
+ #[link_name = "llvm.ctlz.v8i16"]
+ fn ctlz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8;
+ #[link_name = "llvm.ctlz.v16i16"]
+ fn ctlz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16;
+ #[link_name = "llvm.ctlz.v32i16"]
+ fn ctlz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32;
+
+ #[link_name = "llvm.ctlz.v2i32"]
+ fn ctlz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2;
+ #[link_name = "llvm.ctlz.v4i32"]
+ fn ctlz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4;
+ #[link_name = "llvm.ctlz.v8i32"]
+ fn ctlz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8;
+ #[link_name = "llvm.ctlz.v16i32"]
+ fn ctlz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16;
+
+ #[link_name = "llvm.ctlz.v2i64"]
+ fn ctlz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2;
+ #[link_name = "llvm.ctlz.v4i64"]
+ fn ctlz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4;
+ #[link_name = "llvm.ctlz.v8i64"]
+ fn ctlz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8;
+
+ #[link_name = "llvm.ctlz.v1i128"]
+ fn ctlz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1;
+ #[link_name = "llvm.ctlz.v2i128"]
+ fn ctlz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2;
+ #[link_name = "llvm.ctlz.v4i128"]
+ fn ctlz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4;
+
+ #[link_name = "llvm.cttz.v2i8"]
+ fn cttz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2;
+ #[link_name = "llvm.cttz.v4i8"]
+ fn cttz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4;
+ #[link_name = "llvm.cttz.v8i8"]
+ fn cttz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8;
+ #[link_name = "llvm.cttz.v16i8"]
+ fn cttz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16;
+ #[link_name = "llvm.cttz.v32i8"]
+ fn cttz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32;
+ #[link_name = "llvm.cttz.v64i8"]
+ fn cttz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64;
+
+ #[link_name = "llvm.cttz.v2i16"]
+ fn cttz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2;
+ #[link_name = "llvm.cttz.v4i16"]
+ fn cttz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4;
+ #[link_name = "llvm.cttz.v8i16"]
+ fn cttz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8;
+ #[link_name = "llvm.cttz.v16i16"]
+ fn cttz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16;
+ #[link_name = "llvm.cttz.v32i16"]
+ fn cttz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32;
+
+ #[link_name = "llvm.cttz.v2i32"]
+ fn cttz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2;
+ #[link_name = "llvm.cttz.v4i32"]
+ fn cttz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4;
+ #[link_name = "llvm.cttz.v8i32"]
+ fn cttz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8;
+ #[link_name = "llvm.cttz.v16i32"]
+ fn cttz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16;
+
+ #[link_name = "llvm.cttz.v2i64"]
+ fn cttz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2;
+ #[link_name = "llvm.cttz.v4i64"]
+ fn cttz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4;
+ #[link_name = "llvm.cttz.v8i64"]
+ fn cttz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8;
+
+ #[link_name = "llvm.cttz.v1i128"]
+ fn cttz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1;
+ #[link_name = "llvm.cttz.v2i128"]
+ fn cttz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2;
+ #[link_name = "llvm.cttz.v4i128"]
+ fn cttz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4;
+
+ #[link_name = "llvm.ctpop.v2i8"]
+ fn ctpop_u8x2(x: u8x2) -> u8x2;
+ #[link_name = "llvm.ctpop.v4i8"]
+ fn ctpop_u8x4(x: u8x4) -> u8x4;
+ #[link_name = "llvm.ctpop.v8i8"]
+ fn ctpop_u8x8(x: u8x8) -> u8x8;
+ #[link_name = "llvm.ctpop.v16i8"]
+ fn ctpop_u8x16(x: u8x16) -> u8x16;
+ #[link_name = "llvm.ctpop.v32i8"]
+ fn ctpop_u8x32(x: u8x32) -> u8x32;
+ #[link_name = "llvm.ctpop.v64i8"]
+ fn ctpop_u8x64(x: u8x64) -> u8x64;
+
+ #[link_name = "llvm.ctpop.v2i16"]
+ fn ctpop_u16x2(x: u16x2) -> u16x2;
+ #[link_name = "llvm.ctpop.v4i16"]
+ fn ctpop_u16x4(x: u16x4) -> u16x4;
+ #[link_name = "llvm.ctpop.v8i16"]
+ fn ctpop_u16x8(x: u16x8) -> u16x8;
+ #[link_name = "llvm.ctpop.v16i16"]
+ fn ctpop_u16x16(x: u16x16) -> u16x16;
+ #[link_name = "llvm.ctpop.v32i16"]
+ fn ctpop_u16x32(x: u16x32) -> u16x32;
+
+ #[link_name = "llvm.ctpop.v2i32"]
+ fn ctpop_u32x2(x: u32x2) -> u32x2;
+ #[link_name = "llvm.ctpop.v4i32"]
+ fn ctpop_u32x4(x: u32x4) -> u32x4;
+ #[link_name = "llvm.ctpop.v8i32"]
+ fn ctpop_u32x8(x: u32x8) -> u32x8;
+ #[link_name = "llvm.ctpop.v16i32"]
+ fn ctpop_u32x16(x: u32x16) -> u32x16;
+
+ #[link_name = "llvm.ctpop.v2i64"]
+ fn ctpop_u64x2(x: u64x2) -> u64x2;
+ #[link_name = "llvm.ctpop.v4i64"]
+ fn ctpop_u64x4(x: u64x4) -> u64x4;
+ #[link_name = "llvm.ctpop.v8i64"]
+ fn ctpop_u64x8(x: u64x8) -> u64x8;
+
+ #[link_name = "llvm.ctpop.v1i128"]
+ fn ctpop_u128x1(x: u128x1) -> u128x1;
+ #[link_name = "llvm.ctpop.v2i128"]
+ fn ctpop_u128x2(x: u128x2) -> u128x2;
+ #[link_name = "llvm.ctpop.v4i128"]
+ fn ctpop_u128x4(x: u128x4) -> u128x4;
+}
+
+pub(crate) trait BitManip {
+ fn ctpop(self) -> Self;
+ fn ctlz(self) -> Self;
+ fn cttz(self) -> Self;
+}
+
+macro_rules! impl_bit_manip {
+ (inner: $ty:ident, $scalar:ty, $uty:ident,
+ $ctpop:ident, $ctlz:ident, $cttz:ident) => {
+ // FIXME: several LLVM intrinsics break on s390x https://github.com/rust-lang-nursery/packed_simd/issues/192
+ #[cfg(target_arch = "s390x")]
+ impl_bit_manip! { scalar: $ty, $scalar }
+ #[cfg(not(target_arch = "s390x"))]
+ impl BitManip for $ty {
+ #[inline]
+ fn ctpop(self) -> Self {
+ let y: $uty = self.cast();
+ unsafe { $ctpop(y).cast() }
+ }
+
+ #[inline]
+ fn ctlz(self) -> Self {
+ let y: $uty = self.cast();
+ // the ctxx intrinsics need compile-time constant
+ // `is_zero_undef`
+ unsafe { $ctlz(y, false).cast() }
+ }
+
+ #[inline]
+ fn cttz(self) -> Self {
+ let y: $uty = self.cast();
+ unsafe { $cttz(y, false).cast() }
+ }
+ }
+ };
+ (sized_inner: $ty:ident, $scalar:ty, $uty:ident) => {
+ #[cfg(target_arch = "s390x")]
+ impl_bit_manip! { scalar: $ty, $scalar }
+ #[cfg(not(target_arch = "s390x"))]
+ impl BitManip for $ty {
+ #[inline]
+ fn ctpop(self) -> Self {
+ let y: $uty = self.cast();
+ $uty::ctpop(y).cast()
+ }
+
+ #[inline]
+ fn ctlz(self) -> Self {
+ let y: $uty = self.cast();
+ $uty::ctlz(y).cast()
+ }
+
+ #[inline]
+ fn cttz(self) -> Self {
+ let y: $uty = self.cast();
+ $uty::cttz(y).cast()
+ }
+ }
+ };
+ (scalar: $ty:ident, $scalar:ty) => {
+ impl BitManip for $ty {
+ #[inline]
+ fn ctpop(self) -> Self {
+ let mut ones = self;
+ for i in 0..Self::lanes() {
+ ones = ones.replace(i, self.extract(i).count_ones() as $scalar);
+ }
+ ones
+ }
+
+ #[inline]
+ fn ctlz(self) -> Self {
+ let mut lz = self;
+ for i in 0..Self::lanes() {
+ lz = lz.replace(i, self.extract(i).leading_zeros() as $scalar);
+ }
+ lz
+ }
+
+ #[inline]
+ fn cttz(self) -> Self {
+ let mut tz = self;
+ for i in 0..Self::lanes() {
+ tz = tz.replace(i, self.extract(i).trailing_zeros() as $scalar);
+ }
+ tz
+ }
+ }
+ };
+ ($uty:ident, $uscalar:ty, $ity:ident, $iscalar:ty,
+ $ctpop:ident, $ctlz:ident, $cttz:ident) => {
+ impl_bit_manip! { inner: $uty, $uscalar, $uty, $ctpop, $ctlz, $cttz }
+ impl_bit_manip! { inner: $ity, $iscalar, $uty, $ctpop, $ctlz, $cttz }
+ };
+ (sized: $usize:ident, $uscalar:ty, $isize:ident,
+ $iscalar:ty, $ty:ident) => {
+ impl_bit_manip! { sized_inner: $usize, $uscalar, $ty }
+ impl_bit_manip! { sized_inner: $isize, $iscalar, $ty }
+ };
+}
+
+impl_bit_manip! { u8x2 , u8, i8x2, i8, ctpop_u8x2, ctlz_u8x2, cttz_u8x2 }
+impl_bit_manip! { u8x4 , u8, i8x4, i8, ctpop_u8x4, ctlz_u8x4, cttz_u8x4 }
+#[cfg(not(target_arch = "aarch64"))] // see below
+impl_bit_manip! { u8x8 , u8, i8x8, i8, ctpop_u8x8, ctlz_u8x8, cttz_u8x8 }
+impl_bit_manip! { u8x16 , u8, i8x16, i8, ctpop_u8x16, ctlz_u8x16, cttz_u8x16 }
+impl_bit_manip! { u8x32 , u8, i8x32, i8, ctpop_u8x32, ctlz_u8x32, cttz_u8x32 }
+impl_bit_manip! { u8x64 , u8, i8x64, i8, ctpop_u8x64, ctlz_u8x64, cttz_u8x64 }
+impl_bit_manip! { u16x2 , u16, i16x2, i16, ctpop_u16x2, ctlz_u16x2, cttz_u16x2 }
+impl_bit_manip! { u16x4 , u16, i16x4, i16, ctpop_u16x4, ctlz_u16x4, cttz_u16x4 }
+impl_bit_manip! { u16x8 , u16, i16x8, i16, ctpop_u16x8, ctlz_u16x8, cttz_u16x8 }
+impl_bit_manip! { u16x16 , u16, i16x16, i16, ctpop_u16x16, ctlz_u16x16, cttz_u16x16 }
+impl_bit_manip! { u16x32 , u16, i16x32, i16, ctpop_u16x32, ctlz_u16x32, cttz_u16x32 }
+impl_bit_manip! { u32x2 , u32, i32x2, i32, ctpop_u32x2, ctlz_u32x2, cttz_u32x2 }
+impl_bit_manip! { u32x4 , u32, i32x4, i32, ctpop_u32x4, ctlz_u32x4, cttz_u32x4 }
+impl_bit_manip! { u32x8 , u32, i32x8, i32, ctpop_u32x8, ctlz_u32x8, cttz_u32x8 }
+impl_bit_manip! { u32x16 , u32, i32x16, i32, ctpop_u32x16, ctlz_u32x16, cttz_u32x16 }
+impl_bit_manip! { u64x2 , u64, i64x2, i64, ctpop_u64x2, ctlz_u64x2, cttz_u64x2 }
+impl_bit_manip! { u64x4 , u64, i64x4, i64, ctpop_u64x4, ctlz_u64x4, cttz_u64x4 }
+impl_bit_manip! { u64x8 , u64, i64x8, i64, ctpop_u64x8, ctlz_u64x8, cttz_u64x8 }
+impl_bit_manip! { u128x1 , u128, i128x1, i128, ctpop_u128x1, ctlz_u128x1, cttz_u128x1 }
+impl_bit_manip! { u128x2 , u128, i128x2, i128, ctpop_u128x2, ctlz_u128x2, cttz_u128x2 }
+impl_bit_manip! { u128x4 , u128, i128x4, i128, ctpop_u128x4, ctlz_u128x4, cttz_u128x4 }
+
+#[cfg(target_arch = "aarch64")]
+impl BitManip for u8x8 {
+ #[inline]
+ fn ctpop(self) -> Self {
+ let y: u8x8 = self.cast();
+ unsafe { ctpop_u8x8(y).cast() }
+ }
+
+ #[inline]
+ fn ctlz(self) -> Self {
+ let y: u8x8 = self.cast();
+ unsafe { ctlz_u8x8(y, false).cast() }
+ }
+
+ #[inline]
+ fn cttz(self) -> Self {
+ // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191
+ // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64
+ // intrinsics
+ let mut tz = self;
+ for i in 0..Self::lanes() {
+ tz = tz.replace(i, self.extract(i).trailing_zeros() as u8);
+ }
+ tz
+ }
+}
+#[cfg(target_arch = "aarch64")]
+impl BitManip for i8x8 {
+ #[inline]
+ fn ctpop(self) -> Self {
+ let y: u8x8 = self.cast();
+ unsafe { ctpop_u8x8(y).cast() }
+ }
+
+ #[inline]
+ fn ctlz(self) -> Self {
+ let y: u8x8 = self.cast();
+ unsafe { ctlz_u8x8(y, false).cast() }
+ }
+
+ #[inline]
+ fn cttz(self) -> Self {
+ // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191
+ // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64
+ // intrinsics
+ let mut tz = self;
+ for i in 0..Self::lanes() {
+ tz = tz.replace(i, self.extract(i).trailing_zeros() as i8);
+ }
+ tz
+ }
+}
+
+cfg_if! {
+ if #[cfg(target_pointer_width = "8")] {
+ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u8x2 }
+ impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u8x4 }
+ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u8x8 }
+ } else if #[cfg(target_pointer_width = "16")] {
+ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u16x2 }
+ impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u16x4 }
+ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u16x8 }
+ } else if #[cfg(target_pointer_width = "32")] {
+ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u32x2 }
+ impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u32x4 }
+ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u32x8 }
+ } else if #[cfg(target_pointer_width = "64")] {
+ impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u64x2 }
+ impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u64x4 }
+ impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u64x8 }
+ } else {
+ compile_error!("unsupported target_pointer_width");
+ }
+}
diff --git a/third_party/rust/packed_simd/src/codegen/llvm.rs b/third_party/rust/packed_simd/src/codegen/llvm.rs
new file mode 100644
index 0000000000..bb482fac66
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/llvm.rs
@@ -0,0 +1,122 @@
+//! LLVM's platform intrinsics
+#![allow(dead_code)]
+
+use crate::sealed::Shuffle;
+#[allow(unused_imports)] // FIXME: spurious warning?
+use crate::sealed::Simd;
+
+extern "platform-intrinsic" {
+ fn simd_shuffle<T, I, U>(x: T, y: T, idx: I) -> U;
+}
+
+#[allow(clippy::missing_safety_doc)]
+#[inline]
+pub unsafe fn __shuffle_vector2<const IDX: [u32; 2], T, U>(x: T, y: T) -> U
+where
+ T: Simd,
+ <T as Simd>::Element: Shuffle<[u32; 2], Output = U>,
+{
+ simd_shuffle(x, y, IDX)
+}
+
+#[allow(clippy::missing_safety_doc)]
+#[inline]
+pub unsafe fn __shuffle_vector4<const IDX: [u32; 4], T, U>(x: T, y: T) -> U
+where
+ T: Simd,
+ <T as Simd>::Element: Shuffle<[u32; 4], Output = U>,
+{
+ simd_shuffle(x, y, IDX)
+}
+
+#[allow(clippy::missing_safety_doc)]
+#[inline]
+pub unsafe fn __shuffle_vector8<const IDX: [u32; 8], T, U>(x: T, y: T) -> U
+where
+ T: Simd,
+ <T as Simd>::Element: Shuffle<[u32; 8], Output = U>,
+{
+ simd_shuffle(x, y, IDX)
+}
+
+#[allow(clippy::missing_safety_doc)]
+#[inline]
+pub unsafe fn __shuffle_vector16<const IDX: [u32; 16], T, U>(x: T, y: T) -> U
+where
+ T: Simd,
+ <T as Simd>::Element: Shuffle<[u32; 16], Output = U>,
+{
+ simd_shuffle(x, y, IDX)
+}
+
+#[allow(clippy::missing_safety_doc)]
+#[inline]
+pub unsafe fn __shuffle_vector32<const IDX: [u32; 32], T, U>(x: T, y: T) -> U
+where
+ T: Simd,
+ <T as Simd>::Element: Shuffle<[u32; 32], Output = U>,
+{
+ simd_shuffle(x, y, IDX)
+}
+
+#[allow(clippy::missing_safety_doc)]
+#[inline]
+pub unsafe fn __shuffle_vector64<const IDX: [u32; 64], T, U>(x: T, y: T) -> U
+where
+ T: Simd,
+ <T as Simd>::Element: Shuffle<[u32; 64], Output = U>,
+{
+ simd_shuffle(x, y, IDX)
+}
+
+extern "platform-intrinsic" {
+ pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
+ pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
+ pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
+ pub(crate) fn simd_le<T, U>(x: T, y: T) -> U;
+ pub(crate) fn simd_gt<T, U>(x: T, y: T) -> U;
+ pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
+
+ pub(crate) fn simd_insert<T, U>(x: T, idx: u32, val: U) -> T;
+ pub(crate) fn simd_extract<T, U>(x: T, idx: u32) -> U;
+
+ pub(crate) fn simd_cast<T, U>(x: T) -> U;
+
+ pub(crate) fn simd_add<T>(x: T, y: T) -> T;
+ pub(crate) fn simd_sub<T>(x: T, y: T) -> T;
+ pub(crate) fn simd_mul<T>(x: T, y: T) -> T;
+ pub(crate) fn simd_div<T>(x: T, y: T) -> T;
+ pub(crate) fn simd_rem<T>(x: T, y: T) -> T;
+ pub(crate) fn simd_shl<T>(x: T, y: T) -> T;
+ pub(crate) fn simd_shr<T>(x: T, y: T) -> T;
+ pub(crate) fn simd_and<T>(x: T, y: T) -> T;
+ pub(crate) fn simd_or<T>(x: T, y: T) -> T;
+ pub(crate) fn simd_xor<T>(x: T, y: T) -> T;
+
+ pub(crate) fn simd_reduce_add_unordered<T, U>(x: T) -> U;
+ pub(crate) fn simd_reduce_mul_unordered<T, U>(x: T) -> U;
+ pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, acc: U) -> U;
+ pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, acc: U) -> U;
+ pub(crate) fn simd_reduce_min<T, U>(x: T) -> U;
+ pub(crate) fn simd_reduce_max<T, U>(x: T) -> U;
+ pub(crate) fn simd_reduce_min_nanless<T, U>(x: T) -> U;
+ pub(crate) fn simd_reduce_max_nanless<T, U>(x: T) -> U;
+ pub(crate) fn simd_reduce_and<T, U>(x: T) -> U;
+ pub(crate) fn simd_reduce_or<T, U>(x: T) -> U;
+ pub(crate) fn simd_reduce_xor<T, U>(x: T) -> U;
+ pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
+ pub(crate) fn simd_reduce_any<T>(x: T) -> bool;
+
+ pub(crate) fn simd_select<M, T>(m: M, a: T, b: T) -> T;
+
+ pub(crate) fn simd_fmin<T>(a: T, b: T) -> T;
+ pub(crate) fn simd_fmax<T>(a: T, b: T) -> T;
+
+ pub(crate) fn simd_fsqrt<T>(a: T) -> T;
+ pub(crate) fn simd_fma<T>(a: T, b: T, c: T) -> T;
+
+ pub(crate) fn simd_gather<T, P, M>(value: T, pointers: P, mask: M) -> T;
+ pub(crate) fn simd_scatter<T, P, M>(value: T, pointers: P, mask: M);
+
+ pub(crate) fn simd_bitmask<T, U>(value: T) -> U;
+}
diff --git a/third_party/rust/packed_simd/src/codegen/math.rs b/third_party/rust/packed_simd/src/codegen/math.rs
new file mode 100644
index 0000000000..9a0ea7a4e2
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/math.rs
@@ -0,0 +1,3 @@
+//! Vertical math operations
+
+pub(crate) mod float;
diff --git a/third_party/rust/packed_simd/src/codegen/math/float.rs b/third_party/rust/packed_simd/src/codegen/math/float.rs
new file mode 100644
index 0000000000..ffbf18bfe9
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/math/float.rs
@@ -0,0 +1,18 @@
+//! Vertical floating-point math operations.
+#![allow(clippy::useless_transmute)]
+
+#[macro_use]
+pub(crate) mod macros;
+pub(crate) mod abs;
+pub(crate) mod cos;
+pub(crate) mod cos_pi;
+pub(crate) mod exp;
+pub(crate) mod ln;
+pub(crate) mod mul_add;
+pub(crate) mod mul_adde;
+pub(crate) mod powf;
+pub(crate) mod sin;
+pub(crate) mod sin_cos_pi;
+pub(crate) mod sin_pi;
+pub(crate) mod sqrt;
+pub(crate) mod sqrte;
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/abs.rs b/third_party/rust/packed_simd/src/codegen/math/float/abs.rs
new file mode 100644
index 0000000000..34aacc25be
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/math/float/abs.rs
@@ -0,0 +1,103 @@
+//! Vertical floating-point `fabs`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors fabs
+
+use crate::*;
+
+pub(crate) trait Abs {
+ fn abs(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.fabs.v2f32"]
+ fn fabs_v2f32(x: f32x2) -> f32x2;
+ #[link_name = "llvm.fabs.v4f32"]
+ fn fabs_v4f32(x: f32x4) -> f32x4;
+ #[link_name = "llvm.fabs.v8f32"]
+ fn fabs_v8f32(x: f32x8) -> f32x8;
+ #[link_name = "llvm.fabs.v16f32"]
+ fn fabs_v16f32(x: f32x16) -> f32x16;
+ /* FIXME 64-bit fabsgle elem vectors
+ #[link_name = "llvm.fabs.v1f64"]
+ fn fabs_v1f64(x: f64x1) -> f64x1;
+ */
+ #[link_name = "llvm.fabs.v2f64"]
+ fn fabs_v2f64(x: f64x2) -> f64x2;
+ #[link_name = "llvm.fabs.v4f64"]
+ fn fabs_v4f64(x: f64x4) -> f64x4;
+ #[link_name = "llvm.fabs.v8f64"]
+ fn fabs_v8f64(x: f64x8) -> f64x8;
+
+ #[link_name = "llvm.fabs.f32"]
+ fn fabs_f32(x: f32) -> f32;
+ #[link_name = "llvm.fabs.f64"]
+ fn fabs_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Abs, abs);
+
+cfg_if! {
+ if #[cfg(target_arch = "s390x")] {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+ impl_unary!(f32x2[f32; 2]: fabs_f32);
+ impl_unary!(f32x4[f32; 4]: fabs_f32);
+ impl_unary!(f32x8[f32; 8]: fabs_f32);
+ impl_unary!(f32x16[f32; 16]: fabs_f32);
+
+ impl_unary!(f64x2[f64; 2]: fabs_f64);
+ impl_unary!(f64x4[f64; 4]: fabs_f64);
+ impl_unary!(f64x8[f64; 8]: fabs_f64);
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_avx2128);
+ impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx2);
+ impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx2);
+
+ impl_unary!(f32x4: Sleef_fabsf4_avx2128);
+ impl_unary!(f32x8: Sleef_fabsf8_avx2);
+ impl_unary!(f64x2: Sleef_fabsd2_avx2128);
+ impl_unary!(f64x4: Sleef_fabsd4_avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4);
+ impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx);
+ impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx);
+
+ impl_unary!(f32x4: Sleef_fabsf4_sse4);
+ impl_unary!(f32x8: Sleef_fabsf8_avx);
+ impl_unary!(f64x2: Sleef_fabsd2_sse4);
+ impl_unary!(f64x4: Sleef_fabsd4_avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4);
+ impl_unary!(f32x16[q => f32x4]: Sleef_fabsf4_sse4);
+ impl_unary!(f64x8[q => f64x2]: Sleef_fabsd2_sse4);
+
+ impl_unary!(f32x4: Sleef_fabsf4_sse4);
+ impl_unary!(f32x8[h => f32x4]: Sleef_fabsf4_sse4);
+ impl_unary!(f64x2: Sleef_fabsd2_sse4);
+ impl_unary!(f64x4[h => f64x2]: Sleef_fabsd2_sse4);
+ } else {
+ impl_unary!(f32x2[f32; 2]: fabs_f32);
+ impl_unary!(f32x16: fabs_v16f32);
+ impl_unary!(f64x8: fabs_v8f64);
+
+ impl_unary!(f32x4: fabs_v4f32);
+ impl_unary!(f32x8: fabs_v8f32);
+ impl_unary!(f64x2: fabs_v2f64);
+ impl_unary!(f64x4: fabs_v4f64);
+ }
+ }
+ } else {
+ impl_unary!(f32x2[f32; 2]: fabs_f32);
+ impl_unary!(f32x4: fabs_v4f32);
+ impl_unary!(f32x8: fabs_v8f32);
+ impl_unary!(f32x16: fabs_v16f32);
+
+ impl_unary!(f64x2: fabs_v2f64);
+ impl_unary!(f64x4: fabs_v4f64);
+ impl_unary!(f64x8: fabs_v8f64);
+ }
+}
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/cos.rs b/third_party/rust/packed_simd/src/codegen/math/float/cos.rs
new file mode 100644
index 0000000000..dec390cb74
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/math/float/cos.rs
@@ -0,0 +1,103 @@
+//! Vertical floating-point `cos`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vector cos
+
+use crate::*;
+
+pub(crate) trait Cos {
+ fn cos(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.cos.v2f32"]
+ fn cos_v2f32(x: f32x2) -> f32x2;
+ #[link_name = "llvm.cos.v4f32"]
+ fn cos_v4f32(x: f32x4) -> f32x4;
+ #[link_name = "llvm.cos.v8f32"]
+ fn cos_v8f32(x: f32x8) -> f32x8;
+ #[link_name = "llvm.cos.v16f32"]
+ fn cos_v16f32(x: f32x16) -> f32x16;
+ /* FIXME 64-bit cosgle elem vectors
+ #[link_name = "llvm.cos.v1f64"]
+ fn cos_v1f64(x: f64x1) -> f64x1;
+ */
+ #[link_name = "llvm.cos.v2f64"]
+ fn cos_v2f64(x: f64x2) -> f64x2;
+ #[link_name = "llvm.cos.v4f64"]
+ fn cos_v4f64(x: f64x4) -> f64x4;
+ #[link_name = "llvm.cos.v8f64"]
+ fn cos_v8f64(x: f64x8) -> f64x8;
+
+ #[link_name = "llvm.cos.f32"]
+ fn cos_f32(x: f32) -> f32;
+ #[link_name = "llvm.cos.f64"]
+ fn cos_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Cos, cos);
+
+cfg_if! {
+ if #[cfg(target_arch = "s390x")] {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+ impl_unary!(f32x2[f32; 2]: cos_f32);
+ impl_unary!(f32x4[f32; 4]: cos_f32);
+ impl_unary!(f32x8[f32; 8]: cos_f32);
+ impl_unary!(f32x16[f32; 16]: cos_f32);
+
+ impl_unary!(f64x2[f64; 2]: cos_f64);
+ impl_unary!(f64x4[f64; 4]: cos_f64);
+ impl_unary!(f64x8[f64; 8]: cos_f64);
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10avx2128);
+ impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx2);
+ impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx2);
+
+ impl_unary!(f32x4: Sleef_cosf4_u10avx2128);
+ impl_unary!(f32x8: Sleef_cosf8_u10avx2);
+ impl_unary!(f64x2: Sleef_cosd2_u10avx2128);
+ impl_unary!(f64x4: Sleef_cosd4_u10avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4);
+ impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx);
+ impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx);
+
+ impl_unary!(f32x4: Sleef_cosf4_u10sse4);
+ impl_unary!(f32x8: Sleef_cosf8_u10avx);
+ impl_unary!(f64x2: Sleef_cosd2_u10sse4);
+ impl_unary!(f64x4: Sleef_cosd4_u10avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4);
+ impl_unary!(f32x16[q => f32x4]: Sleef_cosf4_u10sse4);
+ impl_unary!(f64x8[q => f64x2]: Sleef_cosd2_u10sse4);
+
+ impl_unary!(f32x4: Sleef_cosf4_u10sse4);
+ impl_unary!(f32x8[h => f32x4]: Sleef_cosf4_u10sse4);
+ impl_unary!(f64x2: Sleef_cosd2_u10sse4);
+ impl_unary!(f64x4[h => f64x2]: Sleef_cosd2_u10sse4);
+ } else {
+ impl_unary!(f32x2[f32; 2]: cos_f32);
+ impl_unary!(f32x16: cos_v16f32);
+ impl_unary!(f64x8: cos_v8f64);
+
+ impl_unary!(f32x4: cos_v4f32);
+ impl_unary!(f32x8: cos_v8f32);
+ impl_unary!(f64x2: cos_v2f64);
+ impl_unary!(f64x4: cos_v4f64);
+ }
+ }
+ } else {
+ impl_unary!(f32x2[f32; 2]: cos_f32);
+ impl_unary!(f32x4: cos_v4f32);
+ impl_unary!(f32x8: cos_v8f32);
+ impl_unary!(f32x16: cos_v16f32);
+
+ impl_unary!(f64x2: cos_v2f64);
+ impl_unary!(f64x4: cos_v4f64);
+ impl_unary!(f64x8: cos_v8f64);
+ }
+}
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs b/third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs
new file mode 100644
index 0000000000..e283280ee4
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/math/float/cos_pi.rs
@@ -0,0 +1,87 @@
+//! Vertical floating-point `cos`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors cos_pi
+
+use crate::*;
+
+pub(crate) trait CosPi {
+ fn cos_pi(self) -> Self;
+}
+
+gen_unary_impl_table!(CosPi, cos_pi);
+
+macro_rules! impl_def {
+ ($vid:ident, $PI:path) => {
+ impl CosPi for $vid {
+ #[inline]
+ fn cos_pi(self) -> Self {
+ (self * Self::splat($PI)).cos()
+ }
+ }
+ };
+}
+macro_rules! impl_def32 {
+ ($vid:ident) => {
+ impl_def!($vid, crate::f32::consts::PI);
+ };
+}
+macro_rules! impl_def64 {
+ ($vid:ident) => {
+ impl_def!($vid, crate::f64::consts::PI);
+ };
+}
+
+cfg_if! {
+ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05avx2128);
+ impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx2);
+ impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx2);
+
+ impl_unary!(f32x4: Sleef_cospif4_u05avx2128);
+ impl_unary!(f32x8: Sleef_cospif8_u05avx2);
+ impl_unary!(f64x2: Sleef_cospid2_u05avx2128);
+ impl_unary!(f64x4: Sleef_cospid4_u05avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4);
+ impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx);
+ impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx);
+
+ impl_unary!(f32x4: Sleef_cospif4_u05sse4);
+ impl_unary!(f32x8: Sleef_cospif8_u05avx);
+ impl_unary!(f64x2: Sleef_cospid2_u05sse4);
+ impl_unary!(f64x4: Sleef_cospid4_u05avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4);
+ impl_unary!(f32x16[q => f32x4]: Sleef_cospif4_u05sse4);
+ impl_unary!(f64x8[q => f64x2]: Sleef_cospid2_u05sse4);
+
+ impl_unary!(f32x4: Sleef_cospif4_u05sse4);
+ impl_unary!(f32x8[h => f32x4]: Sleef_cospif4_u05sse4);
+ impl_unary!(f64x2: Sleef_cospid2_u05sse4);
+ impl_unary!(f64x4[h => f64x2]: Sleef_cospid2_u05sse4);
+ } else {
+ impl_def32!(f32x2);
+ impl_def32!(f32x4);
+ impl_def32!(f32x8);
+ impl_def32!(f32x16);
+
+ impl_def64!(f64x2);
+ impl_def64!(f64x4);
+ impl_def64!(f64x8);
+ }
+ }
+ } else {
+ impl_def32!(f32x2);
+ impl_def32!(f32x4);
+ impl_def32!(f32x8);
+ impl_def32!(f32x16);
+
+ impl_def64!(f64x2);
+ impl_def64!(f64x4);
+ impl_def64!(f64x8);
+ }
+}
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/exp.rs b/third_party/rust/packed_simd/src/codegen/math/float/exp.rs
new file mode 100644
index 0000000000..a7b20580e3
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/math/float/exp.rs
@@ -0,0 +1,112 @@
+//! Vertical floating-point `exp`
+#![allow(unused)]
+
+// FIXME 64-bit expgle elem vectors misexpg
+
+use crate::*;
+
+pub(crate) trait Exp {
+ fn exp(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.exp.v2f32"]
+ fn exp_v2f32(x: f32x2) -> f32x2;
+ #[link_name = "llvm.exp.v4f32"]
+ fn exp_v4f32(x: f32x4) -> f32x4;
+ #[link_name = "llvm.exp.v8f32"]
+ fn exp_v8f32(x: f32x8) -> f32x8;
+ #[link_name = "llvm.exp.v16f32"]
+ fn exp_v16f32(x: f32x16) -> f32x16;
+ /* FIXME 64-bit expgle elem vectors
+ #[link_name = "llvm.exp.v1f64"]
+ fn exp_v1f64(x: f64x1) -> f64x1;
+ */
+ #[link_name = "llvm.exp.v2f64"]
+ fn exp_v2f64(x: f64x2) -> f64x2;
+ #[link_name = "llvm.exp.v4f64"]
+ fn exp_v4f64(x: f64x4) -> f64x4;
+ #[link_name = "llvm.exp.v8f64"]
+ fn exp_v8f64(x: f64x8) -> f64x8;
+
+ #[link_name = "llvm.exp.f32"]
+ fn exp_f32(x: f32) -> f32;
+ #[link_name = "llvm.exp.f64"]
+ fn exp_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Exp, exp);
+
+cfg_if! {
+ if #[cfg(target_arch = "s390x")] {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+ impl_unary!(f32x2[f32; 2]: exp_f32);
+ impl_unary!(f32x4[f32; 4]: exp_f32);
+ impl_unary!(f32x8[f32; 8]: exp_f32);
+ impl_unary!(f32x16[f32; 16]: exp_f32);
+
+ impl_unary!(f64x2[f64; 2]: exp_f64);
+ impl_unary!(f64x4[f64; 4]: exp_f64);
+ impl_unary!(f64x8[f64; 8]: exp_f64);
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10avx2128);
+ impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx2);
+ impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx2);
+
+ impl_unary!(f32x4: Sleef_expf4_u10avx2128);
+ impl_unary!(f32x8: Sleef_expf8_u10avx2);
+ impl_unary!(f64x2: Sleef_expd2_u10avx2128);
+ impl_unary!(f64x4: Sleef_expd4_u10avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4);
+ impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx);
+ impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx);
+
+ impl_unary!(f32x4: Sleef_expf4_u10sse4);
+ impl_unary!(f32x8: Sleef_expf8_u10avx);
+ impl_unary!(f64x2: Sleef_expd2_u10sse4);
+ impl_unary!(f64x4: Sleef_expd4_u10avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4);
+ impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse4);
+ impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse4);
+
+ impl_unary!(f32x4: Sleef_expf4_u10sse4);
+ impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse4);
+ impl_unary!(f64x2: Sleef_expd2_u10sse4);
+ impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse4);
+ } else if #[cfg(target_feature = "sse2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse2);
+ impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse2);
+ impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse2);
+
+ impl_unary!(f32x4: Sleef_expf4_u10sse2);
+ impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse2);
+ impl_unary!(f64x2: Sleef_expd2_u10sse2);
+ impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse2);
+ } else {
+ impl_unary!(f32x2[f32; 2]: exp_f32);
+ impl_unary!(f32x16: exp_v16f32);
+ impl_unary!(f64x8: exp_v8f64);
+
+ impl_unary!(f32x4: exp_v4f32);
+ impl_unary!(f32x8: exp_v8f32);
+ impl_unary!(f64x2: exp_v2f64);
+ impl_unary!(f64x4: exp_v4f64);
+ }
+ }
+ } else {
+ impl_unary!(f32x2[f32; 2]: exp_f32);
+ impl_unary!(f32x4: exp_v4f32);
+ impl_unary!(f32x8: exp_v8f32);
+ impl_unary!(f32x16: exp_v16f32);
+
+ impl_unary!(f64x2: exp_v2f64);
+ impl_unary!(f64x4: exp_v4f64);
+ impl_unary!(f64x8: exp_v8f64);
+ }
+}
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/ln.rs b/third_party/rust/packed_simd/src/codegen/math/float/ln.rs
new file mode 100644
index 0000000000..a5e38cb40d
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/math/float/ln.rs
@@ -0,0 +1,112 @@
+//! Vertical floating-point `ln`
+#![allow(unused)]
+
+// FIXME 64-bit lngle elem vectors mislng
+
+use crate::*;
+
+pub(crate) trait Ln {
+ fn ln(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.log.v2f32"]
+ fn ln_v2f32(x: f32x2) -> f32x2;
+ #[link_name = "llvm.log.v4f32"]
+ fn ln_v4f32(x: f32x4) -> f32x4;
+ #[link_name = "llvm.log.v8f32"]
+ fn ln_v8f32(x: f32x8) -> f32x8;
+ #[link_name = "llvm.log.v16f32"]
+ fn ln_v16f32(x: f32x16) -> f32x16;
+ /* FIXME 64-bit lngle elem vectors
+ #[link_name = "llvm.log.v1f64"]
+ fn ln_v1f64(x: f64x1) -> f64x1;
+ */
+ #[link_name = "llvm.log.v2f64"]
+ fn ln_v2f64(x: f64x2) -> f64x2;
+ #[link_name = "llvm.log.v4f64"]
+ fn ln_v4f64(x: f64x4) -> f64x4;
+ #[link_name = "llvm.log.v8f64"]
+ fn ln_v8f64(x: f64x8) -> f64x8;
+
+ #[link_name = "llvm.log.f32"]
+ fn ln_f32(x: f32) -> f32;
+ #[link_name = "llvm.log.f64"]
+ fn ln_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Ln, ln);
+
+cfg_if! {
+ if #[cfg(target_arch = "s390x")] {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+ impl_unary!(f32x2[f32; 2]: ln_f32);
+ impl_unary!(f32x4[f32; 4]: ln_f32);
+ impl_unary!(f32x8[f32; 8]: ln_f32);
+ impl_unary!(f32x16[f32; 16]: ln_f32);
+
+ impl_unary!(f64x2[f64; 2]: ln_f64);
+ impl_unary!(f64x4[f64; 4]: ln_f64);
+ impl_unary!(f64x8[f64; 8]: ln_f64);
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10avx2128);
+ impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx2);
+ impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx2);
+
+ impl_unary!(f32x4: Sleef_logf4_u10avx2128);
+ impl_unary!(f32x8: Sleef_logf8_u10avx2);
+ impl_unary!(f64x2: Sleef_logd2_u10avx2128);
+ impl_unary!(f64x4: Sleef_logd4_u10avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4);
+ impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx);
+ impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx);
+
+ impl_unary!(f32x4: Sleef_logf4_u10sse4);
+ impl_unary!(f32x8: Sleef_logf8_u10avx);
+ impl_unary!(f64x2: Sleef_logd2_u10sse4);
+ impl_unary!(f64x4: Sleef_logd4_u10avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4);
+ impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse4);
+ impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse4);
+
+ impl_unary!(f32x4: Sleef_logf4_u10sse4);
+ impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse4);
+ impl_unary!(f64x2: Sleef_logd2_u10sse4);
+ impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse4);
+ } else if #[cfg(target_feature = "sse2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse2);
+ impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse2);
+ impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse2);
+
+ impl_unary!(f32x4: Sleef_logf4_u10sse2);
+ impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse2);
+ impl_unary!(f64x2: Sleef_logd2_u10sse2);
+ impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse2);
+ } else {
+ impl_unary!(f32x2[f32; 2]: ln_f32);
+ impl_unary!(f32x16: ln_v16f32);
+ impl_unary!(f64x8: ln_v8f64);
+
+ impl_unary!(f32x4: ln_v4f32);
+ impl_unary!(f32x8: ln_v8f32);
+ impl_unary!(f64x2: ln_v2f64);
+ impl_unary!(f64x4: ln_v4f64);
+ }
+ }
+ } else {
+ impl_unary!(f32x2[f32; 2]: ln_f32);
+ impl_unary!(f32x4: ln_v4f32);
+ impl_unary!(f32x8: ln_v8f32);
+ impl_unary!(f32x16: ln_v16f32);
+
+ impl_unary!(f64x2: ln_v2f64);
+ impl_unary!(f64x4: ln_v4f64);
+ impl_unary!(f64x8: ln_v8f64);
+ }
+}
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/macros.rs b/third_party/rust/packed_simd/src/codegen/math/float/macros.rs
new file mode 100644
index 0000000000..8daee1afe2
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/math/float/macros.rs
@@ -0,0 +1,470 @@
+//! Utility macros
+#![allow(unused)]
+
+macro_rules! impl_unary_ {
+ // implementation mapping 1:1
+ (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ transmute($fun(transmute(self)))
+ }
+ }
+ }
+ };
+ // implementation mapping 1:1 for when `$fun` is a generic function
+ // like some of the fp math rustc intrinsics (e.g. `fn fun<T>(x: T) -> T`).
+ (gen | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ transmute($fun(self.0))
+ }
+ }
+ }
+ };
+ (scalar | $trait_id:ident, $trait_method:ident,
+ $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self) -> Self {
+ unsafe {
+ union U {
+ vec: $vec_id,
+ scalars: [$sid; $scount],
+ }
+ let mut scalars = U { vec: self }.scalars;
+ for i in &mut scalars {
+ *i = $fun(*i);
+ }
+ U { scalars }.vec
+ }
+ }
+ }
+ };
+ // implementation calling fun twice on each of the vector halves:
+ (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $vech_id:ident, $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ union U {
+ vec: $vec_id,
+ halves: [$vech_id; 2],
+ }
+
+ let mut halves = U { vec: self }.halves;
+
+ *halves.get_unchecked_mut(0) = transmute($fun(transmute(*halves.get_unchecked(0))));
+ *halves.get_unchecked_mut(1) = transmute($fun(transmute(*halves.get_unchecked(1))));
+
+ U { halves }.vec
+ }
+ }
+ }
+ };
+ // implementation calling fun four times on each of the vector quarters:
+ (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $vecq_id:ident, $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ union U {
+ vec: $vec_id,
+ quarters: [$vecq_id; 4],
+ }
+
+ let mut quarters = U { vec: self }.quarters;
+
+ *quarters.get_unchecked_mut(0) = transmute($fun(transmute(*quarters.get_unchecked(0))));
+ *quarters.get_unchecked_mut(1) = transmute($fun(transmute(*quarters.get_unchecked(1))));
+ *quarters.get_unchecked_mut(2) = transmute($fun(transmute(*quarters.get_unchecked(2))));
+ *quarters.get_unchecked_mut(3) = transmute($fun(transmute(*quarters.get_unchecked(3))));
+
+ U { quarters }.vec
+ }
+ }
+ }
+ };
+ // implementation calling fun once on a vector twice as large:
+ (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $vect_id:ident, $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self) -> Self {
+ unsafe {
+ use crate::mem::{transmute, uninitialized};
+
+ union U {
+ vec: [$vec_id; 2],
+ twice: $vect_id,
+ }
+
+ let twice = U { vec: [self, uninitialized()] }.twice;
+ let twice = transmute($fun(transmute(twice)));
+
+ *(U { twice }.vec.get_unchecked(0))
+ }
+ }
+ }
+ };
+}
+
+macro_rules! gen_unary_impl_table {
+ ($trait_id:ident, $trait_method:ident) => {
+ macro_rules! impl_unary {
+ ($vid:ident: $fun:ident) => {
+ impl_unary_!(vec | $trait_id, $trait_method, $vid, $fun);
+ };
+ ($vid:ident[g]: $fun:ident) => {
+ impl_unary_!(gen | $trait_id, $trait_method, $vid, $fun);
+ };
+ ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => {
+ impl_unary_!(scalar | $trait_id, $trait_method, $vid, [$sid; $sc], $fun);
+ };
+ ($vid:ident[s]: $fun:ident) => {
+ impl_unary_!(scalar | $trait_id, $trait_method, $vid, $fun);
+ };
+ ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
+ impl_unary_!(halves | $trait_id, $trait_method, $vid, $vid_h, $fun);
+ };
+ ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
+ impl_unary_!(quarter | $trait_id, $trait_method, $vid, $vid_q, $fun);
+ };
+ ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
+ impl_unary_!(twice | $trait_id, $trait_method, $vid, $vid_t, $fun);
+ };
+ }
+ };
+}
+
+macro_rules! impl_tertiary_ {
+ // implementation mapping 1:1
+ (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self, z: Self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ transmute($fun(transmute(self), transmute(y), transmute(z)))
+ }
+ }
+ }
+ };
+ (scalar | $trait_id:ident, $trait_method:ident,
+ $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self, z: Self) -> Self {
+ unsafe {
+ union U {
+ vec: $vec_id,
+ scalars: [$sid; $scount],
+ }
+ let mut x = U { vec: self }.scalars;
+ let y = U { vec: y }.scalars;
+ let z = U { vec: z }.scalars;
+ for (x, (y, z)) in (&mut scalars).zip(&y).zip(&z) {
+ *i = $fun(*i, *y, *z);
+ }
+ U { vec: x }.vec
+ }
+ }
+ }
+ };
+ // implementation calling fun twice on each of the vector halves:
+ (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $vech_id:ident, $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self, z: Self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ union U {
+ vec: $vec_id,
+ halves: [$vech_id; 2],
+ }
+
+ let mut x_halves = U { vec: self }.halves;
+ let y_halves = U { vec: y }.halves;
+ let z_halves = U { vec: z }.halves;
+
+ *x_halves.get_unchecked_mut(0) = transmute($fun(
+ transmute(*x_halves.get_unchecked(0)),
+ transmute(*y_halves.get_unchecked(0)),
+ transmute(*z_halves.get_unchecked(0)),
+ ));
+ *x_halves.get_unchecked_mut(1) = transmute($fun(
+ transmute(*x_halves.get_unchecked(1)),
+ transmute(*y_halves.get_unchecked(1)),
+ transmute(*z_halves.get_unchecked(1)),
+ ));
+
+ U { halves: x_halves }.vec
+ }
+ }
+ }
+ };
+ // implementation calling fun four times on each of the vector quarters:
+ (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $vecq_id:ident, $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self, z: Self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ union U {
+ vec: $vec_id,
+ quarters: [$vecq_id; 4],
+ }
+
+ let mut x_quarters = U { vec: self }.quarters;
+ let y_quarters = U { vec: y }.quarters;
+ let z_quarters = U { vec: z }.quarters;
+
+ *x_quarters.get_unchecked_mut(0) = transmute($fun(
+ transmute(*x_quarters.get_unchecked(0)),
+ transmute(*y_quarters.get_unchecked(0)),
+ transmute(*z_quarters.get_unchecked(0)),
+ ));
+
+ *x_quarters.get_unchecked_mut(1) = transmute($fun(
+ transmute(*x_quarters.get_unchecked(1)),
+ transmute(*y_quarters.get_unchecked(1)),
+ transmute(*z_quarters.get_unchecked(1)),
+ ));
+
+ *x_quarters.get_unchecked_mut(2) = transmute($fun(
+ transmute(*x_quarters.get_unchecked(2)),
+ transmute(*y_quarters.get_unchecked(2)),
+ transmute(*z_quarters.get_unchecked(2)),
+ ));
+
+ *x_quarters.get_unchecked_mut(3) = transmute($fun(
+ transmute(*x_quarters.get_unchecked(3)),
+ transmute(*y_quarters.get_unchecked(3)),
+ transmute(*z_quarters.get_unchecked(3)),
+ ));
+
+ U { quarters: x_quarters }.vec
+ }
+ }
+ }
+ };
+ // implementation calling fun once on a vector twice as large:
+ (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $vect_id:ident, $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self, z: Self) -> Self {
+ unsafe {
+ use crate::mem::{transmute, uninitialized};
+
+ union U {
+ vec: [$vec_id; 2],
+ twice: $vect_id,
+ }
+
+ let x_twice = U { vec: [self, uninitialized()] }.twice;
+ let y_twice = U { vec: [y, uninitialized()] }.twice;
+ let z_twice = U { vec: [z, uninitialized()] }.twice;
+ let twice: $vect_id =
+ transmute($fun(transmute(x_twice), transmute(y_twice), transmute(z_twice)));
+
+ *(U { twice }.vec.get_unchecked(0))
+ }
+ }
+ }
+ };
+}
+
+macro_rules! gen_tertiary_impl_table {
+ ($trait_id:ident, $trait_method:ident) => {
+ macro_rules! impl_tertiary {
+ ($vid:ident: $fun:ident) => {
+ impl_tertiary_!(vec | $trait_id, $trait_method, $vid, $fun);
+ };
+ ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => {
+ impl_tertiary_!(scalar | $trait_id, $trait_method, $vid, [$sid; $sc], $fun);
+ };
+ ($vid:ident[s]: $fun:ident) => {
+ impl_tertiary_!(scalar | $trait_id, $trait_method, $vid, $fun);
+ };
+ ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
+ impl_tertiary_!(halves | $trait_id, $trait_method, $vid, $vid_h, $fun);
+ };
+ ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
+ impl_tertiary_!(quarter | $trait_id, $trait_method, $vid, $vid_q, $fun);
+ };
+ ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
+ impl_tertiary_!(twice | $trait_id, $trait_method, $vid, $vid_t, $fun);
+ };
+ }
+ };
+}
+
+macro_rules! impl_binary_ {
+ // implementation mapping 1:1
+ (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ transmute($fun(transmute(self), transmute(y)))
+ }
+ }
+ }
+ };
+ (scalar | $trait_id:ident, $trait_method:ident,
+ $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self) -> Self {
+ unsafe {
+ union U {
+ vec: $vec_id,
+ scalars: [$sid; $scount],
+ }
+ let mut x = U { vec: self }.scalars;
+ let y = U { vec: y }.scalars;
+ for (x, y) in x.iter_mut().zip(&y) {
+ *x = $fun(*x, *y);
+ }
+ U { scalars: x }.vec
+ }
+ }
+ }
+ };
+ // implementation calling fun twice on each of the vector halves:
+ (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $vech_id:ident, $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ union U {
+ vec: $vec_id,
+ halves: [$vech_id; 2],
+ }
+
+ let mut x_halves = U { vec: self }.halves;
+ let y_halves = U { vec: y }.halves;
+
+ *x_halves.get_unchecked_mut(0) = transmute($fun(
+ transmute(*x_halves.get_unchecked(0)),
+ transmute(*y_halves.get_unchecked(0)),
+ ));
+ *x_halves.get_unchecked_mut(1) = transmute($fun(
+ transmute(*x_halves.get_unchecked(1)),
+ transmute(*y_halves.get_unchecked(1)),
+ ));
+
+ U { halves: x_halves }.vec
+ }
+ }
+ }
+ };
+ // implementation calling fun four times on each of the vector quarters:
+ (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $vecq_id:ident, $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self) -> Self {
+ unsafe {
+ use crate::mem::transmute;
+ union U {
+ vec: $vec_id,
+ quarters: [$vecq_id; 4],
+ }
+
+ let mut x_quarters = U { vec: self }.quarters;
+ let y_quarters = U { vec: y }.quarters;
+
+ *x_quarters.get_unchecked_mut(0) = transmute($fun(
+ transmute(*x_quarters.get_unchecked(0)),
+ transmute(*y_quarters.get_unchecked(0)),
+ ));
+
+ *x_quarters.get_unchecked_mut(1) = transmute($fun(
+ transmute(*x_quarters.get_unchecked(1)),
+ transmute(*y_quarters.get_unchecked(1)),
+ ));
+
+ *x_quarters.get_unchecked_mut(2) = transmute($fun(
+ transmute(*x_quarters.get_unchecked(2)),
+ transmute(*y_quarters.get_unchecked(2)),
+ ));
+
+ *x_quarters.get_unchecked_mut(3) = transmute($fun(
+ transmute(*x_quarters.get_unchecked(3)),
+ transmute(*y_quarters.get_unchecked(3)),
+ ));
+
+ U { quarters: x_quarters }.vec
+ }
+ }
+ }
+ };
+ // implementation calling fun once on a vector twice as large:
+ (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident,
+ $vect_id:ident, $fun:ident) => {
+ impl $trait_id for $vec_id {
+ #[inline]
+ fn $trait_method(self, y: Self) -> Self {
+ unsafe {
+ use crate::mem::{transmute, uninitialized};
+
+ union U {
+ vec: [$vec_id; 2],
+ twice: $vect_id,
+ }
+
+ let x_twice = U { vec: [self, uninitialized()] }.twice;
+ let y_twice = U { vec: [y, uninitialized()] }.twice;
+ let twice: $vect_id = transmute($fun(transmute(x_twice), transmute(y_twice)));
+
+ *(U { twice }.vec.get_unchecked(0))
+ }
+ }
+ }
+ };
+}
+
+macro_rules! gen_binary_impl_table {
+ ($trait_id:ident, $trait_method:ident) => {
+ macro_rules! impl_binary {
+ ($vid:ident: $fun:ident) => {
+ impl_binary_!(vec | $trait_id, $trait_method, $vid, $fun);
+ };
+ ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => {
+ impl_binary_!(scalar | $trait_id, $trait_method, $vid, [$sid; $sc], $fun);
+ };
+ ($vid:ident[s]: $fun:ident) => {
+ impl_binary_!(scalar | $trait_id, $trait_method, $vid, $fun);
+ };
+ ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
+ impl_binary_!(halves | $trait_id, $trait_method, $vid, $vid_h, $fun);
+ };
+ ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
+ impl_binary_!(quarter | $trait_id, $trait_method, $vid, $vid_q, $fun);
+ };
+ ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
+ impl_binary_!(twice | $trait_id, $trait_method, $vid, $vid_t, $fun);
+ };
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs b/third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs
new file mode 100644
index 0000000000..d37f30fa86
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/math/float/mul_add.rs
@@ -0,0 +1,109 @@
+//! Vertical floating-point `mul_add`
+#![allow(unused)]
+use crate::*;
+
+// FIXME: 64-bit 1 element mul_add
+
+pub(crate) trait MulAdd {
+ fn mul_add(self, y: Self, z: Self) -> Self;
+}
+
+#[cfg(not(target_arch = "s390x"))]
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.fma.v2f32"]
+ fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
+ #[link_name = "llvm.fma.v4f32"]
+ fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
+ #[link_name = "llvm.fma.v8f32"]
+ fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
+ #[link_name = "llvm.fma.v16f32"]
+ fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
+ /* FIXME 64-bit single elem vectors
+ #[link_name = "llvm.fma.v1f64"]
+ fn fma_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1;
+ */
+ #[link_name = "llvm.fma.v2f64"]
+ fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
+ #[link_name = "llvm.fma.v4f64"]
+ fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
+ #[link_name = "llvm.fma.v8f64"]
+ fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
+}
+
+gen_tertiary_impl_table!(MulAdd, mul_add);
+
+cfg_if! {
+ if #[cfg(target_arch = "s390x")] {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+ macro_rules! impl_broken {
+ ($id:ident) => {
+ impl MulAdd for $id {
+ #[inline]
+ fn mul_add(self, y: Self, z: Self) -> Self {
+ self * y + z
+ }
+ }
+ };
+ }
+
+ impl_broken!(f32x2);
+ impl_broken!(f32x4);
+ impl_broken!(f32x8);
+ impl_broken!(f32x16);
+
+ impl_broken!(f64x2);
+ impl_broken!(f64x4);
+ impl_broken!(f64x8);
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_avx2128);
+ impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx2);
+ impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx2);
+
+ impl_tertiary!(f32x4: Sleef_fmaf4_avx2128);
+ impl_tertiary!(f32x8: Sleef_fmaf8_avx2);
+ impl_tertiary!(f64x2: Sleef_fmad2_avx2128);
+ impl_tertiary!(f64x4: Sleef_fmad4_avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4);
+ impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx);
+ impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx);
+
+ impl_tertiary!(f32x4: Sleef_fmaf4_sse4);
+ impl_tertiary!(f32x8: Sleef_fmaf8_avx);
+ impl_tertiary!(f64x2: Sleef_fmad2_sse4);
+ impl_tertiary!(f64x4: Sleef_fmad4_avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4);
+ impl_tertiary!(f32x16[q => f32x4]: Sleef_fmaf4_sse4);
+ impl_tertiary!(f64x8[q => f64x2]: Sleef_fmad2_sse4);
+
+ impl_tertiary!(f32x4: Sleef_fmaf4_sse4);
+ impl_tertiary!(f32x8[h => f32x4]: Sleef_fmaf4_sse4);
+ impl_tertiary!(f64x2: Sleef_fmad2_sse4);
+ impl_tertiary!(f64x4[h => f64x2]: Sleef_fmad2_sse4);
+ } else {
+ impl_tertiary!(f32x2: fma_v2f32);
+ impl_tertiary!(f32x16: fma_v16f32);
+ impl_tertiary!(f64x8: fma_v8f64);
+
+ impl_tertiary!(f32x4: fma_v4f32);
+ impl_tertiary!(f32x8: fma_v8f32);
+ impl_tertiary!(f64x2: fma_v2f64);
+ impl_tertiary!(f64x4: fma_v4f64);
+ }
+ }
+ } else {
+ impl_tertiary!(f32x2: fma_v2f32);
+ impl_tertiary!(f32x4: fma_v4f32);
+ impl_tertiary!(f32x8: fma_v8f32);
+ impl_tertiary!(f32x16: fma_v16f32);
+ // impl_tertiary!(f64x1: fma_v1f64); // FIXME 64-bit fmagle elem vectors
+ impl_tertiary!(f64x2: fma_v2f64);
+ impl_tertiary!(f64x4: fma_v4f64);
+ impl_tertiary!(f64x8: fma_v8f64);
+ }
+}
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs b/third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs
new file mode 100644
index 0000000000..c0baeacec2
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/math/float/mul_adde.rs
@@ -0,0 +1,60 @@
+//! Approximation for floating-point `mul_add`
+use crate::*;
+
+// FIXME: 64-bit 1 element mul_adde
+
+pub(crate) trait MulAddE {
+ fn mul_adde(self, y: Self, z: Self) -> Self;
+}
+
+#[cfg(not(target_arch = "s390x"))]
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.fmuladd.v2f32"]
+ fn fmuladd_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
+ #[link_name = "llvm.fmuladd.v4f32"]
+ fn fmuladd_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
+ #[link_name = "llvm.fmuladd.v8f32"]
+ fn fmuladd_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
+ #[link_name = "llvm.fmuladd.v16f32"]
+ fn fmuladd_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
+ /* FIXME 64-bit single elem vectors
+ #[link_name = "llvm.fmuladd.v1f64"]
+ fn fmuladd_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1;
+ */
+ #[link_name = "llvm.fmuladd.v2f64"]
+ fn fmuladd_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
+ #[link_name = "llvm.fmuladd.v4f64"]
+ fn fmuladd_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
+ #[link_name = "llvm.fmuladd.v8f64"]
+ fn fmuladd_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
+}
+
+macro_rules! impl_mul_adde {
+ ($id:ident : $fn:ident) => {
+ impl MulAddE for $id {
+ #[inline]
+ fn mul_adde(self, y: Self, z: Self) -> Self {
+ #[cfg(not(target_arch = "s390x"))]
+ {
+ use crate::mem::transmute;
+ unsafe { transmute($fn(transmute(self), transmute(y), transmute(z))) }
+ }
+ #[cfg(target_arch = "s390x")]
+ {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+ self * y + z
+ }
+ }
+ }
+ };
+}
+
+impl_mul_adde!(f32x2: fmuladd_v2f32);
+impl_mul_adde!(f32x4: fmuladd_v4f32);
+impl_mul_adde!(f32x8: fmuladd_v8f32);
+impl_mul_adde!(f32x16: fmuladd_v16f32);
+// impl_mul_adde!(f64x1: fma_v1f64); // FIXME 64-bit fmagle elem vectors
+impl_mul_adde!(f64x2: fmuladd_v2f64);
+impl_mul_adde!(f64x4: fmuladd_v4f64);
+impl_mul_adde!(f64x8: fmuladd_v8f64);
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/powf.rs b/third_party/rust/packed_simd/src/codegen/math/float/powf.rs
new file mode 100644
index 0000000000..89ca52e96d
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/math/float/powf.rs
@@ -0,0 +1,112 @@
+//! Vertical floating-point `powf`
+#![allow(unused)]
+
+// FIXME 64-bit powfgle elem vectors mispowfg
+
+use crate::*;
+
+pub(crate) trait Powf {
+ fn powf(self, x: Self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.pow.v2f32"]
+ fn powf_v2f32(x: f32x2, y: f32x2) -> f32x2;
+ #[link_name = "llvm.pow.v4f32"]
+ fn powf_v4f32(x: f32x4, y: f32x4) -> f32x4;
+ #[link_name = "llvm.pow.v8f32"]
+ fn powf_v8f32(x: f32x8, y: f32x8) -> f32x8;
+ #[link_name = "llvm.pow.v16f32"]
+ fn powf_v16f32(x: f32x16, y: f32x16) -> f32x16;
+ /* FIXME 64-bit powfgle elem vectors
+ #[link_name = "llvm.pow.v1f64"]
+ fn powf_v1f64(x: f64x1, y: f64x1) -> f64x1;
+ */
+ #[link_name = "llvm.pow.v2f64"]
+ fn powf_v2f64(x: f64x2, y: f64x2) -> f64x2;
+ #[link_name = "llvm.pow.v4f64"]
+ fn powf_v4f64(x: f64x4, y: f64x4) -> f64x4;
+ #[link_name = "llvm.pow.v8f64"]
+ fn powf_v8f64(x: f64x8, y: f64x8) -> f64x8;
+
+ #[link_name = "llvm.pow.f32"]
+ fn powf_f32(x: f32, y: f32) -> f32;
+ #[link_name = "llvm.pow.f64"]
+ fn powf_f64(x: f64, y: f64) -> f64;
+}
+
+gen_binary_impl_table!(Powf, powf);
+
+cfg_if! {
+ if #[cfg(target_arch = "s390x")] {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+ impl_binary!(f32x2[f32; 2]: powf_f32);
+ impl_binary!(f32x4[f32; 4]: powf_f32);
+ impl_binary!(f32x8[f32; 8]: powf_f32);
+ impl_binary!(f32x16[f32; 16]: powf_f32);
+
+ impl_binary!(f64x2[f64; 2]: powf_f64);
+ impl_binary!(f64x4[f64; 4]: powf_f64);
+ impl_binary!(f64x8[f64; 8]: powf_f64);
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10avx2128);
+ impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx2);
+ impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx2);
+
+ impl_binary!(f32x4: Sleef_powf4_u10avx2128);
+ impl_binary!(f32x8: Sleef_powf8_u10avx2);
+ impl_binary!(f64x2: Sleef_powd2_u10avx2128);
+ impl_binary!(f64x4: Sleef_powd4_u10avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4);
+ impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx);
+ impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx);
+
+ impl_binary!(f32x4: Sleef_powf4_u10sse4);
+ impl_binary!(f32x8: Sleef_powf8_u10avx);
+ impl_binary!(f64x2: Sleef_powd2_u10sse4);
+ impl_binary!(f64x4: Sleef_powd4_u10avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4);
+ impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse4);
+ impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse4);
+
+ impl_binary!(f32x4: Sleef_powf4_u10sse4);
+ impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse4);
+ impl_binary!(f64x2: Sleef_powd2_u10sse4);
+ impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse4);
+ } else if #[cfg(target_feature = "sse2")] {
+ impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse2);
+ impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse2);
+ impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse2);
+
+ impl_binary!(f32x4: Sleef_powf4_u10sse2);
+ impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse2);
+ impl_binary!(f64x2: Sleef_powd2_u10sse2);
+ impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse2);
+ } else {
+ impl_binary!(f32x2[f32; 2]: powf_f32);
+ impl_binary!(f32x4: powf_v4f32);
+ impl_binary!(f32x8: powf_v8f32);
+ impl_binary!(f32x16: powf_v16f32);
+
+ impl_binary!(f64x2: powf_v2f64);
+ impl_binary!(f64x4: powf_v4f64);
+ impl_binary!(f64x8: powf_v8f64);
+ }
+ }
+ } else {
+ impl_binary!(f32x2[f32; 2]: powf_f32);
+ impl_binary!(f32x4: powf_v4f32);
+ impl_binary!(f32x8: powf_v8f32);
+ impl_binary!(f32x16: powf_v16f32);
+
+ impl_binary!(f64x2: powf_v2f64);
+ impl_binary!(f64x4: powf_v4f64);
+ impl_binary!(f64x8: powf_v8f64);
+ }
+}
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sin.rs b/third_party/rust/packed_simd/src/codegen/math/float/sin.rs
new file mode 100644
index 0000000000..d881415909
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/math/float/sin.rs
@@ -0,0 +1,103 @@
+//! Vertical floating-point `sin`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors sin
+
+use crate::*;
+
+pub(crate) trait Sin {
+ fn sin(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.sin.v2f32"]
+ fn sin_v2f32(x: f32x2) -> f32x2;
+ #[link_name = "llvm.sin.v4f32"]
+ fn sin_v4f32(x: f32x4) -> f32x4;
+ #[link_name = "llvm.sin.v8f32"]
+ fn sin_v8f32(x: f32x8) -> f32x8;
+ #[link_name = "llvm.sin.v16f32"]
+ fn sin_v16f32(x: f32x16) -> f32x16;
+ /* FIXME 64-bit single elem vectors
+ #[link_name = "llvm.sin.v1f64"]
+ fn sin_v1f64(x: f64x1) -> f64x1;
+ */
+ #[link_name = "llvm.sin.v2f64"]
+ fn sin_v2f64(x: f64x2) -> f64x2;
+ #[link_name = "llvm.sin.v4f64"]
+ fn sin_v4f64(x: f64x4) -> f64x4;
+ #[link_name = "llvm.sin.v8f64"]
+ fn sin_v8f64(x: f64x8) -> f64x8;
+
+ #[link_name = "llvm.sin.f32"]
+ fn sin_f32(x: f32) -> f32;
+ #[link_name = "llvm.sin.f64"]
+ fn sin_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Sin, sin);
+
+cfg_if! {
+ if #[cfg(target_arch = "s390x")] {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+ impl_unary!(f32x2[f32; 2]: sin_f32);
+ impl_unary!(f32x4[f32; 4]: sin_f32);
+ impl_unary!(f32x8[f32; 8]: sin_f32);
+ impl_unary!(f32x16[f32; 16]: sin_f32);
+
+ impl_unary!(f64x2[f64; 2]: sin_f64);
+ impl_unary!(f64x4[f64; 4]: sin_f64);
+ impl_unary!(f64x8[f64; 8]: sin_f64);
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10avx2128);
+ impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx2);
+ impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx2);
+
+ impl_unary!(f32x4: Sleef_sinf4_u10avx2128);
+ impl_unary!(f32x8: Sleef_sinf8_u10avx2);
+ impl_unary!(f64x2: Sleef_sind2_u10avx2128);
+ impl_unary!(f64x4: Sleef_sind4_u10avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4);
+ impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx);
+ impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx);
+
+ impl_unary!(f32x4: Sleef_sinf4_u10sse4);
+ impl_unary!(f32x8: Sleef_sinf8_u10avx);
+ impl_unary!(f64x2: Sleef_sind2_u10sse4);
+ impl_unary!(f64x4: Sleef_sind4_u10avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4);
+ impl_unary!(f32x16[q => f32x4]: Sleef_sinf4_u10sse4);
+ impl_unary!(f64x8[q => f64x2]: Sleef_sind2_u10sse4);
+
+ impl_unary!(f32x4: Sleef_sinf4_u10sse4);
+ impl_unary!(f32x8[h => f32x4]: Sleef_sinf4_u10sse4);
+ impl_unary!(f64x2: Sleef_sind2_u10sse4);
+ impl_unary!(f64x4[h => f64x2]: Sleef_sind2_u10sse4);
+ } else {
+ impl_unary!(f32x2[f32; 2]: sin_f32);
+ impl_unary!(f32x16: sin_v16f32);
+ impl_unary!(f64x8: sin_v8f64);
+
+ impl_unary!(f32x4: sin_v4f32);
+ impl_unary!(f32x8: sin_v8f32);
+ impl_unary!(f64x2: sin_v2f64);
+ impl_unary!(f64x4: sin_v4f64);
+ }
+ }
+ } else {
+ impl_unary!(f32x2[f32; 2]: sin_f32);
+ impl_unary!(f32x4: sin_v4f32);
+ impl_unary!(f32x8: sin_v8f32);
+ impl_unary!(f32x16: sin_v16f32);
+
+ impl_unary!(f64x2: sin_v2f64);
+ impl_unary!(f64x4: sin_v4f64);
+ impl_unary!(f64x8: sin_v8f64);
+ }
+}
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs b/third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs
new file mode 100644
index 0000000000..b283d11111
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/math/float/sin_cos_pi.rs
@@ -0,0 +1,188 @@
+//! Vertical floating-point `sin_cos`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors sin_cos
+
+use crate::*;
+
+pub(crate) trait SinCosPi: Sized {
+ type Output;
+ fn sin_cos_pi(self) -> Self::Output;
+}
+
+macro_rules! impl_def {
+ ($vid:ident, $PI:path) => {
+ impl SinCosPi for $vid {
+ type Output = (Self, Self);
+ #[inline]
+ fn sin_cos_pi(self) -> Self::Output {
+ let v = self * Self::splat($PI);
+ (v.sin(), v.cos())
+ }
+ }
+ };
+}
+
+macro_rules! impl_def32 {
+ ($vid:ident) => {
+ impl_def!($vid, crate::f32::consts::PI);
+ };
+}
+macro_rules! impl_def64 {
+ ($vid:ident) => {
+ impl_def!($vid, crate::f64::consts::PI);
+ };
+}
+
+macro_rules! impl_unary_t {
+ ($vid:ident: $fun:ident) => {
+ impl SinCosPi for $vid {
+ type Output = (Self, Self);
+ fn sin_cos_pi(self) -> Self::Output {
+ unsafe {
+ use crate::mem::transmute;
+ transmute($fun(transmute(self)))
+ }
+ }
+ }
+ };
+ ($vid:ident[t => $vid_t:ident]: $fun:ident) => {
+ impl SinCosPi for $vid {
+ type Output = (Self, Self);
+ fn sin_cos_pi(self) -> Self::Output {
+ unsafe {
+ use crate::mem::{transmute, uninitialized};
+
+ union U {
+ vec: [$vid; 2],
+ twice: $vid_t,
+ }
+
+ let twice = U { vec: [self, uninitialized()] }.twice;
+ let twice = transmute($fun(transmute(twice)));
+
+ union R {
+ twice: ($vid_t, $vid_t),
+ vecs: ([$vid; 2], [$vid; 2]),
+ }
+ let r = R { twice }.vecs;
+ (*r.0.get_unchecked(0), *r.0.get_unchecked(1))
+ }
+ }
+ }
+ };
+ ($vid:ident[h => $vid_h:ident]: $fun:ident) => {
+ impl SinCosPi for $vid {
+ type Output = (Self, Self);
+ fn sin_cos_pi(self) -> Self::Output {
+ unsafe {
+ use crate::mem::transmute;
+
+ union U {
+ vec: $vid,
+ halves: [$vid_h; 2],
+ }
+
+ let halves = U { vec: self }.halves;
+
+ let res_0: ($vid_h, $vid_h) = transmute($fun(transmute(*halves.get_unchecked(0))));
+ let res_1: ($vid_h, $vid_h) = transmute($fun(transmute(*halves.get_unchecked(1))));
+
+ union R {
+ result: ($vid, $vid),
+ halves: ([$vid_h; 2], [$vid_h; 2]),
+ }
+ R { halves: ([res_0.0, res_1.0], [res_0.1, res_1.1]) }.result
+ }
+ }
+ }
+ };
+ ($vid:ident[q => $vid_q:ident]: $fun:ident) => {
+ impl SinCosPi for $vid {
+ type Output = (Self, Self);
+ fn sin_cos_pi(self) -> Self::Output {
+ unsafe {
+ use crate::mem::transmute;
+
+ union U {
+ vec: $vid,
+ quarters: [$vid_q; 4],
+ }
+
+ let quarters = U { vec: self }.quarters;
+
+ let res_0: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(0))));
+ let res_1: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(1))));
+ let res_2: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(2))));
+ let res_3: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(3))));
+
+ union R {
+ result: ($vid, $vid),
+ quarters: ([$vid_q; 4], [$vid_q; 4]),
+ }
+ R {
+ quarters: (
+ [res_0.0, res_1.0, res_2.0, res_3.0],
+ [res_0.1, res_1.1, res_2.1, res_3.1],
+ ),
+ }
+ .result
+ }
+ }
+ }
+ };
+}
+
+cfg_if! {
+ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05avx2128);
+ impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx2);
+ impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx2);
+
+ impl_unary_t!(f32x4: Sleef_sincospif4_u05avx2128);
+ impl_unary_t!(f32x8: Sleef_sincospif8_u05avx2);
+ impl_unary_t!(f64x2: Sleef_sincospid2_u05avx2128);
+ impl_unary_t!(f64x4: Sleef_sincospid4_u05avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4);
+ impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx);
+ impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx);
+
+ impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4);
+ impl_unary_t!(f32x8: Sleef_sincospif8_u05avx);
+ impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4);
+ impl_unary_t!(f64x4: Sleef_sincospid4_u05avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4);
+ impl_unary_t!(f32x16[q => f32x4]: Sleef_sincospif4_u05sse4);
+ impl_unary_t!(f64x8[q => f64x2]: Sleef_sincospid2_u05sse4);
+
+ impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4);
+ impl_unary_t!(f32x8[h => f32x4]: Sleef_sincospif4_u05sse4);
+ impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4);
+ impl_unary_t!(f64x4[h => f64x2]: Sleef_sincospid2_u05sse4);
+ } else {
+ impl_def32!(f32x2);
+ impl_def32!(f32x4);
+ impl_def32!(f32x8);
+ impl_def32!(f32x16);
+
+ impl_def64!(f64x2);
+ impl_def64!(f64x4);
+ impl_def64!(f64x8);
+ }
+ }
+ } else {
+ impl_def32!(f32x2);
+ impl_def32!(f32x4);
+ impl_def32!(f32x8);
+ impl_def32!(f32x16);
+
+ impl_def64!(f64x2);
+ impl_def64!(f64x4);
+ impl_def64!(f64x8);
+ }
+}
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs b/third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs
new file mode 100644
index 0000000000..0c8f6bb120
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/math/float/sin_pi.rs
@@ -0,0 +1,87 @@
+//! Vertical floating-point `sin_pi`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors sin_pi
+
+use crate::*;
+
+pub(crate) trait SinPi {
+ fn sin_pi(self) -> Self;
+}
+
+gen_unary_impl_table!(SinPi, sin_pi);
+
+macro_rules! impl_def {
+ ($vid:ident, $PI:path) => {
+ impl SinPi for $vid {
+ #[inline]
+ fn sin_pi(self) -> Self {
+ (self * Self::splat($PI)).sin()
+ }
+ }
+ };
+}
+macro_rules! impl_def32 {
+ ($vid:ident) => {
+ impl_def!($vid, crate::f32::consts::PI);
+ };
+}
+macro_rules! impl_def64 {
+ ($vid:ident) => {
+ impl_def!($vid, crate::f64::consts::PI);
+ };
+}
+
+cfg_if! {
+ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05avx2128);
+ impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx2);
+ impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx2);
+
+ impl_unary!(f32x4: Sleef_sinpif4_u05avx2128);
+ impl_unary!(f32x8: Sleef_sinpif8_u05avx2);
+ impl_unary!(f64x2: Sleef_sinpid2_u05avx2128);
+ impl_unary!(f64x4: Sleef_sinpid4_u05avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4);
+ impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx);
+ impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx);
+
+ impl_unary!(f32x4: Sleef_sinpif4_u05sse4);
+ impl_unary!(f32x8: Sleef_sinpif8_u05avx);
+ impl_unary!(f64x2: Sleef_sinpid2_u05sse4);
+ impl_unary!(f64x4: Sleef_sinpid4_u05avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4);
+ impl_unary!(f32x16[q => f32x4]: Sleef_sinpif4_u05sse4);
+ impl_unary!(f64x8[q => f64x2]: Sleef_sinpid2_u05sse4);
+
+ impl_unary!(f32x4: Sleef_sinpif4_u05sse4);
+ impl_unary!(f32x8[h => f32x4]: Sleef_sinpif4_u05sse4);
+ impl_unary!(f64x2: Sleef_sinpid2_u05sse4);
+ impl_unary!(f64x4[h => f64x2]: Sleef_sinpid2_u05sse4);
+ } else {
+ impl_def32!(f32x2);
+ impl_def32!(f32x4);
+ impl_def32!(f32x8);
+ impl_def32!(f32x16);
+
+ impl_def64!(f64x2);
+ impl_def64!(f64x4);
+ impl_def64!(f64x8);
+ }
+ }
+ } else {
+ impl_def32!(f32x2);
+ impl_def32!(f32x4);
+ impl_def32!(f32x8);
+ impl_def32!(f32x16);
+
+ impl_def64!(f64x2);
+ impl_def64!(f64x4);
+ impl_def64!(f64x8);
+ }
+}
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs b/third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs
new file mode 100644
index 0000000000..67bb0a2a9c
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/math/float/sqrt.rs
@@ -0,0 +1,103 @@
+//! Vertical floating-point `sqrt`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors sqrt
+
+use crate::*;
+
+pub(crate) trait Sqrt {
+ fn sqrt(self) -> Self;
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.sqrt.v2f32"]
+ fn sqrt_v2f32(x: f32x2) -> f32x2;
+ #[link_name = "llvm.sqrt.v4f32"]
+ fn sqrt_v4f32(x: f32x4) -> f32x4;
+ #[link_name = "llvm.sqrt.v8f32"]
+ fn sqrt_v8f32(x: f32x8) -> f32x8;
+ #[link_name = "llvm.sqrt.v16f32"]
+ fn sqrt_v16f32(x: f32x16) -> f32x16;
+ /* FIXME 64-bit sqrtgle elem vectors
+ #[link_name = "llvm.sqrt.v1f64"]
+ fn sqrt_v1f64(x: f64x1) -> f64x1;
+ */
+ #[link_name = "llvm.sqrt.v2f64"]
+ fn sqrt_v2f64(x: f64x2) -> f64x2;
+ #[link_name = "llvm.sqrt.v4f64"]
+ fn sqrt_v4f64(x: f64x4) -> f64x4;
+ #[link_name = "llvm.sqrt.v8f64"]
+ fn sqrt_v8f64(x: f64x8) -> f64x8;
+
+ #[link_name = "llvm.sqrt.f32"]
+ fn sqrt_f32(x: f32) -> f32;
+ #[link_name = "llvm.sqrt.f64"]
+ fn sqrt_f64(x: f64) -> f64;
+}
+
+gen_unary_impl_table!(Sqrt, sqrt);
+
+cfg_if! {
+ if #[cfg(target_arch = "s390x")] {
+ // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
+ impl_unary!(f32x2[f32; 2]: sqrt_f32);
+ impl_unary!(f32x4[f32; 4]: sqrt_f32);
+ impl_unary!(f32x8[f32; 8]: sqrt_f32);
+ impl_unary!(f32x16[f32; 16]: sqrt_f32);
+
+ impl_unary!(f64x2[f64; 2]: sqrt_f64);
+ impl_unary!(f64x4[f64; 4]: sqrt_f64);
+ impl_unary!(f64x8[f64; 8]: sqrt_f64);
+ } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_avx2128);
+ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx2);
+ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx2);
+
+ impl_unary!(f32x4: Sleef_sqrtf4_avx2128);
+ impl_unary!(f32x8: Sleef_sqrtf8_avx2);
+ impl_unary!(f64x2: Sleef_sqrtd2_avx2128);
+ impl_unary!(f64x4: Sleef_sqrtd4_avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4);
+ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx);
+ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx);
+
+ impl_unary!(f32x4: Sleef_sqrtf4_sse4);
+ impl_unary!(f32x8: Sleef_sqrtf8_avx);
+ impl_unary!(f64x2: Sleef_sqrtd2_sse4);
+ impl_unary!(f64x4: Sleef_sqrtd4_avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4);
+ impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_sse4);
+ impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_sse4);
+
+ impl_unary!(f32x4: Sleef_sqrtf4_sse4);
+ impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_sse4);
+ impl_unary!(f64x2: Sleef_sqrtd2_sse4);
+ impl_unary!(f64x4[h => f64x2]: Sleef_sqrtd2_sse4);
+ } else {
+ impl_unary!(f32x2[f32; 2]: sqrt_f32);
+ impl_unary!(f32x16: sqrt_v16f32);
+ impl_unary!(f64x8: sqrt_v8f64);
+
+ impl_unary!(f32x4: sqrt_v4f32);
+ impl_unary!(f32x8: sqrt_v8f32);
+ impl_unary!(f64x2: sqrt_v2f64);
+ impl_unary!(f64x4: sqrt_v4f64);
+ }
+ }
+ } else {
+ impl_unary!(f32x2[f32; 2]: sqrt_f32);
+ impl_unary!(f32x4: sqrt_v4f32);
+ impl_unary!(f32x8: sqrt_v8f32);
+ impl_unary!(f32x16: sqrt_v16f32);
+
+ impl_unary!(f64x2: sqrt_v2f64);
+ impl_unary!(f64x4: sqrt_v4f64);
+ impl_unary!(f64x8: sqrt_v8f64);
+ }
+}
diff --git a/third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs b/third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs
new file mode 100644
index 0000000000..58a1de1f40
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/math/float/sqrte.rs
@@ -0,0 +1,67 @@
+//! Vertical floating-point `sqrt`
+#![allow(unused)]
+
+// FIXME 64-bit 1 elem vectors sqrte
+
+use crate::llvm::simd_fsqrt;
+use crate::*;
+
+pub(crate) trait Sqrte {
+ fn sqrte(self) -> Self;
+}
+
+gen_unary_impl_table!(Sqrte, sqrte);
+
+cfg_if! {
+ if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
+ use sleef_sys::*;
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35avx2128);
+ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx2);
+ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx2);
+
+ impl_unary!(f32x4: Sleef_sqrtf4_u35avx2128);
+ impl_unary!(f32x8: Sleef_sqrtf8_u35avx2);
+ impl_unary!(f64x2: Sleef_sqrtd2_u35avx2128);
+ impl_unary!(f64x4: Sleef_sqrtd4_u35avx2);
+ } else if #[cfg(target_feature = "avx")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4);
+ impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx);
+ impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx);
+
+ impl_unary!(f32x4: Sleef_sqrtf4_u35sse4);
+ impl_unary!(f32x8: Sleef_sqrtf8_u35avx);
+ impl_unary!(f64x2: Sleef_sqrtd2_u35sse4);
+ impl_unary!(f64x4: Sleef_sqrtd4_u35avx);
+ } else if #[cfg(target_feature = "sse4.2")] {
+ impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4);
+ impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_u35sse4);
+ impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_u35sse4);
+
+ impl_unary!(f32x4: Sleef_sqrtf4_u35sse4);
+ impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_u35sse4);
+ impl_unary!(f64x2: Sleef_sqrtd2_u35sse4);
+ impl_unary!(f64x4[h => f64x2]: Sleef_sqrtd2_u35sse4);
+ } else {
+ impl_unary!(f32x2[g]: simd_fsqrt);
+ impl_unary!(f32x16[g]: simd_fsqrt);
+ impl_unary!(f64x8[g]: simd_fsqrt);
+
+ impl_unary!(f32x4[g]: simd_fsqrt);
+ impl_unary!(f32x8[g]: simd_fsqrt);
+ impl_unary!(f64x2[g]: simd_fsqrt);
+ impl_unary!(f64x4[g]: simd_fsqrt);
+ }
+ }
+ } else {
+ impl_unary!(f32x2[g]: simd_fsqrt);
+ impl_unary!(f32x4[g]: simd_fsqrt);
+ impl_unary!(f32x8[g]: simd_fsqrt);
+ impl_unary!(f32x16[g]: simd_fsqrt);
+
+ impl_unary!(f64x2[g]: simd_fsqrt);
+ impl_unary!(f64x4[g]: simd_fsqrt);
+ impl_unary!(f64x8[g]: simd_fsqrt);
+ }
+}
diff --git a/third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs b/third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs
new file mode 100644
index 0000000000..55cbc297aa
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/pointer_sized_int.rs
@@ -0,0 +1,28 @@
+//! Provides `isize` and `usize`
+
+use cfg_if::cfg_if;
+
+cfg_if! {
+ if #[cfg(target_pointer_width = "8")] {
+ pub(crate) type isize_ = i8;
+ pub(crate) type usize_ = u8;
+ } else if #[cfg(target_pointer_width = "16")] {
+ pub(crate) type isize_ = i16;
+ pub(crate) type usize_ = u16;
+ } else if #[cfg(target_pointer_width = "32")] {
+ pub(crate) type isize_ = i32;
+ pub(crate) type usize_ = u32;
+
+ } else if #[cfg(target_pointer_width = "64")] {
+ pub(crate) type isize_ = i64;
+ pub(crate) type usize_ = u64;
+ } else if #[cfg(target_pointer_width = "64")] {
+ pub(crate) type isize_ = i64;
+ pub(crate) type usize_ = u64;
+ } else if #[cfg(target_pointer_width = "128")] {
+ pub(crate) type isize_ = i128;
+ pub(crate) type usize_ = u128;
+ } else {
+ compile_error!("unsupported target_pointer_width");
+ }
+}
diff --git a/third_party/rust/packed_simd/src/codegen/reductions.rs b/third_party/rust/packed_simd/src/codegen/reductions.rs
new file mode 100644
index 0000000000..302ca6d88f
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/reductions.rs
@@ -0,0 +1 @@
+pub(crate) mod mask;
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask.rs
new file mode 100644
index 0000000000..a78bcc5632
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask.rs
@@ -0,0 +1,69 @@
+//! Code generation workaround for `all()` mask horizontal reduction.
+//!
+//! Works around [LLVM bug 36702].
+//!
+//! [LLVM bug 36702]: https://bugs.llvm.org/show_bug.cgi?id=36702
+#![allow(unused_macros)]
+
+use crate::*;
+
+pub(crate) trait All: crate::marker::Sized {
+ unsafe fn all(self) -> bool;
+}
+
+pub(crate) trait Any: crate::marker::Sized {
+ unsafe fn any(self) -> bool;
+}
+
+#[macro_use]
+mod fallback_impl;
+
+cfg_if! {
+ if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
+ #[macro_use]
+ mod x86;
+ } else if #[cfg(all(target_arch = "arm", target_feature = "v7",
+ target_feature = "neon",
+ any(feature = "core_arch", libcore_neon)))] {
+ #[macro_use]
+ mod arm;
+ } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] {
+ #[macro_use]
+ mod aarch64;
+ } else {
+ #[macro_use]
+ mod fallback;
+ }
+}
+
+impl_mask_reductions!(m8x2);
+impl_mask_reductions!(m8x4);
+impl_mask_reductions!(m8x8);
+impl_mask_reductions!(m8x16);
+impl_mask_reductions!(m8x32);
+impl_mask_reductions!(m8x64);
+
+impl_mask_reductions!(m16x2);
+impl_mask_reductions!(m16x4);
+impl_mask_reductions!(m16x8);
+impl_mask_reductions!(m16x16);
+impl_mask_reductions!(m16x32);
+
+impl_mask_reductions!(m32x2);
+impl_mask_reductions!(m32x4);
+impl_mask_reductions!(m32x8);
+impl_mask_reductions!(m32x16);
+
+// FIXME: 64-bit single element vector
+// impl_mask_reductions!(m64x1);
+impl_mask_reductions!(m64x2);
+impl_mask_reductions!(m64x4);
+impl_mask_reductions!(m64x8);
+
+impl_mask_reductions!(m128x1);
+impl_mask_reductions!(m128x2);
+impl_mask_reductions!(m128x4);
+
+impl_mask_reductions!(msizex2);
+impl_mask_reductions!(msizex4);
+impl_mask_reductions!(msizex8);
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs
new file mode 100644
index 0000000000..b2db52c891
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs
@@ -0,0 +1,81 @@
+//! Mask reductions implementation for `aarch64` targets
+
+/// 128-bit wide vectors
+macro_rules! aarch64_128_neon_impl {
+ ($id:ident, $vmin:ident, $vmax:ident) => {
+ impl All for $id {
+ #[inline]
+ #[target_feature(enable = "neon")]
+ unsafe fn all(self) -> bool {
+ use crate::arch::aarch64::$vmin;
+ $vmin(crate::mem::transmute(self)) != 0
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ #[target_feature(enable = "neon")]
+ unsafe fn any(self) -> bool {
+ use crate::arch::aarch64::$vmax;
+ $vmax(crate::mem::transmute(self)) != 0
+ }
+ }
+ };
+}
+
+/// 64-bit wide vectors
+macro_rules! aarch64_64_neon_impl {
+ ($id:ident, $vec128:ident) => {
+ impl All for $id {
+ #[inline]
+ #[target_feature(enable = "neon")]
+ unsafe fn all(self) -> bool {
+ // Duplicates the 64-bit vector into a 128-bit one and
+ // calls all on that.
+ union U {
+ halves: ($id, $id),
+ vec: $vec128,
+ }
+ U { halves: (self, self) }.vec.all()
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ #[target_feature(enable = "neon")]
+ unsafe fn any(self) -> bool {
+ union U {
+ halves: ($id, $id),
+ vec: $vec128,
+ }
+ U { halves: (self, self) }.vec.any()
+ }
+ }
+ };
+}
+
+/// Mask reduction implementation for `aarch64` targets
+macro_rules! impl_mask_reductions {
+ // 64-bit wide masks
+ (m8x8) => {
+ aarch64_64_neon_impl!(m8x8, m8x16);
+ };
+ (m16x4) => {
+ aarch64_64_neon_impl!(m16x4, m16x8);
+ };
+ (m32x2) => {
+ aarch64_64_neon_impl!(m32x2, m32x4);
+ };
+ // 128-bit wide masks
+ (m8x16) => {
+ aarch64_128_neon_impl!(m8x16, vminvq_u8, vmaxvq_u8);
+ };
+ (m16x8) => {
+ aarch64_128_neon_impl!(m16x8, vminvq_u16, vmaxvq_u16);
+ };
+ (m32x4) => {
+ aarch64_128_neon_impl!(m32x4, vminvq_u32, vmaxvq_u32);
+ };
+ // Fallback to LLVM's default code-generation:
+ ($id:ident) => {
+ fallback_impl!($id);
+ };
+}
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs
new file mode 100644
index 0000000000..41c3cbc58a
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs
@@ -0,0 +1,56 @@
+//! Mask reductions implementation for `arm` targets
+
+/// Implementation for ARM + v7 + NEON for 64-bit or 128-bit wide vectors with
+/// more than two elements.
+macro_rules! arm_128_v7_neon_impl {
+ ($id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => {
+ impl All for $id {
+ #[inline]
+ #[target_feature(enable = "v7,neon")]
+ unsafe fn all(self) -> bool {
+ use crate::arch::arm::$vpmin;
+ use crate::mem::transmute;
+ union U {
+ halves: ($half, $half),
+ vec: $id,
+ }
+ let halves = U { vec: self }.halves;
+ let h: $half = transmute($vpmin(transmute(halves.0), transmute(halves.1)));
+ h.all()
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ #[target_feature(enable = "v7,neon")]
+ unsafe fn any(self) -> bool {
+ use crate::arch::arm::$vpmax;
+ use crate::mem::transmute;
+ union U {
+ halves: ($half, $half),
+ vec: $id,
+ }
+ let halves = U { vec: self }.halves;
+ let h: $half = transmute($vpmax(transmute(halves.0), transmute(halves.1)));
+ h.any()
+ }
+ }
+ };
+}
+
+/// Mask reduction implementation for `arm` targets
+macro_rules! impl_mask_reductions {
+ // 128-bit wide masks
+ (m8x16) => {
+ arm_128_v7_neon_impl!(m8x16, m8x8, vpmin_u8, vpmax_u8);
+ };
+ (m16x8) => {
+ arm_128_v7_neon_impl!(m16x8, m16x4, vpmin_u16, vpmax_u16);
+ };
+ (m32x4) => {
+ arm_128_v7_neon_impl!(m32x4, m32x2, vpmin_u32, vpmax_u32);
+ };
+ // Fallback to LLVM's default code-generation:
+ ($id:ident) => {
+ fallback_impl!($id);
+ };
+}
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs
new file mode 100644
index 0000000000..4c377a6878
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs
@@ -0,0 +1,8 @@
+//! Default mask reduction implementations.
+
+/// Default mask reduction implementation
+macro_rules! impl_mask_reductions {
+ ($id:ident) => {
+ fallback_impl!($id);
+ };
+}
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs
new file mode 100644
index 0000000000..0d246e2fda
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs
@@ -0,0 +1,237 @@
+//! Default implementation of a mask reduction for any target.
+
+macro_rules! fallback_to_other_impl {
+ ($id:ident, $other:ident) => {
+ impl All for $id {
+ #[inline]
+ unsafe fn all(self) -> bool {
+ let m: $other = crate::mem::transmute(self);
+ m.all()
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ unsafe fn any(self) -> bool {
+ let m: $other = crate::mem::transmute(self);
+ m.any()
+ }
+ }
+ };
+}
+
+/// Fallback implementation.
+macro_rules! fallback_impl {
+ // 16-bit wide masks:
+ (m8x2) => {
+ impl All for m8x2 {
+ #[inline]
+ unsafe fn all(self) -> bool {
+ let i: u16 = crate::mem::transmute(self);
+ i == u16::max_value()
+ }
+ }
+ impl Any for m8x2 {
+ #[inline]
+ unsafe fn any(self) -> bool {
+ let i: u16 = crate::mem::transmute(self);
+ i != 0
+ }
+ }
+ };
+ // 32-bit wide masks
+ (m8x4) => {
+ impl All for m8x4 {
+ #[inline]
+ unsafe fn all(self) -> bool {
+ let i: u32 = crate::mem::transmute(self);
+ i == u32::max_value()
+ }
+ }
+ impl Any for m8x4 {
+ #[inline]
+ unsafe fn any(self) -> bool {
+ let i: u32 = crate::mem::transmute(self);
+ i != 0
+ }
+ }
+ };
+ (m16x2) => {
+ fallback_to_other_impl!(m16x2, m8x4);
+ };
+ // 64-bit wide masks:
+ (m8x8) => {
+ impl All for m8x8 {
+ #[inline]
+ unsafe fn all(self) -> bool {
+ let i: u64 = crate::mem::transmute(self);
+ i == u64::max_value()
+ }
+ }
+ impl Any for m8x8 {
+ #[inline]
+ unsafe fn any(self) -> bool {
+ let i: u64 = crate::mem::transmute(self);
+ i != 0
+ }
+ }
+ };
+ (m16x4) => {
+ fallback_to_other_impl!(m16x4, m8x8);
+ };
+ (m32x2) => {
+ fallback_to_other_impl!(m32x2, m16x4);
+ };
+ // FIXME: 64x1 maxk
+ // 128-bit wide masks:
+ (m8x16) => {
+ impl All for m8x16 {
+ #[inline]
+ unsafe fn all(self) -> bool {
+ let i: u128 = crate::mem::transmute(self);
+ i == u128::max_value()
+ }
+ }
+ impl Any for m8x16 {
+ #[inline]
+ unsafe fn any(self) -> bool {
+ let i: u128 = crate::mem::transmute(self);
+ i != 0
+ }
+ }
+ };
+ (m16x8) => {
+ fallback_to_other_impl!(m16x8, m8x16);
+ };
+ (m32x4) => {
+ fallback_to_other_impl!(m32x4, m16x8);
+ };
+ (m64x2) => {
+ fallback_to_other_impl!(m64x2, m32x4);
+ };
+ (m128x1) => {
+ fallback_to_other_impl!(m128x1, m64x2);
+ };
+ // 256-bit wide masks
+ (m8x32) => {
+ impl All for m8x32 {
+ #[inline]
+ unsafe fn all(self) -> bool {
+ let i: [u128; 2] = crate::mem::transmute(self);
+ let o: [u128; 2] = [u128::max_value(); 2];
+ i == o
+ }
+ }
+ impl Any for m8x32 {
+ #[inline]
+ unsafe fn any(self) -> bool {
+ let i: [u128; 2] = crate::mem::transmute(self);
+ let o: [u128; 2] = [0; 2];
+ i != o
+ }
+ }
+ };
+ (m16x16) => {
+ fallback_to_other_impl!(m16x16, m8x32);
+ };
+ (m32x8) => {
+ fallback_to_other_impl!(m32x8, m16x16);
+ };
+ (m64x4) => {
+ fallback_to_other_impl!(m64x4, m32x8);
+ };
+ (m128x2) => {
+ fallback_to_other_impl!(m128x2, m64x4);
+ };
+ // 512-bit wide masks
+ (m8x64) => {
+ impl All for m8x64 {
+ #[inline]
+ unsafe fn all(self) -> bool {
+ let i: [u128; 4] = crate::mem::transmute(self);
+ let o: [u128; 4] = [u128::max_value(); 4];
+ i == o
+ }
+ }
+ impl Any for m8x64 {
+ #[inline]
+ unsafe fn any(self) -> bool {
+ let i: [u128; 4] = crate::mem::transmute(self);
+ let o: [u128; 4] = [0; 4];
+ i != o
+ }
+ }
+ };
+ (m16x32) => {
+ fallback_to_other_impl!(m16x32, m8x64);
+ };
+ (m32x16) => {
+ fallback_to_other_impl!(m32x16, m16x32);
+ };
+ (m64x8) => {
+ fallback_to_other_impl!(m64x8, m32x16);
+ };
+ (m128x4) => {
+ fallback_to_other_impl!(m128x4, m64x8);
+ };
+ // Masks with pointer-sized elements64
+ (msizex2) => {
+ cfg_if! {
+ if #[cfg(target_pointer_width = "64")] {
+ fallback_to_other_impl!(msizex2, m64x2);
+ } else if #[cfg(target_pointer_width = "32")] {
+ fallback_to_other_impl!(msizex2, m32x2);
+ } else {
+ compile_error!("unsupported target_pointer_width");
+ }
+ }
+ };
+ (msizex4) => {
+ cfg_if! {
+ if #[cfg(target_pointer_width = "64")] {
+ fallback_to_other_impl!(msizex4, m64x4);
+ } else if #[cfg(target_pointer_width = "32")] {
+ fallback_to_other_impl!(msizex4, m32x4);
+ } else {
+ compile_error!("unsupported target_pointer_width");
+ }
+ }
+ };
+ (msizex8) => {
+ cfg_if! {
+ if #[cfg(target_pointer_width = "64")] {
+ fallback_to_other_impl!(msizex8, m64x8);
+ } else if #[cfg(target_pointer_width = "32")] {
+ fallback_to_other_impl!(msizex8, m32x8);
+ } else {
+ compile_error!("unsupported target_pointer_width");
+ }
+ }
+ };
+}
+
+macro_rules! recurse_half {
+ ($vid:ident, $vid_h:ident) => {
+ impl All for $vid {
+ #[inline]
+ unsafe fn all(self) -> bool {
+ union U {
+ halves: ($vid_h, $vid_h),
+ vec: $vid,
+ }
+ let halves = U { vec: self }.halves;
+ halves.0.all() && halves.1.all()
+ }
+ }
+ impl Any for $vid {
+ #[inline]
+ unsafe fn any(self) -> bool {
+ union U {
+ halves: ($vid_h, $vid_h),
+ vec: $vid,
+ }
+ let halves = U { vec: self }.halves;
+ halves.0.any() || halves.1.any()
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs
new file mode 100644
index 0000000000..4bf5098065
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs
@@ -0,0 +1,216 @@
+//! Mask reductions implementation for `x86` and `x86_64` targets
+
+#[cfg(target_feature = "sse")]
+#[macro_use]
+mod sse;
+
+#[cfg(target_feature = "sse2")]
+#[macro_use]
+mod sse2;
+
+#[cfg(target_feature = "avx")]
+#[macro_use]
+mod avx;
+
+#[cfg(target_feature = "avx2")]
+#[macro_use]
+mod avx2;
+
+/// x86 64-bit m8x8 implementation
+macro_rules! x86_m8x8_impl {
+ ($id:ident) => {
+ fallback_impl!($id);
+ };
+}
+
+/// x86 128-bit m8x16 implementation
+macro_rules! x86_m8x16_impl {
+ ($id:ident) => {
+ cfg_if! {
+ if #[cfg(target_feature = "sse2")] {
+ x86_m8x16_sse2_impl!($id);
+ } else {
+ fallback_impl!($id);
+ }
+ }
+ };
+}
+
+/// x86 128-bit m32x4 implementation
+macro_rules! x86_m32x4_impl {
+ ($id:ident) => {
+ cfg_if! {
+ if #[cfg(target_feature = "sse")] {
+ x86_m32x4_sse_impl!($id);
+ } else {
+ fallback_impl!($id);
+ }
+ }
+ };
+}
+
+/// x86 128-bit m64x2 implementation
+macro_rules! x86_m64x2_impl {
+ ($id:ident) => {
+ cfg_if! {
+ if #[cfg(target_feature = "sse2")] {
+ x86_m64x2_sse2_impl!($id);
+ } else if #[cfg(target_feature = "sse")] {
+ x86_m32x4_sse_impl!($id);
+ } else {
+ fallback_impl!($id);
+ }
+ }
+ };
+}
+
+/// x86 256-bit m8x32 implementation
+macro_rules! x86_m8x32_impl {
+ ($id:ident, $half_id:ident) => {
+ cfg_if! {
+ if #[cfg(target_feature = "avx2")] {
+ x86_m8x32_avx2_impl!($id);
+ } else if #[cfg(target_feature = "avx")] {
+ x86_m8x32_avx_impl!($id);
+ } else if #[cfg(target_feature = "sse2")] {
+ recurse_half!($id, $half_id);
+ } else {
+ fallback_impl!($id);
+ }
+ }
+ };
+}
+
+/// x86 256-bit m32x8 implementation
+macro_rules! x86_m32x8_impl {
+ ($id:ident, $half_id:ident) => {
+ cfg_if! {
+ if #[cfg(target_feature = "avx")] {
+ x86_m32x8_avx_impl!($id);
+ } else if #[cfg(target_feature = "sse")] {
+ recurse_half!($id, $half_id);
+ } else {
+ fallback_impl!($id);
+ }
+ }
+ };
+}
+
+/// x86 256-bit m64x4 implementation
+macro_rules! x86_m64x4_impl {
+ ($id:ident, $half_id:ident) => {
+ cfg_if! {
+ if #[cfg(target_feature = "avx")] {
+ x86_m64x4_avx_impl!($id);
+ } else if #[cfg(target_feature = "sse")] {
+ recurse_half!($id, $half_id);
+ } else {
+ fallback_impl!($id);
+ }
+ }
+ };
+}
+
+/// Fallback implementation.
+macro_rules! x86_intr_impl {
+ ($id:ident) => {
+ impl All for $id {
+ #[inline]
+ unsafe fn all(self) -> bool {
+ use crate::llvm::simd_reduce_all;
+ simd_reduce_all(self.0)
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ unsafe fn any(self) -> bool {
+ use crate::llvm::simd_reduce_any;
+ simd_reduce_any(self.0)
+ }
+ }
+ };
+}
+
+/// Mask reduction implementation for `x86` and `x86_64` targets
+macro_rules! impl_mask_reductions {
+ // 64-bit wide masks
+ (m8x8) => {
+ x86_m8x8_impl!(m8x8);
+ };
+ (m16x4) => {
+ x86_m8x8_impl!(m16x4);
+ };
+ (m32x2) => {
+ x86_m8x8_impl!(m32x2);
+ };
+ // 128-bit wide masks
+ (m8x16) => {
+ x86_m8x16_impl!(m8x16);
+ };
+ (m16x8) => {
+ x86_m8x16_impl!(m16x8);
+ };
+ (m32x4) => {
+ x86_m32x4_impl!(m32x4);
+ };
+ (m64x2) => {
+ x86_m64x2_impl!(m64x2);
+ };
+ (m128x1) => {
+ x86_intr_impl!(m128x1);
+ };
+ // 256-bit wide masks:
+ (m8x32) => {
+ x86_m8x32_impl!(m8x32, m8x16);
+ };
+ (m16x16) => {
+ x86_m8x32_impl!(m16x16, m16x8);
+ };
+ (m32x8) => {
+ x86_m32x8_impl!(m32x8, m32x4);
+ };
+ (m64x4) => {
+ x86_m64x4_impl!(m64x4, m64x2);
+ };
+ (m128x2) => {
+ x86_intr_impl!(m128x2);
+ };
+ (msizex2) => {
+ cfg_if! {
+ if #[cfg(target_pointer_width = "64")] {
+ fallback_to_other_impl!(msizex2, m64x2);
+ } else if #[cfg(target_pointer_width = "32")] {
+ fallback_to_other_impl!(msizex2, m32x2);
+ } else {
+ compile_error!("unsupported target_pointer_width");
+ }
+ }
+ };
+ (msizex4) => {
+ cfg_if! {
+ if #[cfg(target_pointer_width = "64")] {
+ fallback_to_other_impl!(msizex4, m64x4);
+ } else if #[cfg(target_pointer_width = "32")] {
+ fallback_to_other_impl!(msizex4, m32x4);
+ } else {
+ compile_error!("unsupported target_pointer_width");
+ }
+ }
+ };
+ (msizex8) => {
+ cfg_if! {
+ if #[cfg(target_pointer_width = "64")] {
+ fallback_to_other_impl!(msizex8, m64x8);
+ } else if #[cfg(target_pointer_width = "32")] {
+ fallback_to_other_impl!(msizex8, m32x8);
+ } else {
+ compile_error!("unsupported target_pointer_width");
+ }
+ }
+ };
+
+ // Fallback to LLVM's default code-generation:
+ ($id:ident) => {
+ fallback_impl!($id);
+ };
+}
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs
new file mode 100644
index 0000000000..61f352d228
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs
@@ -0,0 +1,95 @@
+//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX`
+
+/// `x86`/`x86_64` 256-bit `AVX` implementation
+/// FIXME: it might be faster here to do two `_mm_movmask_epi8`
+#[cfg(target_feature = "avx")]
+macro_rules! x86_m8x32_avx_impl {
+ ($id:ident) => {
+ impl All for $id {
+ #[inline]
+ #[target_feature(enable = "avx")]
+ unsafe fn all(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm256_testc_si256;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm256_testc_si256;
+ _mm256_testc_si256(crate::mem::transmute(self), crate::mem::transmute($id::splat(true))) != 0
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ #[target_feature(enable = "avx")]
+ unsafe fn any(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm256_testz_si256;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm256_testz_si256;
+ _mm256_testz_si256(crate::mem::transmute(self), crate::mem::transmute(self)) == 0
+ }
+ }
+ };
+}
+
+/// `x86`/`x86_64` 256-bit m32x8 `AVX` implementation
+macro_rules! x86_m32x8_avx_impl {
+ ($id:ident) => {
+ impl All for $id {
+ #[inline]
+ #[target_feature(enable = "sse")]
+ unsafe fn all(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm256_movemask_ps;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm256_movemask_ps;
+ // _mm256_movemask_ps(a) creates a 8bit mask containing the
+ // most significant bit of each lane of `a`. If all bits are
+ // set, then all 8 lanes of the mask are true.
+ _mm256_movemask_ps(crate::mem::transmute(self)) == 0b_1111_1111_i32
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ #[target_feature(enable = "sse")]
+ unsafe fn any(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm256_movemask_ps;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm256_movemask_ps;
+
+ _mm256_movemask_ps(crate::mem::transmute(self)) != 0
+ }
+ }
+ };
+}
+
+/// `x86`/`x86_64` 256-bit m64x4 `AVX` implementation
+macro_rules! x86_m64x4_avx_impl {
+ ($id:ident) => {
+ impl All for $id {
+ #[inline]
+ #[target_feature(enable = "sse")]
+ unsafe fn all(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm256_movemask_pd;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm256_movemask_pd;
+ // _mm256_movemask_pd(a) creates a 4bit mask containing the
+ // most significant bit of each lane of `a`. If all bits are
+ // set, then all 4 lanes of the mask are true.
+ _mm256_movemask_pd(crate::mem::transmute(self)) == 0b_1111_i32
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ #[target_feature(enable = "sse")]
+ unsafe fn any(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm256_movemask_pd;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm256_movemask_pd;
+
+ _mm256_movemask_pd(crate::mem::transmute(self)) != 0
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs
new file mode 100644
index 0000000000..d37d023420
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs
@@ -0,0 +1,35 @@
+//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX2`.
+#![allow(unused)]
+
+/// x86/x86_64 256-bit m8x32 AVX2 implementation
+macro_rules! x86_m8x32_avx2_impl {
+ ($id:ident) => {
+ impl All for $id {
+ #[inline]
+ #[target_feature(enable = "sse2")]
+ unsafe fn all(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm256_movemask_epi8;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm256_movemask_epi8;
+ // _mm256_movemask_epi8(a) creates a 32bit mask containing the
+ // most significant bit of each byte of `a`. If all
+ // bits are set, then all 32 lanes of the mask are
+ // true.
+ _mm256_movemask_epi8(crate::mem::transmute(self)) == -1_i32
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ #[target_feature(enable = "sse2")]
+ unsafe fn any(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm256_movemask_epi8;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm256_movemask_epi8;
+
+ _mm256_movemask_epi8(crate::mem::transmute(self)) != 0
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs
new file mode 100644
index 0000000000..e0c9aee92b
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs
@@ -0,0 +1,35 @@
+//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE`.
+#![allow(unused)]
+
+/// `x86`/`x86_64` 128-bit `m32x4` `SSE` implementation
+macro_rules! x86_m32x4_sse_impl {
+ ($id:ident) => {
+ impl All for $id {
+ #[inline]
+ #[target_feature(enable = "sse")]
+ unsafe fn all(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm_movemask_ps;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm_movemask_ps;
+ // _mm_movemask_ps(a) creates a 4bit mask containing the
+ // most significant bit of each lane of `a`. If all
+ // bits are set, then all 4 lanes of the mask are
+ // true.
+ _mm_movemask_ps(crate::mem::transmute(self)) == 0b_1111_i32
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ #[target_feature(enable = "sse")]
+ unsafe fn any(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm_movemask_ps;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm_movemask_ps;
+
+ _mm_movemask_ps(crate::mem::transmute(self)) != 0
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs
new file mode 100644
index 0000000000..bbb52fa47e
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs
@@ -0,0 +1,68 @@
+//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE2`.
+#![allow(unused)]
+
+/// `x86`/`x86_64` 128-bit m64x2 `SSE2` implementation
+macro_rules! x86_m64x2_sse2_impl {
+ ($id:ident) => {
+ impl All for $id {
+ #[inline]
+ #[target_feature(enable = "sse")]
+ unsafe fn all(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm_movemask_pd;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm_movemask_pd;
+ // _mm_movemask_pd(a) creates a 2bit mask containing the
+ // most significant bit of each lane of `a`. If all
+ // bits are set, then all 2 lanes of the mask are
+ // true.
+ _mm_movemask_pd(crate::mem::transmute(self)) == 0b_11_i32
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ #[target_feature(enable = "sse")]
+ unsafe fn any(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm_movemask_pd;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm_movemask_pd;
+
+ _mm_movemask_pd(crate::mem::transmute(self)) != 0
+ }
+ }
+ };
+}
+
+/// `x86`/`x86_64` 128-bit m8x16 `SSE2` implementation
+macro_rules! x86_m8x16_sse2_impl {
+ ($id:ident) => {
+ impl All for $id {
+ #[inline]
+ #[target_feature(enable = "sse2")]
+ unsafe fn all(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm_movemask_epi8;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm_movemask_epi8;
+ // _mm_movemask_epi8(a) creates a 16bit mask containing the
+ // most significant bit of each byte of `a`. If all
+ // bits are set, then all 16 lanes of the mask are
+ // true.
+ _mm_movemask_epi8(crate::mem::transmute(self)) == i32::from(u16::max_value())
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ #[target_feature(enable = "sse2")]
+ unsafe fn any(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm_movemask_epi8;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm_movemask_epi8;
+
+ _mm_movemask_epi8(crate::mem::transmute(self)) != 0
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/codegen/shuffle.rs b/third_party/rust/packed_simd/src/codegen/shuffle.rs
new file mode 100644
index 0000000000..d3acd48f5b
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/shuffle.rs
@@ -0,0 +1,150 @@
+//! Implementations of the `ShuffleResult` trait for the different numbers of
+//! lanes and vector element types.
+
+use crate::masks::*;
+use crate::sealed::{Seal, Shuffle};
+
+macro_rules! impl_shuffle {
+ ($array:ty, $base:ty, $out:ty) => {
+ impl Seal<$array> for $base {}
+ impl Shuffle<$array> for $base {
+ type Output = $out;
+ }
+ };
+}
+
+impl_shuffle! { [u32; 2], i8, crate::codegen::i8x2 }
+impl_shuffle! { [u32; 4], i8, crate::codegen::i8x4 }
+impl_shuffle! { [u32; 8], i8, crate::codegen::i8x8 }
+impl_shuffle! { [u32; 16], i8, crate::codegen::i8x16 }
+impl_shuffle! { [u32; 32], i8, crate::codegen::i8x32 }
+impl_shuffle! { [u32; 64], i8, crate::codegen::i8x64 }
+
+impl_shuffle! { [u32; 2], u8, crate::codegen::u8x2 }
+impl_shuffle! { [u32; 4], u8, crate::codegen::u8x4 }
+impl_shuffle! { [u32; 8], u8, crate::codegen::u8x8 }
+impl_shuffle! { [u32; 16], u8, crate::codegen::u8x16 }
+impl_shuffle! { [u32; 32], u8, crate::codegen::u8x32 }
+impl_shuffle! { [u32; 64], u8, crate::codegen::u8x64 }
+
+impl_shuffle! { [u32; 2], m8, crate::codegen::m8x2 }
+impl_shuffle! { [u32; 4], m8, crate::codegen::m8x4 }
+impl_shuffle! { [u32; 8], m8, crate::codegen::m8x8 }
+impl_shuffle! { [u32; 16], m8, crate::codegen::m8x16 }
+impl_shuffle! { [u32; 32], m8, crate::codegen::m8x32 }
+impl_shuffle! { [u32; 64], m8, crate::codegen::m8x64 }
+
+impl_shuffle! { [u32; 2], i16, crate::codegen::i16x2 }
+impl_shuffle! { [u32; 4], i16, crate::codegen::i16x4 }
+impl_shuffle! { [u32; 8], i16, crate::codegen::i16x8 }
+impl_shuffle! { [u32; 16], i16, crate::codegen::i16x16 }
+impl_shuffle! { [u32; 32], i16, crate::codegen::i16x32 }
+
+impl_shuffle! { [u32; 2], u16, crate::codegen::u16x2 }
+impl_shuffle! { [u32; 4], u16, crate::codegen::u16x4 }
+impl_shuffle! { [u32; 8], u16, crate::codegen::u16x8 }
+impl_shuffle! { [u32; 16], u16, crate::codegen::u16x16 }
+impl_shuffle! { [u32; 32], u16, crate::codegen::u16x32 }
+
+impl_shuffle! { [u32; 2], m16, crate::codegen::m16x2 }
+impl_shuffle! { [u32; 4], m16, crate::codegen::m16x4 }
+impl_shuffle! { [u32; 8], m16, crate::codegen::m16x8 }
+impl_shuffle! { [u32; 16], m16, crate::codegen::m16x16 }
+
+impl_shuffle! { [u32; 2], i32, crate::codegen::i32x2 }
+impl_shuffle! { [u32; 4], i32, crate::codegen::i32x4 }
+impl_shuffle! { [u32; 8], i32, crate::codegen::i32x8 }
+impl_shuffle! { [u32; 16], i32, crate::codegen::i32x16 }
+
+impl_shuffle! { [u32; 2], u32, crate::codegen::u32x2 }
+impl_shuffle! { [u32; 4], u32, crate::codegen::u32x4 }
+impl_shuffle! { [u32; 8], u32, crate::codegen::u32x8 }
+impl_shuffle! { [u32; 16], u32, crate::codegen::u32x16 }
+
+impl_shuffle! { [u32; 2], f32, crate::codegen::f32x2 }
+impl_shuffle! { [u32; 4], f32, crate::codegen::f32x4 }
+impl_shuffle! { [u32; 8], f32, crate::codegen::f32x8 }
+impl_shuffle! { [u32; 16], f32, crate::codegen::f32x16 }
+
+impl_shuffle! { [u32; 2], m32, crate::codegen::m32x2 }
+impl_shuffle! { [u32; 4], m32, crate::codegen::m32x4 }
+impl_shuffle! { [u32; 8], m32, crate::codegen::m32x8 }
+impl_shuffle! { [u32; 16], m32, crate::codegen::m32x16 }
+
+/* FIXME: 64-bit single element vector
+impl_shuffle! { [u32; 1], i64, crate::codegen::i64x1 }
+*/
+impl_shuffle! { [u32; 2], i64, crate::codegen::i64x2 }
+impl_shuffle! { [u32; 4], i64, crate::codegen::i64x4 }
+impl_shuffle! { [u32; 8], i64, crate::codegen::i64x8 }
+
+/* FIXME: 64-bit single element vector
+impl_shuffle! { [u32; 1], i64, crate::codegen::i64x1 }
+*/
+impl_shuffle! { [u32; 2], u64, crate::codegen::u64x2 }
+impl_shuffle! { [u32; 4], u64, crate::codegen::u64x4 }
+impl_shuffle! { [u32; 8], u64, crate::codegen::u64x8 }
+
+/* FIXME: 64-bit single element vector
+impl_shuffle! { [u32; 1], i64, crate::codegen::i64x1 }
+*/
+impl_shuffle! { [u32; 2], f64, crate::codegen::f64x2 }
+impl_shuffle! { [u32; 4], f64, crate::codegen::f64x4 }
+impl_shuffle! { [u32; 8], f64, crate::codegen::f64x8 }
+
+/* FIXME: 64-bit single element vector
+impl_shuffle! { [u32; 1], i64, crate::codegen::i64x1 }
+*/
+impl_shuffle! { [u32; 2], m64, crate::codegen::m64x2 }
+impl_shuffle! { [u32; 4], m64, crate::codegen::m64x4 }
+impl_shuffle! { [u32; 8], m64, crate::codegen::m64x8 }
+
+impl_shuffle! { [u32; 2], isize, crate::codegen::isizex2 }
+impl_shuffle! { [u32; 4], isize, crate::codegen::isizex4 }
+impl_shuffle! { [u32; 8], isize, crate::codegen::isizex8 }
+
+impl_shuffle! { [u32; 2], usize, crate::codegen::usizex2 }
+impl_shuffle! { [u32; 4], usize, crate::codegen::usizex4 }
+impl_shuffle! { [u32; 8], usize, crate::codegen::usizex8 }
+
+impl_shuffle! { [u32; 2], msize, crate::codegen::msizex2 }
+impl_shuffle! { [u32; 4], msize, crate::codegen::msizex4 }
+impl_shuffle! { [u32; 8], msize, crate::codegen::msizex8 }
+
+impl<T> Seal<[u32; 2]> for *const T {}
+impl<T> Shuffle<[u32; 2]> for *const T {
+ type Output = crate::codegen::cptrx2<T>;
+}
+impl<T> Seal<[u32; 4]> for *const T {}
+impl<T> Shuffle<[u32; 4]> for *const T {
+ type Output = crate::codegen::cptrx4<T>;
+}
+impl<T> Seal<[u32; 8]> for *const T {}
+impl<T> Shuffle<[u32; 8]> for *const T {
+ type Output = crate::codegen::cptrx8<T>;
+}
+
+impl<T> Seal<[u32; 2]> for *mut T {}
+impl<T> Shuffle<[u32; 2]> for *mut T {
+ type Output = crate::codegen::mptrx2<T>;
+}
+impl<T> Seal<[u32; 4]> for *mut T {}
+impl<T> Shuffle<[u32; 4]> for *mut T {
+ type Output = crate::codegen::mptrx4<T>;
+}
+impl<T> Seal<[u32; 8]> for *mut T {}
+impl<T> Shuffle<[u32; 8]> for *mut T {
+ type Output = crate::codegen::mptrx8<T>;
+}
+
+impl_shuffle! { [u32; 1], i128, crate::codegen::i128x1 }
+impl_shuffle! { [u32; 2], i128, crate::codegen::i128x2 }
+impl_shuffle! { [u32; 4], i128, crate::codegen::i128x4 }
+
+impl_shuffle! { [u32; 1], u128, crate::codegen::u128x1 }
+impl_shuffle! { [u32; 2], u128, crate::codegen::u128x2 }
+impl_shuffle! { [u32; 4], u128, crate::codegen::u128x4 }
+
+impl_shuffle! { [u32; 1], m128, crate::codegen::m128x1 }
+impl_shuffle! { [u32; 2], m128, crate::codegen::m128x2 }
+impl_shuffle! { [u32; 4], m128, crate::codegen::m128x4 }
diff --git a/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs b/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs
new file mode 100644
index 0000000000..19d457a45b
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/shuffle1_dyn.rs
@@ -0,0 +1,408 @@
+//! Shuffle vector lanes with run-time indices.
+
+use crate::*;
+
+pub trait Shuffle1Dyn {
+ type Indices;
+ fn shuffle1_dyn(self, _: Self::Indices) -> Self;
+}
+
+// Fallback implementation
+macro_rules! impl_fallback {
+ ($id:ident) => {
+ impl Shuffle1Dyn for $id {
+ type Indices = Self;
+ #[inline]
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+ let mut result = Self::splat(0);
+ for i in 0..$id::lanes() {
+ result = result.replace(i, self.extract(indices.extract(i) as usize));
+ }
+ result
+ }
+ }
+ };
+}
+
+macro_rules! impl_shuffle1_dyn {
+ (u8x8) => {
+ cfg_if! {
+ if #[cfg(all(
+ any(
+ all(target_arch = "aarch64", target_feature = "neon"),
+ all(target_arch = "doesnotexist", target_feature = "v7",
+ target_feature = "neon")
+ ),
+ any(feature = "core_arch", libcore_neon)
+ )
+ )] {
+ impl Shuffle1Dyn for u8x8 {
+ type Indices = Self;
+ #[inline]
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+ #[cfg(target_arch = "aarch64")]
+ use crate::arch::aarch64::vtbl1_u8;
+ #[cfg(target_arch = "doesnotexist")]
+ use crate::arch::arm::vtbl1_u8;
+
+ // This is safe because the binary is compiled with
+ // neon enabled at compile-time and can therefore only
+ // run on CPUs that have it enabled.
+ unsafe {
+ Simd(mem::transmute(
+ vtbl1_u8(mem::transmute(self.0),
+ crate::mem::transmute(indices.0))
+ ))
+ }
+ }
+ }
+ } else {
+ impl_fallback!(u8x8);
+ }
+ }
+ };
+ (u8x16) => {
+ cfg_if! {
+ if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
+ target_feature = "ssse3"))] {
+ impl Shuffle1Dyn for u8x16 {
+ type Indices = Self;
+ #[inline]
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm_shuffle_epi8;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm_shuffle_epi8;
+ // This is safe because the binary is compiled with
+ // ssse3 enabled at compile-time and can therefore only
+ // run on CPUs that have it enabled.
+ unsafe {
+ Simd(mem::transmute(
+ _mm_shuffle_epi8(mem::transmute(self.0),
+ crate::mem::transmute(indices))
+ ))
+ }
+ }
+ }
+ } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon",
+ any(feature = "core_arch", libcore_neon)))] {
+ impl Shuffle1Dyn for u8x16 {
+ type Indices = Self;
+ #[inline]
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+ use crate::arch::aarch64::vqtbl1q_u8;
+
+ // This is safe because the binary is compiled with
+ // neon enabled at compile-time and can therefore only
+ // run on CPUs that have it enabled.
+ unsafe {
+ Simd(mem::transmute(
+ vqtbl1q_u8(mem::transmute(self.0),
+ crate::mem::transmute(indices.0))
+ ))
+ }
+ }
+ }
+ } else if #[cfg(all(target_arch = "doesnotexist", target_feature = "v7",
+ target_feature = "neon",
+ any(feature = "core_arch", libcore_neon)))] {
+ impl Shuffle1Dyn for u8x16 {
+ type Indices = Self;
+ #[inline]
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+ use crate::arch::arm::vtbl2_u8;
+
+ // This is safe because the binary is compiled with
+ // neon enabled at compile-time and can therefore only
+ // run on CPUs that have it enabled.
+ unsafe {
+ union U {
+ j: u8x16,
+ s: (u8x8, u8x8),
+ }
+
+ let (i0, i1) = U { j: y }.s;
+
+ let r0 = vtbl2_u8(
+ mem::transmute(x),
+ crate::mem::transmute(i0)
+ );
+ let r1 = vtbl2_u8(
+ mem::transmute(x),
+ crate::mem::transmute(i1)
+ );
+
+ let r = U { s: (r0, r1) }.j;
+
+ Simd(mem::transmute(r))
+ }
+ }
+ }
+ } else {
+ impl_fallback!(u8x16);
+ }
+ }
+ };
+ (u16x8) => {
+ impl Shuffle1Dyn for u16x8 {
+ type Indices = Self;
+ #[inline]
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+ let indices: u8x8 = (indices * 2).cast();
+ let indices: u8x16 = shuffle!(indices, [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7]);
+ let v = u8x16::new(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1);
+ let indices = indices + v;
+ unsafe {
+ let s: u8x16 = crate::mem::transmute(self);
+ crate::mem::transmute(s.shuffle1_dyn(indices))
+ }
+ }
+ }
+ };
+ (u32x4) => {
+ cfg_if! {
+ if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
+ target_feature = "avx"))] {
+ impl Shuffle1Dyn for u32x4 {
+ type Indices = Self;
+ #[inline]
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::{_mm_permutevar_ps};
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::{_mm_permutevar_ps};
+
+ unsafe {
+ crate::mem::transmute(
+ _mm_permutevar_ps(
+ crate::mem::transmute(self.0),
+ crate::mem::transmute(indices.0)
+ )
+ )
+ }
+ }
+ }
+ } else {
+ impl Shuffle1Dyn for u32x4 {
+ type Indices = Self;
+ #[inline]
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+ let indices: u8x4 = (indices * 4).cast();
+ let indices: u8x16 = shuffle!(
+ indices,
+ [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3]
+ );
+ let v = u8x16::new(
+ 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
+ );
+ let indices = indices + v;
+ unsafe {
+ let s: u8x16 =crate::mem::transmute(self);
+ crate::mem::transmute(s.shuffle1_dyn(indices))
+ }
+ }
+ }
+ }
+ }
+ };
+ (u64x2) => {
+ cfg_if! {
+ if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
+ target_feature = "avx"))] {
+ impl Shuffle1Dyn for u64x2 {
+ type Indices = Self;
+ #[inline]
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::{_mm_permutevar_pd};
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::{_mm_permutevar_pd};
+ // _mm_permutevar_pd uses the _second_ bit of each
+ // element to perform the selection, that is: 0b00 => 0,
+ // 0b10 => 1:
+ let indices = indices << 1;
+ unsafe {
+ crate::mem::transmute(
+ _mm_permutevar_pd(
+ crate::mem::transmute(self),
+ crate::mem::transmute(indices)
+ )
+ )
+ }
+ }
+ }
+ } else {
+ impl Shuffle1Dyn for u64x2 {
+ type Indices = Self;
+ #[inline]
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+ let indices: u8x2 = (indices * 8).cast();
+ let indices: u8x16 = shuffle!(
+ indices,
+ [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
+ );
+ let v = u8x16::new(
+ 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
+ );
+ let indices = indices + v;
+ unsafe {
+ let s: u8x16 =crate::mem::transmute(self);
+ crate::mem::transmute(s.shuffle1_dyn(indices))
+ }
+ }
+ }
+ }
+ }
+ };
+ (u128x1) => {
+ impl Shuffle1Dyn for u128x1 {
+ type Indices = Self;
+ #[inline]
+ fn shuffle1_dyn(self, _indices: Self::Indices) -> Self {
+ self
+ }
+ }
+ };
+ ($id:ident) => {
+ impl_fallback!($id);
+ };
+}
+
+impl_shuffle1_dyn!(u8x2);
+impl_shuffle1_dyn!(u8x4);
+impl_shuffle1_dyn!(u8x8);
+impl_shuffle1_dyn!(u8x16);
+impl_shuffle1_dyn!(u8x32);
+impl_shuffle1_dyn!(u8x64);
+
+impl_shuffle1_dyn!(u16x2);
+impl_shuffle1_dyn!(u16x4);
+impl_shuffle1_dyn!(u16x8);
+impl_shuffle1_dyn!(u16x16);
+impl_shuffle1_dyn!(u16x32);
+
+impl_shuffle1_dyn!(u32x2);
+impl_shuffle1_dyn!(u32x4);
+impl_shuffle1_dyn!(u32x8);
+impl_shuffle1_dyn!(u32x16);
+
+impl_shuffle1_dyn!(u64x2);
+impl_shuffle1_dyn!(u64x4);
+impl_shuffle1_dyn!(u64x8);
+
+impl_shuffle1_dyn!(usizex2);
+impl_shuffle1_dyn!(usizex4);
+impl_shuffle1_dyn!(usizex8);
+
+impl_shuffle1_dyn!(u128x1);
+impl_shuffle1_dyn!(u128x2);
+impl_shuffle1_dyn!(u128x4);
+
+// Implementation for non-unsigned vector types
+macro_rules! impl_shuffle1_dyn_non_u {
+ ($id:ident, $uid:ident) => {
+ impl Shuffle1Dyn for $id {
+ type Indices = $uid;
+ #[inline]
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+ unsafe {
+ let u: $uid = crate::mem::transmute(self);
+ crate::mem::transmute(u.shuffle1_dyn(indices))
+ }
+ }
+ }
+ };
+}
+
+impl_shuffle1_dyn_non_u!(i8x2, u8x2);
+impl_shuffle1_dyn_non_u!(i8x4, u8x4);
+impl_shuffle1_dyn_non_u!(i8x8, u8x8);
+impl_shuffle1_dyn_non_u!(i8x16, u8x16);
+impl_shuffle1_dyn_non_u!(i8x32, u8x32);
+impl_shuffle1_dyn_non_u!(i8x64, u8x64);
+
+impl_shuffle1_dyn_non_u!(i16x2, u16x2);
+impl_shuffle1_dyn_non_u!(i16x4, u16x4);
+impl_shuffle1_dyn_non_u!(i16x8, u16x8);
+impl_shuffle1_dyn_non_u!(i16x16, u16x16);
+impl_shuffle1_dyn_non_u!(i16x32, u16x32);
+
+impl_shuffle1_dyn_non_u!(i32x2, u32x2);
+impl_shuffle1_dyn_non_u!(i32x4, u32x4);
+impl_shuffle1_dyn_non_u!(i32x8, u32x8);
+impl_shuffle1_dyn_non_u!(i32x16, u32x16);
+
+impl_shuffle1_dyn_non_u!(i64x2, u64x2);
+impl_shuffle1_dyn_non_u!(i64x4, u64x4);
+impl_shuffle1_dyn_non_u!(i64x8, u64x8);
+
+impl_shuffle1_dyn_non_u!(isizex2, usizex2);
+impl_shuffle1_dyn_non_u!(isizex4, usizex4);
+impl_shuffle1_dyn_non_u!(isizex8, usizex8);
+
+impl_shuffle1_dyn_non_u!(i128x1, u128x1);
+impl_shuffle1_dyn_non_u!(i128x2, u128x2);
+impl_shuffle1_dyn_non_u!(i128x4, u128x4);
+
+impl_shuffle1_dyn_non_u!(m8x2, u8x2);
+impl_shuffle1_dyn_non_u!(m8x4, u8x4);
+impl_shuffle1_dyn_non_u!(m8x8, u8x8);
+impl_shuffle1_dyn_non_u!(m8x16, u8x16);
+impl_shuffle1_dyn_non_u!(m8x32, u8x32);
+impl_shuffle1_dyn_non_u!(m8x64, u8x64);
+
+impl_shuffle1_dyn_non_u!(m16x2, u16x2);
+impl_shuffle1_dyn_non_u!(m16x4, u16x4);
+impl_shuffle1_dyn_non_u!(m16x8, u16x8);
+impl_shuffle1_dyn_non_u!(m16x16, u16x16);
+impl_shuffle1_dyn_non_u!(m16x32, u16x32);
+
+impl_shuffle1_dyn_non_u!(m32x2, u32x2);
+impl_shuffle1_dyn_non_u!(m32x4, u32x4);
+impl_shuffle1_dyn_non_u!(m32x8, u32x8);
+impl_shuffle1_dyn_non_u!(m32x16, u32x16);
+
+impl_shuffle1_dyn_non_u!(m64x2, u64x2);
+impl_shuffle1_dyn_non_u!(m64x4, u64x4);
+impl_shuffle1_dyn_non_u!(m64x8, u64x8);
+
+impl_shuffle1_dyn_non_u!(msizex2, usizex2);
+impl_shuffle1_dyn_non_u!(msizex4, usizex4);
+impl_shuffle1_dyn_non_u!(msizex8, usizex8);
+
+impl_shuffle1_dyn_non_u!(m128x1, u128x1);
+impl_shuffle1_dyn_non_u!(m128x2, u128x2);
+impl_shuffle1_dyn_non_u!(m128x4, u128x4);
+
+impl_shuffle1_dyn_non_u!(f32x2, u32x2);
+impl_shuffle1_dyn_non_u!(f32x4, u32x4);
+impl_shuffle1_dyn_non_u!(f32x8, u32x8);
+impl_shuffle1_dyn_non_u!(f32x16, u32x16);
+
+impl_shuffle1_dyn_non_u!(f64x2, u64x2);
+impl_shuffle1_dyn_non_u!(f64x4, u64x4);
+impl_shuffle1_dyn_non_u!(f64x8, u64x8);
+
+// Implementation for non-unsigned vector types
+macro_rules! impl_shuffle1_dyn_ptr {
+ ($id:ident, $uid:ident) => {
+ impl<T> Shuffle1Dyn for $id<T> {
+ type Indices = $uid;
+ #[inline]
+ fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
+ unsafe {
+ let u: $uid = crate::mem::transmute(self);
+ crate::mem::transmute(u.shuffle1_dyn(indices))
+ }
+ }
+ }
+ };
+}
+
+impl_shuffle1_dyn_ptr!(cptrx2, usizex2);
+impl_shuffle1_dyn_ptr!(cptrx4, usizex4);
+impl_shuffle1_dyn_ptr!(cptrx8, usizex8);
+
+impl_shuffle1_dyn_ptr!(mptrx2, usizex2);
+impl_shuffle1_dyn_ptr!(mptrx4, usizex4);
+impl_shuffle1_dyn_ptr!(mptrx8, usizex8);
diff --git a/third_party/rust/packed_simd/src/codegen/swap_bytes.rs b/third_party/rust/packed_simd/src/codegen/swap_bytes.rs
new file mode 100644
index 0000000000..9cf34a3e04
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/swap_bytes.rs
@@ -0,0 +1,149 @@
+//! Horizontal swap bytes reductions.
+
+// FIXME: investigate using `llvm.bswap`
+// https://github.com/rust-lang-nursery/packed_simd/issues/19
+
+use crate::*;
+
+pub(crate) trait SwapBytes {
+ fn swap_bytes(self) -> Self;
+}
+
+macro_rules! impl_swap_bytes {
+ (v16: $($id:ident,)+) => {
+ $(
+ impl SwapBytes for $id {
+ #[inline]
+ fn swap_bytes(self) -> Self {
+ shuffle!(self, [1, 0])
+ }
+ }
+ )+
+ };
+ (v32: $($id:ident,)+) => {
+ $(
+ impl SwapBytes for $id {
+ #[inline]
+ #[allow(clippy::useless_transmute)]
+ fn swap_bytes(self) -> Self {
+ unsafe {
+ let bytes: u8x4 = crate::mem::transmute(self);
+ let result: u8x4 = shuffle!(bytes, [3, 2, 1, 0]);
+ crate::mem::transmute(result)
+ }
+ }
+ }
+ )+
+ };
+ (v64: $($id:ident,)+) => {
+ $(
+ impl SwapBytes for $id {
+ #[inline]
+ #[allow(clippy::useless_transmute)]
+ fn swap_bytes(self) -> Self {
+ unsafe {
+ let bytes: u8x8 = crate::mem::transmute(self);
+ let result: u8x8 = shuffle!(
+ bytes, [7, 6, 5, 4, 3, 2, 1, 0]
+ );
+ crate::mem::transmute(result)
+ }
+ }
+ }
+ )+
+ };
+ (v128: $($id:ident,)+) => {
+ $(
+ impl SwapBytes for $id {
+ #[inline]
+ #[allow(clippy::useless_transmute)]
+ fn swap_bytes(self) -> Self {
+ unsafe {
+ let bytes: u8x16 = crate::mem::transmute(self);
+ let result: u8x16 = shuffle!(bytes, [
+ 15, 14, 13, 12, 11, 10, 9, 8,
+ 7, 6, 5, 4, 3, 2, 1, 0
+ ]);
+ crate::mem::transmute(result)
+ }
+ }
+ }
+ )+
+ };
+ (v256: $($id:ident,)+) => {
+ $(
+ impl SwapBytes for $id {
+ #[inline]
+ #[allow(clippy::useless_transmute)]
+ fn swap_bytes(self) -> Self {
+ unsafe {
+ let bytes: u8x32 = crate::mem::transmute(self);
+ let result: u8x32 = shuffle!(bytes, [
+ 31, 30, 29, 28, 27, 26, 25, 24,
+ 23, 22, 21, 20, 19, 18, 17, 16,
+ 15, 14, 13, 12, 11, 10, 9, 8,
+ 7, 6, 5, 4, 3, 2, 1, 0
+ ]);
+ crate::mem::transmute(result)
+ }
+ }
+ }
+ )+
+ };
+ (v512: $($id:ident,)+) => {
+ $(
+ impl SwapBytes for $id {
+ #[inline]
+ #[allow(clippy::useless_transmute)]
+ fn swap_bytes(self) -> Self {
+ unsafe {
+ let bytes: u8x64 = crate::mem::transmute(self);
+ let result: u8x64 = shuffle!(bytes, [
+ 63, 62, 61, 60, 59, 58, 57, 56,
+ 55, 54, 53, 52, 51, 50, 49, 48,
+ 47, 46, 45, 44, 43, 42, 41, 40,
+ 39, 38, 37, 36, 35, 34, 33, 32,
+ 31, 30, 29, 28, 27, 26, 25, 24,
+ 23, 22, 21, 20, 19, 18, 17, 16,
+ 15, 14, 13, 12, 11, 10, 9, 8,
+ 7, 6, 5, 4, 3, 2, 1, 0
+ ]);
+ crate::mem::transmute(result)
+ }
+ }
+ }
+ )+
+ };
+}
+
+impl_swap_bytes!(v16: u8x2, i8x2,);
+impl_swap_bytes!(v32: u8x4, i8x4, u16x2, i16x2,);
+// FIXME: 64-bit single element vector
+impl_swap_bytes!(v64: u8x8, i8x8, u16x4, i16x4, u32x2, i32x2 /* u64x1, i64x1, */,);
+
+impl_swap_bytes!(v128: u8x16, i8x16, u16x8, i16x8, u32x4, i32x4, u64x2, i64x2, u128x1, i128x1,);
+impl_swap_bytes!(v256: u8x32, i8x32, u16x16, i16x16, u32x8, i32x8, u64x4, i64x4, u128x2, i128x2,);
+
+impl_swap_bytes!(v512: u8x64, i8x64, u16x32, i16x32, u32x16, i32x16, u64x8, i64x8, u128x4, i128x4,);
+
+cfg_if! {
+ if #[cfg(target_pointer_width = "8")] {
+ impl_swap_bytes!(v16: isizex2, usizex2,);
+ impl_swap_bytes!(v32: isizex4, usizex4,);
+ impl_swap_bytes!(v64: isizex8, usizex8,);
+ } else if #[cfg(target_pointer_width = "16")] {
+ impl_swap_bytes!(v32: isizex2, usizex2,);
+ impl_swap_bytes!(v64: isizex4, usizex4,);
+ impl_swap_bytes!(v128: isizex8, usizex8,);
+ } else if #[cfg(target_pointer_width = "32")] {
+ impl_swap_bytes!(v64: isizex2, usizex2,);
+ impl_swap_bytes!(v128: isizex4, usizex4,);
+ impl_swap_bytes!(v256: isizex8, usizex8,);
+ } else if #[cfg(target_pointer_width = "64")] {
+ impl_swap_bytes!(v128: isizex2, usizex2,);
+ impl_swap_bytes!(v256: isizex4, usizex4,);
+ impl_swap_bytes!(v512: isizex8, usizex8,);
+ } else {
+ compile_error!("unsupported target_pointer_width");
+ }
+}
diff --git a/third_party/rust/packed_simd/src/codegen/v128.rs b/third_party/rust/packed_simd/src/codegen/v128.rs
new file mode 100644
index 0000000000..9506424fad
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/v128.rs
@@ -0,0 +1,46 @@
+//! Internal 128-bit wide vector types
+
+use crate::masks::*;
+
+#[rustfmt::skip]
+impl_simd_array!(
+ [i8; 16]: i8x16 |
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8
+);
+#[rustfmt::skip]
+impl_simd_array!(
+ [u8; 16]: u8x16 |
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8
+);
+#[rustfmt::skip]
+impl_simd_array!(
+ [m8; 16]: m8x16 |
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8
+);
+
+impl_simd_array!([i16; 8]: i16x8 | i16, i16, i16, i16, i16, i16, i16, i16);
+impl_simd_array!([u16; 8]: u16x8 | u16, u16, u16, u16, u16, u16, u16, u16);
+impl_simd_array!([m16; 8]: m16x8 | i16, i16, i16, i16, i16, i16, i16, i16);
+
+impl_simd_array!([i32; 4]: i32x4 | i32, i32, i32, i32);
+impl_simd_array!([u32; 4]: u32x4 | u32, u32, u32, u32);
+impl_simd_array!([f32; 4]: f32x4 | f32, f32, f32, f32);
+impl_simd_array!([m32; 4]: m32x4 | i32, i32, i32, i32);
+
+impl_simd_array!([i64; 2]: i64x2 | i64, i64);
+impl_simd_array!([u64; 2]: u64x2 | u64, u64);
+impl_simd_array!([f64; 2]: f64x2 | f64, f64);
+impl_simd_array!([m64; 2]: m64x2 | i64, i64);
+
+impl_simd_array!([i128; 1]: i128x1 | i128);
+impl_simd_array!([u128; 1]: u128x1 | u128);
+impl_simd_array!([m128; 1]: m128x1 | i128);
diff --git a/third_party/rust/packed_simd/src/codegen/v16.rs b/third_party/rust/packed_simd/src/codegen/v16.rs
new file mode 100644
index 0000000000..4d55a6d899
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/v16.rs
@@ -0,0 +1,7 @@
+//! Internal 16-bit wide vector types
+
+use crate::masks::*;
+
+impl_simd_array!([i8; 2]: i8x2 | i8, i8);
+impl_simd_array!([u8; 2]: u8x2 | u8, u8);
+impl_simd_array!([m8; 2]: m8x2 | i8, i8);
diff --git a/third_party/rust/packed_simd/src/codegen/v256.rs b/third_party/rust/packed_simd/src/codegen/v256.rs
new file mode 100644
index 0000000000..5ca4759f0c
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/v256.rs
@@ -0,0 +1,78 @@
+//! Internal 256-bit wide vector types
+
+use crate::masks::*;
+
+#[rustfmt::skip]
+impl_simd_array!(
+ [i8; 32]: i8x32 |
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8
+);
+#[rustfmt::skip]
+impl_simd_array!(
+ [u8; 32]: u8x32 |
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8
+);
+#[rustfmt::skip]
+impl_simd_array!(
+ [m8; 32]: m8x32 |
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8
+);
+#[rustfmt::skip]
+impl_simd_array!(
+ [i16; 16]: i16x16 |
+ i16, i16, i16, i16,
+ i16, i16, i16, i16,
+ i16, i16, i16, i16,
+ i16, i16, i16, i16
+);
+#[rustfmt::skip]
+impl_simd_array!(
+ [u16; 16]: u16x16 |
+ u16, u16, u16, u16,
+ u16, u16, u16, u16,
+ u16, u16, u16, u16,
+ u16, u16, u16, u16
+);
+#[rustfmt::skip]
+impl_simd_array!(
+ [m16; 16]: m16x16 |
+ i16, i16, i16, i16,
+ i16, i16, i16, i16,
+ i16, i16, i16, i16,
+ i16, i16, i16, i16
+);
+
+impl_simd_array!([i32; 8]: i32x8 | i32, i32, i32, i32, i32, i32, i32, i32);
+impl_simd_array!([u32; 8]: u32x8 | u32, u32, u32, u32, u32, u32, u32, u32);
+impl_simd_array!([f32; 8]: f32x8 | f32, f32, f32, f32, f32, f32, f32, f32);
+impl_simd_array!([m32; 8]: m32x8 | i32, i32, i32, i32, i32, i32, i32, i32);
+
+impl_simd_array!([i64; 4]: i64x4 | i64, i64, i64, i64);
+impl_simd_array!([u64; 4]: u64x4 | u64, u64, u64, u64);
+impl_simd_array!([f64; 4]: f64x4 | f64, f64, f64, f64);
+impl_simd_array!([m64; 4]: m64x4 | i64, i64, i64, i64);
+
+impl_simd_array!([i128; 2]: i128x2 | i128, i128);
+impl_simd_array!([u128; 2]: u128x2 | u128, u128);
+impl_simd_array!([m128; 2]: m128x2 | i128, i128);
diff --git a/third_party/rust/packed_simd/src/codegen/v32.rs b/third_party/rust/packed_simd/src/codegen/v32.rs
new file mode 100644
index 0000000000..ae1dabd00c
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/v32.rs
@@ -0,0 +1,11 @@
+//! Internal 32-bit wide vector types
+
+use crate::masks::*;
+
+impl_simd_array!([i8; 4]: i8x4 | i8, i8, i8, i8);
+impl_simd_array!([u8; 4]: u8x4 | u8, u8, u8, u8);
+impl_simd_array!([m8; 4]: m8x4 | i8, i8, i8, i8);
+
+impl_simd_array!([i16; 2]: i16x2 | i16, i16);
+impl_simd_array!([u16; 2]: u16x2 | u16, u16);
+impl_simd_array!([m16; 2]: m16x2 | i16, i16);
diff --git a/third_party/rust/packed_simd/src/codegen/v512.rs b/third_party/rust/packed_simd/src/codegen/v512.rs
new file mode 100644
index 0000000000..bf95110340
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/v512.rs
@@ -0,0 +1,145 @@
+//! Internal 512-bit wide vector types
+
+use crate::masks::*;
+
+#[rustfmt::skip]
+impl_simd_array!(
+ [i8; 64]: i8x64 |
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8
+);
+#[rustfmt::skip]
+impl_simd_array!(
+ [u8; 64]: u8x64 |
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8,
+ u8, u8, u8, u8
+);
+#[rustfmt::skip]
+impl_simd_array!(
+ [m8; 64]: m8x64 |
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8,
+ i8, i8, i8, i8
+);
+#[rustfmt::skip]
+impl_simd_array!(
+ [i16; 32]: i16x32 |
+ i16, i16, i16, i16,
+ i16, i16, i16, i16,
+ i16, i16, i16, i16,
+ i16, i16, i16, i16,
+ i16, i16, i16, i16,
+ i16, i16, i16, i16,
+ i16, i16, i16, i16,
+ i16, i16, i16, i16
+);
+#[rustfmt::skip]
+impl_simd_array!(
+ [u16; 32]: u16x32 |
+ u16, u16, u16, u16,
+ u16, u16, u16, u16,
+ u16, u16, u16, u16,
+ u16, u16, u16, u16,
+ u16, u16, u16, u16,
+ u16, u16, u16, u16,
+ u16, u16, u16, u16,
+ u16, u16, u16, u16
+);
+#[rustfmt::skip]
+impl_simd_array!(
+ [m16; 32]: m16x32 |
+ i16, i16, i16, i16,
+ i16, i16, i16, i16,
+ i16, i16, i16, i16,
+ i16, i16, i16, i16,
+ i16, i16, i16, i16,
+ i16, i16, i16, i16,
+ i16, i16, i16, i16,
+ i16, i16, i16, i16
+);
+
+#[rustfmt::skip]
+impl_simd_array!(
+ [i32; 16]: i32x16 |
+ i32, i32, i32, i32,
+ i32, i32, i32, i32,
+ i32, i32, i32, i32,
+ i32, i32, i32, i32
+);
+#[rustfmt::skip]
+impl_simd_array!(
+ [u32; 16]: u32x16 |
+ u32, u32, u32, u32,
+ u32, u32, u32, u32,
+ u32, u32, u32, u32,
+ u32, u32, u32, u32
+);
+#[rustfmt::skip]
+impl_simd_array!(
+ [f32; 16]: f32x16 |
+ f32, f32, f32, f32,
+ f32, f32, f32, f32,
+ f32, f32, f32, f32,
+ f32, f32, f32, f32
+);
+#[rustfmt::skip]
+impl_simd_array!(
+ [m32; 16]: m32x16 |
+ i32, i32, i32, i32,
+ i32, i32, i32, i32,
+ i32, i32, i32, i32,
+ i32, i32, i32, i32
+);
+
+impl_simd_array!([i64; 8]: i64x8 | i64, i64, i64, i64, i64, i64, i64, i64);
+impl_simd_array!([u64; 8]: u64x8 | u64, u64, u64, u64, u64, u64, u64, u64);
+impl_simd_array!([f64; 8]: f64x8 | f64, f64, f64, f64, f64, f64, f64, f64);
+impl_simd_array!([m64; 8]: m64x8 | i64, i64, i64, i64, i64, i64, i64, i64);
+
+impl_simd_array!([i128; 4]: i128x4 | i128, i128, i128, i128);
+impl_simd_array!([u128; 4]: u128x4 | u128, u128, u128, u128);
+impl_simd_array!([m128; 4]: m128x4 | i128, i128, i128, i128);
diff --git a/third_party/rust/packed_simd/src/codegen/v64.rs b/third_party/rust/packed_simd/src/codegen/v64.rs
new file mode 100644
index 0000000000..3cfb67c1a0
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/v64.rs
@@ -0,0 +1,21 @@
+//! Internal 64-bit wide vector types
+
+use crate::masks::*;
+
+impl_simd_array!([i8; 8]: i8x8 | i8, i8, i8, i8, i8, i8, i8, i8);
+impl_simd_array!([u8; 8]: u8x8 | u8, u8, u8, u8, u8, u8, u8, u8);
+impl_simd_array!([m8; 8]: m8x8 | i8, i8, i8, i8, i8, i8, i8, i8);
+
+impl_simd_array!([i16; 4]: i16x4 | i16, i16, i16, i16);
+impl_simd_array!([u16; 4]: u16x4 | u16, u16, u16, u16);
+impl_simd_array!([m16; 4]: m16x4 | i16, i16, i16, i16);
+
+impl_simd_array!([i32; 2]: i32x2 | i32, i32);
+impl_simd_array!([u32; 2]: u32x2 | u32, u32);
+impl_simd_array!([f32; 2]: f32x2 | f32, f32);
+impl_simd_array!([m32; 2]: m32x2 | i32, i32);
+
+impl_simd_array!([i64; 1]: i64x1 | i64);
+impl_simd_array!([u64; 1]: u64x1 | u64);
+impl_simd_array!([f64; 1]: f64x1 | f64);
+impl_simd_array!([m64; 1]: m64x1 | i64);
diff --git a/third_party/rust/packed_simd/src/codegen/vPtr.rs b/third_party/rust/packed_simd/src/codegen/vPtr.rs
new file mode 100644
index 0000000000..abd3aa8779
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/vPtr.rs
@@ -0,0 +1,35 @@
+//! Pointer vector types
+
+macro_rules! impl_simd_ptr {
+ ([$ptr_ty:ty; $elem_count:expr]: $tuple_id:ident | $ty:ident
+ | $($tys:ty),*) => {
+ #[derive(Copy, Clone)]
+ #[repr(simd)]
+ pub struct $tuple_id<$ty>($(pub(crate) $tys),*);
+ //^^^^^^^ leaked through SimdArray
+
+ impl<$ty> crate::sealed::Seal for [$ptr_ty; $elem_count] {}
+ impl<$ty> crate::sealed::SimdArray for [$ptr_ty; $elem_count] {
+ type Tuple = $tuple_id<$ptr_ty>;
+ type T = $ptr_ty;
+ const N: usize = $elem_count;
+ type NT = [u32; $elem_count];
+ }
+
+ impl<$ty> crate::sealed::Seal for $tuple_id<$ptr_ty> {}
+ impl<$ty> crate::sealed::Simd for $tuple_id<$ptr_ty> {
+ type Element = $ptr_ty;
+ const LANES: usize = $elem_count;
+ type LanesType = [u32; $elem_count];
+ }
+
+ }
+}
+
+impl_simd_ptr!([*const T; 2]: cptrx2 | T | T, T);
+impl_simd_ptr!([*const T; 4]: cptrx4 | T | T, T, T, T);
+impl_simd_ptr!([*const T; 8]: cptrx8 | T | T, T, T, T, T, T, T, T);
+
+impl_simd_ptr!([*mut T; 2]: mptrx2 | T | T, T);
+impl_simd_ptr!([*mut T; 4]: mptrx4 | T | T, T, T, T);
+impl_simd_ptr!([*mut T; 8]: mptrx8 | T | T, T, T, T, T, T, T, T);
diff --git a/third_party/rust/packed_simd/src/codegen/vSize.rs b/third_party/rust/packed_simd/src/codegen/vSize.rs
new file mode 100644
index 0000000000..d5db03991d
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/vSize.rs
@@ -0,0 +1,16 @@
+//! Vector types with pointer-sized elements
+
+use crate::codegen::pointer_sized_int::{isize_, usize_};
+use crate::masks::*;
+
+impl_simd_array!([isize; 2]: isizex2 | isize_, isize_);
+impl_simd_array!([usize; 2]: usizex2 | usize_, usize_);
+impl_simd_array!([msize; 2]: msizex2 | isize_, isize_);
+
+impl_simd_array!([isize; 4]: isizex4 | isize_, isize_, isize_, isize_);
+impl_simd_array!([usize; 4]: usizex4 | usize_, usize_, usize_, usize_);
+impl_simd_array!([msize; 4]: msizex4 | isize_, isize_, isize_, isize_);
+
+impl_simd_array!([isize; 8]: isizex8 | isize_, isize_, isize_, isize_, isize_, isize_, isize_, isize_);
+impl_simd_array!([usize; 8]: usizex8 | usize_, usize_, usize_, usize_, usize_, usize_, usize_, usize_);
+impl_simd_array!([msize; 8]: msizex8 | isize_, isize_, isize_, isize_, isize_, isize_, isize_, isize_);
diff --git a/third_party/rust/packed_simd/src/lib.rs b/third_party/rust/packed_simd/src/lib.rs
new file mode 100644
index 0000000000..867cc10e9e
--- /dev/null
+++ b/third_party/rust/packed_simd/src/lib.rs
@@ -0,0 +1,348 @@
+//! # Portable packed SIMD vectors
+//!
+//! This crate is proposed for stabilization as `std::packed_simd` in [RFC2366:
+//! `std::simd`](https://github.com/rust-lang/rfcs/pull/2366) .
+//!
+//! The examples available in the
+//! [`examples/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples)
+//! sub-directory of the crate showcase how to use the library in practice.
+//!
+//! ## Table of contents
+//!
+//! - [Introduction](#introduction)
+//! - [Vector types](#vector-types)
+//! - [Conditional operations](#conditional-operations)
+//! - [Conversions](#conversions)
+//! - [Hardware Features](#hardware-features)
+//! - [Performance guide](https://rust-lang-nursery.github.io/packed_simd/perf-guide/)
+//!
+//! ## Introduction
+//!
+//! This crate exports [`Simd<[T; N]>`][`Simd`]: a packed vector of `N`
+//! elements of type `T` as well as many type aliases for this type: for
+//! example, [`f32x4`], which is just an alias for `Simd<[f32; 4]>`.
+//!
+//! The operations on packed vectors are, by default, "vertical", that is, they
+//! are applied to each vector lane in isolation of the others:
+//!
+//! ```
+//! # use packed_simd::*;
+//! let a = i32x4::new(1, 2, 3, 4);
+//! let b = i32x4::new(5, 6, 7, 8);
+//! assert_eq!(a + b, i32x4::new(6, 8, 10, 12));
+//! ```
+//!
+//! Many "horizontal" operations are also provided:
+//!
+//! ```
+//! # use packed_simd::*;
+//! # let a = i32x4::new(1, 2, 3, 4);
+//! assert_eq!(a.wrapping_sum(), 10);
+//! ```
+//!
+//! In virtually all architectures vertical operations are fast, while
+//! horizontal operations are, by comparison, much slower. That is, the
+//! most portably-efficient way of performing a reduction over a slice
+//! is to collect the results into a vector using vertical operations,
+//! and performing a single horizontal operation at the end:
+//!
+//! ```
+//! # use packed_simd::*;
+//! fn reduce(x: &[i32]) -> i32 {
+//! assert_eq!(x.len() % 4, 0);
+//! let mut sum = i32x4::splat(0); // [0, 0, 0, 0]
+//! for i in (0..x.len()).step_by(4) {
+//! sum += i32x4::from_slice_unaligned(&x[i..]);
+//! }
+//! sum.wrapping_sum()
+//! }
+//!
+//! let x = [0, 1, 2, 3, 4, 5, 6, 7];
+//! assert_eq!(reduce(&x), 28);
+//! ```
+//!
+//! ## Vector types
+//!
+//! The vector type aliases are named according to the following scheme:
+//!
+//! > `{element_type}x{number_of_lanes} == Simd<[element_type;
+//! number_of_lanes]>`
+//!
+//! where the following element types are supported:
+//!
+//! * `i{element_width}`: signed integer
+//! * `u{element_width}`: unsigned integer
+//! * `f{element_width}`: float
+//! * `m{element_width}`: mask (see below)
+//! * `*{const,mut} T`: `const` and `mut` pointers
+//!
+//! ## Basic operations
+//!
+//! ```
+//! # use packed_simd::*;
+//! // Sets all elements to `0`:
+//! let a = i32x4::splat(0);
+//!
+//! // Reads a vector from a slice:
+//! let mut arr = [0, 0, 0, 1, 2, 3, 4, 5];
+//! let b = i32x4::from_slice_unaligned(&arr);
+//!
+//! // Reads the 4-th element of a vector:
+//! assert_eq!(b.extract(3), 1);
+//!
+//! // Returns a new vector where the 4-th element is replaced with `1`:
+//! let a = a.replace(3, 1);
+//! assert_eq!(a, b);
+//!
+//! // Writes a vector to a slice:
+//! let a = a.replace(2, 1);
+//! a.write_to_slice_unaligned(&mut arr[4..]);
+//! assert_eq!(arr, [0, 0, 0, 1, 0, 0, 1, 1]);
+//! ```
+//!
+//! ## Conditional operations
+//!
+//! One often needs to perform an operation on some lanes of the vector. Vector
+//! masks, like `m32x4`, allow selecting on which vector lanes an operation is
+//! to be performed:
+//!
+//! ```
+//! # use packed_simd::*;
+//! let a = i32x4::new(1, 1, 2, 2);
+//!
+//! // Add `1` to the first two lanes of the vector.
+//! let m = m16x4::new(true, true, false, false);
+//! let a = m.select(a + 1, a);
+//! assert_eq!(a, i32x4::splat(2));
+//! ```
+//!
+//! The elements of a vector mask are either `true` or `false`. Here `true`
+//! means that a lane is "selected", while `false` means that a lane is not
+//! selected.
+//!
+//! All vector masks implement a `mask.select(a: T, b: T) -> T` method that
+//! works on all vectors that have the same number of lanes as the mask. The
+//! resulting vector contains the elements of `a` for those lanes for which the
+//! mask is `true`, and the elements of `b` otherwise.
+//!
+//! The example constructs a mask with the first two lanes set to `true` and
+//! the last two lanes set to `false`. This selects the first two lanes of `a +
+//! 1` and the last two lanes of `a`, producing a vector where the first two
+//! lanes have been incremented by `1`.
+//!
+//! > note: mask `select` can be used on vector types that have the same number
+//! > of lanes as the mask. The example shows this by using [`m16x4`] instead
+//! > of [`m32x4`]. It is _typically_ more performant to use a mask element
+//! > width equal to the element width of the vectors being operated upon.
+//! > This is, however, not true for 512-bit wide vectors when targeting
+//! > AVX-512, where the most efficient masks use only 1-bit per element.
+//!
+//! All vertical comparison operations returns masks:
+//!
+//! ```
+//! # use packed_simd::*;
+//! let a = i32x4::new(1, 1, 3, 3);
+//! let b = i32x4::new(2, 2, 0, 0);
+//!
+//! // ge: >= (Greater Eequal; see also lt, le, gt, eq, ne).
+//! let m = a.ge(i32x4::splat(2));
+//!
+//! if m.any() {
+//! // all / any / none allow coherent control flow
+//! let d = m.select(a, b);
+//! assert_eq!(d, i32x4::new(2, 2, 3, 3));
+//! }
+//! ```
+//!
+//! ## Conversions
+//!
+//! * **lossless widening conversions**: [`From`]/[`Into`] are implemented for
+//! vectors with the same number of lanes when the conversion is value
+//! preserving (same as in `std`).
+//!
+//! * **safe bitwise conversions**: The cargo feature `into_bits` provides the
+//! `IntoBits/FromBits` traits (`x.into_bits()`). These perform safe bitwise
+//! `transmute`s when all bit patterns of the source type are valid bit
+//! patterns of the target type and are also implemented for the
+//! architecture-specific vector types of `std::arch`. For example, `let x:
+//! u8x8 = m8x8::splat(true).into_bits();` is provided because all `m8x8` bit
+//! patterns are valid `u8x8` bit patterns. However, the opposite is not
+//! true, not all `u8x8` bit patterns are valid `m8x8` bit-patterns, so this
+//! operation cannot be performed safely using `x.into_bits()`; one needs to
+//! use `unsafe { crate::mem::transmute(x) }` for that, making sure that the
+//! value in the `u8x8` is a valid bit-pattern of `m8x8`.
+//!
+//! * **numeric casts** (`as`): are performed using [`FromCast`]/[`Cast`]
+//! (`x.cast()`), just like `as`:
+//!
+//! * casting integer vectors whose lane types have the same size (e.g.
+//! `i32xN` -> `u32xN`) is a **no-op**,
+//!
+//! * casting from a larger integer to a smaller integer (e.g. `u32xN` ->
+//! `u8xN`) will **truncate**,
+//!
+//! * casting from a smaller integer to a larger integer (e.g. `u8xN` ->
+//! `u32xN`) will:
+//! * **zero-extend** if the source is unsigned, or
+//! * **sign-extend** if the source is signed,
+//!
+//! * casting from a float to an integer will **round the float towards
+//! zero**,
+//!
+//! * casting from an integer to float will produce the floating point
+//! representation of the integer, **rounding to nearest, ties to even**,
+//!
+//! * casting from an `f32` to an `f64` is perfect and lossless,
+//!
+//! * casting from an `f64` to an `f32` **rounds to nearest, ties to even**.
+//!
+//! Numeric casts are not very "precise": sometimes lossy, sometimes value
+//! preserving, etc.
+//!
+//! ## Hardware Features
+//!
+//! This crate can use different hardware features based on your configured
+//! `RUSTFLAGS`. For example, with no configured `RUSTFLAGS`, `u64x8` on
+//! x86_64 will use SSE2 operations like `PCMPEQD`. If you configure
+//! `RUSTFLAGS='-C target-feature=+avx2,+avx'` on supported x86_64 hardware
+//! the same `u64x8` may use wider AVX2 operations like `VPCMPEQQ`. It is
+//! important for performance and for hardware support requirements that
+//! you choose an appropriate set of `target-feature` and `target-cpu`
+//! options during builds. For more information, see the [Performance
+//! guide](https://rust-lang-nursery.github.io/packed_simd/perf-guide/)
+
+#![feature(
+ adt_const_params,
+ repr_simd,
+ rustc_attrs,
+ platform_intrinsics,
+ stdsimd,
+ arm_target_feature,
+ link_llvm_intrinsics,
+ core_intrinsics,
+ stmt_expr_attributes,
+ custom_inner_attributes,
+)]
+#![allow(non_camel_case_types, non_snake_case,
+ // FIXME: these types are unsound in C FFI already
+ // See https://github.com/rust-lang/rust/issues/53346
+ improper_ctypes_definitions,
+ incomplete_features,
+ clippy::cast_possible_truncation,
+ clippy::cast_lossless,
+ clippy::cast_possible_wrap,
+ clippy::cast_precision_loss,
+ // TODO: manually add the `#[must_use]` attribute where appropriate
+ clippy::must_use_candidate,
+ // This lint is currently broken for generic code
+ // See https://github.com/rust-lang/rust-clippy/issues/3410
+ clippy::use_self,
+ clippy::wrong_self_convention,
+ clippy::from_over_into,
+)]
+#![cfg_attr(test, feature(hashmap_internals))]
+#![cfg_attr(doc_cfg, feature(doc_cfg))]
+#![deny(rust_2018_idioms, clippy::missing_inline_in_public_items)]
+#![no_std]
+
+use cfg_if::cfg_if;
+
+cfg_if! {
+ if #[cfg(feature = "core_arch")] {
+ #[allow(unused_imports)]
+ use core_arch as arch;
+ } else {
+ #[allow(unused_imports)]
+ use core::arch;
+ }
+}
+
+#[cfg(all(target_arch = "wasm32", test))]
+use wasm_bindgen_test::*;
+
+#[allow(unused_imports)]
+use core::{
+ /* arch (handled above), */ cmp, f32, f64, fmt, hash, hint, i128, i16, i32, i64, i8, intrinsics,
+ isize, iter, marker, mem, ops, ptr, slice, u128, u16, u32, u64, u8, usize,
+};
+
+#[macro_use]
+mod testing;
+#[macro_use]
+mod api;
+mod codegen;
+mod sealed;
+
+pub use crate::sealed::{Mask, Shuffle, Simd as SimdVector, SimdArray};
+
+/// Packed SIMD vector type.
+///
+/// # Examples
+///
+/// ```
+/// # use packed_simd::Simd;
+/// let v = Simd::<[i32; 4]>::new(0, 1, 2, 3);
+/// assert_eq!(v.extract(2), 2);
+/// ```
+#[repr(transparent)]
+#[derive(Copy, Clone)]
+pub struct Simd<A: sealed::SimdArray>(
+ // FIXME: this type should be private,
+ // but it currently must be public for the
+ // `shuffle!` macro to work: it needs to
+ // access the internal `repr(simd)` type
+ // to call the shuffle intrinsics.
+ #[doc(hidden)] pub <A as sealed::SimdArray>::Tuple,
+);
+
+impl<A: sealed::SimdArray> sealed::Seal for Simd<A> {}
+
+/// Wrapper over `T` implementing a lexicoraphical order via the `PartialOrd`
+/// and/or `Ord` traits.
+#[repr(transparent)]
+#[derive(Copy, Clone, Debug)]
+#[allow(clippy::missing_inline_in_public_items)]
+pub struct LexicographicallyOrdered<T>(T);
+
+mod masks;
+pub use self::masks::*;
+
+mod v16;
+pub use self::v16::*;
+
+mod v32;
+pub use self::v32::*;
+
+mod v64;
+pub use self::v64::*;
+
+mod v128;
+pub use self::v128::*;
+
+mod v256;
+pub use self::v256::*;
+
+mod v512;
+pub use self::v512::*;
+
+mod vSize;
+pub use self::vSize::*;
+
+mod vPtr;
+pub use self::vPtr::*;
+
+pub use self::api::cast::*;
+
+#[cfg(feature = "into_bits")]
+pub use self::api::into_bits::*;
+
+// Re-export the shuffle intrinsics required by the `shuffle!` macro.
+#[doc(hidden)]
+pub use self::codegen::llvm::{
+ __shuffle_vector16, __shuffle_vector2, __shuffle_vector32, __shuffle_vector4, __shuffle_vector64,
+ __shuffle_vector8,
+};
+
+pub(crate) mod llvm {
+ pub(crate) use crate::codegen::llvm::*;
+}
diff --git a/third_party/rust/packed_simd/src/masks.rs b/third_party/rust/packed_simd/src/masks.rs
new file mode 100644
index 0000000000..04534eab2b
--- /dev/null
+++ b/third_party/rust/packed_simd/src/masks.rs
@@ -0,0 +1,126 @@
+//! Mask types
+
+macro_rules! impl_mask_ty {
+ ($id:ident : $elem_ty:ident | #[$doc:meta]) => {
+ #[$doc]
+ #[derive(Copy, Clone)]
+ pub struct $id($elem_ty);
+
+ impl crate::sealed::Seal for $id {}
+ impl crate::sealed::Mask for $id {
+ #[inline]
+ fn test(&self) -> bool {
+ $id::test(self)
+ }
+ }
+
+ impl $id {
+ /// Instantiate a mask with `value`
+ #[inline]
+ pub fn new(x: bool) -> Self {
+ if x {
+ $id(!0)
+ } else {
+ $id(0)
+ }
+ }
+ /// Test if the mask is set
+ #[inline]
+ pub fn test(&self) -> bool {
+ self.0 != 0
+ }
+ }
+
+ impl Default for $id {
+ #[inline]
+ fn default() -> Self {
+ $id(0)
+ }
+ }
+
+ #[allow(clippy::partialeq_ne_impl)]
+ impl PartialEq<$id> for $id {
+ #[inline]
+ fn eq(&self, other: &Self) -> bool {
+ self.0 == other.0
+ }
+ #[inline]
+ fn ne(&self, other: &Self) -> bool {
+ self.0 != other.0
+ }
+ }
+
+ impl Eq for $id {}
+
+ impl PartialOrd<$id> for $id {
+ #[inline]
+ fn partial_cmp(&self, other: &Self) -> Option<crate::cmp::Ordering> {
+ use crate::cmp::Ordering;
+ if self == other {
+ Some(Ordering::Equal)
+ } else if self.0 > other.0 {
+ // Note:
+ // * false = 0_i
+ // * true == !0_i == -1_i
+ Some(Ordering::Less)
+ } else {
+ Some(Ordering::Greater)
+ }
+ }
+
+ #[inline]
+ fn lt(&self, other: &Self) -> bool {
+ self.0 > other.0
+ }
+ #[inline]
+ fn gt(&self, other: &Self) -> bool {
+ self.0 < other.0
+ }
+ #[inline]
+ fn le(&self, other: &Self) -> bool {
+ self.0 >= other.0
+ }
+ #[inline]
+ fn ge(&self, other: &Self) -> bool {
+ self.0 <= other.0
+ }
+ }
+
+ impl Ord for $id {
+ #[inline]
+ fn cmp(&self, other: &Self) -> crate::cmp::Ordering {
+ match self.partial_cmp(other) {
+ Some(x) => x,
+ None => unsafe { crate::hint::unreachable_unchecked() },
+ }
+ }
+ }
+
+ impl crate::hash::Hash for $id {
+ #[inline]
+ fn hash<H: crate::hash::Hasher>(&self, state: &mut H) {
+ (self.0 != 0).hash(state);
+ }
+ }
+
+ impl crate::fmt::Debug for $id {
+ #[inline]
+ fn fmt(&self, fmtter: &mut crate::fmt::Formatter<'_>) -> Result<(), crate::fmt::Error> {
+ write!(fmtter, "{}({})", stringify!($id), self.0 != 0)
+ }
+ }
+ };
+}
+
+impl_mask_ty!(m8: i8 | /// 8-bit wide mask.
+);
+impl_mask_ty!(m16: i16 | /// 16-bit wide mask.
+);
+impl_mask_ty!(m32: i32 | /// 32-bit wide mask.
+);
+impl_mask_ty!(m64: i64 | /// 64-bit wide mask.
+);
+impl_mask_ty!(m128: i128 | /// 128-bit wide mask.
+);
+impl_mask_ty!(msize: isize | /// isize-wide mask.
+);
diff --git a/third_party/rust/packed_simd/src/sealed.rs b/third_party/rust/packed_simd/src/sealed.rs
new file mode 100644
index 0000000000..0ec20300fd
--- /dev/null
+++ b/third_party/rust/packed_simd/src/sealed.rs
@@ -0,0 +1,42 @@
+//! Sealed traits
+
+/// A sealed trait, this is logically private to the crate
+/// and will prevent implementations from outside the crate
+pub trait Seal<T = ()> {}
+
+/// Trait implemented by arrays that can be SIMD types.
+pub trait SimdArray: Seal {
+ /// The type of the #[repr(simd)] type.
+ type Tuple: Copy + Clone;
+ /// The element type of the vector.
+ type T;
+ /// The number of elements in the array.
+ const N: usize;
+ /// The type: `[u32; Self::N]`.
+ type NT;
+}
+
+/// This traits is used to constraint the arguments
+/// and result type of the portable shuffles.
+#[doc(hidden)]
+pub trait Shuffle<Lanes>: Seal<Lanes> {
+ // Lanes is a `[u32; N]` where `N` is the number of vector lanes
+
+ /// The result type of the shuffle.
+ type Output;
+}
+
+/// This trait is implemented by all SIMD vector types.
+pub trait Simd: Seal {
+ /// Element type of the SIMD vector
+ type Element;
+ /// The number of elements in the SIMD vector.
+ const LANES: usize;
+ /// The type: `[u32; Self::N]`.
+ type LanesType;
+}
+
+/// This trait is implemented by all mask types
+pub trait Mask: Seal {
+ fn test(&self) -> bool;
+}
diff --git a/third_party/rust/packed_simd/src/testing.rs b/third_party/rust/packed_simd/src/testing.rs
new file mode 100644
index 0000000000..6320b28055
--- /dev/null
+++ b/third_party/rust/packed_simd/src/testing.rs
@@ -0,0 +1,8 @@
+//! Testing macros and other utilities.
+
+#[macro_use]
+mod macros;
+
+#[cfg(test)]
+#[macro_use]
+pub(crate) mod utils;
diff --git a/third_party/rust/packed_simd/src/testing/macros.rs b/third_party/rust/packed_simd/src/testing/macros.rs
new file mode 100644
index 0000000000..7bc4268b90
--- /dev/null
+++ b/third_party/rust/packed_simd/src/testing/macros.rs
@@ -0,0 +1,44 @@
+//! Testing macros
+
+macro_rules! test_if {
+ ($cfg_tt:tt: $it:item) => {
+ #[cfg(any(
+ // Test everything if:
+ //
+ // * tests are enabled,
+ // * no features about exclusively testing
+ // specific vector classes are enabled
+ all(test, not(any(
+ test_v16,
+ test_v32,
+ test_v64,
+ test_v128,
+ test_v256,
+ test_v512,
+ test_none, // disables all tests
+ ))),
+ // Test if:
+ //
+ // * tests are enabled
+ // * a particular cfg token tree returns true
+ all(test, $cfg_tt),
+ ))]
+ $it
+ };
+}
+
+#[cfg(test)]
+#[allow(unused)]
+macro_rules! ref_ {
+ ($anything:tt) => {
+ &$anything
+ };
+}
+
+#[cfg(test)]
+#[allow(unused)]
+macro_rules! ref_mut_ {
+ ($anything:tt) => {
+ &mut $anything
+ };
+}
diff --git a/third_party/rust/packed_simd/src/testing/utils.rs b/third_party/rust/packed_simd/src/testing/utils.rs
new file mode 100644
index 0000000000..7d8f395739
--- /dev/null
+++ b/third_party/rust/packed_simd/src/testing/utils.rs
@@ -0,0 +1,130 @@
+//! Testing utilities
+
+#![allow(dead_code)]
+// FIXME: Or don't. But it's true this is a problematic comparison.
+#![allow(clippy::neg_cmp_op_on_partial_ord)]
+
+use crate::{cmp::PartialOrd, fmt::Debug, LexicographicallyOrdered};
+
+/// Tests PartialOrd for `a` and `b` where `a < b` is true.
+pub fn test_lt<T>(a: LexicographicallyOrdered<T>, b: LexicographicallyOrdered<T>)
+where
+ LexicographicallyOrdered<T>: Debug + PartialOrd,
+{
+ assert!(a < b, "{:?}, {:?}", a, b);
+ assert!(b > a, "{:?}, {:?}", a, b);
+
+ assert!(!(a == b), "{:?}, {:?}", a, b);
+ assert_ne!(a, b, "{:?}, {:?}", a, b);
+
+ assert!(a <= b, "{:?}, {:?}", a, b);
+ assert!(b >= a, "{:?}, {:?}", a, b);
+
+ // The elegance of the mathematical expression of irreflexivity is more
+ // than clippy can handle.
+ #[allow(clippy::eq_op)]
+ {
+ // Irreflexivity
+ assert!(!(a < a), "{:?}, {:?}", a, b);
+ assert!(!(b < b), "{:?}, {:?}", a, b);
+ assert!(!(a > a), "{:?}, {:?}", a, b);
+ assert!(!(b > b), "{:?}, {:?}", a, b);
+
+ assert!(a <= a, "{:?}, {:?}", a, b);
+ assert!(b <= b, "{:?}, {:?}", a, b);
+ }
+}
+
+/// Tests PartialOrd for `a` and `b` where `a <= b` is true.
+pub fn test_le<T>(a: LexicographicallyOrdered<T>, b: LexicographicallyOrdered<T>)
+where
+ LexicographicallyOrdered<T>: Debug + PartialOrd,
+{
+ assert!(a <= b, "{:?}, {:?}", a, b);
+ assert!(b >= a, "{:?}, {:?}", a, b);
+
+ assert!(a <= b, "{:?}, {:?}", a, b);
+ assert!(b >= a, "{:?}, {:?}", a, b);
+
+ if a == b {
+ assert!(!(a < b), "{:?}, {:?}", a, b);
+ assert!(!(b > a), "{:?}, {:?}", a, b);
+
+ assert!(!(a != b), "{:?}, {:?}", a, b);
+ } else {
+ assert_ne!(a, b, "{:?}, {:?}", a, b);
+ test_lt(a, b);
+ }
+}
+
+/// Test PartialOrd::partial_cmp for `a` and `b` returning `Ordering`
+pub fn test_cmp<T>(
+ a: LexicographicallyOrdered<T>,
+ b: LexicographicallyOrdered<T>,
+ o: Option<crate::cmp::Ordering>,
+) where
+ LexicographicallyOrdered<T>: PartialOrd + Debug,
+ T: Debug + crate::sealed::Simd + Copy + Clone,
+ <T as crate::sealed::Simd>::Element: Default + Copy + Clone + PartialOrd,
+{
+ assert!(T::LANES <= 64, "array length in these two arrays needs updating");
+ let mut arr_a: [T::Element; 64] = [Default::default(); 64];
+ let mut arr_b: [T::Element; 64] = [Default::default(); 64];
+
+ unsafe { crate::ptr::write_unaligned(arr_a.as_mut_ptr() as *mut LexicographicallyOrdered<T>, a) }
+ unsafe { crate::ptr::write_unaligned(arr_b.as_mut_ptr() as *mut LexicographicallyOrdered<T>, b) }
+ let expected = arr_a[0..T::LANES].partial_cmp(&arr_b[0..T::LANES]);
+ let result = a.partial_cmp(&b);
+ assert_eq!(expected, result, "{:?}, {:?}", a, b);
+ assert_eq!(o, result, "{:?}, {:?}", a, b);
+ match o {
+ Some(crate::cmp::Ordering::Less) => {
+ test_lt(a, b);
+ test_le(a, b);
+ }
+ Some(crate::cmp::Ordering::Greater) => {
+ test_lt(b, a);
+ test_le(b, a);
+ }
+ Some(crate::cmp::Ordering::Equal) => {
+ assert!(a == b, "{:?}, {:?}", a, b);
+ assert!(!(a != b), "{:?}, {:?}", a, b);
+ assert!(!(a < b), "{:?}, {:?}", a, b);
+ assert!(!(b < a), "{:?}, {:?}", a, b);
+ assert!(!(a > b), "{:?}, {:?}", a, b);
+ assert!(!(b > a), "{:?}, {:?}", a, b);
+
+ test_le(a, b);
+ test_le(b, a);
+ }
+ None => {
+ assert!(!(a == b), "{:?}, {:?}", a, b);
+ assert!(!(a != b), "{:?}, {:?}", a, b);
+ assert!(!(a < b), "{:?}, {:?}", a, b);
+ assert!(!(a > b), "{:?}, {:?}", a, b);
+ assert!(!(b < a), "{:?}, {:?}", a, b);
+ assert!(!(b > a), "{:?}, {:?}", a, b);
+ assert!(!(a <= b), "{:?}, {:?}", a, b);
+ assert!(!(b <= a), "{:?}, {:?}", a, b);
+ assert!(!(a >= b), "{:?}, {:?}", a, b);
+ assert!(!(b >= a), "{:?}, {:?}", a, b);
+ }
+ }
+}
+
+// Returns a tuple containing two distinct pointer values of the same type as
+// the element type of the Simd vector `$id`.
+#[allow(unused)]
+macro_rules! ptr_vals {
+ ($id:ty) => {
+ // expands to an expression
+ #[allow(unused_unsafe)]
+ unsafe {
+ // all bits cleared
+ let clear: <$id as sealed::Simd>::Element = crate::mem::zeroed();
+ // all bits set
+ let set: <$id as sealed::Simd>::Element = crate::mem::transmute(-1_isize);
+ (clear, set)
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/v128.rs b/third_party/rust/packed_simd/src/v128.rs
new file mode 100644
index 0000000000..7949f6619a
--- /dev/null
+++ b/third_party/rust/packed_simd/src/v128.rs
@@ -0,0 +1,80 @@
+//! 128-bit wide vector types
+#[rustfmt::skip]
+
+use crate::*;
+
+impl_i!([i8; 16]: i8x16, m8x16 | i8, u16 | test_v128 |
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+ From: |
+ /// A 128-bit vector with 16 `i8` lanes.
+);
+impl_u!([u8; 16]: u8x16, m8x16 | u8, u16 | test_v128 |
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+ From: |
+ /// A 128-bit vector with 16 `u8` lanes.
+);
+impl_m!([m8; 16]: m8x16 | i8, u16 | test_v128 |
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+ From: m16x16 |
+ /// A 128-bit vector mask with 16 `m8` lanes.
+);
+
+impl_i!([i16; 8]: i16x8, m16x8 | i16, u8 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 |
+ From: i8x8, u8x8 |
+ /// A 128-bit vector with 8 `i16` lanes.
+);
+impl_u!([u16; 8]: u16x8, m16x8 | u16, u8 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 |
+ From: u8x8 |
+ /// A 128-bit vector with 8 `u16` lanes.
+);
+impl_m!([m16; 8]: m16x8 | i16, u8 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 |
+ From: m8x8, m32x8 |
+ /// A 128-bit vector mask with 8 `m16` lanes.
+);
+
+impl_i!([i32; 4]: i32x4, m32x4 | i32, u8 | test_v128 | x0, x1, x2, x3 |
+ From: i8x4, u8x4, i16x4, u16x4 |
+ /// A 128-bit vector with 4 `i32` lanes.
+);
+impl_u!([u32; 4]: u32x4, m32x4 | u32, u8 | test_v128 | x0, x1, x2, x3 |
+ From: u8x4, u16x4 |
+ /// A 128-bit vector with 4 `u32` lanes.
+);
+impl_f!([f32; 4]: f32x4, m32x4 | f32 | test_v128 | x0, x1, x2, x3 |
+ From: i8x4, u8x4, i16x4, u16x4 |
+ /// A 128-bit vector with 4 `f32` lanes.
+);
+impl_m!([m32; 4]: m32x4 | i32, u8 | test_v128 | x0, x1, x2, x3 |
+ From: m8x4, m16x4, m64x4 |
+ /// A 128-bit vector mask with 4 `m32` lanes.
+);
+
+impl_i!([i64; 2]: i64x2, m64x2 | i64, u8 | test_v128 | x0, x1 |
+ From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2 |
+ /// A 128-bit vector with 2 `i64` lanes.
+);
+impl_u!([u64; 2]: u64x2, m64x2 | u64, u8 | test_v128 | x0, x1 |
+ From: u8x2, u16x2, u32x2 |
+ /// A 128-bit vector with 2 `u64` lanes.
+);
+impl_f!([f64; 2]: f64x2, m64x2 | f64 | test_v128 | x0, x1 |
+ From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2, f32x2 |
+ /// A 128-bit vector with 2 `f64` lanes.
+);
+impl_m!([m64; 2]: m64x2 | i64, u8 | test_v128 | x0, x1 |
+ From: m8x2, m16x2, m32x2, m128x2 |
+ /// A 128-bit vector mask with 2 `m64` lanes.
+);
+
+impl_i!([i128; 1]: i128x1, m128x1 | i128, u8 | test_v128 | x0 |
+ From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1, i64x1, u64x1 */ | // FIXME: unary small vector types
+ /// A 128-bit vector with 1 `i128` lane.
+);
+impl_u!([u128; 1]: u128x1, m128x1 | u128, u8 | test_v128 | x0 |
+ From: /*u8x1, u16x1, u32x1, u64x1 */ | // FIXME: unary small vector types
+ /// A 128-bit vector with 1 `u128` lane.
+);
+impl_m!([m128; 1]: m128x1 | i128, u8 | test_v128 | x0 |
+ From: /*m8x1, m16x1, m32x1, m64x1 */ | // FIXME: unary small vector types
+ /// A 128-bit vector mask with 1 `m128` lane.
+);
diff --git a/third_party/rust/packed_simd/src/v16.rs b/third_party/rust/packed_simd/src/v16.rs
new file mode 100644
index 0000000000..4ca5afb2a7
--- /dev/null
+++ b/third_party/rust/packed_simd/src/v16.rs
@@ -0,0 +1,16 @@
+//! 16-bit wide vector types
+
+use crate::*;
+
+impl_i!([i8; 2]: i8x2, m8x2 | i8, u8 | test_v16 | x0, x1 |
+ From: |
+ /// A 16-bit vector with 2 `i8` lanes.
+);
+impl_u!([u8; 2]: u8x2, m8x2 | u8, u8 | test_v16 | x0, x1 |
+ From: |
+ /// A 16-bit vector with 2 `u8` lanes.
+);
+impl_m!([m8; 2]: m8x2 | i8, u8 | test_v16 | x0, x1 |
+ From: m16x2, m32x2, m64x2, m128x2 |
+ /// A 16-bit vector mask with 2 `m8` lanes.
+);
diff --git a/third_party/rust/packed_simd/src/v256.rs b/third_party/rust/packed_simd/src/v256.rs
new file mode 100644
index 0000000000..f0c3bc2813
--- /dev/null
+++ b/third_party/rust/packed_simd/src/v256.rs
@@ -0,0 +1,86 @@
+//! 256-bit wide vector types
+#[rustfmt::skip]
+
+use crate::*;
+
+impl_i!([i8; 32]: i8x32, m8x32 | i8, u32 | test_v256 |
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
+ From: |
+ /// A 256-bit vector with 32 `i8` lanes.
+);
+impl_u!([u8; 32]: u8x32, m8x32 | u8, u32 | test_v256 |
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
+ From: |
+ /// A 256-bit vector with 32 `u8` lanes.
+);
+impl_m!([m8; 32]: m8x32 | i8, u32 | test_v256 |
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
+ From: |
+ /// A 256-bit vector mask with 32 `m8` lanes.
+);
+
+impl_i!([i16; 16]: i16x16, m16x16 | i16, u16 | test_v256 |
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+ From: i8x16, u8x16 |
+ /// A 256-bit vector with 16 `i16` lanes.
+);
+impl_u!([u16; 16]: u16x16, m16x16 | u16, u16 | test_v256 |
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+ From: u8x16 |
+ /// A 256-bit vector with 16 `u16` lanes.
+);
+impl_m!([m16; 16]: m16x16 | i16, u16 | test_v256 |
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+ From: m8x16 |
+ /// A 256-bit vector mask with 16 `m16` lanes.
+);
+
+impl_i!([i32; 8]: i32x8, m32x8 | i32, u8 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 |
+ From: i8x8, u8x8, i16x8, u16x8 |
+ /// A 256-bit vector with 8 `i32` lanes.
+);
+impl_u!([u32; 8]: u32x8, m32x8 | u32, u8 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 |
+ From: u8x8, u16x8 |
+ /// A 256-bit vector with 8 `u32` lanes.
+);
+impl_f!([f32; 8]: f32x8, m32x8 | f32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 |
+ From: i8x8, u8x8, i16x8, u16x8 |
+ /// A 256-bit vector with 8 `f32` lanes.
+);
+impl_m!([m32; 8]: m32x8 | i32, u8 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 |
+ From: m8x8, m16x8 |
+ /// A 256-bit vector mask with 8 `m32` lanes.
+);
+
+impl_i!([i64; 4]: i64x4, m64x4 | i64, u8 | test_v256 | x0, x1, x2, x3 |
+ From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4 |
+ /// A 256-bit vector with 4 `i64` lanes.
+);
+impl_u!([u64; 4]: u64x4, m64x4 | u64, u8 | test_v256 | x0, x1, x2, x3 |
+ From: u8x4, u16x4, u32x4 |
+ /// A 256-bit vector with 4 `u64` lanes.
+);
+impl_f!([f64; 4]: f64x4, m64x4 | f64 | test_v256 | x0, x1, x2, x3 |
+ From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4, f32x4 |
+ /// A 256-bit vector with 4 `f64` lanes.
+);
+impl_m!([m64; 4]: m64x4 | i64, u8 | test_v256 | x0, x1, x2, x3 |
+ From: m8x4, m16x4, m32x4 |
+ /// A 256-bit vector mask with 4 `m64` lanes.
+);
+
+impl_i!([i128; 2]: i128x2, m128x2 | i128, u8 | test_v256 | x0, x1 |
+ From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2, i64x2, u64x2 |
+ /// A 256-bit vector with 2 `i128` lanes.
+);
+impl_u!([u128; 2]: u128x2, m128x2 | u128, u8 | test_v256 | x0, x1 |
+ From: u8x2, u16x2, u32x2, u64x2 |
+ /// A 256-bit vector with 2 `u128` lanes.
+);
+impl_m!([m128; 2]: m128x2 | i128, u8 | test_v256 | x0, x1 |
+ From: m8x2, m16x2, m32x2, m64x2 |
+ /// A 256-bit vector mask with 2 `m128` lanes.
+);
diff --git a/third_party/rust/packed_simd/src/v32.rs b/third_party/rust/packed_simd/src/v32.rs
new file mode 100644
index 0000000000..75a1838e5e
--- /dev/null
+++ b/third_party/rust/packed_simd/src/v32.rs
@@ -0,0 +1,29 @@
+//! 32-bit wide vector types
+
+use crate::*;
+
+impl_i!([i8; 4]: i8x4, m8x4 | i8, u8 | test_v32 | x0, x1, x2, x3 |
+ From: |
+ /// A 32-bit vector with 4 `i8` lanes.
+);
+impl_u!([u8; 4]: u8x4, m8x4 | u8, u8 | test_v32 | x0, x1, x2, x3 |
+ From: |
+ /// A 32-bit vector with 4 `u8` lanes.
+);
+impl_m!([m8; 4]: m8x4 | i8, u8 | test_v32 | x0, x1, x2, x3 |
+ From: m16x4, m32x4, m64x4 |
+ /// A 32-bit vector mask with 4 `m8` lanes.
+);
+
+impl_i!([i16; 2]: i16x2, m16x2 | i16, u8 | test_v32 | x0, x1 |
+ From: i8x2, u8x2 |
+ /// A 32-bit vector with 2 `i16` lanes.
+);
+impl_u!([u16; 2]: u16x2, m16x2 | u16, u8 | test_v32 | x0, x1 |
+ From: u8x2 |
+ /// A 32-bit vector with 2 `u16` lanes.
+);
+impl_m!([m16; 2]: m16x2 | i16, u8 | test_v32 | x0, x1 |
+ From: m8x2, m32x2, m64x2, m128x2 |
+ /// A 32-bit vector mask with 2 `m16` lanes.
+);
diff --git a/third_party/rust/packed_simd/src/v512.rs b/third_party/rust/packed_simd/src/v512.rs
new file mode 100644
index 0000000000..4c8c71338a
--- /dev/null
+++ b/third_party/rust/packed_simd/src/v512.rs
@@ -0,0 +1,99 @@
+//! 512-bit wide vector types
+#[rustfmt::skip]
+
+use crate::*;
+
+impl_i!([i8; 64]: i8x64, m8x64 | i8, u64 | test_v512 |
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31,
+ x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47,
+ x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 |
+ From: |
+ /// A 512-bit vector with 64 `i8` lanes.
+);
+impl_u!([u8; 64]: u8x64, m8x64 | u8, u64 | test_v512 |
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31,
+ x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47,
+ x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 |
+ From: |
+ /// A 512-bit vector with 64 `u8` lanes.
+);
+impl_m!([m8; 64]: m8x64 | i8, u64 | test_v512 |
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31,
+ x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47,
+ x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 |
+ From: |
+ /// A 512-bit vector mask with 64 `m8` lanes.
+);
+
+impl_i!([i16; 32]: i16x32, m16x32 | i16, u32 | test_v512 |
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
+ From: i8x32, u8x32 |
+ /// A 512-bit vector with 32 `i16` lanes.
+);
+impl_u!([u16; 32]: u16x32, m16x32 | u16, u32 | test_v512 |
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
+ From: u8x32 |
+ /// A 512-bit vector with 32 `u16` lanes.
+);
+impl_m!([m16; 32]: m16x32 | i16, u32 | test_v512 |
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+ x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 |
+ From: m8x32 |
+ /// A 512-bit vector mask with 32 `m16` lanes.
+);
+
+impl_i!([i32; 16]: i32x16, m32x16 | i32, u16 | test_v512 |
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+ From: i8x16, u8x16, i16x16, u16x16 |
+ /// A 512-bit vector with 16 `i32` lanes.
+);
+impl_u!([u32; 16]: u32x16, m32x16 | u32, u16 | test_v512 |
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+ From: u8x16, u16x16 |
+ /// A 512-bit vector with 16 `u32` lanes.
+);
+impl_f!([f32; 16]: f32x16, m32x16 | f32 | test_v512 |
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+ From: i8x16, u8x16, i16x16, u16x16 |
+ /// A 512-bit vector with 16 `f32` lanes.
+);
+impl_m!([m32; 16]: m32x16 | i32, u16 | test_v512 |
+ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
+ From: m8x16, m16x16 |
+ /// A 512-bit vector mask with 16 `m32` lanes.
+);
+
+impl_i!([i64; 8]: i64x8, m64x8 | i64, u8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 |
+ From: i8x8, u8x8, i16x8, u16x8, i32x8, u32x8 |
+ /// A 512-bit vector with 8 `i64` lanes.
+);
+impl_u!([u64; 8]: u64x8, m64x8 | u64, u8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 |
+ From: u8x8, u16x8, u32x8 |
+ /// A 512-bit vector with 8 `u64` lanes.
+);
+impl_f!([f64; 8]: f64x8, m64x8 | f64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 |
+ From: i8x8, u8x8, i16x8, u16x8, i32x8, u32x8, f32x8 |
+ /// A 512-bit vector with 8 `f64` lanes.
+);
+impl_m!([m64; 8]: m64x8 | i64, u8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 |
+ From: m8x8, m16x8, m32x8 |
+ /// A 512-bit vector mask with 8 `m64` lanes.
+);
+
+impl_i!([i128; 4]: i128x4, m128x4 | i128, u8 | test_v512 | x0, x1, x2, x3 |
+ From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4, i64x4, u64x4 |
+ /// A 512-bit vector with 4 `i128` lanes.
+);
+impl_u!([u128; 4]: u128x4, m128x4 | u128, u8 | test_v512 | x0, x1, x2, x3 |
+ From: u8x4, u16x4, u32x4, u64x4 |
+ /// A 512-bit vector with 4 `u128` lanes.
+);
+impl_m!([m128; 4]: m128x4 | i128, u8 | test_v512 | x0, x1, x2, x3 |
+ From: m8x4, m16x4, m32x4, m64x4 |
+ /// A 512-bit vector mask with 4 `m128` lanes.
+);
diff --git a/third_party/rust/packed_simd/src/v64.rs b/third_party/rust/packed_simd/src/v64.rs
new file mode 100644
index 0000000000..bf6b9de610
--- /dev/null
+++ b/third_party/rust/packed_simd/src/v64.rs
@@ -0,0 +1,66 @@
+//! 64-bit wide vector types
+#[rustfmt::skip]
+
+use super::*;
+
+impl_i!([i8; 8]: i8x8, m8x8 | i8, u8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 |
+ From: |
+ /// A 64-bit vector with 8 `i8` lanes.
+);
+impl_u!([u8; 8]: u8x8, m8x8 | u8, u8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 |
+ From: |
+ /// A 64-bit vector with 8 `u8` lanes.
+);
+impl_m!([m8; 8]: m8x8 | i8, u8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 |
+ From: m16x8, m32x8 |
+ /// A 64-bit vector mask with 8 `m8` lanes.
+);
+
+impl_i!([i16; 4]: i16x4, m16x4 | i16, u8 | test_v64 | x0, x1, x2, x3 |
+ From: i8x4, u8x4 |
+ /// A 64-bit vector with 4 `i16` lanes.
+);
+impl_u!([u16; 4]: u16x4, m16x4 | u16, u8 | test_v64 | x0, x1, x2, x3 |
+ From: u8x4 |
+ /// A 64-bit vector with 4 `u16` lanes.
+);
+impl_m!([m16; 4]: m16x4 | i16, u8 | test_v64 | x0, x1, x2, x3 |
+ From: m8x4, m32x4, m64x4 |
+ /// A 64-bit vector mask with 4 `m16` lanes.
+);
+
+impl_i!([i32; 2]: i32x2, m32x2 | i32, u8 | test_v64 | x0, x1 |
+ From: i8x2, u8x2, i16x2, u16x2 |
+ /// A 64-bit vector with 2 `i32` lanes.
+);
+impl_u!([u32; 2]: u32x2, m32x2 | u32, u8 | test_v64 | x0, x1 |
+ From: u8x2, u16x2 |
+ /// A 64-bit vector with 2 `u32` lanes.
+);
+impl_m!([m32; 2]: m32x2 | i32, u8 | test_v64 | x0, x1 |
+ From: m8x2, m16x2, m64x2, m128x2 |
+ /// A 64-bit vector mask with 2 `m32` lanes.
+);
+impl_f!([f32; 2]: f32x2, m32x2 | f32 | test_v64 | x0, x1 |
+ From: i8x2, u8x2, i16x2, u16x2 |
+ /// A 64-bit vector with 2 `f32` lanes.
+);
+
+/*
+impl_i!([i64; 1]: i64x1, m64x1 | i64, u8 | test_v64 | x0 |
+ From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1*/ | // FIXME: primitive to vector conversion
+ /// A 64-bit vector with 1 `i64` lanes.
+);
+impl_u!([u64; 1]: u64x1, m64x1 | u64, u8 | test_v64 | x0 |
+ From: /*u8x1, u16x1, u32x1*/ | // FIXME: primitive to vector conversion
+ /// A 64-bit vector with 1 `u64` lanes.
+);
+impl_m!([m64; 1]: m64x1 | i64, u8 | test_v64 | x0 |
+ From: /*m8x1, m16x1, m32x1, */ m128x1 | // FIXME: unary small vector types
+ /// A 64-bit vector mask with 1 `m64` lanes.
+);
+impl_f!([f64; 1]: f64x1, m64x1 | f64 | test_v64 | x0 |
+ From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1, f32x1*/ | // FIXME: unary small vector types
+ /// A 64-bit vector with 1 `f64` lanes.
+);
+*/
diff --git a/third_party/rust/packed_simd/src/vPtr.rs b/third_party/rust/packed_simd/src/vPtr.rs
new file mode 100644
index 0000000000..e34cb170eb
--- /dev/null
+++ b/third_party/rust/packed_simd/src/vPtr.rs
@@ -0,0 +1,34 @@
+//! Vectors of pointers
+#[rustfmt::skip]
+
+use crate::*;
+
+impl_const_p!(
+ [*const T; 2]: cptrx2, msizex2, usizex2, isizex2 | test_v128 | x0, x1 | From: |
+ /// A vector with 2 `*const T` lanes
+);
+
+impl_mut_p!(
+ [*mut T; 2]: mptrx2, msizex2, usizex2, isizex2 | test_v128 | x0, x1 | From: |
+ /// A vector with 2 `*mut T` lanes
+);
+
+impl_const_p!(
+ [*const T; 4]: cptrx4, msizex4, usizex4, isizex4 | test_v256 | x0, x1, x2, x3 | From: |
+ /// A vector with 4 `*const T` lanes
+);
+
+impl_mut_p!(
+ [*mut T; 4]: mptrx4, msizex4, usizex4, isizex4 | test_v256 | x0, x1, x2, x3 | From: |
+ /// A vector with 4 `*mut T` lanes
+);
+
+impl_const_p!(
+ [*const T; 8]: cptrx8, msizex8, usizex8, isizex8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: |
+ /// A vector with 8 `*const T` lanes
+);
+
+impl_mut_p!(
+ [*mut T; 8]: mptrx8, msizex8, usizex8, isizex8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: |
+ /// A vector with 8 `*mut T` lanes
+);
diff --git a/third_party/rust/packed_simd/src/vSize.rs b/third_party/rust/packed_simd/src/vSize.rs
new file mode 100644
index 0000000000..b5d8910061
--- /dev/null
+++ b/third_party/rust/packed_simd/src/vSize.rs
@@ -0,0 +1,53 @@
+//! Vectors with pointer-sized elements
+
+use crate::codegen::pointer_sized_int::{isize_, usize_};
+use crate::*;
+
+impl_i!([isize; 2]: isizex2, msizex2 | isize_, u8 | test_v128 |
+ x0, x1|
+ From: |
+ /// A vector with 2 `isize` lanes.
+);
+
+impl_u!([usize; 2]: usizex2, msizex2 | usize_, u8 | test_v128 |
+ x0, x1|
+ From: |
+ /// A vector with 2 `usize` lanes.
+);
+impl_m!([msize; 2]: msizex2 | isize_, u8 | test_v128 |
+ x0, x1 |
+ From: |
+ /// A vector mask with 2 `msize` lanes.
+);
+
+impl_i!([isize; 4]: isizex4, msizex4 | isize_, u8 | test_v256 |
+ x0, x1, x2, x3 |
+ From: |
+ /// A vector with 4 `isize` lanes.
+);
+impl_u!([usize; 4]: usizex4, msizex4 | usize_, u8 | test_v256 |
+ x0, x1, x2, x3|
+ From: |
+ /// A vector with 4 `usize` lanes.
+);
+impl_m!([msize; 4]: msizex4 | isize_, u8 | test_v256 |
+ x0, x1, x2, x3 |
+ From: |
+ /// A vector mask with 4 `msize` lanes.
+);
+
+impl_i!([isize; 8]: isizex8, msizex8 | isize_, u8 | test_v512 |
+ x0, x1, x2, x3, x4, x5, x6, x7 |
+ From: |
+ /// A vector with 8 `isize` lanes.
+);
+impl_u!([usize; 8]: usizex8, msizex8 | usize_, u8 | test_v512 |
+ x0, x1, x2, x3, x4, x5, x6, x7 |
+ From: |
+ /// A vector with 8 `usize` lanes.
+);
+impl_m!([msize; 8]: msizex8 | isize_, u8 | test_v512 |
+ x0, x1, x2, x3, x4, x5, x6, x7 |
+ From: |
+ /// A vector mask with 8 `msize` lanes.
+);
diff --git a/third_party/rust/packed_simd/tests/endianness.rs b/third_party/rust/packed_simd/tests/endianness.rs
new file mode 100644
index 0000000000..17a7796b1b
--- /dev/null
+++ b/third_party/rust/packed_simd/tests/endianness.rs
@@ -0,0 +1,268 @@
+#[cfg(target_arch = "wasm32")]
+use wasm_bindgen_test::*;
+
+use packed_simd::*;
+use std::{mem, slice};
+
+#[cfg_attr(not(target_arch = "wasm32"), test)]
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+fn endian_indexing() {
+ let v = i32x4::new(0, 1, 2, 3);
+ assert_eq!(v.extract(0), 0);
+ assert_eq!(v.extract(1), 1);
+ assert_eq!(v.extract(2), 2);
+ assert_eq!(v.extract(3), 3);
+}
+
+#[cfg_attr(not(target_arch = "wasm32"), test)]
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+fn endian_bitcasts() {
+ #[rustfmt::skip]
+ let x = i8x16::new(
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ );
+ let t: i16x8 = unsafe { mem::transmute(x) };
+ let e: i16x8 = if cfg!(target_endian = "little") {
+ i16x8::new(256, 770, 1284, 1798, 2312, 2826, 3340, 3854)
+ } else {
+ i16x8::new(1, 515, 1029, 1543, 2057, 2571, 3085, 3599)
+ };
+ assert_eq!(t, e);
+}
+
+#[cfg_attr(not(target_arch = "wasm32"), test)]
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+fn endian_casts() {
+ #[rustfmt::skip]
+ let x = i8x16::new(
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ );
+ let t: i16x16 = x.into(); // simd_cast
+ #[rustfmt::skip]
+ let e = i16x16::new(
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ );
+ assert_eq!(t, e);
+}
+
+#[cfg_attr(not(target_arch = "wasm32"), test)]
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+fn endian_load_and_stores() {
+ #[rustfmt::skip]
+ let x = i8x16::new(
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ );
+ let mut y: [i16; 8] = [0; 8];
+ x.write_to_slice_unaligned(unsafe { slice::from_raw_parts_mut(&mut y as *mut _ as *mut i8, 16) });
+
+ let e: [i16; 8] = if cfg!(target_endian = "little") {
+ [256, 770, 1284, 1798, 2312, 2826, 3340, 3854]
+ } else {
+ [1, 515, 1029, 1543, 2057, 2571, 3085, 3599]
+ };
+ assert_eq!(y, e);
+
+ let z = i8x16::from_slice_unaligned(unsafe { slice::from_raw_parts(&y as *const _ as *const i8, 16) });
+ assert_eq!(z, x);
+}
+
+#[cfg_attr(not(target_arch = "wasm32"), test)]
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+fn endian_array_union() {
+ union A {
+ data: [f32; 4],
+ vec: f32x4,
+ }
+ let x: [f32; 4] = unsafe { A { vec: f32x4::new(0., 1., 2., 3.) }.data };
+ // As all of these are integer values within the mantissa^1 range, it
+ // would be very unusual for them to actually fail to compare.
+ #[allow(clippy::float_cmp)]
+ {
+ assert_eq!(x[0], 0_f32);
+ assert_eq!(x[1], 1_f32);
+ assert_eq!(x[2], 2_f32);
+ assert_eq!(x[3], 3_f32);
+ }
+ let y: f32x4 = unsafe { A { data: [3., 2., 1., 0.] }.vec };
+ assert_eq!(y, f32x4::new(3., 2., 1., 0.));
+
+ union B {
+ data: [i8; 16],
+ vec: i8x16,
+ }
+ #[rustfmt::skip]
+ let x = i8x16::new(
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ );
+ let x: [i8; 16] = unsafe { B { vec: x }.data };
+
+ for (i, v) in x.iter().enumerate() {
+ assert_eq!(i as i8, *v);
+ }
+
+ #[rustfmt::skip]
+ let y = [
+ 15, 14, 13, 12, 11, 19, 9, 8,
+ 7, 6, 5, 4, 3, 2, 1, 0
+ ];
+ #[rustfmt::skip]
+ let e = i8x16::new(
+ 15, 14, 13, 12, 11, 19, 9, 8,
+ 7, 6, 5, 4, 3, 2, 1, 0
+ );
+ let z = unsafe { B { data: y }.vec };
+ assert_eq!(z, e);
+
+ union C {
+ data: [i16; 8],
+ vec: i8x16,
+ }
+ #[rustfmt::skip]
+ let x = i8x16::new(
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ );
+ let x: [i16; 8] = unsafe { C { vec: x }.data };
+
+ let e: [i16; 8] = if cfg!(target_endian = "little") {
+ [256, 770, 1284, 1798, 2312, 2826, 3340, 3854]
+ } else {
+ [1, 515, 1029, 1543, 2057, 2571, 3085, 3599]
+ };
+ assert_eq!(x, e);
+}
+
+#[cfg_attr(not(target_arch = "wasm32"), test)]
+#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+fn endian_tuple_access() {
+ type F32x4T = (f32, f32, f32, f32);
+ union A {
+ data: F32x4T,
+ vec: f32x4,
+ }
+ let x: F32x4T = unsafe { A { vec: f32x4::new(0., 1., 2., 3.) }.data };
+ // As all of these are integer values within the mantissa^1 range, it
+ // would be very unusual for them to actually fail to compare.
+ #[allow(clippy::float_cmp)]
+ {
+ assert_eq!(x.0, 0_f32);
+ assert_eq!(x.1, 1_f32);
+ assert_eq!(x.2, 2_f32);
+ assert_eq!(x.3, 3_f32);
+ }
+ let y: f32x4 = unsafe { A { data: (3., 2., 1., 0.) }.vec };
+ assert_eq!(y, f32x4::new(3., 2., 1., 0.));
+
+ #[rustfmt::skip]
+ type I8x16T = (i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8);
+ union B {
+ data: I8x16T,
+ vec: i8x16,
+ }
+
+ #[rustfmt::skip]
+ let x = i8x16::new(
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ );
+ let x: I8x16T = unsafe { B { vec: x }.data };
+
+ assert_eq!(x.0, 0);
+ assert_eq!(x.1, 1);
+ assert_eq!(x.2, 2);
+ assert_eq!(x.3, 3);
+ assert_eq!(x.4, 4);
+ assert_eq!(x.5, 5);
+ assert_eq!(x.6, 6);
+ assert_eq!(x.7, 7);
+ assert_eq!(x.8, 8);
+ assert_eq!(x.9, 9);
+ assert_eq!(x.10, 10);
+ assert_eq!(x.11, 11);
+ assert_eq!(x.12, 12);
+ assert_eq!(x.13, 13);
+ assert_eq!(x.14, 14);
+ assert_eq!(x.15, 15);
+
+ #[rustfmt::skip]
+ let y = (
+ 15, 14, 13, 12, 11, 10, 9, 8,
+ 7, 6, 5, 4, 3, 2, 1, 0
+ );
+ let z: i8x16 = unsafe { B { data: y }.vec };
+ #[rustfmt::skip]
+ let e = i8x16::new(
+ 15, 14, 13, 12, 11, 10, 9, 8,
+ 7, 6, 5, 4, 3, 2, 1, 0
+ );
+ assert_eq!(e, z);
+
+ #[rustfmt::skip]
+ type I16x8T = (i16, i16, i16, i16, i16, i16, i16, i16);
+ union C {
+ data: I16x8T,
+ vec: i8x16,
+ }
+
+ #[rustfmt::skip]
+ let x = i8x16::new(
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ );
+ let x: I16x8T = unsafe { C { vec: x }.data };
+
+ let e: [i16; 8] = if cfg!(target_endian = "little") {
+ [256, 770, 1284, 1798, 2312, 2826, 3340, 3854]
+ } else {
+ [1, 515, 1029, 1543, 2057, 2571, 3085, 3599]
+ };
+ assert_eq!(x.0, e[0]);
+ assert_eq!(x.1, e[1]);
+ assert_eq!(x.2, e[2]);
+ assert_eq!(x.3, e[3]);
+ assert_eq!(x.4, e[4]);
+ assert_eq!(x.5, e[5]);
+ assert_eq!(x.6, e[6]);
+ assert_eq!(x.7, e[7]);
+
+ #[rustfmt::skip]
+ #[repr(C)]
+ #[derive(Copy ,Clone)]
+ pub struct Tup(pub i8, pub i8, pub i16, pub i8, pub i8, pub i16,
+ pub i8, pub i8, pub i16, pub i8, pub i8, pub i16);
+
+ union D {
+ data: Tup,
+ vec: i8x16,
+ }
+
+ #[rustfmt::skip]
+ let x = i8x16::new(
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ );
+ let x: Tup = unsafe { D { vec: x }.data };
+
+ let e: [i16; 12] = if cfg!(target_endian = "little") {
+ [0, 1, 770, 4, 5, 1798, 8, 9, 2826, 12, 13, 3854]
+ } else {
+ [0, 1, 515, 4, 5, 1543, 8, 9, 2571, 12, 13, 3599]
+ };
+ assert_eq!(x.0 as i16, e[0]);
+ assert_eq!(x.1 as i16, e[1]);
+ assert_eq!(x.2 as i16, e[2]);
+ assert_eq!(x.3 as i16, e[3]);
+ assert_eq!(x.4 as i16, e[4]);
+ assert_eq!(x.5 as i16, e[5]);
+ assert_eq!(x.6 as i16, e[6]);
+ assert_eq!(x.7 as i16, e[7]);
+ assert_eq!(x.8 as i16, e[8]);
+ assert_eq!(x.9 as i16, e[9]);
+ assert_eq!(x.10 as i16, e[10]);
+ assert_eq!(x.11 as i16, e[11]);
+}