Diffstat (limited to 'rust/vendor/sha2')
27 files changed, 2804 insertions, 0 deletions
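This import only adds files; nothing upstream is modified. For context, a vendored tree like this is typically consumed by pointing Cargo at the vendor directory through a source replacement. A minimal sketch, assuming the vendor root is `rust/vendor` — the repository's actual `.cargo/config.toml` stanza is not part of this diff, so treat the snippet as illustrative:

```toml
# Hypothetical .cargo/config.toml entry for consuming the vendored crates.
# This is the stanza `cargo vendor rust/vendor` prints; it is not taken from this diff.
[source.crates-io]
replace-with = "vendored-sources"

[source.vendored-sources]
directory = "rust/vendor"
```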
diff --git a/rust/vendor/sha2/.cargo-checksum.json b/rust/vendor/sha2/.cargo-checksum.json new file mode 100644 index 0000000..7c06903 --- /dev/null +++ b/rust/vendor/sha2/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"CHANGELOG.md":"b7b0a14409ac2880f86fe50d9584acc81f2346ebcb4e46a9e2235b54ac5b02ef","Cargo.toml":"5fdf94b86fc47d105d2f2cc55c6346d15e7f3d2d7ea92031b1ce2d24276e7778","LICENSE-APACHE":"a9040321c3712d8fd0b09cf52b17445de04a23a10165049ae187cd39e5c86be5","LICENSE-MIT":"b4eb00df6e2a4d22518fcaa6a2b4646f249b3a3c9814509b22bd2091f1392ff1","README.md":"b7af562922e4a631657acf264772d2af2b72a08d9bbc5fbcf56d9324f9027708","benches/mod.rs":"c32d9f91a541821ea988c14eee710963e623ef1edf69b02b41a29bc44e04ba95","src/consts.rs":"2f820349fa7cbf9fecc1d4aabbd1a721bb1badc3f32ef9e903826960b6f42523","src/core_api.rs":"73b160d98bfa6737688875ad73da5e3c2c93582604dc313d208200e12fdab676","src/lib.rs":"9d0ec0ba86a801bd9b2024f0b84ee322a26c7376a623dd61210e0eb9d6355aa1","src/sha256.rs":"78e84eea5d517554aa5a10860bf2ce5013ca26d529e78643cd59062546e0746f","src/sha256/aarch64.rs":"18121a25867a575fec8ef64da763693ece4e3e3e84da095254b8471234c6f1f8","src/sha256/loongarch64_asm.rs":"79e2d5e3c039581e2319f8789de9ed13a8dd819ebffd13532dbd83448c7ad662","src/sha256/soft.rs":"98e765a8e8dfa0af31f2b76570f212e6b3099522bf300e1554cbbd9fd5d02960","src/sha256/x86.rs":"70f1597f2029522b35bfd026df0a8908f086523ab2a80ba3ef35e6231b56353c","src/sha512.rs":"1b19c23c63e9cfca8b42fd9e108a8570dd03e22a37d4d6f499f2fa5e566cb2de","src/sha512/aarch64.rs":"2ed929329a0fa66180e4726d028713a49f99cc223e635078fc1f3252a44981e0","src/sha512/loongarch64_asm.rs":"58a7b54d95a0e037ba80570d96ffe0dd7c0014c7fcb45b90725e522cc4992d8a","src/sha512/soft.rs":"0183ad89418b886859d2afa9bf061bc92759ae337c1d26147b4300042e63ef42","src/sha512/x86.rs":"c7dd8bdf3212e1e8c4cc9cc6b380dc0468f79dcfd0f61a445d0d38cead45a03a","tests/data/sha224.blb":"59b185972521af418fd49a079de3d5f5bed74cd76d80473da51cab3faee6c7d0","tests/data/sha256.blb":"bb096934bb7e43e41ce143d211397afca6fcdfe243a39811688ea31aae6f800a","tests/data/sha384.blb":"e8fe66c07ba336fae2c0aa4c87cb768f41bd4ed318ee1a36fbde0a68581946ec","tests/data/sha512.blb":"1cc0e86571f2f4e3bc81438ce7b6c25c118d2d7437355240113f59cbb782c8d6","tests/data/sha512_224.blb":"b02dd46741db1034112e0888d0cdb233a21b9a82c319456f806bbaae49acf440","tests/data/sha512_256.blb":"95195b758e362d92ff0cebebac4cca696512ea5811b635243bc70e29164e5786","tests/mod.rs":"61be596fd9b45a8db345950ff2ed6f87eaf4d239ac156885f36e819da0597644"},"package":"793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8"}
\ No newline at end of file diff --git a/rust/vendor/sha2/CHANGELOG.md b/rust/vendor/sha2/CHANGELOG.md new file mode 100644 index 0000000..a5182bc --- /dev/null +++ b/rust/vendor/sha2/CHANGELOG.md @@ -0,0 +1,181 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## 0.10.8 (2023-09-26) +### Added +- `asm!`-based backend for LoongArch64 targets gated behind `loongarch64_asm` feature [#507] + +[#507]: https://github.com/RustCrypto/hashes/pull/507 + +## 0.10.7 (2023-06-15) +### Added +- AArch64 Neon-based backend ([#490]) + +[#490]: https://github.com/RustCrypto/hashes/pull/490 + +## 0.10.6 (2022-09-16) +### Added +- Feature-gated OID support ([#405]) + +[#405]: https://github.com/RustCrypto/hashes/pull/405 + +## 0.10.5 (2022-09-02) +### Fixed +- MSRV issue which was not resolved by v0.10.4 ([#401]) + +[#401]: https://github.com/RustCrypto/hashes/pull/401 + +## 0.10.4 (2022-09-02) +### Fixed +- MSRV issue caused by publishing v0.10.3 using a buggy Nightly toolchain ([#399]) + +[#399]: https://github.com/RustCrypto/hashes/pull/399 + +## 0.10.3 (2022-08-30) +### Changed +- Ignore `asm` feature on unsupported targets ([#388]) + +[#388]: https://github.com/RustCrypto/hashes/pull/388 + +## 0.10.2 (2022-02-17) +### Fixed +- Minimal versions build ([#363]) + +[#363]: https://github.com/RustCrypto/hashes/pull/363 + +## 0.10.1 (2022-01-06) +### Fixed +- Bug in the AVX2 backend ([#345]) + +## 0.10.0 (2021-12-07) [YANKED] +### Changed +- Update to `digest` v0.10 ([#217]) +- Rename `Sha512Trunc224` and `Sha512Trunc256` to `Sha512_224` and `Sha512_256` respectively. ([#217]) + +[#217]: https://github.com/RustCrypto/hashes/pull/217 + +## 0.9.9 (2022-01-06) +### Fixed +- Backport [#345] bug fix for the AVX2 backend ([#346]) + +[#345]: https://github.com/RustCrypto/hashes/pull/345 +[#346]: https://github.com/RustCrypto/hashes/pull/346 + +## 0.9.8 (2021-09-09) [YANKED] +### Fixed +- Bug in the AVX2 backend ([#314]) + +[#314]: https://github.com/RustCrypto/hashes/pull/314 + +## 0.9.7 (2021-09-08) [YANKED] +### Added +- x86 intrinsics support for SHA-512 ([#312]) + +[#312]: https://github.com/RustCrypto/hashes/pull/312 + +## 0.9.6 (2021-08-27) +### Changed +- Bump `cpufeatures` dependency to 0.2 ([#306]) + +[#306]: https://github.com/RustCrypto/hashes/pull/306 + +## 0.9.5 (2021-05-11) +### Changed +- Use `cpufeatures` to detect intrinsics support on `aarch64` targets ([#267]) + +[#267]: https://github.com/RustCrypto/hashes/pull/267 + +## 0.9.4 (2021-05-05) +### Added +- Hardware accelerated SHA-256 for Apple M1 CPUs with `asm` feature ([#262]) + +### Changed +- Bump `sha2-asm` to v0.6.1 release ([#262]) +- Switch from `cpuid-bool` to `cpufeatures` ([#263]) + +[#262]: https://github.com/RustCrypto/hashes/pull/262 +[#263]: https://github.com/RustCrypto/hashes/pull/263 + +## 0.9.3 (2021-01-30) +### Changed +- Use the SHA-NI extension backend with enabled `asm` feature. ([#224]) + +[#224]: https://github.com/RustCrypto/hashes/pull/224 + +## 0.9.2 (2020-11-04) +### Added +- `force-soft` feature to enforce use of software implementation. ([#203]) + +### Changed +- `cfg-if` dependency updated to v1.0. 
([#197]) + +[#197]: https://github.com/RustCrypto/hashes/pull/197 +[#203]: https://github.com/RustCrypto/hashes/pull/203 + +## 0.9.1 (2020-06-24) +### Added +- x86 hardware acceleration of SHA-256 via SHA extension intrinsics. ([#167]) + +[#167]: https://github.com/RustCrypto/hashes/pull/167 + +## 0.9.0 (2020-06-09) +### Changed +- Update to `digest` v0.9 release; MSRV 1.41+ ([#155]) +- Use new `*Dirty` traits from the `digest` crate ([#153]) +- Bump `block-buffer` to v0.8 release ([#151]) +- Rename `*result*` to `finalize` ([#148]) +- Upgrade to Rust 2018 edition ([#133]) + +[#155]: https://github.com/RustCrypto/hashes/pull/155 +[#153]: https://github.com/RustCrypto/hashes/pull/153 +[#151]: https://github.com/RustCrypto/hashes/pull/151 +[#148]: https://github.com/RustCrypto/hashes/pull/148 +[#133]: https://github.com/RustCrypto/hashes/pull/133 + +## 0.8.2 (2020-05-23) +### Added +- Expose compression function under the `compress` feature flag ([#108]) + +### Changed +- Use `libc` crate for `aarch64` consts ([#109]) +- Minor code cleanups ([#94]) + +[#109]: https://github.com/RustCrypto/hashes/pull/109 +[#108]: https://github.com/RustCrypto/hashes/pull/108 +[#94]: https://github.com/RustCrypto/hashes/pull/94 + +## 0.8.1 (2020-01-05) + +## 0.8.0 (2018-10-02) + +## 0.7.1 (2018-04-27) + +## 0.6.0 (2017-06-12) + +## 0.5.3 (2017-06-03) + +## 0.5.2 (2017-05-08) + +## 0.5.1 (2017-05-01) + +## 0.5.0 (2017-04-06) + +## 0.4.2 (2017-01-23) + +## 0.4.1 (2017-01-20) + +## 0.4.0 (2016-12-24) + +## 0.3.0 (2016-11-17) + +## 0.2.0 (2016-10-26) + +## 0.1.2 (2016-05-06) + +## 0.1.1 (2016-05-06) + +## 0.1.0 (2016-05-06) diff --git a/rust/vendor/sha2/Cargo.toml b/rust/vendor/sha2/Cargo.toml new file mode 100644 index 0000000..0a9655c --- /dev/null +++ b/rust/vendor/sha2/Cargo.toml @@ -0,0 +1,71 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2018" +name = "sha2" +version = "0.10.8" +authors = ["RustCrypto Developers"] +description = """ +Pure Rust implementation of the SHA-2 hash function family +including SHA-224, SHA-256, SHA-384, and SHA-512.
+""" +documentation = "https://docs.rs/sha2" +readme = "README.md" +keywords = [ + "crypto", + "sha2", + "hash", + "digest", +] +categories = [ + "cryptography", + "no-std", +] +license = "MIT OR Apache-2.0" +repository = "https://github.com/RustCrypto/hashes" + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = [ + "--cfg", + "docsrs", +] + +[dependencies.cfg-if] +version = "1.0" + +[dependencies.digest] +version = "0.10.7" + +[dev-dependencies.digest] +version = "0.10.7" +features = ["dev"] + +[dev-dependencies.hex-literal] +version = "0.2.2" + +[features] +asm = ["sha2-asm"] +asm-aarch64 = ["asm"] +compress = [] +default = ["std"] +force-soft = [] +loongarch64_asm = [] +oid = ["digest/oid"] +std = ["digest/std"] + +[target."cfg(any(target_arch = \"aarch64\", target_arch = \"x86_64\", target_arch = \"x86\"))".dependencies.cpufeatures] +version = "0.2" + +[target."cfg(any(target_arch = \"aarch64\", target_arch = \"x86_64\", target_arch = \"x86\"))".dependencies.sha2-asm] +version = "0.6.1" +optional = true diff --git a/rust/vendor/sha2/LICENSE-APACHE b/rust/vendor/sha2/LICENSE-APACHE new file mode 100644 index 0000000..78173fa --- /dev/null +++ b/rust/vendor/sha2/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/rust/vendor/sha2/LICENSE-MIT b/rust/vendor/sha2/LICENSE-MIT new file mode 100644 index 0000000..66cf755 --- /dev/null +++ b/rust/vendor/sha2/LICENSE-MIT @@ -0,0 +1,27 @@ +Copyright (c) 2006-2009 Graydon Hoare +Copyright (c) 2009-2013 Mozilla Foundation +Copyright (c) 2016 Artyom Pavlov + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/rust/vendor/sha2/README.md b/rust/vendor/sha2/README.md new file mode 100644 index 0000000..594e953 --- /dev/null +++ b/rust/vendor/sha2/README.md @@ -0,0 +1,59 @@ +# RustCrypto: SHA-2 + +[![crate][crate-image]][crate-link] +[![Docs][docs-image]][docs-link] +![Apache2/MIT licensed][license-image] +![Rust Version][rustc-image] +[![Project Chat][chat-image]][chat-link] +[![Build Status][build-image]][build-link] + +Pure Rust implementation of the [SHA-2 hash function family][1] +including SHA-224, SHA-256, SHA-384, and SHA-512. + +[Documentation][docs-link] + +<img src="https://raw.githubusercontent.com/RustCrypto/meta/master/img/hashes/sha2.png" width="480px"> + +## Minimum Supported Rust Version + +Rust **1.41** or higher. + +Minimum supported Rust version can be changed in the future, but it will be +done with a minor version bump. + +## SemVer Policy + +- All on-by-default features of this library are covered by SemVer +- MSRV is considered exempt from SemVer as noted above + +## License + +Licensed under either of: + + * [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) + * [MIT license](http://opensource.org/licenses/MIT) + +at your option. + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in the work by you, as defined in the Apache-2.0 license, shall be +dual licensed as above, without any additional terms or conditions. 
+ +[//]: # (badges) + +[crate-image]: https://img.shields.io/crates/v/sha2.svg +[crate-link]: https://crates.io/crates/sha2 +[docs-image]: https://docs.rs/sha2/badge.svg +[docs-link]: https://docs.rs/sha2/ +[license-image]: https://img.shields.io/badge/license-Apache2.0/MIT-blue.svg +[rustc-image]: https://img.shields.io/badge/rustc-1.41+-blue.svg +[chat-image]: https://img.shields.io/badge/zulip-join_chat-blue.svg +[chat-link]: https://rustcrypto.zulipchat.com/#narrow/stream/260041-hashes +[build-image]: https://github.com/RustCrypto/hashes/workflows/sha2/badge.svg?branch=master +[build-link]: https://github.com/RustCrypto/hashes/actions?query=workflow%3Asha2 + +[//]: # (general links) + +[1]: https://en.wikipedia.org/wiki/SHA-2 diff --git a/rust/vendor/sha2/benches/mod.rs b/rust/vendor/sha2/benches/mod.rs new file mode 100644 index 0000000..8c60ccc --- /dev/null +++ b/rust/vendor/sha2/benches/mod.rs @@ -0,0 +1,22 @@ +#![feature(test)] +extern crate test; + +use digest::bench_update; +use sha2::{Sha256, Sha512}; +use test::Bencher; + +bench_update!( + Sha256::default(); + sha256_10 10; + sha256_100 100; + sha256_1000 1000; + sha256_10000 10000; +); + +bench_update!( + Sha512::default(); + sha512_10 10; + sha512_100 100; + sha512_1000 1000; + sha512_10000 10000; +); diff --git a/rust/vendor/sha2/src/consts.rs b/rust/vendor/sha2/src/consts.rs new file mode 100644 index 0000000..8c0bbab --- /dev/null +++ b/rust/vendor/sha2/src/consts.rs @@ -0,0 +1,107 @@ +#![allow(dead_code, clippy::unreadable_literal)] + +pub const STATE_LEN: usize = 8; +pub const BLOCK_LEN: usize = 16; + +pub type State256 = [u32; STATE_LEN]; +pub type State512 = [u64; STATE_LEN]; + +/// Constants necessary for SHA-256 family of digests. +pub const K32: [u32; 64] = [ + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, +]; + +/// Constants necessary for SHA-256 family of digests. +pub const K32X4: [[u32; 4]; 16] = [ + [K32[3], K32[2], K32[1], K32[0]], + [K32[7], K32[6], K32[5], K32[4]], + [K32[11], K32[10], K32[9], K32[8]], + [K32[15], K32[14], K32[13], K32[12]], + [K32[19], K32[18], K32[17], K32[16]], + [K32[23], K32[22], K32[21], K32[20]], + [K32[27], K32[26], K32[25], K32[24]], + [K32[31], K32[30], K32[29], K32[28]], + [K32[35], K32[34], K32[33], K32[32]], + [K32[39], K32[38], K32[37], K32[36]], + [K32[43], K32[42], K32[41], K32[40]], + [K32[47], K32[46], K32[45], K32[44]], + [K32[51], K32[50], K32[49], K32[48]], + [K32[55], K32[54], K32[53], K32[52]], + [K32[59], K32[58], K32[57], K32[56]], + [K32[63], K32[62], K32[61], K32[60]], +]; + +/// Constants necessary for SHA-512 family of digests. 
+pub const K64: [u64; 80] = [ + 0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc, + 0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118, + 0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2, + 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694, + 0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65, + 0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5, + 0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4, + 0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70, + 0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df, + 0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b, + 0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30, + 0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8, + 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8, + 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3, + 0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec, + 0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b, + 0xca273eceea26619c, 0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178, + 0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b, + 0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c, + 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817, +]; + +/// Constants necessary for SHA-512 family of digests. +pub const K64X2: [[u64; 2]; 40] = [ + [K64[1], K64[0]], [K64[3], K64[2]], [K64[5], K64[4]], [K64[7], K64[6]], + [K64[9], K64[8]], [K64[11], K64[10]], [K64[13], K64[12]], [K64[15], K64[14]], + [K64[17], K64[16]], [K64[19], K64[18]], [K64[21], K64[20]], [K64[23], K64[22]], + [K64[25], K64[24]], [K64[27], K64[26]], [K64[29], K64[28]], [K64[31], K64[30]], + [K64[33], K64[32]], [K64[35], K64[34]], [K64[37], K64[36]], [K64[39], K64[38]], + [K64[41], K64[40]], [K64[43], K64[42]], [K64[45], K64[44]], [K64[47], K64[46]], + [K64[49], K64[48]], [K64[51], K64[50]], [K64[53], K64[52]], [K64[55], K64[54]], + [K64[57], K64[56]], [K64[59], K64[58]], [K64[61], K64[60]], [K64[63], K64[62]], + [K64[65], K64[64]], [K64[67], K64[66]], [K64[69], K64[68]], [K64[71], K64[70]], + [K64[73], K64[72]], [K64[75], K64[74]], [K64[77], K64[76]], [K64[79], K64[78]], +]; + +pub const H256_224: State256 = [ + 0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, + 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4, +]; + +pub const H256_256: State256 = [ + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, + 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, +]; + +pub const H512_224: State512 = [ + 0x8c3d37c819544da2, 0x73e1996689dcd4d6, 0x1dfab7ae32ff9c82, 0x679dd514582f9fcf, + 0x0f6d2b697bd44da8, 0x77e36f7304c48942, 0x3f9d85a86a1d36c8, 0x1112e6ad91d692a1, +]; + +pub const H512_256: State512 = [ + 0x22312194fc2bf72c, 0x9f555fa3c84c64c2, 0x2393b86b6f53b151, 0x963877195940eabd, + 0x96283ee2a88effe3, 0xbe5e1e2553863992, 0x2b0199fc2c85b8aa, 0x0eb72ddc81c52ca2, +]; + +pub const H512_384: State512 = [ + 0xcbbb9d5dc1059ed8, 0x629a292a367cd507, 0x9159015a3070dd17, 0x152fecd8f70e5939, + 0x67332667ffc00b31, 0x8eb44a8768581511, 0xdb0c2e0d64f98fa7, 0x47b5481dbefa4fa4, +]; + +pub const H512_512: State512 = [ + 0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 
0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, + 0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179, +]; diff --git a/rust/vendor/sha2/src/core_api.rs b/rust/vendor/sha2/src/core_api.rs new file mode 100644 index 0000000..cfec02a --- /dev/null +++ b/rust/vendor/sha2/src/core_api.rs @@ -0,0 +1,157 @@ +use crate::{consts, sha256::compress256, sha512::compress512}; +use core::{fmt, slice::from_ref}; +use digest::{ + block_buffer::Eager, + core_api::{ + AlgorithmName, Block, BlockSizeUser, Buffer, BufferKindUser, OutputSizeUser, TruncSide, + UpdateCore, VariableOutputCore, + }, + typenum::{Unsigned, U128, U32, U64}, + HashMarker, InvalidOutputSize, Output, +}; + +/// Core block-level SHA-256 hasher with variable output size. +/// +/// Supports initialization only for 28 and 32 byte output sizes, +/// i.e. 224 and 256 bits respectively. +#[derive(Clone)] +pub struct Sha256VarCore { + state: consts::State256, + block_len: u64, +} + +impl HashMarker for Sha256VarCore {} + +impl BlockSizeUser for Sha256VarCore { + type BlockSize = U64; +} + +impl BufferKindUser for Sha256VarCore { + type BufferKind = Eager; +} + +impl UpdateCore for Sha256VarCore { + #[inline] + fn update_blocks(&mut self, blocks: &[Block<Self>]) { + self.block_len += blocks.len() as u64; + compress256(&mut self.state, blocks); + } +} + +impl OutputSizeUser for Sha256VarCore { + type OutputSize = U32; +} + +impl VariableOutputCore for Sha256VarCore { + const TRUNC_SIDE: TruncSide = TruncSide::Left; + + #[inline] + fn new(output_size: usize) -> Result<Self, InvalidOutputSize> { + let state = match output_size { + 28 => consts::H256_224, + 32 => consts::H256_256, + _ => return Err(InvalidOutputSize), + }; + let block_len = 0; + Ok(Self { state, block_len }) + } + + #[inline] + fn finalize_variable_core(&mut self, buffer: &mut Buffer<Self>, out: &mut Output<Self>) { + let bs = Self::BlockSize::U64; + let bit_len = 8 * (buffer.get_pos() as u64 + bs * self.block_len); + buffer.len64_padding_be(bit_len, |b| compress256(&mut self.state, from_ref(b))); + + for (chunk, v) in out.chunks_exact_mut(4).zip(self.state.iter()) { + chunk.copy_from_slice(&v.to_be_bytes()); + } + } +} + +impl AlgorithmName for Sha256VarCore { + #[inline] + fn write_alg_name(f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("Sha256") + } +} + +impl fmt::Debug for Sha256VarCore { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("Sha256VarCore { ... }") + } +} + +/// Core block-level SHA-512 hasher with variable output size. +/// +/// Supports initialization only for 28, 32, 48, and 64 byte output sizes, +/// i.e. 224, 256, 384, and 512 bits respectively. 
+#[derive(Clone)] +pub struct Sha512VarCore { + state: consts::State512, + block_len: u128, +} + +impl HashMarker for Sha512VarCore {} + +impl BlockSizeUser for Sha512VarCore { + type BlockSize = U128; +} + +impl BufferKindUser for Sha512VarCore { + type BufferKind = Eager; +} + +impl UpdateCore for Sha512VarCore { + #[inline] + fn update_blocks(&mut self, blocks: &[Block<Self>]) { + self.block_len += blocks.len() as u128; + compress512(&mut self.state, blocks); + } +} + +impl OutputSizeUser for Sha512VarCore { + type OutputSize = U64; +} + +impl VariableOutputCore for Sha512VarCore { + const TRUNC_SIDE: TruncSide = TruncSide::Left; + + #[inline] + fn new(output_size: usize) -> Result<Self, InvalidOutputSize> { + let state = match output_size { + 28 => consts::H512_224, + 32 => consts::H512_256, + 48 => consts::H512_384, + 64 => consts::H512_512, + _ => return Err(InvalidOutputSize), + }; + let block_len = 0; + Ok(Self { state, block_len }) + } + + #[inline] + fn finalize_variable_core(&mut self, buffer: &mut Buffer<Self>, out: &mut Output<Self>) { + let bs = Self::BlockSize::U64 as u128; + let bit_len = 8 * (buffer.get_pos() as u128 + bs * self.block_len); + buffer.len128_padding_be(bit_len, |b| compress512(&mut self.state, from_ref(b))); + + for (chunk, v) in out.chunks_exact_mut(8).zip(self.state.iter()) { + chunk.copy_from_slice(&v.to_be_bytes()); + } + } +} + +impl AlgorithmName for Sha512VarCore { + #[inline] + fn write_alg_name(f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("Sha512") + } +} + +impl fmt::Debug for Sha512VarCore { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("Sha512VarCore { ... }") + } +} diff --git a/rust/vendor/sha2/src/lib.rs b/rust/vendor/sha2/src/lib.rs new file mode 100644 index 0000000..a3482e8 --- /dev/null +++ b/rust/vendor/sha2/src/lib.rs @@ -0,0 +1,96 @@ +//! An implementation of the [SHA-2][1] cryptographic hash algorithms. +//! +//! There are 6 standard algorithms specified in the SHA-2 standard: [`Sha224`], +//! [`Sha256`], [`Sha512_224`], [`Sha512_256`], [`Sha384`], and [`Sha512`]. +//! +//! Algorithmically, there are only 2 core algorithms: SHA-256 and SHA-512. +//! All other algorithms are just applications of these with different initial +//! hash values, and truncated to different digest bit lengths. The first two +//! algorithms in the list are based on SHA-256, while the last four are based +//! on SHA-512. +//! +//! # Usage +//! +//! ```rust +//! use hex_literal::hex; +//! use sha2::{Sha256, Sha512, Digest}; +//! +//! // create a Sha256 object +//! let mut hasher = Sha256::new(); +//! +//! // write input message +//! hasher.update(b"hello world"); +//! +//! // read hash digest and consume hasher +//! let result = hasher.finalize(); +//! +//! assert_eq!(result[..], hex!(" +//! b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9 +//! ")[..]); +//! +//! // same for Sha512 +//! let mut hasher = Sha512::new(); +//! hasher.update(b"hello world"); +//! let result = hasher.finalize(); +//! +//! assert_eq!(result[..], hex!(" +//! 309ecc489c12d6eb4cc40f50c902f2b4d0ed77ee511a7c7a9bcd3ca86d4cd86f +//! 989dd35bc5ff499670da34255b45b0cfd830e81f605dcf7dc5542e93ae9cd76f +//! ")[..]); +//! ``` +//! +//! Also see [RustCrypto/hashes][2] readme. +//! +//! [1]: https://en.wikipedia.org/wiki/SHA-2 +//! 
[2]: https://github.com/RustCrypto/hashes + +#![no_std] +#![cfg_attr(docsrs, feature(doc_cfg))] +#![doc( + html_logo_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg", + html_favicon_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg" +)] +#![warn(missing_docs, rust_2018_idioms)] + +pub use digest::{self, Digest}; + +#[cfg(feature = "oid")] +use digest::const_oid::{AssociatedOid, ObjectIdentifier}; +use digest::{ + consts::{U28, U32, U48, U64}, + core_api::{CoreWrapper, CtVariableCoreWrapper}, + impl_oid_carrier, +}; + +#[rustfmt::skip] +mod consts; +mod core_api; +mod sha256; +mod sha512; + +#[cfg(feature = "compress")] +pub use sha256::compress256; +#[cfg(feature = "compress")] +pub use sha512::compress512; + +pub use core_api::{Sha256VarCore, Sha512VarCore}; + +impl_oid_carrier!(OidSha256, "2.16.840.1.101.3.4.2.1"); +impl_oid_carrier!(OidSha384, "2.16.840.1.101.3.4.2.2"); +impl_oid_carrier!(OidSha512, "2.16.840.1.101.3.4.2.3"); +impl_oid_carrier!(OidSha224, "2.16.840.1.101.3.4.2.4"); +impl_oid_carrier!(OidSha512_224, "2.16.840.1.101.3.4.2.5"); +impl_oid_carrier!(OidSha512_256, "2.16.840.1.101.3.4.2.6"); + +/// SHA-224 hasher. +pub type Sha224 = CoreWrapper<CtVariableCoreWrapper<Sha256VarCore, U28, OidSha224>>; +/// SHA-256 hasher. +pub type Sha256 = CoreWrapper<CtVariableCoreWrapper<Sha256VarCore, U32, OidSha256>>; +/// SHA-512/224 hasher. +pub type Sha512_224 = CoreWrapper<CtVariableCoreWrapper<Sha512VarCore, U28, OidSha512_224>>; +/// SHA-512/256 hasher. +pub type Sha512_256 = CoreWrapper<CtVariableCoreWrapper<Sha512VarCore, U32, OidSha512_256>>; +/// SHA-384 hasher. +pub type Sha384 = CoreWrapper<CtVariableCoreWrapper<Sha512VarCore, U48, OidSha384>>; +/// SHA-512 hasher. +pub type Sha512 = CoreWrapper<CtVariableCoreWrapper<Sha512VarCore, U64, OidSha512>>; diff --git a/rust/vendor/sha2/src/sha256.rs b/rust/vendor/sha2/src/sha256.rs new file mode 100644 index 0000000..8f82878 --- /dev/null +++ b/rust/vendor/sha2/src/sha256.rs @@ -0,0 +1,40 @@ +use digest::{generic_array::GenericArray, typenum::U64}; + +cfg_if::cfg_if! { + if #[cfg(feature = "force-soft")] { + mod soft; + use soft::compress; + } else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + #[cfg(not(feature = "asm"))] + mod soft; + #[cfg(feature = "asm")] + mod soft { + pub(crate) use sha2_asm::compress256 as compress; + } + mod x86; + use x86::compress; + } else if #[cfg(all(feature = "asm", target_arch = "aarch64"))] { + mod soft; + mod aarch64; + use aarch64::compress; + } else if #[cfg(all(feature = "loongarch64_asm", target_arch = "loongarch64"))] { + mod loongarch64_asm; + use loongarch64_asm::compress; + } else { + mod soft; + use soft::compress; + } +} + +/// Raw SHA-256 compression function. +/// +/// This is a low-level "hazmat" API which provides direct access to the core +/// functionality of SHA-256. +#[cfg_attr(docsrs, doc(cfg(feature = "compress")))] +pub fn compress256(state: &mut [u32; 8], blocks: &[GenericArray<u8, U64>]) { + // SAFETY: GenericArray<u8, U64> and [u8; 64] have + // exactly the same memory layout + let p = blocks.as_ptr() as *const [u8; 64]; + let blocks = unsafe { core::slice::from_raw_parts(p, blocks.len()) }; + compress(state, blocks) +} diff --git a/rust/vendor/sha2/src/sha256/aarch64.rs b/rust/vendor/sha2/src/sha256/aarch64.rs new file mode 100644 index 0000000..9d220a3 --- /dev/null +++ b/rust/vendor/sha2/src/sha256/aarch64.rs @@ -0,0 +1,159 @@ +//! SHA-256 `aarch64` backend. + +// Implementation adapted from mbedtls. 
+ +// TODO: stdarch intrinsics: RustCrypto/hashes#257 + +use core::arch::{aarch64::*, asm}; + +use crate::consts::K32; + +cpufeatures::new!(sha2_hwcap, "sha2"); + +pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) { + // TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725 + // after stabilization + if sha2_hwcap::get() { + unsafe { sha256_compress(state, blocks) } + } else { + super::soft::compress(state, blocks); + } +} + +#[target_feature(enable = "sha2")] +unsafe fn sha256_compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) { + // SAFETY: Requires the sha2 feature. + + // Load state into vectors. + let mut abcd = vld1q_u32(state[0..4].as_ptr()); + let mut efgh = vld1q_u32(state[4..8].as_ptr()); + + // Iterate through the message blocks. + for block in blocks { + // Keep original state values. + let abcd_orig = abcd; + let efgh_orig = efgh; + + // Load the message block into vectors, assuming little endianness. + let mut s0 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[0..16].as_ptr()))); + let mut s1 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[16..32].as_ptr()))); + let mut s2 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[32..48].as_ptr()))); + let mut s3 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[48..64].as_ptr()))); + + // Rounds 0 to 3 + let mut tmp = vaddq_u32(s0, vld1q_u32(&K32[0])); + let mut abcd_prev = abcd; + abcd = vsha256hq_u32(abcd_prev, efgh, tmp); + efgh = vsha256h2q_u32(efgh, abcd_prev, tmp); + + // Rounds 4 to 7 + tmp = vaddq_u32(s1, vld1q_u32(&K32[4])); + abcd_prev = abcd; + abcd = vsha256hq_u32(abcd_prev, efgh, tmp); + efgh = vsha256h2q_u32(efgh, abcd_prev, tmp); + + // Rounds 8 to 11 + tmp = vaddq_u32(s2, vld1q_u32(&K32[8])); + abcd_prev = abcd; + abcd = vsha256hq_u32(abcd_prev, efgh, tmp); + efgh = vsha256h2q_u32(efgh, abcd_prev, tmp); + + // Rounds 12 to 15 + tmp = vaddq_u32(s3, vld1q_u32(&K32[12])); + abcd_prev = abcd; + abcd = vsha256hq_u32(abcd_prev, efgh, tmp); + efgh = vsha256h2q_u32(efgh, abcd_prev, tmp); + + for t in (16..64).step_by(16) { + // Rounds t to t + 3 + s0 = vsha256su1q_u32(vsha256su0q_u32(s0, s1), s2, s3); + tmp = vaddq_u32(s0, vld1q_u32(&K32[t])); + abcd_prev = abcd; + abcd = vsha256hq_u32(abcd_prev, efgh, tmp); + efgh = vsha256h2q_u32(efgh, abcd_prev, tmp); + + // Rounds t + 4 to t + 7 + s1 = vsha256su1q_u32(vsha256su0q_u32(s1, s2), s3, s0); + tmp = vaddq_u32(s1, vld1q_u32(&K32[t + 4])); + abcd_prev = abcd; + abcd = vsha256hq_u32(abcd_prev, efgh, tmp); + efgh = vsha256h2q_u32(efgh, abcd_prev, tmp); + + // Rounds t + 8 to t + 11 + s2 = vsha256su1q_u32(vsha256su0q_u32(s2, s3), s0, s1); + tmp = vaddq_u32(s2, vld1q_u32(&K32[t + 8])); + abcd_prev = abcd; + abcd = vsha256hq_u32(abcd_prev, efgh, tmp); + efgh = vsha256h2q_u32(efgh, abcd_prev, tmp); + + // Rounds t + 12 to t + 15 + s3 = vsha256su1q_u32(vsha256su0q_u32(s3, s0), s1, s2); + tmp = vaddq_u32(s3, vld1q_u32(&K32[t + 12])); + abcd_prev = abcd; + abcd = vsha256hq_u32(abcd_prev, efgh, tmp); + efgh = vsha256h2q_u32(efgh, abcd_prev, tmp); + } + + // Add the block-specific state to the original state. + abcd = vaddq_u32(abcd, abcd_orig); + efgh = vaddq_u32(efgh, efgh_orig); + } + + // Store vectors into state. 
+ vst1q_u32(state[0..4].as_mut_ptr(), abcd); + vst1q_u32(state[4..8].as_mut_ptr(), efgh); +} + +// TODO remove these polyfills once SHA2 intrinsics land + +#[inline(always)] +unsafe fn vsha256hq_u32( + mut hash_efgh: uint32x4_t, + hash_abcd: uint32x4_t, + wk: uint32x4_t, +) -> uint32x4_t { + asm!( + "SHA256H {:q}, {:q}, {:v}.4S", + inout(vreg) hash_efgh, in(vreg) hash_abcd, in(vreg) wk, + options(pure, nomem, nostack, preserves_flags) + ); + hash_efgh +} + +#[inline(always)] +unsafe fn vsha256h2q_u32( + mut hash_efgh: uint32x4_t, + hash_abcd: uint32x4_t, + wk: uint32x4_t, +) -> uint32x4_t { + asm!( + "SHA256H2 {:q}, {:q}, {:v}.4S", + inout(vreg) hash_efgh, in(vreg) hash_abcd, in(vreg) wk, + options(pure, nomem, nostack, preserves_flags) + ); + hash_efgh +} + +#[inline(always)] +unsafe fn vsha256su0q_u32(mut w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t { + asm!( + "SHA256SU0 {:v}.4S, {:v}.4S", + inout(vreg) w0_3, in(vreg) w4_7, + options(pure, nomem, nostack, preserves_flags) + ); + w0_3 +} + +#[inline(always)] +unsafe fn vsha256su1q_u32( + mut tw0_3: uint32x4_t, + w8_11: uint32x4_t, + w12_15: uint32x4_t, +) -> uint32x4_t { + asm!( + "SHA256SU1 {:v}.4S, {:v}.4S, {:v}.4S", + inout(vreg) tw0_3, in(vreg) w8_11, in(vreg) w12_15, + options(pure, nomem, nostack, preserves_flags) + ); + tw0_3 +} diff --git a/rust/vendor/sha2/src/sha256/loongarch64_asm.rs b/rust/vendor/sha2/src/sha256/loongarch64_asm.rs new file mode 100644 index 0000000..c80fce8 --- /dev/null +++ b/rust/vendor/sha2/src/sha256/loongarch64_asm.rs @@ -0,0 +1,227 @@ +//! LoongArch64 assembly backend + +macro_rules! c { + ($($l:expr)*) => { + concat!($($l ,)*) + }; +} + +macro_rules! rounda { + ($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => { + c!( + "ld.w $a5, $a1, (" $i " * 4);" + "revb.2h $a5, $a5;" + "rotri.w $a5, $a5, 16;" + roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h) + ) + }; +} + +macro_rules! roundb { + ($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => { + c!( + "ld.w $a4, $sp, (((" $i " - 15) & 0xF) * 4);" + "ld.w $a5, $sp, (((" $i " - 16) & 0xF) * 4);" + "ld.w $a6, $sp, (((" $i " - 7) & 0xF) * 4);" + "add.w $a5, $a5, $a6;" + "rotri.w $a6, $a4, 18;" + "srli.w $a7, $a4, 3;" + "rotri.w $a4, $a4, 7;" + "xor $a6, $a6, $a7;" + "xor $a4, $a4, $a6;" + "add.w $a5, $a5, $a4;" + "ld.w $a4, $sp, (((" $i " - 2) & 0xF) * 4);" + "rotri.w $a6, $a4, 19;" + "srli.w $a7, $a4, 10;" + "rotri.w $a4, $a4, 17;" + "xor $a6, $a6, $a7;" + "xor $a4, $a4, $a6;" + "add.w $a5, $a5, $a4;" + roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h) + ) + }; +} + +macro_rules! 
roundtail { + ($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => { + c!( + // Part 0 + "rotri.w $a6, " $e ", 11;" + "rotri.w $a7, " $e ", 25;" + "rotri.w $a4, " $e ", 6;" + "xor $a6, $a6, $a7;" + "xor $a4, $a4, $a6;" + "xor $a6, " $g ", " $f ";" + "ld.w $a7, $a3, " $i " * 4;" + "and $a6, $a6, " $e ";" + "xor $a6, $a6, " $g ";" + "add.w $a4, $a4, $a6;" + "add.w $a4, $a4, $a7;" + "add.w " $h ", " $h ", $a5;" + "add.w " $h ", " $h ", $a4;" + // Part 1 + "add.w " $d ", " $d ", " $h ";" + // Part 2 + "rotri.w $a6, " $a ", 13;" + "rotri.w $a7, " $a ", 22;" + "rotri.w $a4, " $a ", 2;" + "xor $a6, $a6, $a7;" + "xor $a4, $a4, $a6;" + "add.w " $h ", " $h ", $a4;" + "or $a4, " $c ", " $b ";" + "and $a6, " $c ", " $b ";" + "and $a4, $a4, " $a ";" + "or $a4, $a4, $a6;" + "add.w " $h ", " $h ", $a4;" + "st.w $a5, $sp, ((" $i " & 0xF) * 4);" + ) + }; +} + +pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) { + if blocks.is_empty() { + return; + } + + unsafe { + core::arch::asm!( + // Allocate scratch stack space + "addi.d $sp, $sp, -64;", + + // Load state + "ld.w $t0, $a0, 0", + "ld.w $t1, $a0, 4", + "ld.w $t2, $a0, 8", + "ld.w $t3, $a0, 12", + "ld.w $t4, $a0, 16", + "ld.w $t5, $a0, 20", + "ld.w $t6, $a0, 24", + "ld.w $t7, $a0, 28", + + "42:", + + // Do 64 rounds of hashing + rounda!( 0, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"), + rounda!( 1, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"), + rounda!( 2, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"), + rounda!( 3, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"), + rounda!( 4, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"), + rounda!( 5, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"), + rounda!( 6, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"), + rounda!( 7, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"), + rounda!( 8, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"), + rounda!( 9, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"), + rounda!(10, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"), + rounda!(11, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"), + rounda!(12, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"), + rounda!(13, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"), + rounda!(14, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"), + rounda!(15, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"), + roundb!(16, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"), + roundb!(17, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"), + roundb!(18, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"), + roundb!(19, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"), + roundb!(20, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"), + roundb!(21, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"), + roundb!(22, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"), + roundb!(23, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"), + roundb!(24, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"), + roundb!(25, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"), + roundb!(26, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"), + roundb!(27, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"), + roundb!(28, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"), + roundb!(29, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"), + roundb!(30, "$t2", 
"$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"), + roundb!(31, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"), + roundb!(32, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"), + roundb!(33, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"), + roundb!(34, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"), + roundb!(35, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"), + roundb!(36, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"), + roundb!(37, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"), + roundb!(38, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"), + roundb!(39, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"), + roundb!(40, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"), + roundb!(41, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"), + roundb!(42, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"), + roundb!(43, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"), + roundb!(44, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"), + roundb!(45, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"), + roundb!(46, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"), + roundb!(47, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"), + roundb!(48, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"), + roundb!(49, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"), + roundb!(50, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"), + roundb!(51, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"), + roundb!(52, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"), + roundb!(53, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"), + roundb!(54, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"), + roundb!(55, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"), + roundb!(56, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"), + roundb!(57, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"), + roundb!(58, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"), + roundb!(59, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"), + roundb!(60, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"), + roundb!(61, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"), + roundb!(62, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"), + roundb!(63, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"), + + // Update state registers + "ld.w $a4, $a0, 0", // a + "ld.w $a5, $a0, 4", // b + "ld.w $a6, $a0, 8", // c + "ld.w $a7, $a0, 12", // d + "add.w $t0, $t0, $a4", + "add.w $t1, $t1, $a5", + "add.w $t2, $t2, $a6", + "add.w $t3, $t3, $a7", + "ld.w $a4, $a0, 16", // e + "ld.w $a5, $a0, 20", // f + "ld.w $a6, $a0, 24", // g + "ld.w $a7, $a0, 28", // h + "add.w $t4, $t4, $a4", + "add.w $t5, $t5, $a5", + "add.w $t6, $t6, $a6", + "add.w $t7, $t7, $a7", + + // Save updated state + "st.w $t0, $a0, 0", + "st.w $t1, $a0, 4", + "st.w $t2, $a0, 8", + "st.w $t3, $a0, 12", + "st.w $t4, $a0, 16", + "st.w $t5, $a0, 20", + "st.w $t6, $a0, 24", + "st.w $t7, $a0, 28", + + // Looping over blocks + "addi.d $a1, $a1, 64", + "addi.d $a2, $a2, -1", + "bnez $a2, 42b", + + // Restore stack register + "addi.d $sp, $sp, 64", + + in("$a0") state, + inout("$a1") blocks.as_ptr() => _, + inout("$a2") blocks.len() => _, + in("$a3") crate::consts::K32.as_ptr(), + + // Clobbers + out("$a4") _, + out("$a5") _, + out("$a6") _, + out("$a7") _, + out("$t0") _, + out("$t1") _, + out("$t2") _, + out("$t3") _, + out("$t4") _, + 
out("$t5") _, + out("$t6") _, + out("$t7") _, + + options(preserves_flags), + ); + } +} diff --git a/rust/vendor/sha2/src/sha256/soft.rs b/rust/vendor/sha2/src/sha256/soft.rs new file mode 100644 index 0000000..34826a7 --- /dev/null +++ b/rust/vendor/sha2/src/sha256/soft.rs @@ -0,0 +1,218 @@ +#![allow(clippy::many_single_char_names)] +use crate::consts::BLOCK_LEN; +use core::convert::TryInto; + +#[inline(always)] +fn shl(v: [u32; 4], o: u32) -> [u32; 4] { + [v[0] >> o, v[1] >> o, v[2] >> o, v[3] >> o] +} + +#[inline(always)] +fn shr(v: [u32; 4], o: u32) -> [u32; 4] { + [v[0] << o, v[1] << o, v[2] << o, v[3] << o] +} + +#[inline(always)] +fn or(a: [u32; 4], b: [u32; 4]) -> [u32; 4] { + [a[0] | b[0], a[1] | b[1], a[2] | b[2], a[3] | b[3]] +} + +#[inline(always)] +fn xor(a: [u32; 4], b: [u32; 4]) -> [u32; 4] { + [a[0] ^ b[0], a[1] ^ b[1], a[2] ^ b[2], a[3] ^ b[3]] +} + +#[inline(always)] +fn add(a: [u32; 4], b: [u32; 4]) -> [u32; 4] { + [ + a[0].wrapping_add(b[0]), + a[1].wrapping_add(b[1]), + a[2].wrapping_add(b[2]), + a[3].wrapping_add(b[3]), + ] +} + +fn sha256load(v2: [u32; 4], v3: [u32; 4]) -> [u32; 4] { + [v3[3], v2[0], v2[1], v2[2]] +} + +fn sha256swap(v0: [u32; 4]) -> [u32; 4] { + [v0[2], v0[3], v0[0], v0[1]] +} + +fn sha256msg1(v0: [u32; 4], v1: [u32; 4]) -> [u32; 4] { + // sigma 0 on vectors + #[inline] + fn sigma0x4(x: [u32; 4]) -> [u32; 4] { + let t1 = or(shl(x, 7), shr(x, 25)); + let t2 = or(shl(x, 18), shr(x, 14)); + let t3 = shl(x, 3); + xor(xor(t1, t2), t3) + } + + add(v0, sigma0x4(sha256load(v0, v1))) +} + +fn sha256msg2(v4: [u32; 4], v3: [u32; 4]) -> [u32; 4] { + macro_rules! sigma1 { + ($a:expr) => { + $a.rotate_right(17) ^ $a.rotate_right(19) ^ ($a >> 10) + }; + } + + let [x3, x2, x1, x0] = v4; + let [w15, w14, _, _] = v3; + + let w16 = x0.wrapping_add(sigma1!(w14)); + let w17 = x1.wrapping_add(sigma1!(w15)); + let w18 = x2.wrapping_add(sigma1!(w16)); + let w19 = x3.wrapping_add(sigma1!(w17)); + + [w19, w18, w17, w16] +} + +fn sha256_digest_round_x2(cdgh: [u32; 4], abef: [u32; 4], wk: [u32; 4]) -> [u32; 4] { + macro_rules! big_sigma0 { + ($a:expr) => { + ($a.rotate_right(2) ^ $a.rotate_right(13) ^ $a.rotate_right(22)) + }; + } + macro_rules! big_sigma1 { + ($a:expr) => { + ($a.rotate_right(6) ^ $a.rotate_right(11) ^ $a.rotate_right(25)) + }; + } + macro_rules! bool3ary_202 { + ($a:expr, $b:expr, $c:expr) => { + $c ^ ($a & ($b ^ $c)) + }; + } // Choose, MD5F, SHA1C + macro_rules! bool3ary_232 { + ($a:expr, $b:expr, $c:expr) => { + ($a & $b) ^ ($a & $c) ^ ($b & $c) + }; + } // Majority, SHA1M + + let [_, _, wk1, wk0] = wk; + let [a0, b0, e0, f0] = abef; + let [c0, d0, g0, h0] = cdgh; + + // a round + let x0 = big_sigma1!(e0) + .wrapping_add(bool3ary_202!(e0, f0, g0)) + .wrapping_add(wk0) + .wrapping_add(h0); + let y0 = big_sigma0!(a0).wrapping_add(bool3ary_232!(a0, b0, c0)); + let (a1, b1, c1, d1, e1, f1, g1, h1) = ( + x0.wrapping_add(y0), + a0, + b0, + c0, + x0.wrapping_add(d0), + e0, + f0, + g0, + ); + + // a round + let x1 = big_sigma1!(e1) + .wrapping_add(bool3ary_202!(e1, f1, g1)) + .wrapping_add(wk1) + .wrapping_add(h1); + let y1 = big_sigma0!(a1).wrapping_add(bool3ary_232!(a1, b1, c1)); + let (a2, b2, _, _, e2, f2, _, _) = ( + x1.wrapping_add(y1), + a1, + b1, + c1, + x1.wrapping_add(d1), + e1, + f1, + g1, + ); + + [a2, b2, e2, f2] +} + +fn schedule(v0: [u32; 4], v1: [u32; 4], v2: [u32; 4], v3: [u32; 4]) -> [u32; 4] { + let t1 = sha256msg1(v0, v1); + let t2 = sha256load(v2, v3); + let t3 = add(t1, t2); + sha256msg2(t3, v3) +} + +macro_rules! 
rounds4 { + ($abef:ident, $cdgh:ident, $rest:expr, $i:expr) => {{ + let t1 = add($rest, crate::consts::K32X4[$i]); + $cdgh = sha256_digest_round_x2($cdgh, $abef, t1); + let t2 = sha256swap(t1); + $abef = sha256_digest_round_x2($abef, $cdgh, t2); + }}; +} + +macro_rules! schedule_rounds4 { + ( + $abef:ident, $cdgh:ident, + $w0:expr, $w1:expr, $w2:expr, $w3:expr, $w4:expr, + $i: expr + ) => {{ + $w4 = schedule($w0, $w1, $w2, $w3); + rounds4!($abef, $cdgh, $w4, $i); + }}; +} + +/// Process a block with the SHA-256 algorithm. +fn sha256_digest_block_u32(state: &mut [u32; 8], block: &[u32; 16]) { + let mut abef = [state[0], state[1], state[4], state[5]]; + let mut cdgh = [state[2], state[3], state[6], state[7]]; + + // Rounds 0..64 + let mut w0 = [block[3], block[2], block[1], block[0]]; + let mut w1 = [block[7], block[6], block[5], block[4]]; + let mut w2 = [block[11], block[10], block[9], block[8]]; + let mut w3 = [block[15], block[14], block[13], block[12]]; + let mut w4; + + rounds4!(abef, cdgh, w0, 0); + rounds4!(abef, cdgh, w1, 1); + rounds4!(abef, cdgh, w2, 2); + rounds4!(abef, cdgh, w3, 3); + schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 4); + schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 5); + schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 6); + schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 7); + schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 8); + schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 9); + schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 10); + schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 11); + schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 12); + schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 13); + schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 14); + schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 15); + + let [a, b, e, f] = abef; + let [c, d, g, h] = cdgh; + + state[0] = state[0].wrapping_add(a); + state[1] = state[1].wrapping_add(b); + state[2] = state[2].wrapping_add(c); + state[3] = state[3].wrapping_add(d); + state[4] = state[4].wrapping_add(e); + state[5] = state[5].wrapping_add(f); + state[6] = state[6].wrapping_add(g); + state[7] = state[7].wrapping_add(h); +} + +pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) { + let mut block_u32 = [0u32; BLOCK_LEN]; + // since LLVM can't properly use aliasing yet it will make + // unnecessary state stores without this copy + let mut state_cpy = *state; + for block in blocks { + for (o, chunk) in block_u32.iter_mut().zip(block.chunks_exact(4)) { + *o = u32::from_be_bytes(chunk.try_into().unwrap()); + } + sha256_digest_block_u32(&mut state_cpy, &block_u32); + } + *state = state_cpy; +} diff --git a/rust/vendor/sha2/src/sha256/x86.rs b/rust/vendor/sha2/src/sha256/x86.rs new file mode 100644 index 0000000..4601938 --- /dev/null +++ b/rust/vendor/sha2/src/sha256/x86.rs @@ -0,0 +1,112 @@ +//! SHA-256 `x86`/`x86_64` backend + +#![allow(clippy::many_single_char_names)] + +#[cfg(target_arch = "x86")] +use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; + +unsafe fn schedule(v0: __m128i, v1: __m128i, v2: __m128i, v3: __m128i) -> __m128i { + let t1 = _mm_sha256msg1_epu32(v0, v1); + let t2 = _mm_alignr_epi8(v3, v2, 4); + let t3 = _mm_add_epi32(t1, t2); + _mm_sha256msg2_epu32(t3, v3) +} + +macro_rules! 
rounds4 { + ($abef:ident, $cdgh:ident, $rest:expr, $i:expr) => {{ + let k = crate::consts::K32X4[$i]; + let kv = _mm_set_epi32(k[0] as i32, k[1] as i32, k[2] as i32, k[3] as i32); + let t1 = _mm_add_epi32($rest, kv); + $cdgh = _mm_sha256rnds2_epu32($cdgh, $abef, t1); + let t2 = _mm_shuffle_epi32(t1, 0x0E); + $abef = _mm_sha256rnds2_epu32($abef, $cdgh, t2); + }}; +} + +macro_rules! schedule_rounds4 { + ( + $abef:ident, $cdgh:ident, + $w0:expr, $w1:expr, $w2:expr, $w3:expr, $w4:expr, + $i: expr + ) => {{ + $w4 = schedule($w0, $w1, $w2, $w3); + rounds4!($abef, $cdgh, $w4, $i); + }}; +} + +// we use unaligned loads with `__m128i` pointers +#[allow(clippy::cast_ptr_alignment)] +#[target_feature(enable = "sha,sse2,ssse3,sse4.1")] +unsafe fn digest_blocks(state: &mut [u32; 8], blocks: &[[u8; 64]]) { + #[allow(non_snake_case)] + let MASK: __m128i = _mm_set_epi64x( + 0x0C0D_0E0F_0809_0A0Bu64 as i64, + 0x0405_0607_0001_0203u64 as i64, + ); + + let state_ptr = state.as_ptr() as *const __m128i; + let dcba = _mm_loadu_si128(state_ptr.add(0)); + let efgh = _mm_loadu_si128(state_ptr.add(1)); + + let cdab = _mm_shuffle_epi32(dcba, 0xB1); + let efgh = _mm_shuffle_epi32(efgh, 0x1B); + let mut abef = _mm_alignr_epi8(cdab, efgh, 8); + let mut cdgh = _mm_blend_epi16(efgh, cdab, 0xF0); + + for block in blocks { + let abef_save = abef; + let cdgh_save = cdgh; + + let data_ptr = block.as_ptr() as *const __m128i; + let mut w0 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(0)), MASK); + let mut w1 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(1)), MASK); + let mut w2 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(2)), MASK); + let mut w3 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(3)), MASK); + let mut w4; + + rounds4!(abef, cdgh, w0, 0); + rounds4!(abef, cdgh, w1, 1); + rounds4!(abef, cdgh, w2, 2); + rounds4!(abef, cdgh, w3, 3); + schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 4); + schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 5); + schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 6); + schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 7); + schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 8); + schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 9); + schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 10); + schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 11); + schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 12); + schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 13); + schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 14); + schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 15); + + abef = _mm_add_epi32(abef, abef_save); + cdgh = _mm_add_epi32(cdgh, cdgh_save); + } + + let feba = _mm_shuffle_epi32(abef, 0x1B); + let dchg = _mm_shuffle_epi32(cdgh, 0xB1); + let dcba = _mm_blend_epi16(feba, dchg, 0xF0); + let hgef = _mm_alignr_epi8(dchg, feba, 8); + + let state_ptr_mut = state.as_mut_ptr() as *mut __m128i; + _mm_storeu_si128(state_ptr_mut.add(0), dcba); + _mm_storeu_si128(state_ptr_mut.add(1), hgef); +} + +cpufeatures::new!(shani_cpuid, "sha", "sse2", "ssse3", "sse4.1"); + +pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) { + // TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725 + // after stabilization + if shani_cpuid::get() { + unsafe { + digest_blocks(state, blocks); + } + } else { + super::soft::compress(state, blocks); + } +} diff --git a/rust/vendor/sha2/src/sha512.rs b/rust/vendor/sha2/src/sha512.rs new file mode 100644 index 0000000..dfe0b45 --- /dev/null +++ b/rust/vendor/sha2/src/sha512.rs @@ -0,0 +1,42 @@ +use digest::{generic_array::GenericArray, 
typenum::U128}; + + cfg_if::cfg_if! { + if #[cfg(feature = "force-soft")] { + mod soft; + use soft::compress; + } else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + #[cfg(not(feature = "asm"))] + mod soft; + #[cfg(feature = "asm")] + mod soft { + pub(crate) fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) { + sha2_asm::compress512(state, blocks); + } + } + mod x86; + use x86::compress; + } else if #[cfg(all(feature = "asm", target_arch = "aarch64"))] { + mod soft; + mod aarch64; + use aarch64::compress; + } else if #[cfg(all(feature = "loongarch64_asm", target_arch = "loongarch64"))] { + mod loongarch64_asm; + use loongarch64_asm::compress; + } else { + mod soft; + use soft::compress; + } +} + +/// Raw SHA-512 compression function. +/// +/// This is a low-level "hazmat" API which provides direct access to the core +/// functionality of SHA-512. +#[cfg_attr(docsrs, doc(cfg(feature = "compress")))] +pub fn compress512(state: &mut [u64; 8], blocks: &[GenericArray<u8, U128>]) { + // SAFETY: GenericArray<u8, U128> and [u8; 128] have + // exactly the same memory layout + let p = blocks.as_ptr() as *const [u8; 128]; + let blocks = unsafe { core::slice::from_raw_parts(p, blocks.len()) }; + compress(state, blocks) +} diff --git a/rust/vendor/sha2/src/sha512/aarch64.rs b/rust/vendor/sha2/src/sha512/aarch64.rs new file mode 100644 index 0000000..fbf441c --- /dev/null +++ b/rust/vendor/sha2/src/sha512/aarch64.rs @@ -0,0 +1,235 @@ +// Implementation adapted from mbedtls. + +use core::arch::{aarch64::*, asm}; + +use crate::consts::K64; + +cpufeatures::new!(sha3_hwcap, "sha3"); + +pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) { + // TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725 + // after stabilization + if sha3_hwcap::get() { + unsafe { sha512_compress(state, blocks) } + } else { + super::soft::compress(state, blocks); + } +} + +#[target_feature(enable = "sha3")] +unsafe fn sha512_compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) { + // SAFETY: Requires the sha3 feature. + + // Load state into vectors. + let mut ab = vld1q_u64(state[0..2].as_ptr()); + let mut cd = vld1q_u64(state[2..4].as_ptr()); + let mut ef = vld1q_u64(state[4..6].as_ptr()); + let mut gh = vld1q_u64(state[6..8].as_ptr()); + + // Iterate through the message blocks. + for block in blocks { + // Keep original state values. + let ab_orig = ab; + let cd_orig = cd; + let ef_orig = ef; + let gh_orig = gh; + + // Load the message block into vectors, assuming little endianness. 
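// SHA-512 message words are big-endian (FIPS 180-4): each vld1q_u8 pulls in
// 16 bytes, and vrev64q_u8 byte-swaps within each 64-bit lane, yielding two
// native-endian message words per vector.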
+ let mut s0 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[0..16].as_ptr()))); + let mut s1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[16..32].as_ptr()))); + let mut s2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[32..48].as_ptr()))); + let mut s3 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[48..64].as_ptr()))); + let mut s4 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[64..80].as_ptr()))); + let mut s5 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[80..96].as_ptr()))); + let mut s6 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[96..112].as_ptr()))); + let mut s7 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[112..128].as_ptr()))); + + // Rounds 0 and 1 + let mut initial_sum = vaddq_u64(s0, vld1q_u64(&K64[0])); + let mut sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh); + let mut intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1)); + gh = vsha512h2q_u64(intermed, cd, ab); + cd = vaddq_u64(cd, intermed); + + // Rounds 2 and 3 + initial_sum = vaddq_u64(s1, vld1q_u64(&K64[2])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef); + intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1)); + ef = vsha512h2q_u64(intermed, ab, gh); + ab = vaddq_u64(ab, intermed); + + // Rounds 4 and 5 + initial_sum = vaddq_u64(s2, vld1q_u64(&K64[4])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd); + intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1)); + cd = vsha512h2q_u64(intermed, gh, ef); + gh = vaddq_u64(gh, intermed); + + // Rounds 6 and 7 + initial_sum = vaddq_u64(s3, vld1q_u64(&K64[6])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab); + intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1)); + ab = vsha512h2q_u64(intermed, ef, cd); + ef = vaddq_u64(ef, intermed); + + // Rounds 8 and 9 + initial_sum = vaddq_u64(s4, vld1q_u64(&K64[8])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh); + intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1)); + gh = vsha512h2q_u64(intermed, cd, ab); + cd = vaddq_u64(cd, intermed); + + // Rounds 10 and 11 + initial_sum = vaddq_u64(s5, vld1q_u64(&K64[10])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef); + intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1)); + ef = vsha512h2q_u64(intermed, ab, gh); + ab = vaddq_u64(ab, intermed); + + // Rounds 12 and 13 + initial_sum = vaddq_u64(s6, vld1q_u64(&K64[12])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd); + intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1)); + cd = vsha512h2q_u64(intermed, gh, ef); + gh = vaddq_u64(gh, intermed); + + // Rounds 14 and 15 + initial_sum = vaddq_u64(s7, vld1q_u64(&K64[14])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab); + intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1)); + ab = vsha512h2q_u64(intermed, ef, cd); + ef = vaddq_u64(ef, intermed); + + for t in (16..80).step_by(16) { + // Rounds t and t + 1 + s0 = vsha512su1q_u64(vsha512su0q_u64(s0, s1), s7, vextq_u64(s4, s5, 1)); + initial_sum = vaddq_u64(s0, vld1q_u64(&K64[t])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh); + intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1)); + gh = vsha512h2q_u64(intermed, cd, ab); + cd = vaddq_u64(cd, intermed); + + // Rounds t + 2 and t + 3 + s1 = vsha512su1q_u64(vsha512su0q_u64(s1, s2), s0, vextq_u64(s5, s6, 1)); + initial_sum = vaddq_u64(s1, vld1q_u64(&K64[t + 2])); + sum = 
vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef); + intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1)); + ef = vsha512h2q_u64(intermed, ab, gh); + ab = vaddq_u64(ab, intermed); + + // Rounds t + 4 and t + 5 + s2 = vsha512su1q_u64(vsha512su0q_u64(s2, s3), s1, vextq_u64(s6, s7, 1)); + initial_sum = vaddq_u64(s2, vld1q_u64(&K64[t + 4])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd); + intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1)); + cd = vsha512h2q_u64(intermed, gh, ef); + gh = vaddq_u64(gh, intermed); + + // Rounds t + 6 and t + 7 + s3 = vsha512su1q_u64(vsha512su0q_u64(s3, s4), s2, vextq_u64(s7, s0, 1)); + initial_sum = vaddq_u64(s3, vld1q_u64(&K64[t + 6])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab); + intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1)); + ab = vsha512h2q_u64(intermed, ef, cd); + ef = vaddq_u64(ef, intermed); + + // Rounds t + 8 and t + 9 + s4 = vsha512su1q_u64(vsha512su0q_u64(s4, s5), s3, vextq_u64(s0, s1, 1)); + initial_sum = vaddq_u64(s4, vld1q_u64(&K64[t + 8])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh); + intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1)); + gh = vsha512h2q_u64(intermed, cd, ab); + cd = vaddq_u64(cd, intermed); + + // Rounds t + 10 and t + 11 + s5 = vsha512su1q_u64(vsha512su0q_u64(s5, s6), s4, vextq_u64(s1, s2, 1)); + initial_sum = vaddq_u64(s5, vld1q_u64(&K64[t + 10])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef); + intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1)); + ef = vsha512h2q_u64(intermed, ab, gh); + ab = vaddq_u64(ab, intermed); + + // Rounds t + 12 and t + 13 + s6 = vsha512su1q_u64(vsha512su0q_u64(s6, s7), s5, vextq_u64(s2, s3, 1)); + initial_sum = vaddq_u64(s6, vld1q_u64(&K64[t + 12])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd); + intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1)); + cd = vsha512h2q_u64(intermed, gh, ef); + gh = vaddq_u64(gh, intermed); + + // Rounds t + 14 and t + 15 + s7 = vsha512su1q_u64(vsha512su0q_u64(s7, s0), s6, vextq_u64(s3, s4, 1)); + initial_sum = vaddq_u64(s7, vld1q_u64(&K64[t + 14])); + sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab); + intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1)); + ab = vsha512h2q_u64(intermed, ef, cd); + ef = vaddq_u64(ef, intermed); + } + + // Add the block-specific state to the original state. + ab = vaddq_u64(ab, ab_orig); + cd = vaddq_u64(cd, cd_orig); + ef = vaddq_u64(ef, ef_orig); + gh = vaddq_u64(gh, gh_orig); + } + + // Store vectors into state. 
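// Each vector holds one word pair (ab = a,b and so on), so four 128-bit
// stores write all eight u64 state words back in order.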
+ vst1q_u64(state[0..2].as_mut_ptr(), ab); + vst1q_u64(state[2..4].as_mut_ptr(), cd); + vst1q_u64(state[4..6].as_mut_ptr(), ef); + vst1q_u64(state[6..8].as_mut_ptr(), gh); +} + +// TODO remove these polyfills once SHA3 intrinsics land + +#[inline(always)] +unsafe fn vsha512hq_u64( + mut hash_ed: uint64x2_t, + hash_gf: uint64x2_t, + kwh_kwh2: uint64x2_t, +) -> uint64x2_t { + asm!( + "SHA512H {:q}, {:q}, {:v}.2D", + inout(vreg) hash_ed, in(vreg) hash_gf, in(vreg) kwh_kwh2, + options(pure, nomem, nostack, preserves_flags) + ); + hash_ed +} + +#[inline(always)] +unsafe fn vsha512h2q_u64( + mut sum_ab: uint64x2_t, + hash_c_: uint64x2_t, + hash_ab: uint64x2_t, +) -> uint64x2_t { + asm!( + "SHA512H2 {:q}, {:q}, {:v}.2D", + inout(vreg) sum_ab, in(vreg) hash_c_, in(vreg) hash_ab, + options(pure, nomem, nostack, preserves_flags) + ); + sum_ab +} + +#[inline(always)] +unsafe fn vsha512su0q_u64(mut w0_1: uint64x2_t, w2_: uint64x2_t) -> uint64x2_t { + asm!( + "SHA512SU0 {:v}.2D, {:v}.2D", + inout(vreg) w0_1, in(vreg) w2_, + options(pure, nomem, nostack, preserves_flags) + ); + w0_1 +} + +#[inline(always)] +unsafe fn vsha512su1q_u64( + mut s01_s02: uint64x2_t, + w14_15: uint64x2_t, + w9_10: uint64x2_t, +) -> uint64x2_t { + asm!( + "SHA512SU1 {:v}.2D, {:v}.2D, {:v}.2D", + inout(vreg) s01_s02, in(vreg) w14_15, in(vreg) w9_10, + options(pure, nomem, nostack, preserves_flags) + ); + s01_s02 +} diff --git a/rust/vendor/sha2/src/sha512/loongarch64_asm.rs b/rust/vendor/sha2/src/sha512/loongarch64_asm.rs new file mode 100644 index 0000000..557089d --- /dev/null +++ b/rust/vendor/sha2/src/sha512/loongarch64_asm.rs @@ -0,0 +1,242 @@ +//! LoongArch64 assembly backend + +macro_rules! c { + ($($l:expr)*) => { + concat!($($l ,)*) + }; +} + +macro_rules! rounda { + ($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => { + c!( + "ld.d $a5, $a1, (" $i " * 8);" + "revb.d $a5, $a5;" + roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h) + ) + }; +} + +macro_rules! roundb { + ($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => { + c!( + "ld.d $a4, $sp, (((" $i " - 15) & 0xF) * 8);" + "ld.d $a5, $sp, (((" $i " - 16) & 0xF) * 8);" + "ld.d $a6, $sp, (((" $i " - 7) & 0xF) * 8);" + "add.d $a5, $a5, $a6;" + "rotri.d $a6, $a4, 8;" + "srli.d $a7, $a4, 7;" + "rotri.d $a4, $a4, 1;" + "xor $a6, $a6, $a7;" + "xor $a4, $a4, $a6;" + "add.d $a5, $a5, $a4;" + "ld.d $a4, $sp, (((" $i " - 2) & 0xF) * 8);" + "rotri.d $a6, $a4, 61;" + "srli.d $a7, $a4, 6;" + "rotri.d $a4, $a4, 19;" + "xor $a6, $a6, $a7;" + "xor $a4, $a4, $a6;" + "add.d $a5, $a5, $a4;" + roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h) + ) + }; +} + +macro_rules! 
roundtail { + ($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => { + c!( + // Part 0 + "rotri.d $a6, " $e ", 18;" + "rotri.d $a7, " $e ", 41;" + "rotri.d $a4, " $e ", 14;" + "xor $a6, $a6, $a7;" + "xor $a4, $a4, $a6;" + "xor $a6, " $g ", " $f ";" + "ld.d $a7, $a3, " $i " * 8;" + "and $a6, $a6, " $e ";" + "xor $a6, $a6, " $g ";" + "add.d $a4, $a4, $a6;" + "add.d $a4, $a4, $a7;" + "add.d " $h ", " $h ", $a5;" + "add.d " $h ", " $h ", $a4;" + // Part 1 + "add.d " $d ", " $d ", " $h ";" + // Part 2 + "rotri.d $a6, " $a ", 39;" + "rotri.d $a7, " $a ", 34;" + "rotri.d $a4, " $a ", 28;" + "xor $a6, $a6, $a7;" + "xor $a4, $a4, $a6;" + "add.d " $h ", " $h ", $a4;" + "or $a4, " $c ", " $b ";" + "and $a6, " $c ", " $b ";" + "and $a4, $a4, " $a ";" + "or $a4, $a4, $a6;" + "add.d " $h ", " $h ", $a4;" + "st.d $a5, $sp, ((" $i " & 0xF) * 8);" + ) + }; +} + +pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) { + if blocks.is_empty() { + return; + } + + unsafe { + core::arch::asm!( + // Allocate scratch stack space + "addi.d $sp, $sp, -128;", + + // Load state + "ld.d $t0, $a0, 0", + "ld.d $t1, $a0, 8", + "ld.d $t2, $a0, 16", + "ld.d $t3, $a0, 24", + "ld.d $t4, $a0, 32", + "ld.d $t5, $a0, 40", + "ld.d $t6, $a0, 48", + "ld.d $t7, $a0, 56", + + "42:", + + // Do 80 rounds of hashing + rounda!( 0, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"), + rounda!( 1, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"), + rounda!( 2, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"), + rounda!( 3, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"), + rounda!( 4, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"), + rounda!( 5, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"), + rounda!( 6, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"), + rounda!( 7, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"), + rounda!( 8, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"), + rounda!( 9, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"), + rounda!(10, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"), + rounda!(11, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"), + rounda!(12, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"), + rounda!(13, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"), + rounda!(14, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"), + rounda!(15, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"), + roundb!(16, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"), + roundb!(17, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"), + roundb!(18, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"), + roundb!(19, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"), + roundb!(20, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"), + roundb!(21, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"), + roundb!(22, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"), + roundb!(23, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"), + roundb!(24, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"), + roundb!(25, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"), + roundb!(26, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"), + roundb!(27, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"), + roundb!(28, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"), + roundb!(29, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"), + roundb!(30, 
"$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"), + roundb!(31, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"), + roundb!(32, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"), + roundb!(33, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"), + roundb!(34, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"), + roundb!(35, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"), + roundb!(36, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"), + roundb!(37, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"), + roundb!(38, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"), + roundb!(39, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"), + roundb!(40, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"), + roundb!(41, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"), + roundb!(42, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"), + roundb!(43, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"), + roundb!(44, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"), + roundb!(45, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"), + roundb!(46, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"), + roundb!(47, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"), + roundb!(48, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"), + roundb!(49, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"), + roundb!(50, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"), + roundb!(51, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"), + roundb!(52, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"), + roundb!(53, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"), + roundb!(54, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"), + roundb!(55, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"), + roundb!(56, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"), + roundb!(57, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"), + roundb!(58, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"), + roundb!(59, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"), + roundb!(60, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"), + roundb!(61, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"), + roundb!(62, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"), + roundb!(63, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"), + roundb!(64, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"), + roundb!(65, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"), + roundb!(66, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"), + roundb!(67, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"), + roundb!(68, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"), + roundb!(69, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"), + roundb!(70, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"), + roundb!(71, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"), + roundb!(72, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"), + roundb!(73, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"), + roundb!(74, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"), + roundb!(75, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"), + roundb!(76, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"), + roundb!(77, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"), + roundb!(78, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"), + 
roundb!(79, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"), + + // Update state registers + "ld.d $a4, $a0, 0", // a + "ld.d $a5, $a0, 8", // b + "ld.d $a6, $a0, 16", // c + "ld.d $a7, $a0, 24", // d + "add.d $t0, $t0, $a4", + "add.d $t1, $t1, $a5", + "add.d $t2, $t2, $a6", + "add.d $t3, $t3, $a7", + "ld.d $a4, $a0, 32", // e + "ld.d $a5, $a0, 40", // f + "ld.d $a6, $a0, 48", // g + "ld.d $a7, $a0, 56", // h + "add.d $t4, $t4, $a4", + "add.d $t5, $t5, $a5", + "add.d $t6, $t6, $a6", + "add.d $t7, $t7, $a7", + + // Save updated state + "st.d $t0, $a0, 0", + "st.d $t1, $a0, 8", + "st.d $t2, $a0, 16", + "st.d $t3, $a0, 24", + "st.d $t4, $a0, 32", + "st.d $t5, $a0, 40", + "st.d $t6, $a0, 48", + "st.d $t7, $a0, 56", + + // Looping over blocks + "addi.d $a1, $a1, 128", + "addi.d $a2, $a2, -1", + "bnez $a2, 42b", + + // Restore stack register + "addi.d $sp, $sp, 128", + + in("$a0") state, + inout("$a1") blocks.as_ptr() => _, + inout("$a2") blocks.len() => _, + in("$a3") crate::consts::K64.as_ptr(), + + // Clobbers + out("$a4") _, + out("$a5") _, + out("$a6") _, + out("$a7") _, + out("$t0") _, + out("$t1") _, + out("$t2") _, + out("$t3") _, + out("$t4") _, + out("$t5") _, + out("$t6") _, + out("$t7") _, + + options(preserves_flags), + ); + } +} diff --git a/rust/vendor/sha2/src/sha512/soft.rs b/rust/vendor/sha2/src/sha512/soft.rs new file mode 100644 index 0000000..ab6d568 --- /dev/null +++ b/rust/vendor/sha2/src/sha512/soft.rs @@ -0,0 +1,215 @@ +#![allow(clippy::many_single_char_names)] +use crate::consts::{BLOCK_LEN, K64X2}; +use core::convert::TryInto; + +fn add(a: [u64; 2], b: [u64; 2]) -> [u64; 2] { + [a[0].wrapping_add(b[0]), a[1].wrapping_add(b[1])] +} + +/// Not an intrinsic, but works like an unaligned load. +fn sha512load(v0: [u64; 2], v1: [u64; 2]) -> [u64; 2] { + [v1[1], v0[0]] +} + +/// Performs 2 rounds of the SHA-512 message schedule update. +pub fn sha512_schedule_x2(v0: [u64; 2], v1: [u64; 2], v4to5: [u64; 2], v7: [u64; 2]) -> [u64; 2] { + // sigma 0 + fn sigma0(x: u64) -> u64 { + ((x << 63) | (x >> 1)) ^ ((x << 56) | (x >> 8)) ^ (x >> 7) + } + + // sigma 1 + fn sigma1(x: u64) -> u64 { + ((x << 45) | (x >> 19)) ^ ((x << 3) | (x >> 61)) ^ (x >> 6) + } + + let [w1, w0] = v0; + let [_, w2] = v1; + let [w10, w9] = v4to5; + let [w15, w14] = v7; + + let w16 = sigma1(w14) + .wrapping_add(w9) + .wrapping_add(sigma0(w1)) + .wrapping_add(w0); + let w17 = sigma1(w15) + .wrapping_add(w10) + .wrapping_add(sigma0(w2)) + .wrapping_add(w1); + + [w17, w16] +} + +/// Performs one round of the SHA-512 message block digest. +pub fn sha512_digest_round( + ae: [u64; 2], + bf: [u64; 2], + cg: [u64; 2], + dh: [u64; 2], + wk0: u64, +) -> [u64; 2] { + macro_rules! big_sigma0 { + ($a:expr) => { + ($a.rotate_right(28) ^ $a.rotate_right(34) ^ $a.rotate_right(39)) + }; + } + macro_rules! big_sigma1 { + ($a:expr) => { + ($a.rotate_right(14) ^ $a.rotate_right(18) ^ $a.rotate_right(41)) + }; + } + macro_rules! bool3ary_202 { + ($a:expr, $b:expr, $c:expr) => { + $c ^ ($a & ($b ^ $c)) + }; + } // Choose, MD5F, SHA1C + macro_rules! 
bool3ary_232 { + ($a:expr, $b:expr, $c:expr) => { + ($a & $b) ^ ($a & $c) ^ ($b & $c) + }; + } // Majority, SHA1M + + let [a0, e0] = ae; + let [b0, f0] = bf; + let [c0, g0] = cg; + let [d0, h0] = dh; + + // a round + let x0 = big_sigma1!(e0) + .wrapping_add(bool3ary_202!(e0, f0, g0)) + .wrapping_add(wk0) + .wrapping_add(h0); + let y0 = big_sigma0!(a0).wrapping_add(bool3ary_232!(a0, b0, c0)); + let (a1, _, _, _, e1, _, _, _) = ( + x0.wrapping_add(y0), + a0, + b0, + c0, + x0.wrapping_add(d0), + e0, + f0, + g0, + ); + + [a1, e1] +} + +/// Process a block with the SHA-512 algorithm. +pub fn sha512_digest_block_u64(state: &mut [u64; 8], block: &[u64; 16]) { + let k = &K64X2; + + macro_rules! schedule { + ($v0:expr, $v1:expr, $v4:expr, $v5:expr, $v7:expr) => { + sha512_schedule_x2($v0, $v1, sha512load($v4, $v5), $v7) + }; + } + + macro_rules! rounds4 { + ($ae:ident, $bf:ident, $cg:ident, $dh:ident, $wk0:expr, $wk1:expr) => {{ + let [u, t] = $wk0; + let [w, v] = $wk1; + + $dh = sha512_digest_round($ae, $bf, $cg, $dh, t); + $cg = sha512_digest_round($dh, $ae, $bf, $cg, u); + $bf = sha512_digest_round($cg, $dh, $ae, $bf, v); + $ae = sha512_digest_round($bf, $cg, $dh, $ae, w); + }}; + } + + let mut ae = [state[0], state[4]]; + let mut bf = [state[1], state[5]]; + let mut cg = [state[2], state[6]]; + let mut dh = [state[3], state[7]]; + + // Rounds 0..20 + let (mut w1, mut w0) = ([block[3], block[2]], [block[1], block[0]]); + rounds4!(ae, bf, cg, dh, add(k[0], w0), add(k[1], w1)); + let (mut w3, mut w2) = ([block[7], block[6]], [block[5], block[4]]); + rounds4!(ae, bf, cg, dh, add(k[2], w2), add(k[3], w3)); + let (mut w5, mut w4) = ([block[11], block[10]], [block[9], block[8]]); + rounds4!(ae, bf, cg, dh, add(k[4], w4), add(k[5], w5)); + let (mut w7, mut w6) = ([block[15], block[14]], [block[13], block[12]]); + rounds4!(ae, bf, cg, dh, add(k[6], w6), add(k[7], w7)); + let mut w8 = schedule!(w0, w1, w4, w5, w7); + let mut w9 = schedule!(w1, w2, w5, w6, w8); + rounds4!(ae, bf, cg, dh, add(k[8], w8), add(k[9], w9)); + + // Rounds 20..40 + w0 = schedule!(w2, w3, w6, w7, w9); + w1 = schedule!(w3, w4, w7, w8, w0); + rounds4!(ae, bf, cg, dh, add(k[10], w0), add(k[11], w1)); + w2 = schedule!(w4, w5, w8, w9, w1); + w3 = schedule!(w5, w6, w9, w0, w2); + rounds4!(ae, bf, cg, dh, add(k[12], w2), add(k[13], w3)); + w4 = schedule!(w6, w7, w0, w1, w3); + w5 = schedule!(w7, w8, w1, w2, w4); + rounds4!(ae, bf, cg, dh, add(k[14], w4), add(k[15], w5)); + w6 = schedule!(w8, w9, w2, w3, w5); + w7 = schedule!(w9, w0, w3, w4, w6); + rounds4!(ae, bf, cg, dh, add(k[16], w6), add(k[17], w7)); + w8 = schedule!(w0, w1, w4, w5, w7); + w9 = schedule!(w1, w2, w5, w6, w8); + rounds4!(ae, bf, cg, dh, add(k[18], w8), add(k[19], w9)); + + // Rounds 40..60 + w0 = schedule!(w2, w3, w6, w7, w9); + w1 = schedule!(w3, w4, w7, w8, w0); + rounds4!(ae, bf, cg, dh, add(k[20], w0), add(k[21], w1)); + w2 = schedule!(w4, w5, w8, w9, w1); + w3 = schedule!(w5, w6, w9, w0, w2); + rounds4!(ae, bf, cg, dh, add(k[22], w2), add(k[23], w3)); + w4 = schedule!(w6, w7, w0, w1, w3); + w5 = schedule!(w7, w8, w1, w2, w4); + rounds4!(ae, bf, cg, dh, add(k[24], w4), add(k[25], w5)); + w6 = schedule!(w8, w9, w2, w3, w5); + w7 = schedule!(w9, w0, w3, w4, w6); + rounds4!(ae, bf, cg, dh, add(k[26], w6), add(k[27], w7)); + w8 = schedule!(w0, w1, w4, w5, w7); + w9 = schedule!(w1, w2, w5, w6, w8); + rounds4!(ae, bf, cg, dh, add(k[28], w8), add(k[29], w9)); + + // Rounds 60..80 + w0 = schedule!(w2, w3, w6, w7, w9); + w1 = schedule!(w3, w4, w7, w8, w0); + rounds4!(ae, 
bf, cg, dh, add(k[30], w0), add(k[31], w1)); + w2 = schedule!(w4, w5, w8, w9, w1); + w3 = schedule!(w5, w6, w9, w0, w2); + rounds4!(ae, bf, cg, dh, add(k[32], w2), add(k[33], w3)); + w4 = schedule!(w6, w7, w0, w1, w3); + w5 = schedule!(w7, w8, w1, w2, w4); + rounds4!(ae, bf, cg, dh, add(k[34], w4), add(k[35], w5)); + w6 = schedule!(w8, w9, w2, w3, w5); + w7 = schedule!(w9, w0, w3, w4, w6); + rounds4!(ae, bf, cg, dh, add(k[36], w6), add(k[37], w7)); + w8 = schedule!(w0, w1, w4, w5, w7); + w9 = schedule!(w1, w2, w5, w6, w8); + rounds4!(ae, bf, cg, dh, add(k[38], w8), add(k[39], w9)); + + let [a, e] = ae; + let [b, f] = bf; + let [c, g] = cg; + let [d, h] = dh; + + state[0] = state[0].wrapping_add(a); + state[1] = state[1].wrapping_add(b); + state[2] = state[2].wrapping_add(c); + state[3] = state[3].wrapping_add(d); + state[4] = state[4].wrapping_add(e); + state[5] = state[5].wrapping_add(f); + state[6] = state[6].wrapping_add(g); + state[7] = state[7].wrapping_add(h); +} + +pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) { + let mut block_u32 = [0u64; BLOCK_LEN]; + // since LLVM can't properly use aliasing yet it will make + // unnecessary state stores without this copy + let mut state_cpy = *state; + for block in blocks { + for (o, chunk) in block_u32.iter_mut().zip(block.chunks_exact(8)) { + *o = u64::from_be_bytes(chunk.try_into().unwrap()); + } + sha512_digest_block_u64(&mut state_cpy, &block_u32); + } + *state = state_cpy; +} diff --git a/rust/vendor/sha2/src/sha512/x86.rs b/rust/vendor/sha2/src/sha512/x86.rs new file mode 100644 index 0000000..bb79040 --- /dev/null +++ b/rust/vendor/sha2/src/sha512/x86.rs @@ -0,0 +1,357 @@ +//! SHA-512 `x86`/`x86_64` backend + +#![allow(clippy::many_single_char_names)] + +use core::mem::size_of; + +#[cfg(target_arch = "x86")] +use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; + +use crate::consts::K64; + +cpufeatures::new!(avx2_cpuid, "avx2"); + +pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) { + // TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725 + // after stabilization + if avx2_cpuid::get() { + unsafe { + sha512_compress_x86_64_avx2(state, blocks); + } + } else { + super::soft::compress(state, blocks); + } +} + +#[target_feature(enable = "avx2")] +unsafe fn sha512_compress_x86_64_avx2(state: &mut [u64; 8], blocks: &[[u8; 128]]) { + let mut start_block = 0; + + if blocks.len() & 0b1 != 0 { + sha512_compress_x86_64_avx(state, &blocks[0]); + start_block += 1; + } + + let mut ms: MsgSchedule = [_mm_setzero_si128(); 8]; + let mut t2: RoundStates = [_mm_setzero_si128(); 40]; + let mut x = [_mm256_setzero_si256(); 8]; + + for i in (start_block..blocks.len()).step_by(2) { + load_data_avx2(&mut x, &mut ms, &mut t2, blocks.as_ptr().add(i) as *const _); + + // First block + let mut current_state = *state; + rounds_0_63_avx2(&mut current_state, &mut x, &mut ms, &mut t2); + rounds_64_79(&mut current_state, &ms); + accumulate_state(state, ¤t_state); + + // Second block + current_state = *state; + process_second_block(&mut current_state, &t2); + accumulate_state(state, ¤t_state); + } +} + +#[inline(always)] +unsafe fn sha512_compress_x86_64_avx(state: &mut [u64; 8], block: &[u8; 128]) { + let mut ms = [_mm_setzero_si128(); 8]; + let mut x = [_mm_setzero_si128(); 8]; + + // Reduced to single iteration + let mut current_state = *state; + load_data_avx(&mut x, &mut ms, block.as_ptr() as *const _); + rounds_0_63_avx(&mut current_state, &mut x, &mut ms); + rounds_64_79(&mut current_state, &ms); + 
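// Davies-Meyer feed-forward: fold the compressed block back into the
// chaining state rather than replacing it.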
accumulate_state(state, ¤t_state); +} + +#[inline(always)] +unsafe fn load_data_avx(x: &mut [__m128i; 8], ms: &mut MsgSchedule, data: *const __m128i) { + #[allow(non_snake_case)] + let MASK = _mm_setr_epi32(0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b); + + macro_rules! unrolled_iterations { + ($($i:literal),*) => {$( + x[$i] = _mm_loadu_si128(data.add($i) as *const _); + x[$i] = _mm_shuffle_epi8(x[$i], MASK); + + let y = _mm_add_epi64( + x[$i], + _mm_loadu_si128(&K64[2 * $i] as *const u64 as *const _), + ); + + ms[$i] = y; + )*}; + } + + unrolled_iterations!(0, 1, 2, 3, 4, 5, 6, 7); +} + +#[inline(always)] +unsafe fn load_data_avx2( + x: &mut [__m256i; 8], + ms: &mut MsgSchedule, + t2: &mut RoundStates, + data: *const __m128i, +) { + #[allow(non_snake_case)] + let MASK = _mm256_set_epi64x( + 0x0809_0A0B_0C0D_0E0F_i64, + 0x0001_0203_0405_0607_i64, + 0x0809_0A0B_0C0D_0E0F_i64, + 0x0001_0203_0405_0607_i64, + ); + + macro_rules! unrolled_iterations { + ($($i:literal),*) => {$( + x[$i] = _mm256_insertf128_si256(x[$i], _mm_loadu_si128(data.add(8 + $i) as *const _), 1); + x[$i] = _mm256_insertf128_si256(x[$i], _mm_loadu_si128(data.add($i) as *const _), 0); + + x[$i] = _mm256_shuffle_epi8(x[$i], MASK); + + let t = _mm_loadu_si128(K64.as_ptr().add($i * 2) as *const u64 as *const _); + let y = _mm256_add_epi64(x[$i], _mm256_set_m128i(t, t)); + + ms[$i] = _mm256_extracti128_si256(y, 0); + t2[$i] = _mm256_extracti128_si256(y, 1); + )*}; + } + + unrolled_iterations!(0, 1, 2, 3, 4, 5, 6, 7); +} + +#[inline(always)] +unsafe fn rounds_0_63_avx(current_state: &mut State, x: &mut [__m128i; 8], ms: &mut MsgSchedule) { + let mut k64_idx: usize = SHA512_BLOCK_WORDS_NUM; + + for _ in 0..4 { + for j in 0..8 { + let k64 = _mm_loadu_si128(&K64[k64_idx] as *const u64 as *const _); + let y = sha512_update_x_avx(x, k64); + + { + let ms = cast_ms(ms); + sha_round(current_state, ms[2 * j]); + sha_round(current_state, ms[2 * j + 1]); + } + + ms[j] = y; + k64_idx += 2; + } + } +} + +#[inline(always)] +unsafe fn rounds_0_63_avx2( + current_state: &mut State, + x: &mut [__m256i; 8], + ms: &mut MsgSchedule, + t2: &mut RoundStates, +) { + let mut k64x4_idx: usize = SHA512_BLOCK_WORDS_NUM; + + for i in 1..5 { + for j in 0..8 { + let t = _mm_loadu_si128(K64.as_ptr().add(k64x4_idx) as *const u64 as *const _); + let y = sha512_update_x_avx2(x, _mm256_set_m128i(t, t)); + + { + let ms = cast_ms(ms); + sha_round(current_state, ms[2 * j]); + sha_round(current_state, ms[2 * j + 1]); + } + + ms[j] = _mm256_extracti128_si256(y, 0); + t2[8 * i + j] = _mm256_extracti128_si256(y, 1); + + k64x4_idx += 2; + } + } +} + +#[inline(always)] +fn rounds_64_79(current_state: &mut State, ms: &MsgSchedule) { + let ms = cast_ms(ms); + for i in 64..80 { + sha_round(current_state, ms[i & 0xf]); + } +} + +#[inline(always)] +fn process_second_block(current_state: &mut State, t2: &RoundStates) { + for t2 in cast_rs(t2).iter() { + sha_round(current_state, *t2); + } +} + +#[inline(always)] +fn sha_round(s: &mut State, x: u64) { + macro_rules! big_sigma0 { + ($a:expr) => { + $a.rotate_right(28) ^ $a.rotate_right(34) ^ $a.rotate_right(39) + }; + } + macro_rules! big_sigma1 { + ($a:expr) => { + $a.rotate_right(14) ^ $a.rotate_right(18) ^ $a.rotate_right(41) + }; + } + macro_rules! bool3ary_202 { + ($a:expr, $b:expr, $c:expr) => { + $c ^ ($a & ($b ^ $c)) + }; + } // Choose, MD5F, SHA1C + macro_rules! bool3ary_232 { + ($a:expr, $b:expr, $c:expr) => { + ($a & $b) ^ ($a & $c) ^ ($b & $c) + }; + } // Majority, SHA1M + + macro_rules! 
rotate_state { + ($s:ident) => {{ + let tmp = $s[7]; + $s[7] = $s[6]; + $s[6] = $s[5]; + $s[5] = $s[4]; + $s[4] = $s[3]; + $s[3] = $s[2]; + $s[2] = $s[1]; + $s[1] = $s[0]; + $s[0] = tmp; + }}; + } + + let t = x + .wrapping_add(s[7]) + .wrapping_add(big_sigma1!(s[4])) + .wrapping_add(bool3ary_202!(s[4], s[5], s[6])); + + s[7] = t + .wrapping_add(big_sigma0!(s[0])) + .wrapping_add(bool3ary_232!(s[0], s[1], s[2])); + s[3] = s[3].wrapping_add(t); + + rotate_state!(s); +} + +#[inline(always)] +fn accumulate_state(dst: &mut State, src: &State) { + for i in 0..SHA512_HASH_WORDS_NUM { + dst[i] = dst[i].wrapping_add(src[i]); + } +} + +macro_rules! fn_sha512_update_x { + ($name:ident, $ty:ident, { + ADD64 = $ADD64:ident, + ALIGNR8 = $ALIGNR8:ident, + SRL64 = $SRL64:ident, + SLL64 = $SLL64:ident, + XOR = $XOR:ident, + }) => { + unsafe fn $name(x: &mut [$ty; 8], k64: $ty) -> $ty { + // q[2:1] + let mut t0 = $ALIGNR8(x[1], x[0], 8); + // q[10:9] + let mut t3 = $ALIGNR8(x[5], x[4], 8); + // q[2:1] >> s0[0] + let mut t2 = $SRL64(t0, 1); + // q[1:0] + q[10:9] + x[0] = $ADD64(x[0], t3); + // q[2:1] >> s0[2] + t3 = $SRL64(t0, 7); + // q[2:1] << (64 - s0[1]) + let mut t1 = $SLL64(t0, 64 - 8); + // (q[2:1] >> s0[2]) ^ + // (q[2:1] >> s0[0]) + t0 = $XOR(t3, t2); + // q[2:1] >> s0[1] + t2 = $SRL64(t2, 8 - 1); + // (q[2:1] >> s0[2]) ^ + // (q[2:1] >> s0[0]) ^ + // q[2:1] << (64 - s0[1]) + t0 = $XOR(t0, t1); + // q[2:1] << (64 - s0[0]) + t1 = $SLL64(t1, 8 - 1); + // sigma1(q[2:1]) + t0 = $XOR(t0, t2); + t0 = $XOR(t0, t1); + // q[15:14] >> s1[2] + t3 = $SRL64(x[7], 6); + // q[15:14] >> (64 - s1[1]) + t2 = $SLL64(x[7], 64 - 61); + // q[1:0] + sigma0(q[2:1]) + x[0] = $ADD64(x[0], t0); + // q[15:14] >> s1[0] + t1 = $SRL64(x[7], 19); + // q[15:14] >> s1[2] ^ + // q[15:14] >> (64 - s1[1]) + t3 = $XOR(t3, t2); + // q[15:14] >> (64 - s1[0]) + t2 = $SLL64(t2, 61 - 19); + // q[15:14] >> s1[2] ^ + // q[15:14] >> (64 - s1[1] ^ + // q[15:14] >> s1[0] + t3 = $XOR(t3, t1); + // q[15:14] >> s1[1] + t1 = $SRL64(t1, 61 - 19); + // sigma1(q[15:14]) + t3 = $XOR(t3, t2); + t3 = $XOR(t3, t1); + + // q[1:0] + q[10:9] + sigma1(q[15:14]) + sigma0(q[2:1]) + x[0] = $ADD64(x[0], t3); + + // rotate + let temp = x[0]; + x[0] = x[1]; + x[1] = x[2]; + x[2] = x[3]; + x[3] = x[4]; + x[4] = x[5]; + x[5] = x[6]; + x[6] = x[7]; + x[7] = temp; + + $ADD64(x[7], k64) + } + }; +} + +fn_sha512_update_x!(sha512_update_x_avx, __m128i, { + ADD64 = _mm_add_epi64, + ALIGNR8 = _mm_alignr_epi8, + SRL64 = _mm_srli_epi64, + SLL64 = _mm_slli_epi64, + XOR = _mm_xor_si128, +}); + +fn_sha512_update_x!(sha512_update_x_avx2, __m256i, { + ADD64 = _mm256_add_epi64, + ALIGNR8 = _mm256_alignr_epi8, + SRL64 = _mm256_srli_epi64, + SLL64 = _mm256_slli_epi64, + XOR = _mm256_xor_si256, +}); + +#[inline(always)] +fn cast_ms(ms: &MsgSchedule) -> &[u64; SHA512_BLOCK_WORDS_NUM] { + unsafe { &*(ms as *const MsgSchedule as *const _) } +} + +#[inline(always)] +fn cast_rs(rs: &RoundStates) -> &[u64; SHA512_ROUNDS_NUM] { + unsafe { &*(rs as *const RoundStates as *const _) } +} + +type State = [u64; SHA512_HASH_WORDS_NUM]; +type MsgSchedule = [__m128i; SHA512_BLOCK_WORDS_NUM / 2]; +type RoundStates = [__m128i; SHA512_ROUNDS_NUM / 2]; + +const SHA512_BLOCK_BYTE_LEN: usize = 128; +const SHA512_ROUNDS_NUM: usize = 80; +const SHA512_HASH_BYTE_LEN: usize = 64; +const SHA512_HASH_WORDS_NUM: usize = SHA512_HASH_BYTE_LEN / size_of::<u64>(); +const SHA512_BLOCK_WORDS_NUM: usize = SHA512_BLOCK_BYTE_LEN / size_of::<u64>(); diff --git a/rust/vendor/sha2/tests/data/sha224.blb 
b/rust/vendor/sha2/tests/data/sha224.blb Binary files differ new file mode 100644 index 0000000..6932704 --- /dev/null +++ b/rust/vendor/sha2/tests/data/sha224.blb diff --git a/rust/vendor/sha2/tests/data/sha256.blb b/rust/vendor/sha2/tests/data/sha256.blb Binary files differ new file mode 100644 index 0000000..389a125 --- /dev/null +++ b/rust/vendor/sha2/tests/data/sha256.blb diff --git a/rust/vendor/sha2/tests/data/sha384.blb b/rust/vendor/sha2/tests/data/sha384.blb Binary files differ new file mode 100644 index 0000000..74a151f --- /dev/null +++ b/rust/vendor/sha2/tests/data/sha384.blb diff --git a/rust/vendor/sha2/tests/data/sha512.blb b/rust/vendor/sha2/tests/data/sha512.blb Binary files differ new file mode 100644 index 0000000..bb2ec39 --- /dev/null +++ b/rust/vendor/sha2/tests/data/sha512.blb diff --git a/rust/vendor/sha2/tests/data/sha512_224.blb b/rust/vendor/sha2/tests/data/sha512_224.blb Binary files differ new file mode 100644 index 0000000..3c3ab8f --- /dev/null +++ b/rust/vendor/sha2/tests/data/sha512_224.blb diff --git a/rust/vendor/sha2/tests/data/sha512_256.blb b/rust/vendor/sha2/tests/data/sha512_256.blb Binary files differ new file mode 100644 index 0000000..acd380a --- /dev/null +++ b/rust/vendor/sha2/tests/data/sha512_256.blb diff --git a/rust/vendor/sha2/tests/mod.rs b/rust/vendor/sha2/tests/mod.rs new file mode 100644 index 0000000..91df468 --- /dev/null +++ b/rust/vendor/sha2/tests/mod.rs @@ -0,0 +1,35 @@ +use digest::dev::{feed_rand_16mib, fixed_reset_test}; +use digest::new_test; +use hex_literal::hex; +use sha2::{Digest, Sha224, Sha256, Sha384, Sha512, Sha512_224, Sha512_256}; + +new_test!(sha224_main, "sha224", Sha224, fixed_reset_test); +new_test!(sha256_main, "sha256", Sha256, fixed_reset_test); +new_test!(sha512_224_main, "sha512_224", Sha512_224, fixed_reset_test); +new_test!(sha512_256_main, "sha512_256", Sha512_256, fixed_reset_test); +new_test!(sha384_main, "sha384", Sha384, fixed_reset_test); +new_test!(sha512_main, "sha512", Sha512, fixed_reset_test); + +#[test] +fn sha256_rand() { + let mut h = Sha256::new(); + feed_rand_16mib(&mut h); + assert_eq!( + h.finalize()[..], + hex!("45f51fead87328fe837a86f4f1ac0eb15116ab1473adc0423ef86c62eb2320c7")[..] + ); +} + +#[test] +#[rustfmt::skip] +fn sha512_rand() { + let mut h = Sha512::new(); + feed_rand_16mib(&mut h); + assert_eq!( + h.finalize()[..], + hex!(" + 9084d75a7c0721541d737b6171eb465dc9ba08a119a182a8508484aa27a176cd + e7c2103b108393eb024493ced4aac56be6f57222cac41b801f11494886264997 + ")[..] + ); +}
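For reference, a minimal usage sketch (not part of the vendored diff) of the public `Digest` trait these tests exercise; the expected value is the well-known SHA-256 digest of "hello world", and `hex_literal` is the same helper crate the tests above use:

use hex_literal::hex;
use sha2::{Digest, Sha256};

fn main() {
    // One-shot hashing; runtime dispatch selects the SHA-NI/NEON/soft backend.
    let hash = Sha256::digest(b"hello world");
    assert_eq!(
        hash[..],
        hex!("b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9")[..]
    );

    // Incremental hashing over several updates yields the same digest.
    let mut hasher = Sha256::new();
    hasher.update(b"hello ");
    hasher.update(b"world");
    assert_eq!(hasher.finalize()[..], hash[..]);
}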